code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
cli/tools/delegate.py
ADDED
|
@@ -0,0 +1,1184 @@
|
|
|
1
|
+
"""c3_delegate — LLM task offload via Ollama (local) or the Codex, Gemini and Claude Code CLIs (cloud).
|
|
2
|
+
|
|
3
|
+
Absorbs former c3_intelligence routing logic internally.
|
|
4
|
+
Supports task_type='available' for zero-cost Ollama status check.
|
|
5
|
+
Supports backend='codex' for OpenAI Codex CLI delegation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import subprocess
|
|
13
|
+
import sys
|
|
14
|
+
import time
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from core import count_tokens
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _log_progress(svc, message):
    """Forward *message* to the service's progress callback, when one exists.

    The callback is read from ``svc._agent_progress_cb``. A missing or falsy
    callback makes this a no-op, and any exception raised by the callback is
    swallowed so progress reporting can never break the delegate call.
    """
    notify = getattr(svc, "_agent_progress_cb", None)
    if not notify:
        return
    try:
        notify(message)
    except Exception:
        # Best-effort notification only: a broken callback must not propagate.
        pass
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Subprocess helpers
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _kill_proc_tree(proc):
    """Terminate *proc* and wait briefly for it to exit.

    On Windows, ``taskkill /F /T`` is used so child processes are killed too.
    On other platforms only ``proc.kill()`` is called, which signals the direct
    child only. Every failure is swallowed: this is best-effort cleanup.
    """
    try:
        if sys.platform != "win32":
            proc.kill()
        else:
            taskkill_cmd = ["taskkill", "/F", "/T", "/PID", str(proc.pid)]
            subprocess.run(
                taskkill_cmd,
                capture_output=True, stdin=subprocess.DEVNULL,
            )
        # Reap the process so it does not linger as a zombie.
        proc.wait(timeout=5)
    except Exception:
        pass
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _communicate_with_heartbeat(proc, timeout=45, idle_timeout=15):
    """communicate() replacement with idle-activity watchdog.

    Drains ``proc.stdout`` and ``proc.stderr`` on daemon threads while watching
    two limits: a total ``timeout`` and an ``idle_timeout`` measured from the
    last line seen on stderr. When either limit is exceeded the process is
    killed via ``_kill_proc_tree``.

    Args:
        proc: A started ``subprocess.Popen`` opened in text mode with piped
            stdout/stderr.
        timeout: Maximum total runtime in seconds.
        idle_timeout: Maximum seconds without stderr output; a falsy value
            disables the idle check.

    Returns:
        ``(stdout, stderr, status)`` where status is ``'ok'``, ``'timeout'``
        or ``'idle_timeout'``.
    """
    import threading

    stdout_parts = []
    stderr_parts = []
    # Monotonic clock: deadlines must not move when the wall clock is adjusted.
    last_activity = [time.monotonic()]

    def _read_stream(stream, parts, track_activity=False):
        if stream is None:
            # Stream was not piped by the caller; nothing to drain.
            return
        try:
            for line in stream:
                parts.append(line)
                if track_activity:
                    last_activity[0] = time.monotonic()
        except (ValueError, OSError):
            # Pipe closed underneath us (e.g. after the process was killed).
            pass

    t_out = threading.Thread(target=_read_stream, args=(proc.stdout, stdout_parts), daemon=True)
    t_err = threading.Thread(target=_read_stream, args=(proc.stderr, stderr_parts, True), daemon=True)
    t_out.start()
    t_err.start()

    deadline = time.monotonic() + timeout
    status = "ok"
    while proc.poll() is None:
        now = time.monotonic()
        if now >= deadline:
            _kill_proc_tree(proc)
            status = "timeout"
            break
        if idle_timeout and (now - last_activity[0]) > idle_timeout:
            _kill_proc_tree(proc)
            status = "idle_timeout"
            break
        # Block on the process itself rather than sleeping a fixed interval, so
        # we return as soon as it exits and never overshoot the deadline.
        try:
            proc.wait(timeout=min(0.5, deadline - now))
        except subprocess.TimeoutExpired:
            pass

    # Bounded joins: a grandchild may still hold the pipe open.
    t_out.join(timeout=3)
    t_err.join(timeout=3)
    return "".join(stdout_parts), "".join(stderr_parts), status
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _popen_kwargs():
    """Return extra ``subprocess.Popen`` keyword arguments for this platform.

    On Windows the child gets no console window and its own process group;
    everywhere else an empty dict is returned.
    """
    if sys.platform != "win32":
        return {}
    no_window_flag = 0x08000000      # CREATE_NO_WINDOW
    new_group_flag = 0x00000200      # CREATE_NEW_PROCESS_GROUP
    return {"creationflags": no_window_flag | new_group_flag}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
# Codex CLI backend
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
CODEX_MODELS = {
|
|
113
|
+
"review": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "high"},
|
|
114
|
+
"explain": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "medium"},
|
|
115
|
+
"improve": {"model": "gpt-5.4", "sandbox": "read-only", "reasoning": "high"},
|
|
116
|
+
"diagnose": {"model": "gpt-5.3-codex", "sandbox": "read-only", "reasoning": "high"},
|
|
117
|
+
"test": {"model": "gpt-5.3-codex-spark", "sandbox": "workspace-write", "reasoning": "medium"},
|
|
118
|
+
"summarize":{"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "low"},
|
|
119
|
+
"docstring":{"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "low"},
|
|
120
|
+
"ask": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "medium"},
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
_codex_available: bool | None = None # cached after first check
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Gemini CLI backend
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
GEMINI_MODELS = {
|
|
130
|
+
"review": {"model": "gemini-2.5-pro"},
|
|
131
|
+
"explain": {"model": "gemini-2.5-flash"},
|
|
132
|
+
"improve": {"model": "gemini-2.5-pro"},
|
|
133
|
+
"diagnose": {"model": "gemini-2.5-pro"},
|
|
134
|
+
"test": {"model": "gemini-2.5-flash"},
|
|
135
|
+
"summarize":{"model": "gemini-2.5-flash"},
|
|
136
|
+
"docstring":{"model": "gemini-2.5-flash"},
|
|
137
|
+
"ask": {"model": "gemini-2.5-flash"},
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
_gemini_available: bool | None = None # cached after first check
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _npm_global_bin() -> str:
    """Return the npm global bin directory, or ``""`` when it is not known.

    Only Windows is handled: the result is ``%APPDATA%\\npm`` when ``APPDATA``
    is set. On every other platform, or without ``APPDATA``, the empty string
    is returned.
    """
    if sys.platform != "win32":
        return ""
    roaming = os.environ.get("APPDATA", "")
    return os.path.join(roaming, "npm") if roaming else ""
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _ensure_npm_on_path() -> None:
    """Prepend the npm global bin directory to ``PATH`` if it is not there yet.

    This lets ``shutil.which()`` find npm-installed CLIs. Membership is tested
    against whole PATH entries (normalised for case and separators), so a
    directory such as ``...\\npm-cache`` is not mistaken for ``...\\npm``.
    Does nothing when ``_npm_global_bin()`` returns an empty string.
    """
    npm_bin = _npm_global_bin()
    if not npm_bin:
        return

    def _canonical(entry: str) -> str:
        return os.path.normcase(os.path.normpath(entry))

    current = os.environ.get("PATH", "")
    present = {_canonical(entry) for entry in current.split(os.pathsep) if entry}
    if _canonical(npm_bin) in present:
        return
    # Avoid a dangling separator when PATH was empty or unset.
    os.environ["PATH"] = npm_bin + os.pathsep + current if current else npm_bin
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _which(name: str) -> str | None:
    """Locate the executable *name*, searching the npm global bin as well.

    Returns the path reported by ``shutil.which`` or ``None`` when the
    executable cannot be found.
    """
    # Make sure npm-installed CLIs are discoverable before searching.
    _ensure_npm_on_path()
    resolved = shutil.which(name)
    return resolved
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _is_gemini_on_path() -> bool:
    """Report whether a ``gemini`` executable can be resolved via ``_which``."""
    located = _which("gemini")
    return located is not None
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Claude Code CLI backend
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
_claude_available: bool | None = None  # None=unknown, True=up, False=down
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _is_claude_on_path() -> bool:
    """Report whether a ``claude`` executable can be resolved via ``_which``."""
    located = _which("claude")
    return located is not None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def check_claude() -> dict:
    """Probe the Claude CLI by running ``claude --version``.

    Updates the module-level ``_claude_available`` flag and returns a dict
    whose ``status`` is ``"ok"`` (with ``version``), ``"not_installed"``,
    ``"timeout"`` or ``"error"`` (the last three carry ``detail``).
    """
    global _claude_available
    cli_path = _which("claude")
    if not cli_path:
        healthy = False
        report = {"status": "not_installed", "detail": "claude CLI not found on PATH"}
    else:
        try:
            result = subprocess.run(
                [cli_path, "--version"],
                stdin=subprocess.DEVNULL,
                capture_output=True, text=True, timeout=10,
            )
            healthy = result.returncode == 0
            if healthy:
                report = {"status": "ok", "version": result.stdout.strip()}
            else:
                report = {
                    "status": "error",
                    "detail": result.stderr.strip() or f"exit {result.returncode}",
                }
        except subprocess.TimeoutExpired:
            healthy = False
            report = {"status": "timeout", "detail": "claude --version timed out (10s)"}
        except Exception as e:
            healthy = False
            report = {"status": "error", "detail": str(e)}
    _claude_available = healthy
    return report
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _run_claude(task: str, context: str, cwd: str | None = None,
                timeout: int = 90, idle_timeout: int = 30) -> tuple:
    """Run ``claude -p`` in non-interactive print mode.

    Args:
        task: The instruction to send.
        context: Optional context placed before the task in the prompt.
        cwd: Working directory for the subprocess.
        timeout: Total time limit in seconds.
        idle_timeout: Seconds without stderr output before the process is
            killed (enforced by ``_communicate_with_heartbeat``).

    Returns:
        ``(output, success)``. On failure ``output`` is a bracketed
        ``[claude:...]`` message.
    """
    exe = _which("claude")
    if not exe:
        return "[claude:error] claude CLI not on PATH", False
    prompt = f"Context:\n{context}\n\nTask:\n{task}" if context else task
    cmd = [exe, "-p", prompt, "--output-format", "text"]
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            text=True, cwd=cwd,
            **_popen_kwargs(),
        )
        # The helper returns THREE values; unpacking only two raised ValueError
        # on every call, so this backend could never succeed.
        output, err, status = _communicate_with_heartbeat(
            proc, timeout=timeout, idle_timeout=idle_timeout,
        )
    except Exception as e:
        return f"[claude:error] {e}", False

    # NOTE(review): the idle watchdog only tracks stderr. If `claude -p` is
    # silent on stderr while working, idle_timeout bounds the run — confirm.
    if status == "idle_timeout":
        return f"[claude:idle_timeout] No stderr activity for {idle_timeout}s", False
    if status == "timeout":
        return f"[claude:timeout] No response after {timeout}s", False

    answer = output.strip()
    if proc.returncode == 0 and answer:
        return answer, True
    return f"[claude:error] {(err or '').strip() or 'no output'}", False
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _claude_memory_bridge(output: str, task_type: str, task: str, svc) -> None:
    """Store the first 400 characters of a Claude response via auto-memory.

    The snippet is handed to ``services.auto_memory._save_or_merge_standalone``
    under the tag ``auto:claude:<task_type>``. ``task`` is accepted but not
    used. Any failure, including a failed import, is silently ignored.
    """
    try:
        from services.auto_memory import _save_or_merge_standalone as _persist
        snippet = output[:400]
        tag = f"auto:claude:{task_type}"
        _persist(snippet, tag, svc)
    except Exception:
        # Memory capture is optional; never let it affect the delegate result.
        pass
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _handle_claude_delegate(task: str, task_type: str, context: str,
                            file_path: str, svc, dcfg: dict, finalize) -> str:
    """Delegate a task to the Claude Code CLI and finalize the result.

    The timeout comes from ``dcfg["claude_timeout"]`` (default 90) and the
    subprocess runs in ``svc.project_path``. ``file_path`` is accepted but not
    used. Returns whatever ``finalize`` returns, called with status ``"ok"``
    or ``"error"``.
    """
    limit = int(dcfg.get("claude_timeout", 90))
    _log_progress(svc, f"[delegate] Routing {task_type} → Claude CLI...")
    output, ok = _run_claude(task, context, cwd=str(svc.project_path), timeout=limit)
    meta = {"task_type": task_type, "backend": "claude"}
    outcome = "ok" if ok else "error"
    return finalize("c3_delegate", meta, output, outcome)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def check_gemini() -> dict:
    """Probe the Gemini CLI by running ``gemini --version``.

    Updates the module-level ``_gemini_available`` flag and returns a dict
    whose ``status`` is ``"ok"`` (with ``version``), ``"not_installed"``,
    ``"timeout"`` or ``"error"`` (the last three carry ``detail``).
    """
    global _gemini_available
    cli_path = _which("gemini")
    if not cli_path:
        healthy = False
        report = {"status": "not_installed", "detail": "gemini CLI not found on PATH"}
    else:
        try:
            result = subprocess.run(
                [cli_path, "--version"],
                stdin=subprocess.DEVNULL,
                capture_output=True, text=True, timeout=10,
            )
            healthy = result.returncode == 0
            if healthy:
                report = {"status": "ok", "version": result.stdout.strip()}
            else:
                report = {
                    "status": "error",
                    "detail": result.stderr.strip() or f"exit code {result.returncode}",
                }
        except subprocess.TimeoutExpired:
            healthy = False
            report = {"status": "timeout", "detail": "gemini --version timed out (10s)"}
        except Exception as e:
            healthy = False
            report = {"status": "error", "detail": str(e)}
    _gemini_available = healthy
    return report
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _start_gemini_early(model: str, timeout: int = 45, idle_timeout: int = 15,
                        cwd: str | None = None):
    """Start Gemini subprocess with stdin=PIPE so the prompt can be fed later.

    Call this before the compress step so Gemini's ~9s MCP startup overlaps
    with other work. Then call _finish_gemini_early() to send the prompt and
    collect the result.

    Args:
        model: Model name passed with ``-m``; omitted when falsy.
        timeout: Unused here; kept for signature compatibility.
        idle_timeout: Unused here; kept for signature compatibility.
        cwd: Working directory for the subprocess.

    Returns:
        The Popen object, or None if the gemini CLI cannot be resolved or the
        process fails to start.
    """
    # Single lookup: the previous double resolution could fall through with a
    # bare "gemini" command instead of the resolved path.
    gem_exe = _which("gemini")
    if not gem_exe:
        return None
    cmd = [
        gem_exe,
        "--output-format", "json",
        "--approval-mode", "yolo",
        "--allowed-mcp-server-names", "__none__",
    ]
    if model:
        cmd += ["-m", model]
    try:
        return subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
    except Exception:
        # Early start is an optimisation; callers treat None as "not started".
        return None
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _finish_gemini_early(proc, task: str, context: str,
|
|
320
|
+
timeout: int = 45, idle_timeout: int = 15):
|
|
321
|
+
"""Feed the prompt to an early-started Gemini process and collect result.
|
|
322
|
+
|
|
323
|
+
Returns (output, success, token_stats).
|
|
324
|
+
"""
|
|
325
|
+
import json as _json
|
|
326
|
+
|
|
327
|
+
empty_stats = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
|
|
328
|
+
if proc is None:
|
|
329
|
+
return "[gemini:error] process not started", False, empty_stats
|
|
330
|
+
|
|
331
|
+
prompt = f"{task}\n\nContext:\n{context}" if context else task
|
|
332
|
+
|
|
333
|
+
import threading
|
|
334
|
+
stdout_parts = []
|
|
335
|
+
stderr_parts = []
|
|
336
|
+
last_activity = [time.time()]
|
|
337
|
+
|
|
338
|
+
def _read_stream(stream, parts, track_activity=False):
|
|
339
|
+
try:
|
|
340
|
+
for line in stream:
|
|
341
|
+
parts.append(line)
|
|
342
|
+
if track_activity:
|
|
343
|
+
last_activity[0] = time.time()
|
|
344
|
+
except (ValueError, OSError):
|
|
345
|
+
pass
|
|
346
|
+
|
|
347
|
+
t_out = threading.Thread(target=_read_stream, args=(proc.stdout, stdout_parts), daemon=True)
|
|
348
|
+
t_err = threading.Thread(target=_read_stream, args=(proc.stderr, stderr_parts, True), daemon=True)
|
|
349
|
+
t_out.start()
|
|
350
|
+
t_err.start()
|
|
351
|
+
|
|
352
|
+
# Write prompt to stdin in a daemon thread — avoids blocking the caller if
|
|
353
|
+
# the pipe buffer fills up before Gemini reads (it reads only after MCP startup).
|
|
354
|
+
def _write_stdin():
|
|
355
|
+
try:
|
|
356
|
+
proc.stdin.write(prompt)
|
|
357
|
+
proc.stdin.close()
|
|
358
|
+
except Exception:
|
|
359
|
+
pass
|
|
360
|
+
threading.Thread(target=_write_stdin, daemon=True).start()
|
|
361
|
+
|
|
362
|
+
deadline = time.time() + timeout
|
|
363
|
+
status = "ok"
|
|
364
|
+
while proc.poll() is None:
|
|
365
|
+
now = time.time()
|
|
366
|
+
if now >= deadline:
|
|
367
|
+
_kill_proc_tree(proc)
|
|
368
|
+
status = "timeout"
|
|
369
|
+
break
|
|
370
|
+
if idle_timeout and (now - last_activity[0]) > idle_timeout:
|
|
371
|
+
_kill_proc_tree(proc)
|
|
372
|
+
status = "idle_timeout"
|
|
373
|
+
break
|
|
374
|
+
time.sleep(0.5)
|
|
375
|
+
|
|
376
|
+
t_out.join(timeout=3)
|
|
377
|
+
t_err.join(timeout=3)
|
|
378
|
+
stdout = "".join(stdout_parts)
|
|
379
|
+
stderr = "".join(stderr_parts)
|
|
380
|
+
|
|
381
|
+
if status == "idle_timeout":
|
|
382
|
+
return (f"[gemini:idle_timeout] No stderr activity for {idle_timeout}s "
|
|
383
|
+
f"(likely MCP startup hang)"), False, empty_stats
|
|
384
|
+
if status == "timeout":
|
|
385
|
+
return f"[gemini:timeout] No response after {timeout}s", False, empty_stats
|
|
386
|
+
if proc.returncode != 0:
|
|
387
|
+
err = stderr.strip() if stderr else f"exit code {proc.returncode}"
|
|
388
|
+
return f"[gemini:error] {err}", False, empty_stats
|
|
389
|
+
|
|
390
|
+
# Parse JSON output
|
|
391
|
+
raw = stdout.strip()
|
|
392
|
+
json_start = raw.find("{")
|
|
393
|
+
if json_start > 0:
|
|
394
|
+
raw = raw[json_start:]
|
|
395
|
+
try:
|
|
396
|
+
data = _json.loads(raw)
|
|
397
|
+
except (_json.JSONDecodeError, TypeError):
|
|
398
|
+
return raw, True, empty_stats
|
|
399
|
+
|
|
400
|
+
if isinstance(data, dict):
|
|
401
|
+
text = data.get("response", data.get("text", data.get("result", raw)))
|
|
402
|
+
elif isinstance(data, list):
|
|
403
|
+
texts = [msg.get("text", msg.get("content", ""))
|
|
404
|
+
for msg in data if isinstance(msg, dict)]
|
|
405
|
+
text = "\n".join(t for t in texts if t)
|
|
406
|
+
else:
|
|
407
|
+
text = str(data)
|
|
408
|
+
|
|
409
|
+
token_stats = dict(empty_stats)
|
|
410
|
+
if isinstance(data, dict):
|
|
411
|
+
stats = data.get("stats", {})
|
|
412
|
+
models = stats.get("models", {})
|
|
413
|
+
for _model_id, mdata in models.items():
|
|
414
|
+
tok = mdata.get("tokens", {})
|
|
415
|
+
token_stats["input_tokens"] += tok.get("input", 0) or 0
|
|
416
|
+
token_stats["output_tokens"] += tok.get("candidates", 0) or 0
|
|
417
|
+
token_stats["cached_tokens"] += tok.get("cached", 0) or 0
|
|
418
|
+
|
|
419
|
+
return text, True, token_stats
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _run_gemini(task: str, context: str, model: str,
|
|
423
|
+
timeout: int = 45, idle_timeout: int = 15,
|
|
424
|
+
cwd: str | None = None) -> tuple[str, bool, dict]:
|
|
425
|
+
"""Run gemini CLI as subprocess. Returns (output, success, token_stats).
|
|
426
|
+
|
|
427
|
+
Uses heartbeat monitor: kills process if no stderr activity for idle_timeout
|
|
428
|
+
seconds (catches MCP startup hangs). Also enforces total timeout (default 45s).
|
|
429
|
+
Parses structured JSON output for response text and token metrics.
|
|
430
|
+
"""
|
|
431
|
+
import json as _json
|
|
432
|
+
|
|
433
|
+
prompt = f"{task}\n\nContext:\n{context}" if context else task
|
|
434
|
+
gem_exe = _which("gemini") or "gemini"
|
|
435
|
+
cmd = [
|
|
436
|
+
gem_exe, "-p", prompt,
|
|
437
|
+
"--output-format", "json",
|
|
438
|
+
"--approval-mode", "yolo",
|
|
439
|
+
"--allowed-mcp-server-names", "__none__",
|
|
440
|
+
]
|
|
441
|
+
if model:
|
|
442
|
+
cmd += ["-m", model]
|
|
443
|
+
|
|
444
|
+
empty_stats = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
|
|
445
|
+
|
|
446
|
+
try:
|
|
447
|
+
proc = subprocess.Popen(
|
|
448
|
+
cmd,
|
|
449
|
+
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
450
|
+
stdin=subprocess.DEVNULL,
|
|
451
|
+
text=True,
|
|
452
|
+
cwd=cwd,
|
|
453
|
+
**_popen_kwargs(),
|
|
454
|
+
)
|
|
455
|
+
stdout, stderr, status = _communicate_with_heartbeat(
|
|
456
|
+
proc, timeout=timeout, idle_timeout=idle_timeout,
|
|
457
|
+
)
|
|
458
|
+
if status == "idle_timeout":
|
|
459
|
+
return (f"[gemini:idle_timeout] No stderr activity for {idle_timeout}s "
|
|
460
|
+
f"(likely MCP startup hang)"), False, empty_stats
|
|
461
|
+
if status == "timeout":
|
|
462
|
+
return f"[gemini:timeout] No response after {timeout}s", False, empty_stats
|
|
463
|
+
|
|
464
|
+
if proc.returncode != 0:
|
|
465
|
+
err = stderr.strip() if stderr else f"exit code {proc.returncode}"
|
|
466
|
+
return f"[gemini:error] {err}", False, empty_stats
|
|
467
|
+
|
|
468
|
+
# Parse JSON output — strip non-JSON prefix lines (MCP startup messages)
|
|
469
|
+
raw = stdout.strip()
|
|
470
|
+
json_start = raw.find("{")
|
|
471
|
+
if json_start > 0:
|
|
472
|
+
raw = raw[json_start:]
|
|
473
|
+
|
|
474
|
+
try:
|
|
475
|
+
data = _json.loads(raw)
|
|
476
|
+
except (_json.JSONDecodeError, TypeError):
|
|
477
|
+
# Fallback: treat entire stdout as plain text
|
|
478
|
+
return raw, True, empty_stats
|
|
479
|
+
|
|
480
|
+
# Extract response text
|
|
481
|
+
if isinstance(data, dict):
|
|
482
|
+
text = data.get("response", data.get("text", data.get("result", raw)))
|
|
483
|
+
elif isinstance(data, list):
|
|
484
|
+
texts = [msg.get("text", msg.get("content", ""))
|
|
485
|
+
for msg in data if isinstance(msg, dict)]
|
|
486
|
+
text = "\n".join(t for t in texts if t)
|
|
487
|
+
else:
|
|
488
|
+
text = str(data)
|
|
489
|
+
|
|
490
|
+
# Extract token stats from stats.models.<id>.tokens
|
|
491
|
+
token_stats = dict(empty_stats)
|
|
492
|
+
if isinstance(data, dict):
|
|
493
|
+
stats = data.get("stats", {})
|
|
494
|
+
models = stats.get("models", {})
|
|
495
|
+
for _model_id, mdata in models.items():
|
|
496
|
+
tok = mdata.get("tokens", {})
|
|
497
|
+
token_stats["input_tokens"] += tok.get("input", 0) or 0
|
|
498
|
+
token_stats["output_tokens"] += tok.get("candidates", 0) or 0
|
|
499
|
+
token_stats["cached_tokens"] += tok.get("cached", 0) or 0
|
|
500
|
+
|
|
501
|
+
return text, True, token_stats
|
|
502
|
+
except FileNotFoundError:
|
|
503
|
+
return "[gemini:error] gemini CLI not found on PATH", False, empty_stats
|
|
504
|
+
except Exception as e:
|
|
505
|
+
return f"[gemini:error] {e}", False, empty_stats
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def _is_codex_on_path() -> bool:
    """Report whether a ``codex`` executable can be resolved via ``_which``."""
    located = _which("codex")
    return located is not None
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def check_codex() -> dict:
    """Probe the Codex CLI by running ``codex --version``.

    Updates the module-level ``_codex_available`` flag and returns a dict
    whose ``status`` is ``"ok"`` (with ``version``), ``"not_installed"``,
    ``"timeout"`` or ``"error"`` (the last three carry ``detail``).
    """
    global _codex_available
    cli_path = _which("codex")
    if not cli_path:
        healthy = False
        report = {"status": "not_installed", "detail": "codex CLI not found on PATH"}
    else:
        try:
            result = subprocess.run(
                [cli_path, "--version"],
                stdin=subprocess.DEVNULL,
                capture_output=True, text=True, timeout=10,
            )
            healthy = result.returncode == 0
            if healthy:
                report = {"status": "ok", "version": result.stdout.strip()}
            else:
                report = {
                    "status": "error",
                    "detail": result.stderr.strip() or f"exit code {result.returncode}",
                }
        except subprocess.TimeoutExpired:
            healthy = False
            report = {"status": "timeout", "detail": "codex --version timed out (10s)"}
        except Exception as e:
            healthy = False
            report = {"status": "error", "detail": str(e)}
    _codex_available = healthy
    return report
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _run_codex(task: str, context: str, model: str, sandbox: str,
               reasoning: str = "high", timeout: int = 120,
               idle_timeout: int = 20,
               cwd: str | None = None) -> tuple[str, bool]:
    """Invoke ``codex exec`` and return ``(output, success)``.

    The prompt is the task followed by an optional context section. The
    subprocess is supervised by ``_communicate_with_heartbeat``: it is killed
    after ``idle_timeout`` seconds without stderr output or once ``timeout``
    seconds have elapsed. Failures are reported as bracketed ``[codex:...]``
    messages with ``success`` set to False.
    """
    prompt = task if not context else f"{task}\n\nContext:\n{context}"
    argv = [_which("codex") or "codex", "exec"]
    argv += ["-m", model]
    argv += ["--config", f"model_reasoning_effort={reasoning}"]
    argv += ["--sandbox", sandbox]
    argv += ["--full-auto", "--skip-git-repo-check", prompt]
    try:
        child = subprocess.Popen(
            argv,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
        out_text, err_text, outcome = _communicate_with_heartbeat(
            child, timeout=timeout, idle_timeout=idle_timeout,
        )
        if outcome == "idle_timeout":
            message = (f"[codex:idle_timeout] No stderr activity for {idle_timeout}s "
                       f"(likely MCP startup hang)")
            return message, False
        if outcome == "timeout":
            return f"[codex:timeout] No response after {timeout}s", False

        if child.returncode != 0:
            if err_text:
                reason = err_text.strip()
            else:
                reason = f"exit code {child.returncode}"
            return f"[codex:error] {reason}", False

        return out_text.strip(), True
    except FileNotFoundError:
        return "[codex:error] codex CLI not found on PATH", False
    except Exception as e:
        return f"[codex:error] {e}", False
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def _run_codex_resume(follow_up: str, timeout: int = 120,
                      cwd: str | None = None) -> tuple[str, bool]:
    """Resume last Codex session with a follow-up prompt.

    Args:
        follow_up: Text written to the resumed session's stdin.
        timeout: Total time limit in seconds.
        cwd: Working directory for the subprocess.

    Returns:
        ``(output, success)``. On failure ``output`` is a bracketed
        ``[codex:...]`` message.
    """
    # Resolve through _which() like _run_codex: a bare "codex" misses npm
    # .cmd shims on Windows.
    codex_exe = _which("codex") or "codex"
    cmd = [codex_exe, "exec", "--skip-git-repo-check", "resume", "--last"]
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
        try:
            stdout, stderr = proc.communicate(input=follow_up, timeout=timeout)
        except subprocess.TimeoutExpired:
            _kill_proc_tree(proc)
            # Finish communication after the kill so the pipes are drained and
            # closed, as the subprocess documentation recommends.
            try:
                proc.communicate(timeout=5)
            except Exception:
                pass
            return f"[codex:timeout] Resume timed out after {timeout}s", False

        if proc.returncode != 0:
            err = stderr.strip() if stderr else f"exit code {proc.returncode}"
            return f"[codex:error] {err}", False

        return stdout.strip(), True
    except FileNotFoundError:
        return "[codex:error] codex CLI not found on PATH", False
    except Exception as e:
        return f"[codex:error] {e}", False
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
# Delegate task definitions
|
|
626
|
+
# Each row is (task type, default model, system prompt, prompt template,
# temperature). The templates are filled with str.format(context=..., task=...).
DELEGATE_TASKS = {
    name: {
        "default_model": model,
        "system": system,
        "prompt_template": template,
        "temperature": temperature,
    }
    for name, model, system, template, temperature in (
        (
            "summarize",
            "gemma3n:latest",
            "You are a concise technical summarizer. Keep the answer compact and concrete.",
            "Context:\n{context}\n\nTask:\n{task}\n\nReturn a compact summary with only the key points.",
            0.2,
        ),
        (
            "explain",
            "llama3.2:3b",
            "You explain code precisely and concisely. Prefer short bullet points and specific references.",
            "Context:\n{context}\n\nQuestion:\n{task}\n\nExplain only what is needed to answer the question.",
            0.2,
        ),
        (
            "docstring",
            "gemma3n:latest",
            "Write terse, accurate code documentation.",
            "Context:\n{context}\n\nTask:\n{task}\n\nProduce a concise docstring or documentation snippet.",
            0.2,
        ),
        (
            "review",
            "llama3.2:3b",
            "You are a pragmatic code reviewer. Prioritize bugs, regressions, and missing tests.",
            "Context:\n{context}\n\nReview task:\n{task}\n\nReturn the most important findings first.",
            0.2,
        ),
        (
            "ask",
            "deepseek-r1:1.5b",
            "Answer narrowly and directly from the provided context.",
            "Context:\n{context}\n\nQuestion:\n{task}\n\nAnswer concisely.",
            0.2,
        ),
        (
            "test",
            "llama3.2:3b",
            "Design targeted tests that maximize defect coverage with minimal redundancy.",
            "Context:\n{context}\n\nTask:\n{task}\n\nProduce focused test ideas or test code.",
            0.2,
        ),
        (
            "diagnose",
            "llama3.2:3b",
            "You diagnose failures from logs and execution context. Focus on root cause and next step.",
            "Context:\n{context}\n\nProblem:\n{task}\n\nIdentify the most likely cause and the next debugging step.",
            0.1,
        ),
        (
            "improve",
            "llama3.2:3b",
            "You improve code with minimal, high-value changes.",
            "Context:\n{context}\n\nTask:\n{task}\n\nSuggest the smallest useful improvement plan.",
            0.2,
        ),
    )
}
|
|
676
|
+
|
|
677
|
+
# Module-level cache and metrics
|
|
678
|
+
_delegate_cache: dict[str, tuple[str, int]] = {}
|
|
679
|
+
_delegate_metrics = {"total_calls": 0, "tokens_saved": 0}
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def get_delegate_metrics() -> dict:
    """Return a shallow copy of the module-level delegate counters.

    Mutating the returned dict does not affect the live counters.
    """
    snapshot = _delegate_metrics.copy()
    return snapshot
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def infer_task_type(task: str, context: str = "") -> str:
    """Guess a delegate task type from keywords in the task and context.

    The task and context are joined, lower-cased and scanned for plain
    substrings. Rules are checked in priority order and the first rule with
    any matching keyword wins; when nothing matches the result is "explain".

    Args:
        task: The task description.
        context: Optional extra text that is searched as well.

    Returns:
        One of "diagnose", "review", "test", "docstring", "summarize",
        "improve" or "explain".
    """
    # Order matters: earlier rules take precedence over later ones.
    rules = (
        ("diagnose", ("traceback", "exception", "stack trace", "exit code", "failed", "error")),
        ("review", ("review", "regression", "bug risk", "audit")),
        ("test", ("test", "pytest", "unit test", "integration test")),
        ("docstring", ("docstring", "document", "documentation")),
        ("summarize", ("summarize", "summary", "tl;dr")),
        ("improve", ("improve", "refactor", "clean up", "optimize")),
    )
    haystack = "{}\n{}".format(task, context).lower()
    for label, keywords in rules:
        for keyword in keywords:
            if keyword in haystack:
                return label
    return "explain"
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
def resolve_model_name(candidate: str, available: list[str]) -> str:
    """Map a requested model name onto one of the available model names.

    Matching is case-insensitive and runs in three passes over ``available``,
    each returning the first hit in list order:

    1. exact name match;
    2. same base name (the part before the first ":"), with or without a tag;
    3. base name appearing anywhere inside the available name.

    Args:
        candidate: Requested model name; empty or whitespace-only means none.
        available: Model names to choose from.

    Returns:
        The matching entry from ``available`` in its original spelling, or ""
        when the candidate is empty or nothing matches.
    """
    wanted = candidate.strip().lower() if candidate else ""
    if not wanted:
        return ""

    family = wanted.partition(":")[0]
    passes = (
        lambda name: name == wanted,
        lambda name: name == family or name.startswith(family + ":"),
        lambda name: family in name,
    )
    for accepts in passes:
        for entry in available:
            if accepts(entry.lower()):
                return entry
    return ""
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def _fallback_model_order(task_type: str) -> list[str]:
|
|
724
|
+
if task_type in {"ask", "diagnose", "explain"}:
|
|
725
|
+
return ["llama3.2:latest", "llama3.2:3b", "qwen3-coder-next:latest", "llama3.1:latest", "gemma3n:latest"]
|
|
726
|
+
return ["llama3.2:latest", "llama3.2:3b", "qwen3-coder-next:latest", "gemma3n:latest"]
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def _estimate_confidence(task_type: str, response: str, response_tokens: int) -> str:
|
|
730
|
+
hedging = [
|
|
731
|
+
"i'm not sure", "i don't know", "it's unclear", "might be",
|
|
732
|
+
"possibly", "i cannot determine", "hard to say", "not enough context",
|
|
733
|
+
]
|
|
734
|
+
hedge_count = sum(1 for phrase in hedging if phrase in (response or "").lower())
|
|
735
|
+
min_tokens = {"summarize": 15, "explain": 30, "docstring": 10, "review": 20,
|
|
736
|
+
"ask": 10, "test": 30, "diagnose": 20, "improve": 10}
|
|
737
|
+
too_short = response_tokens < min_tokens.get(task_type, 10)
|
|
738
|
+
if too_short or hedge_count >= 2:
|
|
739
|
+
return "low"
|
|
740
|
+
if hedge_count == 1 or response_tokens < min_tokens.get(task_type, 10) * 2:
|
|
741
|
+
return "medium"
|
|
742
|
+
return "high"
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
# ---------------------------------------------------------------------------
|
|
746
|
+
# Codex delegate handler
|
|
747
|
+
# ---------------------------------------------------------------------------
|
|
748
|
+
|
|
749
|
+
def _handle_codex_delegate(task: str, task_type: str, context: str,
|
|
750
|
+
file_path: str, svc, dcfg: dict, finalize) -> str:
|
|
751
|
+
"""Handle delegation via Codex CLI."""
|
|
752
|
+
if not dcfg.get("codex_enabled", False):
|
|
753
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "codex"},
|
|
754
|
+
"[delegate:error] Codex not enabled. Set delegate.codex_enabled=true in .c3/config.json",
|
|
755
|
+
"disabled")
|
|
756
|
+
|
|
757
|
+
global _codex_available
|
|
758
|
+
if _codex_available is None:
|
|
759
|
+
check_codex() # populates _codex_available
|
|
760
|
+
if not _codex_available:
|
|
761
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "codex"},
|
|
762
|
+
"[delegate:error] Codex CLI not available. Run 'codex --version' to diagnose.",
|
|
763
|
+
"unavailable")
|
|
764
|
+
|
|
765
|
+
# Resolve model/sandbox/reasoning from config or defaults
|
|
766
|
+
cdef = CODEX_MODELS.get(task_type, CODEX_MODELS.get("ask", {}))
|
|
767
|
+
model = dcfg.get("codex_default_model") or cdef.get("model", "gpt-5.3-codex-spark")
|
|
768
|
+
sandbox = dcfg.get("codex_default_sandbox") or cdef.get("sandbox", "read-only")
|
|
769
|
+
reasoning = dcfg.get("codex_reasoning_effort") or cdef.get("reasoning", "high")
|
|
770
|
+
timeout = int(dcfg.get("codex_timeout", 120))
|
|
771
|
+
|
|
772
|
+
# Context enrichment (reuse existing pattern)
|
|
773
|
+
enriched = context
|
|
774
|
+
if file_path and dcfg.get("auto_compress", True):
|
|
775
|
+
for p in [p.strip() for p in file_path.split(",") if p.strip()]:
|
|
776
|
+
try:
|
|
777
|
+
res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
|
|
778
|
+
if isinstance(res, dict) and res.get("compressed"):
|
|
779
|
+
enriched += f"\n--- file: {p} ---\n{res['compressed']}"
|
|
780
|
+
except Exception:
|
|
781
|
+
continue
|
|
782
|
+
|
|
783
|
+
# Truncate context to avoid blowing Codex's input
|
|
784
|
+
max_ctx = max(200, int(dcfg.get("codex_max_context_tokens", 4000) or 4000))
|
|
785
|
+
if count_tokens(enriched) > max_ctx:
|
|
786
|
+
enriched = enriched[:max_ctx * 4]
|
|
787
|
+
|
|
788
|
+
# Cache check
|
|
789
|
+
ckey = hashlib.md5(f"codex|{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
|
|
790
|
+
if ckey in _delegate_cache:
|
|
791
|
+
cached_resp, _ = _delegate_cache[ckey]
|
|
792
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "codex", "cached": True},
|
|
793
|
+
cached_resp, "cached")
|
|
794
|
+
|
|
795
|
+
# Run Codex
|
|
796
|
+
_log_progress(svc, f"[delegate] Codex {model} ({sandbox}, reasoning={reasoning})...")
|
|
797
|
+
t0 = time.monotonic()
|
|
798
|
+
output, ok = _run_codex(
|
|
799
|
+
task=task, context=enriched,
|
|
800
|
+
model=model, sandbox=sandbox,
|
|
801
|
+
reasoning=reasoning, timeout=timeout,
|
|
802
|
+
cwd=str(svc.project_path),
|
|
803
|
+
)
|
|
804
|
+
elapsed = round(time.monotonic() - t0, 1)
|
|
805
|
+
|
|
806
|
+
if not ok:
|
|
807
|
+
return finalize("c3_delegate",
|
|
808
|
+
{"task_type": task_type, "backend": "codex", "model": model, "elapsed": f"{elapsed}s"},
|
|
809
|
+
output, "error")
|
|
810
|
+
|
|
811
|
+
_delegate_metrics["total_calls"] += 1
|
|
812
|
+
_delegate_cache[ckey] = (output, count_tokens(output))
|
|
813
|
+
|
|
814
|
+
# Memory bridge — auto-extract key findings from substantial Codex responses
|
|
815
|
+
_codex_memory_bridge(output, task_type, task, svc)
|
|
816
|
+
|
|
817
|
+
return finalize("c3_delegate",
|
|
818
|
+
{"task_type": task_type, "backend": "codex", "model": model, "elapsed": f"{elapsed}s"},
|
|
819
|
+
output, "ok")
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _codex_memory_bridge(output: str, task_type: str, task: str, svc):
|
|
823
|
+
"""Auto-extract key findings from Codex responses into c3_memory.
|
|
824
|
+
|
|
825
|
+
Only stores when the response is substantial and actionable.
|
|
826
|
+
"""
|
|
827
|
+
try:
|
|
828
|
+
memory = getattr(svc, "memory", None)
|
|
829
|
+
if not memory:
|
|
830
|
+
return
|
|
831
|
+
dcfg = svc.delegate_config or {}
|
|
832
|
+
if not dcfg.get("codex_memory_bridge", True):
|
|
833
|
+
return
|
|
834
|
+
|
|
835
|
+
# Only bridge substantial responses (not trivial or error)
|
|
836
|
+
tokens = count_tokens(output)
|
|
837
|
+
if tokens < 50 or tokens > 3000:
|
|
838
|
+
return # too short = trivial, too long = dump
|
|
839
|
+
|
|
840
|
+
# Skip benign responses
|
|
841
|
+
lower = output.lower()
|
|
842
|
+
benign = ("no issues", "looks good", "no problems", "lgtm", "all good",
|
|
843
|
+
"no regressions", "no bugs")
|
|
844
|
+
if any(b in lower for b in benign):
|
|
845
|
+
return
|
|
846
|
+
|
|
847
|
+
# Build a concise fact from the Codex output
|
|
848
|
+
# Truncate to keep facts digestible
|
|
849
|
+
summary = output[:400].strip()
|
|
850
|
+
if len(output) > 400:
|
|
851
|
+
summary += "..."
|
|
852
|
+
|
|
853
|
+
fact = f"[codex:{task_type}] {task[:80]} — {summary}"
|
|
854
|
+
memory.remember(fact, category=f"codex_{task_type}")
|
|
855
|
+
log.debug("codex_memory_bridge: stored fact for task_type=%s", task_type)
|
|
856
|
+
except Exception:
|
|
857
|
+
pass # never break delegation for memory
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
# ---------------------------------------------------------------------------
|
|
861
|
+
# Gemini delegate handler
|
|
862
|
+
# ---------------------------------------------------------------------------
|
|
863
|
+
|
|
864
|
+
def _handle_gemini_delegate(task: str, task_type: str, context: str,
|
|
865
|
+
file_path: str, svc, dcfg: dict, finalize) -> str:
|
|
866
|
+
"""Handle delegation via Gemini CLI."""
|
|
867
|
+
if not dcfg.get("gemini_enabled", False):
|
|
868
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini"},
|
|
869
|
+
"[delegate:error] Gemini not enabled. Set delegate.gemini_enabled=true in .c3/config.json",
|
|
870
|
+
"disabled")
|
|
871
|
+
|
|
872
|
+
global _gemini_available
|
|
873
|
+
if _gemini_available is None:
|
|
874
|
+
check_gemini()
|
|
875
|
+
if not _gemini_available:
|
|
876
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini"},
|
|
877
|
+
"[delegate:error] Gemini CLI not available. Run 'gemini --version' to diagnose.",
|
|
878
|
+
"unavailable")
|
|
879
|
+
|
|
880
|
+
# Resolve model from config or defaults
|
|
881
|
+
gdef = GEMINI_MODELS.get(task_type, GEMINI_MODELS.get("ask", {}))
|
|
882
|
+
model = dcfg.get("gemini_default_model") or gdef.get("model", "gemini-2.5-flash")
|
|
883
|
+
timeout = int(dcfg.get("gemini_timeout", 120))
|
|
884
|
+
|
|
885
|
+
# Context enrichment (reuse existing pattern)
|
|
886
|
+
enriched = context
|
|
887
|
+
if file_path and dcfg.get("auto_compress", True):
|
|
888
|
+
for p in [p.strip() for p in file_path.split(",") if p.strip()]:
|
|
889
|
+
try:
|
|
890
|
+
res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
|
|
891
|
+
if isinstance(res, dict) and res.get("compressed"):
|
|
892
|
+
enriched += f"\n--- file: {p} ---\n{res['compressed']}"
|
|
893
|
+
except Exception:
|
|
894
|
+
continue
|
|
895
|
+
|
|
896
|
+
# Truncate context
|
|
897
|
+
max_ctx = max(200, int(dcfg.get("gemini_max_context_tokens", 8000) or 8000))
|
|
898
|
+
if count_tokens(enriched) > max_ctx:
|
|
899
|
+
enriched = enriched[:max_ctx * 4]
|
|
900
|
+
|
|
901
|
+
# Cache check
|
|
902
|
+
ckey = hashlib.md5(f"gemini|{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
|
|
903
|
+
if ckey in _delegate_cache:
|
|
904
|
+
cached_resp, _ = _delegate_cache[ckey]
|
|
905
|
+
return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini", "cached": True},
|
|
906
|
+
cached_resp, "cached")
|
|
907
|
+
|
|
908
|
+
# Run Gemini
|
|
909
|
+
_log_progress(svc, f"[delegate] Gemini {model}...")
|
|
910
|
+
t0 = time.monotonic()
|
|
911
|
+
output, ok, token_stats = _run_gemini(
|
|
912
|
+
task=task, context=enriched,
|
|
913
|
+
model=model, timeout=timeout,
|
|
914
|
+
cwd=str(svc.project_path),
|
|
915
|
+
)
|
|
916
|
+
elapsed = round(time.monotonic() - t0, 1)
|
|
917
|
+
|
|
918
|
+
if not ok:
|
|
919
|
+
return finalize("c3_delegate",
|
|
920
|
+
{"task_type": task_type, "backend": "gemini", "model": model, "elapsed": f"{elapsed}s"},
|
|
921
|
+
output, "error")
|
|
922
|
+
|
|
923
|
+
_delegate_metrics["total_calls"] += 1
|
|
924
|
+
_delegate_cache[ckey] = (output, count_tokens(output))
|
|
925
|
+
|
|
926
|
+
# Memory bridge
|
|
927
|
+
_gemini_memory_bridge(output, task_type, task, svc)
|
|
928
|
+
|
|
929
|
+
return finalize("c3_delegate",
|
|
930
|
+
{"task_type": task_type, "backend": "gemini", "model": model,
|
|
931
|
+
"elapsed": f"{elapsed}s", **token_stats},
|
|
932
|
+
output, "ok")
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
def _gemini_memory_bridge(output: str, task_type: str, task: str, svc):
|
|
936
|
+
"""Auto-extract key findings from Gemini responses into c3_memory."""
|
|
937
|
+
try:
|
|
938
|
+
memory = getattr(svc, "memory", None)
|
|
939
|
+
if not memory:
|
|
940
|
+
return
|
|
941
|
+
dcfg = svc.delegate_config or {}
|
|
942
|
+
if not dcfg.get("gemini_memory_bridge", True):
|
|
943
|
+
return
|
|
944
|
+
|
|
945
|
+
tokens = count_tokens(output)
|
|
946
|
+
if tokens < 50 or tokens > 3000:
|
|
947
|
+
return
|
|
948
|
+
|
|
949
|
+
lower = output.lower()
|
|
950
|
+
benign = ("no issues", "looks good", "no problems", "lgtm", "all good",
|
|
951
|
+
"no regressions", "no bugs")
|
|
952
|
+
if any(b in lower for b in benign):
|
|
953
|
+
return
|
|
954
|
+
|
|
955
|
+
summary = output[:400].strip()
|
|
956
|
+
if len(output) > 400:
|
|
957
|
+
summary += "..."
|
|
958
|
+
|
|
959
|
+
fact = f"[gemini:{task_type}] {task[:80]} -- {summary}"
|
|
960
|
+
memory.remember(fact, category=f"gemini_{task_type}")
|
|
961
|
+
log.debug("gemini_memory_bridge: stored fact for task_type=%s", task_type)
|
|
962
|
+
except Exception:
|
|
963
|
+
pass
|
|
964
|
+
|
|
965
|
+
|
|
966
|
+
def handle_delegate(task: str, task_type: str, context: str, file_path: str,
                    svc, finalize, backend: str = "ollama") -> str:
    """Entry point for delegation: health checks, backend routing, Ollama run.

    Special ``task_type`` values are served first and never reach a model:
    "available" (probe all four backends in parallel), "codex_check",
    "codex_resume" and "gemini_check". Otherwise the call is routed by
    ``backend``: "codex", "gemini" and "claude" go to their handlers, "auto"
    picks one based on task type and availability, and anything else falls
    through to the local Ollama path implemented here.

    Args:
        task: The task or question (the follow-up text for "codex_resume").
        task_type: A delegate task type, "auto", or a special value above.
        context: Caller-supplied context text (None is treated as empty).
        file_path: Comma-separated paths, relative to ``svc.project_path``,
            whose compressed content is appended to the context.
        svc: Service object; ``delegate_config``, ``ollama_client``,
            ``compressor``, ``project_path`` and ``activity_log`` are read.
        finalize: Callback invoked as ``finalize(tool, meta, text, status)``.
        backend: "ollama" (default), "codex", "gemini", "claude" or "auto".

    Returns:
        Usually whatever ``finalize`` returns. A few early exits on the
        Ollama path return a bare ``[delegate:...]`` string instead.
    """
    dcfg = svc.delegate_config or {}
    if not dcfg.get("enabled", True):
        return "[delegate:disabled]"

    # --- Health checks -----------------------------------------------------
    if task_type == "available":
        # Parallel health check across all backends
        from concurrent.futures import ThreadPoolExecutor, as_completed
        results = {}

        def _check_ollama():
            ollama = svc.ollama_client
            if not ollama:
                return "ollama", "down", "client=None", []
            up = ollama.is_available()
            models = ollama.list_models() if up else []
            return "ollama", "up" if up else "down", "", models or []

        def _check_codex():
            info = check_codex()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "codex", s, d, []

        def _check_gemini():
            info = check_gemini()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "gemini", s, d, []

        def _check_claude():
            info = check_claude()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "claude", s, d, []

        checks = {
            "ollama": _check_ollama,
            "codex": _check_codex,
            "gemini": _check_gemini,
            "claude": _check_claude,
        }
        with ThreadPoolExecutor(max_workers=len(checks)) as pool:
            futs = {pool.submit(fn): name for name, fn in checks.items()}
            for fut in as_completed(futs):
                try:
                    name, status, detail, models = fut.result()
                except Exception as exc:
                    # One failing probe must not abort the whole report.
                    name, status, detail, models = futs[fut], "error", str(exc), []
                results[name] = (status, detail, models)

        lines = []
        for name in ("ollama", "codex", "gemini", "claude"):
            status, detail, models = results.get(name, ("unknown", "", []))
            line = f" {name}={status}"
            if detail:
                line += f" {detail}"
            if models:
                line += f" models={len(models)} [{', '.join(models[:5])}]"
            lines.append(line)

        summary_statuses = [results.get(n, ("unknown",))[0] for n in ("ollama", "codex", "gemini", "claude")]
        up_count = sum(1 for s in summary_statuses if s in ("up", "ok"))
        return finalize("c3_delegate", {"task_type": "available"},
                        f"[delegate:available] {up_count}/4 backends up\n" + "\n".join(lines),
                        f"{up_count}/4 up")

    if task_type == "codex_check":
        info = check_codex()
        status = info.get("status", "unknown")
        detail = info.get("version") or info.get("detail", "")
        return finalize("c3_delegate", {"task_type": "codex_check"},
                        f"[delegate:codex_check] status={status} {detail}".strip(),
                        status)

    if task_type == "codex_resume":
        if not dcfg.get("codex_enabled", False):
            return finalize("c3_delegate", {"task_type": "codex_resume"},
                            "[delegate:error] Codex not enabled in config", "disabled")
        timeout = int(dcfg.get("codex_timeout", 120) or 120)
        output, ok = _run_codex_resume(task, timeout=timeout,
                                       cwd=str(svc.project_path))
        return finalize("c3_delegate", {"task_type": "codex_resume"},
                        output, "ok" if ok else "error")

    if task_type == "gemini_check":
        info = check_gemini()
        status = info.get("status", "unknown")
        detail = info.get("version") or info.get("detail", "")
        return finalize("c3_delegate", {"task_type": "gemini_check"},
                        f"[delegate:gemini_check] status={status} {detail}".strip(),
                        status)

    # --- Backend routing ---------------------------------------------------
    if backend == "auto":
        # Priority: Codex > Gemini > Ollama for heavy tasks
        heavy_codex = set(dcfg.get("codex_task_types", ["review", "diagnose", "improve", "test"]))
        heavy_gemini = set(dcfg.get("gemini_task_types", ["review", "diagnose", "improve", "test"]))
        # For heavy tasks, prefer cloud CLIs when available (faster than Ollama).
        # "Available" = pre-warm health check passed OR found on PATH.
        # The `enabled` config flag remains the primary gate, but availability
        # on-PATH is enough to prefer cloud over slow Ollama for heavy tasks.
        # NOTE(review): the selection below does not consult codex_enabled /
        # gemini_enabled; the chosen handler rejects the call when its flag
        # is off. Confirm that this is the intended behaviour for "auto".
        _light_tasks = {"ask", "explain", "summarize", "docstring"}
        _codex_avail = (_codex_available is True) or (
            _codex_available is None and task_type not in _light_tasks and _is_codex_on_path()
        )
        _gemini_avail = (_gemini_available is True) or (
            _gemini_available is None and task_type not in _light_tasks and _is_gemini_on_path()
        )
        if task_type in heavy_codex and _codex_avail and _codex_available is not False:
            backend = "codex"
        elif task_type in heavy_gemini and _gemini_avail and _gemini_available is not False:
            backend = "gemini"
        else:
            backend = "ollama"

    if backend == "codex":
        _log_progress(svc, f"[delegate] Routing {task_type} → Codex...")
        return _handle_codex_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    if backend == "gemini":
        _log_progress(svc, f"[delegate] Routing {task_type} → Gemini...")
        return _handle_gemini_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    if backend == "claude":
        return _handle_claude_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    # --- Original Ollama path (backend="ollama") ---------------------------

    if task_type == "auto":
        task_type = infer_task_type(task, context)

    tdef = DELEGATE_TASKS.get(task_type)
    if not tdef:
        return f"[delegate:error] Unknown type: {task_type}"
    ollama = svc.ollama_client
    if not ollama or not ollama.is_available():
        return "[delegate:error] Ollama unavailable. Requires Ollama for local LLM tasks."

    # Context enrichment. Start from a real string: with a None context every
    # "+=" below would raise (and be swallowed inside the file loop).
    enriched = context or ""
    if file_path and dcfg.get("auto_compress", True):
        for p in [p.strip() for p in file_path.split(",") if p.strip()]:
            try:
                res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
                if isinstance(res, dict) and res.get("compressed"):
                    enriched += f"\n--- file: {p} ---\n{res['compressed']}"
            except Exception:
                # Best-effort: a file that cannot be compressed is skipped.
                continue

    if task_type == "diagnose" and dcfg.get("auto_activity_log", True):
        recent = svc.activity_log.get_recent(limit=8)
        if recent:
            enriched += "\nRecent Activity:\n" + "\n".join(
                [f"[{e.get('timestamp','').split('T')[-1][:8]}] {e.get('tool','')}..."
                 for e in reversed(recent)])

    # The cut is made in characters, using 4 characters per token as the budget.
    max_context_tokens = max(200, int(dcfg.get("max_context_tokens", 1400) or 1400))
    if count_tokens(enriched) > max_context_tokens:
        enriched = enriched[:max_context_tokens * 4]

    # Model resolution: per-task override, then preferred model, then the
    # task default; if none of those is installed, walk the fallback order
    # followed by every installed model.
    req_model = dcfg.get(f"{task_type}_model") or dcfg.get("preferred_model") or tdef["default_model"]
    avail = ollama.list_models() or []
    model = resolve_model_name(req_model, avail)
    if not model:
        for cand in _fallback_model_order(task_type) + avail:
            model = resolve_model_name(cand, avail)
            if model:
                break
    if not model:
        return "[delegate:error] No compatible local model found"

    # Cache check
    ckey = hashlib.md5(f"{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
    if ckey in _delegate_cache:
        cached_resp, _ = _delegate_cache[ckey]
        return finalize("c3_delegate", {"task_type": task_type, "cached": True},
                        cached_resp, "cached")

    # Generate. The prompt and token limit are shared by the first attempt
    # and by any fallback retries.
    _log_progress(svc, f"[delegate] Running Ollama ({model})...")
    timeout_s = int(dcfg.get("timeout", 90) or 90)
    prompt = tdef["prompt_template"].format(context=enriched, task=task)
    max_tokens = int(dcfg.get("max_tokens", 512) or 512)
    _t0 = time.monotonic()
    resp = ollama.generate(
        prompt=prompt,
        model=model, system=tdef["system"],
        temperature=tdef.get("temperature", 0.3),
        max_tokens=max_tokens,
        timeout=timeout_s)
    # Covers the first attempt only; fallback retries are not included.
    _elapsed = round(time.monotonic() - _t0, 1)
    if resp is None:
        return finalize("c3_delegate", {"task_type": task_type, "model": model},
                        f"[delegate:timeout] No response from {model} after {_elapsed}s "
                        f"(limit {timeout_s}s)", "timeout")

    # Self-correction: retry with fallback model on low confidence
    conf = _estimate_confidence(task_type, resp, count_tokens(resp))
    if conf == "low" and dcfg.get("allow_model_fallback", True):
        tried = {model}
        for fallback_cand in _fallback_model_order(task_type) + avail:
            fallback = resolve_model_name(fallback_cand, avail)
            if not fallback or fallback in tried:
                continue
            tried.add(fallback)
            retry_resp = ollama.generate(
                prompt=prompt,
                model=fallback, system=tdef["system"],
                temperature=tdef.get("temperature", 0.3),
                max_tokens=max_tokens,
                timeout=timeout_s,
            )
            if retry_resp is None:
                # No answer from this fallback (generate returns None, as
                # handled for the first attempt above); keep what we have.
                continue
            retry_conf = _estimate_confidence(task_type, retry_resp, count_tokens(retry_resp))
            if retry_conf != "low":
                resp = retry_resp
                conf = retry_conf
                model = fallback
                break
            # Still low: keep the longer answer, rate it "medium", and keep
            # trying the remaining fallbacks.
            if count_tokens(retry_resp) > count_tokens(resp):
                resp = retry_resp
                model = fallback
                conf = "medium"

    _delegate_metrics["total_calls"] += 1
    _delegate_cache[ckey] = (resp, count_tokens(resp))
    # NOTE(review): this payload uses the key "task" while every other
    # finalize() call in this function uses "task_type" — confirm which one
    # consumers expect before unifying.
    return finalize("c3_delegate", {"task": task_type, "model": model, "elapsed": f"{_elapsed}s"},
                    resp, conf)
|