code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
cli/tools/delegate.py ADDED
@@ -0,0 +1,1184 @@
1
+ """c3_delegate — LLM task offload via Ollama (local), Codex CLI (cloud), Gemini CLI, or Claude Code CLI.
2
+
3
+ Absorbs former c3_intelligence routing logic internally.
4
+ Supports task_type='available' for zero-cost Ollama status check.
5
+ Supports backend='codex' for OpenAI Codex CLI delegation.
6
+ """
7
+
8
+ import hashlib
9
+ import logging
10
+ import os
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ import time
15
+ from pathlib import Path
16
+
17
+ from core import count_tokens
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
+
22
def _log_progress(svc, message):
    """Forward *message* to the service's progress callback, if one is set.

    The callback is read from ``svc._agent_progress_cb``; a missing or falsy
    attribute makes this a no-op. Progress reporting is best-effort: a failing
    callback never propagates to the caller, but the failure is recorded at
    DEBUG level instead of vanishing silently.
    """
    cb = getattr(svc, "_agent_progress_cb", None)
    if not cb:
        return
    try:
        cb(message)
    except Exception:
        logging.getLogger(__name__).debug("progress callback failed", exc_info=True)
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Subprocess helpers
33
+ # ---------------------------------------------------------------------------
34
+
35
+
36
def _kill_proc_tree(proc):
    """Forcefully stop *proc* and reap it. Never raises.

    On Windows ``taskkill /F /T`` is used so that child processes are
    terminated as well. On other platforms only the direct child is killed
    via ``Popen.kill()``.
    """
    if proc.poll() is not None:
        return  # already exited; nothing to kill

    try:
        if sys.platform == "win32":
            subprocess.run(
                ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
                capture_output=True, stdin=subprocess.DEVNULL,
                timeout=10,  # the watchdog must not hang on its own helper
            )
        else:
            proc.kill()
    except Exception:
        # taskkill missing/timed out, or kill raced with exit: fall back to
        # a direct kill so the child is not left running.
        try:
            proc.kill()
        except Exception:
            pass

    try:
        proc.wait(timeout=5)  # reap so returncode is populated
    except Exception:
        pass
49
+
50
+
51
def _communicate_with_heartbeat(proc, timeout=45, idle_timeout=15):
    """``communicate()`` replacement with an idle-activity watchdog.

    Both pipes are drained on daemon threads. Only stderr output counts as
    activity: if stderr stays silent for more than ``idle_timeout`` seconds
    the process is killed. A falsy ``idle_timeout`` disables that check. The
    overall ``timeout`` is always enforced.

    Args:
        proc: a ``Popen`` started with ``stdout=PIPE``, ``stderr=PIPE`` and
            ``text=True``.
        timeout: total seconds allowed before the process is killed.
        idle_timeout: seconds of stderr silence allowed, or falsy to disable.

    Returns:
        ``(stdout, stderr, status)`` where status is ``'ok'``, ``'timeout'``
        or ``'idle_timeout'``.
    """
    import threading

    stdout_parts = []
    stderr_parts = []
    # Monotonic clock: wall-clock adjustments must not trigger or delay a timeout.
    last_activity = [time.monotonic()]

    def _read_stream(stream, parts, track_activity=False):
        try:
            for line in stream:
                parts.append(line)
                if track_activity:
                    last_activity[0] = time.monotonic()
        except (ValueError, OSError):
            pass  # pipe closed underneath us

    t_out = threading.Thread(target=_read_stream, args=(proc.stdout, stdout_parts), daemon=True)
    t_err = threading.Thread(target=_read_stream, args=(proc.stderr, stderr_parts, True), daemon=True)
    t_out.start()
    t_err.start()

    deadline = time.monotonic() + timeout
    status = "ok"
    while proc.poll() is None:
        now = time.monotonic()
        if now >= deadline:
            _kill_proc_tree(proc)
            status = "timeout"
            break
        if idle_timeout and (now - last_activity[0]) > idle_timeout:
            _kill_proc_tree(proc)
            status = "idle_timeout"
            break
        # Wait instead of sleeping so we return as soon as the child exits.
        try:
            proc.wait(timeout=0.5)
        except subprocess.TimeoutExpired:
            pass

    for thread, stream in ((t_out, proc.stdout), (t_err, proc.stderr)):
        thread.join(timeout=3)
        # Close only once the reader is done; closing under a blocked reader
        # could block this thread on the buffer lock.
        if not thread.is_alive():
            try:
                stream.close()
            except OSError:
                pass
    return "".join(stdout_parts), "".join(stderr_parts), status
96
+
97
+
98
def _popen_kwargs():
    """Return extra ``subprocess.Popen`` keyword arguments for this platform.

    On Windows the result carries ``creationflags`` combining the
    no-console-window and new-process-group flags; elsewhere it is empty.
    """
    if sys.platform != "win32":
        return {}
    no_window = 0x08000000
    new_process_group = 0x00000200
    return {"creationflags": no_window | new_process_group}
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Codex CLI backend
110
+ # ---------------------------------------------------------------------------
111
+
112
+ CODEX_MODELS = {
113
+ "review": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "high"},
114
+ "explain": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "medium"},
115
+ "improve": {"model": "gpt-5.4", "sandbox": "read-only", "reasoning": "high"},
116
+ "diagnose": {"model": "gpt-5.3-codex", "sandbox": "read-only", "reasoning": "high"},
117
+ "test": {"model": "gpt-5.3-codex-spark", "sandbox": "workspace-write", "reasoning": "medium"},
118
+ "summarize":{"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "low"},
119
+ "docstring":{"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "low"},
120
+ "ask": {"model": "gpt-5.3-codex-spark", "sandbox": "read-only", "reasoning": "medium"},
121
+ }
122
+
123
+ _codex_available: bool | None = None # cached after first check
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Gemini CLI backend
127
+ # ---------------------------------------------------------------------------
128
+
129
+ GEMINI_MODELS = {
130
+ "review": {"model": "gemini-2.5-pro"},
131
+ "explain": {"model": "gemini-2.5-flash"},
132
+ "improve": {"model": "gemini-2.5-pro"},
133
+ "diagnose": {"model": "gemini-2.5-pro"},
134
+ "test": {"model": "gemini-2.5-flash"},
135
+ "summarize":{"model": "gemini-2.5-flash"},
136
+ "docstring":{"model": "gemini-2.5-flash"},
137
+ "ask": {"model": "gemini-2.5-flash"},
138
+ }
139
+
140
+ _gemini_available: bool | None = None # cached after first check
141
+
142
+
143
def _npm_global_bin() -> str:
    """Return the npm global bin directory, or ``""`` when it is unknown.

    Only Windows is handled: the directory is ``%APPDATA%\\npm``. On other
    platforms, or when ``APPDATA`` is unset or empty, the result is ``""``.
    """
    if sys.platform != "win32":
        return ""
    roaming = os.environ.get("APPDATA", "")
    return os.path.join(roaming, "npm") if roaming else ""
150
+
151
+
152
def _ensure_npm_on_path() -> None:
    """Prepend the npm global bin directory to ``PATH`` if it is not listed.

    Lets ``shutil.which()`` find npm-installed CLIs. Does nothing when
    ``_npm_global_bin()`` returns an empty string.
    """
    npm_bin = _npm_global_bin()
    if not npm_bin:
        return

    current = os.environ.get("PATH", "")

    def _norm(entry: str) -> str:
        return os.path.normcase(os.path.normpath(entry))

    # Compare whole PATH entries. A substring test against the raw PATH
    # string would treat e.g. "...\npm-cache" as if "...\npm" were present.
    entries = {_norm(e) for e in current.split(os.pathsep) if e}
    if _norm(npm_bin) in entries:
        return

    os.environ["PATH"] = f"{npm_bin}{os.pathsep}{current}" if current else npm_bin
157
+
158
+
159
def _which(name: str) -> str | None:
    """Locate the executable *name*, or return ``None`` if it is not found.

    The npm global bin directory is put on ``PATH`` first so npm-installed
    CLIs are visible to ``shutil.which``.
    """
    _ensure_npm_on_path()
    resolved = shutil.which(name)
    return resolved
163
+
164
+
165
def _is_gemini_on_path() -> bool:
    """Report whether a ``gemini`` executable can be resolved via ``_which``."""
    location = _which("gemini")
    return location is not None
168
+
169
+
170
+ # ---------------------------------------------------------------------------
171
+ # Claude Code CLI backend
172
+ # ---------------------------------------------------------------------------
173
+
174
+ _claude_available = None # None=unknown, True=up, False=down
175
+
176
+
177
def _is_claude_on_path() -> bool:
    """Report whether a ``claude`` executable can be resolved via ``_which``."""
    location = _which("claude")
    return location is not None
180
+
181
+
182
def check_claude() -> dict:
    """Probe the Claude CLI with ``--version`` and cache the outcome.

    Updates the module-level ``_claude_available`` flag and returns a dict
    with ``status`` set to ``ok`` (plus ``version``), or ``not_installed``,
    ``error`` or ``timeout`` (plus ``detail``).
    """
    global _claude_available

    exe = _which("claude")
    if exe:
        try:
            proc = subprocess.run(
                [exe, "--version"],
                capture_output=True, text=True, timeout=10,
                stdin=subprocess.DEVNULL,
            )
            available = proc.returncode == 0
            if available:
                report = {"status": "ok", "version": proc.stdout.strip()}
            else:
                report = {"status": "error",
                          "detail": proc.stderr.strip() or f"exit {proc.returncode}"}
        except subprocess.TimeoutExpired:
            available = False
            report = {"status": "timeout", "detail": "claude --version timed out (10s)"}
        except Exception as e:
            available = False
            report = {"status": "error", "detail": str(e)}
    else:
        available = False
        report = {"status": "not_installed", "detail": "claude CLI not found on PATH"}

    _claude_available = available
    return report
206
+
207
+
208
def _run_claude(task: str, context: str, cwd: str | None = None,
                timeout: int = 90, idle_timeout: int = 30) -> tuple:
    """Run ``claude -p`` in non-interactive print mode.

    Args:
        task: the instruction to send.
        context: optional context placed before the task in the prompt.
        cwd: working directory for the subprocess.
        timeout: total seconds allowed.
        idle_timeout: seconds of stderr silence allowed before the watchdog
            kills the process.

    Returns:
        ``(output, success)``. On failure ``output`` is a bracketed
        ``[claude:...]`` message.
    """
    exe = _which("claude")
    if not exe:
        return "[claude:error] claude CLI not on PATH", False

    prompt = f"Context:\n{context}\n\nTask:\n{task}" if context else task
    cmd = [exe, "-p", prompt, "--output-format", "text"]
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            text=True, cwd=cwd,
            **_popen_kwargs(),
        )
        # The helper returns three values. Unpacking only two raised
        # ValueError on every call, so this backend could never succeed.
        output, err, status = _communicate_with_heartbeat(
            proc, timeout=timeout, idle_timeout=idle_timeout,
        )
    except Exception as e:
        return f"[claude:error] {e}", False

    # NOTE(review): the watchdog only counts stderr output as activity;
    # confirm `claude -p` writes to stderr during long runs, otherwise runs
    # longer than idle_timeout are killed.
    if status == "idle_timeout":
        return f"[claude:idle_timeout] No stderr activity for {idle_timeout}s", False
    if status == "timeout":
        return f"[claude:timeout] No response after {timeout}s", False

    answer = output.strip()
    if proc.returncode == 0 and answer:
        return answer, True
    return f"[claude:error] {(err or '').strip() or 'no output'}", False
230
+
231
+
232
def _claude_memory_bridge(output: str, task_type: str, task: str, svc) -> None:
    """Best-effort: store the first 400 characters of a Claude response.

    The snippet is handed to ``services.auto_memory._save_or_merge_standalone``
    under the tag ``auto:claude:<task_type>``. Any failure, including the
    import itself, is ignored. ``task`` is accepted but not used.
    """
    try:
        from services.auto_memory import _save_or_merge_standalone as _persist

        snippet = output[:400]
        tag = f"auto:claude:{task_type}"
        _persist(snippet, tag, svc)
    except Exception:
        pass
239
+
240
+
241
def _handle_claude_delegate(task: str, task_type: str, context: str,
                            file_path: str, svc, dcfg: dict, finalize) -> str:
    """Delegate a task to the Claude Code CLI and finalize the result.

    The timeout comes from ``dcfg['claude_timeout']`` (default 90) and the
    subprocess runs in ``svc.project_path``. ``finalize`` is called with the
    tool name, a metadata dict, the output and ``"ok"`` or ``"error"``.
    ``file_path`` is accepted but not used.
    """
    claude_timeout = int(dcfg.get("claude_timeout", 90))
    _log_progress(svc, f"[delegate] Routing {task_type} → Claude CLI...")
    output, ok = _run_claude(task, context, cwd=str(svc.project_path), timeout=claude_timeout)
    meta = {"task_type": task_type, "backend": "claude"}
    outcome = "ok" if ok else "error"
    return finalize("c3_delegate", meta, output, outcome)
252
+
253
+
254
def check_gemini() -> dict:
    """Probe the Gemini CLI with ``--version`` and cache the outcome.

    Updates the module-level ``_gemini_available`` flag and returns a dict
    with ``status`` set to ``ok`` (plus ``version``), or ``not_installed``,
    ``error`` or ``timeout`` (plus ``detail``).
    """
    global _gemini_available

    exe = _which("gemini")
    if exe:
        try:
            proc = subprocess.run(
                [exe, "--version"],
                capture_output=True, text=True, timeout=10,
                stdin=subprocess.DEVNULL,
            )
            available = proc.returncode == 0
            if available:
                report = {"status": "ok", "version": proc.stdout.strip()}
            else:
                report = {"status": "error",
                          "detail": proc.stderr.strip() or f"exit code {proc.returncode}"}
        except subprocess.TimeoutExpired:
            available = False
            report = {"status": "timeout", "detail": "gemini --version timed out (10s)"}
        except Exception as e:
            available = False
            report = {"status": "error", "detail": str(e)}
    else:
        available = False
        report = {"status": "not_installed", "detail": "gemini CLI not found on PATH"}

    _gemini_available = available
    return report
280
+
281
+
282
def _start_gemini_early(model: str, timeout: int = 45, idle_timeout: int = 15,
                        cwd: str | None = None):
    """Start the Gemini CLI with ``stdin=PIPE`` so the prompt can be fed later.

    Call this before other preparatory work so Gemini's startup overlaps
    with it, then pass the returned process to ``_finish_gemini_early()`` to
    send the prompt and collect the result.

    Args:
        model: model id passed via ``-m``; omitted when falsy.
        timeout: accepted for signature parity; not used here.
        idle_timeout: accepted for signature parity; not used here.
        cwd: working directory for the subprocess.

    Returns:
        The ``Popen`` object, or ``None`` when the CLI cannot be resolved or
        fails to start.
    """
    gem_exe = _which("gemini")
    if not gem_exe:
        return None

    cmd = [
        gem_exe,
        "--output-format", "json",
        "--approval-mode", "yolo",
        "--allowed-mcp-server-names", "__none__",
    ]
    if model:
        cmd += ["-m", model]
    try:
        return subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
    except Exception:
        # The caller treats None as "not available"; keep a trace of why.
        log.debug("failed to start gemini early", exc_info=True)
        return None
317
+
318
+
319
def _finish_gemini_early(proc, task: str, context: str,
                         timeout: int = 45, idle_timeout: int = 15):
    """Feed the prompt to an early-started Gemini process and collect the result.

    Args:
        proc: process returned by ``_start_gemini_early()``, or ``None``.
        task: the instruction to send.
        context: optional context appended after the task.
        timeout: total seconds allowed from this call.
        idle_timeout: seconds of stderr silence allowed before the kill.

    Returns:
        ``(output, success, token_stats)`` where ``token_stats`` has the keys
        ``input_tokens``, ``output_tokens`` and ``cached_tokens``.
    """
    import json as _json
    import threading

    empty_stats = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}
    if proc is None:
        return "[gemini:error] process not started", False, empty_stats

    prompt = f"{task}\n\nContext:\n{context}" if context else task

    # Write the prompt on a daemon thread so a full pipe buffer cannot block
    # the caller before the child starts reading.
    def _write_stdin():
        try:
            proc.stdin.write(prompt)
            proc.stdin.close()
        except Exception:
            pass

    threading.Thread(target=_write_stdin, daemon=True).start()

    # Reuse the shared watchdog instead of a second copy of the same loop.
    stdout, stderr, status = _communicate_with_heartbeat(
        proc, timeout=timeout, idle_timeout=idle_timeout,
    )

    if status == "idle_timeout":
        return (f"[gemini:idle_timeout] No stderr activity for {idle_timeout}s "
                f"(likely MCP startup hang)"), False, empty_stats
    if status == "timeout":
        return f"[gemini:timeout] No response after {timeout}s", False, empty_stats
    if proc.returncode != 0:
        err = stderr.strip() if stderr else f"exit code {proc.returncode}"
        return f"[gemini:error] {err}", False, empty_stats

    # Parse the output as-is first, so a top-level JSON list is not truncated
    # at its first "{". Only on failure drop any non-JSON prefix and retry.
    raw = stdout.strip()
    try:
        data = _json.loads(raw)
    except (_json.JSONDecodeError, TypeError):
        json_start = raw.find("{")
        if json_start <= 0:
            return raw, True, empty_stats
        raw = raw[json_start:]
        try:
            data = _json.loads(raw)
        except (_json.JSONDecodeError, TypeError):
            return raw, True, empty_stats

    if isinstance(data, dict):
        text = data.get("response", data.get("text", data.get("result", raw)))
    elif isinstance(data, list):
        texts = [msg.get("text", msg.get("content", ""))
                 for msg in data if isinstance(msg, dict)]
        text = "\n".join(t for t in texts if t)
    else:
        text = str(data)

    # Token stats live under stats.models.<id>.tokens. Tolerate missing, null
    # or non-dict levels so a metrics problem never discards the response.
    token_stats = dict(empty_stats)
    if isinstance(data, dict):
        stats = data.get("stats")
        models = stats.get("models") if isinstance(stats, dict) else None
        if isinstance(models, dict):
            for mdata in models.values():
                tok = mdata.get("tokens") if isinstance(mdata, dict) else None
                if not isinstance(tok, dict):
                    continue
                token_stats["input_tokens"] += tok.get("input", 0) or 0
                token_stats["output_tokens"] += tok.get("candidates", 0) or 0
                token_stats["cached_tokens"] += tok.get("cached", 0) or 0

    return text, True, token_stats
420
+
421
+
422
def _run_gemini(task: str, context: str, model: str,
                timeout: int = 45, idle_timeout: int = 15,
                cwd: str | None = None) -> tuple[str, bool, dict]:
    """Run the Gemini CLI as a subprocess.

    The prompt is passed with ``-p``. The process is supervised by the
    heartbeat watchdog: it is killed after ``idle_timeout`` seconds without
    stderr activity or after ``timeout`` seconds overall. JSON output is
    parsed for the response text and token metrics; non-JSON output is
    returned as plain text.

    Returns:
        ``(output, success, token_stats)`` where ``token_stats`` has the keys
        ``input_tokens``, ``output_tokens`` and ``cached_tokens``.
    """
    import json as _json

    prompt = f"{task}\n\nContext:\n{context}" if context else task
    gem_exe = _which("gemini") or "gemini"
    cmd = [
        gem_exe, "-p", prompt,
        "--output-format", "json",
        "--approval-mode", "yolo",
        "--allowed-mcp-server-names", "__none__",
    ]
    if model:
        cmd += ["-m", model]

    empty_stats = {"input_tokens": 0, "output_tokens": 0, "cached_tokens": 0}

    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
        stdout, stderr, status = _communicate_with_heartbeat(
            proc, timeout=timeout, idle_timeout=idle_timeout,
        )
        if status == "idle_timeout":
            return (f"[gemini:idle_timeout] No stderr activity for {idle_timeout}s "
                    f"(likely MCP startup hang)"), False, empty_stats
        if status == "timeout":
            return f"[gemini:timeout] No response after {timeout}s", False, empty_stats

        if proc.returncode != 0:
            err = stderr.strip() if stderr else f"exit code {proc.returncode}"
            return f"[gemini:error] {err}", False, empty_stats

        # Parse the output as-is first, so a top-level JSON list is not
        # truncated at its first "{". Only on failure drop any non-JSON
        # prefix (e.g. startup messages) and retry.
        raw = stdout.strip()
        try:
            data = _json.loads(raw)
        except (_json.JSONDecodeError, TypeError):
            json_start = raw.find("{")
            if json_start <= 0:
                return raw, True, empty_stats
            raw = raw[json_start:]
            try:
                data = _json.loads(raw)
            except (_json.JSONDecodeError, TypeError):
                # Fallback: treat the remaining stdout as plain text.
                return raw, True, empty_stats

        # Extract response text.
        if isinstance(data, dict):
            text = data.get("response", data.get("text", data.get("result", raw)))
        elif isinstance(data, list):
            texts = [msg.get("text", msg.get("content", ""))
                     for msg in data if isinstance(msg, dict)]
            text = "\n".join(t for t in texts if t)
        else:
            text = str(data)

        # Token stats live under stats.models.<id>.tokens. Tolerate missing,
        # null or non-dict levels so a metrics problem never turns a
        # successful response into an error.
        token_stats = dict(empty_stats)
        if isinstance(data, dict):
            stats = data.get("stats")
            models = stats.get("models") if isinstance(stats, dict) else None
            if isinstance(models, dict):
                for mdata in models.values():
                    tok = mdata.get("tokens") if isinstance(mdata, dict) else None
                    if not isinstance(tok, dict):
                        continue
                    token_stats["input_tokens"] += tok.get("input", 0) or 0
                    token_stats["output_tokens"] += tok.get("candidates", 0) or 0
                    token_stats["cached_tokens"] += tok.get("cached", 0) or 0

        return text, True, token_stats
    except FileNotFoundError:
        return "[gemini:error] gemini CLI not found on PATH", False, empty_stats
    except Exception as e:
        return f"[gemini:error] {e}", False, empty_stats
506
+
507
+
508
def _is_codex_on_path() -> bool:
    """Report whether a ``codex`` executable can be resolved via ``_which``."""
    location = _which("codex")
    return location is not None
511
+
512
+
513
def check_codex() -> dict:
    """Probe the Codex CLI with ``--version`` and cache the outcome.

    Updates the module-level ``_codex_available`` flag and returns a dict
    with ``status`` set to ``ok`` (plus ``version``), or ``not_installed``,
    ``error`` or ``timeout`` (plus ``detail``).
    """
    global _codex_available

    exe = _which("codex")
    if exe:
        try:
            proc = subprocess.run(
                [exe, "--version"],
                capture_output=True, text=True, timeout=10,
                stdin=subprocess.DEVNULL,
            )
            available = proc.returncode == 0
            if available:
                report = {"status": "ok", "version": proc.stdout.strip()}
            else:
                report = {"status": "error",
                          "detail": proc.stderr.strip() or f"exit code {proc.returncode}"}
        except subprocess.TimeoutExpired:
            available = False
            report = {"status": "timeout", "detail": "codex --version timed out (10s)"}
        except Exception as e:
            available = False
            report = {"status": "error", "detail": str(e)}
    else:
        available = False
        report = {"status": "not_installed", "detail": "codex CLI not found on PATH"}

    _codex_available = available
    return report
539
+
540
+
541
def _run_codex(task: str, context: str, model: str, sandbox: str,
               reasoning: str = "high", timeout: int = 120,
               idle_timeout: int = 20,
               cwd: str | None = None) -> tuple[str, bool]:
    """Run ``codex exec`` as a subprocess and return ``(output, success)``.

    The process is supervised by the heartbeat watchdog: it is killed after
    ``idle_timeout`` seconds without stderr activity or after ``timeout``
    seconds overall. On failure ``output`` is a bracketed ``[codex:...]``
    message.
    """
    full_prompt = task
    if context:
        full_prompt = f"{task}\n\nContext:\n{context}"

    # NOTE(review): `--full-auto` is passed together with an explicit
    # `--sandbox`; confirm against the Codex CLI docs that it does not
    # override a read-only sandbox.
    argv = [_which("codex") or "codex", "exec"]
    argv += ["-m", model]
    argv += ["--config", f"model_reasoning_effort={reasoning}"]
    argv += ["--sandbox", sandbox]
    argv += ["--full-auto", "--skip-git-repo-check", full_prompt]

    try:
        child = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
        out_text, err_text, outcome = _communicate_with_heartbeat(
            child, timeout=timeout, idle_timeout=idle_timeout,
        )
    except FileNotFoundError:
        return "[codex:error] codex CLI not found on PATH", False
    except Exception as e:
        return f"[codex:error] {e}", False

    if outcome == "idle_timeout":
        return (f"[codex:idle_timeout] No stderr activity for {idle_timeout}s "
                f"(likely MCP startup hang)"), False
    if outcome == "timeout":
        return f"[codex:timeout] No response after {timeout}s", False
    if child.returncode != 0:
        err = err_text.strip() if err_text else f"exit code {child.returncode}"
        return f"[codex:error] {err}", False
    return out_text.strip(), True
588
+
589
+
590
def _run_codex_resume(follow_up: str, timeout: int = 120,
                      cwd: str | None = None) -> tuple[str, bool]:
    """Resume the last Codex session, sending *follow_up* on stdin.

    Returns ``(output, success)``. On failure ``output`` is a bracketed
    ``[codex:...]`` message.
    """
    # Resolve via _which like _run_codex does: a bare "codex" is not found
    # by Popen when the CLI is an npm shim (e.g. codex.cmd on Windows).
    codex_exe = _which("codex") or "codex"
    cmd = [codex_exe, "exec", "--skip-git-repo-check", "resume", "--last"]
    try:
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            text=True,
            cwd=cwd,
            **_popen_kwargs(),
        )
        try:
            stdout, stderr = proc.communicate(input=follow_up, timeout=timeout)
        except subprocess.TimeoutExpired:
            _kill_proc_tree(proc)
            # Drain and close the pipes after the kill. Bounded, in case a
            # surviving grandchild still holds them open.
            try:
                proc.communicate(timeout=5)
            except Exception:
                pass
            return f"[codex:timeout] Resume timed out after {timeout}s", False

        if proc.returncode != 0:
            err = stderr.strip() if stderr else f"exit code {proc.returncode}"
            return f"[codex:error] {err}", False

        return stdout.strip(), True
    except FileNotFoundError:
        return "[codex:error] codex CLI not found on PATH", False
    except Exception as e:
        return f"[codex:error] {e}", False
623
+
624
+
625
+ # Delegate task definitions
626
def _delegate_task_spec(model, system, label, closing, temperature=0.2):
    """Build one DELEGATE_TASKS entry.

    Every prompt template has the same shape: a context section, a labelled
    task section and a closing instruction.
    """
    return {
        "default_model": model,
        "system": system,
        "prompt_template": f"Context:\n{{context}}\n\n{label}:\n{{task}}\n\n{closing}",
        "temperature": temperature,
    }


# Per-task defaults for local delegation: model, system prompt, prompt
# template (formatted with `context` and `task`) and sampling temperature.
DELEGATE_TASKS = {
    "summarize": _delegate_task_spec(
        "gemma3n:latest",
        "You are a concise technical summarizer. Keep the answer compact and concrete.",
        "Task",
        "Return a compact summary with only the key points.",
    ),
    "explain": _delegate_task_spec(
        "llama3.2:3b",
        "You explain code precisely and concisely. Prefer short bullet points and specific references.",
        "Question",
        "Explain only what is needed to answer the question.",
    ),
    "docstring": _delegate_task_spec(
        "gemma3n:latest",
        "Write terse, accurate code documentation.",
        "Task",
        "Produce a concise docstring or documentation snippet.",
    ),
    "review": _delegate_task_spec(
        "llama3.2:3b",
        "You are a pragmatic code reviewer. Prioritize bugs, regressions, and missing tests.",
        "Review task",
        "Return the most important findings first.",
    ),
    "ask": _delegate_task_spec(
        "deepseek-r1:1.5b",
        "Answer narrowly and directly from the provided context.",
        "Question",
        "Answer concisely.",
    ),
    "test": _delegate_task_spec(
        "llama3.2:3b",
        "Design targeted tests that maximize defect coverage with minimal redundancy.",
        "Task",
        "Produce focused test ideas or test code.",
    ),
    "diagnose": _delegate_task_spec(
        "llama3.2:3b",
        "You diagnose failures from logs and execution context. Focus on root cause and next step.",
        "Problem",
        "Identify the most likely cause and the next debugging step.",
        temperature=0.1,
    ),
    "improve": _delegate_task_spec(
        "llama3.2:3b",
        "You improve code with minimal, high-value changes.",
        "Task",
        "Suggest the smallest useful improvement plan.",
    ),
}
676
+
677
+ # Module-level cache and metrics
678
_delegate_cache: dict[str, tuple[str, int]] = dict()
_delegate_metrics = dict(total_calls=0, tokens_saved=0)


def get_delegate_metrics() -> dict:
    """Return a shallow copy of the module-level delegate counters.

    Mutating the returned dict does not affect the live counters.
    """
    return _delegate_metrics.copy()
684
+
685
+
686
def infer_task_type(task: str, context: str = "") -> str:
    """Guess a delegate task type from keywords in *task* and *context*.

    Matching is a case-insensitive substring test over the combined text.
    Rules are tried in order and the first hit wins; ``"explain"`` is the
    fallback. Because matching is by substring, ``"latest"`` triggers the
    ``"test"`` rule.
    """
    haystack = f"{task}\n{context}".lower()
    rules = (
        ("diagnose", ("traceback", "exception", "stack trace", "exit code", "failed", "error")),
        ("review", ("review", "regression", "bug risk", "audit")),
        ("test", ("test", "pytest", "unit test", "integration test")),
        ("docstring", ("docstring", "document", "documentation")),
        ("summarize", ("summarize", "summary", "tl;dr")),
        ("improve", ("improve", "refactor", "clean up", "optimize")),
    )
    for task_type, needles in rules:
        for needle in needles:
            if needle in haystack:
                return task_type
    return "explain"
701
+
702
+
703
def resolve_model_name(candidate: str, available: list[str]) -> str:
    """Map a requested model name onto one of the *available* names.

    Matching is case-insensitive and tried in three passes over *available*:
    exact name; same base name (the part before ``:``) with any tag; base
    name appearing anywhere in the model name. The first hit is returned
    with its original casing, or ``""`` when nothing matches or *candidate*
    is empty.
    """
    wanted = candidate.strip().lower() if candidate else ""
    if not wanted:
        return ""

    family = wanted.partition(":")[0]
    tagged_prefix = family + ":"
    passes = (
        lambda name: name == wanted,
        lambda name: name == family or name.startswith(tagged_prefix),
        lambda name: family in name,
    )
    for matches in passes:
        for model in available:
            if matches(model.lower()):
                return model
    return ""
721
+
722
+
723
+ def _fallback_model_order(task_type: str) -> list[str]:
724
+ if task_type in {"ask", "diagnose", "explain"}:
725
+ return ["llama3.2:latest", "llama3.2:3b", "qwen3-coder-next:latest", "llama3.1:latest", "gemma3n:latest"]
726
+ return ["llama3.2:latest", "llama3.2:3b", "qwen3-coder-next:latest", "gemma3n:latest"]
727
+
728
+
729
+ def _estimate_confidence(task_type: str, response: str, response_tokens: int) -> str:
730
+ hedging = [
731
+ "i'm not sure", "i don't know", "it's unclear", "might be",
732
+ "possibly", "i cannot determine", "hard to say", "not enough context",
733
+ ]
734
+ hedge_count = sum(1 for phrase in hedging if phrase in (response or "").lower())
735
+ min_tokens = {"summarize": 15, "explain": 30, "docstring": 10, "review": 20,
736
+ "ask": 10, "test": 30, "diagnose": 20, "improve": 10}
737
+ too_short = response_tokens < min_tokens.get(task_type, 10)
738
+ if too_short or hedge_count >= 2:
739
+ return "low"
740
+ if hedge_count == 1 or response_tokens < min_tokens.get(task_type, 10) * 2:
741
+ return "medium"
742
+ return "high"
743
+
744
+
745
+ # ---------------------------------------------------------------------------
746
+ # Codex delegate handler
747
+ # ---------------------------------------------------------------------------
748
+
749
+ def _handle_codex_delegate(task: str, task_type: str, context: str,
750
+ file_path: str, svc, dcfg: dict, finalize) -> str:
751
+ """Handle delegation via Codex CLI."""
752
+ if not dcfg.get("codex_enabled", False):
753
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "codex"},
754
+ "[delegate:error] Codex not enabled. Set delegate.codex_enabled=true in .c3/config.json",
755
+ "disabled")
756
+
757
+ global _codex_available
758
+ if _codex_available is None:
759
+ check_codex() # populates _codex_available
760
+ if not _codex_available:
761
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "codex"},
762
+ "[delegate:error] Codex CLI not available. Run 'codex --version' to diagnose.",
763
+ "unavailable")
764
+
765
+ # Resolve model/sandbox/reasoning from config or defaults
766
+ cdef = CODEX_MODELS.get(task_type, CODEX_MODELS.get("ask", {}))
767
+ model = dcfg.get("codex_default_model") or cdef.get("model", "gpt-5.3-codex-spark")
768
+ sandbox = dcfg.get("codex_default_sandbox") or cdef.get("sandbox", "read-only")
769
+ reasoning = dcfg.get("codex_reasoning_effort") or cdef.get("reasoning", "high")
770
+ timeout = int(dcfg.get("codex_timeout", 120))
771
+
772
+ # Context enrichment (reuse existing pattern)
773
+ enriched = context
774
+ if file_path and dcfg.get("auto_compress", True):
775
+ for p in [p.strip() for p in file_path.split(",") if p.strip()]:
776
+ try:
777
+ res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
778
+ if isinstance(res, dict) and res.get("compressed"):
779
+ enriched += f"\n--- file: {p} ---\n{res['compressed']}"
780
+ except Exception:
781
+ continue
782
+
783
+ # Truncate context to avoid blowing Codex's input
784
+ max_ctx = max(200, int(dcfg.get("codex_max_context_tokens", 4000) or 4000))
785
+ if count_tokens(enriched) > max_ctx:
786
+ enriched = enriched[:max_ctx * 4]
787
+
788
+ # Cache check
789
+ ckey = hashlib.md5(f"codex|{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
790
+ if ckey in _delegate_cache:
791
+ cached_resp, _ = _delegate_cache[ckey]
792
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "codex", "cached": True},
793
+ cached_resp, "cached")
794
+
795
+ # Run Codex
796
+ _log_progress(svc, f"[delegate] Codex {model} ({sandbox}, reasoning={reasoning})...")
797
+ t0 = time.monotonic()
798
+ output, ok = _run_codex(
799
+ task=task, context=enriched,
800
+ model=model, sandbox=sandbox,
801
+ reasoning=reasoning, timeout=timeout,
802
+ cwd=str(svc.project_path),
803
+ )
804
+ elapsed = round(time.monotonic() - t0, 1)
805
+
806
+ if not ok:
807
+ return finalize("c3_delegate",
808
+ {"task_type": task_type, "backend": "codex", "model": model, "elapsed": f"{elapsed}s"},
809
+ output, "error")
810
+
811
+ _delegate_metrics["total_calls"] += 1
812
+ _delegate_cache[ckey] = (output, count_tokens(output))
813
+
814
+ # Memory bridge — auto-extract key findings from substantial Codex responses
815
+ _codex_memory_bridge(output, task_type, task, svc)
816
+
817
+ return finalize("c3_delegate",
818
+ {"task_type": task_type, "backend": "codex", "model": model, "elapsed": f"{elapsed}s"},
819
+ output, "ok")
820
+
821
+
822
+ def _codex_memory_bridge(output: str, task_type: str, task: str, svc):
823
+ """Auto-extract key findings from Codex responses into c3_memory.
824
+
825
+ Only stores when the response is substantial and actionable.
826
+ """
827
+ try:
828
+ memory = getattr(svc, "memory", None)
829
+ if not memory:
830
+ return
831
+ dcfg = svc.delegate_config or {}
832
+ if not dcfg.get("codex_memory_bridge", True):
833
+ return
834
+
835
+ # Only bridge substantial responses (not trivial or error)
836
+ tokens = count_tokens(output)
837
+ if tokens < 50 or tokens > 3000:
838
+ return # too short = trivial, too long = dump
839
+
840
+ # Skip benign responses
841
+ lower = output.lower()
842
+ benign = ("no issues", "looks good", "no problems", "lgtm", "all good",
843
+ "no regressions", "no bugs")
844
+ if any(b in lower for b in benign):
845
+ return
846
+
847
+ # Build a concise fact from the Codex output
848
+ # Truncate to keep facts digestible
849
+ summary = output[:400].strip()
850
+ if len(output) > 400:
851
+ summary += "..."
852
+
853
+ fact = f"[codex:{task_type}] {task[:80]} — {summary}"
854
+ memory.remember(fact, category=f"codex_{task_type}")
855
+ log.debug("codex_memory_bridge: stored fact for task_type=%s", task_type)
856
+ except Exception:
857
+ pass # never break delegation for memory
858
+
859
+
860
+ # ---------------------------------------------------------------------------
861
+ # Gemini delegate handler
862
+ # ---------------------------------------------------------------------------
863
+
864
+ def _handle_gemini_delegate(task: str, task_type: str, context: str,
865
+ file_path: str, svc, dcfg: dict, finalize) -> str:
866
+ """Handle delegation via Gemini CLI."""
867
+ if not dcfg.get("gemini_enabled", False):
868
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini"},
869
+ "[delegate:error] Gemini not enabled. Set delegate.gemini_enabled=true in .c3/config.json",
870
+ "disabled")
871
+
872
+ global _gemini_available
873
+ if _gemini_available is None:
874
+ check_gemini()
875
+ if not _gemini_available:
876
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini"},
877
+ "[delegate:error] Gemini CLI not available. Run 'gemini --version' to diagnose.",
878
+ "unavailable")
879
+
880
+ # Resolve model from config or defaults
881
+ gdef = GEMINI_MODELS.get(task_type, GEMINI_MODELS.get("ask", {}))
882
+ model = dcfg.get("gemini_default_model") or gdef.get("model", "gemini-2.5-flash")
883
+ timeout = int(dcfg.get("gemini_timeout", 120))
884
+
885
+ # Context enrichment (reuse existing pattern)
886
+ enriched = context
887
+ if file_path and dcfg.get("auto_compress", True):
888
+ for p in [p.strip() for p in file_path.split(",") if p.strip()]:
889
+ try:
890
+ res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
891
+ if isinstance(res, dict) and res.get("compressed"):
892
+ enriched += f"\n--- file: {p} ---\n{res['compressed']}"
893
+ except Exception:
894
+ continue
895
+
896
+ # Truncate context
897
+ max_ctx = max(200, int(dcfg.get("gemini_max_context_tokens", 8000) or 8000))
898
+ if count_tokens(enriched) > max_ctx:
899
+ enriched = enriched[:max_ctx * 4]
900
+
901
+ # Cache check
902
+ ckey = hashlib.md5(f"gemini|{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
903
+ if ckey in _delegate_cache:
904
+ cached_resp, _ = _delegate_cache[ckey]
905
+ return finalize("c3_delegate", {"task_type": task_type, "backend": "gemini", "cached": True},
906
+ cached_resp, "cached")
907
+
908
+ # Run Gemini
909
+ _log_progress(svc, f"[delegate] Gemini {model}...")
910
+ t0 = time.monotonic()
911
+ output, ok, token_stats = _run_gemini(
912
+ task=task, context=enriched,
913
+ model=model, timeout=timeout,
914
+ cwd=str(svc.project_path),
915
+ )
916
+ elapsed = round(time.monotonic() - t0, 1)
917
+
918
+ if not ok:
919
+ return finalize("c3_delegate",
920
+ {"task_type": task_type, "backend": "gemini", "model": model, "elapsed": f"{elapsed}s"},
921
+ output, "error")
922
+
923
+ _delegate_metrics["total_calls"] += 1
924
+ _delegate_cache[ckey] = (output, count_tokens(output))
925
+
926
+ # Memory bridge
927
+ _gemini_memory_bridge(output, task_type, task, svc)
928
+
929
+ return finalize("c3_delegate",
930
+ {"task_type": task_type, "backend": "gemini", "model": model,
931
+ "elapsed": f"{elapsed}s", **token_stats},
932
+ output, "ok")
933
+
934
+
935
+ def _gemini_memory_bridge(output: str, task_type: str, task: str, svc):
936
+ """Auto-extract key findings from Gemini responses into c3_memory."""
937
+ try:
938
+ memory = getattr(svc, "memory", None)
939
+ if not memory:
940
+ return
941
+ dcfg = svc.delegate_config or {}
942
+ if not dcfg.get("gemini_memory_bridge", True):
943
+ return
944
+
945
+ tokens = count_tokens(output)
946
+ if tokens < 50 or tokens > 3000:
947
+ return
948
+
949
+ lower = output.lower()
950
+ benign = ("no issues", "looks good", "no problems", "lgtm", "all good",
951
+ "no regressions", "no bugs")
952
+ if any(b in lower for b in benign):
953
+ return
954
+
955
+ summary = output[:400].strip()
956
+ if len(output) > 400:
957
+ summary += "..."
958
+
959
+ fact = f"[gemini:{task_type}] {task[:80]} -- {summary}"
960
+ memory.remember(fact, category=f"gemini_{task_type}")
961
+ log.debug("gemini_memory_bridge: stored fact for task_type=%s", task_type)
962
+ except Exception:
963
+ pass
964
+
965
+
966
def handle_delegate(task: str, task_type: str, context: str, file_path: str,
                    svc, finalize, backend: str = "ollama") -> str:
    """Entry point for the delegate tool: health checks, routing, Ollama path.

    Args:
        task: The task text (for ``codex_resume``, the text passed on to the
            resume helper).
        task_type: A delegate task type, "auto" (inferred from the text on the
            Ollama path), or one of the health/utility types "available",
            "codex_check", "codex_resume", "gemini_check".
        context: Caller-supplied context text.
        file_path: Comma-separated project-relative paths whose compressed
            content is appended to the context when ``auto_compress`` is on.
        svc: Service object; ``delegate_config``, ``ollama_client``,
            ``compressor``, ``activity_log`` and ``project_path`` are read.
        finalize: Callable ``(tool, meta, text, status)`` that builds the
            returned value.
        backend: "ollama" (default), "codex", "gemini", "claude" or "auto".

    Returns:
        The value produced by ``finalize``, or a plain "[delegate:...]" string
        for the disabled / unknown-type / Ollama-unavailable / no-model cases.
    """
    dcfg = svc.delegate_config or {}
    if not dcfg.get("enabled", True):
        return "[delegate:disabled]"

    # --- Health checks -----------------------------------------------------
    if task_type == "available":
        # Parallel health check across all backends
        from concurrent.futures import ThreadPoolExecutor, as_completed
        results = {}

        def _check_ollama():
            ollama = svc.ollama_client
            if not ollama:
                return "ollama", "down", "client=None", []
            up = ollama.is_available()
            models = ollama.list_models() if up else []
            return "ollama", "up" if up else "down", "", models or []

        def _check_codex():
            info = check_codex()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "codex", s, d, []

        def _check_gemini():
            info = check_gemini()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "gemini", s, d, []

        def _check_claude():
            info = check_claude()
            s = info.get("status", "unknown")
            d = info.get("version") or info.get("detail", "")
            return "claude", s, d, []

        probes = (("ollama", _check_ollama), ("codex", _check_codex),
                  ("gemini", _check_gemini), ("claude", _check_claude))
        with ThreadPoolExecutor(max_workers=4) as pool:
            futs = {pool.submit(fn): probe_name for probe_name, fn in probes}
            for fut in as_completed(futs):
                try:
                    name, status, detail, models = fut.result()
                except Exception as exc:
                    # One failing probe must not abort the whole report.
                    name, status, detail, models = futs[fut], "error", str(exc), []
                results[name] = (status, detail, models)

        lines = []
        for name in ("ollama", "codex", "gemini", "claude"):
            status, detail, models = results.get(name, ("unknown", "", []))
            line = f"  {name}={status}"
            if detail:
                line += f" {detail}"
            if models:
                line += f" models={len(models)} [{', '.join(models[:5])}]"
            lines.append(line)

        summary_statuses = [results.get(n, ("unknown",))[0] for n in ("ollama", "codex", "gemini", "claude")]
        up_count = sum(1 for s in summary_statuses if s in ("up", "ok"))
        return finalize("c3_delegate", {"task_type": "available"},
                        f"[delegate:available] {up_count}/4 backends up\n" + "\n".join(lines),
                        f"{up_count}/4 up")

    if task_type == "codex_check":
        info = check_codex()
        status = info.get("status", "unknown")
        detail = info.get("version") or info.get("detail", "")
        return finalize("c3_delegate", {"task_type": "codex_check"},
                        f"[delegate:codex_check] status={status} {detail}".strip(),
                        status)

    if task_type == "codex_resume":
        if not dcfg.get("codex_enabled", False):
            return finalize("c3_delegate", {"task_type": "codex_resume"},
                            "[delegate:error] Codex not enabled in config", "disabled")
        # `or 120` keeps a null/empty config value from crashing int().
        timeout = int(dcfg.get("codex_timeout", 120) or 120)
        output, ok = _run_codex_resume(task, timeout=timeout,
                                       cwd=str(svc.project_path))
        return finalize("c3_delegate", {"task_type": "codex_resume"},
                        output, "ok" if ok else "error")

    if task_type == "gemini_check":
        info = check_gemini()
        status = info.get("status", "unknown")
        detail = info.get("version") or info.get("detail", "")
        return finalize("c3_delegate", {"task_type": "gemini_check"},
                        f"[delegate:gemini_check] status={status} {detail}".strip(),
                        status)

    # --- Backend routing ---------------------------------------------------
    if backend == "auto":
        # Priority: Codex > Gemini > Ollama for heavy tasks
        heavy_codex = set(dcfg.get("codex_task_types", ["review", "diagnose", "improve", "test"]))
        heavy_gemini = set(dcfg.get("gemini_task_types", ["review", "diagnose", "improve", "test"]))
        # For heavy tasks, prefer cloud CLIs when available (faster than Ollama).
        # "Available" = pre-warm health check passed OR found on PATH.
        # The `enabled` config flag remains the primary gate, but availability
        # on-PATH is enough to prefer cloud over slow Ollama for heavy tasks.
        _light_tasks = {"ask", "explain", "summarize", "docstring"}
        _codex_avail = (_codex_available is True) or (
            _codex_available is None and task_type not in _light_tasks and _is_codex_on_path()
        )
        _gemini_avail = (_gemini_available is True) or (
            _gemini_available is None and task_type not in _light_tasks and _is_gemini_on_path()
        )
        if task_type in heavy_codex and _codex_avail and _codex_available is not False:
            backend = "codex"
        elif task_type in heavy_gemini and _gemini_avail and _gemini_available is not False:
            backend = "gemini"
        else:
            backend = "ollama"

    if backend == "codex":
        _log_progress(svc, f"[delegate] Routing {task_type} → Codex...")
        return _handle_codex_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    if backend == "gemini":
        _log_progress(svc, f"[delegate] Routing {task_type} → Gemini...")
        return _handle_gemini_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    if backend == "claude":
        return _handle_claude_delegate(task, task_type, context, file_path, svc, dcfg, finalize)

    # --- Original Ollama path (backend="ollama") ---------------------------

    if task_type == "auto":
        task_type = infer_task_type(task, context)

    tdef = DELEGATE_TASKS.get(task_type)
    if not tdef:
        return f"[delegate:error] Unknown type: {task_type}"
    ollama = svc.ollama_client
    if not ollama or not ollama.is_available():
        return "[delegate:error] Ollama unavailable. Requires Ollama for local LLM tasks."

    # Context enrichment
    enriched = context
    if file_path and dcfg.get("auto_compress", True):
        for p in [p.strip() for p in file_path.split(",") if p.strip()]:
            try:
                res = svc.compressor.compress_file(str(Path(svc.project_path) / p), "smart")
                if isinstance(res, dict) and res.get("compressed"):
                    enriched += f"\n--- file: {p} ---\n{res['compressed']}"
            except Exception:
                # Best effort: a file that cannot be compressed is simply left out.
                continue

    if task_type == "diagnose" and dcfg.get("auto_activity_log", True):
        recent = svc.activity_log.get_recent(limit=8)
        if recent:
            enriched += "\nRecent Activity:\n" + "\n".join(
                [f"[{e.get('timestamp','').split('T')[-1][:8]}] {e.get('tool','')}..."
                 for e in reversed(recent)])

    max_context_tokens = max(200, int(dcfg.get("max_context_tokens", 1400) or 1400))
    if count_tokens(enriched) > max_context_tokens:
        # Character-based cut using 4 characters per token of budget.
        enriched = enriched[:max_context_tokens * 4]

    # Model resolution
    req_model = dcfg.get(f"{task_type}_model") or dcfg.get("preferred_model") or tdef["default_model"]
    avail = ollama.list_models() or []
    model = resolve_model_name(req_model, avail)
    if not model:
        for cand in _fallback_model_order(task_type) + avail:
            model = resolve_model_name(cand, avail)
            if model:
                break
    if not model:
        return "[delegate:error] No compatible local model found"

    # Cache check (MD5 is used only as a cache key, not for security)
    ckey = hashlib.md5(f"{task_type}|{model}|{enriched}|{task}".encode()).hexdigest()
    if ckey in _delegate_cache:
        cached_resp, _ = _delegate_cache[ckey]
        return finalize("c3_delegate", {"task_type": task_type, "cached": True},
                        cached_resp, "cached")

    # Generate
    _log_progress(svc, f"[delegate] Running Ollama ({model})...")
    timeout_s = int(dcfg.get("timeout", 90) or 90)
    max_tokens = int(dcfg.get("max_tokens", 512) or 512)
    prompt = tdef["prompt_template"].format(context=enriched, task=task)
    temperature = tdef.get("temperature", 0.3)
    _t0 = time.monotonic()
    resp = ollama.generate(
        prompt=prompt,
        model=model, system=tdef["system"],
        temperature=temperature,
        max_tokens=max_tokens,
        timeout=timeout_s)
    # Elapsed time covers the first generation only, not fallback retries.
    _elapsed = round(time.monotonic() - _t0, 1)
    if resp is None:
        return finalize("c3_delegate", {"task_type": task_type, "model": model},
                        f"[delegate:timeout] No response from {model} after {_elapsed}s "
                        f"(limit {timeout_s}s)", "timeout")

    # Self-correction: retry with fallback model on low confidence
    conf = _estimate_confidence(task_type, resp, count_tokens(resp))
    if conf == "low" and dcfg.get("allow_model_fallback", True):
        tried = {model}
        for fallback_cand in _fallback_model_order(task_type) + avail:
            fallback = resolve_model_name(fallback_cand, avail)
            if not fallback or fallback in tried:
                continue
            tried.add(fallback)
            retry_resp = ollama.generate(
                prompt=prompt,
                model=fallback, system=tdef["system"],
                temperature=temperature,
                max_tokens=max_tokens,
                timeout=timeout_s,  # retries honour the same limit as the first call
            )
            if retry_resp is None:
                # The fallback gave no answer (same signal the first call
                # treats as a timeout); keep what we have and try the next one.
                continue
            retry_conf = _estimate_confidence(task_type, retry_resp, count_tokens(retry_resp))
            if retry_conf != "low":
                resp = retry_resp
                conf = retry_conf
                model = fallback
                break
            if count_tokens(retry_resp) > count_tokens(resp):
                # Still low confidence, but a longer answer is preferred and
                # reported as "medium".
                resp = retry_resp
                model = fallback
                conf = "medium"

    _delegate_metrics["total_calls"] += 1
    _delegate_cache[ckey] = (resp, count_tokens(resp))
    return finalize("c3_delegate", {"task": task_type, "model": model, "elapsed": f"{_elapsed}s"},
                    resp, conf)