code-context-control 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. cli/__init__.py +1 -0
  2. cli/_hook_utils.py +99 -0
  3. cli/c3.py +6152 -0
  4. cli/commands/__init__.py +1 -0
  5. cli/commands/common.py +312 -0
  6. cli/commands/parser.py +286 -0
  7. cli/docs.html +3178 -0
  8. cli/edits.html +878 -0
  9. cli/hook_auto_snapshot.py +142 -0
  10. cli/hook_c3_signal.py +61 -0
  11. cli/hook_c3read.py +116 -0
  12. cli/hook_edit_ledger.py +213 -0
  13. cli/hook_edit_unlock.py +170 -0
  14. cli/hook_filter.py +130 -0
  15. cli/hook_ghost_files.py +238 -0
  16. cli/hook_pretool_enforce.py +334 -0
  17. cli/hook_read.py +200 -0
  18. cli/hook_session_stats.py +62 -0
  19. cli/hook_terse_advisor.py +190 -0
  20. cli/hub.html +3764 -0
  21. cli/hub_server.py +1619 -0
  22. cli/mcp_proxy.py +428 -0
  23. cli/mcp_server.py +660 -0
  24. cli/server.py +2985 -0
  25. cli/tools/__init__.py +4 -0
  26. cli/tools/_helpers.py +65 -0
  27. cli/tools/agent.py +1165 -0
  28. cli/tools/compress.py +215 -0
  29. cli/tools/delegate.py +1184 -0
  30. cli/tools/edit.py +313 -0
  31. cli/tools/edits.py +118 -0
  32. cli/tools/filter.py +285 -0
  33. cli/tools/impact.py +163 -0
  34. cli/tools/memory.py +469 -0
  35. cli/tools/read.py +224 -0
  36. cli/tools/search.py +337 -0
  37. cli/tools/session.py +95 -0
  38. cli/tools/shell.py +193 -0
  39. cli/tools/status.py +306 -0
  40. cli/tools/validate.py +310 -0
  41. cli/ui/api.js +36 -0
  42. cli/ui/app.js +207 -0
  43. cli/ui/components/chat.js +758 -0
  44. cli/ui/components/dashboard.js +689 -0
  45. cli/ui/components/edits.js +220 -0
  46. cli/ui/components/instructions.js +481 -0
  47. cli/ui/components/memory.js +626 -0
  48. cli/ui/components/sessions.js +606 -0
  49. cli/ui/components/settings.js +1404 -0
  50. cli/ui/components/sidebar.js +156 -0
  51. cli/ui/icons.js +51 -0
  52. cli/ui/shared.js +119 -0
  53. cli/ui/theme.js +22 -0
  54. cli/ui.html +168 -0
  55. cli/ui_legacy.html +6797 -0
  56. cli/ui_nano.html +503 -0
  57. code_context_control-2.28.0.dist-info/METADATA +248 -0
  58. code_context_control-2.28.0.dist-info/RECORD +150 -0
  59. code_context_control-2.28.0.dist-info/WHEEL +5 -0
  60. code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
  61. code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
  62. code_context_control-2.28.0.dist-info/top_level.txt +5 -0
  63. core/__init__.py +75 -0
  64. core/config.py +269 -0
  65. core/ide.py +188 -0
  66. oracle/__init__.py +1 -0
  67. oracle/config.py +75 -0
  68. oracle/oracle.html +3900 -0
  69. oracle/oracle_server.py +663 -0
  70. oracle/services/__init__.py +1 -0
  71. oracle/services/c3_bridge.py +210 -0
  72. oracle/services/chat_engine.py +1103 -0
  73. oracle/services/chat_store.py +155 -0
  74. oracle/services/cross_memory.py +154 -0
  75. oracle/services/federated_graph.py +463 -0
  76. oracle/services/health_checker.py +117 -0
  77. oracle/services/insight_engine.py +307 -0
  78. oracle/services/memory_reader.py +106 -0
  79. oracle/services/memory_writer.py +182 -0
  80. oracle/services/ollama_bridge.py +332 -0
  81. oracle/services/project_scanner.py +87 -0
  82. oracle/services/review_agent.py +206 -0
  83. services/__init__.py +1 -0
  84. services/activity_log.py +93 -0
  85. services/agent_base.py +124 -0
  86. services/agents.py +1529 -0
  87. services/auto_memory.py +407 -0
  88. services/bench/__init__.py +6 -0
  89. services/bench/external/__init__.py +29 -0
  90. services/bench/external/aider_polyglot.py +405 -0
  91. services/bench/external/swe_bench.py +485 -0
  92. services/benchmark_dashboard.py +596 -0
  93. services/claude_md.py +785 -0
  94. services/compressor.py +592 -0
  95. services/context_snapshot.py +356 -0
  96. services/conversation_store.py +870 -0
  97. services/doc_index.py +537 -0
  98. services/e2e_benchmark.py +2884 -0
  99. services/e2e_evaluator.py +396 -0
  100. services/e2e_tasks.py +743 -0
  101. services/edit_ledger.py +459 -0
  102. services/embedding_index.py +341 -0
  103. services/error_reporting.py +123 -0
  104. services/file_memory.py +734 -0
  105. services/hub_service.py +585 -0
  106. services/indexer.py +712 -0
  107. services/memory.py +318 -0
  108. services/memory_consolidator.py +538 -0
  109. services/memory_graph.py +382 -0
  110. services/memory_grounder.py +304 -0
  111. services/memory_scorer.py +246 -0
  112. services/metrics.py +86 -0
  113. services/notifications.py +209 -0
  114. services/ollama_client.py +201 -0
  115. services/output_filter.py +488 -0
  116. services/parser.py +1238 -0
  117. services/project_manager.py +579 -0
  118. services/protocol.py +306 -0
  119. services/proxy_state.py +152 -0
  120. services/retrieval_broker.py +129 -0
  121. services/router.py +414 -0
  122. services/runtime.py +326 -0
  123. services/session_benchmark.py +1945 -0
  124. services/session_manager.py +1026 -0
  125. services/session_preloader.py +251 -0
  126. services/text_index.py +90 -0
  127. services/tool_classifier.py +176 -0
  128. services/transcript_index.py +340 -0
  129. services/validation_cache.py +155 -0
  130. services/vector_store.py +299 -0
  131. services/version_tracker.py +271 -0
  132. services/watcher.py +192 -0
  133. tui/__init__.py +0 -0
  134. tui/backend.py +59 -0
  135. tui/main.py +145 -0
  136. tui/screens/__init__.py +1 -0
  137. tui/screens/benchmark_view.py +109 -0
  138. tui/screens/claudemd_view.py +46 -0
  139. tui/screens/compress_view.py +52 -0
  140. tui/screens/index_view.py +74 -0
  141. tui/screens/init_view.py +82 -0
  142. tui/screens/mcp_view.py +73 -0
  143. tui/screens/optimize_view.py +41 -0
  144. tui/screens/pipe_view.py +46 -0
  145. tui/screens/projects_view.py +355 -0
  146. tui/screens/search_view.py +55 -0
  147. tui/screens/session_view.py +143 -0
  148. tui/screens/stats.py +158 -0
  149. tui/screens/ui_view.py +54 -0
  150. tui/theme.tcss +335 -0
@@ -0,0 +1,1103 @@
1
+ """Chat engine for Oracle — tool-calling loop with streaming."""
2
+
3
+ import concurrent.futures
4
+ import json
5
+ import queue
6
+ import re
7
+ import threading
8
+ import time
9
+ import uuid
10
+ from pathlib import Path
11
+
12
# Thread-local used to hand the active agent-event sink + parent tool_id
# into worker threads running _execute_tool, so nested sub-agent loops
# (_tool_delegate_task) can emit lifecycle events back to the main chat()
# generator without threading them through every tool signature.
# chat() sets two attributes on it per tool call -- `agent_sink` (a
# queue.Queue) and `parent_tool_id` -- and resets both to None afterwards.
_agent_tls = threading.local()
17
+
18
+ from oracle.config import load_config
19
+ from oracle.services.chat_store import ChatStore
20
+ from oracle.services.cross_memory import CrossMemory
21
+ from oracle.services.health_checker import HealthChecker
22
+ from oracle.services.insight_engine import InsightEngine
23
+ from oracle.services.memory_reader import MemoryReader
24
+ from oracle.services.memory_writer import MemoryWriter
25
+ from oracle.services.ollama_bridge import OllamaBridge
26
+ from oracle.services.project_scanner import ProjectScanner
27
+
28
# ── Tool definitions (embedded in system prompt) ──────────

# Catalogue of tools advertised to the model; appended verbatim to the system
# prompt by _build_system_prompt().
# NOTE(review): the header line says "call ONE at a time", while _SYSTEM_RULES
# allows several <tool_call> blocks per response and chat() runs every parsed
# call in parallel -- confirm which instruction is intended.
_TOOL_DEFS = """
Available tools — call ONE at a time by outputting exactly:
<tool_call>{"name": "tool_name", "args": {…}}</tool_call>

After you see the result, continue your response using the data.
Do NOT call a tool if you can answer from context or prior results.

Tools:
1. list_projects()
Returns all registered C3 projects with fact counts and paths.

2. query_memory(project_path, query?, category?, limit=10)
Search or list memory facts from a specific project.
- project_path (required): full path to the project
- query (optional): keyword filter on fact text
- category (optional): filter by category
- limit: max results (default 10)

3. search_facts(query, limit=20)
Search facts across ALL projects. Returns matches with project source.

4. project_health(project_path)
Run a health check on a project's memory. Returns status, issues, stats.

5. analyze_project(project_path)
Deep LLM-powered analysis of a project's memory patterns and themes.

6. cross_insights(project_path?)
Get cross-project insights. If project_path given, filter to that project.

7. suggest_action(project_path, action, fact_ids, reason)
Create a pending suggestion (merge_facts, archive_facts, or add_fact).
- action: "merge_facts" | "archive_facts" | "add_fact"
- fact_ids: list of fact IDs involved
- reason: explanation string

8. read_graph(project_path)
Get memory graph statistics for a project (nodes, edges, types).

--- C3 Code Intelligence Tools (require project_path) ---

9. c3_search(project_path, query, action="code", top_k=3, max_tokens=1200)
Code intelligence search within a project.
action: code|exact|files|semantic|transcript

10. c3_read(project_path, file_path, symbols=null, lines=null)
Read file contents with optional symbol or line-range extraction.

11. c3_edits(project_path, action="history", file="", limit=50, since="", tag="")
Query the edit ledger: history, versions, stats.

12. c3_edits_cross(action="history", tag="", limit=20)
Query edit ledgers across ALL projects. No project_path needed.

13. c3_memory_query(project_path, action="query", query="", category="", top_k=10)
Query project memory (read-only: recall, query, list, score, graph, trends).

14. c3_compress(project_path, file_path, mode="map")
Token-efficient file summary. mode: map|dense_map|smart|diff|bug_scan

15. c3_validate(project_path, file_path)
Syntax validation on a file.

16. c3_status(project_path, view="health", detailed=false)
Project health/budget/sessions overview. view: budget|health|sessions

17. c3_search_cross(query, action="code", top_k=3)
Search code across ALL projects. No project_path needed.

18. delegate_task(agent_id, task)
Delegate a specific sub-task to a specialized agent.
- agent_id: The ID of the active agent to use.
- task: A detailed prompt explaining what the agent needs to do.
"""

# Opening section of every system prompt: role description plus guidance on
# which tool family to use for which kind of question.
_SYSTEM_BASE = """You are Oracle, an AI assistant specializing in cross-project code intelligence and memory analysis.
You have access to memory facts, project health data, cross-project insights,
AND full C3 code intelligence (code search, file reading, edit history, validation)
for all registered C3 projects. You help developers understand patterns across
their projects, investigate code, trace edit history, and maintain healthy memory.

When the user asks about projects, code, memory, patterns, or needs analysis — use your
tools to retrieve real data before answering. Always ground responses in actual
project data.

For code-level investigation, use c3_search/c3_read/c3_compress to explore files.
For edit history, use c3_edits or c3_edits_cross to trace changes across projects.
Use list_projects first to discover project paths before calling project-specific tools.
"""

# Per-depth style instructions, keyed by the conversation's "depth" state value.
# _build_system_prompt() falls back to the "normal" entry for unknown keys.
_DEPTH_INSTRUCTIONS = {
    "brief": "\nBe very concise. Use bullet points. Max 3 sentences per answer. Only use a tool if you truly cannot answer without it — prefer answering from context.\n",
    "normal": "\nBe concise and specific. Use a tool only when the user asks about specific data you don't have in context. Limit yourself to one tool call when possible.\n",
    "deep": "\nProvide thorough, detailed analysis with examples, data, and recommendations. Use multiple tool calls to gather comprehensive data when needed.\n",
}

# Closing section of every system prompt (appended after _TOOL_DEFS).
_SYSTEM_RULES = """
Important rules:
- You can call multiple tools at once by outputting multiple `<tool_call>...</tool_call>` blocks.
- Call tools in parallel when tasks are independent.
- If the user's question can be answered from conversation context, do NOT call a tool.
- After receiving tool results, synthesize them into a clear, helpful answer.
- Format your answers with markdown for readability.
"""
134
+
135
# ── Slash command registry ────────────────────────────────

# Maps a command name (without the leading "/") to its argument synopsis and
# description. execute_command() rejects any name that is not a key here, and
# get_commands() hands this dict to the frontend.
COMMANDS = {
    "project": {"args": "<name...> | clear", "desc": "Focus on specific projects"},
    "model": {"args": "<model-name>", "desc": "Switch LLM model for this conversation"},
    "depth": {"args": "brief | normal | deep", "desc": "Set response detail level"},
    "health": {"args": "[project-name]", "desc": "Quick health check (no LLM)"},
    "clear": {"args": "", "desc": "Clear conversation history"},
    "help": {"args": "", "desc": "Show available commands"},
    "tools": {"args": "", "desc": "List available Oracle tools"},
    "team": {"args": "", "desc": "Show active agents and their specializations"},
}

# Matches one <tool_call>{...}</tool_call> block; group 1 is the JSON payload.
# DOTALL lets the payload span several lines.
_TOOL_CALL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
# NOTE(review): not referenced in the part of the file visible here -- chat()
# takes its round limit from _DEPTH_MAX_ROUNDS. Confirm whether it is still used.
_MAX_TOOL_ROUNDS = 8
# Rounds = LLM calls. One tool use needs 2 rounds (call + response synthesis).
_DEPTH_MAX_ROUNDS = {"brief": 2, "normal": 4, "deep": 8}
# Sliding window: only the last N stored messages are replayed to the LLM.
_MAX_HISTORY_MESSAGES = 40
# Serialized tool results longer than this are cut and suffixed with
# "... (truncated)" before being stored or shown to the LLM.
_MAX_TOOL_RESULT_CHARS = 3000
# Extra user message sent when a model streamed only "thinking" output and no
# visible assistant content; chat() retries once with thinking disabled.
_VISIBLE_RETRY_PROMPT = (
    "Your previous response contained only hidden reasoning and no user-visible "
    "assistant content. Now provide the visible response. If you need a tool, "
    "output exactly one <tool_call>{...}</tool_call> block in assistant content; "
    "otherwise answer the user directly."
)
160
+
161
+ _TC_OPEN = "<tool_call>"
162
+ _TC_CLOSE = "</tool_call>"
163
+ # If the pre-strip visible answer in round 0 is at least this many chars, we
164
+ # treat any trailing <tool_call> as speculative and do NOT regenerate. Prevents
165
+ # the "correct answer then wrong answer on next round" failure mode.
166
+ _TRUST_ANSWER_MIN_CHARS = 120
167
+
168
+
169
+ class _ToolCallStripper:
170
+ """Streaming-friendly stripper for <tool_call>...</tool_call> blocks.
171
+
172
+ Buffers chunks so partial open/close tags that straddle chunk boundaries
173
+ are never leaked to the UI. feed() returns only visible (stripped) text;
174
+ flush() emits any trailing buffer that is not inside a tool_call.
175
+ """
176
+
177
+ def __init__(self) -> None:
178
+ self._buf = ""
179
+ self._in_call = False
180
+
181
+ def feed(self, chunk: str) -> str:
182
+ self._buf += chunk
183
+ out: list[str] = []
184
+ while True:
185
+ if self._in_call:
186
+ end = self._buf.find(_TC_CLOSE)
187
+ if end == -1:
188
+ return "".join(out)
189
+ self._buf = self._buf[end + len(_TC_CLOSE):]
190
+ self._in_call = False
191
+ continue
192
+ start = self._buf.find(_TC_OPEN)
193
+ if start == -1:
194
+ hold = 0
195
+ for i in range(1, len(_TC_OPEN)):
196
+ if self._buf.endswith(_TC_OPEN[:i]):
197
+ hold = i
198
+ if hold:
199
+ out.append(self._buf[:-hold])
200
+ self._buf = self._buf[-hold:]
201
+ else:
202
+ out.append(self._buf)
203
+ self._buf = ""
204
+ return "".join(out)
205
+ out.append(self._buf[:start])
206
+ self._buf = self._buf[start + len(_TC_OPEN):]
207
+ self._in_call = True
208
+
209
+ def flush(self) -> str:
210
+ if self._in_call:
211
+ return ""
212
+ tail = self._buf
213
+ self._buf = ""
214
+ return tail
215
+
216
+
217
def _build_system_prompt(state: dict) -> str:
    """Assemble the system prompt for one conversation.

    Sections, in order: base role text, an optional supervisor section
    listing the agents marked active in the loaded config, the depth
    instruction for ``state["depth"]`` (unknown values use "normal"), an
    optional project-focus note built from ``state["focused_projects"]``,
    the tool catalogue, and the closing rules.
    """
    sections = [_SYSTEM_BASE]

    # Supervisor section: only emitted when at least one agent is active.
    roster = [entry for entry in load_config().get("agents", []) if entry.get("active")]
    if roster:
        sections.append(
            "\n\nYou are the Oracle Supervisor. You lead a team of specialized agents. "
            "You can delegate specific sub-tasks to them using the `delegate_task` tool. "
            "If a task requires deep specialization, delegate it.\n**Active Agents:**\n"
        )
        sections.extend(
            f"- `{entry.get('id')}`: {entry.get('description', '')}\n"
            for entry in roster
        )

    # Response depth.
    sections.append(
        _DEPTH_INSTRUCTIONS.get(state.get("depth", "normal"), _DEPTH_INSTRUCTIONS["normal"])
    )

    # Project focus: each entry must carry "name" and "path".
    focus = state.get("focused_projects", [])
    if focus:
        listing = ", ".join(f'"{proj["name"]}" ({proj["path"]})' for proj in focus)
        sections.append(
            f"\nYou are currently focused on these projects: {listing}.\n"
            "Prefer querying these projects first. When the user says 'this project' "
            "or 'my project', they mean one of the focused projects.\n"
        )

    sections += [_TOOL_DEFS, _SYSTEM_RULES]
    return "".join(sections)
246
+
247
+
248
+ class ChatEngine:
249
+ """Orchestrates chat with tool-calling loop and streaming."""
250
+
251
+ def __init__(
252
+ self,
253
+ bridge: OllamaBridge,
254
+ reader: MemoryReader,
255
+ writer: MemoryWriter,
256
+ cross_memory: CrossMemory,
257
+ health_checker: HealthChecker,
258
+ insight_engine: InsightEngine,
259
+ scanner: ProjectScanner,
260
+ store: ChatStore,
261
+ c3_bridge=None,
262
+ ):
263
+ self.bridge = bridge
264
+ self.reader = reader
265
+ self.writer = writer
266
+ self.cross_memory = cross_memory
267
+ self.health_checker = health_checker
268
+ self.insight_engine = insight_engine
269
+ self.scanner = scanner
270
+ self.store = store
271
+ self.c3_bridge = c3_bridge
272
+
273
+ # ── Main chat generator ───────────────────────────────
274
+
275
+ def chat(self, conv_id: str | None, user_message: str):
276
+ """
277
+ Generator yielding SSE event dicts:
278
+ {"type": "meta", ...}
279
+ {"type": "status", "message": ..., "detail": ...}
280
+ {"type": "text", "content": "..."}
281
+ {"type": "tool_call", "name": ..., "args": ..., "tool_id": ...}
282
+ {"type": "tool_result", "tool_id": ..., "name": ..., "result": ..., "duration_ms": ...}
283
+ {"type": "done", "conv_id": ..., "stats": ...}
284
+ {"type": "error", "message": ...}
285
+ """
286
+ turn_start = time.time()
287
+ total_tokens = 0
288
+ thinking_chars = 0
289
+ response_chars = 0
290
+ tool_calls_count = 0
291
+ ollama_stats = {}
292
+
293
+ # Ensure conversation exists
294
+ if not conv_id:
295
+ conv_id = self.store.create_conversation()
296
+
297
+ # Load conversation state (project focus, model, depth)
298
+ state = self.store.get_state(conv_id)
299
+ use_model = state.get("model") or self.bridge.model
300
+ focused = state.get("focused_projects", [])
301
+ focus_label = ", ".join(p["name"] for p in focused) if focused else "all projects"
302
+
303
+ yield {
304
+ "type": "meta", "conv_id": conv_id, "model": use_model,
305
+ "state": state,
306
+ }
307
+ yield {"type": "status", "message": "Preparing context", "detail": f"Focus: {focus_label}"}
308
+
309
+ # Save user message
310
+ self.store.append_message(conv_id, {"role": "user", "content": user_message})
311
+
312
+ # Build messages for LLM
313
+ history = self.store.get_conversation(conv_id)
314
+ llm_messages = self._build_llm_messages(history, state)
315
+ context_msgs = len(llm_messages) - 1 # exclude system prompt
316
+ yield {"type": "status", "message": "Context ready", "detail": f"{context_msgs} messages in context"}
317
+
318
+ # Tool-calling loop — depth controls max rounds
319
+ depth = state.get("depth", "normal")
320
+ max_rounds = _DEPTH_MAX_ROUNDS.get(depth, 2)
321
+ round_messages = [] # messages generated in this turn
322
+ try:
323
+ for _round in range(max_rounds):
324
+ round_label = f"Round {_round + 1}" if _round > 0 else ""
325
+ yield {"type": "status", "message": f"Streaming from {use_model}", "detail": round_label or "Generating response"}
326
+
327
+ full_text = ""
328
+ thinking_text = ""
329
+ stream_start = time.time()
330
+ chunk_count = 0
331
+ stripper = _ToolCallStripper()
332
+ try:
333
+ for item in self.bridge.stream_chat(llm_messages, model=use_model):
334
+ # Bridge yields (type, content) tuples or plain strings
335
+ if isinstance(item, tuple):
336
+ kind, chunk = item
337
+ else:
338
+ kind, chunk = "text", item
339
+ if kind == "thinking":
340
+ thinking_text += chunk
341
+ thinking_chars += len(chunk)
342
+ yield {"type": "thinking", "content": chunk}
343
+ elif kind == "stats":
344
+ # Ollama token stats from final chunk
345
+ ollama_stats = chunk
346
+ else:
347
+ full_text += chunk
348
+ visible = stripper.feed(chunk)
349
+ if visible:
350
+ response_chars += len(visible)
351
+ yield {"type": "text", "content": visible}
352
+ chunk_count += 1
353
+ tail = stripper.flush()
354
+ if tail:
355
+ response_chars += len(tail)
356
+ yield {"type": "text", "content": tail}
357
+ except Exception as e:
358
+ yield {"type": "error", "message": f"LLM error: {e}"}
359
+ break
360
+
361
+ if not full_text.strip() and thinking_text.strip():
362
+ yield {
363
+ "type": "status",
364
+ "message": "Retrying visible response",
365
+ "detail": "Model returned thinking without assistant content",
366
+ }
367
+ retry_messages = llm_messages + [{"role": "user", "content": _VISIBLE_RETRY_PROMPT}]
368
+ try:
369
+ for item in self.bridge.stream_chat(
370
+ retry_messages, model=use_model, think=False
371
+ ):
372
+ if isinstance(item, tuple):
373
+ kind, chunk = item
374
+ else:
375
+ kind, chunk = "text", item
376
+ if kind == "thinking":
377
+ thinking_text += chunk
378
+ thinking_chars += len(chunk)
379
+ yield {"type": "thinking", "content": chunk}
380
+ elif kind == "stats":
381
+ ollama_stats = chunk
382
+ else:
383
+ full_text += chunk
384
+ response_chars += len(chunk)
385
+ yield {"type": "text", "content": chunk}
386
+ chunk_count += 1
387
+ except Exception as e:
388
+ yield {"type": "error", "message": f"Visible response retry failed: {e}"}
389
+
390
+ if not full_text.strip() and thinking_text.strip():
391
+ fallback = (
392
+ f"{use_model} returned hidden reasoning but no visible response. "
393
+ "I retried with thinking disabled and still did not receive assistant content. "
394
+ "Try again or switch to another model with /model."
395
+ )
396
+ full_text = fallback
397
+ response_chars += len(fallback)
398
+ yield {"type": "text", "content": fallback}
399
+
400
+ stream_ms = int((time.time() - stream_start) * 1000)
401
+ total_tokens += chunk_count
402
+ yield {"type": "status", "message": "Response received", "detail": f"{chunk_count} chunks in {stream_ms}ms"}
403
+
404
+ # Check for tool call(s) in response
405
+ tool_matches = list(_TOOL_CALL_RE.finditer(full_text))
406
+ visible_text = _TOOL_CALL_RE.sub("", full_text).strip()
407
+ final_text = visible_text or full_text
408
+
409
+ if not tool_matches:
410
+ # No tool call — final answer
411
+ round_messages.append({"role": "assistant", "content": final_text})
412
+ break
413
+
414
+ # Extract tool calls
415
+ valid_calls = []
416
+ for match in tool_matches:
417
+ try:
418
+ call = json.loads(match.group(1))
419
+ valid_calls.append(call)
420
+ except json.JSONDecodeError:
421
+ continue
422
+
423
+ if not valid_calls:
424
+ round_messages.append({"role": "assistant", "content": final_text})
425
+ break
426
+
427
+ # If the model already produced a substantive answer alongside
428
+ # speculative tool calls, trust the answer and stop. Regenerating
429
+ # with tool results usually corrupts the correct answer because
430
+ # "continue" is read as "restart".
431
+ if len(visible_text) >= _TRUST_ANSWER_MIN_CHARS:
432
+ round_messages.append({"role": "assistant", "content": visible_text})
433
+ yield {
434
+ "type": "status",
435
+ "message": "Answer finalized",
436
+ "detail": "Speculative tool calls skipped — answer already provided",
437
+ }
438
+ break
439
+
440
+ # Record the assistant response before tool results (stripped so
441
+ # the next LLM round sees clean context without <tool_call> noise).
442
+ round_messages.append({"role": "assistant", "content": final_text})
443
+ llm_messages.append({"role": "assistant", "content": final_text})
444
+
445
+ # Execute all tools in parallel
446
+ tool_calls_count += len(valid_calls)
447
+
448
+ # Per-turn sink for sub-agent lifecycle events emitted from
449
+ # worker threads (e.g. _tool_delegate_task). Drained in the
450
+ # future-polling loop below and yielded to the HTTP stream.
451
+ agent_event_sink: queue.Queue = queue.Queue()
452
+
453
+ def _run_tool(tool_name, tool_args, tool_id):
454
+ _agent_tls.agent_sink = agent_event_sink
455
+ _agent_tls.parent_tool_id = tool_id
456
+ t0 = time.perf_counter_ns()
457
+ try:
458
+ result = self._execute_tool(tool_name, tool_args)
459
+ finally:
460
+ _agent_tls.agent_sink = None
461
+ _agent_tls.parent_tool_id = None
462
+ dur_ms = (time.perf_counter_ns() - t0) // 1_000_000
463
+ return result, dur_ms
464
+
465
+ with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(valid_calls), 5)) as executor:
466
+ # Prepare tasks
467
+ future_to_call = {}
468
+ for call in valid_calls:
469
+ tool_name = call.get("name", "unknown")
470
+ tool_args = call.get("args", {})
471
+ tool_id = uuid.uuid4().hex[:8]
472
+
473
+ yield {"type": "tool_call", "name": tool_name, "args": tool_args, "tool_id": tool_id}
474
+
475
+ future = executor.submit(_run_tool, tool_name, tool_args, tool_id)
476
+ future_to_call[future] = (tool_name, tool_args, tool_id, call)
477
+
478
+ # Poll futures with short timeout so we can drain the
479
+ # agent event sink between poll ticks. This interleaves
480
+ # sub-agent events with tool_result events in real time.
481
+ pending = set(future_to_call.keys())
482
+ while pending:
483
+ while True:
484
+ try:
485
+ ev = agent_event_sink.get_nowait()
486
+ except queue.Empty:
487
+ break
488
+ yield ev
489
+ done_now, pending = concurrent.futures.wait(
490
+ pending,
491
+ timeout=0.05,
492
+ return_when=concurrent.futures.FIRST_COMPLETED,
493
+ )
494
+ for future in done_now:
495
+ tool_name, tool_args, tool_id, call = future_to_call[future]
496
+ try:
497
+ result, dur_ms = future.result()
498
+ except Exception as e:
499
+ result, dur_ms = {"error": str(e)}, 0
500
+
501
+ # Drain any remaining sub-agent events for this
502
+ # tool before emitting its tool_result so the UI
503
+ # sees agent_done before tool completion.
504
+ while True:
505
+ try:
506
+ ev = agent_event_sink.get_nowait()
507
+ except queue.Empty:
508
+ break
509
+ yield ev
510
+
511
+ result_str = json.dumps(result, default=str)
512
+ truncated = result_str[:_MAX_TOOL_RESULT_CHARS]
513
+ if len(result_str) > _MAX_TOOL_RESULT_CHARS:
514
+ truncated += "... (truncated)"
515
+
516
+ yield {
517
+ "type": "tool_result", "tool_id": tool_id, "name": tool_name,
518
+ "result": result, "duration_ms": dur_ms,
519
+ }
520
+
521
+ # Record in round messages
522
+ round_messages.append({
523
+ "role": "tool_call", "content": json.dumps(call),
524
+ "tool_name": tool_name, "tool_id": tool_id,
525
+ })
526
+ round_messages.append({
527
+ "role": "tool_result", "content": truncated,
528
+ "tool_name": tool_name, "tool_id": tool_id,
529
+ })
530
+
531
+ # Extend next LLM context
532
+ llm_messages.append({
533
+ "role": "user",
534
+ "content": f"<tool_result name=\"{tool_name}\">\n{truncated}\n</tool_result>",
535
+ })
536
+
537
+ # Final drain after all tools complete
538
+ while True:
539
+ try:
540
+ ev = agent_event_sink.get_nowait()
541
+ except queue.Empty:
542
+ break
543
+ yield ev
544
+
545
+ llm_messages.append({
546
+ "role": "user",
547
+ "content": (
548
+ "The tool results above contain the data you requested. "
549
+ "Now write your final answer to the user based on those results. "
550
+ "Do NOT output any <tool_call> blocks. Do NOT restart or repeat "
551
+ "your previous message — the user has already seen it. "
552
+ "Write only the remaining, finalized answer."
553
+ ),
554
+ })
555
+ yield {"type": "status", "message": "Finalizing answer", "detail": f"After {len(valid_calls)} parallel results"}
556
+ else:
557
+ # Exhausted rounds without a final answer — synthesize one
558
+ fallback = (
559
+ f"I gathered data using {tool_calls_count} tool call(s) but reached "
560
+ f"the {max_rounds}-round limit before producing a final answer. "
561
+ f"Try increasing depth with /depth deep, or rephrase your question."
562
+ )
563
+ yield {"type": "text", "content": fallback}
564
+ round_messages.append({"role": "assistant", "content": fallback})
565
+
566
+ except Exception as e:
567
+ yield {"type": "error", "message": str(e)}
568
+
569
+ total_ms = int((time.time() - turn_start) * 1000)
570
+ # Build token stats from Ollama if available
571
+ token_stats = {}
572
+ if ollama_stats:
573
+ # Ollama durations are in nanoseconds
574
+ eval_count = ollama_stats.get("eval_count", 0)
575
+ eval_ns = ollama_stats.get("eval_duration", 0)
576
+ prompt_count = ollama_stats.get("prompt_eval_count", 0)
577
+ token_stats = {
578
+ "eval_tokens": eval_count,
579
+ "prompt_tokens": prompt_count,
580
+ "tokens_per_sec": round(eval_count / (eval_ns / 1e9), 1) if eval_ns else 0,
581
+ }
582
+
583
+ rounds_used = min(_round + 1, max_rounds) if round_messages else 0
584
+
585
+ # Attach per-turn metadata to the final assistant message so it
586
+ # survives conversation reload. The UI renders a footer from this.
587
+ assistant_metadata = {
588
+ "model": use_model,
589
+ "duration_ms": total_ms,
590
+ "rounds": rounds_used,
591
+ "tool_calls": tool_calls_count,
592
+ "thinking_chars": thinking_chars,
593
+ "response_chars": response_chars,
594
+ **token_stats,
595
+ }
596
+ for msg in reversed(round_messages):
597
+ if msg.get("role") == "assistant":
598
+ msg["metadata"] = assistant_metadata
599
+ break
600
+
601
+ # Persist all round messages
602
+ if round_messages:
603
+ self.store.append_messages(conv_id, round_messages)
604
+
605
+ yield {
606
+ "type": "done", "conv_id": conv_id,
607
+ "model": use_model,
608
+ "stats": {
609
+ "model": use_model,
610
+ "total_ms": total_ms,
611
+ "chunks": total_tokens,
612
+ "thinking_chars": thinking_chars,
613
+ "response_chars": response_chars,
614
+ "tool_calls": tool_calls_count,
615
+ "rounds": rounds_used,
616
+ **token_stats,
617
+ },
618
+ }
619
+
620
+ # ── LLM message building ─────────────────────────────
621
+
622
+ def _build_llm_messages(self, history: list[dict], state: dict | None = None) -> list[dict]:
623
+ """Convert stored history to Ollama chat messages with sliding window."""
624
+ system_prompt = _build_system_prompt(state or {})
625
+ messages = [{"role": "system", "content": system_prompt}]
626
+
627
+ # Take last N messages, skip tool_call/tool_result (they were inlined)
628
+ recent = history[-_MAX_HISTORY_MESSAGES:]
629
+ for msg in recent:
630
+ role = msg.get("role", "user")
631
+ content = msg.get("content", "")
632
+ if role in ("user", "assistant"):
633
+ messages.append({"role": role, "content": content})
634
+ elif role == "tool_result":
635
+ # Re-inject as user message so LLM has context
636
+ name = msg.get("tool_name", "tool")
637
+ messages.append({
638
+ "role": "user",
639
+ "content": f"<tool_result name=\"{name}\">\n{content}\n</tool_result>",
640
+ })
641
+
642
+ return messages
643
+
644
+ # ── Slash commands ────────────────────────────────────
645
+
646
+ @staticmethod
647
+ def get_commands() -> dict:
648
+ """Return command registry for the frontend autocomplete."""
649
+ return COMMANDS
650
+
651
def execute_command(self, conv_id: str | None, command_str: str) -> dict:
    """Parse a slash command and run the matching ``_cmd_*`` handler.

    The input is stripped and one leading ``/`` is removed. The first
    whitespace-delimited token (lower-cased) is the command; the stripped
    remainder is its argument string. Commands not present in ``COMMANDS``
    yield an ``ok: False`` result. When ``conv_id`` is falsy, a conversation
    is created through the store before the handler runs.

    Returns:
        The handler's result dict, or an "Unknown command" error dict.
    """
    text = command_str.strip().removeprefix("/")

    head, *tail = text.split(None, 1) or [""]
    cmd = head.lower()
    args = tail[0].strip() if tail else ""

    unknown = {"ok": False, "command": cmd, "message": f"Unknown command: /{cmd}"}
    if cmd not in COMMANDS:
        return unknown

    # Handlers need a conversation to read/write state against.
    if not conv_id:
        conv_id = self.store.create_conversation()

    # Lazy dispatch table: only the selected handler is looked up and called.
    handlers = {
        "project": lambda: self._cmd_project(conv_id, args),
        "model": lambda: self._cmd_model(conv_id, args),
        "depth": lambda: self._cmd_depth(conv_id, args),
        "health": lambda: self._cmd_health(conv_id, args),
        "clear": lambda: self._cmd_clear(conv_id),
        "help": lambda: self._cmd_help(),
        "tools": lambda: self._cmd_tools(),
        "team": lambda: self._cmd_team(),
    }
    handler = handlers.get(cmd)
    if handler is None:
        # Registered in COMMANDS but no handler is wired up here.
        return unknown
    return handler()
687
+
688
+ def _cmd_project(self, conv_id: str, args: str) -> dict:
689
+ if args.lower() == "clear" or not args:
690
+ self.store.update_state(conv_id, focused_projects=[])
691
+ return {
692
+ "ok": True, "command": "project",
693
+ "message": "Project focus cleared. Now querying all projects.",
694
+ "state": self.store.get_state(conv_id),
695
+ }
696
+
697
+ # Fuzzy-match project names
698
+ projects = self.scanner.discover()
699
+ names = [n.strip() for n in args.replace(",", " ").split()]
700
+ matched = []
701
+ not_found = []
702
+ for name in names:
703
+ name_lower = name.lower()
704
+ found = None
705
+ for p in projects:
706
+ p_name = Path(p.get("path", "")).name.lower()
707
+ if name_lower == p_name or name_lower in p_name:
708
+ found = {"name": Path(p["path"]).name, "path": p["path"]}
709
+ break
710
+ if found:
711
+ matched.append(found)
712
+ else:
713
+ not_found.append(name)
714
+
715
+ if not matched:
716
+ available = ", ".join(Path(p.get("path", "")).name for p in projects[:10])
717
+ return {
718
+ "ok": False, "command": "project",
719
+ "message": f"No projects matched: {', '.join(not_found)}. Available: {available}",
720
+ }
721
+
722
+ self.store.update_state(conv_id, focused_projects=matched)
723
+ msg = f"Focused on: {', '.join(m['name'] for m in matched)}"
724
+ if not_found:
725
+ msg += f"\nNot found: {', '.join(not_found)}"
726
+ return {
727
+ "ok": True, "command": "project", "message": msg,
728
+ "state": self.store.get_state(conv_id),
729
+ }
730
+
731
+ def _cmd_model(self, conv_id: str, args: str) -> dict:
732
+ if not args:
733
+ state = self.store.get_state(conv_id)
734
+ current = state.get("model") or self.bridge.model
735
+ return {"ok": True, "command": "model", "message": f"Current model: {current}"}
736
+
737
+ # Validate model
738
+ if self.bridge.has_model(args):
739
+ self.store.update_state(conv_id, model=args)
740
+ return {
741
+ "ok": True, "command": "model",
742
+ "message": f"Model switched to: {args}",
743
+ "state": self.store.get_state(conv_id),
744
+ }
745
+ else:
746
+ models = self.bridge.list_models() or []
747
+ available = ", ".join(models[:10]) if models else "(none found)"
748
+ return {
749
+ "ok": False, "command": "model",
750
+ "message": f"Model '{args}' not found. Available: {available}",
751
+ }
752
+
753
+ def _cmd_depth(self, conv_id: str, args: str) -> dict:
754
+ level = args.lower().strip()
755
+ if level not in ("brief", "normal", "deep"):
756
+ return {
757
+ "ok": False, "command": "depth",
758
+ "message": f"Invalid depth: '{args}'. Use: brief, normal, or deep",
759
+ }
760
+ self.store.update_state(conv_id, depth=level)
761
+ labels = {"brief": "Concise bullet points", "normal": "Standard detail", "deep": "Thorough analysis"}
762
+ return {
763
+ "ok": True, "command": "depth",
764
+ "message": f"Response depth set to: {level} ({labels[level]})",
765
+ "state": self.store.get_state(conv_id),
766
+ }
767
+
768
+ def _cmd_health(self, conv_id: str, args: str) -> dict:
769
+ state = self.store.get_state(conv_id)
770
+ focused = state.get("focused_projects", [])
771
+
772
+ if args:
773
+ # Find matching project
774
+ projects = self.scanner.discover()
775
+ target = None
776
+ for p in projects:
777
+ if args.lower() in Path(p.get("path", "")).name.lower():
778
+ target = p.get("path")
779
+ break
780
+ if not target:
781
+ return {"ok": False, "command": "health", "message": f"Project not found: {args}"}
782
+ results = [self.health_checker.check(target)]
783
+ elif focused:
784
+ results = [self.health_checker.check(p["path"]) for p in focused]
785
+ else:
786
+ projects = self.scanner.discover()
787
+ results = [self.health_checker.check(p["path"]) for p in projects[:5]]
788
+
789
+ return {"ok": True, "command": "health", "results": results}
790
+
791
+ def _cmd_clear(self, conv_id: str) -> dict:
792
+ # Delete messages but keep state
793
+ state = self.store.get_state(conv_id)
794
+ path = self.store._conv_path(conv_id)
795
+ path.write_text("[]", "utf-8")
796
+ self.store._touch_index(conv_id, message_count=0)
797
+ return {
798
+ "ok": True, "command": "clear",
799
+ "message": "Conversation cleared. State preserved.",
800
+ "state": state,
801
+ }
802
+
803
def _cmd_help(self) -> dict:
    """Handle ``/help``: render every ``COMMANDS`` entry as a Markdown bullet.

    Each bullet shows the command name, its argument hint when the entry's
    ``args`` value is truthy, and its description.
    """
    def _bullet(cmd: str, info: dict) -> str:
        arg_str = f" `{info['args']}`" if info["args"] else ""
        return f"- **/{cmd}**{arg_str} — {info['desc']}"

    rendered = ["**Available commands:**\n"]
    rendered.extend(_bullet(cmd, info) for cmd, info in COMMANDS.items())
    return {"ok": True, "command": "help", "message": "\n".join(rendered)}
809
+
810
def _cmd_tools(self) -> dict:
    """Handle ``/tools``: return the stripped ``_TOOL_DEFS`` text as the message."""
    tool_text = _TOOL_DEFS.strip()
    return {"ok": True, "command": "tools", "message": tool_text}
812
+
813
def _cmd_team(self) -> dict:
    """Handle ``/team``: list the agents marked active in the loaded config.

    Reads the ``agents`` list from ``load_config()`` and keeps entries whose
    ``active`` value is truthy. Each is rendered as a Markdown bullet with
    its name, id and description. A fixed hint message is returned when no
    agent is active.
    """
    active = [a for a in load_config().get("agents", []) if a.get("active")]

    if active:
        lines = ["**Active Specialized Agents:**\n"]
        lines.extend(
            f"- **{a['name']}** (`{a['id']}`): {a.get('description', 'No description')}"
            for a in active
        )
        return {"ok": True, "command": "team", "message": "\n".join(lines)}

    return {"ok": True, "command": "team", "message": "No specialized agents are currently active. You can activate them in the **Team / Agents** tab."}
825
+
826
+ # ── Tool execution ────────────────────────────────────
827
+
828
+ def _execute_tool(self, name: str, args: dict) -> dict:
829
+ """Dispatch tool call to the appropriate service."""
830
+ try:
831
+ match name:
832
+ case "list_projects":
833
+ return self._tool_list_projects()
834
+ case "query_memory":
835
+ return self._tool_query_memory(**args)
836
+ case "search_facts":
837
+ return self._tool_search_facts(**args)
838
+ case "project_health":
839
+ return self._tool_project_health(**args)
840
+ case "analyze_project":
841
+ return self._tool_analyze_project(**args)
842
+ case "cross_insights":
843
+ return self._tool_cross_insights(**args)
844
+ case "suggest_action":
845
+ return self._tool_suggest_action(**args)
846
+ case "read_graph":
847
+ return self._tool_read_graph(**args)
848
+ case "delegate_task":
849
+ return self._tool_delegate_task(**args)
850
+ # ── C3 code intelligence tools ──
851
+ case "c3_search" | "c3_read" | "c3_edits" | "c3_edits_cross" | \
852
+ "c3_memory_query" | "c3_compress" | "c3_validate" | \
853
+ "c3_status" | "c3_search_cross":
854
+ return self._dispatch_c3(name, args)
855
+ case _:
856
+ return {"error": f"Unknown tool: {name}"}
857
+ except Exception as e:
858
+ return {"error": f"Tool '{name}' failed: {e}"}
859
+
860
+ # ── C3 bridge dispatch ─────────────────────────────────
861
+
862
+ def _dispatch_c3(self, name: str, args: dict) -> dict:
863
+ """Dispatch a C3 tool call through the bridge."""
864
+ if self.c3_bridge is None:
865
+ return {"error": "C3 bridge not configured. C3 code intelligence is unavailable."}
866
+ # Map tool names to bridge methods.
867
+ _C3_METHODS = {
868
+ "c3_search": self.c3_bridge.c3_search,
869
+ "c3_read": self.c3_bridge.c3_read,
870
+ "c3_edits": self.c3_bridge.c3_edits,
871
+ "c3_edits_cross": self.c3_bridge.c3_edits_cross,
872
+ "c3_memory_query": self.c3_bridge.c3_memory,
873
+ "c3_compress": self.c3_bridge.c3_compress,
874
+ "c3_validate": self.c3_bridge.c3_validate,
875
+ "c3_status": self.c3_bridge.c3_status,
876
+ "c3_search_cross": self.c3_bridge.c3_search_cross,
877
+ }
878
+ method = _C3_METHODS.get(name)
879
+ if not method:
880
+ return {"error": f"Unknown C3 tool: {name}"}
881
+ try:
882
+ return method(**args)
883
+ except Exception as e:
884
+ return {"error": f"C3 tool '{name}' failed: {e}"}
885
+
886
+ # ── Tool implementations ──────────────────────────────
887
+
888
+ def _tool_list_projects(self) -> dict:
889
+ projects = self.scanner.discover()
890
+ return {
891
+ "count": len(projects),
892
+ "projects": [
893
+ {
894
+ "name": p.get("name", Path(p.get("path", "")).name),
895
+ "path": p.get("path", ""),
896
+ "facts_count": p.get("facts_count", 0),
897
+ "has_c3": p.get("has_c3", False),
898
+ }
899
+ for p in projects
900
+ ],
901
+ }
902
+
903
+ def _tool_query_memory(
904
+ self, project_path: str, query: str = "", category: str = "", limit: int = 10
905
+ ) -> dict:
906
+ facts = self.reader.read_facts(project_path)
907
+ if category:
908
+ facts = [f for f in facts if f.get("category", "") == category]
909
+ if query:
910
+ query_lower = query.lower()
911
+ terms = query_lower.split()
912
+ facts = [
913
+ f for f in facts
914
+ if any(t in f.get("fact", "").lower() for t in terms)
915
+ ]
916
+ # Sort by relevance_count descending
917
+ facts.sort(key=lambda f: int(f.get("relevance_count", 0)), reverse=True)
918
+ top = facts[:limit]
919
+ return {
920
+ "project": project_path,
921
+ "total_matching": len(facts),
922
+ "returned": len(top),
923
+ "facts": [
924
+ {
925
+ "id": f.get("id", ""),
926
+ "category": f.get("category", "general"),
927
+ "fact": f.get("fact", "")[:300],
928
+ "lifecycle": f.get("lifecycle", "active"),
929
+ "relevance": f.get("relevance_count", 0),
930
+ }
931
+ for f in top
932
+ ],
933
+ }
934
+
935
+ def _tool_search_facts(self, query: str, limit: int = 20) -> dict:
936
+ """Search facts across all projects."""
937
+ projects = self.scanner.discover()
938
+ all_matches = []
939
+ query_lower = query.lower()
940
+ terms = query_lower.split()
941
+ for p in projects:
942
+ path = p.get("path", "")
943
+ facts = self.reader.read_facts(path)
944
+ for f in facts:
945
+ text = f.get("fact", "").lower()
946
+ score = sum(1 for t in terms if t in text)
947
+ if score > 0:
948
+ all_matches.append({
949
+ "project": Path(path).name,
950
+ "project_path": path,
951
+ "id": f.get("id", ""),
952
+ "category": f.get("category", "general"),
953
+ "fact": f.get("fact", "")[:300],
954
+ "score": score,
955
+ })
956
+ all_matches.sort(key=lambda m: m["score"], reverse=True)
957
+ top = all_matches[:limit]
958
+ return {"query": query, "total_matches": len(all_matches), "results": top}
959
+
960
+ def _tool_project_health(self, project_path: str) -> dict:
961
+ return self.health_checker.check(project_path)
962
+
963
+ def _tool_analyze_project(self, project_path: str) -> dict:
964
+ return self.insight_engine.analyze_project(project_path)
965
+
966
+ def _tool_cross_insights(self, project_path: str = "") -> dict:
967
+ if project_path:
968
+ insights = self.cross_memory.get_for_project(project_path)
969
+ else:
970
+ insights = self.cross_memory.get_all_insights()
971
+ stats = self.cross_memory.stats()
972
+ return {
973
+ "insights": [
974
+ {
975
+ "id": i.get("id", ""),
976
+ "type": i.get("type", ""),
977
+ "text": i.get("text", "")[:400],
978
+ "source_projects": i.get("source_projects", []),
979
+ "confidence": i.get("confidence", 0),
980
+ }
981
+ for i in insights[:20]
982
+ ],
983
+ "stats": stats,
984
+ }
985
+
986
+ def _tool_suggest_action(
987
+ self, project_path: str, action: str, fact_ids: list, reason: str
988
+ ) -> dict:
989
+ data = {"fact_ids": fact_ids, "reason": reason}
990
+ suggestion = self.writer.suggest(project_path, action, data)
991
+ return {"suggestion_id": suggestion.get("id"), "status": "pending", "type": action}
992
+
993
+ def _tool_read_graph(self, project_path: str) -> dict:
994
+ return self.reader.get_graph_stats(project_path)
995
+
996
def _tool_delegate_task(self, agent_id: str, task: str) -> dict:
    """Execute a sub-agent loop for the delegated task.

    Pushes lifecycle events onto the thread-local _agent_tls.agent_sink
    (set by the main chat() worker wrapper) so the UI can stream live
    sub-agent thinking, nested tool calls, and response tokens. Events are
    only emitted when both a sink and a parent tool id are present.

    The loop runs for at most six rounds. Each round streams one LLM reply.
    A reply without a usable tool call is returned as the final result.
    Otherwise the tool is executed (nested delegation is refused), its
    JSON-serialised result is truncated to ``_MAX_TOOL_RESULT_CHARS`` and
    fed back to the model as a user message.

    Args:
        agent_id: Id of an agent marked active in the loaded config.
        task: Text sent to the sub-agent as its user message.

    Returns:
        ``{"agent", "result"}`` on a final answer; ``{"error"}`` when the
        agent is missing/inactive, the LLM stream raises, or a reply is
        empty; ``{"agent", "error", "partial_result"}`` when the round
        limit is reached.
    """
    sink = getattr(_agent_tls, "agent_sink", None)
    parent_tool_id = getattr(_agent_tls, "parent_tool_id", None)

    def _emit(ev_type: str, **payload):
        # Dropped silently unless a sink and a parent tool id are both set.
        if sink is not None and parent_tool_id is not None:
            sink.put({"type": ev_type, "tool_id": parent_tool_id, **payload})

    cfg = load_config()
    agent = next(
        (a for a in cfg.get("agents", []) if a.get("id") == agent_id and a.get("active")),
        None,
    )
    if not agent:
        _emit("agent_done", agent_id=agent_id, error="not_active")
        return {"error": f"Agent '{agent_id}' is not active or does not exist."}

    system_prompt = f"{agent.get('system_prompt', '')}\n\n{_TOOL_DEFS}\n{_SYSTEM_RULES}"
    llm_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": task},
    ]

    rounds = 0
    max_rounds = 6
    model = agent.get("model") or self.bridge.model
    agent_start_ns = time.perf_counter_ns()

    def _elapsed_ms() -> int:
        # Milliseconds since the sub-agent started.
        return (time.perf_counter_ns() - agent_start_ns) // 1_000_000

    def _finish(text: str) -> dict:
        # Emit the terminal event and hand ``text`` back as the final answer.
        _emit("agent_done", agent_id=agent_id, rounds=rounds,
              result_chars=len(text), duration_ms=_elapsed_ms())
        return {"agent": agent_id, "result": text}

    _emit("agent_start", agent_id=agent_id, task=task, model=model)

    while rounds < max_rounds:
        rounds += 1
        _emit("agent_round", agent_id=agent_id, round=rounds)
        full_text = ""
        try:
            for item in self.bridge.stream_chat(llm_messages, model=model):
                # Bare items are treated as text; tuples carry (kind, chunk).
                if isinstance(item, tuple):
                    kind, chunk = item
                else:
                    kind, chunk = "text", item
                if kind == "text":
                    full_text += chunk
                    _emit("agent_text", content=chunk)
                elif kind == "thinking":
                    _emit("agent_thinking", content=chunk)
                # stats chunks are ignored for sub-agents
        except Exception as e:
            _emit("agent_done", agent_id=agent_id, rounds=rounds, error=str(e))
            return {"error": f"Agent '{agent_id}' encountered LLM error: {e}"}

        if not full_text.strip():
            _emit("agent_done", agent_id=agent_id, rounds=rounds, error="empty_response")
            return {"error": f"Agent '{agent_id}' returned empty response."}

        tool_match = _TOOL_CALL_RE.search(full_text)
        if not tool_match:
            return _finish(full_text)

        try:
            call = json.loads(tool_match.group(1))
        except json.JSONDecodeError:
            # Malformed tool-call payload: treat the reply as the answer.
            return _finish(full_text)
        if not isinstance(call, dict):
            # Valid JSON that is not an object cannot name a tool. Handle it
            # like malformed JSON so a terminal "agent_done" is still emitted
            # instead of raising AttributeError on ``call.get`` below.
            return _finish(full_text)

        tool_name = call.get("name", "unknown")
        tool_args = call.get("args", {})
        sub_tool_id = uuid.uuid4().hex[:8]

        _emit("agent_tool_call", sub_tool_id=sub_tool_id, name=tool_name, args=tool_args)

        t0 = time.perf_counter_ns()
        if tool_name == "delegate_task":
            # Refuse nested delegation.
            tool_result = {"error": "Sub-agents cannot delegate tasks."}
        else:
            tool_result = self._execute_tool(tool_name, tool_args)
        sub_dur_ms = (time.perf_counter_ns() - t0) // 1_000_000

        _emit("agent_tool_result", sub_tool_id=sub_tool_id, name=tool_name,
              result=tool_result, duration_ms=sub_dur_ms)

        result_str = json.dumps(tool_result, default=str)
        truncated = result_str[:_MAX_TOOL_RESULT_CHARS]
        if len(result_str) > _MAX_TOOL_RESULT_CHARS:
            truncated += "... (truncated)"

        llm_messages.append({"role": "assistant", "content": full_text})
        llm_messages.append({
            "role": "user",
            "content": f"<tool_result name=\"{tool_name}\">\n{truncated}\n</tool_result>\nContinue your response using this data."
        })

    # Round budget exhausted while the agent was still requesting tools.
    _emit("agent_done", agent_id=agent_id, rounds=rounds,
          result_chars=len(full_text), duration_ms=_elapsed_ms(),
          error="max_rounds_reached")
    return {"agent": agent_id, "error": "Agent reached max tool rounds.", "partial_result": full_text}
1103
+