llmcode-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. llm_code/__init__.py +2 -0
  2. llm_code/analysis/__init__.py +6 -0
  3. llm_code/analysis/cache.py +33 -0
  4. llm_code/analysis/engine.py +256 -0
  5. llm_code/analysis/go_rules.py +114 -0
  6. llm_code/analysis/js_rules.py +84 -0
  7. llm_code/analysis/python_rules.py +311 -0
  8. llm_code/analysis/rules.py +140 -0
  9. llm_code/analysis/rust_rules.py +108 -0
  10. llm_code/analysis/universal_rules.py +111 -0
  11. llm_code/api/__init__.py +0 -0
  12. llm_code/api/client.py +90 -0
  13. llm_code/api/errors.py +73 -0
  14. llm_code/api/openai_compat.py +390 -0
  15. llm_code/api/provider.py +35 -0
  16. llm_code/api/sse.py +52 -0
  17. llm_code/api/types.py +140 -0
  18. llm_code/cli/__init__.py +0 -0
  19. llm_code/cli/commands.py +70 -0
  20. llm_code/cli/image.py +122 -0
  21. llm_code/cli/render.py +214 -0
  22. llm_code/cli/status_line.py +79 -0
  23. llm_code/cli/streaming.py +92 -0
  24. llm_code/cli/tui_main.py +220 -0
  25. llm_code/computer_use/__init__.py +11 -0
  26. llm_code/computer_use/app_detect.py +49 -0
  27. llm_code/computer_use/app_tier.py +57 -0
  28. llm_code/computer_use/coordinator.py +99 -0
  29. llm_code/computer_use/input_control.py +71 -0
  30. llm_code/computer_use/screenshot.py +93 -0
  31. llm_code/cron/__init__.py +13 -0
  32. llm_code/cron/parser.py +145 -0
  33. llm_code/cron/scheduler.py +135 -0
  34. llm_code/cron/storage.py +126 -0
  35. llm_code/enterprise/__init__.py +1 -0
  36. llm_code/enterprise/audit.py +59 -0
  37. llm_code/enterprise/auth.py +26 -0
  38. llm_code/enterprise/oidc.py +95 -0
  39. llm_code/enterprise/rbac.py +65 -0
  40. llm_code/harness/__init__.py +5 -0
  41. llm_code/harness/config.py +33 -0
  42. llm_code/harness/engine.py +129 -0
  43. llm_code/harness/guides.py +41 -0
  44. llm_code/harness/sensors.py +68 -0
  45. llm_code/harness/templates.py +84 -0
  46. llm_code/hida/__init__.py +1 -0
  47. llm_code/hida/classifier.py +187 -0
  48. llm_code/hida/engine.py +49 -0
  49. llm_code/hida/profiles.py +95 -0
  50. llm_code/hida/types.py +28 -0
  51. llm_code/ide/__init__.py +1 -0
  52. llm_code/ide/bridge.py +80 -0
  53. llm_code/ide/detector.py +76 -0
  54. llm_code/ide/server.py +169 -0
  55. llm_code/logging.py +29 -0
  56. llm_code/lsp/__init__.py +0 -0
  57. llm_code/lsp/client.py +298 -0
  58. llm_code/lsp/detector.py +42 -0
  59. llm_code/lsp/manager.py +56 -0
  60. llm_code/lsp/tools.py +288 -0
  61. llm_code/marketplace/__init__.py +0 -0
  62. llm_code/marketplace/builtin_registry.py +102 -0
  63. llm_code/marketplace/installer.py +162 -0
  64. llm_code/marketplace/plugin.py +78 -0
  65. llm_code/marketplace/registry.py +360 -0
  66. llm_code/mcp/__init__.py +0 -0
  67. llm_code/mcp/bridge.py +87 -0
  68. llm_code/mcp/client.py +117 -0
  69. llm_code/mcp/health.py +120 -0
  70. llm_code/mcp/manager.py +214 -0
  71. llm_code/mcp/oauth.py +219 -0
  72. llm_code/mcp/transport.py +254 -0
  73. llm_code/mcp/types.py +53 -0
  74. llm_code/remote/__init__.py +0 -0
  75. llm_code/remote/client.py +136 -0
  76. llm_code/remote/protocol.py +22 -0
  77. llm_code/remote/server.py +275 -0
  78. llm_code/remote/ssh_proxy.py +56 -0
  79. llm_code/runtime/__init__.py +0 -0
  80. llm_code/runtime/auto_commit.py +56 -0
  81. llm_code/runtime/auto_diagnose.py +62 -0
  82. llm_code/runtime/checkpoint.py +70 -0
  83. llm_code/runtime/checkpoint_recovery.py +142 -0
  84. llm_code/runtime/compaction.py +35 -0
  85. llm_code/runtime/compressor.py +415 -0
  86. llm_code/runtime/config.py +533 -0
  87. llm_code/runtime/context.py +49 -0
  88. llm_code/runtime/conversation.py +921 -0
  89. llm_code/runtime/cost_tracker.py +126 -0
  90. llm_code/runtime/dream.py +127 -0
  91. llm_code/runtime/file_protection.py +150 -0
  92. llm_code/runtime/hardware.py +85 -0
  93. llm_code/runtime/hooks.py +223 -0
  94. llm_code/runtime/indexer.py +230 -0
  95. llm_code/runtime/knowledge_compiler.py +232 -0
  96. llm_code/runtime/memory.py +132 -0
  97. llm_code/runtime/memory_layers.py +467 -0
  98. llm_code/runtime/memory_lint.py +252 -0
  99. llm_code/runtime/model_aliases.py +37 -0
  100. llm_code/runtime/ollama.py +93 -0
  101. llm_code/runtime/overlay.py +124 -0
  102. llm_code/runtime/permissions.py +200 -0
  103. llm_code/runtime/plan.py +45 -0
  104. llm_code/runtime/prompt.py +238 -0
  105. llm_code/runtime/repo_map.py +174 -0
  106. llm_code/runtime/sandbox.py +116 -0
  107. llm_code/runtime/session.py +268 -0
  108. llm_code/runtime/skill_resolver.py +61 -0
  109. llm_code/runtime/skills.py +133 -0
  110. llm_code/runtime/speculative.py +75 -0
  111. llm_code/runtime/streaming_executor.py +216 -0
  112. llm_code/runtime/telemetry.py +196 -0
  113. llm_code/runtime/token_budget.py +26 -0
  114. llm_code/runtime/vcr.py +142 -0
  115. llm_code/runtime/vision.py +102 -0
  116. llm_code/swarm/__init__.py +1 -0
  117. llm_code/swarm/backend_subprocess.py +108 -0
  118. llm_code/swarm/backend_tmux.py +103 -0
  119. llm_code/swarm/backend_worktree.py +306 -0
  120. llm_code/swarm/checkpoint.py +74 -0
  121. llm_code/swarm/coordinator.py +236 -0
  122. llm_code/swarm/mailbox.py +88 -0
  123. llm_code/swarm/manager.py +202 -0
  124. llm_code/swarm/memory_sync.py +80 -0
  125. llm_code/swarm/recovery.py +21 -0
  126. llm_code/swarm/team.py +67 -0
  127. llm_code/swarm/types.py +31 -0
  128. llm_code/task/__init__.py +16 -0
  129. llm_code/task/diagnostics.py +93 -0
  130. llm_code/task/manager.py +162 -0
  131. llm_code/task/types.py +112 -0
  132. llm_code/task/verifier.py +104 -0
  133. llm_code/tools/__init__.py +0 -0
  134. llm_code/tools/agent.py +145 -0
  135. llm_code/tools/agent_roles.py +82 -0
  136. llm_code/tools/base.py +94 -0
  137. llm_code/tools/bash.py +565 -0
  138. llm_code/tools/computer_use_tools.py +278 -0
  139. llm_code/tools/coordinator_tool.py +75 -0
  140. llm_code/tools/cron_create.py +90 -0
  141. llm_code/tools/cron_delete.py +49 -0
  142. llm_code/tools/cron_list.py +51 -0
  143. llm_code/tools/deferred.py +92 -0
  144. llm_code/tools/dump.py +116 -0
  145. llm_code/tools/edit_file.py +282 -0
  146. llm_code/tools/git_tools.py +531 -0
  147. llm_code/tools/glob_search.py +112 -0
  148. llm_code/tools/grep_search.py +144 -0
  149. llm_code/tools/ide_diagnostics.py +59 -0
  150. llm_code/tools/ide_open.py +58 -0
  151. llm_code/tools/ide_selection.py +52 -0
  152. llm_code/tools/memory_tools.py +138 -0
  153. llm_code/tools/multi_edit.py +143 -0
  154. llm_code/tools/notebook_edit.py +107 -0
  155. llm_code/tools/notebook_read.py +81 -0
  156. llm_code/tools/parsing.py +63 -0
  157. llm_code/tools/read_file.py +154 -0
  158. llm_code/tools/registry.py +58 -0
  159. llm_code/tools/search_backends/__init__.py +56 -0
  160. llm_code/tools/search_backends/brave.py +56 -0
  161. llm_code/tools/search_backends/duckduckgo.py +129 -0
  162. llm_code/tools/search_backends/searxng.py +71 -0
  163. llm_code/tools/search_backends/tavily.py +73 -0
  164. llm_code/tools/swarm_create.py +109 -0
  165. llm_code/tools/swarm_delete.py +95 -0
  166. llm_code/tools/swarm_list.py +44 -0
  167. llm_code/tools/swarm_message.py +109 -0
  168. llm_code/tools/task_close.py +79 -0
  169. llm_code/tools/task_plan.py +79 -0
  170. llm_code/tools/task_verify.py +90 -0
  171. llm_code/tools/tool_search.py +65 -0
  172. llm_code/tools/web_common.py +258 -0
  173. llm_code/tools/web_fetch.py +223 -0
  174. llm_code/tools/web_search.py +280 -0
  175. llm_code/tools/write_file.py +118 -0
  176. llm_code/tui/__init__.py +1 -0
  177. llm_code/tui/app.py +2432 -0
  178. llm_code/tui/chat_view.py +82 -0
  179. llm_code/tui/chat_widgets.py +309 -0
  180. llm_code/tui/header_bar.py +46 -0
  181. llm_code/tui/input_bar.py +349 -0
  182. llm_code/tui/keybindings.py +142 -0
  183. llm_code/tui/marketplace.py +210 -0
  184. llm_code/tui/status_bar.py +72 -0
  185. llm_code/tui/theme.py +96 -0
  186. llm_code/utils/__init__.py +0 -0
  187. llm_code/utils/diff.py +111 -0
  188. llm_code/utils/errors.py +70 -0
  189. llm_code/utils/hyperlink.py +73 -0
  190. llm_code/utils/notebook.py +179 -0
  191. llm_code/utils/search.py +69 -0
  192. llm_code/utils/text_normalize.py +28 -0
  193. llm_code/utils/version_check.py +62 -0
  194. llm_code/vim/__init__.py +4 -0
  195. llm_code/vim/engine.py +51 -0
  196. llm_code/vim/motions.py +172 -0
  197. llm_code/vim/operators.py +183 -0
  198. llm_code/vim/text_objects.py +139 -0
  199. llm_code/vim/transitions.py +279 -0
  200. llm_code/vim/types.py +68 -0
  201. llm_code/voice/__init__.py +1 -0
  202. llm_code/voice/languages.py +43 -0
  203. llm_code/voice/recorder.py +136 -0
  204. llm_code/voice/stt.py +36 -0
  205. llm_code/voice/stt_anthropic.py +66 -0
  206. llm_code/voice/stt_google.py +32 -0
  207. llm_code/voice/stt_whisper.py +52 -0
  208. llmcode_cli-1.0.0.dist-info/METADATA +524 -0
  209. llmcode_cli-1.0.0.dist-info/RECORD +212 -0
  210. llmcode_cli-1.0.0.dist-info/WHEEL +4 -0
  211. llmcode_cli-1.0.0.dist-info/entry_points.txt +2 -0
  212. llmcode_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,921 @@
1
+ """Core agentic conversation runtime: turn loop with streaming and tool execution."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import dataclasses
6
+ import json
7
+ import time
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, AsyncIterator
11
+
12
+ from pydantic import ValidationError
13
+
14
+ from llm_code.logging import get_logger
15
+ from llm_code.api.types import (
16
+ Message,
17
+ MessageRequest,
18
+ StreamEvent,
19
+ StreamMessageStop,
20
+ StreamPermissionRequest,
21
+ StreamTextDelta,
22
+ StreamToolExecResult,
23
+ StreamToolExecStart,
24
+ StreamToolProgress,
25
+ StreamToolUseInputDelta,
26
+ StreamToolUseStart,
27
+ TextBlock,
28
+ TokenUsage,
29
+ ToolResultBlock,
30
+ ToolUseBlock,
31
+ )
32
+ from llm_code.runtime.compressor import ContextCompressor
33
+ from llm_code.runtime.cost_tracker import BudgetExceededError
34
+ from llm_code.runtime.permissions import PermissionOutcome
35
+ from llm_code.runtime.streaming_executor import StreamingToolExecutor
36
+ from llm_code.runtime.telemetry import Telemetry, get_noop_telemetry
37
+ from llm_code.tools.base import PermissionLevel, ToolResult
38
+ from llm_code.tools.parsing import ParsedToolCall, parse_tool_calls
39
+
40
+ if TYPE_CHECKING:
41
+ from llm_code.runtime.context import ProjectContext
42
+ from llm_code.runtime.permissions import PermissionPolicy
43
+ from llm_code.runtime.prompt import SystemPromptBuilder
44
+ from llm_code.runtime.session import Session
45
+ from llm_code.tools.registry import ToolRegistry
46
+
47
+
48
+ def build_thinking_extra_body(thinking_config, *, is_local: bool = False) -> dict | None:
49
+ """Build extra_body dict for thinking mode configuration.
50
+
51
+ Returns None for adaptive mode (let provider decide),
52
+ explicit enable/disable dict for other modes.
53
+
54
+ Local models get unlimited thinking budget (no cost concern).
55
+ """
56
+ mode = thinking_config.mode
57
+ if mode == "enabled":
58
+ # Local models: no budget cap; cloud: use configured budget
59
+ budget = thinking_config.budget_tokens
60
+ if is_local:
61
+ budget = max(budget, 131072) # At least 128K tokens for local
62
+ return {
63
+ "chat_template_kwargs": {
64
+ "enable_thinking": True,
65
+ "thinking_budget": budget,
66
+ }
67
+ }
68
+ if mode == "disabled":
69
+ return {"chat_template_kwargs": {"enable_thinking": False}}
70
+ # adaptive: for local models, enable with generous budget; for cloud, let provider decide
71
+ if is_local:
72
+ budget = max(thinking_config.budget_tokens, 131072)
73
+ return {
74
+ "chat_template_kwargs": {
75
+ "enable_thinking": True,
76
+ "thinking_budget": budget,
77
+ }
78
+ }
79
+ return None
80
+
81
+
82
# Thread pool for running blocking tool execution off the event loop.
# Module-level so it is shared across runtime instances; max_workers=4
# caps how many blocking tool calls may run concurrently.
_TOOL_EXECUTOR = ThreadPoolExecutor(max_workers=4)

# Module-scoped logger named after this module.
logger = get_logger(__name__)

# Maximum number of characters to inline in tool results.
# NOTE(review): not referenced in the visible portion of this module —
# presumably used for tool-result truncation further down; confirm.
_MAX_INLINE_RESULT = 4000
89
+
90
+
91
@dataclasses.dataclass(frozen=True)
class TurnSummary:
    """Immutable summary of one completed user turn."""

    # Number of LLM round-trips the turn loop performed.
    iterations: int
    # Token usage accumulated across all of those calls.
    total_usage: TokenUsage
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # ConversationRuntime
99
+ # ---------------------------------------------------------------------------
100
+
101
+ class ConversationRuntime:
102
+ """Agentic loop that drives LLM turns, tool execution, and session updates."""
103
+
104
    def __init__(
        self,
        provider: Any,
        tool_registry: "ToolRegistry",
        permission_policy: "PermissionPolicy",
        hook_runner: Any,
        prompt_builder: "SystemPromptBuilder",
        config: Any,
        session: "Session",
        context: "ProjectContext",
        checkpoint_manager: Any = None,
        token_budget: Any = None,
        vcr_recorder: Any = None,
        deferred_tool_manager: Any = None,
        telemetry: Telemetry | None = None,
        recovery_checkpoint: Any = None,
        cost_tracker: Any = None,
        skills: Any = None,
        mcp_manager: Any = None,
        memory_store: Any = None,
        task_manager: Any = None,
        project_index: Any = None,
        lsp_manager: Any = None,
    ) -> None:
        """Wire the runtime's collaborators and initialize per-turn state.

        The first eight parameters are required collaborators (LLM provider,
        tool registry, permission policy, hook runner, prompt builder,
        config, session, and project context). Every remaining parameter is
        an optional subsystem; passing ``None`` disables the corresponding
        feature. A missing ``telemetry`` is replaced by a no-op stub so the
        rest of the code never has to null-check it.
        """
        self._provider = provider
        self._tool_registry = tool_registry
        self._permissions = permission_policy
        self._hooks = hook_runner
        self._prompt_builder = prompt_builder
        self._config = config
        self.session = session
        self._context = context
        self._checkpoint_mgr = checkpoint_manager
        self._token_budget = token_budget
        self._vcr_recorder = vcr_recorder
        self._deferred_tool_manager = deferred_tool_manager
        # Substitute a no-op telemetry sink when none is supplied.
        self._telemetry: Telemetry = telemetry if telemetry is not None else get_noop_telemetry()
        self._recovery_checkpoint = recovery_checkpoint
        self._cost_tracker = cost_tracker
        self._skills = skills
        self._mcp_manager = mcp_manager
        self._memory_store = memory_store
        self._task_manager = task_manager
        self._project_index = project_index
        self._lsp_manager = lsp_manager
        # Harness Engine — unified quality controls. Imported lazily here
        # to avoid a module-level import cycle with the harness package.
        from llm_code.harness.engine import HarnessEngine
        from llm_code.harness.config import HarnessConfig
        from llm_code.harness.templates import detect_template, default_controls
        # Resolve the working directory from the project context when it
        # exposes one; otherwise fall back to the process cwd.
        cwd = Path(self._context.cwd) if self._context and hasattr(self._context, "cwd") else Path.cwd()
        harness_cfg = getattr(config, "harness", HarnessConfig())
        if harness_cfg.template == "auto" and not harness_cfg.controls:
            # "auto" with no explicit controls: detect the project template
            # and take its full default control set.
            template = detect_template(cwd)
            resolved_controls = default_controls(template)
            harness_cfg = HarnessConfig(template=template, controls=resolved_controls)
        elif harness_cfg.template == "auto":
            # "auto" but the user supplied controls: resolve only the
            # template name, keeping the user's controls intact.
            template = detect_template(cwd)
            harness_cfg = HarnessConfig(template=template, controls=harness_cfg.controls)
        self._harness = HarnessEngine(config=harness_cfg, cwd=cwd)
        self._harness.lsp_manager = lsp_manager
        # Honor explicit opt-outs from the top-level config.
        if hasattr(config, "auto_commit") and not config.auto_commit:
            self._harness.disable("auto_commit")
        if hasattr(config, "lsp_auto_diagnose") and not config.lsp_auto_diagnose:
            self._harness.disable("lsp_diagnose")
        # Future resolved by send_permission_response() when the TUI answers
        # a permission prompt; None while no prompt is pending.
        self._permission_future: asyncio.Future[str] | None = None
        # Guards the one-shot reactive compaction retry per turn.
        self._has_attempted_reactive_compact = False
        # Consecutive provider failures; drives model-fallback logic.
        self._consecutive_failures: int = 0
        self._compressor = ContextCompressor()
        # Currently active model; may be switched to a fallback at runtime.
        self._active_model: str = getattr(config, "model", "")
        self._hida_classifier: Any | None = None
        self._hida_engine: Any | None = None
        self._last_hida_profile: Any | None = None

        # Initialize HIDA (dynamic context filtering) if enabled in config;
        # a missing hida package silently disables the feature.
        if getattr(config, "hida", None) is not None and config.hida.enabled:
            try:
                from llm_code.hida.classifier import TaskClassifier
                from llm_code.hida.engine import HidaEngine
                from llm_code.hida.profiles import DEFAULT_PROFILES
                self._hida_classifier = TaskClassifier(profiles=DEFAULT_PROFILES)
                self._hida_engine = HidaEngine()
            except ImportError:
                pass
187
+
188
+ # ------------------------------------------------------------------
189
+ # Backward-compatible properties delegating to HarnessEngine
190
+ # ------------------------------------------------------------------
191
+
192
    @property
    def plan_mode(self) -> bool:
        """Whether plan mode is active (delegates to the HarnessEngine)."""
        return self._harness.plan_mode

    @plan_mode.setter
    def plan_mode(self, value: bool) -> None:
        # Backward-compatible shim: state lives on the harness.
        self._harness.plan_mode = value
199
+
200
    @property
    def analysis_context(self) -> str | None:
        """Analysis context text, if any (delegates to the HarnessEngine)."""
        return self._harness.analysis_context

    @analysis_context.setter
    def analysis_context(self, value: str | None) -> None:
        # Backward-compatible shim: state lives on the harness.
        self._harness.analysis_context = value
207
+
208
+ def _fire_hook(self, event: str, context: dict | None = None) -> None:
209
+ """Fire a hook event if the hook runner supports the generic fire() method."""
210
+ if hasattr(self._hooks, "fire"):
211
+ self._hooks.fire(event, context or {})
212
+
213
+ def send_permission_response(self, response: str) -> None:
214
+ """Resolve the pending permission prompt with 'allow', 'deny', or 'always'.
215
+
216
+ Called by the TUI when the user presses y/n/a on a permission inline widget.
217
+
218
+ IMPORTANT: Must be called from the same event loop thread that owns
219
+ ``_permission_future``. In Textual, this is guaranteed when called from
220
+ an ``on_key`` handler since both the app and ``run_worker`` share the
221
+ same asyncio event loop.
222
+ """
223
+ if self._permission_future is not None and not self._permission_future.done():
224
+ self._permission_future.set_result(response)
225
+
226
+ async def run_turn(self, user_input: str, images: list | None = None) -> AsyncIterator[StreamEvent]:
227
+ """Run one user turn (may involve multiple LLM calls for tool use)."""
228
+ logger.debug("Starting turn: %s", user_input[:80])
229
+ _turn_start = time.monotonic()
230
+ self._fire_hook("prompt_submit", {"text": user_input[:200]})
231
+ if self._vcr_recorder is not None:
232
+ self._vcr_recorder.record("user_input", {"text": user_input})
233
+ # 1. Add user message to session (with optional images)
234
+ content_blocks: list = [TextBlock(text=user_input)]
235
+ if images:
236
+ content_blocks.extend(images)
237
+ user_msg = Message(role="user", content=tuple(content_blocks))
238
+ self.session = self.session.add_message(user_msg)
239
+
240
+ accumulated_usage = TokenUsage(input_tokens=0, output_tokens=0)
241
+ self._has_attempted_reactive_compact = False
242
+ force_xml = getattr(self, "_force_xml_mode", False)
243
+ # Token limit auto-upgrade state: reset each turn, doubles on max_tokens stop
244
+ _current_max_tokens: int = self._config.max_tokens
245
+ # Local models (localhost/private network) have no cost concern — no cap
246
+ _base_url = getattr(self._config, "provider_base_url", "") or ""
247
+ _is_local = any(h in _base_url for h in ("localhost", "127.0.0.1", "0.0.0.0", "192.168.", "10.", "172."))
248
+ _TOKEN_UPGRADE_CAP = 0 if _is_local else 65536 # 0 means unlimited
249
+
250
+ for _iteration in range(self._config.max_turn_iterations):
251
+ # Budget enforcement: check before each LLM call
252
+ if self._cost_tracker is not None:
253
+ try:
254
+ self._cost_tracker.check_budget()
255
+ except BudgetExceededError as exc:
256
+ yield StreamTextDelta(
257
+ text=f"Budget limit (${exc.budget:.2f}) reached. Use /budget to increase."
258
+ )
259
+ return
260
+
261
+ # HIDA dynamic context filtering
262
+ allowed_tool_names: set[str] | None = None
263
+
264
+ if (
265
+ self._hida_classifier is not None
266
+ and self._hida_engine is not None
267
+ and getattr(self._config, "hida", None) is not None
268
+ and self._config.hida.enabled
269
+ ):
270
+ hida_profile = await self._hida_classifier.classify(
271
+ user_input,
272
+ provider=self._provider if hasattr(self._provider, "complete") else None,
273
+ confidence_threshold=self._config.hida.confidence_threshold,
274
+ )
275
+ self._last_hida_profile = hida_profile
276
+
277
+ if not hida_profile.load_full_prompt:
278
+ all_tool_names = {t.name for t in self._tool_registry.all_tools()}
279
+ allowed_tool_names = self._hida_engine.filter_tools(hida_profile, all_tool_names)
280
+
281
+ # 2. Build system prompt
282
+ use_native = getattr(self._provider, "supports_native_tools", lambda: True)() and not force_xml
283
+
284
+ # Deferred tool loading: when a manager is present, split tools into
285
+ # visible and deferred; inject a hint into the system prompt.
286
+ _deferred_hint: str | None = None
287
+ if self._deferred_tool_manager is not None:
288
+ all_defs = list(self._tool_registry.definitions(allowed=allowed_tool_names))
289
+ max_visible = getattr(self._config, "max_visible_tools", 20)
290
+ visible_list, deferred_list = self._deferred_tool_manager.select_tools(
291
+ all_defs, max_visible=max_visible
292
+ )
293
+ tool_defs = tuple(visible_list)
294
+ if deferred_list:
295
+ _deferred_count = len(deferred_list)
296
+ _deferred_hint = (
297
+ "## Tool Discovery\n\n"
298
+ f"There are {_deferred_count} additional tool(s) not shown here. "
299
+ "Use the 'tool_search' tool with a query to find and unlock them."
300
+ )
301
+ else:
302
+ tool_defs = self._tool_registry.definitions(
303
+ allowed=allowed_tool_names,
304
+ )
305
+
306
+ # Collect MCP instructions if manager is available
307
+ _mcp_instructions: dict[str, str] | None = None
308
+ if self._mcp_manager is not None:
309
+ _mcp_instructions = self._mcp_manager.get_all_instructions() or None
310
+
311
+ # Collect memory entries if store is available
312
+ _memory_entries: dict | None = None
313
+ if self._memory_store is not None and hasattr(self._memory_store, "list_entries"):
314
+ try:
315
+ _memory_entries = self._memory_store.list_entries() or None
316
+ except Exception:
317
+ pass
318
+
319
+ system_prompt = self._prompt_builder.build(
320
+ self._context,
321
+ tools=tool_defs,
322
+ native_tools=use_native,
323
+ skills=self._skills,
324
+ mcp_instructions=_mcp_instructions,
325
+ memory_entries=_memory_entries,
326
+ task_manager=self._task_manager,
327
+ project_index=self._project_index,
328
+ )
329
+ if _deferred_hint:
330
+ system_prompt = system_prompt + "\n\n" + _deferred_hint
331
+
332
+ # Inject harness guide context (repo map, analysis, etc.)
333
+ for injection in self._harness.pre_turn():
334
+ if injection:
335
+ system_prompt = system_prompt + "\n\n" + injection
336
+
337
+ self._fire_hook("prompt_compile", {"prompt_length": len(system_prompt), "tool_count": len(tool_defs)})
338
+
339
+ # 3. Create request and stream
340
+ request = MessageRequest(
341
+ model=self._active_model,
342
+ messages=self.session.messages,
343
+ system=system_prompt,
344
+ tools=tool_defs if use_native else (),
345
+ max_tokens=_current_max_tokens,
346
+ temperature=self._config.temperature,
347
+ extra_body=build_thinking_extra_body(self._config.thinking, is_local=_is_local) if not use_native else None,
348
+ )
349
+
350
+ if self._vcr_recorder is not None:
351
+ self._vcr_recorder.record("llm_request", {
352
+ "model": request.model,
353
+ "max_tokens": request.max_tokens,
354
+ })
355
+
356
+ # Error recovery: tool choice fallback + reactive compact
357
+ self._fire_hook("http_request", {"model": self._active_model, "url": getattr(self._config, "provider_base_url", "")})
358
+ try:
359
+ stream = await self._provider.stream_message(request)
360
+ except Exception as exc:
361
+ _exc_str = str(exc)
362
+ self._fire_hook("http_error", {"error": _exc_str[:200], "model": self._active_model})
363
+ # Auto-fallback: if native tool calling is not supported by server
364
+ if "tool-call-parser" in _exc_str or "tool choice" in _exc_str.lower():
365
+ logger.warning("Server does not support native tool calling; falling back to XML tag mode")
366
+ self._fire_hook("http_fallback", {"reason": "xml_mode", "model": self._active_model})
367
+ self._force_xml_mode = True
368
+ # Rebuild request without tools
369
+ system_prompt = self._prompt_builder.build(
370
+ self._context,
371
+ tools=tool_defs,
372
+ native_tools=False,
373
+ skills=self._skills,
374
+ mcp_instructions=_mcp_instructions,
375
+ memory_entries=_memory_entries,
376
+ task_manager=self._task_manager,
377
+ project_index=self._project_index,
378
+ )
379
+ request = MessageRequest(
380
+ model=self._active_model,
381
+ messages=self.session.messages,
382
+ system=system_prompt,
383
+ tools=(),
384
+ max_tokens=_current_max_tokens,
385
+ temperature=self._config.temperature,
386
+ extra_body=build_thinking_extra_body(self._config.thinking, is_local=_is_local),
387
+ )
388
+ stream = await self._provider.stream_message(request)
389
+ elif (
390
+ ("413" in _exc_str or "prompt too long" in _exc_str.lower())
391
+ and not self._has_attempted_reactive_compact
392
+ ):
393
+ logger.warning("Prompt too long; compacting context and retrying")
394
+ self._fire_hook("session_compact", {"reason": "prompt_too_long"})
395
+ self._has_attempted_reactive_compact = True
396
+ _compressor = ContextCompressor()
397
+ self.session = _compressor.compress(
398
+ self.session,
399
+ self._config.compact_after_tokens // 2,
400
+ )
401
+ continue # retry this iteration of the turn loop
402
+ else:
403
+ # Layer 3: model fallback — track consecutive provider errors
404
+ self._consecutive_failures += 1
405
+ _fallback = getattr(
406
+ getattr(self._config, "model_routing", None), "fallback", ""
407
+ )
408
+ if _fallback and self._active_model != _fallback:
409
+ # Still have retries remaining before switching — retry same model
410
+ if self._consecutive_failures < 3:
411
+ self._fire_hook("http_retry", {"attempt": self._consecutive_failures, "model": self._active_model})
412
+ logger.warning(
413
+ "Provider error (attempt %d/3): %s",
414
+ self._consecutive_failures,
415
+ exc,
416
+ )
417
+ continue # retry this iteration
418
+ # 3rd consecutive failure: switch to fallback model
419
+ self._fire_hook("http_fallback", {"reason": "consecutive_failures", "from": self._active_model, "to": _fallback})
420
+ logger.warning(
421
+ "3 consecutive provider errors; switching from %s to fallback model %s",
422
+ self._active_model,
423
+ _fallback,
424
+ )
425
+ self._active_model = _fallback
426
+ continue # retry with fallback model
427
+ logger.error("Provider stream error: %s", exc)
428
+ raise
429
+
430
+ # 4. Collect events and buffers
431
+ text_parts: list[str] = []
432
+ native_tool_calls: dict[str, dict] = {} # id -> {id, name, json_parts}
433
+ native_tool_list: list[dict] = []
434
+ stop_event: StreamMessageStop | None = None
435
+
436
+ # StreamingToolExecutor: starts read-only tools in background while streaming
437
+ _streaming_executor = StreamingToolExecutor(self._tool_registry, self._permissions)
438
+ _current_streaming_tool_id: str | None = None
439
+
440
+ async for event in stream:
441
+ # Yield streaming events to caller
442
+ yield event
443
+
444
+ if isinstance(event, StreamTextDelta):
445
+ text_parts.append(event.text)
446
+ elif isinstance(event, StreamToolUseStart):
447
+ # Finalize the previously streaming tool (if any) before starting new one
448
+ if _current_streaming_tool_id is not None:
449
+ _streaming_executor.finalize(_current_streaming_tool_id)
450
+ _current_streaming_tool_id = event.id
451
+ native_tool_calls[event.id] = {
452
+ "id": event.id,
453
+ "name": event.name,
454
+ "json_parts": [],
455
+ }
456
+ _streaming_executor.start_tool(event.id, event.name)
457
+ elif isinstance(event, StreamToolUseInputDelta):
458
+ if event.id in native_tool_calls:
459
+ native_tool_calls[event.id]["json_parts"].append(event.partial_json)
460
+ _streaming_executor.submit(event.id, event.partial_json)
461
+ elif isinstance(event, StreamMessageStop):
462
+ # Finalize the last streaming tool
463
+ if _current_streaming_tool_id is not None:
464
+ _streaming_executor.finalize(_current_streaming_tool_id)
465
+ _current_streaming_tool_id = None
466
+ stop_event = event
467
+
468
+ # Reset consecutive failure counter on successful stream
469
+ self._consecutive_failures = 0
470
+ self._fire_hook("http_response", {"model": self._active_model, "status": "ok"})
471
+
472
+ # Prompt cache hit/miss events based on compressor state
473
+ _n_messages = len(self.session.messages)
474
+ _n_cached = sum(1 for i in range(_n_messages) if self._compressor._is_cached(i))
475
+ if _n_cached > 0:
476
+ self._fire_hook("prompt_cache_hit", {"cached_messages": _n_cached, "total_messages": _n_messages})
477
+ else:
478
+ self._fire_hook("prompt_cache_miss", {"total_messages": _n_messages})
479
+
480
+ # Mark all messages sent in this request as cached (API has seen them)
481
+ self._compressor.mark_as_cached(set(range(len(self.session.messages))))
482
+
483
+ # Accumulate usage
484
+ if stop_event:
485
+ accumulated_usage = TokenUsage(
486
+ input_tokens=accumulated_usage.input_tokens + stop_event.usage.input_tokens,
487
+ output_tokens=accumulated_usage.output_tokens + stop_event.usage.output_tokens,
488
+ )
489
+
490
+ # Layer 2: Token limit auto-upgrade
491
+ # If the model stopped due to hitting max_tokens, double the limit and retry
492
+ if stop_event is not None and stop_event.stop_reason in ("max_tokens", "length"):
493
+ _upgraded = _current_max_tokens * 2 if _TOKEN_UPGRADE_CAP == 0 else min(_current_max_tokens * 2, _TOKEN_UPGRADE_CAP)
494
+ if _upgraded > _current_max_tokens:
495
+ logger.warning(
496
+ "Hit max_tokens limit (%d); upgrading to %d and retrying",
497
+ _current_max_tokens,
498
+ _upgraded,
499
+ )
500
+ _current_max_tokens = _upgraded
501
+ continue # retry this iteration with higher token limit
502
+
503
+ # Build native tool call list for parsing
504
+ for call_data in native_tool_calls.values():
505
+ raw_json = "".join(call_data["json_parts"])
506
+ try:
507
+ parsed_input = json.loads(raw_json) if raw_json else {}
508
+ except json.JSONDecodeError:
509
+ parsed_input = {}
510
+ native_tool_list.append({
511
+ "id": call_data["id"],
512
+ "name": call_data["name"],
513
+ "input": parsed_input,
514
+ })
515
+
516
+ if self._vcr_recorder is not None:
517
+ self._vcr_recorder.record("llm_response", {
518
+ "text": "".join(text_parts)[:500],
519
+ })
520
+
521
+ # 5. Parse tool calls (dual-track)
522
+ response_text = "".join(text_parts)
523
+ parsed_calls = parse_tool_calls(
524
+ response_text=response_text,
525
+ native_tool_calls=native_tool_list if native_tool_list else None,
526
+ )
527
+
528
+ # 6. Build assistant message content
529
+ assistant_blocks: list = []
530
+ if response_text:
531
+ assistant_blocks.append(TextBlock(text=response_text))
532
+ for call in parsed_calls:
533
+ assistant_blocks.append(
534
+ ToolUseBlock(id=call.id, name=call.name, input=call.args)
535
+ )
536
+
537
+ # 7. Add assistant message to session
538
+ if assistant_blocks:
539
+ assistant_msg = Message(
540
+ role="assistant",
541
+ content=tuple(assistant_blocks),
542
+ )
543
+ self.session = self.session.add_message(assistant_msg)
544
+
545
+ # 8. If no tool calls → end turn
546
+ if not parsed_calls:
547
+ break
548
+
549
+ # 9. Execute tools via the validate→safety→permission→progress pipeline
550
+ # Collect read-only results that were pre-computed during streaming,
551
+ # and get the list of write calls still needing execution.
552
+ _precomputed_results, _write_pending_calls = await _streaming_executor.collect_results()
553
+ _precomputed_by_id: dict[str, ToolResultBlock] = {r.tool_use_id: r for r in _precomputed_results}
554
+
555
+ # Split agent calls from non-agent calls so agents can run in parallel
556
+ agent_calls = [c for c in parsed_calls if c.name == "agent"]
557
+ non_agent_calls = [c for c in parsed_calls if c.name != "agent"]
558
+ for ac in agent_calls:
559
+ self._fire_hook("agent_spawn", {"agent_id": ac.id, "args": str(ac.args)[:200]})
560
+
561
+ tool_result_blocks: list[ToolResultBlock] = []
562
+
563
+ # Non-agent calls: use pre-computed result if available, else execute normally
564
+ for call in non_agent_calls:
565
+ if call.id in _precomputed_by_id:
566
+ # Read-only tool already executed concurrently — emit events and reuse result
567
+ precomputed = _precomputed_by_id[call.id]
568
+ yield StreamToolExecStart(tool_name=call.name, args_summary=str(call.args)[:80])
569
+ yield StreamToolExecResult(
570
+ tool_name=call.name,
571
+ output=precomputed.content[:200],
572
+ is_error=precomputed.is_error,
573
+ metadata=None,
574
+ )
575
+ tool_result_blocks.append(precomputed)
576
+ else:
577
+ async for event in self._execute_tool_with_streaming(call):
578
+ if isinstance(event, ToolResultBlock):
579
+ tool_result_blocks.append(event)
580
+ else:
581
+ yield event # StreamToolProgress
582
+
583
+ # Agent calls: run in parallel when there are multiple
584
+ if len(agent_calls) > 1:
585
+ async def _run_agent(c):
586
+ results: list[StreamEvent | ToolResultBlock] = []
587
+ async for ev in self._execute_tool_with_streaming(c):
588
+ results.append(ev)
589
+ return results
590
+
591
+ all_agent_results = await asyncio.gather(
592
+ *[_run_agent(c) for c in agent_calls]
593
+ )
594
+ for idx, result_events in enumerate(all_agent_results):
595
+ ac = agent_calls[idx]
596
+ for event in result_events:
597
+ if isinstance(event, ToolResultBlock):
598
+ tool_result_blocks.append(event)
599
+ if event.is_error:
600
+ self._fire_hook("agent_error", {"agent_id": ac.id, "error": event.content[:200]})
601
+ else:
602
+ self._fire_hook("agent_message", {"agent_id": ac.id, "text": event.content[:200]})
603
+ else:
604
+ yield event
605
+ self._fire_hook("agent_complete", {"agent_id": ac.id})
606
+ elif agent_calls:
607
+ # Single agent call — sequential
608
+ for call in agent_calls:
609
+ async for event in self._execute_tool_with_streaming(call):
610
+ if isinstance(event, ToolResultBlock):
611
+ tool_result_blocks.append(event)
612
+ if event.is_error:
613
+ self._fire_hook("agent_error", {"agent_id": call.id, "error": event.content[:200]})
614
+ else:
615
+ self._fire_hook("agent_message", {"agent_id": call.id, "text": event.content[:200]})
616
+ else:
617
+ yield event
618
+ self._fire_hook("agent_complete", {"agent_id": call.id})
619
+
620
+ # Add tool results as user message
621
+ if tool_result_blocks:
622
+ tool_result_msg = Message(
623
+ role="user",
624
+ content=tuple(tool_result_blocks),
625
+ )
626
+ self.session = self.session.add_message(tool_result_msg)
627
+
628
+ # 10. Loop back for LLM to process results
629
+
630
+ # Update session usage
631
+ self.session = self.session.update_usage(accumulated_usage)
632
+ _turn_duration_ms = (time.monotonic() - _turn_start) * 1000
633
+ logger.debug(
634
+ "Turn complete: %d input tokens, %d output tokens",
635
+ accumulated_usage.input_tokens,
636
+ accumulated_usage.output_tokens,
637
+ )
638
+ self._telemetry.trace_turn(
639
+ session_id=getattr(self.session, "session_id", ""),
640
+ model=self._active_model,
641
+ input_tokens=accumulated_usage.input_tokens,
642
+ output_tokens=accumulated_usage.output_tokens,
643
+ duration_ms=_turn_duration_ms,
644
+ )
645
+
646
+ # Auto-checkpoint: persist session state after each turn completes
647
+ if self._recovery_checkpoint is not None:
648
+ try:
649
+ self._recovery_checkpoint.save_checkpoint(self.session)
650
+ except Exception as exc:
651
+ logger.debug("Recovery checkpoint save failed: %s", exc)
652
+
653
+ async def _execute_tool_with_streaming(
654
+ self, call: ParsedToolCall
655
+ ) -> AsyncIterator[StreamEvent | ToolResultBlock]:
656
+ """Validate → safety → permission → run in thread → yield progress + result."""
657
+ logger.debug("Executing tool: %s", call.name)
658
+ # 1. Look up tool
659
+ tool = self._tool_registry.get(call.name)
660
+ if tool is None:
661
+ logger.warning("Unknown tool requested: %s", call.name)
662
+ self._fire_hook("tool_error", {"tool_name": call.name, "error": "unknown tool"})
663
+ yield ToolResultBlock(
664
+ tool_use_id=call.id,
665
+ content=f"Unknown tool '{call.name}'",
666
+ is_error=True,
667
+ )
668
+ return
669
+
670
+ # 2. Validate input
671
+ try:
672
+ validated_args = tool.validate_input(call.args)
673
+ except ValidationError as exc:
674
+ # Format Pydantic validation errors into a readable message
675
+ errors = exc.errors()
676
+ fields = ", ".join(
677
+ f"{'.'.join(str(loc) for loc in e['loc'])}: {e['msg']}"
678
+ for e in errors
679
+ )
680
+ yield ToolResultBlock(
681
+ tool_use_id=call.id,
682
+ content=f"Invalid input for tool '{call.name}': {fields}",
683
+ is_error=True,
684
+ )
685
+ return
686
+
687
+ # 3. Safety analysis → effective permission level
688
+ # For bash-like tools, truly dangerous (blocked) commands are denied
689
+ # immediately without entering the permission prompt flow.
690
+ if hasattr(tool, "classify") and callable(tool.classify):
691
+ safety = tool.classify(validated_args)
692
+ if safety.is_blocked:
693
+ self._fire_hook("tool_denied", {"tool_name": call.name})
694
+ yield ToolResultBlock(
695
+ tool_use_id=call.id,
696
+ content=f"Dangerous command blocked: {'; '.join(safety.reasons)}",
697
+ is_error=True,
698
+ )
699
+ return
700
+
701
+ if tool.is_read_only(validated_args):
702
+ effective = PermissionLevel.READ_ONLY
703
+ elif tool.is_destructive(validated_args):
704
+ effective = PermissionLevel.FULL_ACCESS
705
+ else:
706
+ effective = tool.required_permission
707
+
708
+ # 4a. Plan mode — deny write tools (via harness)
709
+ denial_msg = self._harness.check_pre_tool(call.name)
710
+ if denial_msg:
711
+ self._fire_hook("tool_denied", {"tool_name": call.name})
712
+ yield ToolResultBlock(
713
+ tool_use_id=call.id,
714
+ content=denial_msg,
715
+ is_error=True,
716
+ )
717
+ return
718
+
719
+ # 4. Permission check (deny/allow lists still take precedence via authorize)
720
+ outcome = self._permissions.authorize(
721
+ call.name,
722
+ tool.required_permission,
723
+ effective_level=effective,
724
+ )
725
+
726
+ if outcome == PermissionOutcome.DENY:
727
+ self._fire_hook("tool_denied", {"tool_name": call.name})
728
+ yield ToolResultBlock(
729
+ tool_use_id=call.id,
730
+ content=f"Permission denied for tool '{call.name}'",
731
+ is_error=True,
732
+ )
733
+ return
734
+
735
+ if outcome == PermissionOutcome.NEED_PROMPT:
736
+ # Build a short preview of tool arguments for the permission prompt
737
+ args_preview = json.dumps(validated_args, default=str)[:120]
738
+
739
+ # Attempt speculative pre-execution via overlay so the result is
740
+ # ready the moment the user approves.
741
+ spec_executor = None
742
+ try:
743
+ from llm_code.runtime.speculative import SpeculativeExecutor
744
+ import uuid as _uuid
745
+ session_id = f"{call.name}-{_uuid.uuid4().hex[:8]}"
746
+ spec_executor = SpeculativeExecutor(
747
+ tool=tool,
748
+ args=validated_args,
749
+ base_dir=self._context.cwd,
750
+ session_id=session_id,
751
+ )
752
+ spec_executor.pre_execute()
753
+ except Exception:
754
+ spec_executor = None
755
+
756
+ # Yield permission request and wait for user response
757
+ yield StreamPermissionRequest(
758
+ tool_name=call.name,
759
+ args_preview=args_preview,
760
+ )
761
+
762
+ loop = asyncio.get_running_loop()
763
+ self._permission_future = loop.create_future()
764
+ try:
765
+ response = await asyncio.wait_for(self._permission_future, timeout=300)
766
+ except asyncio.TimeoutError:
767
+ response = "deny"
768
+ logger.warning("Permission prompt for '%s' timed out (300s), auto-denying", call.name)
769
+ finally:
770
+ self._permission_future = None
771
+
772
+ if response in ("allow", "always"):
773
+ if response == "always":
774
+ # Add to allow list so future calls skip prompting
775
+ if hasattr(self._permissions, "allow_tool"):
776
+ self._permissions.allow_tool(call.name)
777
+ if spec_executor is not None:
778
+ try:
779
+ spec_executor.confirm()
780
+ except Exception:
781
+ pass
782
+ # Fall through to execute the tool normally below
783
+ else:
784
+ # Denied by user
785
+ if spec_executor is not None:
786
+ try:
787
+ spec_executor.deny()
788
+ except Exception:
789
+ pass
790
+ self._fire_hook("tool_denied", {"tool_name": call.name})
791
+ yield ToolResultBlock(
792
+ tool_use_id=call.id,
793
+ content=f"Tool '{call.name}' denied by user",
794
+ is_error=True,
795
+ )
796
+ return
797
+
798
+ # 4b. Create checkpoint before mutating tools
799
+ if self._checkpoint_mgr is not None and not tool.is_read_only(validated_args):
800
+ try:
801
+ self._checkpoint_mgr.create(call.name, validated_args)
802
+ except Exception:
803
+ pass # Don't block tool execution if checkpoint fails
804
+
805
+ # 5. Pre-tool hook
806
+ args = validated_args
807
+ hook_runner = self._hooks
808
+ if hasattr(hook_runner, "pre_tool_use"):
809
+ hook_result = hook_runner.pre_tool_use(call.name, args)
810
+ if hasattr(hook_result, "__await__"):
811
+ hook_result = await hook_result
812
+ if hasattr(hook_result, "denied") and hook_result.denied:
813
+ yield ToolResultBlock(
814
+ tool_use_id=call.id,
815
+ content=f"Tool '{call.name}' blocked by hook",
816
+ is_error=True,
817
+ )
818
+ return
819
+ if isinstance(hook_result, dict):
820
+ args = hook_result
821
+
822
+ # 6. Emit tool execution start event
823
+ args_preview = str(args)[:80]
824
+ if self._vcr_recorder is not None:
825
+ self._vcr_recorder.record("tool_call", {"name": call.name, "args": args_preview})
826
+ yield StreamToolExecStart(tool_name=call.name, args_summary=args_preview)
827
+ _tool_start = time.monotonic()
828
+
829
+ # 7. Execute in thread pool with asyncio.Queue progress bridge
830
+ loop = asyncio.get_running_loop()
831
+ queue: asyncio.Queue = asyncio.Queue()
832
+
833
+ def on_progress(p):
834
+ loop.call_soon_threadsafe(queue.put_nowait, p)
835
+
836
+ def run_tool():
837
+ result = tool.execute_with_progress(args, on_progress)
838
+ loop.call_soon_threadsafe(queue.put_nowait, None) # sentinel
839
+ return result
840
+
841
+ future = loop.run_in_executor(_TOOL_EXECUTOR, run_tool)
842
+
843
+ while True:
844
+ progress = await queue.get()
845
+ if progress is None:
846
+ break
847
+ yield StreamToolProgress(
848
+ tool_name=progress.tool_name,
849
+ message=progress.message,
850
+ percent=progress.percent,
851
+ )
852
+
853
+ tool_result = await future
854
+ tool_result = self._budget_tool_result(tool_result, call.id)
855
+ _tool_duration_ms = (time.monotonic() - _tool_start) * 1000
856
+ self._telemetry.trace_tool(
857
+ tool_name=call.name,
858
+ duration_ms=_tool_duration_ms,
859
+ is_error=tool_result.is_error,
860
+ )
861
+
862
+ # 7. Post-tool hook
863
+ if hasattr(hook_runner, "post_tool_use"):
864
+ post_result = hook_runner.post_tool_use(call.name, args, tool_result)
865
+ if hasattr(post_result, "__await__"):
866
+ await post_result
867
+
868
+ # 7b. Run harness sensors (auto-commit, LSP diagnose, code rules)
869
+ try:
870
+ findings = await self._harness.post_tool(
871
+ tool_name=call.name,
872
+ file_path=args.get("file_path") or args.get("path", ""),
873
+ is_error=tool_result.is_error,
874
+ )
875
+ for finding in findings:
876
+ if finding.severity == "error":
877
+ yield StreamToolProgress(
878
+ tool_name=finding.sensor,
879
+ message=f"{finding.sensor} found issues in {Path(finding.file_path).name}:\n{finding.message}",
880
+ percent=None,
881
+ )
882
+ except Exception:
883
+ pass # Never block tool flow for harness failure
884
+
885
+ # 8. Emit tool execution result event
886
+ if self._vcr_recorder is not None:
887
+ self._vcr_recorder.record("tool_result", {
888
+ "name": call.name,
889
+ "output": tool_result.output[:200],
890
+ "is_error": tool_result.is_error,
891
+ })
892
+ yield StreamToolExecResult(
893
+ tool_name=call.name,
894
+ output=tool_result.output[:200],
895
+ is_error=tool_result.is_error,
896
+ metadata=tool_result.metadata,
897
+ )
898
+
899
+ yield ToolResultBlock(
900
+ tool_use_id=call.id,
901
+ content=tool_result.output,
902
+ is_error=tool_result.is_error,
903
+ )
904
+
905
+ def _budget_tool_result(self, result: ToolResult, call_id: str) -> ToolResult:
906
+ """If result is too large, persist to disk and return truncated summary."""
907
+ if len(result.output) <= _MAX_INLINE_RESULT:
908
+ return result
909
+
910
+ # Save full output
911
+ cache_dir = self._context.cwd / ".llm-code" / "result_cache"
912
+ cache_dir.mkdir(parents=True, exist_ok=True)
913
+ cache_path = cache_dir / f"{call_id}.txt"
914
+ cache_path.write_text(result.output, encoding="utf-8")
915
+
916
+ # Truncated summary
917
+ summary = (
918
+ result.output[:1000]
919
+ + f"\n\n... [{len(result.output)} chars total, full output saved to {cache_path}. Use read_file to access.]"
920
+ )
921
+ return ToolResult(output=summary, is_error=result.is_error, metadata=result.metadata)