ctrlcode-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
ctrlcode/session/manager.py
@@ -0,0 +1,967 @@
"""Session management using harness-utils."""

import asyncio
import json
import logging
import time
from typing import Any, AsyncIterator, Optional
from dataclasses import dataclass
from pathlib import Path
import uuid

from harnessutils import ConversationManager, Message, TextPart
from harnessutils.storage import FilesystemStorage
from harnessutils.config import HarnessConfig, StorageConfig

from ..providers.base import Provider, StreamEvent
from .baseline import BaselineManager, Baseline
from ..fuzzing.derived_orchestrator import DerivedFuzzingOrchestrator, FuzzingResult
from ..tools.executor import ToolExecutor
from ..tools.registry import ToolRegistry
from ..skills.registry import SkillRegistry

logger = logging.getLogger(__name__)

def strip_markdown_fences(content: str) -> str:
    """Remove markdown code fences from content if present."""
    lines = content.splitlines()

    # Drop an opening fence on the first line (``` or ```lang)
    if lines and lines[0].strip().startswith("```"):
        lines = lines[1:]

    # Drop a closing fence on the last line
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]

    # Drop any trailing instruction lines (e.g., "Run with: ...")
    while lines and lines[-1].strip().startswith("Run with:"):
        lines = lines[:-1]

    return "\n".join(lines)

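# Illustrative doctest-style behaviour of strip_markdown_fences (inputs are
# invented for the example):
#
#   >>> strip_markdown_fences("```python\nprint('hi')\n```")
#   "print('hi')"
#   >>> strip_markdown_fences("x = 1\nRun with: python x.py")
#   'x = 1'
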
@dataclass
class Session:
    """Represents a conversation session."""

    id: str
    conv_id: str
    provider: Provider
    cumulative_tokens: int = 0  # Track total tokens used across all turns
    context_before_turn: int = 0  # Context size snapshot taken before each turn

class SessionManager:
    """Manages conversation sessions with baseline tracking."""

    def __init__(
        self,
        provider: Provider,
        storage_path: str,
        config: Optional[HarnessConfig] = None,
        fuzzing_orchestrator: Optional[DerivedFuzzingOrchestrator] = None,
        fuzzing_enabled: bool = False,
        tool_executor: Optional[ToolExecutor] = None,
        tool_registry: Optional[ToolRegistry] = None,
        skill_registry: Optional[SkillRegistry] = None,
        context_limit: int = 200000,
        workspace_root: Optional[Path] = None,
    ):
        """
        Initialize session manager.

        Args:
            provider: LLM provider to use
            storage_path: Path for conversation storage
            config: Optional harness config
            fuzzing_orchestrator: Optional fuzzing orchestrator
            fuzzing_enabled: Whether to use fuzzing
            tool_executor: Optional tool executor for MCP tools
            tool_registry: Optional tool registry for tool definitions
            skill_registry: Optional skill registry
            context_limit: Context window limit
            workspace_root: Workspace root for AGENT.md lookup
        """
        storage_config = StorageConfig(base_path=Path(storage_path))
        self.conv_manager = ConversationManager(
            storage=FilesystemStorage(storage_config),
            config=config or HarnessConfig()
        )
        self.provider = provider
        self.sessions: dict[str, Session] = {}
        self.baseline_manager = BaselineManager()
        self.fuzzing_orchestrator = fuzzing_orchestrator
        self.fuzzing_enabled = fuzzing_enabled
        self.tool_executor = tool_executor
        self.tool_registry = tool_registry
        self.skill_registry = skill_registry
        self.context_limit = context_limit
        self.workspace_root = workspace_root
        self._base_prompt = self._load_base_prompt()
        self._agent_instructions = self._load_agent_instructions()

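    # Illustrative construction (a minimal sketch; AnthropicProvider is
    # assumed from ctrlcode/providers/anthropic.py and its constructor
    # arguments may differ):
    #
    #   from ctrlcode.providers.anthropic import AnthropicProvider
    #   manager = SessionManager(
    #       provider=AnthropicProvider(...),
    #       storage_path="~/.local/share/ctrlcode/conversations",
    #       workspace_root=Path.cwd(),
    #   )
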
    def _load_base_prompt(self) -> str:
        """
        Load base system prompt from prompts/SYSTEM_PROMPT.md.

        Search order:
        1. {workspace_root}/prompts/SYSTEM_PROMPT.md (project override)
        2. ~/.config/ctrlcode/SYSTEM_PROMPT.md (user global override)
        3. Bundled default

        Returns:
            Base system prompt content.
        """
        candidates = []

        # 1. Project-local override
        if self.workspace_root:
            candidates.append(self.workspace_root / "prompts" / "SYSTEM_PROMPT.md")

        # 2. User global override
        try:
            from platformdirs import user_config_dir
            candidates.append(Path(user_config_dir("ctrlcode")) / "SYSTEM_PROMPT.md")
        except Exception:
            pass

        for prompt_file in candidates:
            if prompt_file.exists():
                try:
                    content = prompt_file.read_text(encoding="utf-8")
                    logger.info(f"Loaded system prompt from {prompt_file} ({len(content)} chars)")
                    return content
                except Exception as e:
                    logger.warning(f"Failed to load {prompt_file}: {e}")

        # 3. Bundled default
        logger.debug("Using bundled default system prompt")
        return """You are Ctrl+Code, Canoozie's personal coding assistant.

## CRITICAL: Act immediately, never introduce yourself

**NEVER** say "I'm ready to help", "I have access to tools", or introduce your capabilities.
**NEVER** greet the user or ask what they want when they've already told you.
When given a task, **immediately call the appropriate tool**. Your first output must be a tool call, not text.

Examples of correct behaviour:
- User says "show me the last git commit" → call `run_command` with `git log -1` immediately
- User says "find the login function" → call `search_code` with "login" immediately
- User says "read app.py" → call `read_file` with "app.py" immediately

## Tools available

- `run_command` — run shell commands (git, tests, builds, etc.)
- `read_file` — read a file's contents
- `write_file` — create a new file
- `update_file` — edit an existing file
- `search_files` — find files by glob pattern
- `search_code` — search for code by content
- `list_directory` — list directory contents
- `web_fetch` — fetch a URL

## Tool usage rules

- Call ALL tools needed in a SINGLE response — do not wait for results before calling the next tool
- Use `run_command` for git operations, tests, builds, and any shell commands
- Use `read_file` / `search_files` / `search_code` for exploring the codebase
- Use `update_file` to edit existing files, `write_file` only for new files
- When referencing code, include `file_path:line_number` so the user can navigate to it

## Workspace and file paths

- Use relative paths (`src/main.py`) not absolute paths (`/home/user/src/main.py`)
- If unsure of a file's location, call `search_files` first

## Tone

- Be concise and direct. No emojis unless asked. Output renders in a monospace terminal."""

    def _load_agent_instructions(self) -> str:
        """
        Load AGENT.md instructions hierarchically.

        Order (most general to most specific):
        1. Global: ~/.config/ctrlcode/AGENT.md
        2. Project: {workspace_root}/AGENT.md

        Returns combined instructions with clear section markers.
        """
        instructions = []

        # 1. Global config AGENT.md
        try:
            from platformdirs import user_config_dir
            config_dir = Path(user_config_dir("ctrlcode"))
            global_agent = config_dir / "AGENT.md"
            if global_agent.exists():
                content = global_agent.read_text(encoding="utf-8")
                instructions.append(f"# Global Agent Instructions\n\n{content}")
                logger.info(f"Loaded global AGENT.md ({len(content)} chars)")
        except Exception as e:
            logger.debug(f"No global AGENT.md: {e}")

        # 2. Project AGENT.md
        if self.workspace_root:
            project_agent = self.workspace_root / "AGENT.md"
            if project_agent.exists():
                try:
                    content = project_agent.read_text(encoding="utf-8")
                    instructions.append(f"# Project-Specific Instructions\n\n{content}")
                    logger.info(f"Loaded project AGENT.md ({len(content)} chars)")
                except Exception as e:
                    logger.warning(f"Failed to load project AGENT.md: {e}")

        return "\n\n---\n\n".join(instructions) if instructions else ""

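    # With both files present, _load_agent_instructions returns something like
    # this (file contents invented for the example):
    #
    #   # Global Agent Instructions
    #
    #   Always run the test suite before committing.
    #
    #   ---
    #
    #   # Project-Specific Instructions
    #
    #   This project uses uv, not pip.
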
    def create_session(self, provider: Optional[Provider] = None) -> Session:
        """
        Create a new session.

        Args:
            provider: Optional provider override

        Returns:
            New session instance
        """
        conv = self.conv_manager.create_conversation(project_id="ctrl-code")
        session = Session(
            id=str(uuid.uuid4()),
            conv_id=conv.id,
            provider=provider or self.provider
        )
        self.sessions[session.id] = session
        return session

    def get_session(self, session_id: str) -> Optional[Session]:
        """Get session by ID."""
        return self.sessions.get(session_id)

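    # End-to-end sketch of one turn (assumes a `manager` built as in the
    # construction sketch above; only text events are printed here):
    #
    #   async def run_once(manager: SessionManager) -> None:
    #       session = manager.create_session()
    #       async for event in manager.process_turn(session.id, "read app.py"):
    #           if event.type == "text":
    #               print(event.data["text"], end="", flush=True)
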
    async def process_turn(
        self,
        session_id: str,
        user_input: str,
        tools: list[dict] | None = None
    ) -> AsyncIterator[StreamEvent]:
        """
        Process a conversation turn.

        Args:
            session_id: Session identifier
            user_input: User's input message
            tools: Optional tool definitions

        Yields:
            StreamEvent: Streaming events from provider
        """
        session = self.sessions.get(session_id)
        if not session:
            raise ValueError(f"Session not found: {session_id}")

        # Expand skills if present
        if self.skill_registry:
            expanded_input, was_skill = self.skill_registry.process_input(user_input)
            if was_skill:
                # Emit skill expansion event
                yield StreamEvent(
                    type="skill_expanded",
                    data={"original": user_input, "expanded": expanded_input}
                )
                user_input = expanded_input

        # Extract baseline if present
        baseline_code = self.baseline_manager.extract_from_request(user_input)
        if baseline_code:
            baseline = Baseline(code=baseline_code)
            self.baseline_manager.store(session_id, baseline)

        # Prune before adding the new user message (makes room for the new turn)
        self.conv_manager.prune_before_turn(session.conv_id)

        # Add user message
        user_msg = Message(id=self._generate_msg_id(), role="user")
        user_msg.add_part(TextPart(text=user_input))
        self.conv_manager.add_message(session.conv_id, user_msg)

        # Snapshot context size before the LLM call for an accurate token delta
        session.context_before_turn = self.conv_manager.calculate_context_usage(
            session.conv_id,
            model=session.provider.model
        )
        # Get messages for LLM
        messages = self.conv_manager.to_model_format(session.conv_id)
        logger.info(f"to_model_format: {len(messages)} messages, roles={[m.get('role') for m in messages]}")

        # Get tool definitions from the registry if not provided
        if tools is None and self.tool_registry:
            tools = self.tool_registry.get_tool_definitions()
            logger.info(f"Fetched {len(tools)} tool definitions from registry")
        else:
            logger.info(f"Tools parameter: {tools}")

        # Track whether we should fuzz (a code block was detected)
        should_fuzz = False
        accumulated_text = []

        # Track streaming rate for fuzzing output
        streaming_start_time = time.time()
        streaming_end_time = None

        # Track usage tokens (replaced by fuzzed output tokens if fuzzing occurs)
        usage_tokens = 0

        # Build the system prompt once; reuse it for all continuation calls
        system_prompt_msg: dict | None = None
        if not messages or messages[0].get("role") != "system":
            # Build AGENT.md section
            agent_section = ""
            if self._agent_instructions:
                agent_section = f"\n\n{self._agent_instructions}"

            # Inject live environment info
            import platform
            from datetime import date
            work_dir = self.workspace_root or Path.cwd()
            is_git = (work_dir / ".git").exists()
            env_section = f"""
**Environment:**
```
Working directory: {work_dir}
Is directory a git repo: {"Yes" if is_git else "No"}
Platform: {platform.system().lower()}
OS Version: {platform.platform()}
Today's date: {date.today().isoformat()}
```"""

            system_prompt_msg = {
                "role": "system",
                "content": f"{self._base_prompt}{agent_section}{env_section}"
            }
            messages = [system_prompt_msg] + messages
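        # Rendered, env_section looks like this (values are machine-specific;
        # the ones below are invented):
        #
        #   **Environment:**
        #   ```
        #   Working directory: /home/user/project
        #   Is directory a git repo: Yes
        #   Platform: linux
        #   OS Version: Linux-6.8.0-41-generic-x86_64-with-glibc2.39
        #   Today's date: 2025-06-01
        #   ```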
        # Stream response with tool execution loop
        assistant_text = []
        tool_calls: list[dict] = []
        current_tool_call: dict | None = None

        # First pass: stream and collect tool calls
        async for event in session.provider.stream(messages, tools=tools):  # type: ignore[attr-defined]
            # Accumulate text and detect code blocks
            if event.type == "text":
                text_chunk = event.data["text"]
                assistant_text.append(text_chunk)
                accumulated_text.append(text_chunk)

                # Check if we've accumulated a code block
                full_text = "".join(accumulated_text)
                if "```" in full_text and not should_fuzz:
                    should_fuzz = True
                    logger.info("Code block detected, will run fuzzing after completion")
                    # Emit fuzzing start status instead of showing the original text
                    yield StreamEvent(
                        type="fuzzing_progress",
                        data={"stage": "detected", "message": "Code detected, preparing to optimize..."}
                    )

                # Only yield text if we're NOT fuzzing
                if not should_fuzz:
                    yield event
                else:
                    logger.debug("Suppressing text event (fuzzing enabled)")

            elif event.type == "usage":
                # Capture the end time when the response completes
                streaming_end_time = time.time()

                # Store completion tokens from the API (tokens generated by the assistant)
                usage = event.data.get("usage", {})
                usage_tokens += usage.get("completion_tokens", 0)

                # Don't yield yet - we'll yield the cumulative total at the end
                logger.debug(f"Captured {usage.get('completion_tokens', 0)} tokens, cumulative: {usage_tokens}")

            elif event.type == "tool_call_start":
                current_tool_call = {
                    "tool": event.data["tool"],
                    "call_id": event.data["call_id"],
                    "input": ""
                }
                yield event

            elif event.type == "tool_call_delta":
                if current_tool_call:
                    current_tool_call["input"] += event.data.get("delta", "")
                yield event

            elif event.type == "content_block_stop":
                if current_tool_call:
                    try:
                        current_tool_call["input"] = json.loads(current_tool_call["input"])
                    except json.JSONDecodeError:
                        current_tool_call["input"] = {}
                    tool_calls.append(current_tool_call)
                    current_tool_call = None
                yield event

            else:
                # All other events
                yield event
        # Decide whether to fuzz before executing tools.
        # If write_file or update_file is called, fuzz the content first.
        write_file_calls = [tc for tc in tool_calls if tc["tool"] == "write_file"]
        update_file_calls = [tc for tc in tool_calls if tc["tool"] == "update_file"]

        if write_file_calls:
            # Prepare write_file content for fuzzing. Only the first call is
            # inspected; the loop breaks after arming fuzzing once.
            for write_call in write_file_calls:
                content = write_call["input"].get("content", "")
                # Strip markdown fences if present
                content = strip_markdown_fences(content)
                write_call["input"]["content"] = content
                path = write_call["input"].get("path", "")

                should_fuzz = True
                # Add to accumulated_text so fuzzing has the content
                accumulated_text.append(content)
                logger.info(f"write_file detected for {path}, will fuzz before writing")
                break

        if update_file_calls:
            # Prepare update_file content for fuzzing (same first-call-only pattern)
            for update_call in update_file_calls:
                content = update_call["input"].get("content", "")
                if content:  # Only fuzz if there's content to update
                    # Strip markdown fences if present
                    content = strip_markdown_fences(content)
                    update_call["input"]["content"] = content
                    path = update_call["input"].get("path", "")

                    should_fuzz = True
                    # Add to accumulated_text so fuzzing has the content
                    accumulated_text.append(content)
                    logger.info(f"update_file detected for {path}, will fuzz before updating")
                    break

        # Execute tools if any were called (but not write_file/update_file if fuzzing)
        if tool_calls and self.tool_executor:
            # Add assistant message with tool use
            assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
            if assistant_text:
                assistant_msg.add_part(TextPart(text="".join(assistant_text)))
            self.conv_manager.add_message(session.conv_id, assistant_msg)

            # Execute non-write/update tools first (or all tools if not fuzzing)
            auto_chain_calls = []  # Track tools to auto-chain

            for tool_call in tool_calls:
                # Skip write_file or update_file if we're going to fuzz
                if tool_call["tool"] in ("write_file", "update_file") and should_fuzz:
                    continue

                result = await self.tool_executor.execute(
                    tool_name=tool_call["tool"],
                    arguments=tool_call["input"],
                    call_id=tool_call["call_id"]
                )

                # Emit tool result event
                yield StreamEvent(
                    type="tool_result",
                    data={
                        "tool": tool_call["tool"],
                        "success": result.success,
                        "result": result.result if result.success else result.error
                    }
                )

                # Add tool result message
                tool_result_msg = Message(id=self._generate_msg_id(), role="user")
                result_text = f"[Tool: {tool_call['tool']}]\n"
                if result.success:
                    result_text += f"Result: {result.result}"
                else:
                    result_text += f"Error: {result.error}"
                tool_result_msg.add_part(TextPart(text=result_text))
                self.conv_manager.add_message(session.conv_id, tool_result_msg)

                # Auto-chaining logic: check if we should automatically call the next tool
                if result.success:
                    chained = self._check_auto_chain(tool_call["tool"], result.result)
                    if chained:
                        auto_chain_calls.append(chained)
                        logger.info(f"Auto-chaining: {tool_call['tool']} → {chained['tool']}")

            # Execute auto-chained tool calls
            for chained_call in auto_chain_calls:
                result = await self.tool_executor.execute(
                    tool_name=chained_call["tool"],
                    arguments=chained_call["arguments"],
                    call_id=chained_call["call_id"]
                )

                # Emit tool result event
                yield StreamEvent(
                    type="tool_result",
                    data={
                        "tool": chained_call["tool"],
                        "success": result.success,
                        "result": result.result if result.success else result.error
                    }
                )

                # Add tool result message
                tool_result_msg = Message(id=self._generate_msg_id(), role="user")
                result_text = f"[Tool: {chained_call['tool']}]\n"
                if result.success:
                    result_text += f"Result: {result.result}"
                else:
                    result_text += f"Error: {result.error}"
                tool_result_msg.add_part(TextPart(text=result_text))
                self.conv_manager.add_message(session.conv_id, tool_result_msg)
        # FSM: continue looping through tool use until no more tools are called.
        # This implements a proper ReAct loop: observe -> reason -> act -> repeat.
        non_write_tools = [tc for tc in tool_calls if tc["tool"] not in ("write_file", "update_file")]
        logger.info(f"ReAct loop check: non_write_tools={len(non_write_tools)}, should_fuzz={should_fuzz}")

        # Loop with a high safety limit - let the LLM decide when it's done
        continuation_count = 0
        max_continuations = 50  # Safety limit, not expected to be hit in normal operation

        while non_write_tools and not should_fuzz and continuation_count < max_continuations:
            continuation_count += 1
            logger.info(f"ReAct loop iteration {continuation_count}/{max_continuations}")

            # Add continuation prompt
            reminder_msg = Message(id=self._generate_msg_id(), role="user")
            reminder_msg.add_part(TextPart(text="If you have all the information needed, present your findings to the user. If you need more data, call additional tools."))
            self.conv_manager.add_message(session.conv_id, reminder_msg)

            # Get updated messages for the continuation (always include the system prompt)
            messages = self.conv_manager.to_model_format(session.conv_id)
            if system_prompt_msg:
                messages = [system_prompt_msg] + messages

            # Stream the continuation and track tool calls
            assistant_text = []
            continuation_tool_calls = []
            current_continuation_tool = None
            event_count = 0

            async for event in session.provider.stream(messages, tools=tools):  # type: ignore[attr-defined]
                event_count += 1
                yield event

                if event.type == "text":
                    assistant_text.append(event.data["text"])
                elif event.type == "usage":
                    # Accumulate tokens from the continuation
                    usage = event.data.get("usage", {})
                    usage_tokens += usage.get("completion_tokens", 0)
                    logger.debug(f"Continuation added {usage.get('completion_tokens', 0)} tokens, cumulative: {usage_tokens}")
                elif event.type == "tool_call_start":
                    current_continuation_tool = {
                        "tool": event.data["tool"],
                        "call_id": event.data["call_id"],
                        "input": ""
                    }
                elif event.type == "tool_call_delta":
                    if current_continuation_tool:
                        current_continuation_tool["input"] += event.data.get("delta", "")
                elif event.type == "content_block_stop":
                    if current_continuation_tool:
                        try:
                            current_continuation_tool["input"] = json.loads(current_continuation_tool["input"])
                        except json.JSONDecodeError:
                            current_continuation_tool["input"] = {}
                        continuation_tool_calls.append(current_continuation_tool)
                        current_continuation_tool = None

            logger.info(f"Continuation finished with {event_count} events, {len(continuation_tool_calls)} tool calls")

            # Add the assistant message from the continuation (whether tools were called or not)
            cont_assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
            if assistant_text:
                cont_assistant_msg.add_part(TextPart(text="".join(assistant_text)))
            self.conv_manager.add_message(session.conv_id, cont_assistant_msg)

            # Execute any tool calls from the continuation
            if continuation_tool_calls and self.tool_executor:
                logger.info(f"Executing {len(continuation_tool_calls)} tools from continuation")

                # Check if the continuation has write_file/update_file calls that should be fuzzed
                cont_write_calls = [tc for tc in continuation_tool_calls if tc["tool"] == "write_file"]
                cont_update_calls = [tc for tc in continuation_tool_calls if tc["tool"] == "update_file"]

                if cont_write_calls:
                    # Same first-call-only pattern as the initial pass
                    for write_call in cont_write_calls:
                        content = write_call["input"].get("content", "")
                        content = strip_markdown_fences(content)
                        write_call["input"]["content"] = content
                        path = write_call["input"].get("path", "")
                        should_fuzz = True
                        accumulated_text.append(content)
                        logger.info(f"write_file in continuation detected for {path}, will fuzz")
                        # Add to the main list so it gets executed after fuzzing
                        write_file_calls.extend(cont_write_calls)
                        break

                if cont_update_calls:
                    for update_call in cont_update_calls:
                        content = update_call["input"].get("content", "")
                        if content:
                            content = strip_markdown_fences(content)
                            update_call["input"]["content"] = content
                            path = update_call["input"].get("path", "")
                            should_fuzz = True
                            accumulated_text.append(content)
                            logger.info(f"update_file in continuation detected for {path}, will fuzz")
                            # Add to the main list so it gets executed after fuzzing
                            update_file_calls.extend(cont_update_calls)
                            break

                # Execute non-write/update tools (or all if not fuzzing)
                for tool_call in continuation_tool_calls:
                    # Skip write_file/update_file if fuzzing
                    if tool_call["tool"] in ("write_file", "update_file") and should_fuzz:
                        logger.info(f"Skipping {tool_call['tool']} execution, will fuzz first")
                        continue

                    result = await self.tool_executor.execute(
                        tool_name=tool_call["tool"],
                        arguments=tool_call["input"],
                        call_id=tool_call["call_id"]
                    )

                    # Emit tool result event (marked as continuation)
                    yield StreamEvent(
                        type="tool_result",
                        data={
                            "tool": tool_call["tool"],
                            "success": result.success,
                            "result": result.result if result.success else result.error,
                            "continuation": True,  # Mark as continuation so the TUI can handle it differently
                            "continuation_index": continuation_count
                        }
                    )

                    # Add tool result message
                    tool_result_msg = Message(id=self._generate_msg_id(), role="user")
                    result_text = f"[Tool: {tool_call['tool']}]\n"
                    if result.success:
                        result_text += f"Result: {result.result}"
                    else:
                        result_text += f"Error: {result.error}"
                    tool_result_msg.add_part(TextPart(text=result_text))
                    self.conv_manager.add_message(session.conv_id, tool_result_msg)

            # Check if we should continue the ReAct loop
            non_write_tools = [tc for tc in continuation_tool_calls if tc["tool"] not in ("write_file", "update_file")]
            if not non_write_tools:
                logger.info(f"ReAct loop ending: no more non-write tools after {continuation_count} continuations")
                break

            # Update messages for the next iteration (always include the system prompt)
            messages = self.conv_manager.to_model_format(session.conv_id)
            if system_prompt_msg:
                messages = [system_prompt_msg] + messages
            logger.info(f"ReAct loop continuing: iteration {continuation_count}/{max_continuations}")

        # After the loop exits, emit a continuation summary for the TUI
        tool_count_estimate = len(tool_calls) + continuation_count  # Rough estimate
        yield StreamEvent(
            type="continuation_complete",
            data={
                "iterations": continuation_count,
                "tool_count": tool_count_estimate
            }
        )

        # Only do a final summary call if the loop hit the safety limit.
        # If it ended naturally (the model gave a text response), the summary is already done.
        if non_write_tools:
            logger.info(f"ReAct loop hit max iterations ({max_continuations}), requesting final summary")
            messages = self.conv_manager.to_model_format(session.conv_id)
            if system_prompt_msg:
                messages = [system_prompt_msg] + messages
            async for event in session.provider.stream(messages, tools=None):  # type: ignore[attr-defined]
                yield event
                if event.type == "usage":
                    usage = event.data.get("usage", {})
                    usage_tokens += usage.get("completion_tokens", 0)
        else:
            logger.info("ReAct loop completed naturally, skipping redundant final summary")
        # If fuzzing is enabled and we detected a code block, run fuzzing now
        if should_fuzz and self.fuzzing_enabled and self.fuzzing_orchestrator:
            logger.info("Running fuzzing on code response")

            # Get the baseline if one exists
            baseline = self.baseline_manager.get(session_id)  # type: ignore[assignment]
            baseline_code = baseline.code if baseline else None  # type: ignore[union-attr]

            # Use accumulated text as context for fuzzing
            full_response = "".join(accumulated_text)

            # Emit fuzzing start
            yield StreamEvent(
                type="fuzzing_progress",
                data={"stage": "starting", "message": "Improving code quality..."}
            )

            # Run fuzzing and forward progress events
            result = None
            async for event in self.fuzzing_orchestrator.fuzz(
                user_request=user_input,
                generated_code=baseline_code or full_response,
                context_messages=messages,
            ):
                if isinstance(event, FuzzingResult):
                    result = event
                    yield StreamEvent(
                        type="fuzzing_complete",
                        data={
                            "iterations": result.iterations,
                            "quality_score": result.quality_score,
                            "budget_used": result.budget_used,
                            "divergences_found": result.divergences_found,
                            "divergences_fixed": result.divergences_fixed,
                            "oracle_corrections": result.oracle_corrections,
                        }
                    )
                else:
                    yield event

            if result:
                # Calculate the actual streaming rate from the original response
                if streaming_end_time and streaming_start_time:
                    elapsed = streaming_end_time - streaming_start_time
                    char_count = len(full_response)
                    # Estimate tokens (1 token ≈ 4 chars)
                    estimated_tokens = char_count / 4
                    tokens_per_sec = max(estimated_tokens / elapsed, 20.0) if elapsed > 0 else 70.0
                    logger.info(f"Using measured streaming rate: {tokens_per_sec:.1f} tokens/sec")
                else:
                    tokens_per_sec = 70.0

                # Stream the improved output at the same rate as the original
                # (unless it's destined for write_file/update_file)
                assistant_text = []
                if write_file_calls or update_file_calls:
                    # Don't stream write_file/update_file content - it will be shown in a collapsible widget
                    logger.info("Skipping streaming for write_file/update_file fuzzed output")
                    assistant_text.append(result.final_output)
                else:
                    # Stream normally for non-write/update fuzzing
                    logger.info("Streaming fuzzed output (no write_file/update_file)")
                    async for event in self._stream_text_with_rate(result.final_output, tokens_per_sec):
                        assistant_text.append(event.data["text"])
                        yield event

                # Count tokens in the fuzzed output (this is what goes into context, not the original)
                try:
                    import tiktoken
                    encoding = tiktoken.get_encoding("cl100k_base")
                    fuzzed_tokens = len(encoding.encode(result.final_output))
                    session.cumulative_tokens += fuzzed_tokens
                    logger.info(f"Session {session_id} added {fuzzed_tokens} tokens from fuzzed output (cumulative: {session.cumulative_tokens})")
                except ImportError:
                    # Fall back to usage tokens if tiktoken is not available
                    session.cumulative_tokens += usage_tokens
                    logger.info(f"Session {session_id} added {usage_tokens} tokens (cumulative: {session.cumulative_tokens})")

                # Now execute the write_file and update_file calls with the fuzzed code.
                # Note: every pending call receives the same fuzzed output.
                for tool_call in write_file_calls + update_file_calls:
                    # Update content with the fuzzed output (strip markdown fences as a safety measure)
                    fuzzed_content = strip_markdown_fences(result.final_output)
                    tool_call["input"]["content"] = fuzzed_content

                    result_exec = await self.tool_executor.execute(
                        tool_name=tool_call["tool"],
                        arguments=tool_call["input"],
                        call_id=tool_call["call_id"]
                    )

                    # Emit tool result event
                    yield StreamEvent(
                        type="tool_result",
                        data={
                            "tool": tool_call["tool"],
                            "success": result_exec.success,
                            "result": result_exec.result if result_exec.success else result_exec.error
                        }
                    )

                    # Add tool result message
                    tool_result_msg = Message(id=self._generate_msg_id(), role="user")
                    result_text = f"[Tool: {tool_call['tool']}]\n"
                    if result_exec.success:
                        result_text += f"Result: {result_exec.result}\n\n"
                        # Add an explicit instruction for search tools
                        if tool_call['tool'] in ('search_code', 'search_files', 'list_directory'):
                            result_text += "Present these results to the user clearly. Show file paths, line numbers, and relevant content."
                    else:
                        result_text += f"Error: {result_exec.error}"
                    tool_result_msg.add_part(TextPart(text=result_text))
                    self.conv_manager.add_message(session.conv_id, tool_result_msg)

        # Add the final assistant message to the conversation
        if assistant_text:
            assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
            assistant_msg.add_part(TextPart(text="".join(assistant_text)))
            self.conv_manager.add_message(session.conv_id, assistant_msg)

        # Calculate exact token usage AFTER the turn using harness-utils
        context_after = self.conv_manager.calculate_context_usage(
            session.conv_id,
            model=session.provider.model
        )

        # Tokens used this turn (delta from before the turn started)
        context_before = session.context_before_turn
        total_turn_tokens = context_after - context_before

        # Update cumulative tracking
        if not should_fuzz and usage_tokens > 0:
            session.cumulative_tokens += usage_tokens
            logger.info(f"Session {session_id} added {usage_tokens} tokens (no fuzzing, cumulative: {session.cumulative_tokens})")

        # Emit the token usage event for this turn
        yield StreamEvent(
            type="token_usage",
            data={
                "total_tokens": context_after,  # Cumulative total for the context counter
                "turn_tokens": total_turn_tokens,  # Tokens used this turn
                "output_tokens": usage_tokens,  # Output tokens from the API
                "input_tokens": total_turn_tokens - usage_tokens if total_turn_tokens > usage_tokens else 0,
            }
        )

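    # Event types a caller of process_turn can expect, beyond the provider's
    # own passthrough events (tool_call_start, tool_call_delta, usage, ...):
    #
    #   skill_expanded        -> {"original", "expanded"}
    #   fuzzing_progress      -> {"stage", "message"}
    #   tool_result           -> {"tool", "success", "result", ...}
    #   continuation_complete -> {"iterations", "tool_count"}
    #   fuzzing_complete      -> {"iterations", "quality_score", ...}
    #   token_usage           -> {"total_tokens", "turn_tokens", ...}
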
    def get_baseline(self, session_id: str) -> Optional[Baseline]:
        """Get the baseline for a session."""
        return self.baseline_manager.get(session_id)

    def get_context_stats(self, session_id: str) -> dict[str, Any]:
        """Get context statistics for a session."""
        session = self.sessions.get(session_id)
        if not session:
            raise ValueError(f"Session not found: {session_id}")

        # Get conversation messages from harness-utils
        messages = self.conv_manager.to_model_format(session.conv_id)

        # Calculate the exact token count using harness-utils
        token_count = self.conv_manager.calculate_context_usage(
            session.conv_id,
            model=session.provider.model
        )

        logger.info(f"Context stats for {session_id}: {len(messages)} messages, {token_count} tokens")

        return {
            "message_count": len(messages),
            "estimated_tokens": token_count,
            "max_tokens": self.context_limit,
        }

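    # Shape of the returned stats dict (values are hypothetical):
    #
    #   {"message_count": 12, "estimated_tokens": 4096, "max_tokens": 200000}
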
    def compact_conversation(self, session_id: str) -> None:
        """Compact the conversation using harness-utils."""
        session = self.sessions.get(session_id)
        if not session:
            raise ValueError(f"Session not found: {session_id}")

        # Trigger compaction manually
        self.conv_manager.prune_before_turn(session.conv_id)

    def clear_conversation(self, session_id: str) -> None:
        """Clear conversation history."""
        session = self.sessions.get(session_id)
        if not session:
            raise ValueError(f"Session not found: {session_id}")

        # Create a new conversation to replace the old one
        old_conv_id = session.conv_id
        new_conv = self.conv_manager.create_conversation(project_id="ctrl-code")
        session.conv_id = new_conv.id

        # Reset cumulative tokens
        session.cumulative_tokens = 0

        logger.info(f"Cleared conversation history for session {session_id} (old: {old_conv_id}, new: {new_conv.id})")

    def _generate_msg_id(self) -> str:
        """Generate a message ID."""
        return f"msg_{uuid.uuid4().hex[:12]}"

    def _check_auto_chain(self, tool_name: str, result: Any) -> dict | None:
        """
        Check whether we should auto-chain to another tool call.

        Args:
            tool_name: Name of the tool that just executed
            result: Result from the tool

        Returns:
            Dict with tool call info if we should chain, None otherwise
        """
        # search_files → read_file (if exactly 1 file was found)
        if tool_name == "search_files":
            # The result is a list of file paths
            if isinstance(result, list) and len(result) == 1:
                file_path = result[0]
                return {
                    "tool": "read_file",
                    "arguments": {"path": file_path},
                    "call_id": f"auto_{uuid.uuid4().hex[:12]}"
                }

        return None

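    # The single chain implemented today, sketched (the path is invented):
    #
    #   _check_auto_chain("search_files", ["src/auth/login.py"])
    #   -> {"tool": "read_file",
    #       "arguments": {"path": "src/auth/login.py"},
    #       "call_id": "auto_<12 hex chars>"}
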
    async def _stream_text_with_rate(
        self, text: str, tokens_per_sec: float = 50.0
    ) -> AsyncIterator[StreamEvent]:
        """
        Stream text in chunks to simulate a realistic token generation rate.

        Args:
            text: Complete text to stream
            tokens_per_sec: Rate to simulate (default ~50 tokens/sec, typical for LLMs)

        Yields:
            StreamEvent with text chunks
        """
        if not text:
            return

        # Split by lines to preserve newlines
        lines = text.split('\n')

        # Calculate the delay: tokens/sec * 4 chars/token = chars/sec,
        # so delay = 1 / chars_per_sec
        chars_per_sec = tokens_per_sec * 4.0
        delay_per_char = 1.0 / chars_per_sec

        # Stream line by line, in character batches within lines
        batch_size = 20  # Send 20 chars at a time for smooth streaming

        for line_idx, line in enumerate(lines):
            # Stream the line content in chunks
            for i in range(0, len(line), batch_size):
                chunk = line[i:i + batch_size]
                yield StreamEvent(type="text", data={"text": chunk})

                # Delay before the next chunk
                if i + batch_size < len(line):
                    await asyncio.sleep(delay_per_char * len(chunk))

            # Add a newline after each line (except the last one)
            if line_idx < len(lines) - 1:
                yield StreamEvent(type="text", data={"text": "\n"})
                await asyncio.sleep(delay_per_char)
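    # Pacing arithmetic for the default rate: 50 tokens/sec * 4 chars/token
    # = 200 chars/sec, so each 20-char batch sleeps 20 / 200 = 0.1 s and a
    # 1,000-char response replays in roughly 5 seconds.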