ctrlcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. ctrlcode/__init__.py +8 -0
  2. ctrlcode/agents/__init__.py +29 -0
  3. ctrlcode/agents/cleanup.py +388 -0
  4. ctrlcode/agents/communication.py +439 -0
  5. ctrlcode/agents/observability.py +421 -0
  6. ctrlcode/agents/react_loop.py +297 -0
  7. ctrlcode/agents/registry.py +211 -0
  8. ctrlcode/agents/result_parser.py +242 -0
  9. ctrlcode/agents/workflow.py +723 -0
  10. ctrlcode/analysis/__init__.py +28 -0
  11. ctrlcode/analysis/ast_diff.py +163 -0
  12. ctrlcode/analysis/bug_detector.py +149 -0
  13. ctrlcode/analysis/code_graphs.py +329 -0
  14. ctrlcode/analysis/semantic.py +205 -0
  15. ctrlcode/analysis/static.py +183 -0
  16. ctrlcode/analysis/synthesizer.py +281 -0
  17. ctrlcode/analysis/tests.py +189 -0
  18. ctrlcode/cleanup/__init__.py +16 -0
  19. ctrlcode/cleanup/auto_merge.py +350 -0
  20. ctrlcode/cleanup/doc_gardening.py +388 -0
  21. ctrlcode/cleanup/pr_automation.py +330 -0
  22. ctrlcode/cleanup/scheduler.py +356 -0
  23. ctrlcode/config.py +380 -0
  24. ctrlcode/embeddings/__init__.py +6 -0
  25. ctrlcode/embeddings/embedder.py +192 -0
  26. ctrlcode/embeddings/vector_store.py +213 -0
  27. ctrlcode/fuzzing/__init__.py +24 -0
  28. ctrlcode/fuzzing/analyzer.py +280 -0
  29. ctrlcode/fuzzing/budget.py +112 -0
  30. ctrlcode/fuzzing/context.py +665 -0
  31. ctrlcode/fuzzing/context_fuzzer.py +506 -0
  32. ctrlcode/fuzzing/derived_orchestrator.py +732 -0
  33. ctrlcode/fuzzing/oracle_adapter.py +135 -0
  34. ctrlcode/linters/__init__.py +11 -0
  35. ctrlcode/linters/hand_rolled_utils.py +221 -0
  36. ctrlcode/linters/yolo_parsing.py +217 -0
  37. ctrlcode/metrics/__init__.py +6 -0
  38. ctrlcode/metrics/dashboard.py +283 -0
  39. ctrlcode/metrics/tech_debt.py +663 -0
  40. ctrlcode/paths.py +68 -0
  41. ctrlcode/permissions.py +179 -0
  42. ctrlcode/providers/__init__.py +15 -0
  43. ctrlcode/providers/anthropic.py +138 -0
  44. ctrlcode/providers/base.py +77 -0
  45. ctrlcode/providers/openai.py +197 -0
  46. ctrlcode/providers/parallel.py +104 -0
  47. ctrlcode/server.py +871 -0
  48. ctrlcode/session/__init__.py +6 -0
  49. ctrlcode/session/baseline.py +57 -0
  50. ctrlcode/session/manager.py +967 -0
  51. ctrlcode/skills/__init__.py +10 -0
  52. ctrlcode/skills/builtin/commit.toml +29 -0
  53. ctrlcode/skills/builtin/docs.toml +25 -0
  54. ctrlcode/skills/builtin/refactor.toml +33 -0
  55. ctrlcode/skills/builtin/review.toml +28 -0
  56. ctrlcode/skills/builtin/test.toml +28 -0
  57. ctrlcode/skills/loader.py +111 -0
  58. ctrlcode/skills/registry.py +139 -0
  59. ctrlcode/storage/__init__.py +19 -0
  60. ctrlcode/storage/history_db.py +708 -0
  61. ctrlcode/tools/__init__.py +220 -0
  62. ctrlcode/tools/bash.py +112 -0
  63. ctrlcode/tools/browser.py +352 -0
  64. ctrlcode/tools/executor.py +153 -0
  65. ctrlcode/tools/explore.py +486 -0
  66. ctrlcode/tools/mcp.py +108 -0
  67. ctrlcode/tools/observability.py +561 -0
  68. ctrlcode/tools/registry.py +193 -0
  69. ctrlcode/tools/todo.py +291 -0
  70. ctrlcode/tools/update.py +266 -0
  71. ctrlcode/tools/webfetch.py +147 -0
  72. ctrlcode-0.1.0.dist-info/METADATA +93 -0
  73. ctrlcode-0.1.0.dist-info/RECORD +75 -0
  74. ctrlcode-0.1.0.dist-info/WHEEL +4 -0
  75. ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,967 @@
1
+ """Session management using harness-utils."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import time
6
+ from typing import Any, AsyncIterator, Optional
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ import uuid
10
+
11
+ from harnessutils import ConversationManager, Message, TextPart
12
+ from harnessutils.storage import FilesystemStorage
13
+ from harnessutils.config import HarnessConfig, StorageConfig
14
+
15
+ from ..providers.base import Provider, StreamEvent
16
+ from .baseline import BaselineManager, Baseline
17
+ from ..fuzzing.derived_orchestrator import DerivedFuzzingOrchestrator
18
+ from ..tools.executor import ToolExecutor
19
+ from ..tools.registry import ToolRegistry
20
+ from ..skills.registry import SkillRegistry
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def strip_markdown_fences(content: str) -> str:
    """Remove markdown code fences and trailing instruction lines from content.

    Handles LLM output of the form::

        ```python
        <code>
        ```
        Run with: python app.py

    Args:
        content: Raw text that may be wrapped in a markdown code fence.

    Returns:
        The content with any opening/closing fence lines and trailing
        "Run with:" instruction lines removed. Plain content is returned
        unchanged (modulo trailing-newline normalization from splitlines).
    """
    lines = content.splitlines()

    # Drop an opening fence such as ``` or ```python.
    if lines and lines[0].strip().startswith("```"):
        lines = lines[1:]

    # BUGFIX: instruction lines like "Run with: ..." often appear AFTER the
    # closing fence. Strip them first so the fence becomes the last line and
    # the fence check below can actually remove it. (Previously the fence
    # check ran first and a trailing instruction line left the ``` in place.)
    while lines and lines[-1].strip().startswith("Run with:"):
        lines = lines[:-1]

    # Drop a closing fence.
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]

    # Also drop any instruction lines that were inside the fence.
    while lines and lines[-1].strip().startswith("Run with:"):
        lines = lines[:-1]

    return "\n".join(lines)
44
+
45
+
46
@dataclass
class Session:
    """Represents a conversation session."""

    # Public session identifier (a uuid4 string); key into SessionManager.sessions.
    id: str
    # Identifier of the backing harness-utils conversation record.
    conv_id: str
    # Provider used for this session's LLM calls (may override the manager default).
    provider: Provider
    cumulative_tokens: int = 0  # Track total tokens used across all turns
    # NOTE(review): process_turn also assigns `context_before_turn` on instances
    # dynamically — consider declaring it as a field here for discoverability.
54
+
55
+
56
+ class SessionManager:
57
+ """Manages conversation sessions with baseline tracking."""
58
+
59
    def __init__(
        self,
        provider: Provider,
        storage_path: str,
        config: Optional[HarnessConfig] = None,
        fuzzing_orchestrator: Optional[DerivedFuzzingOrchestrator] = None,
        fuzzing_enabled: bool = False,
        tool_executor: Optional[ToolExecutor] = None,
        tool_registry: Optional[ToolRegistry] = None,
        skill_registry: Optional[SkillRegistry] = None,
        context_limit: int = 200000,
        workspace_root: Optional[Path] = None,
    ):
        """
        Initialize session manager.

        Args:
            provider: LLM provider to use
            storage_path: Path for conversation storage
            config: Optional harness config
            fuzzing_orchestrator: Optional fuzzing orchestrator
            fuzzing_enabled: Whether to use fuzzing
            tool_executor: Optional tool executor for MCP tools
            tool_registry: Optional tool registry for tool definitions
            skill_registry: Optional skill registry
            context_limit: Context window limit
            workspace_root: Workspace root for AGENT.md lookup
        """
        # Conversations are persisted on the filesystem under storage_path.
        storage_config = StorageConfig(base_path=Path(storage_path))
        self.conv_manager = ConversationManager(
            storage=FilesystemStorage(storage_config),
            config=config or HarnessConfig()
        )
        self.provider = provider
        # In-memory session table keyed by Session.id (not persisted).
        self.sessions: dict[str, Session] = {}
        self.baseline_manager = BaselineManager()
        self.fuzzing_orchestrator = fuzzing_orchestrator
        self.fuzzing_enabled = fuzzing_enabled
        self.tool_executor = tool_executor
        self.tool_registry = tool_registry
        self.skill_registry = skill_registry
        self.context_limit = context_limit
        self.workspace_root = workspace_root
        # Prompts are resolved once at construction; edits on disk take effect
        # only after constructing a new SessionManager.
        self._base_prompt = self._load_base_prompt()
        self._agent_instructions = self._load_agent_instructions()
104
+
105
    def _load_base_prompt(self) -> str:
        """
        Load base system prompt from prompts/SYSTEM_PROMPT.md.

        Search order:
        1. {workspace_root}/prompts/SYSTEM_PROMPT.md (project override)
        2. ~/.config/ctrlcode/SYSTEM_PROMPT.md (user global override)
        3. Bundled default

        Returns:
            Base system prompt content.
        """
        candidates = []

        # 1. Project-local override
        if self.workspace_root:
            candidates.append(self.workspace_root / "prompts" / "SYSTEM_PROMPT.md")

        # 2. User global override
        try:
            from platformdirs import user_config_dir
            candidates.append(Path(user_config_dir("ctrlcode")) / "SYSTEM_PROMPT.md")
        except Exception:
            # platformdirs may be missing/broken; skip the global override.
            pass

        # First existing, readable candidate wins; unreadable files are logged
        # and the search falls through to the next candidate.
        for prompt_file in candidates:
            if prompt_file.exists():
                try:
                    content = prompt_file.read_text(encoding="utf-8")
                    logger.info(f"Loaded system prompt from {prompt_file} ({len(content)} chars)")
                    return content
                except Exception as e:
                    logger.warning(f"Failed to load {prompt_file}: {e}")

        # 3. Bundled default
        logger.debug("Using bundled default system prompt")
        return """You are Ctrl+Code, Canoozie's personal coding assistant.

## CRITICAL: Act immediately, never introduce yourself

**NEVER** say "I'm ready to help", "I have access to tools", or introduce your capabilities.
**NEVER** greet the user or ask what they want when they've already told you.
When given a task, **immediately call the appropriate tool**. Your first output must be a tool call, not text.

Examples of correct behaviour:
- User says "show me the last git commit" → call `run_command` with `git log -1` immediately
- User says "find the login function" → call `search_code` with "login" immediately
- User says "read app.py" → call `read_file` with "app.py" immediately

## Tools available

- `run_command` — run shell commands (git, tests, builds, etc.)
- `read_file` — read a file's contents
- `write_file` — create a new file
- `update_file` — edit an existing file
- `search_files` — find files by glob pattern
- `search_code` — search for code by content
- `list_directory` — list directory contents
- `web_fetch` — fetch a URL

## Tool usage rules

- Call ALL tools needed in a SINGLE response — do not wait for results before calling the next tool
- Use `run_command` for git operations, tests, builds, and any shell commands
- Use `read_file` / `search_files` / `search_code` for exploring the codebase
- Use `update_file` to edit existing files, `write_file` only for new files
- When referencing code, include `file_path:line_number` so the user can navigate to it

## Workspace and file paths

- Use relative paths (`src/main.py`) not absolute paths (`/home/user/src/main.py`)
- If unsure of a file's location, call `search_files` first

## Tone

- Be concise and direct. No emojis unless asked. Output renders in a monospace terminal."""
181
+
182
+ def _load_agent_instructions(self) -> str:
183
+ """
184
+ Load AGENT.md instructions hierarchically.
185
+
186
+ Order (most general to most specific):
187
+ 1. Global: ~/.config/ctrlcode/AGENT.md
188
+ 2. Project: {workspace_root}/AGENT.md
189
+
190
+ Returns combined instructions with clear section markers.
191
+ """
192
+ instructions = []
193
+
194
+ # 1. Global config AGENT.md
195
+ try:
196
+ from platformdirs import user_config_dir
197
+ config_dir = Path(user_config_dir("ctrlcode"))
198
+ global_agent = config_dir / "AGENT.md"
199
+ if global_agent.exists():
200
+ content = global_agent.read_text(encoding="utf-8")
201
+ instructions.append(f"# Global Agent Instructions\n\n{content}")
202
+ logger.info(f"Loaded global AGENT.md ({len(content)} chars)")
203
+ except Exception as e:
204
+ logger.debug(f"No global AGENT.md: {e}")
205
+
206
+ # 2. Project AGENT.md
207
+ if self.workspace_root:
208
+ project_agent = self.workspace_root / "AGENT.md"
209
+ if project_agent.exists():
210
+ try:
211
+ content = project_agent.read_text(encoding="utf-8")
212
+ instructions.append(f"# Project-Specific Instructions\n\n{content}")
213
+ logger.info(f"Loaded project AGENT.md ({len(content)} chars)")
214
+ except Exception as e:
215
+ logger.warning(f"Failed to load project AGENT.md: {e}")
216
+
217
+ return "\n\n---\n\n".join(instructions) if instructions else ""
218
+
219
+ def create_session(self, provider: Optional[Provider] = None) -> Session:
220
+ """
221
+ Create a new session.
222
+
223
+ Args:
224
+ provider: Optional provider override
225
+
226
+ Returns:
227
+ New session instance
228
+ """
229
+ conv = self.conv_manager.create_conversation(project_id="ctrl-code")
230
+ session = Session(
231
+ id=str(uuid.uuid4()),
232
+ conv_id=conv.id,
233
+ provider=provider or self.provider
234
+ )
235
+ self.sessions[session.id] = session
236
+ return session
237
+
238
+ def get_session(self, session_id: str) -> Optional[Session]:
239
+ """Get session by ID."""
240
+ return self.sessions.get(session_id)
241
+
242
+ async def process_turn(
243
+ self,
244
+ session_id: str,
245
+ user_input: str,
246
+ tools: list[dict] | None = None
247
+ ) -> AsyncIterator[StreamEvent]:
248
+ """
249
+ Process a conversation turn.
250
+
251
+ Args:
252
+ session_id: Session identifier
253
+ user_input: User's input message
254
+ tools: Optional tool definitions
255
+
256
+ Yields:
257
+ StreamEvent: Streaming events from provider
258
+ """
259
+ session = self.sessions.get(session_id)
260
+ if not session:
261
+ raise ValueError(f"Session not found: {session_id}")
262
+
263
+ # Expand skills if present
264
+ if self.skill_registry:
265
+ expanded_input, was_skill = self.skill_registry.process_input(user_input)
266
+ if was_skill:
267
+ # Emit skill expansion event
268
+ yield StreamEvent(
269
+ type="skill_expanded",
270
+ data={"original": user_input, "expanded": expanded_input}
271
+ )
272
+ user_input = expanded_input
273
+
274
+ # Extract baseline if present
275
+ baseline_code = self.baseline_manager.extract_from_request(user_input)
276
+ if baseline_code:
277
+ baseline = Baseline(code=baseline_code)
278
+ self.baseline_manager.store(session_id, baseline)
279
+
280
+ # Prune before adding new user message (makes room for the new turn)
281
+ self.conv_manager.prune_before_turn(session.conv_id)
282
+
283
+ # Add user message
284
+ user_msg = Message(id=self._generate_msg_id(), role="user")
285
+ user_msg.add_part(TextPart(text=user_input))
286
+ self.conv_manager.add_message(session.conv_id, user_msg)
287
+
288
+ # Track context size before LLM call for accurate token delta
289
+ session.context_before_turn = self.conv_manager.calculate_context_usage(
290
+ session.conv_id,
291
+ model=session.provider.model
292
+ )
293
+
294
+ # Get messages for LLM
295
+ messages = self.conv_manager.to_model_format(session.conv_id)
296
+ logger.info(f"to_model_format: {len(messages)} messages, roles={[m.get('role') for m in messages]}")
297
+
298
+ # Get tool definitions from registry if not provided
299
+ if tools is None and self.tool_registry:
300
+ tools = self.tool_registry.get_tool_definitions()
301
+ logger.info(f"Fetched {len(tools)} tool definitions from registry")
302
+ else:
303
+ logger.info(f"Tools parameter: {tools}")
304
+
305
+ # Track if we should fuzz (detected code block)
306
+ should_fuzz = False
307
+ accumulated_text = []
308
+
309
+ # Track streaming rate for fuzzing output
310
+ streaming_start_time = time.time()
311
+ streaming_end_time = None
312
+
313
+ # Track usage tokens (will be replaced by fuzzed output tokens if fuzzing occurs)
314
+ usage_tokens = 0
315
+
316
+ # Build system prompt once; reuse for all continuation calls
317
+ system_prompt_msg: dict | None = None
318
+ if not messages or messages[0].get("role") != "system":
319
+ # Build AGENT.md section
320
+ agent_section = ""
321
+ if self._agent_instructions:
322
+ agent_section = f"\n\n{self._agent_instructions}"
323
+
324
+ # Inject live environment info
325
+ import platform
326
+ from datetime import date
327
+ work_dir = self.workspace_root or Path.cwd()
328
+ is_git = (work_dir / ".git").exists()
329
+ env_section = f"""
330
+ **Environment:**
331
+ ```
332
+ Working directory: {work_dir}
333
+ Is directory a git repo: {"Yes" if is_git else "No"}
334
+ Platform: {platform.system().lower()}
335
+ OS Version: {platform.platform()}
336
+ Today's date: {date.today().isoformat()}
337
+ ```"""
338
+
339
+ system_prompt_msg = {
340
+ "role": "system",
341
+ "content": f"{self._base_prompt}{agent_section}{env_section}"
342
+ }
343
+ messages = [system_prompt_msg] + messages
344
+
345
+ # Stream response with tool execution loop
346
+ assistant_text = []
347
+ tool_calls: list[dict] = []
348
+ current_tool_call: dict | None = None
349
+
350
+ # First pass: stream and collect tool calls
351
+ async for event in session.provider.stream(messages, tools=tools): # type: ignore[attr-defined]
352
+ # Accumulate text and detect code blocks
353
+ if event.type == "text":
354
+ text_chunk = event.data["text"]
355
+ assistant_text.append(text_chunk)
356
+ accumulated_text.append(text_chunk)
357
+
358
+ # Check if we've accumulated a code block
359
+ full_text = "".join(accumulated_text)
360
+ if "```" in full_text and not should_fuzz:
361
+ should_fuzz = True
362
+ logger.info("Code block detected, will run fuzzing after completion")
363
+ # Emit fuzzing start status instead of showing original text
364
+ yield StreamEvent(
365
+ type="fuzzing_progress",
366
+ data={"stage": "detected", "message": "Code detected, preparing to optimize..."}
367
+ )
368
+
369
+ # Only yield text if we're NOT fuzzing
370
+ if not should_fuzz:
371
+ yield event
372
+ else:
373
+ logger.debug("Suppressing text event (fuzzing enabled)")
374
+
375
+ elif event.type == "usage":
376
+ # Capture end time when response completes
377
+ streaming_end_time = time.time()
378
+
379
+ # Store completion tokens from API (tokens generated by assistant)
380
+ usage = event.data.get("usage", {})
381
+ usage_tokens += usage.get("completion_tokens", 0)
382
+
383
+ # Don't yield yet - we'll yield cumulative at the end
384
+ logger.debug(f"Captured {usage.get('completion_tokens', 0)} tokens, cumulative: {usage_tokens}")
385
+
386
+ elif event.type == "tool_call_start":
387
+ current_tool_call = {
388
+ "tool": event.data["tool"],
389
+ "call_id": event.data["call_id"],
390
+ "input": ""
391
+ }
392
+ yield event
393
+
394
+ elif event.type == "tool_call_delta":
395
+ if current_tool_call:
396
+ current_tool_call["input"] += event.data.get("delta", "")
397
+ yield event
398
+
399
+ elif event.type == "content_block_stop":
400
+ if current_tool_call:
401
+ import json
402
+ try:
403
+ current_tool_call["input"] = json.loads(current_tool_call["input"])
404
+ except json.JSONDecodeError:
405
+ current_tool_call["input"] = {}
406
+ tool_calls.append(current_tool_call)
407
+ current_tool_call = None
408
+ yield event
409
+
410
+ else:
411
+ # All other events
412
+ yield event
413
+
414
+ # Check if we should fuzz before executing tools
415
+ # If write_file or update_file is called, fuzz the content
416
+ write_file_calls = [tc for tc in tool_calls if tc["tool"] == "write_file"]
417
+ update_file_calls = [tc for tc in tool_calls if tc["tool"] == "update_file"]
418
+
419
+ if write_file_calls:
420
+ # Fuzz any write_file content
421
+ for write_call in write_file_calls:
422
+ content = write_call["input"].get("content", "")
423
+ # Strip markdown fences if present
424
+ content = strip_markdown_fences(content)
425
+ write_call["input"]["content"] = content
426
+ path = write_call["input"].get("path", "")
427
+
428
+ should_fuzz = True
429
+ # Add to accumulated_text so fuzzing has the content
430
+ accumulated_text.append(content)
431
+ logger.info(f"write_file detected for {path}, will fuzz before writing")
432
+ break
433
+
434
+ if update_file_calls:
435
+ # Fuzz any update_file content
436
+ for update_call in update_file_calls:
437
+ content = update_call["input"].get("content", "")
438
+ if content: # Only fuzz if there's content to update
439
+ # Strip markdown fences if present
440
+ content = strip_markdown_fences(content)
441
+ update_call["input"]["content"] = content
442
+ path = update_call["input"].get("path", "")
443
+
444
+ should_fuzz = True
445
+ # Add to accumulated_text so fuzzing has the content
446
+ accumulated_text.append(content)
447
+ logger.info(f"update_file detected for {path}, will fuzz before updating")
448
+ break
449
+
450
+ # Execute tools if any were called (but not write_file if fuzzing)
451
+ if tool_calls and self.tool_executor:
452
+ # Add assistant message with tool use
453
+ assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
454
+ if assistant_text:
455
+ assistant_msg.add_part(TextPart(text="".join(assistant_text)))
456
+ self.conv_manager.add_message(session.conv_id, assistant_msg)
457
+
458
+ # Execute non-write/update tools first (or all tools if not fuzzing)
459
+ auto_chain_calls = [] # Track tools to auto-chain
460
+
461
+ for tool_call in tool_calls:
462
+ # Skip write_file or update_file if we're going to fuzz
463
+ if tool_call["tool"] in ("write_file", "update_file") and should_fuzz:
464
+ continue
465
+
466
+ result = await self.tool_executor.execute(
467
+ tool_name=tool_call["tool"],
468
+ arguments=tool_call["input"],
469
+ call_id=tool_call["call_id"]
470
+ )
471
+
472
+ # Emit tool result event
473
+ yield StreamEvent(
474
+ type="tool_result",
475
+ data={
476
+ "tool": tool_call["tool"],
477
+ "success": result.success,
478
+ "result": result.result if result.success else result.error
479
+ }
480
+ )
481
+
482
+ # Add tool result message
483
+ tool_result_msg = Message(id=self._generate_msg_id(), role="user")
484
+ result_text = f"[Tool: {tool_call['tool']}]\n"
485
+ if result.success:
486
+ result_text += f"Result: {result.result}"
487
+ else:
488
+ result_text += f"Error: {result.error}"
489
+ tool_result_msg.add_part(TextPart(text=result_text))
490
+ self.conv_manager.add_message(session.conv_id, tool_result_msg)
491
+
492
+ # Auto-chaining logic: check if we should automatically call next tool
493
+ if result.success:
494
+ chained = self._check_auto_chain(tool_call["tool"], result.result)
495
+ if chained:
496
+ auto_chain_calls.append(chained)
497
+ logger.info(f"Auto-chaining: {tool_call['tool']} → {chained['tool']}")
498
+
499
+ # Execute auto-chained tool calls
500
+ for chained_call in auto_chain_calls:
501
+ result = await self.tool_executor.execute(
502
+ tool_name=chained_call["tool"],
503
+ arguments=chained_call["arguments"],
504
+ call_id=chained_call["call_id"]
505
+ )
506
+
507
+ # Emit tool result event
508
+ yield StreamEvent(
509
+ type="tool_result",
510
+ data={
511
+ "tool": chained_call["tool"],
512
+ "success": result.success,
513
+ "result": result.result if result.success else result.error
514
+ }
515
+ )
516
+
517
+ # Add tool result message
518
+ tool_result_msg = Message(id=self._generate_msg_id(), role="user")
519
+ result_text = f"[Tool: {chained_call['tool']}]\n"
520
+ if result.success:
521
+ result_text += f"Result: {result.result}"
522
+ else:
523
+ result_text += f"Error: {result.error}"
524
+ tool_result_msg.add_part(TextPart(text=result_text))
525
+ self.conv_manager.add_message(session.conv_id, tool_result_msg)
526
+
527
+ # FSM: Continue looping through tool use until no more tools called
528
+ # This implements a proper ReAct loop: observe -> reason -> act -> repeat
529
+ non_write_tools = [tc for tc in tool_calls if tc["tool"] not in ("write_file", "update_file")]
530
+ logger.info(f"ReAct loop check: non_write_tools={len(non_write_tools)}, should_fuzz={should_fuzz}")
531
+
532
+ # Loop with high safety limit - let LLM decide when it's done
533
+ continuation_count = 0
534
+ max_continuations = 50 # Safety limit, not expected to hit in normal operation
535
+
536
+ while non_write_tools and not should_fuzz and continuation_count < max_continuations:
537
+ continuation_count += 1
538
+ logger.info(f"ReAct loop iteration {continuation_count}/{max_continuations}")
539
+
540
+ # Add continuation prompt
541
+ reminder_msg = Message(id=self._generate_msg_id(), role="user")
542
+ reminder_msg.add_part(TextPart(text="If you have all the information needed, present your findings to the user. If you need more data, call additional tools."))
543
+ self.conv_manager.add_message(session.conv_id, reminder_msg)
544
+
545
+ # Get updated messages for continuation (always include system prompt)
546
+ messages = self.conv_manager.to_model_format(session.conv_id)
547
+ if system_prompt_msg:
548
+ messages = [system_prompt_msg] + messages
549
+
550
+ # Stream continuation and track tool calls
551
+ assistant_text = []
552
+ continuation_tool_calls = []
553
+ current_continuation_tool = None
554
+ event_count = 0
555
+
556
+ async for event in session.provider.stream(messages, tools=tools): # type: ignore[attr-defined]
557
+ event_count += 1
558
+ yield event
559
+
560
+ if event.type == "text":
561
+ assistant_text.append(event.data["text"])
562
+ elif event.type == "usage":
563
+ # Accumulate tokens from continuation
564
+ usage = event.data.get("usage", {})
565
+ usage_tokens += usage.get("completion_tokens", 0)
566
+ logger.debug(f"Continuation added {usage.get('completion_tokens', 0)} tokens, cumulative: {usage_tokens}")
567
+ elif event.type == "tool_call_start":
568
+ current_continuation_tool = {
569
+ "tool": event.data["tool"],
570
+ "call_id": event.data["call_id"],
571
+ "input": ""
572
+ }
573
+ elif event.type == "tool_call_delta":
574
+ if current_continuation_tool:
575
+ current_continuation_tool["input"] += event.data.get("delta", "")
576
+ elif event.type == "content_block_stop":
577
+ if current_continuation_tool:
578
+ import json
579
+ try:
580
+ current_continuation_tool["input"] = json.loads(current_continuation_tool["input"])
581
+ except json.JSONDecodeError:
582
+ current_continuation_tool["input"] = {}
583
+ continuation_tool_calls.append(current_continuation_tool)
584
+ current_continuation_tool = None
585
+
586
+ logger.info(f"Continuation finished with {event_count} events, {len(continuation_tool_calls)} tool calls")
587
+
588
+ # Add assistant message from continuation (whether tools were called or not)
589
+ cont_assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
590
+ if assistant_text:
591
+ cont_assistant_msg.add_part(TextPart(text="".join(assistant_text)))
592
+ self.conv_manager.add_message(session.conv_id, cont_assistant_msg)
593
+
594
+ # Execute any tool calls from continuation
595
+ if continuation_tool_calls and self.tool_executor:
596
+ logger.info(f"Executing {len(continuation_tool_calls)} tools from continuation")
597
+
598
+ # Check if continuation has write_file/update_file that should be fuzzed
599
+ cont_write_calls = [tc for tc in continuation_tool_calls if tc["tool"] == "write_file"]
600
+ cont_update_calls = [tc for tc in continuation_tool_calls if tc["tool"] == "update_file"]
601
+
602
+ if cont_write_calls:
603
+ for write_call in cont_write_calls:
604
+ content = write_call["input"].get("content", "")
605
+ content = strip_markdown_fences(content)
606
+ write_call["input"]["content"] = content
607
+ path = write_call["input"].get("path", "")
608
+ should_fuzz = True
609
+ accumulated_text.append(content)
610
+ logger.info(f"write_file in continuation detected for {path}, will fuzz")
611
+ # Add to main list so it gets executed after fuzzing
612
+ write_file_calls.extend(cont_write_calls)
613
+ break
614
+
615
+ if cont_update_calls:
616
+ for update_call in cont_update_calls:
617
+ content = update_call["input"].get("content", "")
618
+ if content:
619
+ content = strip_markdown_fences(content)
620
+ update_call["input"]["content"] = content
621
+ path = update_call["input"].get("path", "")
622
+ should_fuzz = True
623
+ accumulated_text.append(content)
624
+ logger.info(f"update_file in continuation detected for {path}, will fuzz")
625
+ # Add to main list so it gets executed after fuzzing
626
+ update_file_calls.extend(cont_update_calls)
627
+ break
628
+
629
+ # Execute non-write/update tools (or all if not fuzzing)
630
+ for tool_call in continuation_tool_calls:
631
+ # Skip write_file/update_file if fuzzing
632
+ if tool_call["tool"] in ("write_file", "update_file") and should_fuzz:
633
+ logger.info(f"Skipping {tool_call['tool']} execution, will fuzz first")
634
+ continue
635
+
636
+ result = await self.tool_executor.execute(
637
+ tool_name=tool_call["tool"],
638
+ arguments=tool_call["input"],
639
+ call_id=tool_call["call_id"]
640
+ )
641
+
642
+ # Emit tool result event (marked as continuation)
643
+ yield StreamEvent(
644
+ type="tool_result",
645
+ data={
646
+ "tool": tool_call["tool"],
647
+ "success": result.success,
648
+ "result": result.result if result.success else result.error,
649
+ "continuation": True, # Mark as continuation for TUI to handle differently
650
+ "continuation_index": continuation_count
651
+ }
652
+ )
653
+
654
+ # Add tool result message
655
+ tool_result_msg = Message(id=self._generate_msg_id(), role="user")
656
+ result_text = f"[Tool: {tool_call['tool']}]\n"
657
+ if result.success:
658
+ result_text += f"Result: {result.result}"
659
+ else:
660
+ result_text += f"Error: {result.error}"
661
+ tool_result_msg.add_part(TextPart(text=result_text))
662
+ self.conv_manager.add_message(session.conv_id, tool_result_msg)
663
+
664
+ # Check if we should continue the ReAct loop
665
+ non_write_tools = [tc for tc in continuation_tool_calls if tc["tool"] not in ("write_file", "update_file")]
666
+ if not non_write_tools:
667
+ logger.info(f"ReAct loop ending: no more non-write tools after {continuation_count} continuations")
668
+ break
669
+
670
+ # Update messages for next iteration (always include system prompt)
671
+ messages = self.conv_manager.to_model_format(session.conv_id)
672
+ if system_prompt_msg:
673
+ messages = [system_prompt_msg] + messages
674
+ logger.info(f"ReAct loop continuing: iteration {continuation_count}/{max_continuations}")
675
+
676
+ # After loop exits, emit continuation summary for TUI
677
+ tool_count_estimate = len(tool_calls) + continuation_count # Rough estimate
678
+ yield StreamEvent(
679
+ type="continuation_complete",
680
+ data={
681
+ "iterations": continuation_count,
682
+ "tool_count": tool_count_estimate
683
+ }
684
+ )
685
+
686
+ # Only do a final summary call if the loop hit the safety limit.
687
+ # If it ended naturally (model gave a text response), the summary is already done.
688
+ if non_write_tools:
689
+ logger.info(f"ReAct loop hit max iterations ({max_continuations}), requesting final summary")
690
+ messages = self.conv_manager.to_model_format(session.conv_id)
691
+ if system_prompt_msg:
692
+ messages = [system_prompt_msg] + messages
693
+ async for event in session.provider.stream(messages, tools=None): # type: ignore[attr-defined]
694
+ yield event
695
+ if event.type == "usage":
696
+ usage = event.data.get("usage", {})
697
+ usage_tokens += usage.get("completion_tokens", 0)
698
+ else:
699
+ logger.info("ReAct loop completed naturally, skipping redundant final summary")
700
+
701
+ # If fuzzing is enabled and we detected code block, run fuzzing now
702
+ if should_fuzz and self.fuzzing_enabled and self.fuzzing_orchestrator:
703
+ logger.info("Running fuzzing on code response")
704
+
705
+ # Get baseline if exists
706
+ baseline = self.baseline_manager.get(session_id) # type: ignore[assignment]
707
+ baseline_code = baseline.code if baseline else None # type: ignore[union-attr]
708
+
709
+ # Use accumulated text as context for fuzzing
710
+ full_response = "".join(accumulated_text)
711
+
712
+ # Emit fuzzing start
713
+ yield StreamEvent(
714
+ type="fuzzing_progress",
715
+ data={"stage": "starting", "message": "Improving code quality..."}
716
+ )
717
+
718
+ # Run fuzzing and forward progress events
719
+ result = None
720
+ async for event in self.fuzzing_orchestrator.fuzz(
721
+ user_request=user_input,
722
+ generated_code=baseline_code or full_response,
723
+ context_messages=messages,
724
+ ):
725
+ from ..fuzzing.derived_orchestrator import FuzzingResult
726
+ if isinstance(event, FuzzingResult):
727
+ result = event
728
+ yield StreamEvent(
729
+ type="fuzzing_complete",
730
+ data={
731
+ "iterations": result.iterations,
732
+ "quality_score": result.quality_score,
733
+ "budget_used": result.budget_used,
734
+ "divergences_found": result.divergences_found,
735
+ "divergences_fixed": result.divergences_fixed,
736
+ "oracle_corrections": result.oracle_corrections,
737
+ }
738
+ )
739
+ else:
740
+ yield event
741
+
742
+ if result:
743
+ # Calculate actual streaming rate from original response
744
+ if streaming_end_time and streaming_start_time:
745
+ elapsed = streaming_end_time - streaming_start_time
746
+ char_count = len(full_response)
747
+ # Estimate tokens (1 token ≈ 4 chars)
748
+ estimated_tokens = char_count / 4
749
+ tokens_per_sec = max(estimated_tokens / elapsed, 20.0) if elapsed > 0 else 70.0
750
+ logger.info(f"Using measured streaming rate: {tokens_per_sec:.1f} tokens/sec")
751
+ else:
752
+ tokens_per_sec = 70.0
753
+
754
+ # Stream improved output at same rate as original (unless it's for write_file/update_file)
755
+ assistant_text = []
756
+ if write_file_calls or update_file_calls:
757
+ # Don't stream write_file/update_file content - it will be shown in collapsible widget
758
+ logger.info("Skipping streaming for write_file/update_file fuzzed output")
759
+ assistant_text.append(result.final_output)
760
+ else:
761
+ # Stream normally for non-write/update fuzzing
762
+ logger.info("Streaming fuzzed output (no write_file/update_file)")
763
+ async for event in self._stream_text_with_rate(result.final_output, tokens_per_sec):
764
+ assistant_text.append(event.data["text"])
765
+ yield event
766
+
767
+ # Count tokens in fuzzed output (this is what goes into context, not the original)
768
+ try:
769
+ import tiktoken
770
+ encoding = tiktoken.get_encoding("cl100k_base")
771
+ fuzzed_tokens = len(encoding.encode(result.final_output))
772
+ session.cumulative_tokens += fuzzed_tokens
773
+ logger.info(f"Session {session_id} added {fuzzed_tokens} tokens from fuzzed output (cumulative: {session.cumulative_tokens})")
774
+ except ImportError:
775
+ # Fallback to usage tokens if tiktoken not available
776
+ session.cumulative_tokens += usage_tokens
777
+ logger.info(f"Session {session_id} added {usage_tokens} tokens (cumulative: {session.cumulative_tokens})")
778
+
779
+ # Now execute write_file and update_file calls with the fuzzed code
780
+ for tool_call in write_file_calls + update_file_calls:
781
+ # Update content with fuzzed output (strip markdown fences as safety measure)
782
+ fuzzed_content = strip_markdown_fences(result.final_output)
783
+ tool_call["input"]["content"] = fuzzed_content
784
+
785
+ result_exec = await self.tool_executor.execute(
786
+ tool_name=tool_call["tool"],
787
+ arguments=tool_call["input"],
788
+ call_id=tool_call["call_id"]
789
+ )
790
+
791
+ # Emit tool result event
792
+ yield StreamEvent(
793
+ type="tool_result",
794
+ data={
795
+ "tool": tool_call["tool"],
796
+ "success": result_exec.success,
797
+ "result": result_exec.result if result_exec.success else result_exec.error
798
+ }
799
+ )
800
+
801
+ # Add tool result message
802
+ tool_result_msg = Message(id=self._generate_msg_id(), role="user")
803
+ result_text = f"[Tool: {tool_call['tool']}]\n"
804
+ if result_exec.success:
805
+ result_text += f"Result: {result_exec.result}\n\n"
806
+ # Add explicit instruction for search tools
807
+ if tool_call['tool'] in ('search_code', 'search_files', 'list_directory'):
808
+ result_text += "Present these results to the user clearly. Show file paths, line numbers, and relevant content."
809
+ else:
810
+ result_text += f"Error: {result_exec.error}"
811
+ tool_result_msg.add_part(TextPart(text=result_text))
812
+ self.conv_manager.add_message(session.conv_id, tool_result_msg)
813
+
814
+ # Add final assistant message to conversation
815
+ if assistant_text:
816
+ assistant_msg = Message(id=self._generate_msg_id(), role="assistant")
817
+ assistant_msg.add_part(TextPart(text="".join(assistant_text)))
818
+ self.conv_manager.add_message(session.conv_id, assistant_msg)
819
+
820
+ # Calculate exact token usage AFTER turn using harness-utils
821
+ context_after = self.conv_manager.calculate_context_usage(
822
+ session.conv_id,
823
+ model=session.provider.model
824
+ )
825
+
826
+ # Calculate tokens used this turn (delta from before turn started)
827
+ context_before = session.context_before_turn if hasattr(session, 'context_before_turn') else 0
828
+ total_turn_tokens = context_after - context_before
829
+
830
+ # Update cumulative tracking
831
+ if not should_fuzz and usage_tokens > 0:
832
+ session.cumulative_tokens += usage_tokens
833
+ logger.info(f"Session {session_id} added {usage_tokens} tokens (no fuzzing, cumulative: {session.cumulative_tokens})")
834
+
835
+ # Emit token usage event for this turn
836
+ yield StreamEvent(
837
+ type="token_usage",
838
+ data={
839
+ "total_tokens": context_after, # Cumulative total for context counter
840
+ "turn_tokens": total_turn_tokens, # Tokens used this turn
841
+ "output_tokens": usage_tokens, # Output tokens from API
842
+ "input_tokens": total_turn_tokens - usage_tokens if total_turn_tokens > usage_tokens else 0,
843
+ }
844
+ )
845
+
846
+
847
def get_baseline(self, session_id: str) -> Optional[Baseline]:
    """Look up the baseline recorded for the given session.

    Returns None when no baseline has been captured yet.
    """
    baseline = self.baseline_manager.get(session_id)
    return baseline
850
+
851
def get_context_stats(self, session_id: str) -> dict[str, Any]:
    """Report message count and token usage for a session's conversation.

    Returns a dict with ``message_count``, ``estimated_tokens`` and
    ``max_tokens`` (the configured context limit).

    Raises:
        ValueError: if the session id is unknown.
    """
    session = self.sessions.get(session_id)
    if not session:
        raise ValueError(f"Session not found: {session_id}")

    # Pull the conversation in model format so we can count messages.
    msgs = self.conv_manager.to_model_format(session.conv_id)

    # Exact token accounting is delegated to harness-utils.
    tokens = self.conv_manager.calculate_context_usage(
        session.conv_id,
        model=session.provider.model
    )

    logger.info(f"Context stats for {session_id}: {len(msgs)} messages, {tokens} tokens")

    return {
        "message_count": len(msgs),
        "estimated_tokens": tokens,
        "max_tokens": self.context_limit,
    }
873
+
874
def compact_conversation(self, session_id: str) -> None:
    """Manually trigger compaction of a session's conversation.

    Delegates the actual pruning to the conversation manager
    (harness-utils).

    Raises:
        ValueError: if the session id is unknown.
    """
    if not (session := self.sessions.get(session_id)):
        raise ValueError(f"Session not found: {session_id}")
    self.conv_manager.prune_before_turn(session.conv_id)
882
+
883
def clear_conversation(self, session_id: str) -> None:
    """Discard a session's history by swapping in a fresh conversation.

    Also resets the session's cumulative token counter to zero.

    Raises:
        ValueError: if the session id is unknown.
    """
    session = self.sessions.get(session_id)
    if not session:
        raise ValueError(f"Session not found: {session_id}")

    # Keep the old conversation id purely for the audit log below.
    previous = session.conv_id
    replacement = self.conv_manager.create_conversation(project_id="ctrl-code")
    session.conv_id = replacement.id

    # A fresh conversation means no accumulated context tokens.
    session.cumulative_tokens = 0

    logger.info(f"Cleared conversation history for session {session_id} (old: {previous}, new: {replacement.id})")
898
+
899
+ def _generate_msg_id(self) -> str:
900
+ """Generate message ID."""
901
+ return f"msg_{uuid.uuid4().hex[:12]}"
902
+
903
+ def _check_auto_chain(self, tool_name: str, result: dict) -> dict | None:
904
+ """
905
+ Check if we should auto-chain to another tool call.
906
+
907
+ Args:
908
+ tool_name: Name of the tool that just executed
909
+ result: Result from the tool
910
+
911
+ Returns:
912
+ Dict with tool call info if should chain, None otherwise
913
+ """
914
+ # search_files → read_file (if exactly 1 file found)
915
+ if tool_name == "search_files":
916
+ # Result is a list of file paths
917
+ if isinstance(result, list) and len(result) == 1:
918
+ file_path = result[0]
919
+ return {
920
+ "tool": "read_file",
921
+ "arguments": {"path": file_path},
922
+ "call_id": f"auto_{uuid.uuid4().hex[:12]}"
923
+ }
924
+
925
+ return None
926
+
927
async def _stream_text_with_rate(
    self, text: str, tokens_per_sec: float = 50.0
) -> AsyncIterator[StreamEvent]:
    """
    Stream text in chunks to simulate realistic token generation rate.

    Used to replay already-complete text (e.g. fuzzed output) at roughly
    the pace a model would have produced it.

    Args:
        text: Complete text to stream
        tokens_per_sec: Rate to simulate (default ~50 tokens/sec, typical for LLMs)

    Yields:
        StreamEvent with text chunks (type="text", data={"text": ...})
    """
    # Nothing to emit for empty input.
    if not text:
        return

    # Split by lines to preserve newlines; newlines are re-emitted
    # explicitly between lines below.
    lines = text.split('\n')

    # Calculate delay: tokens/sec * 4 chars/token = chars/sec
    # delay = 1 / chars_per_sec
    # (4 chars/token is the same rough heuristic used elsewhere in this file.)
    chars_per_sec = tokens_per_sec * 4.0
    delay_per_char = 1.0 / chars_per_sec

    # Stream line by line, with char batches within lines
    batch_size = 20  # Send 20 chars at a time for smooth streaming

    for line_idx, line in enumerate(lines):
        # Stream the line content in chunks
        for i in range(0, len(line), batch_size):
            chunk = line[i:i + batch_size]
            yield StreamEvent(type="text", data={"text": chunk})

            # Delay before next chunk — skipped after the final chunk of a
            # line so the trailing newline (below) is not double-delayed.
            if i + batch_size < len(line):
                await asyncio.sleep(delay_per_char * len(chunk))

        # Add newline after each line (except the last one)
        if line_idx < len(lines) - 1:
            yield StreamEvent(type="text", data={"text": "\n"})
            await asyncio.sleep(delay_per_char)