kairo-code 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- image-service/main.py +178 -0
- infra/chat/app/main.py +84 -0
- kairo/backend/__init__.py +0 -0
- kairo/backend/api/__init__.py +0 -0
- kairo/backend/api/admin/__init__.py +23 -0
- kairo/backend/api/admin/audit.py +54 -0
- kairo/backend/api/admin/content.py +142 -0
- kairo/backend/api/admin/incidents.py +148 -0
- kairo/backend/api/admin/stats.py +125 -0
- kairo/backend/api/admin/system.py +87 -0
- kairo/backend/api/admin/users.py +279 -0
- kairo/backend/api/agents.py +94 -0
- kairo/backend/api/api_keys.py +85 -0
- kairo/backend/api/auth.py +116 -0
- kairo/backend/api/billing.py +41 -0
- kairo/backend/api/chat.py +72 -0
- kairo/backend/api/conversations.py +125 -0
- kairo/backend/api/device_auth.py +100 -0
- kairo/backend/api/files.py +83 -0
- kairo/backend/api/health.py +36 -0
- kairo/backend/api/images.py +80 -0
- kairo/backend/api/openai_compat.py +225 -0
- kairo/backend/api/projects.py +102 -0
- kairo/backend/api/usage.py +32 -0
- kairo/backend/api/webhooks.py +79 -0
- kairo/backend/app.py +297 -0
- kairo/backend/config.py +179 -0
- kairo/backend/core/__init__.py +0 -0
- kairo/backend/core/admin_auth.py +24 -0
- kairo/backend/core/api_key_auth.py +55 -0
- kairo/backend/core/database.py +28 -0
- kairo/backend/core/dependencies.py +70 -0
- kairo/backend/core/logging.py +23 -0
- kairo/backend/core/rate_limit.py +73 -0
- kairo/backend/core/security.py +29 -0
- kairo/backend/models/__init__.py +19 -0
- kairo/backend/models/agent.py +30 -0
- kairo/backend/models/api_key.py +25 -0
- kairo/backend/models/api_usage.py +29 -0
- kairo/backend/models/audit_log.py +26 -0
- kairo/backend/models/conversation.py +48 -0
- kairo/backend/models/device_code.py +30 -0
- kairo/backend/models/feature_flag.py +21 -0
- kairo/backend/models/image_generation.py +24 -0
- kairo/backend/models/incident.py +28 -0
- kairo/backend/models/project.py +28 -0
- kairo/backend/models/uptime_record.py +24 -0
- kairo/backend/models/usage.py +24 -0
- kairo/backend/models/user.py +49 -0
- kairo/backend/schemas/__init__.py +0 -0
- kairo/backend/schemas/admin/__init__.py +0 -0
- kairo/backend/schemas/admin/audit.py +28 -0
- kairo/backend/schemas/admin/content.py +53 -0
- kairo/backend/schemas/admin/stats.py +77 -0
- kairo/backend/schemas/admin/system.py +44 -0
- kairo/backend/schemas/admin/users.py +48 -0
- kairo/backend/schemas/agent.py +42 -0
- kairo/backend/schemas/api_key.py +30 -0
- kairo/backend/schemas/auth.py +57 -0
- kairo/backend/schemas/chat.py +26 -0
- kairo/backend/schemas/conversation.py +39 -0
- kairo/backend/schemas/device_auth.py +40 -0
- kairo/backend/schemas/image.py +15 -0
- kairo/backend/schemas/openai_compat.py +76 -0
- kairo/backend/schemas/project.py +21 -0
- kairo/backend/schemas/status.py +81 -0
- kairo/backend/schemas/usage.py +15 -0
- kairo/backend/services/__init__.py +0 -0
- kairo/backend/services/admin/__init__.py +0 -0
- kairo/backend/services/admin/audit_service.py +78 -0
- kairo/backend/services/admin/content_service.py +119 -0
- kairo/backend/services/admin/incident_service.py +94 -0
- kairo/backend/services/admin/stats_service.py +281 -0
- kairo/backend/services/admin/system_service.py +126 -0
- kairo/backend/services/admin/user_service.py +157 -0
- kairo/backend/services/agent_service.py +107 -0
- kairo/backend/services/api_key_service.py +66 -0
- kairo/backend/services/api_usage_service.py +126 -0
- kairo/backend/services/auth_service.py +101 -0
- kairo/backend/services/chat_service.py +501 -0
- kairo/backend/services/conversation_service.py +264 -0
- kairo/backend/services/device_auth_service.py +193 -0
- kairo/backend/services/email_service.py +55 -0
- kairo/backend/services/image_service.py +181 -0
- kairo/backend/services/llm_service.py +186 -0
- kairo/backend/services/project_service.py +109 -0
- kairo/backend/services/status_service.py +167 -0
- kairo/backend/services/stripe_service.py +78 -0
- kairo/backend/services/usage_service.py +150 -0
- kairo/backend/services/web_search_service.py +96 -0
- kairo/migrations/env.py +60 -0
- kairo/migrations/versions/001_initial.py +55 -0
- kairo/migrations/versions/002_usage_tracking_and_indexes.py +66 -0
- kairo/migrations/versions/003_username_to_email.py +21 -0
- kairo/migrations/versions/004_add_plans_and_verification.py +67 -0
- kairo/migrations/versions/005_add_projects.py +52 -0
- kairo/migrations/versions/006_add_image_generation.py +63 -0
- kairo/migrations/versions/007_add_admin_portal.py +107 -0
- kairo/migrations/versions/008_add_device_code_auth.py +76 -0
- kairo/migrations/versions/009_add_status_page.py +65 -0
- kairo/tools/extract_claude_data.py +465 -0
- kairo/tools/filter_claude_data.py +303 -0
- kairo/tools/generate_curated_data.py +157 -0
- kairo/tools/mix_training_data.py +295 -0
- kairo_code/__init__.py +3 -0
- kairo_code/agents/__init__.py +25 -0
- kairo_code/agents/architect.py +98 -0
- kairo_code/agents/audit.py +100 -0
- kairo_code/agents/base.py +463 -0
- kairo_code/agents/coder.py +155 -0
- kairo_code/agents/database.py +77 -0
- kairo_code/agents/docs.py +88 -0
- kairo_code/agents/explorer.py +62 -0
- kairo_code/agents/guardian.py +80 -0
- kairo_code/agents/planner.py +66 -0
- kairo_code/agents/reviewer.py +91 -0
- kairo_code/agents/security.py +94 -0
- kairo_code/agents/terraform.py +88 -0
- kairo_code/agents/testing.py +97 -0
- kairo_code/agents/uiux.py +88 -0
- kairo_code/auth.py +232 -0
- kairo_code/config.py +172 -0
- kairo_code/conversation.py +173 -0
- kairo_code/heartbeat.py +63 -0
- kairo_code/llm.py +291 -0
- kairo_code/logging_config.py +156 -0
- kairo_code/main.py +818 -0
- kairo_code/router.py +217 -0
- kairo_code/sandbox.py +248 -0
- kairo_code/settings.py +183 -0
- kairo_code/tools/__init__.py +51 -0
- kairo_code/tools/analysis.py +509 -0
- kairo_code/tools/base.py +417 -0
- kairo_code/tools/code.py +58 -0
- kairo_code/tools/definitions.py +617 -0
- kairo_code/tools/files.py +315 -0
- kairo_code/tools/review.py +390 -0
- kairo_code/tools/search.py +185 -0
- kairo_code/ui.py +418 -0
- kairo_code-0.1.0.dist-info/METADATA +13 -0
- kairo_code-0.1.0.dist-info/RECORD +144 -0
- kairo_code-0.1.0.dist-info/WHEEL +5 -0
- kairo_code-0.1.0.dist-info/entry_points.txt +2 -0
- kairo_code-0.1.0.dist-info/top_level.txt +4 -0
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""Base agent class with robust tool loop execution"""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import time
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Generator, Callable
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
from ..llm import LLM
|
|
11
|
+
from ..config import Config
|
|
12
|
+
from ..tools.base import (
|
|
13
|
+
ToolRegistry, parse_tool_calls, ToolResult,
|
|
14
|
+
format_tool_error, ToolParseResult, ParsedToolCall
|
|
15
|
+
)
|
|
16
|
+
from ..logging_config import get_agent_logger, log_tool_call
|
|
17
|
+
|
|
18
|
+
logger = get_agent_logger()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AgentStatus(Enum):
    """Lifecycle state of a single agent run."""

    # Default state of a fresh AgentState; the tool loop is still going.
    RUNNING = "running"
    # The model answered without requesting any further tool calls.
    COMPLETED = "completed"
    # The LLM call raised, or too many consecutive responses failed to parse.
    ERROR = "error"
    # The iteration budget ran out while the run was still RUNNING.
    MAX_ITERATIONS = "max_iterations"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
|
|
29
|
+
class AgentEvent:
|
|
30
|
+
"""Structured event from agent execution."""
|
|
31
|
+
type: str # "thinking", "tool_start", "tool_result", "text", "error", "done"
|
|
32
|
+
content: str = ""
|
|
33
|
+
tool_name: str | None = None
|
|
34
|
+
tool_params: dict | None = None
|
|
35
|
+
success: bool = True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class AgentMessage:
    """A single entry in the agent's conversation history."""

    # Who produced the entry: "user", "assistant", "tool" or "error".
    role: str
    # Text of the message.
    content: str
    # Tool that produced the entry (used for "tool" messages).
    tool_name: str | None = None
    # Full result object of the tool call, when one is attached.
    tool_result: ToolResult | None = None
    # Thinking text extracted from an assistant response, if any.
    thinking: str | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class AgentState:
    """Mutable bookkeeping for one agent execution."""

    # Conversation history, oldest entry first.
    messages: list[AgentMessage] = field(default_factory=list)

    # Counters.
    tool_calls_made: int = 0
    failed_parses: int = 0
    retries: int = 0

    # Where the run currently stands and what it finally produced.
    status: AgentStatus = AgentStatus.RUNNING
    final_response: str = ""
    thinking_log: list[str] = field(default_factory=list)

    # Approximate size of the conversation context, in tokens.
    estimated_tokens: int = 0

    # Loop prevention: (tool_name, key_param) -> whether that call succeeded.
    recent_tool_calls: dict[tuple[str, str], bool] = field(default_factory=dict)

    # Loop prevention for file writes: path -> hash of the written content.
    recent_file_hashes: dict[str, str] = field(default_factory=dict)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class Agent(ABC):
    """
    Abstract base for agents that solve multi-step tasks by calling tools.

    What the base class provides:
    - Tool-call parsing that accepts several formats
    - Retries with error feedback when a tool call is malformed
    - Extraction of thinking/scratchpad text
    - Tracking of the (approximate) context window size
    - Optional user confirmation before destructive actions
    """

    # Identity shown in logs; subclasses override both.
    name: str = "base_agent"
    description: str = "Base agent"

    # Loop budgets.
    max_iterations: int = 10
    max_retries: int = 3  # Retries per malformed tool call
    max_context_tokens: int = 6000  # Conservative limit for 7B models

    # Tools that need user confirmation; override in subclasses.
    require_confirmation: list[str] = ["write_file", "bash"]
|
|
89
|
+
|
|
90
|
+
def __init__(
    self,
    llm: LLM,
    tools: ToolRegistry,
    on_confirm: Callable[[str], bool] | None = None,
):
    """Wire the agent to its collaborators and start with a fresh state.

    Args:
        llm: Client used to obtain model responses.
        tools: Registry the agent looks tools up in.
        on_confirm: Optional callback that receives a confirmation prompt
            and returns True to allow the action; when omitted, no
            confirmation is requested.
    """
    # Collaborators handed in by the caller.
    self.llm = llm
    self.tools = tools
    self.on_confirm = on_confirm

    # Per-agent objects created here.
    self.state = AgentState()
    self.config = Config()  # Safety settings (command filter, auto-approve)
|
|
101
|
+
|
|
102
|
+
@abstractmethod
def get_system_prompt(self) -> str:
    """Return the system prompt for this agent.

    Abstract: every concrete agent must provide its own implementation.
    """
|
|
106
|
+
|
|
107
|
+
def get_tool_examples(self) -> str:
    """Return few-shot tool-usage examples for the system prompt.

    The base implementation has none and returns an empty string;
    subclasses may override it.
    """
    no_examples = ""
    return no_examples
|
|
110
|
+
|
|
111
|
+
def run_events(self, task: str) -> Generator[AgentEvent, None, None]:
    """
    Run the agent on ``task`` and yield structured events.

    This is the preferred method for UI rendering - it yields clean events
    instead of raw LLM chunks.

    Events, in the order they can occur per iteration:
    - "thinking": emitted before each LLM call.
    - "error": the LLM call raised, too many consecutive unparseable
      responses, a tool call was cancelled by the user, or the iteration
      budget was exhausted.
    - "text": the cleaned final answer (a response without tool calls).
    - "tool_start" / "tool_result": around every executed tool call; the
      result content is truncated to 500 characters for display.
    - "done": always the last event.

    ``self.state`` is replaced with a fresh ``AgentState`` at the start, so
    each call is an independent run.
    """
    logger.info(f"=== Agent '{self.name}' starting task ===")
    logger.info(f"Task: {task[:200]}...")

    self.state = AgentState()
    self.state.messages.append(AgentMessage(role="user", content=task))
    self._update_token_estimate()

    consecutive_failures = 0
    # A configured iteration budget takes precedence over the class default.
    max_iter = self.config.max_iterations or self.max_iterations

    iteration = 0
    while iteration < max_iter:
        iteration += 1
        logger.info(f"--- Iteration {iteration}/{max_iter} ---")

        # Check context limits
        if self.state.estimated_tokens > self.max_context_tokens:
            logger.warning(f"Context limit approaching: {self.state.estimated_tokens} tokens")
            self._summarize_history()

        # Build messages for LLM
        messages = self._build_messages()

        # Notify UI that we're waiting for LLM
        yield AgentEvent(type="thinking", content=f"Thinking... (iteration {iteration})")

        # Get LLM response (don't stream to UI - collect it)
        response_chunks = []
        llm_start = time.time()
        try:
            for chunk in self.llm.chat(messages, stream=True):
                response_chunks.append(chunk)
            llm_elapsed = time.time() - llm_start
            logger.info(f"LLM response received in {llm_elapsed:.2f}s")
        except Exception as e:
            logger.error(f"LLM Error: {e}")
            yield AgentEvent(type="error", content=str(e))
            self.state.status = AgentStatus.ERROR
            break

        full_response = "".join(response_chunks)

        # Parse the response for tool calls and thinking
        parse_result = parse_tool_calls(full_response)

        # Add assistant message to state
        self.state.messages.append(
            AgentMessage(
                role="assistant",
                content=full_response,
                thinking=parse_result.thinking_content if parse_result.has_thinking else None,
            )
        )

        # Handle parse errors
        if parse_result.parse_errors and not parse_result.calls:
            consecutive_failures += 1
            if consecutive_failures >= self.max_retries:
                yield AgentEvent(type="error", content="Max retries reached")
                self.state.status = AgentStatus.ERROR
                break
            error_msg = format_tool_error(f"Failed to parse: {parse_result.parse_errors}")
            self.state.messages.append(AgentMessage(role="error", content=error_msg))
            # Two messages were just added; refresh the estimate so the
            # context check at the top of the next iteration is not stale.
            self._update_token_estimate()
            continue

        # No tool calls - agent is done
        if not parse_result.calls:
            # Clean the response and yield as final text
            cleaned = self._clean_response_text(full_response)
            yield AgentEvent(type="text", content=cleaned)
            self.state.status = AgentStatus.COMPLETED
            self.state.final_response = full_response
            break

        # Reset failures on successful parse
        consecutive_failures = 0

        # Execute tool calls
        for call in parse_result.calls:
            # Yield tool start event
            yield AgentEvent(
                type="tool_start",
                tool_name=call.tool_name,
                tool_params=call.params,
            )

            result = self._execute_tool_call(call)

            if result is None:
                yield AgentEvent(type="error", content="Action cancelled by user")
                # Record the cancellation in the conversation as well;
                # otherwise the model never learns the call was denied and
                # tends to issue the very same call again.
                self.state.messages.append(
                    AgentMessage(
                        role="tool",
                        content=f"[Action cancelled by user: {call.tool_name} was not executed.]",
                        tool_name=call.tool_name,
                    )
                )
                continue

            # Yield tool result event
            yield AgentEvent(
                type="tool_result",
                tool_name=call.tool_name,
                content=result.output[:500] if result.output else "",
                success=result.success,
            )

            # Add result to messages
            if result.success:
                content = result.output
            else:
                content = f"Error: {result.error}"
                if result.output:
                    content += f"\n\nOutput:\n{result.output}"

            self.state.messages.append(
                AgentMessage(
                    role="tool",
                    content=content,
                    tool_name=call.tool_name,
                    tool_result=result,
                )
            )

        self._update_token_estimate()

    # Check if we hit max iterations (every early exit above changes status)
    if iteration >= max_iter and self.state.status == AgentStatus.RUNNING:
        self.state.status = AgentStatus.MAX_ITERATIONS
        yield AgentEvent(type="error", content="Max iterations reached")

    yield AgentEvent(type="done")
|
|
244
|
+
|
|
245
|
+
def _clean_response_text(self, text: str) -> str:
|
|
246
|
+
"""Remove tool calls and thinking from response text."""
|
|
247
|
+
import re
|
|
248
|
+
# Remove thinking blocks
|
|
249
|
+
text = re.sub(r'<thinking>.*?</thinking>', '', text, flags=re.DOTALL)
|
|
250
|
+
# Remove tool calls
|
|
251
|
+
text = re.sub(r'<tool>.*?</tool>\s*<params>.*?</params>', '', text, flags=re.DOTALL)
|
|
252
|
+
# Clean up extra whitespace
|
|
253
|
+
text = re.sub(r'\n{3,}', '\n\n', text)
|
|
254
|
+
return text.strip()
|
|
255
|
+
|
|
256
|
+
def _execute_tool_call(self, call: ParsedToolCall) -> ToolResult | None:
    """Execute a single tool call with optional confirmation.

    Checks run in this order; the first one that applies short-circuits:
    1. Duplicate call (same tool + key parameter): web_search duplicates
       and repeats of a previously successful call are answered with a
       synthetic "already executed" result instead of running again.
    2. Unknown tool: failure result listing the available tool names.
    3. bash command rejected by the config: failure result with the reason.
    4. write_file with content identical to the last successful write of
       that path: synthetic "already has this content" result.
    5. User confirmation (when required and not auto-approved).

    Returns:
        The tool's ``ToolResult`` (or a synthetic one from the checks
        above), or ``None`` when the user declined the confirmation.
        Exceptions raised by the tool are converted to failure results.
    """
    logger.info(f"Executing tool: {call.tool_name}")
    logger.debug(f"Tool params: {call.params}")

    # Check for duplicate tool calls (especially web_search loops)
    key_param = self._get_tool_key_param(call)
    call_signature = (call.tool_name, key_param)

    if call_signature in self.state.recent_tool_calls:
        prev_success = self.state.recent_tool_calls[call_signature]

        # For web_search: always block duplicates (main source of loops)
        # For bash: allow retry if previous attempt failed/timed out
        if call.tool_name == "web_search" or prev_success:
            logger.warning(f"Duplicate tool call detected: {call.tool_name}({key_param})")
            return ToolResult(
                success=True,
                output=f"[Already executed {call.tool_name} with similar parameters. Move on to the next step - output your results or plan.]",
                error=None,
            )
        logger.info(f"Allowing retry of failed {call.tool_name}({key_param})")

    tool = self.tools.get(call.tool_name)

    if not tool:
        # Try to suggest correct tool name
        available = self.tools.get_tool_names()
        suggestion = f"Available tools: {', '.join(available)}"
        logger.error(f"Unknown tool: {call.tool_name}")
        return ToolResult(
            success=False,
            output="",
            error=f"Unknown tool '{call.tool_name}'. {suggestion}",
        )

    # Safety check for bash commands
    if call.tool_name == "bash":
        command = call.params.get("command", "")
        allowed, reason = self.config.is_command_allowed(command)
        if not allowed:
            logger.warning(f"Blocked bash command: {command} - {reason}")
            return ToolResult(
                success=False,
                output="",
                error=f"Command blocked: {reason}",
            )

    # Check for identical file content (writing same thing repeatedly).
    # The hash is only *remembered* after the write actually succeeded
    # (see below); recording it up front would make a denied or failed
    # write look like it had already happened.
    pending_write: tuple[str, str] | None = None
    if call.tool_name == "write_file":
        path = str(call.params.get("path", ""))
        content = str(call.params.get("content", ""))
        # md5 is used as a cheap fingerprint, not for security;
        # surrogatepass keeps model output with lone surrogates hashable.
        content_hash = hashlib.md5(
            content.encode("utf-8", errors="surrogatepass"),
            usedforsecurity=False,
        ).hexdigest()

        if self.state.recent_file_hashes.get(path) == content_hash:
            logger.warning(f"Identical content write detected for {path}")
            return ToolResult(
                success=True,
                output=f"[File {path} already has this exact content. Make DIFFERENT changes or move to the next step.]",
                error=None,
            )

        pending_write = (path, content_hash)

    # Check if confirmation is required (respecting auto_approve settings)
    needs_confirm = tool.name in self.require_confirmation
    auto_approved = self.config.should_auto_approve(tool.name)

    if self.on_confirm and needs_confirm and not auto_approved:
        params_preview = str(call.params)[:100]
        logger.debug(f"Requesting user confirmation for {tool.name}")
        if not self.on_confirm(f"Execute {tool.name}({params_preview})?"):
            logger.info(f"User denied execution of {tool.name}")
            return None

    # Execute the tool
    try:
        self.state.tool_calls_made += 1
        start_time = time.perf_counter()
        logger.debug(f"Starting tool execution: {tool.name}")
        result = tool.execute(**call.params)
        elapsed = time.perf_counter() - start_time
        logger.info(f"Tool {tool.name} completed in {elapsed:.2f}s (success={result.success})")
        log_tool_call(tool.name, call.params, result)

        # Track success for duplicate detection (allows retry on failure)
        self.state.recent_tool_calls[call_signature] = result.success

        # Remember what was written, now that the write really happened.
        if pending_write is not None and result.success:
            written_path, written_hash = pending_write
            self.state.recent_file_hashes[written_path] = written_hash
        return result
    except TypeError as e:
        # Wrong parameters
        logger.error(f"Invalid parameters for {tool.name}: {e}")
        self.state.recent_tool_calls[call_signature] = False
        return ToolResult(
            success=False,
            output="",
            error=f"Invalid parameters for {tool.name}: {e}",
        )
    except Exception as e:
        logger.error(f"Tool execution failed: {e}")
        self.state.recent_tool_calls[call_signature] = False
        return ToolResult(
            success=False,
            output="",
            error=f"Tool execution failed: {e}",
        )
|
|
365
|
+
|
|
366
|
+
def _get_tool_key_param(self, call: ParsedToolCall) -> str:
|
|
367
|
+
"""Extract the key parameter for duplicate detection.
|
|
368
|
+
|
|
369
|
+
Only dedupe tools that cause loops when repeated:
|
|
370
|
+
- web_search: same query is definitely a loop
|
|
371
|
+
- bash: same command might be intentional retry, but limit it
|
|
372
|
+
|
|
373
|
+
Don't dedupe:
|
|
374
|
+
- write_file: updating same file with different content is normal
|
|
375
|
+
- read_file: re-reading files is often needed
|
|
376
|
+
"""
|
|
377
|
+
params = call.params
|
|
378
|
+
|
|
379
|
+
# Only dedupe web_search (the main source of loops)
|
|
380
|
+
if call.tool_name == "web_search":
|
|
381
|
+
query = params.get("query", "")
|
|
382
|
+
return query.lower().strip()
|
|
383
|
+
|
|
384
|
+
# For bash, only dedupe exact same command
|
|
385
|
+
if call.tool_name == "bash":
|
|
386
|
+
cmd = params.get("command", "")
|
|
387
|
+
return cmd.strip()
|
|
388
|
+
|
|
389
|
+
# Don't dedupe other tools - return unique value each time
|
|
390
|
+
import time
|
|
391
|
+
return f"{call.tool_name}_{time.time()}"
|
|
392
|
+
|
|
393
|
+
def _build_messages(self) -> list[dict]:
|
|
394
|
+
"""Build message list for LLM with system prompt and examples."""
|
|
395
|
+
system_content = self.get_system_prompt()
|
|
396
|
+
|
|
397
|
+
# Add tool examples if available
|
|
398
|
+
examples = self.get_tool_examples()
|
|
399
|
+
if examples:
|
|
400
|
+
system_content += f"\n\n## Examples\n{examples}"
|
|
401
|
+
|
|
402
|
+
messages = [{"role": "system", "content": system_content}]
|
|
403
|
+
|
|
404
|
+
# Critical instruction reminder to prepend to first user message
|
|
405
|
+
# (Some models ignore system prompts, so repeat key rules here)
|
|
406
|
+
instruction_reminder = """[INSTRUCTION: You MUST use tools. Do NOT write code in markdown blocks.
|
|
407
|
+
Use this format: <tool>tool_name</tool><params>{"key": "value"}</params>
|
|
408
|
+
Do ONE web_search at most, then immediately start writing code with write_file.]
|
|
409
|
+
|
|
410
|
+
"""
|
|
411
|
+
|
|
412
|
+
for i, msg in enumerate(self.state.messages):
|
|
413
|
+
if msg.role == "tool":
|
|
414
|
+
# Format tool results clearly
|
|
415
|
+
content = f"[Tool Result: {msg.tool_name}]\n{msg.content}"
|
|
416
|
+
messages.append({"role": "user", "content": content})
|
|
417
|
+
elif msg.role == "error":
|
|
418
|
+
# Error feedback
|
|
419
|
+
messages.append({"role": "user", "content": msg.content})
|
|
420
|
+
elif msg.role == "user" and i == 0:
|
|
421
|
+
# Prepend instruction reminder to first user message
|
|
422
|
+
messages.append({"role": "user", "content": instruction_reminder + msg.content})
|
|
423
|
+
else:
|
|
424
|
+
messages.append({"role": msg.role, "content": msg.content})
|
|
425
|
+
|
|
426
|
+
return messages
|
|
427
|
+
|
|
428
|
+
def _update_token_estimate(self) -> None:
|
|
429
|
+
"""Estimate current context size (rough approximation)."""
|
|
430
|
+
total_chars = sum(len(msg.content) for msg in self.state.messages)
|
|
431
|
+
# Rough estimate: 4 chars per token
|
|
432
|
+
self.state.estimated_tokens = total_chars // 4
|
|
433
|
+
|
|
434
|
+
def _summarize_history(self) -> None:
    """Summarize older messages to reduce context size.

    Keeps the first message (the original task) and the last three
    messages; everything in between is replaced by one "user" message
    listing which tools were used and the first 100 characters of each
    recorded thought. Bullets from an earlier summary are carried over, so
    repeated summarization does not forget what was summarized before.
    Does nothing when there are four messages or fewer.
    """
    summary_header = "[Previous actions summary]"
    history = self.state.messages

    # With more than four messages the middle slice is never empty.
    if len(history) <= 4:
        return

    summary_parts = []
    for msg in history[1:-3]:
        if msg.role == "tool":
            summary_parts.append(f"- Used {msg.tool_name}")
        elif msg.role == "assistant" and msg.thinking:
            summary_parts.append(f"- Thought: {msg.thinking[:100]}...")
        elif msg.role == "user" and msg.content.startswith(summary_header):
            # An earlier summary (the only "user" entries after the task
            # are summaries written by this method): keep its bullets
            # instead of silently dropping them.
            summary_parts.extend(
                line for line in msg.content.splitlines()[1:] if line
            )

    summary = summary_header + "\n" + "\n".join(summary_parts)

    # Replace middle messages with summary
    self.state.messages = (
        [history[0]]
        + [AgentMessage(role="user", content=summary)]
        + history[-3:]
    )

    self._update_token_estimate()
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Coder agent for writing and modifying code.
|
|
2
|
+
|
|
3
|
+
Prompt design based on research:
|
|
4
|
+
- Concise > verbose (reduces hallucination)
|
|
5
|
+
- Clear role + constraints + fallback
|
|
6
|
+
- Key rules at start AND end
|
|
7
|
+
- No lengthy examples (dilutes core directive)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .base import Agent
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CoderAgent(Agent):
|
|
14
|
+
"""
|
|
15
|
+
Agent specialized for writing and modifying code.
|
|
16
|
+
|
|
17
|
+
Features:
|
|
18
|
+
- Creates new files
|
|
19
|
+
- Modifies existing code with targeted edits
|
|
20
|
+
- Fixes bugs
|
|
21
|
+
- Implements features
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
name = "coder"
|
|
25
|
+
description = "Writes, modifies, and improves code"
|
|
26
|
+
max_iterations = 12 # Reduced from 20 to prevent looping
|
|
27
|
+
require_confirmation = ["write_file", "bash"]
|
|
28
|
+
|
|
29
|
+
def get_system_prompt(self) -> str:
|
|
30
|
+
tools_desc = self.tools.format_for_prompt()
|
|
31
|
+
|
|
32
|
+
# Check Python availability for dynamic prompt
|
|
33
|
+
import os
|
|
34
|
+
import shutil
|
|
35
|
+
python_cmd = os.environ.get("KAIRO_PYTHON_CMD")
|
|
36
|
+
if not python_cmd:
|
|
37
|
+
for cmd in ["python3", "python"]:
|
|
38
|
+
if shutil.which(cmd):
|
|
39
|
+
python_cmd = cmd
|
|
40
|
+
break
|
|
41
|
+
|
|
42
|
+
# Build Python-specific guidance
|
|
43
|
+
if python_cmd:
|
|
44
|
+
python_note = f"- Use {python_cmd} to run scripts (not 'python' directly)"
|
|
45
|
+
else:
|
|
46
|
+
python_note = """- Python is NOT installed on this system
|
|
47
|
+
- If the task requires Python, inform the user they need to install it first
|
|
48
|
+
- Suggest: sudo apt-get install python3 (or appropriate command for their system)
|
|
49
|
+
- You can still write Python files, but cannot execute them until Python is installed"""
|
|
50
|
+
|
|
51
|
+
# Autonomous agent prompt - research, plan, then execute
|
|
52
|
+
return f"""You are an autonomous coding agent. You research, plan, then write code.
|
|
53
|
+
|
|
54
|
+
CRITICAL RULES:
|
|
55
|
+
- NEVER use input() - it causes timeouts! Use hardcoded test values instead.
|
|
56
|
+
- NEVER output your thinking/reasoning - just execute tools and give brief status updates
|
|
57
|
+
- Keep text output SHORT - just say what you're doing, not why
|
|
58
|
+
|
|
59
|
+
{tools_desc}
|
|
60
|
+
|
|
61
|
+
## YOUR PROCESS (follow in order)
|
|
62
|
+
|
|
63
|
+
### STEP 1: RESEARCH (required for new projects)
|
|
64
|
+
- web_search to find relevant API docs or libraries
|
|
65
|
+
- web_fetch to READ the actual documentation URL (get the real endpoints!)
|
|
66
|
+
- Understand the API structure before writing any code
|
|
67
|
+
- NEVER guess API endpoints - always verify from docs
|
|
68
|
+
|
|
69
|
+
### STEP 2: PLAN (state your plan clearly)
|
|
70
|
+
Before writing code, output a brief plan:
|
|
71
|
+
- What files you'll create
|
|
72
|
+
- What the architecture looks like
|
|
73
|
+
- What dependencies are needed
|
|
74
|
+
|
|
75
|
+
### STEP 3: IMPLEMENT
|
|
76
|
+
- write_file to create each file
|
|
77
|
+
- Keep files under 50 lines - split into modules if needed
|
|
78
|
+
- Use hardcoded test values (NEVER input())
|
|
79
|
+
- For GUIs: use ttk widgets (not plain tk) for modern look
|
|
80
|
+
- Add padding/margins (padx=10, pady=5) for clean layout
|
|
81
|
+
- Always add error handling for API calls (try/except, check response)
|
|
82
|
+
|
|
83
|
+
### STEP 4: TEST & FIX
|
|
84
|
+
- bash to run and test
|
|
85
|
+
- If errors occur, READ THE ERROR MESSAGE CAREFULLY:
|
|
86
|
+
- ModuleNotFoundError: Install the missing module with pip install
|
|
87
|
+
- SyntaxError: Fix the syntax in the specific file/line mentioned
|
|
88
|
+
- ImportError: Check the import path is correct
|
|
89
|
+
- FileNotFoundError: Create the missing file or fix the path
|
|
90
|
+
- If the SAME error keeps happening after 2 attempts, try a DIFFERENT approach
|
|
91
|
+
- Do NOT retry the exact same command more than twice
|
|
92
|
+
- Do NOT strip down code - fix the real problem
|
|
93
|
+
|
|
94
|
+
## TOOL FORMAT
|
|
95
|
+
Standard format:
|
|
96
|
+
<tool>tool_name</tool>
|
|
97
|
+
<params>{{"key": "value"}}</params>
|
|
98
|
+
|
|
99
|
+
For write_file (RECOMMENDED - no JSON escaping needed):
|
|
100
|
+
<write_file path="filename.py">
|
|
101
|
+
your code here
|
|
102
|
+
multiple lines ok
|
|
103
|
+
</write_file>
|
|
104
|
+
|
|
105
|
+
For edit_file:
|
|
106
|
+
<tool>edit_file</tool>
|
|
107
|
+
<params>{{"path": "file.py", "old_string": "old code", "new_string": "new code"}}</params>
|
|
108
|
+
|
|
109
|
+
## CRITICAL RULES
|
|
110
|
+
- NEVER show code in ```python blocks - ONLY use write_file
|
|
111
|
+
- NEVER use input() - causes timeouts. Use: sport = "Soccer"
|
|
112
|
+
- ALWAYS web_fetch API docs before using an API
|
|
113
|
+
- When fixing errors: analyze and fix, don't simplify
|
|
114
|
+
- NEVER write the SAME file with identical content twice - if content didn't change, move on
|
|
115
|
+
- If pip install fails repeatedly, the package may not exist or have a different name
|
|
116
|
+
- tkinter is a SYSTEM package on Linux - install with: sudo apt-get install python3-tk
|
|
117
|
+
|
|
118
|
+
## PYTHON VIRTUAL ENVIRONMENT (for externally-managed-environment errors)
|
|
119
|
+
If you see "externally-managed-environment" error, use a venv:
|
|
120
|
+
1. Create venv: python3 -m venv venv
|
|
121
|
+
2. Install packages: venv/bin/pip install requests (NOT pip install!)
|
|
122
|
+
3. Run scripts: venv/bin/python app.py (NOT python app.py!)
|
|
123
|
+
IMPORTANT: Do NOT use "source venv/bin/activate" - it doesn't work. Use venv/bin/pip and venv/bin/python directly.
|
|
124
|
+
|
|
125
|
+
## SUDO AND SYSTEM PACKAGES
|
|
126
|
+
- You CAN use sudo apt-get install for system packages
|
|
127
|
+
- The -y flag is added automatically to prevent hangs
|
|
128
|
+
- For tkinter: sudo apt-get install python3-tk
|
|
129
|
+
- For other packages: use pip or apt-get as appropriate
|
|
130
|
+
|
|
131
|
+
## FILE SIZE LIMITS
|
|
132
|
+
- Keep each file UNDER 100 lines
|
|
133
|
+
- Split larger code into multiple module files
|
|
134
|
+
- Write ONE file at a time, verify it works before writing the next
|
|
135
|
+
|
|
136
|
+
## CODE QUALITY
|
|
137
|
+
- GUIs must look professional: use ttk, proper spacing, readable fonts
|
|
138
|
+
- API calls must have error handling: check status codes, handle missing data
|
|
139
|
+
- Test with real data before declaring done
|
|
140
|
+
- If something doesn't work, FIX IT - don't leave broken code
|
|
141
|
+
{python_note}
|
|
142
|
+
|
|
143
|
+
## EXAMPLE WORKFLOW
|
|
144
|
+
1. User asks for sports schedule app
|
|
145
|
+
2. You: web_search "TheSportsDB API documentation"
|
|
146
|
+
3. You: web_fetch the documentation URL
|
|
147
|
+
4. You: State plan - "I'll create app.py with Flask backend..."
|
|
148
|
+
5. You: write_file for each file
|
|
149
|
+
6. You: bash to test
|
|
150
|
+
7. You: Fix any errors by analyzing the actual problem"""
|
|
151
|
+
|
|
152
|
+
def get_tool_examples(self) -> str:
    """Return the extra tool-usage examples for this agent (none).

    The verbose examples were removed on purpose: per the original note,
    research shows they dilute the core directive, and the system prompt
    now carries minimal inline examples instead.
    """
    no_examples = ""
    return no_examples
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Database specialist agent — schema design, query optimization, migrations.
|
|
2
|
+
|
|
3
|
+
Mirrors the database-specialist Claude Code agent.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .base import Agent
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DatabaseAgent(Agent):
    """Agent persona for database design, query optimization, migrations, and troubleshooting."""

    name = "database"
    description = "Database design, query optimization, migrations, and troubleshooting"
    max_iterations = 15
    # NOTE(review): presumably the tool names that need user confirmation
    # before running -- confirm against the Agent base class.
    require_confirmation = ["write_file", "bash"]

    def get_system_prompt(self) -> str:
        """Assemble the system prompt for the database specialist.

        The prompt is made of three parts separated by blank lines: the
        role introduction, the tool listing produced by
        ``self.tools.format_for_prompt()``, and the fixed guidance sections.
        """
        available_tools = self.tools.format_for_prompt()

        role_intro = (
            "You are the Database Specialist Agent, a senior-level expert "
            "functioning as a database architect, query optimizer, and "
            "troubleshooting specialist. You have deep expertise across SQL, "
            "NoSQL, distributed systems, and cloud-native data platforms."
        )

        # Plain (non-f) string: the JSON braces in the TOOL FORMAT example
        # are written literally here and need no escaping.
        guidance_sections = """## Core Responsibilities
- Diagnose and fix database issues with systematic root-cause analysis
- Design schemas, migrations, and indexing strategies that are production-ready
- Optimize queries for performance, cost, and scalability
- Explain tradeoffs between SQL and NoSQL with concrete reasoning
- Guide replication, sharding, partitioning, and clustering configurations
- Assist with cloud DB services (AWS RDS, Aurora, DynamoDB, etc.)
- Ensure security, compliance, and data-integrity best practices

## Technical Expertise
- **SQL**: PostgreSQL, MySQL, MariaDB, SQL Server, Oracle
- **NoSQL**: MongoDB, DynamoDB, Cassandra, Redis, Neo4j
- **Distributed**: Kafka, ClickHouse, Snowflake, BigQuery, Redshift
- **Indexing**: EXPLAIN plans, cost-based optimizers, join strategies
- **Transactions**: ACID, MVCC, isolation levels, locking, deadlock resolution
- **Data Modeling**: ERDs, normalization (1NF-BCNF), star/snowflake schemas
- **Scalability**: Sharding, partitioning, read replicas, caching layers
- **Security**: Encryption at rest/transit, least-privilege, audit logging, injection prevention
- **Migrations**: Version-controlled, rollback strategies, zero-downtime

## TOOL FORMAT
<tool>tool_name</tool>
<params>{"key": "value"}</params>

## Methodology

### Diagnosing Issues:
1. Identify symptoms and gather context
2. Form hypotheses based on common root causes
3. Ask targeted clarifying questions
4. Provide systematic diagnosis with evidence
5. Offer primary recommendation and alternatives

### Designing Schemas:
1. Clarify access patterns and query requirements
2. Consider current scale and anticipated growth
3. Present design with rationale for each decision
4. Highlight tradeoffs (consistency vs. availability, normalization vs. read performance)
5. Include indexing strategy and migration plan

### Optimizing Queries:
1. Analyze query structure and identify bottlenecks
2. Request/analyze EXPLAIN output
3. Propose optimizations by impact (indexing > query rewrite > schema change > infrastructure)
4. Before/after comparisons when possible
5. Warn about edge cases (data skew, NULL handling, implicit casts)

## Output Standards
- All SQL must be correct, optimized, and production-ready
- Include comments in complex queries
- Provide step-by-step reasoning
- Offer multiple solution paths with pros/cons
- Proactively warn about anti-patterns (N+1, missing FK indexes, over-indexing)"""

        return f"{role_intro}\n\n{available_tools}\n\n{guidance_sections}"

    def get_tool_examples(self) -> str:
        """Return the extra tool-usage examples for this agent (none)."""
        return ""
|