ripperdoc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +3 -0
- ripperdoc/__main__.py +25 -0
- ripperdoc/cli/__init__.py +1 -0
- ripperdoc/cli/cli.py +317 -0
- ripperdoc/cli/commands/__init__.py +76 -0
- ripperdoc/cli/commands/agents_cmd.py +234 -0
- ripperdoc/cli/commands/base.py +19 -0
- ripperdoc/cli/commands/clear_cmd.py +18 -0
- ripperdoc/cli/commands/compact_cmd.py +19 -0
- ripperdoc/cli/commands/config_cmd.py +31 -0
- ripperdoc/cli/commands/context_cmd.py +114 -0
- ripperdoc/cli/commands/cost_cmd.py +77 -0
- ripperdoc/cli/commands/exit_cmd.py +19 -0
- ripperdoc/cli/commands/help_cmd.py +20 -0
- ripperdoc/cli/commands/mcp_cmd.py +65 -0
- ripperdoc/cli/commands/models_cmd.py +327 -0
- ripperdoc/cli/commands/resume_cmd.py +97 -0
- ripperdoc/cli/commands/status_cmd.py +167 -0
- ripperdoc/cli/commands/tasks_cmd.py +240 -0
- ripperdoc/cli/commands/todos_cmd.py +69 -0
- ripperdoc/cli/commands/tools_cmd.py +19 -0
- ripperdoc/cli/ui/__init__.py +1 -0
- ripperdoc/cli/ui/context_display.py +297 -0
- ripperdoc/cli/ui/helpers.py +22 -0
- ripperdoc/cli/ui/rich_ui.py +1010 -0
- ripperdoc/cli/ui/spinner.py +50 -0
- ripperdoc/core/__init__.py +1 -0
- ripperdoc/core/agents.py +306 -0
- ripperdoc/core/commands.py +33 -0
- ripperdoc/core/config.py +382 -0
- ripperdoc/core/default_tools.py +57 -0
- ripperdoc/core/permissions.py +227 -0
- ripperdoc/core/query.py +682 -0
- ripperdoc/core/system_prompt.py +418 -0
- ripperdoc/core/tool.py +214 -0
- ripperdoc/sdk/__init__.py +9 -0
- ripperdoc/sdk/client.py +309 -0
- ripperdoc/tools/__init__.py +1 -0
- ripperdoc/tools/background_shell.py +291 -0
- ripperdoc/tools/bash_output_tool.py +98 -0
- ripperdoc/tools/bash_tool.py +822 -0
- ripperdoc/tools/file_edit_tool.py +281 -0
- ripperdoc/tools/file_read_tool.py +168 -0
- ripperdoc/tools/file_write_tool.py +141 -0
- ripperdoc/tools/glob_tool.py +134 -0
- ripperdoc/tools/grep_tool.py +232 -0
- ripperdoc/tools/kill_bash_tool.py +136 -0
- ripperdoc/tools/ls_tool.py +298 -0
- ripperdoc/tools/mcp_tools.py +804 -0
- ripperdoc/tools/multi_edit_tool.py +393 -0
- ripperdoc/tools/notebook_edit_tool.py +325 -0
- ripperdoc/tools/task_tool.py +282 -0
- ripperdoc/tools/todo_tool.py +362 -0
- ripperdoc/tools/tool_search_tool.py +366 -0
- ripperdoc/utils/__init__.py +1 -0
- ripperdoc/utils/bash_constants.py +51 -0
- ripperdoc/utils/bash_output_utils.py +43 -0
- ripperdoc/utils/exit_code_handlers.py +241 -0
- ripperdoc/utils/log.py +76 -0
- ripperdoc/utils/mcp.py +427 -0
- ripperdoc/utils/memory.py +239 -0
- ripperdoc/utils/message_compaction.py +640 -0
- ripperdoc/utils/messages.py +399 -0
- ripperdoc/utils/output_utils.py +233 -0
- ripperdoc/utils/path_utils.py +46 -0
- ripperdoc/utils/permissions/__init__.py +21 -0
- ripperdoc/utils/permissions/path_validation_utils.py +165 -0
- ripperdoc/utils/permissions/shell_command_validation.py +74 -0
- ripperdoc/utils/permissions/tool_permission_utils.py +279 -0
- ripperdoc/utils/safe_get_cwd.py +24 -0
- ripperdoc/utils/sandbox_utils.py +38 -0
- ripperdoc/utils/session_history.py +223 -0
- ripperdoc/utils/session_usage.py +110 -0
- ripperdoc/utils/shell_token_utils.py +95 -0
- ripperdoc/utils/todo.py +199 -0
- ripperdoc-0.1.0.dist-info/METADATA +178 -0
- ripperdoc-0.1.0.dist-info/RECORD +81 -0
- ripperdoc-0.1.0.dist-info/WHEEL +5 -0
- ripperdoc-0.1.0.dist-info/entry_points.txt +3 -0
- ripperdoc-0.1.0.dist-info/licenses/LICENSE +53 -0
- ripperdoc-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"""Message handling and formatting for Ripperdoc.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for creating and normalizing messages
|
|
4
|
+
for communication with AI models.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
|
+
from uuid import uuid4
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from ripperdoc.utils.log import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MessageRole(str, Enum):
    """Message roles in a conversation.

    Subclasses ``str`` so members compare equal to their plain string
    values (e.g. ``MessageRole.USER == "user"``) and serialize directly
    into API payloads.
    """

    USER = "user"  # message authored by the human user
    ASSISTANT = "assistant"  # message produced by the AI model
    SYSTEM = "system"  # system-level instruction message
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MessageContent(BaseModel):
    """Content of a message.

    One content block within a message. ``type`` selects the block kind
    (e.g. "text", "tool_use", "tool_result"); the remaining fields are
    populated depending on that kind.
    """

    type: str
    # Text payload for "text" blocks and tool result text.
    text: Optional[str] = None
    # Some providers return tool_use IDs as "id", others as "tool_use_id"
    id: Optional[str] = None
    tool_use_id: Optional[str] = None
    # Tool name for "tool_use" blocks.
    name: Optional[str] = None
    # Tool arguments for "tool_use" blocks.
    input: Optional[Dict[str, Any]] = None
    # Marks a failed tool result when set.
    is_error: Optional[bool] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _content_block_to_api(block: MessageContent) -> Dict[str, Any]:
    """Convert a MessageContent block into an API-ready dict for tool protocols.

    Tool-use and tool-result blocks get their canonical wire shape; any
    other block falls back to a plain text block.
    """

    def _attr(field: str) -> Any:
        return getattr(block, field, None)

    kind = _attr("type")

    if kind == "tool_use":
        # Accept the ID under either attribute name; never emit None.
        return {
            "type": "tool_use",
            "id": _attr("id") or getattr(block, "tool_use_id", "") or "",
            "name": _attr("name") or "",
            "input": _attr("input") or {},
        }

    if kind == "tool_result":
        mapped: Dict[str, Any] = {
            "type": "tool_result",
            "tool_use_id": _attr("tool_use_id") or _attr("id") or "",
            "content": [
                {
                    "type": "text",
                    "text": _attr("text") or _attr("content") or "",
                }
            ],
        }
        if _attr("is_error") is not None:
            mapped["is_error"] = block.is_error
        return mapped

    # Default: render anything else as a text block.
    return {
        "type": "text",
        "text": _attr("text") or _attr("content") or str(block),
    }
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _content_block_to_openai(block: MessageContent) -> Dict[str, Any]:
    """Convert a MessageContent block to OpenAI chat-completions tool call format."""
    kind = getattr(block, "type", None)

    if kind == "tool_use":
        import json

        arguments = getattr(block, "input", None) or {}
        try:
            serialized = json.dumps(arguments)
        except Exception:
            # Non-serializable input degrades to an empty argument object.
            serialized = "{}"
        call_id = (
            getattr(block, "id", None) or getattr(block, "tool_use_id", "") or str(uuid4())
        )
        call = {
            "id": call_id,
            "type": "function",
            "function": {
                "name": getattr(block, "name", None) or "",
                "arguments": serialized,
            },
        }
        return {"role": "assistant", "content": None, "tool_calls": [call]}

    if kind == "tool_result":
        # OpenAI expects role=tool messages after a tool call.
        call_id = getattr(block, "tool_use_id", None) or getattr(block, "id", None) or ""
        if not call_id:
            logger.debug("[_content_block_to_openai] Skipping tool_result without tool_call_id")
            return {}
        return {
            "role": "tool",
            "tool_call_id": call_id,
            "content": getattr(block, "text", None) or getattr(block, "content", None) or "",
        }

    # Fallback: plain assistant text message.
    return {
        "role": "assistant",
        "content": getattr(block, "text", None) or getattr(block, "content", None) or str(block),
    }
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class Message(BaseModel):
    """A message in a conversation."""

    role: MessageRole
    # Either plain text or a list of structured content blocks.
    content: Union[str, List[MessageContent]]
    # Unique identifier; auto-generated in __init__ when absent/falsy.
    uuid: str = ""

    def __init__(self, **data: Any) -> None:
        # Assign a fresh UUID when the caller did not supply one.
        if "uuid" not in data or not data["uuid"]:
            data["uuid"] = str(uuid4())
        super().__init__(**data)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class UserMessage(BaseModel):
    """User message with tool results."""

    # Discriminator used when routing mixed message payloads.
    type: str = "user"
    message: Message
    # Unique identifier; auto-generated in __init__ when absent/falsy.
    uuid: str = ""
    # Optional structured output of a tool call attached to this message
    # (normalized to a plain dict by create_user_message when possible).
    tool_use_result: Optional[Any] = None

    def __init__(self, **data: Any) -> None:
        # Assign a fresh UUID when the caller did not supply one.
        if "uuid" not in data or not data["uuid"]:
            data["uuid"] = str(uuid4())
        super().__init__(**data)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class AssistantMessage(BaseModel):
    """Assistant message with metadata."""

    # Discriminator used when routing mixed message payloads.
    type: str = "assistant"
    message: Message
    # Unique identifier; auto-generated in __init__ when absent/falsy.
    uuid: str = ""
    # API cost attributed to producing this message, in USD.
    cost_usd: float = 0.0
    # Wall-clock time spent producing this message, in milliseconds.
    duration_ms: float = 0.0
    # True when this message represents an API error rather than model output.
    is_api_error_message: bool = False

    def __init__(self, **data: Any) -> None:
        # Assign a fresh UUID when the caller did not supply one.
        if "uuid" not in data or not data["uuid"]:
            data["uuid"] = str(uuid4())
        super().__init__(**data)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class ProgressMessage(BaseModel):
    """Progress message during tool execution.

    Not sent to the API; normalize_messages_for_api filters these out.
    """

    type: str = "progress"
    # Unique identifier; auto-generated in __init__ when absent/falsy.
    uuid: str = ""
    # ID of the tool invocation this progress update belongs to.
    tool_use_id: str
    content: Any
    # NOTE: mutable defaults are safe here because pydantic copies field
    # defaults per-instance (unlike plain class attributes).
    normalized_messages: List[Message] = []
    # IDs of tool calls running alongside this one in the same batch.
    sibling_tool_use_ids: set[str] = set()
    # Allow non-pydantic types (e.g. set, arbitrary content objects).
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, **data: Any) -> None:
        # Assign a fresh UUID when the caller did not supply one.
        if "uuid" not in data or not data["uuid"]:
            data["uuid"] = str(uuid4())
        super().__init__(**data)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def create_user_message(
    content: Union[str, List[Dict[str, Any]]], tool_use_result: Optional[Any] = None
) -> UserMessage:
    """Create a user message.

    Args:
        content: Either plain text or a list of raw content-block dicts.
        tool_use_result: Optional tool output to attach; Pydantic models
            are converted to plain dicts when possible.
    """
    if isinstance(content, str):
        blocks: Union[str, List[MessageContent]] = content
    else:
        blocks = [MessageContent(**raw) for raw in content]

    # Normalize tool_use_result to a dict if it's a Pydantic model.
    if tool_use_result is not None:
        try:
            if hasattr(tool_use_result, "model_dump"):
                tool_use_result = tool_use_result.model_dump()
        except Exception:
            # Fallback: keep as-is if conversion fails.
            pass

    msg = Message(role=MessageRole.USER, content=blocks)

    # Debug: record tool_result shaping.
    if isinstance(blocks, list):
        result_blocks = [b for b in blocks if getattr(b, "type", None) == "tool_result"]
        if result_blocks:
            logger.debug(
                f"[create_user_message] tool_result blocks={len(result_blocks)} "
                f"ids={[getattr(b, 'tool_use_id', None) for b in result_blocks]}"
            )

    return UserMessage(message=msg, tool_use_result=tool_use_result)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def create_assistant_message(
    content: Union[str, List[Dict[str, Any]]], cost_usd: float = 0.0, duration_ms: float = 0.0
) -> AssistantMessage:
    """Create an assistant message.

    Args:
        content: Either plain text or a list of raw content-block dicts.
        cost_usd: API cost attributed to this message.
        duration_ms: Wall-clock time spent producing this message.
    """
    if isinstance(content, str):
        blocks: Union[str, List[MessageContent]] = content
    else:
        blocks = [MessageContent(**raw) for raw in content]

    return AssistantMessage(
        message=Message(role=MessageRole.ASSISTANT, content=blocks),
        cost_usd=cost_usd,
        duration_ms=duration_ms,
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def create_progress_message(
    tool_use_id: str,
    sibling_tool_use_ids: set[str],
    content: Any,
    normalized_messages: Optional[List[Message]] = None,
) -> ProgressMessage:
    """Create a progress message for an in-flight tool invocation."""
    history = normalized_messages or []
    return ProgressMessage(
        tool_use_id=tool_use_id,
        sibling_tool_use_ids=sibling_tool_use_ids,
        content=content,
        normalized_messages=history,
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def normalize_messages_for_api(
    messages: List[Union[UserMessage, AssistantMessage, ProgressMessage]],
    protocol: str = "anthropic",
) -> List[Dict[str, Any]]:
    """Normalize messages for API submission.

    Progress messages are filtered out as they are not sent to the API.

    Args:
        messages: Conversation history; items may also be plain dicts.
        protocol: "anthropic" (default, content-block format) or "openai"
            (chat-completions format with tool_calls / role=tool messages).

    Returns:
        A list of provider-ready message dicts.
    """

    def _msg_type(msg: Any) -> Optional[str]:
        # Works for both model objects and raw dict messages.
        if hasattr(msg, "type"):
            return getattr(msg, "type", None)
        if isinstance(msg, dict):
            return msg.get("type")
        return None

    def _msg_content(msg: Any) -> Any:
        # Unwrap the inner Message content from either object or dict shape.
        if hasattr(msg, "message"):
            return getattr(getattr(msg, "message", None), "content", None)
        if isinstance(msg, dict):
            message_payload = msg.get("message")
            if isinstance(message_payload, dict):
                return message_payload.get("content")
            if "content" in msg:
                return msg.get("content")
        return None

    normalized: List[Dict[str, Any]] = []
    tool_results_seen = 0
    tool_uses_seen = 0

    # Precompute tool_result positions so we can drop dangling tool_calls that
    # lack a following tool response (which OpenAI rejects).
    tool_result_positions: Dict[str, int] = {}
    skipped_tool_uses_no_result = 0
    skipped_tool_uses_no_id = 0
    if protocol == "openai":
        for idx, msg in enumerate(messages):
            if _msg_type(msg) != "user":
                continue
            content = _msg_content(msg)
            if not isinstance(content, list):
                continue
            for block in content:
                if getattr(block, "type", None) == "tool_result":
                    tool_id = getattr(block, "tool_use_id", None) or getattr(block, "id", None)
                    # First occurrence wins: record the earliest position.
                    if tool_id and tool_id not in tool_result_positions:
                        tool_result_positions[tool_id] = idx

    for msg_index, msg in enumerate(messages):
        msg_type = _msg_type(msg)
        if msg_type == "progress":
            # Skip progress messages
            continue
        if msg_type is None:
            # Unrecognized payload shape; drop it.
            continue

        if msg_type == "user":
            user_content = _msg_content(msg)
            if isinstance(user_content, list):
                if protocol == "openai":
                    # Map each block to an OpenAI-style message
                    openai_msgs: List[Dict[str, Any]] = []
                    for block in user_content:
                        if getattr(block, "type", None) == "tool_result":
                            tool_results_seen += 1
                        mapped = _content_block_to_openai(block)
                        # _content_block_to_openai returns {} for unmappable
                        # blocks (e.g. tool_result without an ID) — skip those.
                        if mapped:
                            openai_msgs.append(mapped)
                    normalized.extend(openai_msgs)
                    continue
                # Anthropic protocol: keep blocks inside one user message.
                api_blocks = []
                for block in user_content:
                    if getattr(block, "type", None) == "tool_result":
                        tool_results_seen += 1
                    api_blocks.append(_content_block_to_api(block))
                normalized.append({"role": "user", "content": api_blocks})
            else:
                # Plain-string content passes through unchanged.
                normalized.append(
                    {"role": "user", "content": user_content}  # type: ignore
                )
        elif msg_type == "assistant":
            asst_content = _msg_content(msg)
            if isinstance(asst_content, list):
                if protocol == "openai":
                    # Collect text and tool calls separately, then emit at
                    # most one text message and one tool_calls message.
                    assistant_openai_msgs: List[Dict[str, Any]] = []
                    tool_calls: List[Dict[str, Any]] = []
                    text_parts: List[str] = []
                    for block in asst_content:
                        if getattr(block, "type", None) == "tool_use":
                            tool_uses_seen += 1
                            tool_id = getattr(block, "tool_use_id", None) or getattr(
                                block, "id", None
                            )
                            if not tool_id:
                                skipped_tool_uses_no_id += 1
                                continue
                            # Skip tool_use blocks that are not followed by a tool_result
                            result_pos = tool_result_positions.get(tool_id)
                            if result_pos is None:
                                skipped_tool_uses_no_result += 1
                                continue
                            if result_pos <= msg_index:
                                # The result precedes this message — dangling.
                                skipped_tool_uses_no_result += 1
                                continue
                            mapped = _content_block_to_openai(block)
                            if mapped.get("tool_calls"):
                                tool_calls.extend(mapped["tool_calls"])
                        elif getattr(block, "type", None) == "text":
                            text_parts.append(getattr(block, "text", "") or "")
                        else:
                            mapped = _content_block_to_openai(block)
                            if mapped:
                                assistant_openai_msgs.append(mapped)
                    if text_parts:
                        assistant_openai_msgs.append(
                            {"role": "assistant", "content": "\n".join(text_parts)}
                        )
                    if tool_calls:
                        assistant_openai_msgs.append(
                            {
                                "role": "assistant",
                                "content": None,
                                "tool_calls": tool_calls,
                            }
                        )
                    normalized.extend(assistant_openai_msgs)
                    continue
                # Anthropic protocol: keep blocks inside one assistant message.
                api_blocks = []
                for block in asst_content:
                    if getattr(block, "type", None) == "tool_use":
                        tool_uses_seen += 1
                    api_blocks.append(_content_block_to_api(block))
                normalized.append({"role": "assistant", "content": api_blocks})
            else:
                # Plain-string content passes through unchanged.
                normalized.append(
                    {"role": "assistant", "content": asst_content}  # type: ignore
                )

    logger.debug(
        f"[normalize_messages_for_api] protocol={protocol} input_msgs={len(messages)} "
        f"normalized={len(normalized)} tool_results_seen={tool_results_seen} "
        f"tool_uses_seen={tool_uses_seen} "
        f"tool_result_positions={len(tool_result_positions)} "
        f"skipped_tool_uses_no_result={skipped_tool_uses_no_result} "
        f"skipped_tool_uses_no_id={skipped_tool_uses_no_id}"
    )
    return normalized
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
# Canned messages emitted when the user interrupts a request or a running tool.
INTERRUPT_MESSAGE = "Request was interrupted by user."
INTERRUPT_MESSAGE_FOR_TOOL_USE = "Tool execution was interrupted by user."


def create_tool_result_stop_message(tool_use_id: str) -> Dict[str, Any]:
    """Build the error tool_result block reporting a user-interrupted tool call."""
    stop_block: Dict[str, Any] = dict(
        type="tool_result",
        tool_use_id=tool_use_id,
        text=INTERRUPT_MESSAGE_FOR_TOOL_USE,
        is_error=True,
    )
    return stop_block
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Utilities for processing and truncating command output."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Maximum output length to prevent token overflow
|
|
8
|
+
MAX_OUTPUT_CHARS = 30000
|
|
9
|
+
|
|
10
|
+
# Threshold for considering output "large"
|
|
11
|
+
LARGE_OUTPUT_THRESHOLD = 5000
|
|
12
|
+
|
|
13
|
+
# When truncating, keep this many chars from start and end
|
|
14
|
+
TRUNCATE_KEEP_START = 15000
|
|
15
|
+
TRUNCATE_KEEP_END = 10000
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def trim_blank_lines(text: str) -> str:
    """Remove leading and trailing blank lines while preserving internal spacing.

    A "blank" line is empty or whitespace-only. Interior blank lines are
    kept untouched.

    Args:
        text: Input text

    Returns:
        Text with leading/trailing blank lines removed
    """
    rows = text.split("\n")

    first = 0
    last = len(rows)
    # Advance past whitespace-only rows from the front...
    while first < last and not rows[first].strip():
        first += 1
    # ...and retreat past whitespace-only rows from the back.
    while last > first and not rows[last - 1].strip():
        last -= 1

    return "\n".join(rows[first:last])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def is_image_data(text: str) -> bool:
    """Check if text appears to be base64 encoded image data.

    Args:
        text: Text to check

    Returns:
        True if text looks like image data
    """
    if not text:
        return False

    candidate = text.strip()

    # A data URI scheme is the most reliable indicator of inline image data.
    if candidate.startswith("data:image/"):
        return True

    # Short strings are never treated as base64 image payloads.
    if len(candidate) < 1000:
        return False

    alphabet = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")
    observed = set(candidate)

    # Genuine base64 uses a variety of characters; a tiny character set
    # (e.g. a long run of the same few symbols) is probably not base64.
    if len(observed) < 10:
        return False

    # Every character must belong to the base64 alphabet.
    if not observed <= alphabet:
        return False

    # Must end with proper base64 padding or a regular base64 character.
    tail_ok = (
        candidate.endswith("==")
        or candidate.endswith("=")
        or candidate[-1] in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    )
    if not tail_ok:
        return False

    # Only very long, otherwise-plausible strings are flagged as images.
    return len(candidate) > 10000
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def truncate_output(text: str, max_chars: int = MAX_OUTPUT_CHARS) -> dict[str, Any]:
    """Truncate output if it exceeds max length.

    Keeps both the beginning and end of output to preserve context.

    Args:
        text: Output text to truncate
        max_chars: Maximum character limit

    Returns:
        Dict with:
        - truncated_content: Potentially truncated text
        - is_truncated: Whether truncation occurred
        - original_length: Original text length
        - is_image: Whether content appears to be image data
    """
    if not text:
        return {
            "truncated_content": text,
            "is_truncated": False,
            "original_length": 0,
            "is_image": False,
        }

    # Image payloads pass through untouched: truncating base64 would
    # corrupt the image.
    if is_image_data(text):
        return {
            "truncated_content": text,
            "is_truncated": False,
            "original_length": len(text),
            "is_image": True,
        }

    original_length = len(text)

    if original_length <= max_chars:
        return {
            "truncated_content": text,
            "is_truncated": False,
            "original_length": original_length,
            "is_image": False,
        }

    # Truncate: keep start and end.  Both spans are clamped to at least 1
    # character — previously a small max_chars could make end_chars <= 0,
    # and text[-0:] returns the ENTIRE string (negative values return a
    # long middle slice), silently defeating the truncation.
    start_chars = max(1, min(TRUNCATE_KEEP_START, max_chars // 2))
    end_chars = max(1, min(TRUNCATE_KEEP_END, max_chars - start_chars - 100))

    truncated = (
        text[:start_chars]
        + f"\n\n... [Output truncated: {original_length - start_chars - end_chars} characters omitted] ...\n\n"
        + text[-end_chars:]
    )

    return {
        "truncated_content": truncated,
        "is_truncated": True,
        "original_length": original_length,
        "is_image": False,
    }
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def format_duration(duration_ms: float) -> str:
    """Format duration in milliseconds to human-readable string.

    Args:
        duration_ms: Duration in milliseconds

    Returns:
        Formatted duration string (e.g., "1.23s", "45.6ms")
    """
    # Sub-second durations stay in whole milliseconds; longer ones are
    # shown as seconds with two decimal places.
    return f"{duration_ms:.0f}ms" if duration_ms < 1000 else f"{duration_ms / 1000:.2f}s"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def is_output_large(text: str) -> bool:
    """Check if output is considered large.

    Args:
        text: Output text

    Returns:
        True if output exceeds large threshold
    """
    # Strictly greater than the configured threshold counts as "large".
    return LARGE_OUTPUT_THRESHOLD < len(text)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def count_lines(text: str) -> int:
    """Count number of lines in text.

    Args:
        text: Text to count

    Returns:
        Number of lines (0 for empty text)
    """
    if not text:
        return 0
    # Splitting on "\n" yields exactly one element per line.
    return len(text.split("\n"))
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_last_n_lines(text: str, n: int) -> str:
    """Get the last N lines from text.

    Args:
        text: Input text
        n: Number of lines to keep

    Returns:
        Last N lines; the empty string when ``n`` is zero or negative.
    """
    if not text:
        return text

    # Guard: lines[-n:] with n == 0 would return ALL lines (the whole
    # text), and a negative n would return an arbitrary middle slice.
    if n <= 0:
        return ""

    lines = text.split("\n")
    if len(lines) <= n:
        return text

    return "\n".join(lines[-n:])
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def sanitize_output(text: str) -> str:
|
|
214
|
+
"""Sanitize output by removing control/escape sequences and ensuring UTF-8."""
|
|
215
|
+
# ANSI/VT escape patterns, including charset selection (e.g., ESC(B) and OSC)
|
|
216
|
+
ansi_escape = re.compile(
|
|
217
|
+
r"""
|
|
218
|
+
\x1B
|
|
219
|
+
(?:
|
|
220
|
+
[@-Z\\-_] # 7-bit C1 control
|
|
221
|
+
| \[ [0-?]* [ -/]* [@-~] # CSI (colors, cursor moves, etc.)
|
|
222
|
+
| [()][0-9A-Za-z] # Charset selection like ESC(B
|
|
223
|
+
| \] (?: [^\x07\x1B]* \x07 | [^\x1B]* \x1B\\ ) # OSC to BEL or ST
|
|
224
|
+
)
|
|
225
|
+
""",
|
|
226
|
+
re.VERBOSE,
|
|
227
|
+
)
|
|
228
|
+
text = ansi_escape.sub("", text)
|
|
229
|
+
|
|
230
|
+
# Remove remaining control characters except newline, tab, carriage return
|
|
231
|
+
text = re.sub(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "", text)
|
|
232
|
+
|
|
233
|
+
return text
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Filesystem path helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _legacy_sanitize_project_path(project_path: Path) -> str:
|
|
11
|
+
"""Legacy sanitizer that strips non-alphanumeric characters."""
|
|
12
|
+
normalized = str(project_path.resolve())
|
|
13
|
+
return re.sub(r"[^a-zA-Z0-9]+", "-", normalized).strip("-") or "project"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def sanitize_project_path(project_path: Path) -> str:
|
|
17
|
+
"""Make a project path safe for directory names and avoid collisions.
|
|
18
|
+
|
|
19
|
+
Non-alphanumeric characters (including non-ASCII) are replaced with "-".
|
|
20
|
+
A short hash of the full resolved path is appended to prevent collisions
|
|
21
|
+
between different paths that would otherwise sanitize to the same string.
|
|
22
|
+
"""
|
|
23
|
+
normalized = str(project_path.resolve())
|
|
24
|
+
safe = _legacy_sanitize_project_path(project_path)
|
|
25
|
+
digest = hashlib.sha1(normalized.encode("utf-8")).hexdigest()[:8]
|
|
26
|
+
return f"{safe}-{digest}"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def project_storage_dir(base_dir: Path, project_path: Path, ensure: bool = False) -> Path:
    """Return a storage directory path for a project, with legacy fallback.

    Prefers a hashed, collision-safe name but will reuse an existing legacy
    directory (pre-hash) to avoid stranding older data.
    """
    hashed_dir = base_dir / sanitize_project_path(project_path)
    legacy_dir = base_dir / _legacy_sanitize_project_path(project_path)

    # Only fall back to the legacy directory when it exists and the hashed
    # one does not; in every other case the hashed name wins.
    if hashed_dir.exists() or not legacy_dir.exists():
        chosen = hashed_dir
    else:
        chosen = legacy_dir

    if ensure:
        chosen.mkdir(parents=True, exist_ok=True)

    return chosen
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Permission utilities."""
|
|
2
|
+
|
|
3
|
+
from .path_validation_utils import validate_shell_command_paths
|
|
4
|
+
from .shell_command_validation import validate_shell_command
|
|
5
|
+
from .tool_permission_utils import (
|
|
6
|
+
PermissionDecision,
|
|
7
|
+
ToolRule,
|
|
8
|
+
evaluate_shell_command_permissions,
|
|
9
|
+
extract_rule_prefix,
|
|
10
|
+
match_rule,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"PermissionDecision",
|
|
15
|
+
"ToolRule",
|
|
16
|
+
"evaluate_shell_command_permissions",
|
|
17
|
+
"extract_rule_prefix",
|
|
18
|
+
"match_rule",
|
|
19
|
+
"validate_shell_command_paths",
|
|
20
|
+
"validate_shell_command",
|
|
21
|
+
]
|