python-codex 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/__init__.py +139 -2
- pycodex/agent.py +290 -0
- pycodex/cli.py +641 -0
- pycodex/collaboration.py +21 -0
- pycodex/context.py +580 -0
- pycodex/doctor.py +360 -0
- pycodex/model.py +533 -0
- pycodex/prompts/collaboration_default.md +11 -0
- pycodex/prompts/collaboration_plan.md +128 -0
- pycodex/prompts/default_base_instructions.md +275 -0
- pycodex/prompts/exec_tools.json +411 -0
- pycodex/prompts/models.json +847 -0
- pycodex/prompts/permissions/approval_policy/never.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_failure.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_request.md +57 -0
- pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +33 -0
- pycodex/prompts/permissions/approval_policy/unless_trusted.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/read_only.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/workspace_write.md +1 -0
- pycodex/prompts/subagent_tools.json +163 -0
- pycodex/protocol.py +347 -0
- pycodex/runtime.py +200 -0
- pycodex/runtime_services.py +408 -0
- pycodex/tools/__init__.py +58 -0
- pycodex/tools/agent_tool_schemas.py +70 -0
- pycodex/tools/apply_patch_tool.py +363 -0
- pycodex/tools/base_tool.py +168 -0
- pycodex/tools/close_agent_tool.py +55 -0
- pycodex/tools/code_mode_manager.py +519 -0
- pycodex/tools/exec_command_tool.py +96 -0
- pycodex/tools/exec_runtime.js +161 -0
- pycodex/tools/exec_tool.py +48 -0
- pycodex/tools/grep_files_tool.py +150 -0
- pycodex/tools/list_dir_tool.py +135 -0
- pycodex/tools/read_file_tool.py +217 -0
- pycodex/tools/request_permissions_tool.py +95 -0
- pycodex/tools/request_user_input_tool.py +167 -0
- pycodex/tools/resume_agent_tool.py +56 -0
- pycodex/tools/send_input_tool.py +106 -0
- pycodex/tools/shell_command_tool.py +107 -0
- pycodex/tools/shell_tool.py +112 -0
- pycodex/tools/spawn_agent_tool.py +97 -0
- pycodex/tools/unified_exec_manager.py +380 -0
- pycodex/tools/update_plan_tool.py +79 -0
- pycodex/tools/view_image_tool.py +111 -0
- pycodex/tools/wait_agent_tool.py +75 -0
- pycodex/tools/wait_tool.py +68 -0
- pycodex/tools/web_search_tool.py +30 -0
- pycodex/tools/write_stdin_tool.py +75 -0
- pycodex/utils/__init__.py +40 -0
- pycodex/utils/dotenv.py +64 -0
- pycodex/utils/get_env.py +218 -0
- pycodex/utils/random_ids.py +19 -0
- pycodex/utils/visualize.py +978 -0
- python_codex-0.1.0.dist-info/METADATA +267 -0
- python_codex-0.1.0.dist-info/RECORD +60 -0
- python_codex-0.1.0.dist-info/entry_points.txt +2 -0
- python_codex-0.1.0.dist-info/licenses/LICENSE +201 -0
- python_codex-0.0.1.dist-info/METADATA +0 -30
- python_codex-0.0.1.dist-info/RECORD +0 -4
- {python_codex-0.0.1.dist-info → python_codex-0.1.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
"""Shared runtime for Codex `exec` / `wait` tools.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the code-mode runtime behind Codex `exec` and `wait`.
|
|
5
|
+
|
|
6
|
+
Expected behavior:
|
|
7
|
+
- Run raw JavaScript source in a background cell.
|
|
8
|
+
- Let JavaScript call nested local tools through a `tools` object.
|
|
9
|
+
- Support yielding/running cells and later polling or terminating them via
|
|
10
|
+
`wait`.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import json
|
|
17
|
+
import math
|
|
18
|
+
import uuid
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from loguru import logger
|
|
23
|
+
|
|
24
|
+
from ..protocol import JSONDict, JSONValue, ToolCall
|
|
25
|
+
from .base_tool import StructuredToolOutput, ToolContext, ToolRegistry
|
|
26
|
+
|
|
27
|
+
DEFAULT_WAIT_YIELD_TIME_MS = 10_000
|
|
28
|
+
DEFAULT_MAX_OUTPUT_TOKENS = 10_000
|
|
29
|
+
CHARS_PER_TOKEN = 4
|
|
30
|
+
EXEC_PRAGMA_PREFIX = "// @exec:"
|
|
31
|
+
WAIT_COMPLETION_GRACE_SECONDS = 0.02
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(slots=True)
class ExecCell:
    """Mutable bookkeeping for one background `exec` cell.

    An instance is created for each runtime subprocess that is started and is
    shared between the stdout/stderr reader tasks and the `exec` / `wait`
    entry points of `CodeModeManager`.
    """

    # Short identifier under which the manager stores the cell.
    cell_id: str
    # Subprocess running the JavaScript runtime for this cell.
    process: asyncio.subprocess.Process
    # Event-loop clock reading taken when the cell was created.
    started_at: float
    # Content items received from the runtime so far, in arrival order.
    output_items: list[JSONDict] = field(default_factory=list)
    # Number of leading `output_items` already handed out by a snapshot.
    delivered_count: int = 0
    # Task that reads and dispatches the runtime's stdout messages.
    reader_task: asyncio.Task | None = None
    # Task that collects the runtime's stderr.
    stderr_task: asyncio.Task | None = None
    # Set when the runtime sends a `yield` message.
    yield_event: asyncio.Event = field(default_factory=asyncio.Event)
    # Set when new output is appended or the cell finishes.
    output_event: asyncio.Event = field(default_factory=asyncio.Event)
    # Set once a `result` message arrives or the stdout stream ends.
    done_event: asyncio.Event = field(default_factory=asyncio.Event)
    # True once the runtime has reported a `result` message.
    completed: bool = False
    # True once `wait(..., terminate=True)` has stopped the process.
    terminated: bool = False
    # Failure description (from the result message, stderr, or exit code).
    error_text: str | None = None
    # Decoded stderr chunks in arrival order.
    stderr_chunks: list[str] = field(default_factory=list)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True, slots=True)
class ParsedExecSource:
    """Immutable result of splitting raw `exec` input into pragma and code."""

    # JavaScript source to run (the pragma line, if any, is not included).
    code: str
    # Optional `yield_time_ms` taken from the pragma; None when not given.
    yield_time_ms: int | None
    # Optional `max_output_tokens` taken from the pragma; None when not given.
    max_output_tokens: int | None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class CodeModeManager:
|
|
60
|
+
def __init__(self, registry: ToolRegistry, cwd: str | Path | None = None) -> None:
|
|
61
|
+
self._registry = registry
|
|
62
|
+
self._default_cwd = Path(cwd or Path.cwd()).resolve()
|
|
63
|
+
self._runtime_script = Path(__file__).with_name("exec_runtime.js")
|
|
64
|
+
self._stored_values: dict[str, JSONValue] = {}
|
|
65
|
+
self._cells: dict[str, ExecCell] = {}
|
|
66
|
+
self._lock = asyncio.Lock()
|
|
67
|
+
|
|
68
|
+
async def exec(self, source: str, context: ToolContext) -> StructuredToolOutput | str:
|
|
69
|
+
try:
|
|
70
|
+
parsed = self._parse_exec_source(source)
|
|
71
|
+
except ValueError as exc:
|
|
72
|
+
return f"Error: {exc}"
|
|
73
|
+
|
|
74
|
+
cell = await self._start_cell(parsed.code, context)
|
|
75
|
+
await self._wait_for_exec(cell, parsed.yield_time_ms)
|
|
76
|
+
return await self._snapshot_cell(cell, parsed.max_output_tokens)
|
|
77
|
+
|
|
78
|
+
async def wait(
|
|
79
|
+
self,
|
|
80
|
+
cell_id: str,
|
|
81
|
+
yield_time_ms: int,
|
|
82
|
+
max_tokens: int | None,
|
|
83
|
+
terminate: bool,
|
|
84
|
+
) -> StructuredToolOutput | str:
|
|
85
|
+
cell = self._cells.get(cell_id)
|
|
86
|
+
if cell is None:
|
|
87
|
+
return f"Error: unknown exec cell `{cell_id}`."
|
|
88
|
+
|
|
89
|
+
if terminate and cell.process.returncode is None:
|
|
90
|
+
cell.terminated = True
|
|
91
|
+
cell.process.terminate()
|
|
92
|
+
try:
|
|
93
|
+
await asyncio.wait_for(cell.process.wait(), timeout=1.0)
|
|
94
|
+
except asyncio.TimeoutError:
|
|
95
|
+
cell.process.kill()
|
|
96
|
+
await cell.process.wait()
|
|
97
|
+
|
|
98
|
+
await self._wait_for_wait(cell, yield_time_ms)
|
|
99
|
+
return await self._snapshot_cell(cell, max_tokens)
|
|
100
|
+
|
|
101
|
+
def enabled_tools(self) -> list[dict[str, str]]:
|
|
102
|
+
enabled: list[dict[str, str]] = []
|
|
103
|
+
for tool in self._registry.tools():
|
|
104
|
+
if tool.name in {"exec", "wait"}:
|
|
105
|
+
continue
|
|
106
|
+
if tool.tool_type not in {"function", "custom"}:
|
|
107
|
+
continue
|
|
108
|
+
enabled.append(
|
|
109
|
+
{
|
|
110
|
+
"tool_name": tool.name,
|
|
111
|
+
"js_name": self._normalize_identifier(tool.name),
|
|
112
|
+
"description": tool.description,
|
|
113
|
+
"tool_type": tool.tool_type,
|
|
114
|
+
}
|
|
115
|
+
)
|
|
116
|
+
enabled.sort(key=lambda item: item["tool_name"])
|
|
117
|
+
return enabled
|
|
118
|
+
|
|
119
|
+
async def _start_cell(self, code: str, context: ToolContext) -> ExecCell:
    """Spawn the runtime process for `code` and register a new cell.

    Starts `node` on the bundled runtime script, launches the stdout and
    stderr reader tasks, and sends the `init` message carrying the source,
    the stored values, and the list of callable tools.

    Args:
        code: JavaScript source to run.
        context: Tool context handed to nested tool calls made by the script.

    Returns:
        The registered cell; it may still be running.
    """
    # The runtime protocol is one JSON message per line. asyncio's default
    # stream limit is 64 KiB, and `readline()` raises once a line exceeds it,
    # which would kill the reader on a large message such as a data-URL
    # image. Allow much longer lines.
    stream_limit = 16 * 1024 * 1024

    cell_id = uuid.uuid4().hex[:10]
    process = await asyncio.create_subprocess_exec(
        "node",
        str(self._runtime_script),
        cwd=str(self._default_cwd),
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        limit=stream_limit,
    )
    cell = ExecCell(
        cell_id=cell_id,
        process=process,
        started_at=asyncio.get_running_loop().time(),
    )
    self._cells[cell_id] = cell
    # Start the readers before sending `init` so no early output is missed.
    cell.reader_task = asyncio.create_task(self._read_stdout(cell, context))
    cell.stderr_task = asyncio.create_task(self._read_stderr(cell))
    await self._send_message(
        cell,
        {
            "type": "init",
            "cell_id": cell_id,
            "source": code,
            "stored_values": self._stored_values,
            "tools": self.enabled_tools(),
        },
    )
    logger.debug("exec start cell_id={} cwd={}", cell_id, self._default_cwd)
    return cell
|
|
149
|
+
|
|
150
|
+
async def _read_stdout(self, cell: ExecCell, context: ToolContext) -> None:
|
|
151
|
+
stream = cell.process.stdout
|
|
152
|
+
if stream is None:
|
|
153
|
+
cell.error_text = "missing stdout pipe"
|
|
154
|
+
cell.done_event.set()
|
|
155
|
+
return
|
|
156
|
+
|
|
157
|
+
while True:
|
|
158
|
+
line = await stream.readline()
|
|
159
|
+
if not line:
|
|
160
|
+
break
|
|
161
|
+
try:
|
|
162
|
+
message = json.loads(line.decode("utf-8", errors="replace"))
|
|
163
|
+
except json.JSONDecodeError:
|
|
164
|
+
cell.output_items.append(
|
|
165
|
+
{
|
|
166
|
+
"type": "input_text",
|
|
167
|
+
"text": line.decode("utf-8", errors="replace").rstrip("\n"),
|
|
168
|
+
}
|
|
169
|
+
)
|
|
170
|
+
cell.output_event.set()
|
|
171
|
+
continue
|
|
172
|
+
|
|
173
|
+
msg_type = message.get("type")
|
|
174
|
+
if msg_type == "output_text":
|
|
175
|
+
cell.output_items.append(
|
|
176
|
+
{
|
|
177
|
+
"type": "input_text",
|
|
178
|
+
"text": str(message.get("text", "")),
|
|
179
|
+
}
|
|
180
|
+
)
|
|
181
|
+
cell.output_event.set()
|
|
182
|
+
continue
|
|
183
|
+
if msg_type == "output_image":
|
|
184
|
+
image_item: JSONDict = {
|
|
185
|
+
"type": "input_image",
|
|
186
|
+
"image_url": str(message.get("image_url", "")),
|
|
187
|
+
}
|
|
188
|
+
detail = message.get("detail")
|
|
189
|
+
if detail is not None:
|
|
190
|
+
image_item["detail"] = detail
|
|
191
|
+
cell.output_items.append(image_item)
|
|
192
|
+
cell.output_event.set()
|
|
193
|
+
continue
|
|
194
|
+
if msg_type == "yield":
|
|
195
|
+
cell.yield_event.set()
|
|
196
|
+
continue
|
|
197
|
+
if msg_type == "tool_call":
|
|
198
|
+
await self._handle_nested_tool_call(cell, context, message)
|
|
199
|
+
continue
|
|
200
|
+
if msg_type == "result":
|
|
201
|
+
cell.completed = True
|
|
202
|
+
cell.error_text = self._coerce_optional_text(message.get("error_text"))
|
|
203
|
+
stored_values = message.get("stored_values")
|
|
204
|
+
if isinstance(stored_values, dict):
|
|
205
|
+
async with self._lock:
|
|
206
|
+
self._stored_values = stored_values
|
|
207
|
+
cell.done_event.set()
|
|
208
|
+
cell.output_event.set()
|
|
209
|
+
continue
|
|
210
|
+
|
|
211
|
+
await cell.process.wait()
|
|
212
|
+
if cell.stderr_task is not None:
|
|
213
|
+
await cell.stderr_task
|
|
214
|
+
if not cell.done_event.is_set():
|
|
215
|
+
stderr_text = "".join(cell.stderr_chunks).strip()
|
|
216
|
+
if stderr_text:
|
|
217
|
+
cell.error_text = stderr_text
|
|
218
|
+
elif cell.process.returncode not in (0, None):
|
|
219
|
+
cell.error_text = f"process exited with code {cell.process.returncode}"
|
|
220
|
+
cell.done_event.set()
|
|
221
|
+
cell.output_event.set()
|
|
222
|
+
|
|
223
|
+
async def _read_stderr(self, cell: ExecCell) -> None:
|
|
224
|
+
stream = cell.process.stderr
|
|
225
|
+
if stream is None:
|
|
226
|
+
return
|
|
227
|
+
while True:
|
|
228
|
+
chunk = await stream.read(4096)
|
|
229
|
+
if not chunk:
|
|
230
|
+
break
|
|
231
|
+
cell.stderr_chunks.append(chunk.decode("utf-8", errors="replace"))
|
|
232
|
+
|
|
233
|
+
async def _handle_nested_tool_call(
|
|
234
|
+
self,
|
|
235
|
+
cell: ExecCell,
|
|
236
|
+
context: ToolContext,
|
|
237
|
+
message: JSONDict,
|
|
238
|
+
) -> None:
|
|
239
|
+
tool_name = str(message.get("tool_name", ""))
|
|
240
|
+
request_id = str(message.get("id", ""))
|
|
241
|
+
tool = self._registry.get_tool(tool_name)
|
|
242
|
+
if tool is None:
|
|
243
|
+
await self._send_message(
|
|
244
|
+
cell,
|
|
245
|
+
{
|
|
246
|
+
"type": "tool_result",
|
|
247
|
+
"id": request_id,
|
|
248
|
+
"ok": False,
|
|
249
|
+
"error": f"unknown tool: {tool_name}",
|
|
250
|
+
},
|
|
251
|
+
)
|
|
252
|
+
return
|
|
253
|
+
if tool.tool_type not in {"function", "custom"}:
|
|
254
|
+
await self._send_message(
|
|
255
|
+
cell,
|
|
256
|
+
{
|
|
257
|
+
"type": "tool_result",
|
|
258
|
+
"id": request_id,
|
|
259
|
+
"ok": False,
|
|
260
|
+
"error": f"tool `{tool_name}` is not available inside exec",
|
|
261
|
+
},
|
|
262
|
+
)
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
result = await self._registry.execute(
|
|
266
|
+
ToolCall(
|
|
267
|
+
call_id=f"{cell.cell_id}_{request_id}",
|
|
268
|
+
name=tool_name,
|
|
269
|
+
arguments=message.get("arguments"),
|
|
270
|
+
tool_type=tool.tool_type,
|
|
271
|
+
),
|
|
272
|
+
ToolContext(turn_id=context.turn_id, history=context.history),
|
|
273
|
+
)
|
|
274
|
+
if result.is_error:
|
|
275
|
+
payload = {
|
|
276
|
+
"type": "tool_result",
|
|
277
|
+
"id": request_id,
|
|
278
|
+
"ok": False,
|
|
279
|
+
"error": result.output_text(),
|
|
280
|
+
}
|
|
281
|
+
else:
|
|
282
|
+
payload = {
|
|
283
|
+
"type": "tool_result",
|
|
284
|
+
"id": request_id,
|
|
285
|
+
"ok": True,
|
|
286
|
+
"result": result.output,
|
|
287
|
+
}
|
|
288
|
+
await self._send_message(cell, payload)
|
|
289
|
+
|
|
290
|
+
async def _send_message(self, cell: ExecCell, payload: JSONDict) -> None:
|
|
291
|
+
stdin = cell.process.stdin
|
|
292
|
+
if stdin is None or stdin.is_closing():
|
|
293
|
+
return
|
|
294
|
+
stdin.write((json.dumps(payload, ensure_ascii=False) + "\n").encode("utf-8"))
|
|
295
|
+
await stdin.drain()
|
|
296
|
+
|
|
297
|
+
async def _wait_for_exec(self, cell: ExecCell, yield_time_ms: int | None) -> None:
|
|
298
|
+
done_task = asyncio.create_task(cell.done_event.wait())
|
|
299
|
+
yield_task = asyncio.create_task(cell.yield_event.wait())
|
|
300
|
+
tasks = {done_task, yield_task}
|
|
301
|
+
try:
|
|
302
|
+
if yield_time_ms is None:
|
|
303
|
+
await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
|
|
304
|
+
else:
|
|
305
|
+
await asyncio.wait_for(
|
|
306
|
+
asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED),
|
|
307
|
+
timeout=max(yield_time_ms, 1) / 1000.0,
|
|
308
|
+
)
|
|
309
|
+
except asyncio.TimeoutError:
|
|
310
|
+
return
|
|
311
|
+
finally:
|
|
312
|
+
for task in tasks:
|
|
313
|
+
if not task.done():
|
|
314
|
+
task.cancel()
|
|
315
|
+
cell.yield_event.clear()
|
|
316
|
+
|
|
317
|
+
async def _wait_for_wait(self, cell: ExecCell, yield_time_ms: int) -> None:
|
|
318
|
+
loop = asyncio.get_running_loop()
|
|
319
|
+
deadline = loop.time() + max(yield_time_ms, 1) / 1000.0
|
|
320
|
+
initial_count = cell.delivered_count
|
|
321
|
+
|
|
322
|
+
while True:
|
|
323
|
+
if cell.done_event.is_set():
|
|
324
|
+
break
|
|
325
|
+
if cell.yield_event.is_set():
|
|
326
|
+
break
|
|
327
|
+
if len(cell.output_items) > initial_count:
|
|
328
|
+
remaining = deadline - loop.time()
|
|
329
|
+
if remaining > 0:
|
|
330
|
+
await self._wait_for_completion_grace(
|
|
331
|
+
cell,
|
|
332
|
+
min(remaining, WAIT_COMPLETION_GRACE_SECONDS),
|
|
333
|
+
)
|
|
334
|
+
break
|
|
335
|
+
|
|
336
|
+
remaining = deadline - loop.time()
|
|
337
|
+
if remaining <= 0:
|
|
338
|
+
break
|
|
339
|
+
|
|
340
|
+
done_task = asyncio.create_task(cell.done_event.wait())
|
|
341
|
+
output_task = asyncio.create_task(cell.output_event.wait())
|
|
342
|
+
yield_task = asyncio.create_task(cell.yield_event.wait())
|
|
343
|
+
tasks = {done_task, output_task, yield_task}
|
|
344
|
+
try:
|
|
345
|
+
await asyncio.wait(
|
|
346
|
+
tasks,
|
|
347
|
+
timeout=min(remaining, 0.05),
|
|
348
|
+
return_when=asyncio.FIRST_COMPLETED,
|
|
349
|
+
)
|
|
350
|
+
finally:
|
|
351
|
+
for task in tasks:
|
|
352
|
+
if not task.done():
|
|
353
|
+
task.cancel()
|
|
354
|
+
|
|
355
|
+
cell.output_event.clear()
|
|
356
|
+
cell.yield_event.clear()
|
|
357
|
+
|
|
358
|
+
async def _wait_for_completion_grace(
|
|
359
|
+
self,
|
|
360
|
+
cell: ExecCell,
|
|
361
|
+
timeout_seconds: float,
|
|
362
|
+
) -> None:
|
|
363
|
+
if timeout_seconds <= 0:
|
|
364
|
+
return
|
|
365
|
+
done_task = asyncio.create_task(cell.done_event.wait())
|
|
366
|
+
yield_task = asyncio.create_task(cell.yield_event.wait())
|
|
367
|
+
tasks = {done_task, yield_task}
|
|
368
|
+
try:
|
|
369
|
+
await asyncio.wait(
|
|
370
|
+
tasks,
|
|
371
|
+
timeout=timeout_seconds,
|
|
372
|
+
return_when=asyncio.FIRST_COMPLETED,
|
|
373
|
+
)
|
|
374
|
+
finally:
|
|
375
|
+
for task in tasks:
|
|
376
|
+
if not task.done():
|
|
377
|
+
task.cancel()
|
|
378
|
+
|
|
379
|
+
async def _snapshot_cell(
    self,
    cell: ExecCell,
    max_tokens: int | None,
) -> StructuredToolOutput:
    """Package the cell's undelivered output into a tool result.

    The result starts with a header item (status line, wall time, and an
    ``Output:`` label), followed by the items not yet delivered and, if
    present, the script error. Text is truncated to the token budget. The
    output and yield flags are cleared, and a finished cell is removed
    from the manager.
    """
    reader = cell.reader_task
    if reader is not None and cell.process.returncode is not None:
        # The process has exited; let the reader drain remaining output.
        await reader

    elapsed = asyncio.get_running_loop().time() - cell.started_at
    fresh_items = cell.output_items[cell.delivered_count :]
    cell.delivered_count = len(cell.output_items)

    if cell.error_text:
        fresh_items.append(
            {
                "type": "input_text",
                "text": f"Script error:\n{cell.error_text}",
            }
        )

    status_line = self._status_text(cell)
    header = {
        "type": "input_text",
        "text": f"{status_line}\nWall time {elapsed:.1f} seconds\nOutput:\n",
    }
    content_items = self._truncate_content_items([header, *fresh_items], max_tokens)

    text_parts = [
        entry.get("text", "")
        for entry in content_items
        if entry.get("type") == "input_text"
    ]
    output_text = "\n".join(text_parts)

    cell.output_event.clear()
    cell.yield_event.clear()
    if cell.done_event.is_set():
        self._cells.pop(cell.cell_id, None)

    return StructuredToolOutput(output=output_text, content_items=tuple(content_items))
|
|
421
|
+
|
|
422
|
+
def _truncate_content_items(
    self,
    items: list[JSONDict],
    max_tokens: int | None,
) -> list[JSONDict]:
    """Trim text items so their combined length fits the token budget.

    The budget is `max_tokens` (or the module default when None), at least
    one token, converted to characters. Non-text items do not count against
    it. The text item that overflows is cut and marked as truncated, and
    every item after it (or after the budget is exhausted) is dropped.
    """
    budget_tokens = max_tokens if max_tokens is not None else DEFAULT_MAX_OUTPUT_TOKENS
    chars_left = max(1, budget_tokens) * CHARS_PER_TOKEN
    kept: list[JSONDict] = []

    for entry in items:
        if entry.get("type") != "input_text":
            kept.append(entry)
            continue

        body = str(entry.get("text", ""))
        if chars_left <= 0:
            break
        if len(body) > chars_left:
            kept.append(
                {
                    "type": "input_text",
                    "text": body[:chars_left] + "\n...[truncated]...",
                }
            )
            break

        kept.append(entry)
        chars_left -= len(body)

    return kept
|
|
451
|
+
|
|
452
|
+
def _status_text(self, cell: ExecCell) -> str:
|
|
453
|
+
if cell.terminated:
|
|
454
|
+
return "Script terminated"
|
|
455
|
+
if not cell.done_event.is_set():
|
|
456
|
+
return f"Script running with cell ID {cell.cell_id}"
|
|
457
|
+
if cell.error_text:
|
|
458
|
+
return "Script failed"
|
|
459
|
+
return "Script completed"
|
|
460
|
+
|
|
461
|
+
def _parse_exec_source(self, input_text: str) -> ParsedExecSource:
    """Split raw `exec` input into JavaScript code and optional settings.

    When the first line starts with the exec pragma prefix, the rest of
    that line must be a JSON object whose only allowed keys are
    `yield_time_ms` and `max_output_tokens`; the code is everything after
    that line. Without a pragma, the whole input is the code.

    Args:
        input_text: Raw text passed to the `exec` tool.

    Returns:
        The parsed code and settings (None for a setting that is not given).

    Raises:
        ValueError: If the input is blank, the pragma has no code after it,
            the pragma is not a JSON object, it contains an unsupported
            field, or a field value cannot be converted to an integer.
    """
    if not input_text.strip():
        raise ValueError(
            "exec expects raw JavaScript source text (non-empty)."
        )

    first_line, _, remainder = input_text.partition("\n")
    pragma_line = first_line.lstrip()
    if not pragma_line.startswith(EXEC_PRAGMA_PREFIX):
        return ParsedExecSource(
            code=input_text,
            yield_time_ms=None,
            max_output_tokens=None,
        )

    if not remainder.strip():
        raise ValueError(
            "exec pragma must be followed by JavaScript source on subsequent lines"
        )
    pragma_text = pragma_line[len(EXEC_PRAGMA_PREFIX) :].strip()
    try:
        value = json.loads(pragma_text)
    except json.JSONDecodeError as exc:
        raise ValueError(
            "exec pragma must be valid JSON with supported fields `yield_time_ms` and `max_output_tokens`: "
            f"{exc}"
        ) from exc
    if not isinstance(value, dict):
        raise ValueError(
            "exec pragma must be a JSON object with supported fields `yield_time_ms` and `max_output_tokens`"
        )
    for key in value:
        if key not in {"yield_time_ms", "max_output_tokens"}:
            raise ValueError(f"unsupported exec pragma field `{key}`")

    def read_int(field_name: str) -> int | None:
        raw = value.get(field_name)
        if raw is None:
            return None
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError) as exc:
            # `int()` raises TypeError for lists/objects and OverflowError
            # for `Infinity`. Report every bad value as ValueError, the
            # one error type this parser raises for invalid input.
            raise ValueError(
                f"exec pragma field `{field_name}` must be an integer"
            ) from exc

    return ParsedExecSource(
        code=remainder,
        yield_time_ms=read_int("yield_time_ms"),
        max_output_tokens=read_int("max_output_tokens"),
    )
|
|
501
|
+
|
|
502
|
+
def _normalize_identifier(self, tool_name: str) -> str:
|
|
503
|
+
identifier = []
|
|
504
|
+
for index, char in enumerate(tool_name):
|
|
505
|
+
is_valid = (
|
|
506
|
+
char == "_"
|
|
507
|
+
or char == "$"
|
|
508
|
+
or (char.isascii() and char.isalnum() and (index != 0 or char.isalpha()))
|
|
509
|
+
)
|
|
510
|
+
if is_valid:
|
|
511
|
+
identifier.append(char)
|
|
512
|
+
else:
|
|
513
|
+
identifier.append("_")
|
|
514
|
+
return "".join(identifier) or "_"
|
|
515
|
+
|
|
516
|
+
def _coerce_optional_text(self, value: JSONValue) -> str | None:
|
|
517
|
+
if value in (None, ""):
|
|
518
|
+
return None
|
|
519
|
+
return str(value)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""`exec_command` tool for the Python Codex prototype.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the original Codex `exec_command` tool.
|
|
5
|
+
|
|
6
|
+
Expected behavior:
|
|
7
|
+
- Start a command in a session-backed execution runtime.
|
|
8
|
+
- Return output immediately when the process finishes during the current call.
|
|
9
|
+
- Otherwise return a running `session_id` that can be continued via
|
|
10
|
+
`write_stdin`.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from ..protocol import JSONDict, JSONValue
|
|
16
|
+
from .base_tool import BaseTool, ToolContext
|
|
17
|
+
from .unified_exec_manager import (
|
|
18
|
+
DEFAULT_EXEC_YIELD_TIME_MS,
|
|
19
|
+
DEFAULT_LOGIN,
|
|
20
|
+
DEFAULT_TTY,
|
|
21
|
+
UNIFIED_EXEC_OUTPUT_SCHEMA,
|
|
22
|
+
UnifiedExecManager,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ExecCommandTool(BaseTool):
    """Tool wrapper that starts a command through a `UnifiedExecManager`.

    Arguments are validated and defaulted here; running the command and
    building the result are delegated to the manager.
    """

    name = "exec_command"
    description = "Runs a command in a PTY, returning output or a session ID for ongoing interaction."
    input_schema = {
        "type": "object",
        "properties": {
            "cmd": {
                "type": "string",
                "description": "Shell command to execute.",
            },
            "workdir": {
                "type": "string",
                "description": "Optional working directory to run the command in; defaults to the turn cwd.",
            },
            "shell": {
                "type": "string",
                "description": "Shell binary to launch. Defaults to the user's default shell.",
            },
            "login": {
                "type": "boolean",
                "description": "Whether to run the shell with -l/-i semantics. Defaults to true.",
            },
            "tty": {
                "type": "boolean",
                "description": "Whether to allocate a TTY for the command. Defaults to false (plain pipes); set to true to open a PTY and access TTY process.",
            },
            "yield_time_ms": {
                "type": "integer",
                "description": "How long to wait (in milliseconds) for output before yielding.",
            },
            "max_output_tokens": {
                "type": "integer",
                "description": "Maximum number of tokens to return. Excess output will be truncated.",
            },
        },
        "required": ["cmd"],
        "additionalProperties": False,
    }
    output_schema = UNIFIED_EXEC_OUTPUT_SCHEMA
    supports_parallel = False

    def __init__(self, manager: UnifiedExecManager) -> None:
        """Keep the manager that runs the commands."""
        self._manager = manager

    async def run(self, context: ToolContext, args: JSONDict) -> JSONValue:
        """Validate `args` and start the command via the manager.

        Args:
            context: Unused by this tool.
            args: Tool arguments following `input_schema`.

        Returns:
            ``"Error: `cmd` is required."`` when `cmd` is missing, blank, or
            not a string; otherwise whatever the manager returns.
        """
        del context
        raw_cmd = args.get("cmd")
        # `str(None)` is the text "None", so a null `cmd` must be rejected
        # here rather than stringified and executed as a command.
        cmd = raw_cmd.strip() if isinstance(raw_cmd, str) else ""
        if not cmd:
            return "Error: `cmd` is required."

        # An explicit null means "use the default", same as omitting the key.
        yield_time_ms = self._optional_int(args, "yield_time_ms")
        if yield_time_ms is None:
            yield_time_ms = int(DEFAULT_EXEC_YIELD_TIME_MS)

        return await self._manager.exec_command(
            cmd=cmd,
            workdir=self._optional_string(args, "workdir"),
            shell=self._optional_string(args, "shell"),
            login=bool(args.get("login", DEFAULT_LOGIN)),
            tty=bool(args.get("tty", DEFAULT_TTY)),
            yield_time_ms=yield_time_ms,
            max_output_tokens=self._optional_int(args, "max_output_tokens"),
        )

    def _optional_string(self, args: JSONDict, key: str) -> str | None:
        """Return `args[key]` as a string, or None when missing or empty."""
        value = args.get(key)
        if value in (None, ""):
            return None
        return str(value)

    def _optional_int(self, args: JSONDict, key: str) -> int | None:
        """Return `args[key]` as an int, or None when missing or empty."""
        value = args.get(key)
        if value in (None, ""):
            return None
        return int(value)
|