python-codex 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/__init__.py +141 -2
- pycodex/agent.py +290 -0
- pycodex/cli.py +705 -0
- pycodex/collaboration.py +21 -0
- pycodex/context.py +580 -0
- pycodex/doctor.py +360 -0
- pycodex/model.py +533 -0
- pycodex/portable.py +390 -0
- pycodex/portable_server.py +205 -0
- pycodex/prompts/collaboration_default.md +11 -0
- pycodex/prompts/collaboration_plan.md +128 -0
- pycodex/prompts/default_base_instructions.md +275 -0
- pycodex/prompts/exec_tools.json +411 -0
- pycodex/prompts/models.json +847 -0
- pycodex/prompts/permissions/approval_policy/never.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_failure.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_request.md +57 -0
- pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +33 -0
- pycodex/prompts/permissions/approval_policy/unless_trusted.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/read_only.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/workspace_write.md +1 -0
- pycodex/prompts/subagent_tools.json +163 -0
- pycodex/protocol.py +347 -0
- pycodex/runtime.py +204 -0
- pycodex/runtime_services.py +409 -0
- pycodex/tools/__init__.py +58 -0
- pycodex/tools/agent_tool_schemas.py +70 -0
- pycodex/tools/apply_patch_tool.py +363 -0
- pycodex/tools/base_tool.py +168 -0
- pycodex/tools/close_agent_tool.py +55 -0
- pycodex/tools/code_mode_manager.py +519 -0
- pycodex/tools/exec_command_tool.py +96 -0
- pycodex/tools/exec_runtime.js +161 -0
- pycodex/tools/exec_tool.py +48 -0
- pycodex/tools/grep_files_tool.py +150 -0
- pycodex/tools/list_dir_tool.py +135 -0
- pycodex/tools/read_file_tool.py +217 -0
- pycodex/tools/request_permissions_tool.py +95 -0
- pycodex/tools/request_user_input_tool.py +167 -0
- pycodex/tools/resume_agent_tool.py +56 -0
- pycodex/tools/send_input_tool.py +106 -0
- pycodex/tools/shell_command_tool.py +107 -0
- pycodex/tools/shell_tool.py +112 -0
- pycodex/tools/spawn_agent_tool.py +97 -0
- pycodex/tools/unified_exec_manager.py +380 -0
- pycodex/tools/update_plan_tool.py +79 -0
- pycodex/tools/view_image_tool.py +111 -0
- pycodex/tools/wait_agent_tool.py +75 -0
- pycodex/tools/wait_tool.py +68 -0
- pycodex/tools/web_search_tool.py +30 -0
- pycodex/tools/write_stdin_tool.py +75 -0
- pycodex/utils/__init__.py +40 -0
- pycodex/utils/dotenv.py +64 -0
- pycodex/utils/get_env.py +218 -0
- pycodex/utils/random_ids.py +19 -0
- pycodex/utils/visualize.py +978 -0
- python_codex-0.1.1.dist-info/METADATA +355 -0
- python_codex-0.1.1.dist-info/RECORD +62 -0
- python_codex-0.1.1.dist-info/entry_points.txt +2 -0
- python_codex-0.1.1.dist-info/licenses/LICENSE +201 -0
- python_codex-0.0.1.dist-info/METADATA +0 -30
- python_codex-0.0.1.dist-info/RECORD +0 -4
- {python_codex-0.0.1.dist-info → python_codex-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""`shell_command` tool for the Python Codex prototype.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the original Codex `shell_command` tool.
|
|
5
|
+
|
|
6
|
+
Expected behavior:
|
|
7
|
+
- Execute a shell-script string in the user's shell context.
|
|
8
|
+
- Accept the same core input shape as Codex: `command: string`, plus `workdir`,
|
|
9
|
+
`timeout_ms`, and `login`.
|
|
10
|
+
- Return a concise text summary including working directory, exit status,
|
|
11
|
+
stdout, and stderr.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from ..protocol import JSONDict, JSONValue
|
|
20
|
+
from .base_tool import BaseTool, ToolContext
|
|
21
|
+
|
|
22
|
+
# Timeout (in milliseconds) used by `ShellCommandTool.run` when the call
# supplies no `timeout_ms`.
DEFAULT_SHELL_TIMEOUT_MS = 30_000
# Maximum number of characters of stdout (and, separately, stderr) kept by
# `_clip_output` before a truncation marker is appended.
MAX_OUTPUT_CHARS = 12_000
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ShellCommandTool(BaseTool):
    """Run a shell-script string through ``bash`` and summarise the result.

    The result is a plain-text summary listing the working directory, the
    exit code (or a timeout notice), and the clipped stdout / stderr.
    Failures that can be attributed to the request (bad arguments, missing
    working directory, shell that cannot be started) are reported as
    ``"Error: ..."`` strings rather than raised.
    """

    name = "shell_command"
    description = "Runs a shell command string and returns its output."
    input_schema = {
        "type": "object",
        "properties": {
            "command": {"type": "string"},
            "workdir": {"type": "string"},
            "timeout_ms": {"type": "integer"},
            "login": {"type": "boolean"},
        },
        "required": ["command"],
    }
    supports_parallel = False

    def __init__(self, cwd: str | Path | None = None) -> None:
        """Remember the base directory used to resolve ``workdir``.

        Args:
            cwd: Base directory; the process working directory is used when
                this is ``None`` or empty.
        """
        self._working_directory = Path(cwd or Path.cwd()).resolve()

    async def run(self, context: ToolContext, args: JSONDict) -> JSONValue:
        """Execute ``args["command"]`` with ``bash`` and return a text summary.

        Args:
            context: Unused by this tool.
            args: Tool arguments. ``command`` (required) is the script text;
                ``workdir`` is resolved against the tool's base directory;
                ``timeout_ms`` defaults to ``DEFAULT_SHELL_TIMEOUT_MS``;
                ``login`` (default true) selects ``-lc`` instead of ``-c``.

        Returns:
            The summary text, or an ``"Error: ..."`` string when the request
            cannot be executed.
        """
        del context
        command = str(args.get("command", "")).strip()
        if not command:
            return "Error: `command` is required."

        # A JSON null is treated like an omitted value; anything that cannot
        # be converted is reported instead of raising out of the tool.
        raw_timeout = args.get("timeout_ms")
        if raw_timeout is None:
            timeout_ms = DEFAULT_SHELL_TIMEOUT_MS
        else:
            try:
                timeout_ms = int(raw_timeout)
            except (TypeError, ValueError):
                return "Error: `timeout_ms` must be an integer."

        login = bool(args.get("login", True))
        working_directory = self._resolve_workdir(args.get("workdir"))
        if not working_directory.is_dir():
            return f"Error: working directory does not exist: {working_directory}"
        shell_args = ["bash", "-lc" if login else "-c", command]

        try:
            process = await asyncio.create_subprocess_exec(
                *shell_args,
                cwd=str(working_directory),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError as exc:
            # e.g. `bash` is not installed or the directory is not accessible.
            return f"Error: failed to start command: {exc}"

        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                process.communicate(),
                # Clamp to at least 1 ms so zero/negative values still wait.
                timeout=max(timeout_ms, 1) / 1000.0,
            )
            timed_out = False
        except asyncio.TimeoutError:
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited between the timeout firing and the kill.
                pass
            # NOTE(review): output already consumed by the cancelled
            # communicate() call may not be included here - confirm.
            stdout_bytes, stderr_bytes = await process.communicate()
            timed_out = True

        stdout = stdout_bytes.decode("utf-8", errors="replace")
        stderr = stderr_bytes.decode("utf-8", errors="replace")
        pieces = [f"Working directory: {working_directory}"]

        if timed_out:
            pieces.append(f"Timeout: exceeded {timeout_ms} ms")
        else:
            pieces.append(f"Exit code: {process.returncode}")

        stdout = self._clip_output(stdout)
        stderr = self._clip_output(stderr)

        if stdout:
            pieces.append("Stdout:")
            pieces.append(stdout)

        if stderr:
            pieces.append("Stderr:")
            pieces.append(stderr)

        return "\n".join(pieces)

    def _resolve_workdir(self, workdir_arg: JSONValue) -> Path:
        """Return the absolute directory to run in.

        ``None`` or ``""`` selects the tool's base directory; a relative
        path is joined onto it; the result is passed through ``resolve()``.
        """
        if workdir_arg in (None, ""):
            return self._working_directory
        workdir = Path(str(workdir_arg))
        if not workdir.is_absolute():
            workdir = self._working_directory / workdir
        return workdir.resolve()

    def _clip_output(self, text: str) -> str:
        """Cut *text* to ``MAX_OUTPUT_CHARS`` characters, marking the cut."""
        if len(text) <= MAX_OUTPUT_CHARS:
            return text
        return text[:MAX_OUTPUT_CHARS] + "\n...[truncated]..."
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""`shell` tool for the Python Codex prototype.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the original Codex `shell` tool.
|
|
5
|
+
|
|
6
|
+
Expected behavior:
|
|
7
|
+
- Execute a command as argv, not as a shell-script string.
|
|
8
|
+
- Accept the same core input shape as Codex: `command: string[]`, plus
|
|
9
|
+
`workdir` and `timeout_ms`.
|
|
10
|
+
- Return a concise text summary including working directory, exit status,
|
|
11
|
+
stdout, and stderr.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from ..protocol import JSONDict, JSONValue
|
|
20
|
+
from .base_tool import BaseTool, ToolContext
|
|
21
|
+
|
|
22
|
+
# Timeout (in milliseconds) used by `ShellTool.run` when the call supplies
# no `timeout_ms`.
DEFAULT_SHELL_TIMEOUT_MS = 30_000
# Maximum number of characters of stdout (and, separately, stderr) kept by
# `_clip_output` before a truncation marker is appended.
MAX_OUTPUT_CHARS = 12_000
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ShellTool(BaseTool):
    """Run a command given as an argv list and summarise the result.

    No shell is involved unless the caller puts one in the argv. The result
    is a plain-text summary listing the working directory, the exit code
    (or a timeout notice), and the clipped stdout / stderr. Failures that
    can be attributed to the request (bad arguments, missing working
    directory, executable that cannot be started) are reported as
    ``"Error: ..."`` strings rather than raised.
    """

    name = "shell"
    description = (
        "Runs a shell command and returns its output. The command must be passed "
        "as argv, typically prefixed with ['bash', '-lc'] for shell syntax."
    )
    input_schema = {
        "type": "object",
        "properties": {
            "command": {
                "type": "array",
                "items": {"type": "string"},
            },
            "workdir": {"type": "string"},
            "timeout_ms": {"type": "integer"},
        },
        "required": ["command"],
    }
    supports_parallel = False

    def __init__(self, cwd: str | Path | None = None) -> None:
        """Remember the base directory used to resolve ``workdir``.

        Args:
            cwd: Base directory; the process working directory is used when
                this is ``None`` or empty.
        """
        self._working_directory = Path(cwd or Path.cwd()).resolve()

    async def run(self, context: ToolContext, args: JSONDict) -> JSONValue:
        """Execute the argv in ``args["command"]`` and return a text summary.

        Args:
            context: Unused by this tool.
            args: Tool arguments. ``command`` (required) is a non-empty list
                of non-empty strings; ``workdir`` is resolved against the
                tool's base directory; ``timeout_ms`` defaults to
                ``DEFAULT_SHELL_TIMEOUT_MS``.

        Returns:
            The summary text, or an ``"Error: ..."`` string when the request
            cannot be executed.
        """
        del context
        command = args.get("command")
        if not isinstance(command, list) or not command:
            return "Error: `command` must be a non-empty string array."
        if not all(isinstance(part, str) and part for part in command):
            return "Error: each `command` entry must be a non-empty string."

        # A JSON null is treated like an omitted value; anything that cannot
        # be converted is reported instead of raising out of the tool.
        raw_timeout = args.get("timeout_ms")
        if raw_timeout is None:
            timeout_ms = DEFAULT_SHELL_TIMEOUT_MS
        else:
            try:
                timeout_ms = int(raw_timeout)
            except (TypeError, ValueError):
                return "Error: `timeout_ms` must be an integer."

        working_directory = self._resolve_workdir(args.get("workdir"))
        if not working_directory.is_dir():
            return f"Error: working directory does not exist: {working_directory}"

        try:
            process = await asyncio.create_subprocess_exec(
                *command,
                cwd=str(working_directory),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
        except OSError as exc:
            # Typically the executable named by command[0] does not exist or
            # is not executable.
            return f"Error: failed to start command: {exc}"

        try:
            stdout_bytes, stderr_bytes = await asyncio.wait_for(
                process.communicate(),
                # Clamp to at least 1 ms so zero/negative values still wait.
                timeout=max(timeout_ms, 1) / 1000.0,
            )
            timed_out = False
        except asyncio.TimeoutError:
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited between the timeout firing and the kill.
                pass
            # NOTE(review): output already consumed by the cancelled
            # communicate() call may not be included here - confirm.
            stdout_bytes, stderr_bytes = await process.communicate()
            timed_out = True

        stdout = stdout_bytes.decode("utf-8", errors="replace")
        stderr = stderr_bytes.decode("utf-8", errors="replace")
        pieces = [f"Working directory: {working_directory}"]

        if timed_out:
            pieces.append(f"Timeout: exceeded {timeout_ms} ms")
        else:
            pieces.append(f"Exit code: {process.returncode}")

        stdout = self._clip_output(stdout)
        stderr = self._clip_output(stderr)

        if stdout:
            pieces.append("Stdout:")
            pieces.append(stdout)

        if stderr:
            pieces.append("Stderr:")
            pieces.append(stderr)

        return "\n".join(pieces)

    def _resolve_workdir(self, workdir_arg: JSONValue) -> Path:
        """Return the absolute directory to run in.

        ``None`` or ``""`` selects the tool's base directory; a relative
        path is joined onto it; the result is passed through ``resolve()``.
        """
        if workdir_arg in (None, ""):
            return self._working_directory
        workdir = Path(str(workdir_arg))
        if not workdir.is_absolute():
            workdir = self._working_directory / workdir
        return workdir.resolve()

    def _clip_output(self, text: str) -> str:
        """Cut *text* to ``MAX_OUTPUT_CHARS`` characters, marking the cut."""
        if len(text) <= MAX_OUTPUT_CHARS:
            return text
        return text[:MAX_OUTPUT_CHARS] + "\n...[truncated]..."
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""`spawn_agent` tool for the Python Codex prototype.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the original Codex `spawn_agent` collaboration tool.
|
|
5
|
+
|
|
6
|
+
Expected behavior:
|
|
7
|
+
- Spawn a sibling agent runtime that can work in parallel with the caller.
|
|
8
|
+
- Optionally seed the spawned agent with the current thread history.
|
|
9
|
+
- Return the new agent identifier plus any user-facing nickname.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from ..protocol import JSONDict, JSONValue
|
|
15
|
+
from ..runtime_services import SubAgentManager
|
|
16
|
+
from .agent_tool_schemas import COLLAB_INPUT_ITEMS_SCHEMA
|
|
17
|
+
from .base_tool import BaseTool, ToolContext
|
|
18
|
+
|
|
19
|
+
# JSON schema describing the object returned by the `spawn_agent` tool.
# Each row below is (property name, JSON type, description).
SPAWN_AGENT_OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        prop_name: {"type": json_type, "description": summary}
        for prop_name, json_type, summary in (
            (
                "agent_id",
                "string",
                "Thread identifier for the spawned agent.",
            ),
            (
                "nickname",
                ["string", "null"],
                "User-facing nickname for the spawned agent when available.",
            ),
        )
    },
    "required": ["agent_id", "nickname"],
    "additionalProperties": False,
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SpawnAgentTool(BaseTool):
    """Tool front-end that asks a `SubAgentManager` to start a new agent.

    The tool validates the loosely-typed arguments, normalises optional
    strings, and forwards everything (plus the caller's history from the
    tool context) to `SubAgentManager.spawn_agent`.
    """

    name = "spawn_agent"
    description = (
        "Spawn a sub-agent for a well-scoped task. "
        "Returns the agent id (and user-facing nickname when available) "
        "to use to communicate with this agent."
    )
    input_schema = {
        "type": "object",
        "properties": {
            "message": {
                "type": "string",
                "description": "Initial plain-text task for the new agent. Use either message or items.",
            },
            "items": COLLAB_INPUT_ITEMS_SCHEMA,
            "agent_type": {
                "type": "string",
                "description": "Optional type name for the new agent.",
            },
            "fork_context": {
                "type": "boolean",
                "description": "When true, fork the current thread history into the new agent before sending the initial prompt.",
            },
            "model": {
                "type": "string",
                "description": "Optional model override for the new agent.",
            },
            "reasoning_effort": {
                "type": "string",
                "description": "Optional reasoning effort override for the new agent.",
            },
        },
        "additionalProperties": False,
    }
    output_schema = SPAWN_AGENT_OUTPUT_SCHEMA
    supports_parallel = False

    def __init__(self, subagent_manager: SubAgentManager) -> None:
        """Store the manager that performs the actual spawn."""
        self._subagent_manager = subagent_manager

    async def run(self, context: ToolContext, args: JSONDict) -> JSONValue:
        """Validate *args* and delegate to the sub-agent manager.

        Returns an error string when `items` is present but not a list, or
        when neither a non-empty `message` nor a non-empty `items` list is
        supplied; otherwise returns whatever `spawn_agent` returns.
        """
        initial_items = args.get("items")
        if not (initial_items is None or isinstance(initial_items, list)):
            return "Error: `items` must be a list when provided."

        initial_message = self._optional_string(args, "message")
        if initial_message is None and not initial_items:
            return "Provide one of: message or items"

        # Optional overrides: blank or missing values are forwarded as None.
        overrides = {
            key: self._optional_string(args, key)
            for key in ("agent_type", "model", "reasoning_effort")
        }
        wants_fork = bool(args.get("fork_context", False))

        return await self._subagent_manager.spawn_agent(
            message=initial_message,
            items=initial_items,
            fork_context=wants_fork,
            history=context.history,
            **overrides,
        )

    def _optional_string(self, args: JSONDict, key: str) -> str | None:
        """Return `args[key]` as a string, or None when missing or empty."""
        raw = args.get(key)
        if raw is None or raw == "":
            return None
        return str(raw)
|
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Shared runtime for `exec_command` / `write_stdin`.
|
|
2
|
+
|
|
3
|
+
Original Codex mapping:
|
|
4
|
+
- Corresponds to the shared unified-exec session manager behind the original
|
|
5
|
+
Codex `exec_command` and `write_stdin` tools.
|
|
6
|
+
|
|
7
|
+
Expected behavior:
|
|
8
|
+
- Start a long-lived command session for `exec_command`.
|
|
9
|
+
- Preserve unread output between calls.
|
|
10
|
+
- Accept follow-up stdin writes and polling through `write_stdin`.
|
|
11
|
+
- Return summaries in the same textual shape Codex tools expect.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import os
|
|
18
|
+
import shlex
|
|
19
|
+
import uuid
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
from loguru import logger
|
|
24
|
+
|
|
25
|
+
# Default wait (ms) before `exec_command` returns its first snapshot.
DEFAULT_EXEC_YIELD_TIME_MS = 10_000
# Default wait (ms) before `write_stdin` returns a snapshot.
DEFAULT_WRITE_STDIN_YIELD_TIME_MS = 250
# Token budget applied to snapshot output when the caller passes no
# `max_output_tokens`.
DEFAULT_MAX_OUTPUT_TOKENS = 10_000
# Default for `exec_command(login=...)`: pass `-lc` rather than `-c` to the shell.
DEFAULT_LOGIN = True
# Default for `exec_command(tty=...)`.
DEFAULT_TTY = False
# First session id handed out by a new `UnifiedExecManager`.
DEFAULT_SESSION_ID_START = 1000
# Bytes-per-token ratio used by the approximate token/byte conversions in
# this module.
APPROX_BYTES_PER_TOKEN = 4
# Default capacity (1 MiB) of a session's unread-output buffer.
UNIFIED_EXEC_OUTPUT_MAX_BYTES = 1024 * 1024
|
|
33
|
+
# JSON schema for the structured form of a unified-exec response.
# Each row below is (property name, JSON type, description).
UNIFIED_EXEC_OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        prop_name: {"type": json_type, "description": summary}
        for prop_name, json_type, summary in (
            (
                "chunk_id",
                "string",
                "Chunk identifier included when the response reports one.",
            ),
            (
                "wall_time_seconds",
                "number",
                "Elapsed wall time spent waiting for output in seconds.",
            ),
            (
                "exit_code",
                "number",
                "Process exit code when the command finished during this call.",
            ),
            (
                "session_id",
                "number",
                "Session identifier to pass to write_stdin when the process is still running.",
            ),
            (
                "original_token_count",
                "number",
                "Approximate token count before output truncation.",
            ),
            (
                "output",
                "string",
                "Command output text, possibly truncated.",
            ),
        )
    },
    "required": ["wall_time_seconds", "output"],
    "additionalProperties": False,
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _approx_token_count(text: str) -> int:
    """Estimate the token count of *text* from its UTF-8 size.

    Empty text counts as zero tokens; anything else is the byte length
    divided by ``APPROX_BYTES_PER_TOKEN``, rounded up, and never below one.
    """
    if not text:
        return 0
    encoded_size = len(text.encode("utf-8"))
    whole, leftover = divmod(encoded_size, APPROX_BYTES_PER_TOKEN)
    estimate = whole + 1 if leftover else whole
    return estimate if estimate > 1 else 1
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _approx_bytes_for_tokens(token_count: int) -> int:
    """Convert a token budget into a byte budget; negative budgets count as zero."""
    usable_tokens = token_count if token_count > 0 else 0
    return usable_tokens * APPROX_BYTES_PER_TOKEN
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _approx_tokens_from_byte_count(byte_count: int) -> int:
    """Convert a byte count into a token estimate, rounding up.

    Zero or negative byte counts yield zero tokens.
    """
    if byte_count <= 0:
        return 0
    whole, leftover = divmod(byte_count, APPROX_BYTES_PER_TOKEN)
    return whole + (1 if leftover else 0)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _split_budget(byte_budget: int) -> tuple[int, int]:
|
|
84
|
+
left_budget = byte_budget // 2
|
|
85
|
+
return left_budget, byte_budget - left_budget
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _split_string(
|
|
89
|
+
text: str,
|
|
90
|
+
beginning_bytes: int,
|
|
91
|
+
end_bytes: int,
|
|
92
|
+
) -> tuple[str, str]:
|
|
93
|
+
if not text:
|
|
94
|
+
return "", ""
|
|
95
|
+
|
|
96
|
+
total_bytes = len(text.encode("utf-8"))
|
|
97
|
+
tail_start_target = max(total_bytes - end_bytes, 0)
|
|
98
|
+
prefix_end = 0
|
|
99
|
+
suffix_start = len(text)
|
|
100
|
+
suffix_started = False
|
|
101
|
+
current_byte = 0
|
|
102
|
+
|
|
103
|
+
for index, char in enumerate(text):
|
|
104
|
+
char_bytes = len(char.encode("utf-8"))
|
|
105
|
+
char_start = current_byte
|
|
106
|
+
char_end = current_byte + char_bytes
|
|
107
|
+
if char_end <= beginning_bytes:
|
|
108
|
+
prefix_end = index + 1
|
|
109
|
+
current_byte = char_end
|
|
110
|
+
continue
|
|
111
|
+
if char_start >= tail_start_target:
|
|
112
|
+
if not suffix_started:
|
|
113
|
+
suffix_start = index
|
|
114
|
+
suffix_started = True
|
|
115
|
+
current_byte = char_end
|
|
116
|
+
continue
|
|
117
|
+
current_byte = char_end
|
|
118
|
+
|
|
119
|
+
if suffix_start < prefix_end:
|
|
120
|
+
suffix_start = prefix_end
|
|
121
|
+
|
|
122
|
+
return text[:prefix_end], text[suffix_start:]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _truncate_text(text: str, max_tokens: int) -> str:
    """Shrink *text* to roughly *max_tokens* tokens by removing its middle.

    Text already within the byte budget is returned unchanged. Otherwise a
    marker stating the approximate number of removed tokens replaces the
    middle, with the byte budget divided between the kept head and tail.
    A zero budget yields the marker alone.
    """
    if not text:
        return ""

    byte_limit = _approx_bytes_for_tokens(max_tokens)
    overflow = len(text.encode("utf-8")) - byte_limit
    if overflow <= 0:
        return text

    dropped_tokens = _approx_tokens_from_byte_count(overflow)
    marker = f"\u2026{dropped_tokens} tokens truncated\u2026"
    if byte_limit == 0:
        return marker

    head, tail = _split_string(text, *_split_budget(byte_limit))
    return head + marker + tail
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _formatted_truncate_text(text: str, max_tokens: int) -> str:
    """Truncate *text* and, when truncation happens, prepend a line count.

    Text that fits the byte budget for *max_tokens* is returned unchanged.
    Otherwise the result starts with ``Total output lines: N`` (counted on
    the untruncated text), a blank line, and then the truncated text.
    """
    fits_budget = len(text.encode("utf-8")) <= _approx_bytes_for_tokens(max_tokens)
    if fits_budget:
        return text

    line_total = len(text.splitlines())
    shortened = _truncate_text(text, max_tokens)
    return f"Total output lines: {line_total}\n\n{shortened}"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass(slots=True)
class _HeadTailBuffer:
    """Bounded byte buffer that keeps the first and the last bytes pushed.

    Half of ``max_bytes`` (rounded down) is reserved for the head and the
    rest for the tail. Once the head is full, further bytes go to the tail,
    which discards its oldest bytes when it overflows.
    """

    # Total capacity shared between `head` and `tail`.
    max_bytes: int = UNIFIED_EXEC_OUTPUT_MAX_BYTES
    # Earliest bytes received since the last drain.
    head: bytearray = field(default_factory=bytearray)
    # Most recent bytes received after the head filled up.
    tail: bytearray = field(default_factory=bytearray)

    def push_chunk(self, chunk: bytes) -> None:
        """Append *chunk*, filling the head first and then rolling the tail."""
        if self.max_bytes <= 0 or not chunk:
            return

        head_cap = self.max_bytes // 2
        tail_cap = self.max_bytes - head_cap
        pending = bytes(chunk)

        free_head = head_cap - len(self.head)
        if free_head > 0:
            self.head += pending[:free_head]
            pending = pending[free_head:]

        if tail_cap <= 0 or not pending:
            return

        self.tail += pending
        overflow = len(self.tail) - tail_cap
        if overflow > 0:
            # Drop the oldest tail bytes so only the newest `tail_cap` remain.
            del self.tail[:overflow]

    def drain_bytes(self) -> bytes:
        """Return head followed by tail as one bytes object and empty both."""
        snapshot = bytes(self.head + self.tail)
        self.head.clear()
        self.tail.clear()
        return snapshot
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@dataclass(slots=True)
class UnifiedExecSession:
    """State kept for one command started through `UnifiedExecManager`."""

    # Identifier reported to callers and used as the key in the manager's
    # session table.
    session_id: int
    # Subprocess handle returned by `asyncio.create_subprocess_exec`.
    process: asyncio.subprocess.Process
    # Event-loop clock reading (`loop.time()`) taken when the session was created.
    start_time: float
    # Shell-quoted argv (`shlex.join`) shown on the "Command:" line of snapshots.
    command_display: str
    # When true, `write_stdin` echoes the written characters into `unread_output`.
    tty: bool
    # Output read from the process that has not yet been returned in a snapshot.
    unread_output: _HeadTailBuffer = field(default_factory=_HeadTailBuffer)
    # Background task pumping process output into `unread_output`; assigned
    # by the manager after construction.
    reader_task: asyncio.Task | None = None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class UnifiedExecManager:
    """Track command sessions shared by `exec_command` and `write_stdin`.

    Each session owns one subprocess whose stdout and stderr are merged and
    pumped into a bounded buffer by a background task. Every public call
    returns a text snapshot of the output accumulated since the previous
    snapshot; a session is dropped once a snapshot observes that its process
    has exited. Request-level failures are reported as ``"Error: ..."``
    strings.
    """

    def __init__(self, cwd: str | Path | None = None) -> None:
        """Create a manager whose relative workdirs resolve against *cwd*.

        Args:
            cwd: Base directory; the process working directory is used when
                this is ``None`` or empty.
        """
        self._default_cwd = Path(cwd or Path.cwd()).resolve()
        self._next_session_id = DEFAULT_SESSION_ID_START
        self._sessions: dict[int, UnifiedExecSession] = {}
        # Guards `_next_session_id` and `_sessions`.
        self._lock = asyncio.Lock()

    async def exec_command(
        self,
        cmd: str,
        workdir: str | None = None,
        shell: str | None = None,
        login: bool = DEFAULT_LOGIN,
        tty: bool = DEFAULT_TTY,
        yield_time_ms: int = DEFAULT_EXEC_YIELD_TIME_MS,
        max_output_tokens: int | None = None,
    ) -> str:
        """Start *cmd* in a shell and return the first output snapshot.

        Args:
            cmd: Script text handed to the shell.
            workdir: Directory to run in; relative paths are resolved
                against the manager's base directory.
            shell: Shell executable; falls back to ``$SHELL`` and then
                ``/bin/bash``.
            login: Use ``-lc`` instead of ``-c`` for POSIX-style shells.
            tty: Only recorded on the session; it makes `write_stdin` echo
                its input into the output buffer.
            yield_time_ms: How long to wait for the process to exit before
                returning (clamped to at least 1 ms).
            max_output_tokens: Token budget for the returned output;
                ``None`` selects ``DEFAULT_MAX_OUTPUT_TOKENS``.

        Returns:
            The snapshot text, or an ``"Error: ..."`` string when the
            process cannot be started.
        """
        session_id = await self._allocate_session_id()
        command = self._build_shell_command(cmd, shell, login)
        cwd = self._resolve_workdir(workdir)
        logger.debug(
            "exec_command start session_id={} shell_command={} cwd={}",
            session_id,
            command,
            cwd,
        )

        try:
            process = await asyncio.create_subprocess_exec(
                *command,
                cwd=str(cwd),
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,
            )
        except OSError as exc:
            # Missing shell executable, missing/inaccessible cwd, etc. Report
            # it the same way other request-level failures are reported.
            logger.debug(
                "exec_command spawn failed session_id={} error={}",
                session_id,
                exc,
            )
            return f"Error: failed to start command: {exc}"

        session = UnifiedExecSession(
            session_id=session_id,
            process=process,
            start_time=asyncio.get_running_loop().time(),
            command_display=shlex.join(command),
            tty=tty,
        )
        session.reader_task = asyncio.create_task(self._pump_output(session))

        async with self._lock:
            self._sessions[session_id] = session

        return await self._wait_and_snapshot(
            session_id,
            max(yield_time_ms, 1),
            max_output_tokens,
        )

    async def write_stdin(
        self,
        session_id: int,
        chars: str = "",
        yield_time_ms: int = DEFAULT_WRITE_STDIN_YIELD_TIME_MS,
        max_output_tokens: int | None = None,
    ) -> str:
        """Send *chars* to a session's stdin (if any) and return a snapshot.

        With empty *chars* this only polls for new output. If the process
        has already closed its stdin or exited, the write is skipped and the
        snapshot reports the current process state.

        Returns:
            The snapshot text, or an ``"Error: ..."`` string when the
            session is unknown or has no stdin pipe.
        """
        session = await self._get_session(session_id)
        if session is None:
            return f"Error: session_id {session_id} is not running."

        if chars:
            stdin = session.process.stdin
            if stdin is None:
                return f"Error: session_id {session_id} does not accept stdin."
            logger.debug("write_stdin session_id={} chars_len={}", session_id, len(chars))
            if session.tty:
                session.unread_output.push_chunk(self._tty_echo(chars))
            try:
                stdin.write(chars.encode("utf-8"))
                await stdin.drain()
            except (BrokenPipeError, ConnectionResetError) as exc:
                # The child closed stdin or exited since the last snapshot.
                # Fall through so the snapshot can report the exit and
                # release the session instead of raising to the caller.
                logger.debug(
                    "write_stdin pipe closed session_id={} error={}",
                    session_id,
                    exc,
                )

        return await self._wait_and_snapshot(
            session_id,
            max(yield_time_ms, 1),
            max_output_tokens,
        )

    async def _allocate_session_id(self) -> int:
        """Return the next unused session id and advance the counter."""
        async with self._lock:
            session_id = self._next_session_id
            self._next_session_id += 1
            return session_id

    async def _get_session(self, session_id: int) -> UnifiedExecSession | None:
        """Look up a registered session, or return None when unknown."""
        async with self._lock:
            return self._sessions.get(session_id)

    async def _wait_and_snapshot(
        self,
        session_id: int,
        yield_time_ms: int,
        max_output_tokens: int | None,
    ) -> str:
        """Wait up to *yield_time_ms* for exit, then format the unread output.

        The unread buffer is drained, so each byte of output is reported at
        most once. When the process has exited, the reader task is awaited
        first so trailing output is included, and the session is closed
        after the snapshot is built.
        """
        session = await self._get_session(session_id)
        if session is None:
            return f"Error: session_id {session_id} is not running."

        loop = asyncio.get_running_loop()
        start_wait = loop.time()
        try:
            await asyncio.wait_for(session.process.wait(), timeout=yield_time_ms / 1000.0)
        except asyncio.TimeoutError:
            # Still running: report whatever output has arrived so far.
            pass

        if session.reader_task is not None and session.process.returncode is not None:
            await session.reader_task

        wall_time = loop.time() - start_wait
        output_bytes = session.unread_output.drain_bytes()
        output_text = output_bytes.decode("utf-8", errors="replace")
        # Estimate before truncating so the caller learns the original size.
        original_token_count = self._estimate_token_count(output_text)
        output_text = self._truncate_output(output_text, max_output_tokens)

        lines = [
            f"Command: {session.command_display}",
            f"Chunk ID: {uuid.uuid4().hex[:6]}",
            f"Wall time: {wall_time:.4f} seconds",
        ]
        if session.process.returncode is None:
            lines.append(f"Process running with session ID {session_id}")
        else:
            lines.append(f"Process exited with code {session.process.returncode}")
        if original_token_count is not None:
            lines.append(f"Original token count: {original_token_count}")
        lines.append("Output:")
        lines.append(output_text)

        if session.process.returncode is not None:
            await self._close_session(session_id)

        return "\n".join(lines)

    async def _close_session(self, session_id: int) -> None:
        """Forget a session and close its stdin pipe if it is still open."""
        async with self._lock:
            session = self._sessions.pop(session_id, None)
        if session is None:
            return
        if session.process.stdin is not None and not session.process.stdin.is_closing():
            session.process.stdin.close()

    async def _pump_output(self, session: UnifiedExecSession) -> None:
        """Copy process output into the session buffer until end of stream."""
        stream = session.process.stdout
        if stream is None:
            return
        while True:
            chunk = await stream.read(4096)
            if not chunk:
                break
            session.unread_output.push_chunk(chunk)

    def _resolve_workdir(self, workdir: str | None) -> Path:
        """Return the absolute directory for *workdir*.

        A falsy value selects the manager's base directory; relative paths
        are joined onto it; the result is passed through ``resolve()``.
        """
        if not workdir:
            return self._default_cwd
        path = Path(workdir)
        if not path.is_absolute():
            path = self._default_cwd / path
        return path.resolve()

    def _build_shell_command(
        self,
        cmd: str,
        shell: str | None,
        login: bool,
    ) -> list[str]:
        """Build the argv that runs *cmd* in the chosen shell.

        ``cmd``/``cmd.exe`` gets ``/C``, any shell whose name contains
        ``powershell`` gets ``-NoProfile -Command``, and every other shell
        gets ``-lc`` (login) or ``-c``.
        """
        shell_path = shell or os.environ.get("SHELL") or "/bin/bash"
        shell_name = Path(shell_path).name.lower()
        if shell_name in {"cmd", "cmd.exe"}:
            return [shell_path, "/C", cmd]
        if "powershell" in shell_name:
            return [shell_path, "-NoProfile", "-Command", cmd]
        return [shell_path, "-lc" if login else "-c", cmd]

    def _estimate_token_count(self, output: str) -> int | None:
        """Return the approximate token count of *output*."""
        return _approx_token_count(output)

    def _truncate_output(self, output: str, max_output_tokens: int | None) -> str:
        """Truncate *output* to the given token budget (default when None)."""
        token_budget = DEFAULT_MAX_OUTPUT_TOKENS if max_output_tokens is None else max_output_tokens
        return _formatted_truncate_text(output, max(token_budget, 0))

    def _tty_echo(self, chars: str) -> bytes:
        """Return *chars* as UTF-8 bytes with every ``\\n`` turned into ``\\r\\n``."""
        normalized = chars.replace("\n", "\r\n")
        return normalized.encode("utf-8")
|