python-codex 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycodex/__init__.py +139 -2
- pycodex/agent.py +290 -0
- pycodex/cli.py +641 -0
- pycodex/collaboration.py +21 -0
- pycodex/context.py +580 -0
- pycodex/doctor.py +360 -0
- pycodex/model.py +533 -0
- pycodex/prompts/collaboration_default.md +11 -0
- pycodex/prompts/collaboration_plan.md +128 -0
- pycodex/prompts/default_base_instructions.md +275 -0
- pycodex/prompts/exec_tools.json +411 -0
- pycodex/prompts/models.json +847 -0
- pycodex/prompts/permissions/approval_policy/never.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_failure.md +1 -0
- pycodex/prompts/permissions/approval_policy/on_request.md +57 -0
- pycodex/prompts/permissions/approval_policy/on_request_rule_request_permission.md +33 -0
- pycodex/prompts/permissions/approval_policy/unless_trusted.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/danger_full_access.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/read_only.md +1 -0
- pycodex/prompts/permissions/sandbox_mode/workspace_write.md +1 -0
- pycodex/prompts/subagent_tools.json +163 -0
- pycodex/protocol.py +347 -0
- pycodex/runtime.py +200 -0
- pycodex/runtime_services.py +408 -0
- pycodex/tools/__init__.py +58 -0
- pycodex/tools/agent_tool_schemas.py +70 -0
- pycodex/tools/apply_patch_tool.py +363 -0
- pycodex/tools/base_tool.py +168 -0
- pycodex/tools/close_agent_tool.py +55 -0
- pycodex/tools/code_mode_manager.py +519 -0
- pycodex/tools/exec_command_tool.py +96 -0
- pycodex/tools/exec_runtime.js +161 -0
- pycodex/tools/exec_tool.py +48 -0
- pycodex/tools/grep_files_tool.py +150 -0
- pycodex/tools/list_dir_tool.py +135 -0
- pycodex/tools/read_file_tool.py +217 -0
- pycodex/tools/request_permissions_tool.py +95 -0
- pycodex/tools/request_user_input_tool.py +167 -0
- pycodex/tools/resume_agent_tool.py +56 -0
- pycodex/tools/send_input_tool.py +106 -0
- pycodex/tools/shell_command_tool.py +107 -0
- pycodex/tools/shell_tool.py +112 -0
- pycodex/tools/spawn_agent_tool.py +97 -0
- pycodex/tools/unified_exec_manager.py +380 -0
- pycodex/tools/update_plan_tool.py +79 -0
- pycodex/tools/view_image_tool.py +111 -0
- pycodex/tools/wait_agent_tool.py +75 -0
- pycodex/tools/wait_tool.py +68 -0
- pycodex/tools/web_search_tool.py +30 -0
- pycodex/tools/write_stdin_tool.py +75 -0
- pycodex/utils/__init__.py +40 -0
- pycodex/utils/dotenv.py +64 -0
- pycodex/utils/get_env.py +218 -0
- pycodex/utils/random_ids.py +19 -0
- pycodex/utils/visualize.py +978 -0
- python_codex-0.1.0.dist-info/METADATA +267 -0
- python_codex-0.1.0.dist-info/RECORD +60 -0
- python_codex-0.1.0.dist-info/entry_points.txt +2 -0
- python_codex-0.1.0.dist-info/licenses/LICENSE +201 -0
- python_codex-0.0.1.dist-info/METADATA +0 -30
- python_codex-0.0.1.dist-info/RECORD +0 -4
- {python_codex-0.0.1.dist-info → python_codex-0.1.0.dist-info}/WHEEL +0 -0
pycodex/model.py
ADDED
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import urllib.parse
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from dataclasses import dataclass, field, replace
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Protocol
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import tomllib
|
|
16
|
+
except ModuleNotFoundError: # pragma: no cover - Python 3.10 path
|
|
17
|
+
import tomli as tomllib
|
|
18
|
+
|
|
19
|
+
from .protocol import (
|
|
20
|
+
AssistantMessage,
|
|
21
|
+
ModelResponse,
|
|
22
|
+
ModelStreamEvent,
|
|
23
|
+
Prompt,
|
|
24
|
+
ReasoningItem,
|
|
25
|
+
ToolCall,
|
|
26
|
+
)
|
|
27
|
+
from .utils import build_user_agent, uuid7_string
|
|
28
|
+
|
|
29
|
+
# Default location of the Codex configuration file: ~/.codex/config.toml.
DEFAULT_CODEX_CONFIG_PATH = Path.home().joinpath(".codex", "config.toml")

# Default value sent in the ``originator`` request header.
DEFAULT_ORIGINATOR = "pycodex"

# Signature of callbacks that receive streaming events: one event in, nothing out.
ModelStreamEventHandler = Callable[[ModelStreamEvent], None]

# Handler used when the caller is not interested in streaming events.
NOOP_MODEL_STREAM_EVENT_HANDLER: ModelStreamEventHandler = lambda _event: None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ModelClient(Protocol):
    """Structural interface for objects that can answer a prompt."""

    async def complete(
        self,
        prompt: Prompt,
        event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
    ) -> ModelResponse:
        """Return the next batch of model output items for the current prompt.

        Args:
            prompt: The prompt to send to the model.
            event_handler: Callback typed to receive ``ModelStreamEvent``
                objects; defaults to a handler that ignores them.
        """
        ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True, slots=True)
class ResponsesProviderConfig:
    """Immutable provider settings, usually loaded from a Codex ``config.toml``."""

    # Model identifier (the config's ``model`` key).
    model: str
    # Key of the selected entry in the config's ``model_providers`` table.
    provider_name: str
    # Provider base URL (the provider entry's ``base_url``).
    base_url: str
    # Name of the environment variable holding the API key (``env_key``).
    api_key_env: str
    # Wire protocol; ``from_codex_config`` only accepts "responses".
    wire_api: str = "responses"
    # Provider ``query_params`` with keys and values coerced to ``str``.
    query_params: dict[str, str] = field(default_factory=dict)
    # Optional ``model_reasoning_effort`` / ``model_reasoning_summary`` values.
    reasoning_effort: str | None = None
    reasoning_summary: str | None = None
    # Optional ``model_verbosity`` value.
    verbosity: str | None = None
    # Optional ``sandbox_mode`` value; stored here but not interpreted.
    sandbox_mode: str | None = None
    # Comma-separated enabled beta features, or None when there are none.
    beta_features_header: str | None = None

    @classmethod
    def from_codex_config(
        cls,
        config_path: str | Path = DEFAULT_CODEX_CONFIG_PATH,
        profile: str | None = None,
    ) -> ResponsesProviderConfig:
        """Load provider settings from a Codex TOML configuration file.

        Args:
            config_path: Path of the TOML file to read.
            profile: Optional key in the ``profiles`` table. Its entries are
                shallow-merged over the top-level keys.

        Returns:
            A config for the provider named by ``model_provider``.

        Raises:
            ValueError: If ``profile`` is unknown, the provider's ``wire_api``
                is not ``"responses"``, or the provider has no ``env_key``.
            KeyError: If ``model_provider``, ``model``, the provider entry, or
                its ``base_url`` is missing.
        """
        # TOML documents are UTF-8 by specification, so decode explicitly
        # instead of relying on the platform's locale encoding.
        data = tomllib.loads(Path(config_path).read_text(encoding="utf-8"))
        selected = dict(data)
        if profile is not None:
            overrides = data.get("profiles", {}).get(profile)
            if overrides is None:
                raise ValueError(f"unknown Codex profile: {profile}")
            selected.update(overrides)

        provider_name = selected["model_provider"]
        # Provider definitions always come from the top-level table, even
        # when a profile chose the provider name.
        provider = data["model_providers"][provider_name]
        wire_api = provider.get("wire_api", "responses")
        if wire_api != "responses":
            raise ValueError(f"unsupported wire_api for Python client: {wire_api}")

        api_key_env = provider.get("env_key")
        if not api_key_env:
            raise ValueError(
                f"provider {provider_name} does not define env_key in Codex config"
            )

        query_params = {
            str(key): str(value)
            for key, value in provider.get("query_params", {}).items()
        }
        features = selected.get("features", {})
        beta_features: list[str] = []
        # Only an explicit boolean ``true`` enables the feature.
        if isinstance(features, dict) and features.get("guardian_approval") is True:
            beta_features.append("guardian_approval")
        return cls(
            model=selected["model"],
            provider_name=provider_name,
            base_url=provider["base_url"],
            api_key_env=api_key_env,
            wire_api=wire_api,
            query_params=query_params,
            reasoning_effort=selected.get("model_reasoning_effort"),
            reasoning_summary=selected.get("model_reasoning_summary"),
            verbosity=selected.get("model_verbosity"),
            sandbox_mode=selected.get("sandbox_mode"),
            beta_features_header=",".join(beta_features) or None,
        )

    def api_key(self) -> str:
        """Return the API key read from the ``api_key_env`` environment variable.

        Raises:
            RuntimeError: If the variable is unset or empty.
        """
        value = os.environ.get(self.api_key_env, "")
        if not value:
            raise RuntimeError(
                f"missing API key environment variable: {self.api_key_env}"
            )
        return value

    def with_overrides(
        self,
        model: str | None = None,
        reasoning_effort: str | None = None,
    ) -> ResponsesProviderConfig:
        """Return a copy with ``model`` and/or ``reasoning_effort`` replaced.

        Passing ``None`` for an argument keeps the current value.
        """
        return replace(
            self,
            model=self.model if model is None else model,
            reasoning_effort=(
                self.reasoning_effort
                if reasoning_effort is None
                else reasoning_effort
            ),
        )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class ResponsesApiError(RuntimeError):
    """Error raised when a request to the provider API or its response fails."""
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class ResponsesModelClient:
    """Minimal OpenAI-compatible Responses API client.

    This implementation is intentionally narrow: it supports the subset needed
    by the current AgentLoop abstraction, namely assistant text and function
    tool calls over the streaming `/responses` endpoint.
    """

    def __init__(
        self,
        config: ResponsesProviderConfig,
        timeout_seconds: float = 120.0,
        session_id: str | None = None,
        originator: str = DEFAULT_ORIGINATOR,
        user_agent: str | None = None,
        openai_subagent: str | None = None,
    ) -> None:
        """Store the provider settings and per-client request metadata.

        Args:
            config: Provider settings; ``config.model`` is exposed as ``model``.
            timeout_seconds: Timeout handed to each HTTP request.
            session_id: Session identifier; a fresh ``uuid7_string()`` is
                generated when this is ``None`` or empty.
            originator: Value of the ``originator`` header.
            user_agent: User-agent string; built from ``originator`` via
                ``build_user_agent`` when ``None`` or empty.
            openai_subagent: Optional value for the ``x-openai-subagent`` header.
        """
        self._config = config
        self.model = config.model
        self._timeout_seconds = timeout_seconds
        # Falsy values (None or "") fall back to a generated default.
        self._session_id = session_id if session_id else uuid7_string()
        self._originator = originator
        self._user_agent = user_agent if user_agent else build_user_agent(originator)
        self._openai_subagent = openai_subagent
|
|
158
|
+
|
|
159
|
+
@classmethod
def from_codex_config(
    cls,
    config_path: str | Path = DEFAULT_CODEX_CONFIG_PATH,
    profile: str | None = None,
    timeout_seconds: float = 120.0,
    originator: str = DEFAULT_ORIGINATOR,
    user_agent: str | None = None,
) -> ResponsesModelClient:
    """Create a client from a Codex configuration file.

    The provider settings are loaded with
    ``ResponsesProviderConfig.from_codex_config(config_path, profile)``; the
    remaining arguments are forwarded to the constructor.
    """
    provider_config = ResponsesProviderConfig.from_codex_config(config_path, profile)
    return cls(
        provider_config,
        timeout_seconds,
        originator=originator,
        user_agent=user_agent,
    )
|
|
170
|
+
|
|
171
|
+
def with_overrides(
    self,
    model: str | None = None,
    reasoning_effort: str | None = None,
    session_id: str | None = None,
    openai_subagent: str | None = None,
) -> ResponsesModelClient:
    """Return a new client that differs only in the given settings.

    ``None`` keeps the current value of an argument. A falsy ``model``
    (``None`` or ``""``) keeps the current model. Timeout, originator and
    user agent are always carried over.
    """
    derived_config = self._config.with_overrides(
        model or self.model,
        reasoning_effort,
    )
    if session_id is None:
        session_id = self._session_id
    if openai_subagent is None:
        openai_subagent = self._openai_subagent
    return ResponsesModelClient(
        derived_config,
        self._timeout_seconds,
        session_id=session_id,
        originator=self._originator,
        user_agent=self._user_agent,
        openai_subagent=openai_subagent,
    )
|
|
193
|
+
|
|
194
|
+
def responses_url(self) -> str:
    """Return the ``/responses`` endpoint URL, including configured query params."""
    endpoint = self._config.base_url.rstrip("/") + "/responses"
    params = self._config.query_params
    if not params:
        return endpoint
    return endpoint + "?" + urllib.parse.urlencode(params)
|
|
200
|
+
|
|
201
|
+
def models_url(self) -> str:
    """Return the ``/models`` endpoint URL, including configured query params."""
    endpoint = self._config.base_url.rstrip("/") + "/models"
    params = self._config.query_params
    if not params:
        return endpoint
    return endpoint + "?" + urllib.parse.urlencode(params)
|
|
207
|
+
|
|
208
|
+
async def list_models(self) -> list[str]:
    """Return the provider's model ids, running the blocking call in a worker thread."""
    model_ids = await asyncio.to_thread(self._list_models_sync)
    return model_ids
|
|
210
|
+
|
|
211
|
+
async def complete(
    self,
    prompt: Prompt,
    event_handler: ModelStreamEventHandler = NOOP_MODEL_STREAM_EVENT_HANDLER,
) -> ModelResponse:
    """Send ``prompt`` and return the parsed model response.

    The blocking request runs in a worker thread via ``asyncio.to_thread``,
    so ``event_handler`` is invoked from that thread.
    """
    model_response = await asyncio.to_thread(
        self._complete_sync,
        prompt,
        event_handler,
    )
    return model_response
|
|
217
|
+
|
|
218
|
+
def _complete_sync(
    self,
    prompt: Prompt,
    event_handler: ModelStreamEventHandler,
) -> ModelResponse:
    """POST the prompt to the responses endpoint and parse the streamed reply.

    Raises:
        ResponsesApiError: If the HTTP status is 400 or above (the message
            carries the first 500 characters of the body), or if ``requests``
            reports a transport error.
    """
    encoded_body = json.dumps(self._build_payload(prompt)).encode("utf-8")
    endpoint = self.responses_url()
    request = requests.PreparedRequest()
    request.prepare(
        method="POST",
        url=endpoint,
        headers=self._build_headers(prompt),
        data=encoded_body,
    )
    try:
        with requests.Session() as http:
            # Start from the session's environment-derived settings, then
            # let an explicitly configured CA bundle take precedence.
            send_options = http.merge_environment_settings(
                request.url,
                proxies={},
                stream=True,
                verify=None,
                cert=None,
            )
            ca_override = _requests_verify_setting()
            if ca_override is not None:
                send_options["verify"] = ca_override
            reply = http.send(
                request,
                timeout=self._timeout_seconds,
                allow_redirects=False,
                **send_options,
            )
            with reply:
                if reply.status_code < 400:
                    # chunk_size=1 presumably keeps latency low so each SSE
                    # line is surfaced as soon as it arrives.
                    return self._parse_stream(
                        reply.iter_lines(chunk_size=1, decode_unicode=False),
                        event_handler,
                    )
                failure_text = reply.text
                raise ResponsesApiError(
                    f"responses request failed with status {reply.status_code}: "
                    f"{failure_text[:500]}"
                )
    except requests.RequestException as exc:
        raise ResponsesApiError(f"responses request failed: {exc}") from exc
|
|
264
|
+
|
|
265
|
+
def _build_payload(self, prompt: Prompt) -> dict[str, object]:
    """Assemble the JSON body for a streaming ``/responses`` request.

    ``reasoning`` and ``text`` are included only when the corresponding
    config values are set.
    """
    cfg = self._config
    request_body: dict[str, object] = {
        "model": self.model,
        "instructions": prompt.base_instructions or "",
        "input": [entry.serialize() for entry in prompt.input],
        "tools": [spec.serialize() for spec in prompt.tools],
        "tool_choice": "auto",
        "parallel_tool_calls": prompt.parallel_tool_calls,
        "store": False,
        "stream": True,
        "include": ["reasoning.encrypted_content"],
        "prompt_cache_key": self._session_id,
    }

    reasoning_options: dict[str, str] = {
        option: setting
        for option, setting in (
            ("effort", cfg.reasoning_effort),
            ("summary", cfg.reasoning_summary),
        )
        if setting is not None
    }
    if reasoning_options:
        request_body["reasoning"] = reasoning_options

    if cfg.verbosity is not None:
        request_body["text"] = {"verbosity": cfg.verbosity}

    return request_body
|
|
294
|
+
|
|
295
|
+
def _list_models_sync(self) -> list[str]:
    """GET the models endpoint and return the non-empty model ids it lists.

    Returns:
        The stripped ``id`` of every object in the response's ``data`` list,
        in response order. Entries that are not objects, or whose id is
        missing, null or blank, are skipped.

    Raises:
        ResponsesApiError: On a transport error, an HTTP status of 400 or
            above, a body that is not valid JSON, or a payload without a
            ``data`` list.
    """
    prepared = requests.PreparedRequest()
    prepared.prepare(
        method="GET",
        url=self.models_url(),
        headers=self._build_model_list_headers(),
    )
    try:
        with requests.Session() as session:
            settings = session.merge_environment_settings(
                prepared.url,
                proxies={},
                stream=False,
                verify=None,
                cert=None,
            )
            # An explicitly configured CA bundle overrides the merged value.
            verify = _requests_verify_setting()
            if verify is not None:
                settings["verify"] = verify
            response = session.send(
                prepared,
                timeout=self._timeout_seconds,
                allow_redirects=False,
                **settings,
            )
            with response:
                if response.status_code >= 400:
                    raise ResponsesApiError(
                        f"models request failed with status {response.status_code}: "
                        f"{response.text[:500]}"
                    )
                try:
                    payload = response.json()
                except ValueError as exc:
                    # Older ``requests`` releases raise a plain ValueError
                    # subclass here that is not a RequestException.
                    raise ResponsesApiError(
                        f"models response is not valid JSON: {exc}"
                    ) from exc
    except requests.RequestException as exc:
        raise ResponsesApiError(f"models request failed: {exc}") from exc

    # A JSON array or scalar at the top level has no ``data`` member.
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        raise ResponsesApiError("models response is missing `data` list")
    models: list[str] = []
    for item in data:
        if not isinstance(item, dict):
            continue
        raw_id = item.get("id")
        if raw_id is None:
            # ``"id": null`` must not become the literal model name "None".
            continue
        model_id = str(raw_id).strip()
        if model_id:
            models.append(model_id)
    return models
|
|
341
|
+
|
|
342
|
+
def _build_headers(self, prompt: Prompt) -> dict[str, str]:
    """Return the HTTP headers for a streaming ``/responses`` request.

    The beta-features, subagent and turn-metadata headers are added only
    when their source values are not ``None``.
    """
    session_id = self._session_id
    headers = {
        "content-type": "application/json",
        "accept": "text/event-stream",
        "authorization": f"Bearer {self._config.api_key()}",
        "x-client-request-id": session_id,
        "session_id": session_id,
        "originator": self._originator,
        "user-agent": self._user_agent,
    }
    optional_headers = (
        ("x-codex-beta-features", self._config.beta_features_header),
        ("x-openai-subagent", self._openai_subagent),
    )
    for header_name, header_value in optional_headers:
        if header_value is not None:
            headers[header_name] = header_value
    metadata = prompt.turn_metadata
    if metadata is not None:
        # Compact separators keep the JSON header value as short as possible.
        headers["x-codex-turn-metadata"] = json.dumps(metadata, separators=(",", ":"))
    return headers
|
|
362
|
+
|
|
363
|
+
def _build_model_list_headers(self) -> dict[str, str]:
    """Return the HTTP headers for the JSON ``/models`` request.

    The beta-features and subagent headers are added only when set.
    """
    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {self._config.api_key()}",
        "originator": self._originator,
        "user-agent": self._user_agent,
    }
    optional_headers = (
        ("x-codex-beta-features", self._config.beta_features_header),
        ("x-openai-subagent", self._openai_subagent),
    )
    for header_name, header_value in optional_headers:
        if header_value is not None:
            headers[header_name] = header_value
    return headers
|
|
375
|
+
|
|
376
|
+
def _parse_stream(
    self,
    response,
    event_handler: ModelStreamEventHandler,
) -> ModelResponse:
    """Consume the SSE stream and collect the completed output items.

    Handled event types:

    * ``response.output_text.delta`` emits an ``assistant_delta`` event.
    * ``response.output_item.done`` handles a finished item. A
      ``web_search_call`` item only emits a ``tool_call`` event and is not
      collected. Any other item goes through ``_parse_output_item``; a
      recognised item is collected, and a ``ToolCall`` also emits a
      ``tool_call`` event.
    * ``response.completed`` stops reading.
    * ``response.failed`` raises.

    All other event types are ignored.

    Args:
        response: Iterable of raw SSE lines (bytes).
        event_handler: Callback invoked for each streaming event.

    Raises:
        ResponsesApiError: If an event carries malformed JSON, the stream
            reports failure, or it ends before ``response.completed``.
    """
    items: list[AssistantMessage | ToolCall | ReasoningItem] = []
    saw_completed = False

    for event_name, data in self._iter_sse_events(response):
        # Some OpenAI-compatible servers end streams with a ``[DONE]``
        # sentinel, which is not JSON and carries no information.
        if not data or data.strip() == "[DONE]":
            continue
        try:
            payload = json.loads(data)
        except json.JSONDecodeError as exc:
            raise ResponsesApiError(
                f"responses stream sent malformed JSON: {exc}"
            ) from exc
        if not isinstance(payload, dict):
            # Only JSON objects can carry an event type; skip anything else.
            continue
        # Prefer the type inside the payload; fall back to the SSE event name.
        event_type = payload.get("type", event_name)

        if event_type == "response.output_text.delta":
            event_handler(
                ModelStreamEvent(
                    kind="assistant_delta",
                    payload={"delta": str(payload.get("delta", ""))},
                )
            )
            continue

        if event_type == "response.output_item.done":
            item_payload = payload.get("item", {})
            if not isinstance(item_payload, dict):
                continue
            if item_payload.get("type") == "web_search_call":
                action_payload = item_payload.get("action")
                event_payload = {
                    "call_id": str(item_payload.get("id", "web_search")),
                    "tool_name": "web_search",
                }
                if isinstance(action_payload, dict):
                    event_payload["action_type"] = str(
                        action_payload.get("type", "")
                    )
                    if "query" in action_payload:
                        event_payload["query"] = str(action_payload.get("query", ""))
                    queries = action_payload.get("queries")
                    if isinstance(queries, list):
                        event_payload["queries"] = [
                            str(query) for query in queries if str(query).strip()
                        ]
                    if "url" in action_payload:
                        event_payload["url"] = str(action_payload.get("url", ""))
                    if "pattern" in action_payload:
                        event_payload["pattern"] = str(
                            action_payload.get("pattern", "")
                        )
                event_handler(
                    ModelStreamEvent(
                        kind="tool_call",
                        payload=event_payload,
                    )
                )
                continue

            parsed = self._parse_output_item(item_payload)
            if parsed is not None:
                if isinstance(parsed, ToolCall):
                    event_handler(
                        ModelStreamEvent(
                            kind="tool_call",
                            payload={
                                "call_id": parsed.call_id,
                                "tool_name": parsed.name,
                            },
                        )
                    )
                items.append(parsed)
            continue

        if event_type == "response.completed":
            saw_completed = True
            break

        if event_type == "response.failed":
            # Tolerate a null or malformed ``response`` / ``error`` member so
            # the failure still surfaces as ResponsesApiError.
            failed_response = payload.get("response")
            error = (
                failed_response.get("error")
                if isinstance(failed_response, dict)
                else None
            )
            message = error.get("message") if isinstance(error, dict) else None
            raise ResponsesApiError(message or "responses stream failed")

    if not saw_completed:
        raise ResponsesApiError("responses stream ended before response.completed")

    return ModelResponse(items=items)
|
|
463
|
+
|
|
464
|
+
def _parse_output_item(
    self,
    item: dict[str, object],
) -> AssistantMessage | ToolCall | ReasoningItem | None:
    """Convert one finished output item into its protocol object.

    Returns:
        * ``ReasoningItem`` wrapping a copy of the item for ``reasoning``.
        * ``AssistantMessage`` with the joined ``output_text`` parts for an
          assistant ``message``.
        * ``ToolCall`` with decoded JSON arguments for ``function_call``.
        * ``ToolCall`` with the raw ``input`` string and
          ``tool_type="custom"`` for ``custom_tool_call``.
        * ``None`` for any other item.

    Raises:
        ResponsesApiError: If function-call arguments are not valid JSON or
            do not decode to an object.
        KeyError: If a tool-call item lacks ``call_id`` or ``name``.
    """
    item_type = item.get("type")
    if item_type == "reasoning":
        return ReasoningItem(payload=dict(item))

    if item_type == "message" and item.get("role") == "assistant":
        content = item.get("content")
        if not isinstance(content, list):
            # ``"content": null`` (or any non-list) means no text parts.
            content = []
        text_parts = [
            str(part.get("text", ""))
            for part in content
            if isinstance(part, dict) and part.get("type") == "output_text"
        ]
        return AssistantMessage(text="".join(text_parts))

    if item_type == "function_call":
        # Missing or empty arguments are treated as an empty object.
        raw_arguments = str(item.get("arguments", "") or "{}")
        try:
            arguments = json.loads(raw_arguments)
        except json.JSONDecodeError as exc:
            # Report bad JSON the same way as the non-object case below.
            raise ResponsesApiError(
                f"function call arguments are not valid JSON: {exc}"
            ) from exc
        if not isinstance(arguments, dict):
            raise ResponsesApiError(
                f"function call arguments must decode to an object, got {type(arguments).__name__}"
            )
        return ToolCall(
            call_id=str(item["call_id"]),
            name=str(item["name"]),
            arguments=arguments,
        )

    if item_type == "custom_tool_call":
        return ToolCall(
            call_id=str(item["call_id"]),
            name=str(item["name"]),
            arguments=str(item.get("input", "")),
            tool_type="custom",
        )

    return None
|
|
502
|
+
|
|
503
|
+
def _iter_sse_events(self, response):
    """Yield ``(event_name, data)`` pairs from an iterable of raw SSE lines.

    Args:
        response: Iterable of ``bytes`` lines. Invalid UTF-8 is replaced
            rather than raising.

    Yields:
        One tuple per event that has at least one ``data`` line. The event
        name defaults to ``"message"``; multiple data lines are joined with
        ``"\n"``. An event still pending when the input ends is flushed.
    """
    event_name: str | None = None
    data_lines: list[str] = []

    for raw_line in response:
        line = raw_line.decode("utf-8", errors="replace").rstrip("\r\n")
        if not line:
            # A blank line dispatches the pending event, if it has data.
            if data_lines:
                yield event_name or "message", "\n".join(data_lines)
            event_name = None
            data_lines = []
            continue

        if line.startswith(":"):
            # Comment line, e.g. a keep-alive.
            continue

        field_name, _, value = line.partition(":")
        # The SSE format strips exactly one leading space from a field
        # value; any further whitespace belongs to the value itself.
        value = value.removeprefix(" ")
        if field_name == "event":
            event_name = value
        elif field_name == "data":
            data_lines.append(value)

    if data_lines:
        yield event_name or "message", "\n".join(data_lines)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _requests_verify_setting() -> str | bool | None:
    """Return the first non-blank CA bundle path found in the environment.

    ``REQUESTS_CA_BUNDLE``, ``CURL_CA_BUNDLE`` and ``SSL_CERT_FILE`` are
    checked in that order. Surrounding whitespace is stripped. ``None`` is
    returned when none of them holds a non-blank value.
    """
    candidates = (
        os.environ.get(variable, "").strip()
        for variable in ("REQUESTS_CA_BUNDLE", "CURL_CA_BUNDLE", "SSL_CERT_FILE")
    )
    return next((path for path in candidates if path), None)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# Collaboration Mode: Default
|
|
2
|
+
|
|
3
|
+
You are now in Default mode. Any previous instructions for other modes (e.g. Plan mode) are no longer active.
|
|
4
|
+
|
|
5
|
+
Your active mode changes only when new developer instructions with a different `<collaboration_mode>...</collaboration_mode>` change it; user requests or tool descriptions do not change mode by themselves. Known mode names are Default and Plan.
|
|
6
|
+
|
|
7
|
+
## request_user_input availability
|
|
8
|
+
|
|
9
|
+
The `request_user_input` tool is unavailable in Default mode. If you call it while in Default mode, it will return an error.
|
|
10
|
+
|
|
11
|
+
In Default mode, strongly prefer making reasonable assumptions and executing the user's request rather than stopping to ask questions. If you absolutely must ask a question because the answer cannot be discovered from local context and a reasonable assumption would be risky, ask the user directly with a concise plain-text question. Never write a multiple choice question as a textual assistant message.
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Plan Mode (Conversational)
|
|
2
|
+
|
|
3
|
+
You work in 3 phases, and you should *chat your way* to a great plan before finalizing it. A great plan is very detailed—intent- and implementation-wise—so that it can be handed to another engineer or agent to be implemented right away. It must be **decision complete**, where the implementer does not need to make any decisions.
|
|
4
|
+
|
|
5
|
+
## Mode rules (strict)
|
|
6
|
+
|
|
7
|
+
You are in **Plan Mode** until a developer message explicitly ends it.
|
|
8
|
+
|
|
9
|
+
Plan Mode is not changed by user intent, tone, or imperative language. If a user asks for execution while still in Plan Mode, treat it as a request to **plan the execution**, not perform it.
|
|
10
|
+
|
|
11
|
+
## Plan Mode vs update_plan tool
|
|
12
|
+
|
|
13
|
+
Plan Mode is a collaboration mode that can involve requesting user input and eventually issuing a `<proposed_plan>` block.
|
|
14
|
+
|
|
15
|
+
Separately, `update_plan` is a checklist/progress/TODOs tool; it does not enter or exit Plan Mode. Do not confuse it with Plan mode or try to use it while in Plan mode. If you try to use `update_plan` in Plan mode, it will return an error.
|
|
16
|
+
|
|
17
|
+
## Execution vs. mutation in Plan Mode
|
|
18
|
+
|
|
19
|
+
You may explore and execute **non-mutating** actions that improve the plan. You must not perform **mutating** actions.
|
|
20
|
+
|
|
21
|
+
### Allowed (non-mutating, plan-improving)
|
|
22
|
+
|
|
23
|
+
Actions that gather truth, reduce ambiguity, or validate feasibility without changing repo-tracked state. Examples:
|
|
24
|
+
|
|
25
|
+
* Reading or searching files, configs, schemas, types, manifests, and docs
|
|
26
|
+
* Static analysis, inspection, and repo exploration
|
|
27
|
+
* Dry-run style commands when they do not edit repo-tracked files
|
|
28
|
+
* Tests, builds, or checks that may write to caches or build artifacts (for example, `target/`, `.cache/`, or snapshots) so long as they do not edit repo-tracked files
|
|
29
|
+
|
|
30
|
+
### Not allowed (mutating, plan-executing)
|
|
31
|
+
|
|
32
|
+
Actions that implement the plan or change repo-tracked state. Examples:
|
|
33
|
+
|
|
34
|
+
* Editing or writing files
|
|
35
|
+
* Running formatters or linters that rewrite files
|
|
36
|
+
* Applying patches, migrations, or codegen that updates repo-tracked files
|
|
37
|
+
* Side-effectful commands whose purpose is to carry out the plan rather than refine it
|
|
38
|
+
|
|
39
|
+
When in doubt: if the action would reasonably be described as "doing the work" rather than "planning the work," do not do it.
|
|
40
|
+
|
|
41
|
+
## PHASE 1 — Ground in the environment (explore first, ask second)
|
|
42
|
+
|
|
43
|
+
Begin by grounding yourself in the actual environment. Eliminate unknowns in the prompt by discovering facts, not by asking the user. Resolve all questions that can be answered through exploration or inspection. Identify missing or ambiguous details only if they cannot be derived from the environment. Silent exploration between turns is allowed and encouraged.
|
|
44
|
+
|
|
45
|
+
Before asking the user any question, perform at least one targeted non-mutating exploration pass (for example: search relevant files, inspect likely entrypoints/configs, confirm current implementation shape), unless no local environment/repo is available.
|
|
46
|
+
|
|
47
|
+
Exception: you may ask clarifying questions about the user's prompt before exploring, ONLY if there are obvious ambiguities or contradictions in the prompt itself. However, if ambiguity might be resolved by exploring, always prefer exploring first.
|
|
48
|
+
|
|
49
|
+
Do not ask questions that can be answered from the repo or system (for example, "where is this struct?" or "which UI component should we use?" when exploration can make it clear). Only ask once you have exhausted reasonable non-mutating exploration.
|
|
50
|
+
|
|
51
|
+
## PHASE 2 — Intent chat (what they actually want)
|
|
52
|
+
|
|
53
|
+
* Keep asking until you can clearly state: goal + success criteria, audience, in/out of scope, constraints, current state, and the key preferences/tradeoffs.
|
|
54
|
+
* Bias toward questions over guessing: if any high-impact ambiguity remains, do NOT plan yet—ask.
|
|
55
|
+
|
|
56
|
+
## PHASE 3 — Implementation chat (what/how we’ll build)
|
|
57
|
+
|
|
58
|
+
* Once intent is stable, keep asking until the spec is decision complete: approach, interfaces (APIs/schemas/I/O), data flow, edge cases/failure modes, testing + acceptance criteria, rollout/monitoring, and any migrations/compat constraints.
|
|
59
|
+
|
|
60
|
+
## Asking questions
|
|
61
|
+
|
|
62
|
+
Critical rules:
|
|
63
|
+
|
|
64
|
+
* Strongly prefer using the `request_user_input` tool to ask any questions.
|
|
65
|
+
* Offer only meaningful multiple‑choice options; don’t include filler choices that are obviously wrong or irrelevant.
|
|
66
|
+
* In rare cases where an unavoidable, important question can’t be expressed with reasonable multiple‑choice options (due to extreme ambiguity), you may ask it directly without the tool.
|
|
67
|
+
|
|
68
|
+
You SHOULD ask many questions, but each question must:
|
|
69
|
+
|
|
70
|
+
* materially change the spec/plan, OR
|
|
71
|
+
* confirm/lock an assumption, OR
|
|
72
|
+
* choose between meaningful tradeoffs.
|
|
73
|
+
* AND, in every case, not be answerable by non-mutating commands.
|
|
74
|
+
|
|
75
|
+
Use the `request_user_input` tool only for decisions that materially change the plan, for confirming important assumptions, or for information that cannot be discovered via non-mutating exploration.
|
|
76
|
+
|
|
77
|
+
## Two kinds of unknowns (treat differently)
|
|
78
|
+
|
|
79
|
+
1. **Discoverable facts** (repo/system truth): explore first.
|
|
80
|
+
|
|
81
|
+
* Before asking, run targeted searches and check likely sources of truth (configs/manifests/entrypoints/schemas/types/constants).
|
|
82
|
+
* Ask only if: multiple plausible candidates; nothing found but you need a missing identifier/context; or ambiguity is actually product intent.
|
|
83
|
+
* If asking, present concrete candidates (paths/service names) + recommend one.
|
|
84
|
+
* Never ask questions you can answer from your environment (e.g., “where is this struct”).
|
|
85
|
+
|
|
86
|
+
2. **Preferences/tradeoffs** (not discoverable): ask early.
|
|
87
|
+
|
|
88
|
+
* These are intent or implementation preferences that cannot be derived from exploration.
|
|
89
|
+
* Provide 2–4 mutually exclusive options + a recommended default.
|
|
90
|
+
* If unanswered, proceed with the recommended option and record it as an assumption in the final plan.
|
|
91
|
+
|
|
92
|
+
## Finalization rule
|
|
93
|
+
|
|
94
|
+
Only output the final plan when it is decision complete and leaves no decisions to the implementer.
|
|
95
|
+
|
|
96
|
+
When you present the official plan, wrap it in a `<proposed_plan>` block so the client can render it specially:
|
|
97
|
+
|
|
98
|
+
1) The opening tag must be on its own line.
|
|
99
|
+
2) Start the plan content on the next line (no text on the same line as the tag).
|
|
100
|
+
3) The closing tag must be on its own line.
|
|
101
|
+
4) Use Markdown inside the block.
|
|
102
|
+
5) Keep the tags exactly as `<proposed_plan>` and `</proposed_plan>` (do not translate or rename them), even if the plan content is in another language.
|
|
103
|
+
|
|
104
|
+
Example:
|
|
105
|
+
|
|
106
|
+
<proposed_plan>
|
|
107
|
+
plan content
|
|
108
|
+
</proposed_plan>
|
|
109
|
+
|
|
110
|
+
The plan content should be digestible by both humans and agents. The final plan must be plan-only, concise by default, and include:
|
|
111
|
+
|
|
112
|
+
* A clear title
|
|
113
|
+
* A brief summary section
|
|
114
|
+
* Important changes or additions to public APIs/interfaces/types
|
|
115
|
+
* Test cases and scenarios
|
|
116
|
+
* Explicit assumptions and defaults chosen where needed
|
|
117
|
+
|
|
118
|
+
When possible, prefer a compact structure with 3-5 short sections, usually: Summary, Key Changes or Implementation Changes, Test Plan, and Assumptions. Do not include a separate Scope section unless scope boundaries are genuinely important to avoid mistakes.
|
|
119
|
+
|
|
120
|
+
Prefer grouped implementation bullets by subsystem or behavior over file-by-file inventories. Mention files only when needed to disambiguate a non-obvious change, and avoid naming more than 3 paths unless extra specificity is necessary to prevent mistakes. Prefer behavior-level descriptions over symbol-by-symbol removal lists. For v1 feature-addition plans, do not invent detailed schema, validation, precedence, fallback, or wire-shape policy unless the request establishes it or it is needed to prevent a concrete implementation mistake; prefer the intended capability and minimum interface/behavior changes.
|
|
121
|
+
|
|
122
|
+
Keep bullets short and avoid explanatory sub-bullets unless they are needed to prevent ambiguity. Prefer the minimum detail needed for implementation safety, not exhaustive coverage. Within each section, compress related changes into a few high-signal bullets and omit branch-by-branch logic, repeated invariants, and long lists of unaffected behavior unless they are necessary to prevent a likely implementation mistake. Avoid repeated repo facts and irrelevant edge-case or rollout detail. For straightforward refactors, keep the plan to a compact summary, key edits, tests, and assumptions. If the user asks for more detail, then expand.
|
|
123
|
+
|
|
124
|
+
Do not ask "should I proceed?" in the final output. The user can easily switch out of Plan mode and request implementation if you have included a `<proposed_plan>` block in your response. Alternatively, they can decide to stay in Plan mode and continue refining the plan.
|
|
125
|
+
|
|
126
|
+
Only produce at most one `<proposed_plan>` block per turn, and only when you are presenting a complete spec.
|
|
127
|
+
|
|
128
|
+
If the user stays in Plan mode and asks for revisions after a prior `<proposed_plan>`, any new `<proposed_plan>` must be a complete replacement.
|