codex-python-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codex_python_sdk/__init__.py +57 -0
- codex_python_sdk/_shared.py +99 -0
- codex_python_sdk/async_client.py +1313 -0
- codex_python_sdk/errors.py +18 -0
- codex_python_sdk/examples/__init__.py +2 -0
- codex_python_sdk/examples/demo_smoke.py +304 -0
- codex_python_sdk/factory.py +25 -0
- codex_python_sdk/policy.py +636 -0
- codex_python_sdk/renderer.py +607 -0
- codex_python_sdk/sync_client.py +333 -0
- codex_python_sdk/types.py +48 -0
- codex_python_sdk-0.1.0.dist-info/METADATA +274 -0
- codex_python_sdk-0.1.0.dist-info/RECORD +17 -0
- codex_python_sdk-0.1.0.dist-info/WHEEL +5 -0
- codex_python_sdk-0.1.0.dist-info/entry_points.txt +2 -0
- codex_python_sdk-0.1.0.dist-info/licenses/LICENSE +21 -0
- codex_python_sdk-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import copy
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Awaitable, Callable, Literal, Protocol
|
|
9
|
+
|
|
10
|
+
from ._shared import first_nonempty_text, utc_now
|
|
11
|
+
from .errors import AppServerConnectionError, CodexAgenticError, NotAuthenticatedError
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from .async_client import AsyncCodexAgenticClient
|
|
15
|
+
|
|
16
|
+
# Request kinds that a policy engine is asked to resolve.
RequestType = Literal["command", "file_change", "tool_user_input"]

# Decision strings accepted for command and file-change approvals.
Decision = Literal["accept", "acceptForSession", "decline", "cancel"]

# Mode names a rubric may declare.
PolicyMode = Literal["permissive", "balanced", "strict"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class PolicyContext:
    """Metadata that accompanies one request handed to a policy engine.

    All identifiers are optional because the dataclass accepts ``None`` for
    them. ``timestamp`` is filled from ``utc_now()`` when the instance is
    created unless the caller supplies a value.
    """

    # One of the ``RequestType`` literals.
    request_type: RequestType
    # Method name of the originating request.
    method: str
    thread_id: str | None
    turn_id: str | None
    item_id: str | None
    # Raw request parameters.
    params: dict[str, Any]
    timestamp: str = field(default_factory=utc_now)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class PolicyJudgeConfig:
    """Tuning knobs for the LLM-backed policy judge.

    Field order is part of the positional constructor signature and must not
    be changed.
    """

    # Upper bound, in seconds, for a single judge call.
    timeout_seconds: float = 8.0
    # Optional per-turn overrides; ``None`` means the key is not sent.
    model: str | None = None
    effort: str | None = "medium"
    summary: str | None = "none"
    # Whether the request params are serialised into the judge input, and the
    # character budget applied to that serialisation.
    include_params: bool = True
    max_params_chars: int = 8000
    enable_web_search: bool = False
    # Decisions used when the judge cannot produce a verdict.
    fallback_command_decision: Decision = "decline"
    fallback_file_change_decision: Decision = "decline"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class PolicyRubric:
    """Declarative description of how approval requests should be decided.

    Only ``system_rubric`` is required. Every mutable field uses a
    ``default_factory`` so that instances never share lists or dicts.
    """

    # Free-form instructions given to the LLM judge.
    system_rubric: str
    mode: PolicyMode = "permissive"
    use_llm_judge: bool = False
    # Rule lists, one per request kind; each rule is a plain dict.
    command_rules: list[dict[str, Any]] = field(default_factory=list)
    file_change_rules: list[dict[str, Any]] = field(default_factory=list)
    tool_input_rules: list[dict[str, Any]] = field(default_factory=list)
    # Decision applied per request kind when no rule matches.
    defaults: dict[str, Any] = field(
        default_factory=lambda: dict(
            command="accept",
            file_change="accept",
            tool_input="auto_empty",
        )
    )
    audit: dict[str, Any] = field(
        default_factory=lambda: dict(enabled=False, include_params=False)
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Shared baseline rubric: permissive mode with the LLM judge switched off.
DEFAULT_POLICY_RUBRIC = PolicyRubric(
    system_rubric=" ".join(
        (
            "You are a policy judge for command/file-change approvals.",
            "When uncertain, prefer safe behavior.",
        )
    ),
    use_llm_judge=False,
    mode="permissive",
)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _default_policy_rubric() -> PolicyRubric:
    """Return an independent deep copy of ``DEFAULT_POLICY_RUBRIC``.

    Each engine gets its own copy so that mutating one engine's rubric cannot
    leak into the shared module-level instance or into other engines.
    """
    private_copy = copy.deepcopy(DEFAULT_POLICY_RUBRIC)
    return private_copy
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class PolicyEngine(Protocol):
    """Structural interface shared by every policy engine in this module.

    Each hook receives the raw request ``params`` plus a ``PolicyContext`` and
    returns a response dict, either directly or as an awaitable.
    """

    def on_command_approval(
        self,
        params: dict[str, Any],
        context: PolicyContext,
    ) -> dict[str, Any] | Awaitable[dict[str, Any]]:
        """Decide a command approval request."""

    def on_file_change_approval(
        self,
        params: dict[str, Any],
        context: PolicyContext,
    ) -> dict[str, Any] | Awaitable[dict[str, Any]]:
        """Decide a file-change approval request."""

    def on_tool_request_user_input(
        self,
        params: dict[str, Any],
        context: PolicyContext,
    ) -> dict[str, Any] | Awaitable[dict[str, Any]]:
        """Produce answers for a tool's request for user input."""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class DefaultPolicyEngine:
    """Engine that accepts every approval and supplies no user-input answers.

    The inputs are ignored entirely; each call returns a fresh dict.
    """

    def on_command_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Return an unconditional ``accept`` decision for a command."""
        _ = (params, context)  # intentionally unused
        return dict(decision="accept")

    def on_file_change_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Return an unconditional ``accept`` decision for a file change."""
        _ = (params, context)  # intentionally unused
        return dict(decision="accept")

    def on_tool_request_user_input(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Return an empty answer mapping."""
        _ = (params, context)  # intentionally unused
        return dict(answers={})
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class RuleBasedPolicyEngine:
    """Evaluate approval requests against the declarative rules of a rubric.

    Rules are tried in ascending ``priority`` order (missing priority counts
    as 1000). The first rule whose ``when`` clause matches supplies the
    decision; otherwise the rubric's per-kind default is used.
    """

    def __init__(self, rubric: PolicyRubric | dict[str, Any] | None = None) -> None:
        """Store the given rubric, or a private copy of the default one."""
        if rubric is None:
            self.rubric = _default_policy_rubric()
        else:
            self.rubric = _to_rubric(rubric)

    def on_command_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Resolve a command approval using ``command_rules``."""
        default_choice = self.rubric.defaults.get("command", "accept")
        outcome = self._apply_rules(
            self.rubric.command_rules,
            params,
            context,
            fallback=default_choice,
            kind="command",
        )
        return _normalize_command_or_file_decision(outcome, fallback=str(default_choice))

    def on_file_change_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Resolve a file-change approval using ``file_change_rules``."""
        default_choice = self.rubric.defaults.get("file_change", "accept")
        outcome = self._apply_rules(
            self.rubric.file_change_rules,
            params,
            context,
            fallback=default_choice,
            kind="file_change",
        )
        return _normalize_command_or_file_decision(outcome, fallback=str(default_choice))

    def on_tool_request_user_input(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Resolve a user-input request using ``tool_input_rules``."""
        default_choice = self.rubric.defaults.get("tool_input", "auto_empty")
        outcome = self._apply_rules(
            self.rubric.tool_input_rules,
            params,
            context,
            fallback=default_choice,
            kind="tool_user_input",
        )
        return _normalize_user_input_decision(outcome)

    def _apply_rules(
        self,
        rules: list[dict[str, Any]],
        params: dict[str, Any],
        context: PolicyContext,
        *,
        fallback: Any,
        kind: RequestType,
    ) -> Any:
        """Return the decision of the first matching rule, else ``fallback``.

        Rules whose ``when`` value is not a dict are skipped. A matching rule
        without a ``decision`` key also yields ``fallback``.
        """

        def priority_of(entry: dict[str, Any]) -> int:
            return int(entry.get("priority", 1000))

        # ``sorted`` is stable, so equal priorities keep their listed order.
        for candidate in sorted(rules, key=priority_of):
            condition = candidate.get("when", {})
            if isinstance(condition, dict) and _matches_rule(condition, params, context, kind=kind):
                return candidate.get("decision", fallback)
        return fallback
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class LlmRubricPolicyEngine:
    """Policy engine that asks an LLM judge using a temporary app-server thread.

    A single judge client is created lazily via ``create_async_client`` and
    reused across calls; each decision is requested with
    ``thread_params={"ephemeral": True}``. Whenever the judge cannot deliver a
    usable verdict (timeout, connection/authentication/Codex error, or a
    decision outside the accepted set) the configured fallback is returned.
    """

    def __init__(
        self,
        *,
        rubric: PolicyRubric | dict[str, Any] | None = None,
        judge_config: PolicyJudgeConfig | None = None,
        codex_command: str = "codex",
        app_server_args: list[str] | None = None,
        env: dict[str, str] | None = None,
        process_cwd: str | None = None,
        server_config_overrides: dict[str, Any] | None = None,
    ) -> None:
        """Record configuration; no process or client is started here.

        Args:
            rubric: Rubric (or dict form) whose ``system_rubric`` is sent to
                the judge. Defaults to a copy of the module default.
            judge_config: Judge tuning; defaults to ``PolicyJudgeConfig()``.
            codex_command: Forwarded to ``create_async_client``.
            app_server_args: Forwarded to ``create_async_client``; defaults
                to ``["app-server"]``.
            env: Forwarded to ``create_async_client``.
            process_cwd: Forwarded to ``create_async_client``.
            server_config_overrides: Forwarded to ``create_async_client``
                (``None`` is sent when empty).
        """
        self.rubric = _to_rubric(rubric) if rubric is not None else _default_policy_rubric()
        self.judge_config = judge_config or PolicyJudgeConfig()
        self.codex_command = codex_command
        # Copied so later mutation of the caller's list does not affect us.
        self.app_server_args = app_server_args[:] if app_server_args else ["app-server"]
        self.env = env
        self.process_cwd = process_cwd
        self.server_config_overrides = dict(server_config_overrides or {})
        # Runtime state is created lazily, once an event loop is running.
        self._judge_client: AsyncCodexAgenticClient | None = None
        self._runtime_loop: asyncio.AbstractEventLoop | None = None
        self._judge_client_lock_ref: asyncio.Lock | None = None
        self._judge_call_lock_ref: asyncio.Lock | None = None

    def _ensure_runtime_locks(self) -> None:
        """Record the running loop and create both locks if missing.

        Raises:
            CodexAgenticError: If a different event loop than the one first
                recorded is running.
            RuntimeError: From ``asyncio.get_running_loop`` when no loop is
                running.
        """
        loop = asyncio.get_running_loop()
        if self._runtime_loop is None:
            self._runtime_loop = loop
        elif self._runtime_loop is not loop:
            raise CodexAgenticError("Policy engine cannot be shared across different event loops.")

        if self._judge_client_lock_ref is None:
            self._judge_client_lock_ref = asyncio.Lock()
        if self._judge_call_lock_ref is None:
            self._judge_call_lock_ref = asyncio.Lock()

    @property
    def _judge_client_lock(self) -> asyncio.Lock:
        """Lock guarding creation and teardown of the judge client."""
        if self._judge_client_lock_ref is None:
            self._ensure_runtime_locks()
        assert self._judge_client_lock_ref is not None
        return self._judge_client_lock_ref

    @property
    def _judge_call_lock(self) -> asyncio.Lock:
        """Lock serialising judge requests so only one runs at a time."""
        if self._judge_call_lock_ref is None:
            self._ensure_runtime_locks()
        assert self._judge_call_lock_ref is not None
        return self._judge_call_lock_ref

    async def aclose(self) -> None:
        """Close the internal judge client if it exists."""

        client: AsyncCodexAgenticClient | None
        # Detach under the lock, close outside it so the lock is held briefly.
        async with self._judge_client_lock:
            client = self._judge_client
            self._judge_client = None
        if client is not None:
            await client.close()

    async def on_command_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Ask the judge about a command; fall back to the configured decision."""
        allowed = ["accept", "acceptForSession", "decline", "cancel"]
        fallback = {"decision": self.judge_config.fallback_command_decision}
        return await self._judge_or_fallback(
            params=params,
            context=context,
            allowed_decisions=allowed,
            fallback=fallback,
            output_kind="command_or_file",
        )

    async def on_file_change_approval(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Ask the judge about a file change; fall back to the configured decision."""
        allowed = ["accept", "acceptForSession", "decline", "cancel"]
        fallback = {"decision": self.judge_config.fallback_file_change_decision}
        return await self._judge_or_fallback(
            params=params,
            context=context,
            allowed_decisions=allowed,
            fallback=fallback,
            output_kind="command_or_file",
        )

    async def on_tool_request_user_input(self, params: dict[str, Any], context: PolicyContext) -> dict[str, Any]:
        """Ask the judge for answers; fall back to an empty answer mapping."""
        return await self._judge_or_fallback(
            params=params,
            context=context,
            allowed_decisions=[],
            fallback={"answers": {}},
            output_kind="user_input",
        )

    async def _judge_or_fallback(
        self,
        *,
        params: dict[str, Any],
        context: PolicyContext,
        allowed_decisions: list[str],
        fallback: dict[str, Any],
        output_kind: Literal["command_or_file", "user_input"],
    ) -> dict[str, Any]:
        """Run the judge under a timeout and normalise its answer.

        Returns:
            The normalised judge response, or ``fallback`` when the judge
            times out, raises one of the SDK error types, or (for
            command/file requests) returns a decision that fails
            normalisation.

        Raises:
            asyncio.CancelledError: Re-raised so outer cancellation is never
                converted into a fallback decision.
        """
        try:
            payload = await asyncio.wait_for(
                self._judge_with_llm(
                    params=params,
                    context=context,
                    allowed_decisions=allowed_decisions,
                    output_kind=output_kind,
                ),
                timeout=self.judge_config.timeout_seconds,
            )
        except asyncio.CancelledError:
            raise
        except asyncio.TimeoutError:
            return fallback
        except (AppServerConnectionError, NotAuthenticatedError, CodexAgenticError):
            return fallback

        if output_kind == "command_or_file":
            try:
                return _normalize_command_or_file_decision(payload, fallback=fallback["decision"])
            except CodexAgenticError:
                # The judge answered with a decision outside the accepted set.
                # Treat it like any other judge failure instead of letting the
                # error escape the policy hook.
                return fallback
        return _normalize_user_input_decision(payload)

    async def _judge_with_llm(
        self,
        *,
        params: dict[str, Any],
        context: PolicyContext,
        allowed_decisions: list[str],
        output_kind: Literal["command_or_file", "user_input"],
    ) -> dict[str, Any]:
        """Send one judge request and return the parsed JSON object.

        Raises:
            CodexAgenticError: If the response text is not a JSON object.
            Exception: Anything raised while obtaining the client or running
                the request is re-raised after the judge client is closed.
        """
        input_payload = _build_judge_input(
            params=params,
            context=context,
            include_params=self.judge_config.include_params,
            max_params_chars=self.judge_config.max_params_chars,
        )
        prompt = _build_judge_prompt(
            rubric=self.rubric.system_rubric,
            context=context,
            allowed_decisions=allowed_decisions,
            input_payload=input_payload,
            output_kind=output_kind,
        )

        turn_params: dict[str, Any] = {
            "outputSchema": _judge_output_schema(output_kind=output_kind, allowed_decisions=allowed_decisions),
        }
        # Optional overrides are only sent when configured.
        if self.judge_config.summary is not None:
            turn_params["summary"] = self.judge_config.summary
        if self.judge_config.effort is not None:
            turn_params["effort"] = self.judge_config.effort
        if self.judge_config.model is not None:
            turn_params["model"] = self.judge_config.model

        try:
            async with self._judge_call_lock:
                judge_client = await self._ensure_judge_client()
                # Each judge decision runs in a fresh ephemeral thread.
                response = await judge_client.responses_create(
                    prompt=prompt,
                    thread_params={"ephemeral": True},
                    turn_params=turn_params,
                )
        except Exception:
            # Drop the client so the next call starts from a fresh connection.
            await self.aclose()
            raise

        parsed = _parse_json_object(response.text)
        if not isinstance(parsed, dict):
            raise CodexAgenticError("LLM judge did not return a valid JSON object.")
        return parsed

    async def _ensure_judge_client(self) -> "AsyncCodexAgenticClient":
        """Return the cached judge client, creating and connecting it once.

        A client that fails to connect is closed and the error re-raised; it
        is never cached.
        """
        existing = self._judge_client
        if existing is not None:
            return existing

        async with self._judge_client_lock:
            # Re-check: another task may have created the client while this
            # one was waiting for the lock.
            existing = self._judge_client
            if existing is not None:
                return existing

            # Imported here rather than at module top (presumably to avoid an
            # import cycle with the client modules - confirm before moving).
            from .factory import create_async_client

            judge_client = create_async_client(
                codex_command=self.codex_command,
                app_server_args=self.app_server_args,
                env=self.env,
                process_cwd=self.process_cwd,
                enable_web_search=self.judge_config.enable_web_search,
                server_config_overrides=self.server_config_overrides or None,
                default_turn_params={},
                default_thread_params={},
            )
            try:
                await judge_client.connect()
            except Exception:
                await judge_client.close()
                raise

            self._judge_client = judge_client
            return judge_client
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def build_policy_engine_from_rubric(
    rubric: PolicyRubric | dict[str, Any],
    *,
    judge_config: PolicyJudgeConfig | None = None,
    codex_command: str = "codex",
    app_server_args: list[str] | None = None,
    env: dict[str, str] | None = None,
    process_cwd: str | None = None,
    server_config_overrides: dict[str, Any] | None = None,
) -> PolicyEngine:
    """Build the engine that a rubric asks for.

    Returns an ``LlmRubricPolicyEngine`` when the rubric sets
    ``use_llm_judge``; otherwise a ``RuleBasedPolicyEngine``. The keyword
    arguments are only used by the LLM-backed engine.
    """
    resolved = _to_rubric(rubric)
    if not resolved.use_llm_judge:
        return RuleBasedPolicyEngine(rubric=resolved)

    judge_options: dict[str, Any] = dict(
        rubric=resolved,
        judge_config=judge_config,
        codex_command=codex_command,
        app_server_args=app_server_args,
        env=env,
        process_cwd=process_cwd,
        server_config_overrides=server_config_overrides,
    )
    return LlmRubricPolicyEngine(**judge_options)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _to_rubric(rubric: PolicyRubric | dict[str, Any]) -> PolicyRubric:
    """Coerce a rubric given as a dataclass or a plain mapping.

    A ``PolicyRubric`` instance is returned unchanged (same object). A
    mapping is converted field by field:

    * missing or falsy ``system_rubric`` / ``mode`` fall back to the default
      rubric text and ``"permissive"``;
    * each ``*_rules`` value keeps only its dict entries, and a missing or
      null value yields an empty list;
    * ``defaults`` and ``audit`` are shallow-copied when they are dicts, and
      otherwise replaced by copies of the default rubric's values.
    """
    if isinstance(rubric, PolicyRubric):
        return rubric
    data = dict(rubric)

    def _rules(key: str) -> list[dict[str, Any]]:
        # ``or ()`` makes an explicit ``None`` (e.g. ``command_rules: null``
        # in a JSON/YAML rubric) mean "no rules" instead of raising TypeError.
        return [item for item in (data.get(key) or ()) if isinstance(item, dict)]

    raw_defaults = data.get("defaults")
    raw_audit = data.get("audit")
    return PolicyRubric(
        system_rubric=str(data.get("system_rubric") or DEFAULT_POLICY_RUBRIC.system_rubric),
        mode=str(data.get("mode") or "permissive"),  # type: ignore[arg-type]
        use_llm_judge=bool(data.get("use_llm_judge", False)),
        command_rules=_rules("command_rules"),
        file_change_rules=_rules("file_change_rules"),
        tool_input_rules=_rules("tool_input_rules"),
        # Copy so the new rubric does not alias the caller's dict objects.
        defaults=dict(raw_defaults) if isinstance(raw_defaults, dict) else DEFAULT_POLICY_RUBRIC.defaults.copy(),
        audit=dict(raw_audit) if isinstance(raw_audit, dict) else DEFAULT_POLICY_RUBRIC.audit.copy(),
    )
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _matches_rule(
|
|
420
|
+
when: dict[str, Any],
|
|
421
|
+
params: dict[str, Any],
|
|
422
|
+
context: PolicyContext,
|
|
423
|
+
*,
|
|
424
|
+
kind: RequestType,
|
|
425
|
+
) -> bool:
|
|
426
|
+
common_keys = {
|
|
427
|
+
"command_regex",
|
|
428
|
+
"cwd_prefix",
|
|
429
|
+
"reason_regex",
|
|
430
|
+
"thread_id_regex",
|
|
431
|
+
"turn_id_regex",
|
|
432
|
+
"has_proposed_execpolicy",
|
|
433
|
+
}
|
|
434
|
+
kind_specific_keys: dict[RequestType, set[str]] = {
|
|
435
|
+
"command": set(),
|
|
436
|
+
"file_change": {"grant_root_present"},
|
|
437
|
+
"tool_user_input": {"question_id_regex"},
|
|
438
|
+
}
|
|
439
|
+
allowed_keys = common_keys | kind_specific_keys[kind]
|
|
440
|
+
unknown_keys = sorted(key for key in when if key not in allowed_keys)
|
|
441
|
+
if unknown_keys:
|
|
442
|
+
names = ", ".join(unknown_keys)
|
|
443
|
+
raise CodexAgenticError(f"Unsupported policy rule fields for '{kind}': {names}")
|
|
444
|
+
|
|
445
|
+
command = str(params.get("command") or "")
|
|
446
|
+
cwd = str(params.get("cwd") or "")
|
|
447
|
+
reason = str(params.get("reason") or "")
|
|
448
|
+
proposed = params.get("proposedExecpolicyAmendment")
|
|
449
|
+
has_proposed = isinstance(proposed, list) and len(proposed) > 0
|
|
450
|
+
|
|
451
|
+
if "command_regex" in when:
|
|
452
|
+
pattern = str(when["command_regex"])
|
|
453
|
+
if not re.search(pattern, command):
|
|
454
|
+
return False
|
|
455
|
+
if "cwd_prefix" in when:
|
|
456
|
+
prefix = str(when["cwd_prefix"])
|
|
457
|
+
if not cwd.startswith(prefix):
|
|
458
|
+
return False
|
|
459
|
+
if "reason_regex" in when:
|
|
460
|
+
pattern = str(when["reason_regex"])
|
|
461
|
+
if not re.search(pattern, reason):
|
|
462
|
+
return False
|
|
463
|
+
if "thread_id_regex" in when:
|
|
464
|
+
pattern = str(when["thread_id_regex"])
|
|
465
|
+
if not re.search(pattern, context.thread_id or ""):
|
|
466
|
+
return False
|
|
467
|
+
if "turn_id_regex" in when:
|
|
468
|
+
pattern = str(when["turn_id_regex"])
|
|
469
|
+
if not re.search(pattern, context.turn_id or ""):
|
|
470
|
+
return False
|
|
471
|
+
if "has_proposed_execpolicy" in when:
|
|
472
|
+
expected = bool(when["has_proposed_execpolicy"])
|
|
473
|
+
if expected != has_proposed:
|
|
474
|
+
return False
|
|
475
|
+
if kind == "file_change" and "grant_root_present" in when:
|
|
476
|
+
expected = bool(when["grant_root_present"])
|
|
477
|
+
present = bool(params.get("grantRoot"))
|
|
478
|
+
if expected != present:
|
|
479
|
+
return False
|
|
480
|
+
if kind == "tool_user_input" and "question_id_regex" in when:
|
|
481
|
+
pattern = str(when["question_id_regex"])
|
|
482
|
+
questions = params.get("questions")
|
|
483
|
+
if not isinstance(questions, list):
|
|
484
|
+
return False
|
|
485
|
+
matched = False
|
|
486
|
+
for question in questions:
|
|
487
|
+
if not isinstance(question, dict):
|
|
488
|
+
continue
|
|
489
|
+
qid = question.get("id")
|
|
490
|
+
if isinstance(qid, str) and re.search(pattern, qid):
|
|
491
|
+
matched = True
|
|
492
|
+
break
|
|
493
|
+
if not matched:
|
|
494
|
+
return False
|
|
495
|
+
return True
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _normalize_command_or_file_decision(decision: Any, *, fallback: str) -> dict[str, Any]:
|
|
499
|
+
valid = {"accept", "acceptForSession", "decline", "cancel"}
|
|
500
|
+
if isinstance(decision, dict):
|
|
501
|
+
payload = dict(decision)
|
|
502
|
+
value = payload.get("decision")
|
|
503
|
+
if isinstance(value, str) and value in valid:
|
|
504
|
+
return payload
|
|
505
|
+
if isinstance(value, dict) and "acceptWithExecpolicyAmendment" in value:
|
|
506
|
+
return payload
|
|
507
|
+
raise CodexAgenticError("Policy decision dict must contain a valid 'decision'.")
|
|
508
|
+
if isinstance(decision, str):
|
|
509
|
+
if decision == "auto_empty":
|
|
510
|
+
return {"decision": fallback}
|
|
511
|
+
if decision in valid:
|
|
512
|
+
return {"decision": decision}
|
|
513
|
+
raise CodexAgenticError("Policy decision string must be one of: accept, acceptForSession, decline, cancel.")
|
|
514
|
+
raise CodexAgenticError("Policy decision must be a dict or string.")
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _normalize_user_input_decision(decision: Any) -> dict[str, Any]:
|
|
518
|
+
if isinstance(decision, dict):
|
|
519
|
+
answers = decision.get("answers")
|
|
520
|
+
if isinstance(answers, list):
|
|
521
|
+
mapped: dict[str, dict[str, list[str]]] = {}
|
|
522
|
+
for row in answers:
|
|
523
|
+
if not isinstance(row, dict):
|
|
524
|
+
continue
|
|
525
|
+
qid = row.get("id")
|
|
526
|
+
values = row.get("answers")
|
|
527
|
+
if not isinstance(qid, str):
|
|
528
|
+
continue
|
|
529
|
+
if not isinstance(values, list):
|
|
530
|
+
continue
|
|
531
|
+
normalized_values = [value for value in values if isinstance(value, str)]
|
|
532
|
+
mapped[qid] = {"answers": normalized_values}
|
|
533
|
+
return {"answers": mapped}
|
|
534
|
+
if isinstance(answers, dict):
|
|
535
|
+
return {"answers": answers}
|
|
536
|
+
return {"answers": {}}
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def _build_judge_input(
|
|
540
|
+
*,
|
|
541
|
+
params: dict[str, Any],
|
|
542
|
+
context: PolicyContext,
|
|
543
|
+
include_params: bool,
|
|
544
|
+
max_params_chars: int,
|
|
545
|
+
) -> dict[str, Any]:
|
|
546
|
+
payload: dict[str, Any] = {
|
|
547
|
+
"request_type": context.request_type,
|
|
548
|
+
"thread_id": context.thread_id,
|
|
549
|
+
"turn_id": context.turn_id,
|
|
550
|
+
"item_id": context.item_id,
|
|
551
|
+
}
|
|
552
|
+
if include_params:
|
|
553
|
+
params_json = json.dumps(params, ensure_ascii=False)
|
|
554
|
+
budget = max(0, int(max_params_chars))
|
|
555
|
+
if len(params_json) > budget:
|
|
556
|
+
if budget < 4:
|
|
557
|
+
params_json = params_json[:budget]
|
|
558
|
+
else:
|
|
559
|
+
params_json = params_json[: budget - 3] + "..."
|
|
560
|
+
payload["params_json"] = params_json
|
|
561
|
+
return payload
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _build_judge_prompt(
|
|
565
|
+
*,
|
|
566
|
+
rubric: str,
|
|
567
|
+
context: PolicyContext,
|
|
568
|
+
allowed_decisions: list[str],
|
|
569
|
+
input_payload: dict[str, Any],
|
|
570
|
+
output_kind: Literal["command_or_file", "user_input"],
|
|
571
|
+
) -> str:
|
|
572
|
+
if output_kind == "command_or_file":
|
|
573
|
+
decision_text = ", ".join(allowed_decisions)
|
|
574
|
+
return (
|
|
575
|
+
"You are a policy judge.\n"
|
|
576
|
+
f"Rubric:\n{rubric}\n\n"
|
|
577
|
+
f"Request method: {context.method}\n"
|
|
578
|
+
f"Allowed decisions: {decision_text}\n"
|
|
579
|
+
"Return only JSON object: {\"decision\":\"...\",\"reason\":\"...\"}\n\n"
|
|
580
|
+
f"Input:\n{json.dumps(input_payload, ensure_ascii=False, indent=2)}"
|
|
581
|
+
)
|
|
582
|
+
return (
|
|
583
|
+
"You are a policy judge for requestUserInput.\n"
|
|
584
|
+
f"Rubric:\n{rubric}\n\n"
|
|
585
|
+
f"Request method: {context.method}\n"
|
|
586
|
+
"Return only JSON object with shape: {\"answers\": [{\"id\":\"<question_id>\",\"answers\":[\"...\"]}]}\n"
|
|
587
|
+
"If no confident answer, return {\"answers\": []}.\n\n"
|
|
588
|
+
f"Input:\n{json.dumps(input_payload, ensure_ascii=False, indent=2)}"
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def _judge_output_schema(
|
|
593
|
+
*,
|
|
594
|
+
output_kind: Literal["command_or_file", "user_input"],
|
|
595
|
+
allowed_decisions: list[str],
|
|
596
|
+
) -> dict[str, Any]:
|
|
597
|
+
if output_kind == "command_or_file":
|
|
598
|
+
return {
|
|
599
|
+
"type": "object",
|
|
600
|
+
"properties": {
|
|
601
|
+
"decision": {"type": "string", "enum": allowed_decisions},
|
|
602
|
+
"reason": {"type": "string"},
|
|
603
|
+
},
|
|
604
|
+
"required": ["decision", "reason"],
|
|
605
|
+
"additionalProperties": False,
|
|
606
|
+
}
|
|
607
|
+
return {
|
|
608
|
+
"type": "object",
|
|
609
|
+
"properties": {
|
|
610
|
+
"answers": {
|
|
611
|
+
"type": "array",
|
|
612
|
+
"items": {
|
|
613
|
+
"type": "object",
|
|
614
|
+
"properties": {
|
|
615
|
+
"id": {"type": "string"},
|
|
616
|
+
"answers": {"type": "array", "items": {"type": "string"}},
|
|
617
|
+
},
|
|
618
|
+
"required": ["id", "answers"],
|
|
619
|
+
"additionalProperties": False,
|
|
620
|
+
},
|
|
621
|
+
}
|
|
622
|
+
},
|
|
623
|
+
"required": ["answers"],
|
|
624
|
+
"additionalProperties": False,
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _parse_json_object(text: str) -> dict[str, Any] | None:
    """Decode ``text`` as a JSON object, or return ``None``.

    ``None`` is returned when ``first_nonempty_text`` yields nothing, when
    the text is not valid JSON, or when the decoded value is not a dict.
    """
    candidate = first_nonempty_text(text)
    if candidate:
        try:
            decoded = json.loads(candidate)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            return decoded
    return None
|