codexapi 0.7.0__tar.gz → 0.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codexapi-0.7.0/src/codexapi.egg-info → codexapi-0.7.2}/PKG-INFO +15 -5
- {codexapi-0.7.0 → codexapi-0.7.2}/README.md +14 -4
- {codexapi-0.7.0 → codexapi-0.7.2}/pyproject.toml +1 -1
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/__init__.py +1 -1
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/agent.py +15 -6
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/cli.py +42 -1
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/ralph.py +17 -2
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/science.py +91 -3
- {codexapi-0.7.0 → codexapi-0.7.2/src/codexapi.egg-info}/PKG-INFO +15 -5
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/SOURCES.txt +1 -0
- codexapi-0.7.2/tests/test_science.py +97 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/LICENSE +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/setup.cfg +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/__main__.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/foreach.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/gh_integration.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/lead.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/pushover.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/rate_limits.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/task.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/taskfile.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/welfare.py +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/dependency_links.txt +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/entry_points.txt +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/requires.txt +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/top_level.txt +0 -0
- {codexapi-0.7.0 → codexapi-0.7.2}/tests/test_task_progress.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -130,6 +130,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
|
|
|
130
130
|
```
|
|
131
131
|
|
|
132
132
|
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
133
|
+
Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
|
|
133
134
|
|
|
134
135
|
Lead mode periodically checks in on a long-running agent session with the
|
|
135
136
|
current time and prints JSON status updates. The agent controls the loop by
|
|
@@ -174,17 +175,23 @@ codexapi ralph --cancel --cwd /path/to/project
|
|
|
174
175
|
Science mode wraps a short task in a science prompt and runs it through the
|
|
175
176
|
Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
|
|
176
177
|
Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
|
|
177
|
-
any improved figures of merit for optional notifications.
|
|
178
|
+
any improved figures of merit for optional notifications. You can also set
|
|
179
|
+
`--max-duration` to stop after the current iteration once a time limit is hit.
|
|
180
|
+
The default science wrapper also tells the agent to create/use a local git
|
|
181
|
+
branch when in a repo and make local commits for worthwhile improvements, while
|
|
182
|
+
never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
|
|
178
183
|
|
|
179
184
|
```bash
|
|
180
185
|
codexapi science "hyper-optimize the kernel cycles"
|
|
181
186
|
codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
|
|
187
|
+
codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
|
|
182
188
|
```
|
|
183
189
|
|
|
184
190
|
Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
|
|
185
191
|
Line 1 is your user or group key, line 2 is the app API token. When this file
|
|
186
192
|
exists, Science will send a notification whenever it detects a new best result,
|
|
187
|
-
including the metric values and percent improvement. Task runs will also send a
|
|
193
|
+
including the metric values and percent improvement, plus a final run-end status.
|
|
194
|
+
Task runs will also send a
|
|
188
195
|
✅/❌ notification with the task summary. Lead runs send a notification when the
|
|
189
196
|
loop stops.
|
|
190
197
|
|
|
@@ -199,7 +206,7 @@ codexapi foreach list.txt task.yaml --retry-all
|
|
|
199
206
|
|
|
200
207
|
## API
|
|
201
208
|
|
|
202
|
-
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
209
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
|
|
203
210
|
|
|
204
211
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
205
212
|
items are filtered out.
|
|
@@ -208,8 +215,9 @@ items are filtered out.
|
|
|
208
215
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
209
216
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
210
217
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
218
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
211
219
|
|
|
212
|
-
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
|
|
220
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
|
|
213
221
|
|
|
214
222
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
215
223
|
the same conversation and returns only the agent's message.
|
|
@@ -220,6 +228,7 @@ the same conversation and returns only the agent's message.
|
|
|
220
228
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
221
229
|
- `welfare` (bool): when true, append welfare stop instructions to each prompt
|
|
222
230
|
and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
|
|
231
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
223
232
|
|
|
224
233
|
### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
|
|
225
234
|
|
|
@@ -305,6 +314,7 @@ Simple result object returned by `foreach()`.
|
|
|
305
314
|
## Behavior notes
|
|
306
315
|
|
|
307
316
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
317
|
+
- Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
|
|
308
318
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
309
319
|
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
310
320
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
@@ -115,6 +115,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
|
|
|
115
115
|
```
|
|
116
116
|
|
|
117
117
|
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
118
|
+
Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
|
|
118
119
|
|
|
119
120
|
Lead mode periodically checks in on a long-running agent session with the
|
|
120
121
|
current time and prints JSON status updates. The agent controls the loop by
|
|
@@ -159,17 +160,23 @@ codexapi ralph --cancel --cwd /path/to/project
|
|
|
159
160
|
Science mode wraps a short task in a science prompt and runs it through the
|
|
160
161
|
Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
|
|
161
162
|
Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
|
|
162
|
-
any improved figures of merit for optional notifications.
|
|
163
|
+
any improved figures of merit for optional notifications. You can also set
|
|
164
|
+
`--max-duration` to stop after the current iteration once a time limit is hit.
|
|
165
|
+
The default science wrapper also tells the agent to create/use a local git
|
|
166
|
+
branch when in a repo and make local commits for worthwhile improvements, while
|
|
167
|
+
never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
|
|
163
168
|
|
|
164
169
|
```bash
|
|
165
170
|
codexapi science "hyper-optimize the kernel cycles"
|
|
166
171
|
codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
|
|
172
|
+
codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
|
|
167
173
|
```
|
|
168
174
|
|
|
169
175
|
Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
|
|
170
176
|
Line 1 is your user or group key, line 2 is the app API token. When this file
|
|
171
177
|
exists, Science will send a notification whenever it detects a new best result,
|
|
172
|
-
including the metric values and percent improvement. Task runs will also send a
|
|
178
|
+
including the metric values and percent improvement, plus a final run-end status.
|
|
179
|
+
Task runs will also send a
|
|
173
180
|
✅/❌ notification with the task summary. Lead runs send a notification when the
|
|
174
181
|
loop stops.
|
|
175
182
|
|
|
@@ -184,7 +191,7 @@ codexapi foreach list.txt task.yaml --retry-all
|
|
|
184
191
|
|
|
185
192
|
## API
|
|
186
193
|
|
|
187
|
-
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
194
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
|
|
188
195
|
|
|
189
196
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
190
197
|
items are filtered out.
|
|
@@ -193,8 +200,9 @@ items are filtered out.
|
|
|
193
200
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
194
201
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
195
202
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
203
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
196
204
|
|
|
197
|
-
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
|
|
205
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
|
|
198
206
|
|
|
199
207
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
200
208
|
the same conversation and returns only the agent's message.
|
|
@@ -205,6 +213,7 @@ the same conversation and returns only the agent's message.
|
|
|
205
213
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
206
214
|
- `welfare` (bool): when true, append welfare stop instructions to each prompt
|
|
207
215
|
and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
|
|
216
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
208
217
|
|
|
209
218
|
### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
|
|
210
219
|
|
|
@@ -290,6 +299,7 @@ Simple result object returned by `foreach()`.
|
|
|
290
299
|
## Behavior notes
|
|
291
300
|
|
|
292
301
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
302
|
+
- Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
|
|
293
303
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
294
304
|
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
295
305
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
@@ -10,7 +10,7 @@ from . import welfare
|
|
|
10
10
|
_CODEX_BIN = os.environ.get("CODEX_BIN", "codex")
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
def agent(prompt, cwd=None, yolo=True, flags=None):
|
|
13
|
+
def agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False):
|
|
14
14
|
"""Run a single Codex turn and return only the agent's message.
|
|
15
15
|
|
|
16
16
|
Args:
|
|
@@ -18,11 +18,14 @@ def agent(prompt, cwd=None, yolo=True, flags=None):
|
|
|
18
18
|
cwd: Optional working directory for the Codex session.
|
|
19
19
|
yolo: Whether to pass --yolo to Codex.
|
|
20
20
|
flags: Additional raw CLI flags to pass to Codex.
|
|
21
|
+
include_thinking: When true, return all agent messages joined together.
|
|
21
22
|
|
|
22
23
|
Returns:
|
|
23
24
|
The agent's visible response text with reasoning traces removed.
|
|
24
25
|
"""
|
|
25
|
-
message, _thread_id = _run_codex(prompt, cwd, None, yolo, flags)
|
|
26
|
+
message, _thread_id = _run_codex(
|
|
27
|
+
prompt, cwd, None, yolo, flags, include_thinking
|
|
28
|
+
)
|
|
26
29
|
return message
|
|
27
30
|
|
|
28
31
|
|
|
@@ -51,6 +54,7 @@ class Agent:
|
|
|
51
54
|
thread_id=None,
|
|
52
55
|
flags=None,
|
|
53
56
|
welfare=False,
|
|
57
|
+
include_thinking=False,
|
|
54
58
|
):
|
|
55
59
|
"""Create a new session wrapper.
|
|
56
60
|
|
|
@@ -62,11 +66,13 @@ class Agent:
|
|
|
62
66
|
flags: Additional raw CLI flags to pass to Codex.
|
|
63
67
|
welfare: When true, append welfare stop instructions to each prompt
|
|
64
68
|
and raise WelfareStop if the agent outputs MAKE IT STOP.
|
|
69
|
+
include_thinking: When true, return all agent messages joined together.
|
|
65
70
|
"""
|
|
66
71
|
self.cwd = cwd
|
|
67
72
|
self._yolo = yolo
|
|
68
73
|
self._flags = flags
|
|
69
74
|
self._welfare = welfare
|
|
75
|
+
self._include_thinking = include_thinking
|
|
70
76
|
self.thread_id = thread_id
|
|
71
77
|
|
|
72
78
|
def __call__(self, prompt):
|
|
@@ -79,6 +85,7 @@ class Agent:
|
|
|
79
85
|
self.thread_id,
|
|
80
86
|
self._yolo,
|
|
81
87
|
self._flags,
|
|
88
|
+
self._include_thinking,
|
|
82
89
|
)
|
|
83
90
|
if thread_id:
|
|
84
91
|
self.thread_id = thread_id
|
|
@@ -87,7 +94,7 @@ class Agent:
|
|
|
87
94
|
return message
|
|
88
95
|
|
|
89
96
|
|
|
90
|
-
def _run_codex(prompt, cwd, thread_id, yolo, flags):
|
|
97
|
+
def _run_codex(prompt, cwd, thread_id, yolo, flags, include_thinking):
|
|
91
98
|
"""Invoke the Codex CLI and return the message plus thread id (if any)."""
|
|
92
99
|
command = [
|
|
93
100
|
_CODEX_BIN,
|
|
@@ -124,10 +131,10 @@ def _run_codex(prompt, cwd, thread_id, yolo, flags):
|
|
|
124
131
|
msg = f"{msg}\n{stderr}"
|
|
125
132
|
raise RuntimeError(msg)
|
|
126
133
|
|
|
127
|
-
return _parse_jsonl(result.stdout)
|
|
134
|
+
return _parse_jsonl(result.stdout, include_thinking)
|
|
128
135
|
|
|
129
136
|
|
|
130
|
-
def _parse_jsonl(output):
|
|
137
|
+
def _parse_jsonl(output, include_thinking):
|
|
131
138
|
"""Extract agent messages and the latest thread id from Codex JSONL output."""
|
|
132
139
|
thread_id = None
|
|
133
140
|
messages = []
|
|
@@ -161,4 +168,6 @@ def _parse_jsonl(output):
|
|
|
161
168
|
"Codex returned no agent message. Raw output:\n" + fallback
|
|
162
169
|
)
|
|
163
170
|
|
|
164
|
-
|
|
171
|
+
if include_thinking:
|
|
172
|
+
return "\n\n".join(messages), thread_id
|
|
173
|
+
return messages[-1], thread_id
|
|
@@ -24,6 +24,7 @@ from .lead import lead
|
|
|
24
24
|
_SESSION_ID_RE = re.compile(
|
|
25
25
|
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
|
|
26
26
|
)
|
|
27
|
+
_DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)([smhdSMHD]?)\s*$")
|
|
27
28
|
_TAIL_BYTES = 256 * 1024
|
|
28
29
|
_TAIL_MAX_BYTES = 4 * 1024 * 1024
|
|
29
30
|
_TAIL_MIN_LINES = 200
|
|
@@ -92,6 +93,25 @@ def _read_prompt(prompt):
|
|
|
92
93
|
return data
|
|
93
94
|
|
|
94
95
|
|
|
96
|
+
def _parse_duration_seconds(value, flag_name):
|
|
97
|
+
if value is None:
|
|
98
|
+
return 0.0
|
|
99
|
+
text = str(value).strip()
|
|
100
|
+
if not text:
|
|
101
|
+
raise SystemExit(f"{flag_name} cannot be empty.")
|
|
102
|
+
match = _DURATION_RE.match(text)
|
|
103
|
+
if not match:
|
|
104
|
+
raise SystemExit(
|
|
105
|
+
f"{flag_name} must be a number with optional unit s/m/h/d (example: 90m)."
|
|
106
|
+
)
|
|
107
|
+
amount = float(match.group(1))
|
|
108
|
+
unit = (match.group(2) or "m").lower()
|
|
109
|
+
if amount < 0:
|
|
110
|
+
raise SystemExit(f"{flag_name} must be >= 0.")
|
|
111
|
+
multiplier = {"s": 1, "m": 60, "h": 3600, "d": 86400}[unit]
|
|
112
|
+
return amount * multiplier
|
|
113
|
+
|
|
114
|
+
|
|
95
115
|
def _read_prompt_file(path):
|
|
96
116
|
if not path or not str(path).strip():
|
|
97
117
|
raise SystemExit("Prompt file path is empty.")
|
|
@@ -1026,6 +1046,8 @@ def main(argv=None):
|
|
|
1026
1046
|
"Science mode (science command):\n"
|
|
1027
1047
|
" Wraps your short task in a science prompt and runs it via the Ralph loop.\n"
|
|
1028
1048
|
" Default uses --yolo. Use --no-yolo to run --full-auto instead.\n"
|
|
1049
|
+
" Optional --max-duration stops before starting the next iteration once\n"
|
|
1050
|
+
" the duration limit is reached (e.g. 90m, 2h, 45s; default unit is minutes).\n"
|
|
1029
1051
|
)
|
|
1030
1052
|
parser = argparse.ArgumentParser(
|
|
1031
1053
|
prog="codexapi",
|
|
@@ -1053,6 +1075,11 @@ def main(argv=None):
|
|
|
1053
1075
|
"--flags",
|
|
1054
1076
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
1055
1077
|
)
|
|
1078
|
+
run_parser.add_argument(
|
|
1079
|
+
"--include-thinking",
|
|
1080
|
+
action="store_true",
|
|
1081
|
+
help="Return all agent messages joined together.",
|
|
1082
|
+
)
|
|
1056
1083
|
|
|
1057
1084
|
lead_parser = subparsers.add_parser(
|
|
1058
1085
|
"lead",
|
|
@@ -1250,6 +1277,13 @@ def main(argv=None):
|
|
|
1250
1277
|
default=0,
|
|
1251
1278
|
help="Max iterations for the loop (0 means unlimited).",
|
|
1252
1279
|
)
|
|
1280
|
+
science_parser.add_argument(
|
|
1281
|
+
"--max-duration",
|
|
1282
|
+
help=(
|
|
1283
|
+
"Maximum loop runtime. Stops after the current iteration when reached. "
|
|
1284
|
+
"Accepts s/m/h/d units (e.g. 90m, 2h, 45s); default unit is minutes."
|
|
1285
|
+
),
|
|
1286
|
+
)
|
|
1253
1287
|
science_parser.add_argument(
|
|
1254
1288
|
"--cancel",
|
|
1255
1289
|
action="store_true",
|
|
@@ -1435,6 +1469,8 @@ def main(argv=None):
|
|
|
1435
1469
|
)
|
|
1436
1470
|
if args.max_iterations != 0:
|
|
1437
1471
|
raise SystemExit("--max-iterations is not allowed with --cancel.")
|
|
1472
|
+
if args.max_duration:
|
|
1473
|
+
raise SystemExit("--max-duration is not allowed with --cancel.")
|
|
1438
1474
|
print(cancel_ralph_loop(args.cwd))
|
|
1439
1475
|
return
|
|
1440
1476
|
if args.ralph_fresh is None:
|
|
@@ -1574,6 +1610,7 @@ def main(argv=None):
|
|
|
1574
1610
|
if args.command == "science":
|
|
1575
1611
|
if args.max_iterations < 0:
|
|
1576
1612
|
raise SystemExit("--max-iterations must be >= 0.")
|
|
1613
|
+
max_duration_seconds = _parse_duration_seconds(args.max_duration, "--max-duration")
|
|
1577
1614
|
Science(
|
|
1578
1615
|
prompt,
|
|
1579
1616
|
args.cwd,
|
|
@@ -1582,6 +1619,7 @@ def main(argv=None):
|
|
|
1582
1619
|
args.max_iterations,
|
|
1583
1620
|
args.completion_promise,
|
|
1584
1621
|
args.ralph_fresh,
|
|
1622
|
+
max_duration_seconds,
|
|
1585
1623
|
)()
|
|
1586
1624
|
return
|
|
1587
1625
|
if args.command == "lead":
|
|
@@ -1637,12 +1675,15 @@ def main(argv=None):
|
|
|
1637
1675
|
args.yolo,
|
|
1638
1676
|
args.thread_id,
|
|
1639
1677
|
args.flags,
|
|
1678
|
+
include_thinking=args.include_thinking,
|
|
1640
1679
|
)
|
|
1641
1680
|
message = session(prompt)
|
|
1642
1681
|
if args.print_thread_id:
|
|
1643
1682
|
print(f"thread_id={session.thread_id}", file=sys.stderr)
|
|
1644
1683
|
else:
|
|
1645
|
-
message = agent(prompt, args.cwd, args.yolo, args.flags)
|
|
1684
|
+
message = agent(
|
|
1685
|
+
prompt, args.cwd, args.yolo, args.flags, args.include_thinking
|
|
1686
|
+
)
|
|
1646
1687
|
|
|
1647
1688
|
if message is not None:
|
|
1648
1689
|
print(message)
|
|
@@ -41,6 +41,7 @@ class Ralph:
|
|
|
41
41
|
self.max_iterations = max_iterations
|
|
42
42
|
self.completion_promise = completion_promise
|
|
43
43
|
self.fresh = fresh
|
|
44
|
+
self.include_thinking = True
|
|
44
45
|
|
|
45
46
|
def hook_before_loop(self):
|
|
46
47
|
"""Hook called once before the loop starts."""
|
|
@@ -156,9 +157,23 @@ class Ralph:
|
|
|
156
157
|
self.hook_before_iteration(iteration)
|
|
157
158
|
|
|
158
159
|
if self.fresh:
|
|
159
|
-
runner = Agent(self.cwd, self.yolo, None, self.flags, welfare=True)
|
|
160
|
+
runner = Agent(
|
|
161
|
+
self.cwd,
|
|
162
|
+
self.yolo,
|
|
163
|
+
None,
|
|
164
|
+
self.flags,
|
|
165
|
+
welfare=True,
|
|
166
|
+
include_thinking=self.include_thinking,
|
|
167
|
+
)
|
|
160
168
|
elif runner is None:
|
|
161
|
-
runner = Agent(self.cwd, self.yolo, None, self.flags, welfare=True)
|
|
169
|
+
runner = Agent(
|
|
170
|
+
self.cwd,
|
|
171
|
+
self.yolo,
|
|
172
|
+
None,
|
|
173
|
+
self.flags,
|
|
174
|
+
welfare=True,
|
|
175
|
+
include_thinking=self.include_thinking,
|
|
176
|
+
)
|
|
162
177
|
|
|
163
178
|
prompt = self.build_prompt(iteration)
|
|
164
179
|
stopped = False
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
+
import time
|
|
6
7
|
from datetime import datetime, timezone
|
|
7
8
|
|
|
8
9
|
from .agent import agent
|
|
@@ -29,7 +30,10 @@ _SCIENCE_TEMPLATE_B = (
|
|
|
29
30
|
"Try your best and have fun with this one! If you "
|
|
30
31
|
"think of several options, pick one and run with it - I will not be available "
|
|
31
32
|
"to make decisions for you, I give you my full permission to explore and make "
|
|
32
|
-
"your own best judgement towards our goal!
|
|
33
|
+
"your own best judgement towards our goal! If you are in a git repository, "
|
|
34
|
+
"create and use a local branch for this run. Make local commits for improvements "
|
|
35
|
+
"worth keeping, but never commit or reset LOGBOOK.md or SCIENCE.md. "
|
|
36
|
+
"Remember to update SCIENCE.md. "
|
|
33
37
|
"Good hunting!"
|
|
34
38
|
)
|
|
35
39
|
_LOGBOOK_NAME = "LOGBOOK.md"
|
|
@@ -97,7 +101,10 @@ class Science(Ralph):
|
|
|
97
101
|
max_iterations=0,
|
|
98
102
|
completion_promise=None,
|
|
99
103
|
fresh=True,
|
|
104
|
+
max_duration_seconds=0,
|
|
100
105
|
):
|
|
106
|
+
if max_duration_seconds < 0:
|
|
107
|
+
raise ValueError("max_duration_seconds must be >= 0")
|
|
101
108
|
self._task = task.strip() if isinstance(task, str) else task
|
|
102
109
|
prompt_a, prompt_b = _science_parts(task)
|
|
103
110
|
prompt = f"{prompt_a}{prompt_b}"
|
|
@@ -110,17 +117,27 @@ class Science(Ralph):
|
|
|
110
117
|
completion_promise,
|
|
111
118
|
fresh,
|
|
112
119
|
)
|
|
120
|
+
self.include_thinking = True
|
|
113
121
|
self._prompt_a = prompt_a
|
|
114
122
|
self._prompt_b = prompt_b
|
|
115
123
|
self._logbook_path = _logbook_path(cwd)
|
|
116
124
|
self._best_metrics = None
|
|
117
125
|
self._run_title = None
|
|
118
126
|
self._pushover = Pushover()
|
|
127
|
+
self._pushover_enabled = False
|
|
128
|
+
self._max_duration_seconds = float(max_duration_seconds)
|
|
129
|
+
self._loop_started_monotonic = None
|
|
130
|
+
self._duration_limit_hit = False
|
|
131
|
+
self._last_iteration = 0
|
|
119
132
|
|
|
120
133
|
def hook_before_loop(self):
|
|
121
134
|
super().hook_before_loop()
|
|
122
|
-
self.
|
|
123
|
-
self.
|
|
135
|
+
self._loop_started_monotonic = time.monotonic()
|
|
136
|
+
self._pushover_enabled = self._pushover.ensure_ready()
|
|
137
|
+
if self._pushover_enabled:
|
|
138
|
+
self._run_title = self._build_run_title()
|
|
139
|
+
else:
|
|
140
|
+
self._run_title = _fallback_title(self._task)
|
|
124
141
|
|
|
125
142
|
def build_prompt(self, iteration):
|
|
126
143
|
if iteration <= 1:
|
|
@@ -130,8 +147,33 @@ class Science(Ralph):
|
|
|
130
147
|
|
|
131
148
|
def hook_after_iteration(self, iteration, message):
|
|
132
149
|
super().hook_after_iteration(iteration, message)
|
|
150
|
+
self._last_iteration = iteration
|
|
133
151
|
self._append_logbook(iteration, message)
|
|
134
152
|
self._extract_and_notify(message)
|
|
153
|
+
self._mark_duration_stop(iteration)
|
|
154
|
+
|
|
155
|
+
def hook_after_loop(self, last_message, stop_reason):
|
|
156
|
+
super().hook_after_loop(last_message, stop_reason)
|
|
157
|
+
if not self._pushover_enabled:
|
|
158
|
+
return
|
|
159
|
+
status = _format_final_status(
|
|
160
|
+
stop_reason,
|
|
161
|
+
self.max_iterations,
|
|
162
|
+
self.completion_promise,
|
|
163
|
+
self._duration_limit_hit,
|
|
164
|
+
)
|
|
165
|
+
lines = [
|
|
166
|
+
f"Science run ended: {status}",
|
|
167
|
+
f"Iterations completed: {self._last_iteration}",
|
|
168
|
+
]
|
|
169
|
+
if self._best_metrics:
|
|
170
|
+
summary = _single_line(self._best_metrics.get("summary", "")).strip()
|
|
171
|
+
metrics_text = _format_metrics(self._best_metrics.get("metrics") or [])
|
|
172
|
+
if summary:
|
|
173
|
+
lines.append(f"Best summary: {summary}")
|
|
174
|
+
if metrics_text:
|
|
175
|
+
lines.append(f"Best metrics: {metrics_text}")
|
|
176
|
+
self._pushover.send(self._run_title, "\n".join(lines))
|
|
135
177
|
|
|
136
178
|
def hook_new_best(self, result):
|
|
137
179
|
super().hook_new_best(result)
|
|
@@ -185,6 +227,25 @@ class Science(Ralph):
|
|
|
185
227
|
title = _fallback_title(self._task)
|
|
186
228
|
return title
|
|
187
229
|
|
|
230
|
+
def _mark_duration_stop(self, iteration):
|
|
231
|
+
if self._duration_limit_hit:
|
|
232
|
+
return
|
|
233
|
+
if self._max_duration_seconds <= 0:
|
|
234
|
+
return
|
|
235
|
+
if self._loop_started_monotonic is None:
|
|
236
|
+
return
|
|
237
|
+
elapsed = time.monotonic() - self._loop_started_monotonic
|
|
238
|
+
if elapsed < self._max_duration_seconds:
|
|
239
|
+
return
|
|
240
|
+
self._duration_limit_hit = True
|
|
241
|
+
self.max_iterations = (
|
|
242
|
+
iteration if self.max_iterations == 0 else min(self.max_iterations, iteration)
|
|
243
|
+
)
|
|
244
|
+
print(
|
|
245
|
+
"Science loop: Max duration reached; "
|
|
246
|
+
"stopping after the current iteration."
|
|
247
|
+
)
|
|
248
|
+
|
|
188
249
|
|
|
189
250
|
|
|
190
251
|
def _build_metrics_prompt(task, message, previous_best):
|
|
@@ -299,3 +360,30 @@ def _fallback_title(task):
|
|
|
299
360
|
|
|
300
361
|
def _warn(message):
|
|
301
362
|
print(message, file=sys.stderr)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _format_final_status(
|
|
366
|
+
stop_reason,
|
|
367
|
+
max_iterations,
|
|
368
|
+
completion_promise,
|
|
369
|
+
duration_limit_hit,
|
|
370
|
+
):
|
|
371
|
+
if stop_reason == "max_iterations":
|
|
372
|
+
if duration_limit_hit:
|
|
373
|
+
return "max duration reached"
|
|
374
|
+
return f"max iterations reached ({max_iterations})"
|
|
375
|
+
if stop_reason == "promise":
|
|
376
|
+
if completion_promise:
|
|
377
|
+
return f"completion promise met ({completion_promise})"
|
|
378
|
+
return "completion promise met"
|
|
379
|
+
if stop_reason == "welfare_stop":
|
|
380
|
+
return "agent requested welfare stop"
|
|
381
|
+
if stop_reason == "canceled":
|
|
382
|
+
return "loop canceled"
|
|
383
|
+
if stop_reason == "interrupted":
|
|
384
|
+
return "interrupted"
|
|
385
|
+
if stop_reason == "error":
|
|
386
|
+
return "stopped due to error"
|
|
387
|
+
if stop_reason:
|
|
388
|
+
return _single_line(stop_reason)
|
|
389
|
+
return "finished"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -130,6 +130,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
|
|
|
130
130
|
```
|
|
131
131
|
|
|
132
132
|
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
133
|
+
Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
|
|
133
134
|
|
|
134
135
|
Lead mode periodically checks in on a long-running agent session with the
|
|
135
136
|
current time and prints JSON status updates. The agent controls the loop by
|
|
@@ -174,17 +175,23 @@ codexapi ralph --cancel --cwd /path/to/project
|
|
|
174
175
|
Science mode wraps a short task in a science prompt and runs it through the
|
|
175
176
|
Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
|
|
176
177
|
Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
|
|
177
|
-
any improved figures of merit for optional notifications.
|
|
178
|
+
any improved figures of merit for optional notifications. You can also set
|
|
179
|
+
`--max-duration` to stop after the current iteration once a time limit is hit.
|
|
180
|
+
The default science wrapper also tells the agent to create/use a local git
|
|
181
|
+
branch when in a repo and make local commits for worthwhile improvements, while
|
|
182
|
+
never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
|
|
178
183
|
|
|
179
184
|
```bash
|
|
180
185
|
codexapi science "hyper-optimize the kernel cycles"
|
|
181
186
|
codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
|
|
187
|
+
codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
|
|
182
188
|
```
|
|
183
189
|
|
|
184
190
|
Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
|
|
185
191
|
Line 1 is your user or group key, line 2 is the app API token. When this file
|
|
186
192
|
exists, Science will send a notification whenever it detects a new best result,
|
|
187
|
-
including the metric values and percent improvement. Task runs will also send a
|
|
193
|
+
including the metric values and percent improvement, plus a final run-end status.
|
|
194
|
+
Task runs will also send a
|
|
188
195
|
✅/❌ notification with the task summary. Lead runs send a notification when the
|
|
189
196
|
loop stops.
|
|
190
197
|
|
|
@@ -199,7 +206,7 @@ codexapi foreach list.txt task.yaml --retry-all
|
|
|
199
206
|
|
|
200
207
|
## API
|
|
201
208
|
|
|
202
|
-
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
209
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
|
|
203
210
|
|
|
204
211
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
205
212
|
items are filtered out.
|
|
@@ -208,8 +215,9 @@ items are filtered out.
|
|
|
208
215
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
209
216
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
210
217
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
218
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
211
219
|
|
|
212
|
-
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
|
|
220
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
|
|
213
221
|
|
|
214
222
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
215
223
|
the same conversation and returns only the agent's message.
|
|
@@ -220,6 +228,7 @@ the same conversation and returns only the agent's message.
|
|
|
220
228
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
221
229
|
- `welfare` (bool): when true, append welfare stop instructions to each prompt
|
|
222
230
|
and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
|
|
231
|
+
- `include_thinking` (bool): when true, return all agent messages joined.
|
|
223
232
|
|
|
224
233
|
### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
|
|
225
234
|
|
|
@@ -305,6 +314,7 @@ Simple result object returned by `foreach()`.
|
|
|
305
314
|
## Behavior notes
|
|
306
315
|
|
|
307
316
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
317
|
+
- Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
|
|
308
318
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
309
319
|
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
310
320
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import tempfile
|
|
3
|
+
import unittest
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from unittest.mock import patch
|
|
6
|
+
|
|
7
|
+
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
|
8
|
+
|
|
9
|
+
from codexapi.science import Science, _science_parts
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _FakePushover:
|
|
13
|
+
def __init__(self, enabled):
|
|
14
|
+
self.enabled = enabled
|
|
15
|
+
self.sent = []
|
|
16
|
+
|
|
17
|
+
def ensure_ready(self, announce=True):
|
|
18
|
+
return self.enabled
|
|
19
|
+
|
|
20
|
+
def send(self, title, message):
|
|
21
|
+
self.sent.append((title, message))
|
|
22
|
+
return True
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class _TestScience(Science):
|
|
26
|
+
def _append_logbook(self, iteration, message):
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
def _extract_and_notify(self, message):
|
|
30
|
+
return None
|
|
31
|
+
|
|
32
|
+
def _build_run_title(self):
|
|
33
|
+
return "test-run"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class _FakeAgent:
|
|
37
|
+
calls = 0
|
|
38
|
+
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
cwd=None,
|
|
42
|
+
yolo=True,
|
|
43
|
+
thread_id=None,
|
|
44
|
+
flags=None,
|
|
45
|
+
welfare=False,
|
|
46
|
+
include_thinking=False,
|
|
47
|
+
):
|
|
48
|
+
pass
|
|
49
|
+
|
|
50
|
+
def __call__(self, prompt):
|
|
51
|
+
_FakeAgent.calls += 1
|
|
52
|
+
return f"message {_FakeAgent.calls}"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ScienceTests(unittest.TestCase):
|
|
56
|
+
def test_science_prompt_includes_git_commit_guidance(self):
|
|
57
|
+
_prompt_a, prompt_b = _science_parts("improve performance")
|
|
58
|
+
self.assertIn("create and use a local branch", prompt_b)
|
|
59
|
+
self.assertIn("never commit or reset LOGBOOK.md or SCIENCE.md", prompt_b)
|
|
60
|
+
|
|
61
|
+
def test_max_duration_stops_after_current_iteration(self):
|
|
62
|
+
_FakeAgent.calls = 0
|
|
63
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
64
|
+
runner = _TestScience(
|
|
65
|
+
"improve performance",
|
|
66
|
+
cwd=tmpdir,
|
|
67
|
+
max_duration_seconds=60,
|
|
68
|
+
)
|
|
69
|
+
runner._pushover = _FakePushover(enabled=False)
|
|
70
|
+
with patch("codexapi.ralph.Agent", _FakeAgent):
|
|
71
|
+
with patch("codexapi.science.time.monotonic", side_effect=[0, 30, 61]):
|
|
72
|
+
runner()
|
|
73
|
+
self.assertEqual(_FakeAgent.calls, 2)
|
|
74
|
+
self.assertTrue(runner._duration_limit_hit)
|
|
75
|
+
self.assertEqual(runner._last_iteration, 2)
|
|
76
|
+
|
|
77
|
+
def test_final_pushover_update_sent_when_enabled(self):
|
|
78
|
+
_FakeAgent.calls = 0
|
|
79
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
80
|
+
runner = _TestScience(
|
|
81
|
+
"improve performance",
|
|
82
|
+
cwd=tmpdir,
|
|
83
|
+
max_iterations=1,
|
|
84
|
+
)
|
|
85
|
+
fake_pushover = _FakePushover(enabled=True)
|
|
86
|
+
runner._pushover = fake_pushover
|
|
87
|
+
with patch("codexapi.ralph.Agent", _FakeAgent):
|
|
88
|
+
runner()
|
|
89
|
+
self.assertEqual(len(fake_pushover.sent), 1)
|
|
90
|
+
title, message = fake_pushover.sent[0]
|
|
91
|
+
self.assertEqual(title, "test-run")
|
|
92
|
+
self.assertIn("Science run ended: max iterations reached (1)", message)
|
|
93
|
+
self.assertIn("Iterations completed: 1", message)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
|
|
97
|
+
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|