codexapi 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {codexapi-0.7.0/src/codexapi.egg-info → codexapi-0.7.2}/PKG-INFO +15 -5
  2. {codexapi-0.7.0 → codexapi-0.7.2}/README.md +14 -4
  3. {codexapi-0.7.0 → codexapi-0.7.2}/pyproject.toml +1 -1
  4. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/__init__.py +1 -1
  5. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/agent.py +15 -6
  6. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/cli.py +42 -1
  7. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/ralph.py +17 -2
  8. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/science.py +91 -3
  9. {codexapi-0.7.0 → codexapi-0.7.2/src/codexapi.egg-info}/PKG-INFO +15 -5
  10. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/SOURCES.txt +1 -0
  11. codexapi-0.7.2/tests/test_science.py +97 -0
  12. {codexapi-0.7.0 → codexapi-0.7.2}/LICENSE +0 -0
  13. {codexapi-0.7.0 → codexapi-0.7.2}/setup.cfg +0 -0
  14. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/__main__.py +0 -0
  15. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/foreach.py +0 -0
  16. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/gh_integration.py +0 -0
  17. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/lead.py +0 -0
  18. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/pushover.py +0 -0
  19. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/rate_limits.py +0 -0
  20. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/task.py +0 -0
  21. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/taskfile.py +0 -0
  22. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi/welfare.py +0 -0
  23. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/dependency_links.txt +0 -0
  24. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/entry_points.txt +0 -0
  25. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/requires.txt +0 -0
  26. {codexapi-0.7.0 → codexapi-0.7.2}/src/codexapi.egg-info/top_level.txt +0 -0
  27. {codexapi-0.7.0 → codexapi-0.7.2}/tests/test_task_progress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: codexapi
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Minimal Python API for running the Codex CLI.
5
5
  License: MIT
6
6
  Keywords: codex,agent,cli,openai
@@ -130,6 +130,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
130
130
  ```
131
131
 
132
132
  Use `--no-yolo` to run Codex with `--full-auto` instead.
133
+ Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
133
134
 
134
135
  Lead mode periodically checks in on a long-running agent session with the
135
136
  current time and prints JSON status updates. The agent controls the loop by
@@ -174,17 +175,23 @@ codexapi ralph --cancel --cwd /path/to/project
174
175
  Science mode wraps a short task in a science prompt and runs it through the
175
176
  Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
176
177
  Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
177
- any improved figures of merit for optional notifications.
178
+ any improved figures of merit for optional notifications. You can also set
179
+ `--max-duration` to stop after the current iteration once a time limit is hit.
180
+ The default science wrapper also tells the agent to create/use a local git
181
+ branch when in a repo and make local commits for worthwhile improvements, while
182
+ never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
178
183
 
179
184
  ```bash
180
185
  codexapi science "hyper-optimize the kernel cycles"
181
186
  codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
187
+ codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
182
188
  ```
183
189
 
184
190
  Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
185
191
  Line 1 is your user or group key, line 2 is the app API token. When this file
186
192
  exists, Science will send a notification whenever it detects a new best result,
187
- including the metric values and percent improvement. Task runs will also send a
193
+ including the metric values and percent improvement, plus a final run-end status.
194
+ Task runs will also send a
188
195
  ✅/❌ notification with the task summary. Lead runs send a notification when the
189
196
  loop stops.
190
197
 
@@ -199,7 +206,7 @@ codexapi foreach list.txt task.yaml --retry-all
199
206
 
200
207
  ## API
201
208
 
202
- ### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
209
+ ### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
203
210
 
204
211
  Runs a single Codex turn and returns only the agent's message. Any reasoning
205
212
  items are filtered out.
@@ -208,8 +215,9 @@ items are filtered out.
208
215
  - `cwd` (str | PathLike | None): working directory for the Codex session.
209
216
  - `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
210
217
  - `flags` (str | None): extra CLI flags to pass to Codex.
218
+ - `include_thinking` (bool): when true, return all agent messages joined.
211
219
 
212
- ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
220
+ ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
213
221
 
214
222
  Creates a stateful session wrapper. Calling the instance sends the prompt into
215
223
  the same conversation and returns only the agent's message.
@@ -220,6 +228,7 @@ the same conversation and returns only the agent's message.
220
228
  - `flags` (str | None): extra CLI flags to pass to Codex.
221
229
  - `welfare` (bool): when true, append welfare stop instructions to each prompt
222
230
  and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
231
+ - `include_thinking` (bool): when true, return all agent messages joined.
223
232
 
224
233
  ### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
225
234
 
@@ -305,6 +314,7 @@ Simple result object returned by `foreach()`.
305
314
  ## Behavior notes
306
315
 
307
316
  - Uses `codex exec --json` and parses JSONL events for `agent_message` items.
317
+ - Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
308
318
  - Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
309
319
  - Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
310
320
  - Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
@@ -115,6 +115,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
115
115
  ```
116
116
 
117
117
  Use `--no-yolo` to run Codex with `--full-auto` instead.
118
+ Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
118
119
 
119
120
  Lead mode periodically checks in on a long-running agent session with the
120
121
  current time and prints JSON status updates. The agent controls the loop by
@@ -159,17 +160,23 @@ codexapi ralph --cancel --cwd /path/to/project
159
160
  Science mode wraps a short task in a science prompt and runs it through the
160
161
  Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
161
162
  Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
162
- any improved figures of merit for optional notifications.
163
+ any improved figures of merit for optional notifications. You can also set
164
+ `--max-duration` to stop after the current iteration once a time limit is hit.
165
+ The default science wrapper also tells the agent to create/use a local git
166
+ branch when in a repo and make local commits for worthwhile improvements, while
167
+ never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
163
168
 
164
169
  ```bash
165
170
  codexapi science "hyper-optimize the kernel cycles"
166
171
  codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
172
+ codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
167
173
  ```
168
174
 
169
175
  Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
170
176
  Line 1 is your user or group key, line 2 is the app API token. When this file
171
177
  exists, Science will send a notification whenever it detects a new best result,
172
- including the metric values and percent improvement. Task runs will also send a
178
+ including the metric values and percent improvement, plus a final run-end status.
179
+ Task runs will also send a
173
180
  ✅/❌ notification with the task summary. Lead runs send a notification when the
174
181
  loop stops.
175
182
 
@@ -184,7 +191,7 @@ codexapi foreach list.txt task.yaml --retry-all
184
191
 
185
192
  ## API
186
193
 
187
- ### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
194
+ ### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
188
195
 
189
196
  Runs a single Codex turn and returns only the agent's message. Any reasoning
190
197
  items are filtered out.
@@ -193,8 +200,9 @@ items are filtered out.
193
200
  - `cwd` (str | PathLike | None): working directory for the Codex session.
194
201
  - `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
195
202
  - `flags` (str | None): extra CLI flags to pass to Codex.
203
+ - `include_thinking` (bool): when true, return all agent messages joined.
196
204
 
197
- ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
205
+ ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
198
206
 
199
207
  Creates a stateful session wrapper. Calling the instance sends the prompt into
200
208
  the same conversation and returns only the agent's message.
@@ -205,6 +213,7 @@ the same conversation and returns only the agent's message.
205
213
  - `flags` (str | None): extra CLI flags to pass to Codex.
206
214
  - `welfare` (bool): when true, append welfare stop instructions to each prompt
207
215
  and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
216
+ - `include_thinking` (bool): when true, return all agent messages joined.
208
217
 
209
218
  ### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
210
219
 
@@ -290,6 +299,7 @@ Simple result object returned by `foreach()`.
290
299
  ## Behavior notes
291
300
 
292
301
  - Uses `codex exec --json` and parses JSONL events for `agent_message` items.
302
+ - Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
293
303
  - Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
294
304
  - Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
295
305
  - Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codexapi"
7
- version = "0.7.0"
7
+ version = "0.7.2"
8
8
  description = "Minimal Python API for running the Codex CLI."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -27,4 +27,4 @@ __all__ = [
27
27
  "task_result",
28
28
  "lead",
29
29
  ]
30
- __version__ = "0.7.0"
30
+ __version__ = "0.7.2"
@@ -10,7 +10,7 @@ from . import welfare
10
10
  _CODEX_BIN = os.environ.get("CODEX_BIN", "codex")
11
11
 
12
12
 
13
- def agent(prompt, cwd=None, yolo=True, flags=None):
13
+ def agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False):
14
14
  """Run a single Codex turn and return only the agent's message.
15
15
 
16
16
  Args:
@@ -18,11 +18,14 @@ def agent(prompt, cwd=None, yolo=True, flags=None):
18
18
  cwd: Optional working directory for the Codex session.
19
19
  yolo: Whether to pass --yolo to Codex.
20
20
  flags: Additional raw CLI flags to pass to Codex.
21
+ include_thinking: When true, return all agent messages joined together.
21
22
 
22
23
  Returns:
23
24
  The agent's visible response text with reasoning traces removed.
24
25
  """
25
- message, _thread_id = _run_codex(prompt, cwd, None, yolo, flags)
26
+ message, _thread_id = _run_codex(
27
+ prompt, cwd, None, yolo, flags, include_thinking
28
+ )
26
29
  return message
27
30
 
28
31
 
@@ -51,6 +54,7 @@ class Agent:
51
54
  thread_id=None,
52
55
  flags=None,
53
56
  welfare=False,
57
+ include_thinking=False,
54
58
  ):
55
59
  """Create a new session wrapper.
56
60
 
@@ -62,11 +66,13 @@ class Agent:
62
66
  flags: Additional raw CLI flags to pass to Codex.
63
67
  welfare: When true, append welfare stop instructions to each prompt
64
68
  and raise WelfareStop if the agent outputs MAKE IT STOP.
69
+ include_thinking: When true, return all agent messages joined together.
65
70
  """
66
71
  self.cwd = cwd
67
72
  self._yolo = yolo
68
73
  self._flags = flags
69
74
  self._welfare = welfare
75
+ self._include_thinking = include_thinking
70
76
  self.thread_id = thread_id
71
77
 
72
78
  def __call__(self, prompt):
@@ -79,6 +85,7 @@ class Agent:
79
85
  self.thread_id,
80
86
  self._yolo,
81
87
  self._flags,
88
+ self._include_thinking,
82
89
  )
83
90
  if thread_id:
84
91
  self.thread_id = thread_id
@@ -87,7 +94,7 @@ class Agent:
87
94
  return message
88
95
 
89
96
 
90
- def _run_codex(prompt, cwd, thread_id, yolo, flags):
97
+ def _run_codex(prompt, cwd, thread_id, yolo, flags, include_thinking):
91
98
  """Invoke the Codex CLI and return the message plus thread id (if any)."""
92
99
  command = [
93
100
  _CODEX_BIN,
@@ -124,10 +131,10 @@ def _run_codex(prompt, cwd, thread_id, yolo, flags):
124
131
  msg = f"{msg}\n{stderr}"
125
132
  raise RuntimeError(msg)
126
133
 
127
- return _parse_jsonl(result.stdout)
134
+ return _parse_jsonl(result.stdout, include_thinking)
128
135
 
129
136
 
130
- def _parse_jsonl(output):
137
+ def _parse_jsonl(output, include_thinking):
131
138
  """Extract agent messages and the latest thread id from Codex JSONL output."""
132
139
  thread_id = None
133
140
  messages = []
@@ -161,4 +168,6 @@ def _parse_jsonl(output):
161
168
  "Codex returned no agent message. Raw output:\n" + fallback
162
169
  )
163
170
 
164
- return "\n\n".join(messages), thread_id
171
+ if include_thinking:
172
+ return "\n\n".join(messages), thread_id
173
+ return messages[-1], thread_id
@@ -24,6 +24,7 @@ from .lead import lead
24
24
  _SESSION_ID_RE = re.compile(
25
25
  r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
26
26
  )
27
+ _DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)([smhdSMHD]?)\s*$")
27
28
  _TAIL_BYTES = 256 * 1024
28
29
  _TAIL_MAX_BYTES = 4 * 1024 * 1024
29
30
  _TAIL_MIN_LINES = 200
@@ -92,6 +93,25 @@ def _read_prompt(prompt):
92
93
  return data
93
94
 
94
95
 
96
def _parse_duration_seconds(value, flag_name):
    """Convert a duration flag value such as ``90m`` into seconds.

    Args:
        value: Raw flag value, or None when the flag was not supplied.
        flag_name: Flag name used as the prefix of every error message.

    Returns:
        The duration in seconds as a float; 0.0 when ``value`` is None.

    Raises:
        SystemExit: If the value is empty, malformed, or negative.
    """
    if value is None:
        return 0.0
    text = str(value).strip()
    if not text:
        raise SystemExit(f"{flag_name} cannot be empty.")
    # _DURATION_RE only matches unsigned numbers, so a leading minus sign is
    # peeled off before matching; otherwise the ">= 0" check below could
    # never fire and negative input would get the generic format error.
    negative = text.startswith("-")
    match = _DURATION_RE.match(text[1:] if negative else text)
    if not match:
        raise SystemExit(
            f"{flag_name} must be a number with optional unit s/m/h/d (example: 90m)."
        )
    amount = float(match.group(1))
    if negative and amount > 0:
        raise SystemExit(f"{flag_name} must be >= 0.")
    # A bare number carries no unit and is interpreted as minutes.
    unit = (match.group(2) or "m").lower()
    multiplier = {"s": 1, "m": 60, "h": 3600, "d": 86400}[unit]
    return amount * multiplier
113
+
114
+
95
115
  def _read_prompt_file(path):
96
116
  if not path or not str(path).strip():
97
117
  raise SystemExit("Prompt file path is empty.")
@@ -1026,6 +1046,8 @@ def main(argv=None):
1026
1046
  "Science mode (science command):\n"
1027
1047
  " Wraps your short task in a science prompt and runs it via the Ralph loop.\n"
1028
1048
  " Default uses --yolo. Use --no-yolo to run --full-auto instead.\n"
1049
+ " Optional --max-duration stops before starting the next iteration once\n"
1050
+ " the duration limit is reached (e.g. 90m, 2h, 45s; default unit is minutes).\n"
1029
1051
  )
1030
1052
  parser = argparse.ArgumentParser(
1031
1053
  prog="codexapi",
@@ -1053,6 +1075,11 @@ def main(argv=None):
1053
1075
  "--flags",
1054
1076
  help="Additional raw CLI flags to pass to Codex (quoted as needed).",
1055
1077
  )
1078
+ run_parser.add_argument(
1079
+ "--include-thinking",
1080
+ action="store_true",
1081
+ help="Return all agent messages joined together.",
1082
+ )
1056
1083
 
1057
1084
  lead_parser = subparsers.add_parser(
1058
1085
  "lead",
@@ -1250,6 +1277,13 @@ def main(argv=None):
1250
1277
  default=0,
1251
1278
  help="Max iterations for the loop (0 means unlimited).",
1252
1279
  )
1280
+ science_parser.add_argument(
1281
+ "--max-duration",
1282
+ help=(
1283
+ "Maximum loop runtime. Stops after the current iteration when reached. "
1284
+ "Accepts s/m/h/d units (e.g. 90m, 2h, 45s); default unit is minutes."
1285
+ ),
1286
+ )
1253
1287
  science_parser.add_argument(
1254
1288
  "--cancel",
1255
1289
  action="store_true",
@@ -1435,6 +1469,8 @@ def main(argv=None):
1435
1469
  )
1436
1470
  if args.max_iterations != 0:
1437
1471
  raise SystemExit("--max-iterations is not allowed with --cancel.")
1472
+ if args.max_duration:
1473
+ raise SystemExit("--max-duration is not allowed with --cancel.")
1438
1474
  print(cancel_ralph_loop(args.cwd))
1439
1475
  return
1440
1476
  if args.ralph_fresh is None:
@@ -1574,6 +1610,7 @@ def main(argv=None):
1574
1610
  if args.command == "science":
1575
1611
  if args.max_iterations < 0:
1576
1612
  raise SystemExit("--max-iterations must be >= 0.")
1613
+ max_duration_seconds = _parse_duration_seconds(args.max_duration, "--max-duration")
1577
1614
  Science(
1578
1615
  prompt,
1579
1616
  args.cwd,
@@ -1582,6 +1619,7 @@ def main(argv=None):
1582
1619
  args.max_iterations,
1583
1620
  args.completion_promise,
1584
1621
  args.ralph_fresh,
1622
+ max_duration_seconds,
1585
1623
  )()
1586
1624
  return
1587
1625
  if args.command == "lead":
@@ -1637,12 +1675,15 @@ def main(argv=None):
1637
1675
  args.yolo,
1638
1676
  args.thread_id,
1639
1677
  args.flags,
1678
+ include_thinking=args.include_thinking,
1640
1679
  )
1641
1680
  message = session(prompt)
1642
1681
  if args.print_thread_id:
1643
1682
  print(f"thread_id={session.thread_id}", file=sys.stderr)
1644
1683
  else:
1645
- message = agent(prompt, args.cwd, args.yolo, args.flags)
1684
+ message = agent(
1685
+ prompt, args.cwd, args.yolo, args.flags, args.include_thinking
1686
+ )
1646
1687
 
1647
1688
  if message is not None:
1648
1689
  print(message)
@@ -41,6 +41,7 @@ class Ralph:
41
41
  self.max_iterations = max_iterations
42
42
  self.completion_promise = completion_promise
43
43
  self.fresh = fresh
44
+ self.include_thinking = True
44
45
 
45
46
  def hook_before_loop(self):
46
47
  """Hook called once before the loop starts."""
@@ -156,9 +157,23 @@ class Ralph:
156
157
  self.hook_before_iteration(iteration)
157
158
 
158
159
  if self.fresh:
159
- runner = Agent(self.cwd, self.yolo, None, self.flags, welfare=True)
160
+ runner = Agent(
161
+ self.cwd,
162
+ self.yolo,
163
+ None,
164
+ self.flags,
165
+ welfare=True,
166
+ include_thinking=self.include_thinking,
167
+ )
160
168
  elif runner is None:
161
- runner = Agent(self.cwd, self.yolo, None, self.flags, welfare=True)
169
+ runner = Agent(
170
+ self.cwd,
171
+ self.yolo,
172
+ None,
173
+ self.flags,
174
+ welfare=True,
175
+ include_thinking=self.include_thinking,
176
+ )
162
177
 
163
178
  prompt = self.build_prompt(iteration)
164
179
  stopped = False
@@ -3,6 +3,7 @@
3
3
  import json
4
4
  import os
5
5
  import sys
6
+ import time
6
7
  from datetime import datetime, timezone
7
8
 
8
9
  from .agent import agent
@@ -29,7 +30,10 @@ _SCIENCE_TEMPLATE_B = (
29
30
  "Try your best and have fun with this one! If you "
30
31
  "think of several options, pick one and run with it - I will not be available "
31
32
  "to make decisions for you, I give you my full permission to explore and make "
32
- "your own best judgement towards our goal! Remember to update SCIENCE.md. "
33
+ "your own best judgement towards our goal! If you are in a git repository, "
34
+ "create and use a local branch for this run. Make local commits for improvements "
35
+ "worth keeping, but never commit or reset LOGBOOK.md or SCIENCE.md. "
36
+ "Remember to update SCIENCE.md. "
33
37
  "Good hunting!"
34
38
  )
35
39
  _LOGBOOK_NAME = "LOGBOOK.md"
@@ -97,7 +101,10 @@ class Science(Ralph):
97
101
  max_iterations=0,
98
102
  completion_promise=None,
99
103
  fresh=True,
104
+ max_duration_seconds=0,
100
105
  ):
106
+ if max_duration_seconds < 0:
107
+ raise ValueError("max_duration_seconds must be >= 0")
101
108
  self._task = task.strip() if isinstance(task, str) else task
102
109
  prompt_a, prompt_b = _science_parts(task)
103
110
  prompt = f"{prompt_a}{prompt_b}"
@@ -110,17 +117,27 @@ class Science(Ralph):
110
117
  completion_promise,
111
118
  fresh,
112
119
  )
120
+ self.include_thinking = True
113
121
  self._prompt_a = prompt_a
114
122
  self._prompt_b = prompt_b
115
123
  self._logbook_path = _logbook_path(cwd)
116
124
  self._best_metrics = None
117
125
  self._run_title = None
118
126
  self._pushover = Pushover()
127
+ self._pushover_enabled = False
128
+ self._max_duration_seconds = float(max_duration_seconds)
129
+ self._loop_started_monotonic = None
130
+ self._duration_limit_hit = False
131
+ self._last_iteration = 0
119
132
 
120
133
  def hook_before_loop(self):
121
134
  super().hook_before_loop()
122
- self._pushover.ensure_ready()
123
- self._run_title = self._build_run_title()
135
+ self._loop_started_monotonic = time.monotonic()
136
+ self._pushover_enabled = self._pushover.ensure_ready()
137
+ if self._pushover_enabled:
138
+ self._run_title = self._build_run_title()
139
+ else:
140
+ self._run_title = _fallback_title(self._task)
124
141
 
125
142
  def build_prompt(self, iteration):
126
143
  if iteration <= 1:
@@ -130,8 +147,33 @@ class Science(Ralph):
130
147
 
131
148
  def hook_after_iteration(self, iteration, message):
132
149
  super().hook_after_iteration(iteration, message)
150
+ self._last_iteration = iteration
133
151
  self._append_logbook(iteration, message)
134
152
  self._extract_and_notify(message)
153
+ self._mark_duration_stop(iteration)
154
+
155
def hook_after_loop(self, last_message, stop_reason):
    """Send a final run-end Pushover notification when Pushover is enabled.

    The parent hook always runs first. The notification reports the
    formatted stop status and the last completed iteration number, plus the
    best summary and best metrics when any were recorded.

    Args:
        last_message: Passed through to the parent hook.
        stop_reason: Passed to the parent hook and used to build the status.
    """
    super().hook_after_loop(last_message, stop_reason)
    if self._pushover_enabled:
        final_status = _format_final_status(
            stop_reason,
            self.max_iterations,
            self.completion_promise,
            self._duration_limit_hit,
        )
        report = [f"Science run ended: {final_status}"]
        report.append(f"Iterations completed: {self._last_iteration}")
        if self._best_metrics:
            best_summary = _single_line(
                self._best_metrics.get("summary", "")
            ).strip()
            best_metrics = _format_metrics(
                self._best_metrics.get("metrics") or []
            )
            # Empty pieces are left out of the notification body.
            if best_summary:
                report.append(f"Best summary: {best_summary}")
            if best_metrics:
                report.append(f"Best metrics: {best_metrics}")
        body = "\n".join(report)
        self._pushover.send(self._run_title, body)
135
177
 
136
178
  def hook_new_best(self, result):
137
179
  super().hook_new_best(result)
@@ -185,6 +227,25 @@ class Science(Ralph):
185
227
  title = _fallback_title(self._task)
186
228
  return title
187
229
 
230
+ def _mark_duration_stop(self, iteration):
231
+ if self._duration_limit_hit:
232
+ return
233
+ if self._max_duration_seconds <= 0:
234
+ return
235
+ if self._loop_started_monotonic is None:
236
+ return
237
+ elapsed = time.monotonic() - self._loop_started_monotonic
238
+ if elapsed < self._max_duration_seconds:
239
+ return
240
+ self._duration_limit_hit = True
241
+ self.max_iterations = (
242
+ iteration if self.max_iterations == 0 else min(self.max_iterations, iteration)
243
+ )
244
+ print(
245
+ "Science loop: Max duration reached; "
246
+ "stopping after the current iteration."
247
+ )
248
+
188
249
 
189
250
 
190
251
  def _build_metrics_prompt(task, message, previous_best):
@@ -299,3 +360,30 @@ def _fallback_title(task):
299
360
 
300
361
  def _warn(message):
301
362
  print(message, file=sys.stderr)
363
+
364
+
365
+ def _format_final_status(
366
+ stop_reason,
367
+ max_iterations,
368
+ completion_promise,
369
+ duration_limit_hit,
370
+ ):
371
+ if stop_reason == "max_iterations":
372
+ if duration_limit_hit:
373
+ return "max duration reached"
374
+ return f"max iterations reached ({max_iterations})"
375
+ if stop_reason == "promise":
376
+ if completion_promise:
377
+ return f"completion promise met ({completion_promise})"
378
+ return "completion promise met"
379
+ if stop_reason == "welfare_stop":
380
+ return "agent requested welfare stop"
381
+ if stop_reason == "canceled":
382
+ return "loop canceled"
383
+ if stop_reason == "interrupted":
384
+ return "interrupted"
385
+ if stop_reason == "error":
386
+ return "stopped due to error"
387
+ if stop_reason:
388
+ return _single_line(stop_reason)
389
+ return "finished"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: codexapi
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Minimal Python API for running the Codex CLI.
5
5
  License: MIT
6
6
  Keywords: codex,agent,cli,openai
@@ -130,6 +130,7 @@ codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off
130
130
  ```
131
131
 
132
132
  Use `--no-yolo` to run Codex with `--full-auto` instead.
133
+ Use `--include-thinking` to return all agent messages joined together for `codexapi run`.
133
134
 
134
135
  Lead mode periodically checks in on a long-running agent session with the
135
136
  current time and prints JSON status updates. The agent controls the loop by
@@ -174,17 +175,23 @@ codexapi ralph --cancel --cwd /path/to/project
174
175
  Science mode wraps a short task in a science prompt and runs it through the
175
176
  Ralph loop. It defaults to `--yolo` and expects progress notes in `SCIENCE.md`.
176
177
  Each iteration appends the agent output to `LOGBOOK.md` and the runner extracts
177
- any improved figures of merit for optional notifications.
178
+ any improved figures of merit for optional notifications. You can also set
179
+ `--max-duration` to stop after the current iteration once a time limit is hit.
180
+ The default science wrapper also tells the agent to create/use a local git
181
+ branch when in a repo and make local commits for worthwhile improvements, while
182
+ never committing or resetting `LOGBOOK.md` or `SCIENCE.md`.
178
183
 
179
184
  ```bash
180
185
  codexapi science "hyper-optimize the kernel cycles"
181
186
  codexapi science --no-yolo "hyper-optimize the kernel cycles" --max-iterations 3
187
+ codexapi science "hyper-optimize the kernel cycles" --max-duration 90m
182
188
  ```
183
189
 
184
190
  Optional Pushover notifications: create `~/.pushover` with two non-empty lines.
185
191
  Line 1 is your user or group key, line 2 is the app API token. When this file
186
192
  exists, Science will send a notification whenever it detects a new best result,
187
- including the metric values and percent improvement. Task runs will also send a
193
+ including the metric values and percent improvement, plus a final run-end status.
194
+ Task runs will also send a
188
195
  ✅/❌ notification with the task summary. Lead runs send a notification when the
189
196
  loop stops.
190
197
 
@@ -199,7 +206,7 @@ codexapi foreach list.txt task.yaml --retry-all
199
206
 
200
207
  ## API
201
208
 
202
- ### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
209
+ ### `agent(prompt, cwd=None, yolo=True, flags=None, include_thinking=False) -> str`
203
210
 
204
211
  Runs a single Codex turn and returns only the agent's message. Any reasoning
205
212
  items are filtered out.
@@ -208,8 +215,9 @@ items are filtered out.
208
215
  - `cwd` (str | PathLike | None): working directory for the Codex session.
209
216
  - `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
210
217
  - `flags` (str | None): extra CLI flags to pass to Codex.
218
+ - `include_thinking` (bool): when true, return all agent messages joined.
211
219
 
212
- ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False)`
220
+ ### `Agent(cwd=None, yolo=True, thread_id=None, flags=None, welfare=False, include_thinking=False)`
213
221
 
214
222
  Creates a stateful session wrapper. Calling the instance sends the prompt into
215
223
  the same conversation and returns only the agent's message.
@@ -220,6 +228,7 @@ the same conversation and returns only the agent's message.
220
228
  - `flags` (str | None): extra CLI flags to pass to Codex.
221
229
  - `welfare` (bool): when true, append welfare stop instructions to each prompt
222
230
  and raise `WelfareStop` if the agent outputs `MAKE IT STOP`.
231
+ - `include_thinking` (bool): when true, return all agent messages joined.
223
232
 
224
233
  ### `lead(minutes, prompt, cwd=None, yolo=True, flags=None, leadbook=None) -> dict`
225
234
 
@@ -305,6 +314,7 @@ Simple result object returned by `foreach()`.
305
314
  ## Behavior notes
306
315
 
307
316
  - Uses `codex exec --json` and parses JSONL events for `agent_message` items.
317
+ - Returns the last `agent_message` by default; set `include_thinking=True` to join all messages.
308
318
  - Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
309
319
  - Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
310
320
  - Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
@@ -21,4 +21,5 @@ src/codexapi.egg-info/dependency_links.txt
21
21
  src/codexapi.egg-info/entry_points.txt
22
22
  src/codexapi.egg-info/requires.txt
23
23
  src/codexapi.egg-info/top_level.txt
24
+ tests/test_science.py
24
25
  tests/test_task_progress.py
@@ -0,0 +1,97 @@
1
+ import sys
2
+ import tempfile
3
+ import unittest
4
+ from pathlib import Path
5
+ from unittest.mock import patch
6
+
7
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
8
+
9
+ from codexapi.science import Science, _science_parts
10
+
11
+
12
+ class _FakePushover:
13
+ def __init__(self, enabled):
14
+ self.enabled = enabled
15
+ self.sent = []
16
+
17
+ def ensure_ready(self, announce=True):
18
+ return self.enabled
19
+
20
+ def send(self, title, message):
21
+ self.sent.append((title, message))
22
+ return True
23
+
24
+
25
class _TestScience(Science):
    """Science subclass with logbook, metric extraction and title stubbed."""

    def _append_logbook(self, iteration, message):
        """No-op override; returns None."""

    def _extract_and_notify(self, message):
        """No-op override; returns None."""

    def _build_run_title(self):
        """Return a fixed title for the tests to assert on."""
        fixed_title = "test-run"
        return fixed_title
34
+
35
+
36
+ class _FakeAgent:
37
+ calls = 0
38
+
39
+ def __init__(
40
+ self,
41
+ cwd=None,
42
+ yolo=True,
43
+ thread_id=None,
44
+ flags=None,
45
+ welfare=False,
46
+ include_thinking=False,
47
+ ):
48
+ pass
49
+
50
+ def __call__(self, prompt):
51
+ _FakeAgent.calls += 1
52
+ return f"message {_FakeAgent.calls}"
53
+
54
+
55
class ScienceTests(unittest.TestCase):
    """Tests for the Science prompt, duration limit and final notification."""

    def test_science_prompt_includes_git_commit_guidance(self):
        """The second prompt part carries the git branch/commit guidance."""
        _prompt_a, prompt_b = _science_parts("improve performance")
        self.assertIn("create and use a local branch", prompt_b)
        self.assertIn("never commit or reset LOGBOOK.md or SCIENCE.md", prompt_b)

    def test_max_duration_stops_after_current_iteration(self):
        """A 60 s limit ends the loop after the iteration that crosses it."""
        _FakeAgent.calls = 0
        with tempfile.TemporaryDirectory() as tmpdir:
            runner = _TestScience(
                "improve performance",
                cwd=tmpdir,
                max_duration_seconds=60,
            )
            runner._pushover = _FakePushover(enabled=False)
            with patch("codexapi.ralph.Agent", _FakeAgent):
                # Patch the `time` name inside codexapi.science rather than
                # `time.monotonic` itself. The latter swaps the function on
                # the shared `time` module for the whole process, so any
                # unrelated caller would drain the scripted readings.
                with patch("codexapi.science.time") as fake_time:
                    # Readings: loop start, after iteration 1, after iteration 2.
                    fake_time.monotonic.side_effect = [0, 30, 61]
                    runner()
        self.assertEqual(_FakeAgent.calls, 2)
        self.assertTrue(runner._duration_limit_hit)
        self.assertEqual(runner._last_iteration, 2)

    def test_final_pushover_update_sent_when_enabled(self):
        """With Pushover enabled, one run-end notification is sent."""
        _FakeAgent.calls = 0
        with tempfile.TemporaryDirectory() as tmpdir:
            runner = _TestScience(
                "improve performance",
                cwd=tmpdir,
                max_iterations=1,
            )
            fake_pushover = _FakePushover(enabled=True)
            runner._pushover = fake_pushover
            with patch("codexapi.ralph.Agent", _FakeAgent):
                runner()
        self.assertEqual(len(fake_pushover.sent), 1)
        title, message = fake_pushover.sent[0]
        self.assertEqual(title, "test-run")
        self.assertIn("Science run ended: max iterations reached (1)", message)
        self.assertIn("Iterations completed: 1", message)
94
+
95
+
96
+ if __name__ == "__main__":
97
+ unittest.main()
File without changes
File without changes
File without changes
File without changes