codexapi 0.3.4__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codexapi-0.3.4/src/codexapi.egg-info → codexapi-0.5.0}/PKG-INFO +42 -11
- {codexapi-0.3.4 → codexapi-0.5.0}/README.md +39 -10
- {codexapi-0.3.4 → codexapi-0.5.0}/pyproject.toml +5 -2
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/__init__.py +4 -1
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/agent.py +2 -2
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/cli.py +125 -5
- codexapi-0.5.0/src/codexapi/foreach.py +230 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/ralph.py +2 -2
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/task.py +19 -10
- codexapi-0.5.0/src/codexapi/taskfile.py +108 -0
- {codexapi-0.3.4 → codexapi-0.5.0/src/codexapi.egg-info}/PKG-INFO +42 -11
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi.egg-info/SOURCES.txt +3 -0
- codexapi-0.5.0/src/codexapi.egg-info/requires.txt +2 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/LICENSE +0 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/setup.cfg +0 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi/__main__.py +0 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi.egg-info/dependency_links.txt +0 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi.egg-info/entry_points.txt +0 -0
- {codexapi-0.3.4 → codexapi-0.5.0}/src/codexapi.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -9,6 +9,8 @@ Classifier: Operating System :: OS Independent
|
|
|
9
9
|
Requires-Python: >=3.8
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
+
Requires-Dist: PyYAML>=6.0
|
|
13
|
+
Requires-Dist: tqdm>=4.64
|
|
12
14
|
|
|
13
15
|
# CodexAPI
|
|
14
16
|
|
|
@@ -70,6 +72,7 @@ echo "Say hello." | codexapi run
|
|
|
70
72
|
|
|
71
73
|
```bash
|
|
72
74
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
75
|
+
codexapi task -f task.yaml
|
|
73
76
|
```
|
|
74
77
|
|
|
75
78
|
Show running sessions and their latest activity:
|
|
@@ -85,6 +88,8 @@ Resume a session and print the thread id to stderr:
|
|
|
85
88
|
codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off."
|
|
86
89
|
```
|
|
87
90
|
|
|
91
|
+
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
92
|
+
|
|
88
93
|
Ralph loop mode repeats the same prompt until a completion promise or a max
|
|
89
94
|
iteration cap is hit (0 means unlimited). Cancel by deleting
|
|
90
95
|
`.codexapi/ralph-loop.local.md` or running `codexapi ralph --cancel`.
|
|
@@ -95,29 +100,36 @@ codexapi ralph --ralph-fresh "Try again from scratch." --max-iterations 3
|
|
|
95
100
|
codexapi ralph --cancel --cwd /path/to/project
|
|
96
101
|
```
|
|
97
102
|
|
|
103
|
+
Run a task file across a list file:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
codexapi foreach list.txt task.yaml
|
|
107
|
+
codexapi foreach list.txt task.yaml -n 4
|
|
108
|
+
```
|
|
109
|
+
|
|
98
110
|
## API
|
|
99
111
|
|
|
100
|
-
### `agent(prompt, cwd=None, yolo=False, flags=None) -> str`
|
|
112
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
101
113
|
|
|
102
114
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
103
115
|
items are filtered out.
|
|
104
116
|
|
|
105
117
|
- `prompt` (str): prompt to send to Codex.
|
|
106
118
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
107
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
119
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
108
120
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
109
121
|
|
|
110
|
-
### `Agent(cwd=None, yolo=False, thread_id=None, flags=None)`
|
|
122
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
111
123
|
|
|
112
124
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
113
125
|
the same conversation and returns only the agent's message.
|
|
114
126
|
|
|
115
127
|
- `__call__(prompt) -> str`: send a prompt to Codex and return the message.
|
|
116
128
|
- `thread_id -> str | None`: expose the underlying session id once created.
|
|
117
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
129
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
118
130
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
119
131
|
|
|
120
|
-
### `task(prompt, check=None, n=10, cwd=None, yolo=False, flags=None) -> str`
|
|
132
|
+
### `task(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> str`
|
|
121
133
|
|
|
122
134
|
Runs a task with checker-driven retries and returns the success summary.
|
|
123
135
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
@@ -125,12 +137,12 @@ Raises `TaskFailed` when the maximum attempts are reached.
|
|
|
125
137
|
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
126
138
|
- `n` (int): maximum number of retries after a failed check.
|
|
127
139
|
|
|
128
|
-
### `task_result(prompt, check=None, n=10, cwd=None, yolo=False, flags=None) -> TaskResult`
|
|
140
|
+
### `task_result(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> TaskResult`
|
|
129
141
|
|
|
130
142
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
131
143
|
raising `TaskFailed`.
|
|
132
144
|
|
|
133
|
-
### `Task(prompt, max_attempts=10, cwd=None, yolo=False, thread_id=None, flags=None)`
|
|
145
|
+
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
134
146
|
|
|
135
147
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
136
148
|
`check()` to return an error string when the task is incomplete, or return
|
|
@@ -139,7 +151,7 @@ Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
|
139
151
|
- `__call__() -> TaskResult`: run the task.
|
|
140
152
|
- `set_up()`: optional setup hook.
|
|
141
153
|
- `tear_down()`: optional cleanup hook.
|
|
142
|
-
- `check() -> str | None`: return an error description or `None`/`""`.
|
|
154
|
+
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
143
155
|
- `on_success(result)`: optional success hook.
|
|
144
156
|
- `on_failure(result)`: optional failure hook.
|
|
145
157
|
|
|
@@ -161,12 +173,31 @@ Exception raised by `task()` when retries are exhausted.
|
|
|
161
173
|
- `attempts` (int | None): attempts made when the task failed.
|
|
162
174
|
- `errors` (str | None): last checker error, if any.
|
|
163
175
|
|
|
176
|
+
### `foreach(list_file, task_file, n=None, cwd=None, yolo=True, flags=None) -> ForeachResult`
|
|
177
|
+
|
|
178
|
+
Runs a task file over a list of items, updating the list file in place.
|
|
179
|
+
|
|
180
|
+
- `list_file` (str | PathLike): path to the list file to process.
|
|
181
|
+
- `task_file` (str | PathLike): YAML task file (must include `prompt`).
|
|
182
|
+
- `n` (int | None): limit parallelism to N (default: run all items in parallel).
|
|
183
|
+
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
184
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
185
|
+
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
186
|
+
|
|
187
|
+
### `ForeachResult(succeeded, failed, skipped, results)`
|
|
188
|
+
|
|
189
|
+
Simple result object returned by `foreach()`.
|
|
190
|
+
|
|
191
|
+
- `succeeded` (int): number of successful items.
|
|
192
|
+
- `failed` (int): number of failed items.
|
|
193
|
+
- `skipped` (int): number of items skipped (already marked in the list file).
|
|
194
|
+
- `results` (list[tuple]): `(item, success, summary)` entries for items that ran.
|
|
195
|
+
|
|
164
196
|
## Behavior notes
|
|
165
197
|
|
|
166
198
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
167
199
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
168
|
-
- Passes `--
|
|
169
|
-
- Passes `--yolo` when enabled (use with care).
|
|
200
|
+
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
170
201
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
171
202
|
|
|
172
203
|
## Configuration
|
|
@@ -58,6 +58,7 @@ echo "Say hello." | codexapi run
|
|
|
58
58
|
|
|
59
59
|
```bash
|
|
60
60
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
61
|
+
codexapi task -f task.yaml
|
|
61
62
|
```
|
|
62
63
|
|
|
63
64
|
Show running sessions and their latest activity:
|
|
@@ -73,6 +74,8 @@ Resume a session and print the thread id to stderr:
|
|
|
73
74
|
codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off."
|
|
74
75
|
```
|
|
75
76
|
|
|
77
|
+
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
78
|
+
|
|
76
79
|
Ralph loop mode repeats the same prompt until a completion promise or a max
|
|
77
80
|
iteration cap is hit (0 means unlimited). Cancel by deleting
|
|
78
81
|
`.codexapi/ralph-loop.local.md` or running `codexapi ralph --cancel`.
|
|
@@ -83,29 +86,36 @@ codexapi ralph --ralph-fresh "Try again from scratch." --max-iterations 3
|
|
|
83
86
|
codexapi ralph --cancel --cwd /path/to/project
|
|
84
87
|
```
|
|
85
88
|
|
|
89
|
+
Run a task file across a list file:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
codexapi foreach list.txt task.yaml
|
|
93
|
+
codexapi foreach list.txt task.yaml -n 4
|
|
94
|
+
```
|
|
95
|
+
|
|
86
96
|
## API
|
|
87
97
|
|
|
88
|
-
### `agent(prompt, cwd=None, yolo=False, flags=None) -> str`
|
|
98
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
89
99
|
|
|
90
100
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
91
101
|
items are filtered out.
|
|
92
102
|
|
|
93
103
|
- `prompt` (str): prompt to send to Codex.
|
|
94
104
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
95
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
105
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
96
106
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
97
107
|
|
|
98
|
-
### `Agent(cwd=None, yolo=False, thread_id=None, flags=None)`
|
|
108
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
99
109
|
|
|
100
110
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
101
111
|
the same conversation and returns only the agent's message.
|
|
102
112
|
|
|
103
113
|
- `__call__(prompt) -> str`: send a prompt to Codex and return the message.
|
|
104
114
|
- `thread_id -> str | None`: expose the underlying session id once created.
|
|
105
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
115
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
106
116
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
107
117
|
|
|
108
|
-
### `task(prompt, check=None, n=10, cwd=None, yolo=False, flags=None) -> str`
|
|
118
|
+
### `task(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> str`
|
|
109
119
|
|
|
110
120
|
Runs a task with checker-driven retries and returns the success summary.
|
|
111
121
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
@@ -113,12 +123,12 @@ Raises `TaskFailed` when the maximum attempts are reached.
|
|
|
113
123
|
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
114
124
|
- `n` (int): maximum number of retries after a failed check.
|
|
115
125
|
|
|
116
|
-
### `task_result(prompt, check=None, n=10, cwd=None, yolo=False, flags=None) -> TaskResult`
|
|
126
|
+
### `task_result(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> TaskResult`
|
|
117
127
|
|
|
118
128
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
119
129
|
raising `TaskFailed`.
|
|
120
130
|
|
|
121
|
-
### `Task(prompt, max_attempts=10, cwd=None, yolo=False, thread_id=None, flags=None)`
|
|
131
|
+
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
122
132
|
|
|
123
133
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
124
134
|
`check()` to return an error string when the task is incomplete, or return
|
|
@@ -127,7 +137,7 @@ Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
|
127
137
|
- `__call__() -> TaskResult`: run the task.
|
|
128
138
|
- `set_up()`: optional setup hook.
|
|
129
139
|
- `tear_down()`: optional cleanup hook.
|
|
130
|
-
- `check() -> str | None`: return an error description or `None`/`""`.
|
|
140
|
+
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
131
141
|
- `on_success(result)`: optional success hook.
|
|
132
142
|
- `on_failure(result)`: optional failure hook.
|
|
133
143
|
|
|
@@ -149,12 +159,31 @@ Exception raised by `task()` when retries are exhausted.
|
|
|
149
159
|
- `attempts` (int | None): attempts made when the task failed.
|
|
150
160
|
- `errors` (str | None): last checker error, if any.
|
|
151
161
|
|
|
162
|
+
### `foreach(list_file, task_file, n=None, cwd=None, yolo=True, flags=None) -> ForeachResult`
|
|
163
|
+
|
|
164
|
+
Runs a task file over a list of items, updating the list file in place.
|
|
165
|
+
|
|
166
|
+
- `list_file` (str | PathLike): path to the list file to process.
|
|
167
|
+
- `task_file` (str | PathLike): YAML task file (must include `prompt`).
|
|
168
|
+
- `n` (int | None): limit parallelism to N (default: run all items in parallel).
|
|
169
|
+
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
170
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
171
|
+
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
172
|
+
|
|
173
|
+
### `ForeachResult(succeeded, failed, skipped, results)`
|
|
174
|
+
|
|
175
|
+
Simple result object returned by `foreach()`.
|
|
176
|
+
|
|
177
|
+
- `succeeded` (int): number of successful items.
|
|
178
|
+
- `failed` (int): number of failed items.
|
|
179
|
+
- `skipped` (int): number of items skipped (already marked in the list file).
|
|
180
|
+
- `results` (list[tuple]): `(item, success, summary)` entries for items that ran.
|
|
181
|
+
|
|
152
182
|
## Behavior notes
|
|
153
183
|
|
|
154
184
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
155
185
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
156
|
-
- Passes `--
|
|
157
|
-
- Passes `--yolo` when enabled (use with care).
|
|
186
|
+
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
158
187
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
159
188
|
|
|
160
189
|
## Configuration
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "codexapi"
|
|
7
|
-
version = "0.3.4"
|
|
7
|
+
version = "0.5.0"
|
|
8
8
|
description = "Minimal Python API for running the Codex CLI."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -15,7 +15,10 @@ classifiers = [
|
|
|
15
15
|
"Operating System :: OS Independent",
|
|
16
16
|
]
|
|
17
17
|
|
|
18
|
-
dependencies = []
|
|
18
|
+
dependencies = [
|
|
19
|
+
"PyYAML>=6.0",
|
|
20
|
+
"tqdm>=4.64",
|
|
21
|
+
]
|
|
19
22
|
|
|
20
23
|
[project.scripts]
|
|
21
24
|
codexapi = "codexapi.cli:main"
|
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
"""Minimal Python API for running the Codex CLI."""
|
|
2
2
|
|
|
3
3
|
from .agent import Agent, agent
|
|
4
|
+
from .foreach import ForeachResult, foreach
|
|
4
5
|
from .task import Task, TaskFailed, TaskResult, task, task_result
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"Agent",
|
|
9
|
+
"ForeachResult",
|
|
8
10
|
"Task",
|
|
9
11
|
"TaskFailed",
|
|
10
12
|
"TaskResult",
|
|
11
13
|
"agent",
|
|
14
|
+
"foreach",
|
|
12
15
|
"task",
|
|
13
16
|
"task_result",
|
|
14
17
|
]
|
|
15
|
-
__version__ = "0.3.4"
|
|
18
|
+
__version__ = "0.5.0"
|
|
@@ -8,7 +8,7 @@ import subprocess
|
|
|
8
8
|
_CODEX_BIN = os.environ.get("CODEX_BIN", "codex")
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def agent(prompt, cwd=None, yolo=False, flags=None):
|
|
11
|
+
def agent(prompt, cwd=None, yolo=True, flags=None):
|
|
12
12
|
"""Run a single Codex turn and return only the agent's message.
|
|
13
13
|
|
|
14
14
|
Args:
|
|
@@ -36,7 +36,7 @@ class Agent:
|
|
|
36
36
|
def __init__(
|
|
37
37
|
self,
|
|
38
38
|
cwd=None,
|
|
39
|
-
yolo=False,
|
|
39
|
+
yolo=True,
|
|
40
40
|
thread_id=None,
|
|
41
41
|
flags=None,
|
|
42
42
|
):
|
|
@@ -12,8 +12,10 @@ from datetime import datetime
|
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
|
|
14
14
|
from .agent import Agent, agent
|
|
15
|
+
from .foreach import foreach
|
|
15
16
|
from .ralph import cancel_ralph_loop, run_ralph_loop
|
|
16
17
|
from .task import TaskFailed, task
|
|
18
|
+
from .taskfile import AutoTask, load_task_file
|
|
17
19
|
|
|
18
20
|
_SESSION_ID_RE = re.compile(
|
|
19
21
|
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
|
|
@@ -38,6 +40,7 @@ _COLUMN_TITLES = {
|
|
|
38
40
|
"in": "IN",
|
|
39
41
|
"out": "OUT",
|
|
40
42
|
"turn": "TURN",
|
|
43
|
+
"turns": "NTRN",
|
|
41
44
|
"model": "MODEL",
|
|
42
45
|
"effort": "EFF",
|
|
43
46
|
"perm": "PERM",
|
|
@@ -121,6 +124,27 @@ def _tail_lines(path):
|
|
|
121
124
|
return text.splitlines()
|
|
122
125
|
|
|
123
126
|
|
|
127
|
+
def _count_turns(path):
|
|
128
|
+
event_count = 0
|
|
129
|
+
response_count = 0
|
|
130
|
+
try:
|
|
131
|
+
with open(path, "r", encoding="utf-8", errors="replace") as handle:
|
|
132
|
+
for line in handle:
|
|
133
|
+
if "\"type\":\"event_msg\"" in line and "\"type\":\"user_message\"" in line:
|
|
134
|
+
event_count += 1
|
|
135
|
+
continue
|
|
136
|
+
if "\"type\":\"response_item\"" in line and "\"role\":\"user\"" in line and "\"type\":\"message\"" in line:
|
|
137
|
+
response_count += 1
|
|
138
|
+
except OSError:
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
if event_count:
|
|
142
|
+
return event_count
|
|
143
|
+
if response_count:
|
|
144
|
+
return response_count
|
|
145
|
+
return None
|
|
146
|
+
|
|
147
|
+
|
|
124
148
|
def _extract_text(content):
|
|
125
149
|
if isinstance(content, str):
|
|
126
150
|
return content
|
|
@@ -364,6 +388,7 @@ def _summarize_session(path, mtime):
|
|
|
364
388
|
total_usage = None
|
|
365
389
|
meta = {}
|
|
366
390
|
subagent = None
|
|
391
|
+
turns = _count_turns(path)
|
|
367
392
|
|
|
368
393
|
for line in _tail_lines(path):
|
|
369
394
|
try:
|
|
@@ -485,6 +510,7 @@ def _summarize_session(path, mtime):
|
|
|
485
510
|
"last_user_ts": last_user_ts,
|
|
486
511
|
"last_agent_ts": last_agent_ts,
|
|
487
512
|
"last_event_kind": last_event_kind,
|
|
513
|
+
"turns": turns,
|
|
488
514
|
"meta": meta,
|
|
489
515
|
}
|
|
490
516
|
|
|
@@ -604,6 +630,7 @@ def _layout_columns(width, id_width, show):
|
|
|
604
630
|
("in", ">"),
|
|
605
631
|
("out", ">"),
|
|
606
632
|
("turn", ">"),
|
|
633
|
+
("turns", ">"),
|
|
607
634
|
]
|
|
608
635
|
widths = {
|
|
609
636
|
"id": id_width,
|
|
@@ -612,6 +639,7 @@ def _layout_columns(width, id_width, show):
|
|
|
612
639
|
"in": 7,
|
|
613
640
|
"out": 7,
|
|
614
641
|
"turn": 7,
|
|
642
|
+
"turns": 5,
|
|
615
643
|
}
|
|
616
644
|
mins = {}
|
|
617
645
|
|
|
@@ -684,6 +712,8 @@ def _format_session(session, layout):
|
|
|
684
712
|
else:
|
|
685
713
|
turn_seconds = None
|
|
686
714
|
turn_str = _format_duration(turn_seconds)
|
|
715
|
+
turns = session.get("turns")
|
|
716
|
+
turns_str = "-" if turns is None else str(turns)
|
|
687
717
|
meta = session.get("meta") or {}
|
|
688
718
|
model = meta.get("model") or meta.get("model_provider") or "-"
|
|
689
719
|
effort = meta.get("effort") or "-"
|
|
@@ -702,6 +732,7 @@ def _format_session(session, layout):
|
|
|
702
732
|
"in": total_in,
|
|
703
733
|
"out": total_out,
|
|
704
734
|
"turn": turn_str,
|
|
735
|
+
"turns": _truncate_head(str(turns_str), widths.get("turns", 0)),
|
|
705
736
|
"model": _truncate_head(str(model), widths.get("model", 0)),
|
|
706
737
|
"effort": _truncate_head(str(effort), widths.get("effort", 0)),
|
|
707
738
|
"perm": _truncate_head(str(perm), widths.get("perm", 0)),
|
|
@@ -908,7 +939,12 @@ def main(argv=None):
|
|
|
908
939
|
help="Prompt to send. Use '-' or omit to read from stdin.",
|
|
909
940
|
)
|
|
910
941
|
run_parser.add_argument("--cwd", help="Working directory for the Codex session.")
|
|
911
|
-
run_parser.add_argument(
|
|
942
|
+
run_parser.add_argument(
|
|
943
|
+
"--no-yolo",
|
|
944
|
+
action="store_false",
|
|
945
|
+
dest="yolo",
|
|
946
|
+
help="Disable --yolo and use --full-auto.",
|
|
947
|
+
)
|
|
912
948
|
run_parser.add_argument(
|
|
913
949
|
"--flags",
|
|
914
950
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
@@ -927,6 +963,11 @@ def main(argv=None):
|
|
|
927
963
|
"task",
|
|
928
964
|
help="Run a task with verification retries.",
|
|
929
965
|
)
|
|
966
|
+
task_parser.add_argument(
|
|
967
|
+
"-f",
|
|
968
|
+
"--task-file",
|
|
969
|
+
help="YAML task file to run.",
|
|
970
|
+
)
|
|
930
971
|
task_parser.add_argument(
|
|
931
972
|
"prompt",
|
|
932
973
|
nargs="?",
|
|
@@ -939,11 +980,16 @@ def main(argv=None):
|
|
|
939
980
|
task_parser.add_argument(
|
|
940
981
|
"--max-iterations",
|
|
941
982
|
type=int,
|
|
942
|
-
default=10,
|
|
943
|
-
help="Max verification retries after a failed check (0 means no retries).",
|
|
983
|
+
default=None,
|
|
984
|
+
help="Max verification retries after a failed check (0 means no retries). Defaults to 10.",
|
|
944
985
|
)
|
|
945
986
|
task_parser.add_argument("--cwd", help="Working directory for the Codex session.")
|
|
946
|
-
task_parser.add_argument(
|
|
987
|
+
task_parser.add_argument(
|
|
988
|
+
"--no-yolo",
|
|
989
|
+
action="store_false",
|
|
990
|
+
dest="yolo",
|
|
991
|
+
help="Disable --yolo and use --full-auto.",
|
|
992
|
+
)
|
|
947
993
|
task_parser.add_argument(
|
|
948
994
|
"--flags",
|
|
949
995
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
@@ -986,12 +1032,46 @@ def main(argv=None):
|
|
|
986
1032
|
help="Start each iteration with a fresh Agent context.",
|
|
987
1033
|
)
|
|
988
1034
|
ralph_parser.add_argument("--cwd", help="Working directory for the Codex session.")
|
|
989
|
-
ralph_parser.add_argument(
|
|
1035
|
+
ralph_parser.add_argument(
|
|
1036
|
+
"--no-yolo",
|
|
1037
|
+
action="store_false",
|
|
1038
|
+
dest="yolo",
|
|
1039
|
+
help="Disable --yolo and use --full-auto.",
|
|
1040
|
+
)
|
|
990
1041
|
ralph_parser.add_argument(
|
|
991
1042
|
"--flags",
|
|
992
1043
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
993
1044
|
)
|
|
994
1045
|
|
|
1046
|
+
foreach_parser = subparsers.add_parser(
|
|
1047
|
+
"foreach",
|
|
1048
|
+
help="Run a task file over a list file.",
|
|
1049
|
+
)
|
|
1050
|
+
foreach_parser.add_argument(
|
|
1051
|
+
"list_file",
|
|
1052
|
+
help="Path to the list file to process.",
|
|
1053
|
+
)
|
|
1054
|
+
foreach_parser.add_argument(
|
|
1055
|
+
"task_file",
|
|
1056
|
+
help="Path to the YAML task file.",
|
|
1057
|
+
)
|
|
1058
|
+
foreach_parser.add_argument(
|
|
1059
|
+
"-n",
|
|
1060
|
+
type=int,
|
|
1061
|
+
help="Limit parallelism to N.",
|
|
1062
|
+
)
|
|
1063
|
+
foreach_parser.add_argument("--cwd", help="Working directory for the Codex session.")
|
|
1064
|
+
foreach_parser.add_argument(
|
|
1065
|
+
"--no-yolo",
|
|
1066
|
+
action="store_false",
|
|
1067
|
+
dest="yolo",
|
|
1068
|
+
help="Disable --yolo and use --full-auto.",
|
|
1069
|
+
)
|
|
1070
|
+
foreach_parser.add_argument(
|
|
1071
|
+
"--flags",
|
|
1072
|
+
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
1073
|
+
)
|
|
1074
|
+
|
|
995
1075
|
subparsers.add_parser(
|
|
996
1076
|
"top",
|
|
997
1077
|
help="Show running Codex sessions.",
|
|
@@ -1005,6 +1085,21 @@ def main(argv=None):
|
|
|
1005
1085
|
_run_top([])
|
|
1006
1086
|
return
|
|
1007
1087
|
|
|
1088
|
+
if args.command == "foreach":
|
|
1089
|
+
if args.n is not None and args.n < 1:
|
|
1090
|
+
raise SystemExit("-n must be >= 1.")
|
|
1091
|
+
result = foreach(
|
|
1092
|
+
args.list_file,
|
|
1093
|
+
args.task_file,
|
|
1094
|
+
args.n,
|
|
1095
|
+
args.cwd,
|
|
1096
|
+
args.yolo,
|
|
1097
|
+
args.flags,
|
|
1098
|
+
)
|
|
1099
|
+
if result.failed:
|
|
1100
|
+
raise SystemExit(1)
|
|
1101
|
+
return
|
|
1102
|
+
|
|
1008
1103
|
if args.command == "ralph":
|
|
1009
1104
|
if args.cancel:
|
|
1010
1105
|
if args.prompt:
|
|
@@ -1016,6 +1111,29 @@ def main(argv=None):
|
|
|
1016
1111
|
print(cancel_ralph_loop(args.cwd))
|
|
1017
1112
|
return
|
|
1018
1113
|
|
|
1114
|
+
if args.command == "task" and args.task_file:
|
|
1115
|
+
if args.prompt:
|
|
1116
|
+
raise SystemExit("task -f does not take a prompt.")
|
|
1117
|
+
if args.check is not None:
|
|
1118
|
+
raise SystemExit("--check is not allowed with -f.")
|
|
1119
|
+
if args.max_iterations is not None:
|
|
1120
|
+
raise SystemExit("--max-iterations is not allowed with -f.")
|
|
1121
|
+
task_def = load_task_file(args.task_file)
|
|
1122
|
+
task_runner = AutoTask(
|
|
1123
|
+
task_def,
|
|
1124
|
+
None,
|
|
1125
|
+
10,
|
|
1126
|
+
args.cwd,
|
|
1127
|
+
args.yolo,
|
|
1128
|
+
None,
|
|
1129
|
+
args.flags,
|
|
1130
|
+
)
|
|
1131
|
+
result = task_runner()
|
|
1132
|
+
print(result.summary)
|
|
1133
|
+
if not result.success:
|
|
1134
|
+
raise SystemExit(1)
|
|
1135
|
+
return
|
|
1136
|
+
|
|
1019
1137
|
prompt = _read_prompt(args.prompt)
|
|
1020
1138
|
exit_code = 0
|
|
1021
1139
|
|
|
@@ -1033,6 +1151,8 @@ def main(argv=None):
|
|
|
1033
1151
|
)
|
|
1034
1152
|
return
|
|
1035
1153
|
if args.command == "task":
|
|
1154
|
+
if args.max_iterations is None:
|
|
1155
|
+
args.max_iterations = 10
|
|
1036
1156
|
if args.max_iterations < 0:
|
|
1037
1157
|
raise SystemExit("--max-iterations must be >= 0.")
|
|
1038
1158
|
check = args.check if args.check is not None else prompt
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Run a task file over a list of items with resumable progress."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
import threading
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
|
+
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
9
|
+
from .taskfile import AutoTask, load_task_file
|
|
10
|
+
|
|
11
|
+
_STATUS_RUNNING = "⏳"
|
|
12
|
+
_STATUS_SUCCESS = "✅"
|
|
13
|
+
_STATUS_FAILED = "❌"
|
|
14
|
+
_STATUS_SET = {_STATUS_RUNNING, _STATUS_SUCCESS, _STATUS_FAILED}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ForeachResult:
|
|
18
|
+
"""Outcome summary for a foreach run."""
|
|
19
|
+
|
|
20
|
+
def __init__(self, succeeded, failed, skipped, results):
|
|
21
|
+
self.succeeded = succeeded
|
|
22
|
+
self.failed = failed
|
|
23
|
+
self.skipped = skipped
|
|
24
|
+
self.results = results
|
|
25
|
+
|
|
26
|
+
def __repr__(self):
|
|
27
|
+
return (
|
|
28
|
+
"ForeachResult("
|
|
29
|
+
f"succeeded={self.succeeded}, "
|
|
30
|
+
f"failed={self.failed}, "
|
|
31
|
+
f"skipped={self.skipped}, "
|
|
32
|
+
f"results={self.results!r}"
|
|
33
|
+
")"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def foreach(
|
|
38
|
+
list_file,
|
|
39
|
+
task_file,
|
|
40
|
+
n=None,
|
|
41
|
+
cwd=None,
|
|
42
|
+
yolo=True,
|
|
43
|
+
flags=None,
|
|
44
|
+
):
|
|
45
|
+
"""Run a task file over each item in list_file and update the file."""
|
|
46
|
+
task_def = load_task_file(task_file)
|
|
47
|
+
lines, ends_with_newline = _read_lines(list_file)
|
|
48
|
+
items, skipped = _collect_items(lines)
|
|
49
|
+
|
|
50
|
+
if not items:
|
|
51
|
+
return ForeachResult(0, 0, skipped, [])
|
|
52
|
+
|
|
53
|
+
max_workers = _max_workers(n, len(items))
|
|
54
|
+
lock = threading.Lock()
|
|
55
|
+
results = []
|
|
56
|
+
counts = {
|
|
57
|
+
"running": 0,
|
|
58
|
+
"success": 0,
|
|
59
|
+
"failed": 0,
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
progress = tqdm(total=len(items))
|
|
63
|
+
try:
|
|
64
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
65
|
+
futures = []
|
|
66
|
+
for index, item in items:
|
|
67
|
+
futures.append(
|
|
68
|
+
executor.submit(
|
|
69
|
+
_run_item,
|
|
70
|
+
index,
|
|
71
|
+
item,
|
|
72
|
+
task_def,
|
|
73
|
+
lines,
|
|
74
|
+
ends_with_newline,
|
|
75
|
+
list_file,
|
|
76
|
+
cwd,
|
|
77
|
+
yolo,
|
|
78
|
+
flags,
|
|
79
|
+
counts,
|
|
80
|
+
results,
|
|
81
|
+
progress,
|
|
82
|
+
lock,
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
for future in as_completed(futures):
|
|
86
|
+
future.result()
|
|
87
|
+
finally:
|
|
88
|
+
progress.close()
|
|
89
|
+
|
|
90
|
+
return ForeachResult(
|
|
91
|
+
counts["success"],
|
|
92
|
+
counts["failed"],
|
|
93
|
+
skipped,
|
|
94
|
+
results,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _max_workers(n, total):
|
|
99
|
+
if n is None:
|
|
100
|
+
return total
|
|
101
|
+
if n < 1:
|
|
102
|
+
raise ValueError("n must be >= 1")
|
|
103
|
+
if n > total:
|
|
104
|
+
return total
|
|
105
|
+
return n
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _read_lines(path):
|
|
109
|
+
with open(path, "r", encoding="utf-8") as handle:
|
|
110
|
+
data = handle.read()
|
|
111
|
+
ends_with_newline = data.endswith("\n")
|
|
112
|
+
return data.splitlines(), ends_with_newline
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _write_lines(path, lines, ends_with_newline):
|
|
116
|
+
text = "\n".join(lines)
|
|
117
|
+
if ends_with_newline:
|
|
118
|
+
text += "\n"
|
|
119
|
+
with open(path, "w", encoding="utf-8") as handle:
|
|
120
|
+
handle.write(text)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _collect_items(lines):
|
|
124
|
+
items = []
|
|
125
|
+
skipped = 0
|
|
126
|
+
for index, line in enumerate(lines):
|
|
127
|
+
if not line.strip():
|
|
128
|
+
continue
|
|
129
|
+
if _status_marker(line):
|
|
130
|
+
skipped += 1
|
|
131
|
+
continue
|
|
132
|
+
items.append((index, line))
|
|
133
|
+
return items, skipped
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _status_marker(line):
|
|
137
|
+
if not line:
|
|
138
|
+
return None
|
|
139
|
+
marker = line[0]
|
|
140
|
+
if marker in _STATUS_SET:
|
|
141
|
+
return marker
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _status_text(counts):
|
|
146
|
+
return (
|
|
147
|
+
f"{_STATUS_RUNNING}: {counts['running']}, "
|
|
148
|
+
f"{_STATUS_SUCCESS}: {counts['success']}, "
|
|
149
|
+
f"{_STATUS_FAILED}: {counts['failed']}"
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _single_line(text):
|
|
154
|
+
if not text:
|
|
155
|
+
return ""
|
|
156
|
+
return text.replace("\r", " ").replace("\n", " ")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _format_turns(used, total):
|
|
160
|
+
used_text = "?" if used is None else str(used)
|
|
161
|
+
total_text = "?" if total is None else str(total)
|
|
162
|
+
return f"[turns: {used_text}/{total_text}]"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _run_item(
|
|
166
|
+
index,
|
|
167
|
+
item,
|
|
168
|
+
task_def,
|
|
169
|
+
lines,
|
|
170
|
+
ends_with_newline,
|
|
171
|
+
list_file,
|
|
172
|
+
cwd,
|
|
173
|
+
yolo,
|
|
174
|
+
flags,
|
|
175
|
+
counts,
|
|
176
|
+
results,
|
|
177
|
+
progress,
|
|
178
|
+
lock,
|
|
179
|
+
):
|
|
180
|
+
running_line = f"{_STATUS_RUNNING} {item}"
|
|
181
|
+
with lock:
|
|
182
|
+
lines[index] = running_line
|
|
183
|
+
_write_lines(list_file, lines, ends_with_newline)
|
|
184
|
+
counts["running"] += 1
|
|
185
|
+
progress.set_postfix_str(_status_text(counts))
|
|
186
|
+
|
|
187
|
+
summary = ""
|
|
188
|
+
success = False
|
|
189
|
+
attempts = None
|
|
190
|
+
max_attempts = None
|
|
191
|
+
try:
|
|
192
|
+
task = AutoTask(
|
|
193
|
+
task_def,
|
|
194
|
+
item,
|
|
195
|
+
10,
|
|
196
|
+
cwd,
|
|
197
|
+
yolo,
|
|
198
|
+
None,
|
|
199
|
+
flags,
|
|
200
|
+
)
|
|
201
|
+
max_attempts = task.max_attempts
|
|
202
|
+
result = task()
|
|
203
|
+
success = result.success
|
|
204
|
+
attempts = result.attempts
|
|
205
|
+
summary = result.summary or ""
|
|
206
|
+
except Exception as exc:
|
|
207
|
+
summary = f"{type(exc).__name__}: {exc}"
|
|
208
|
+
success = False
|
|
209
|
+
|
|
210
|
+
summary = _single_line(summary)
|
|
211
|
+
turns = _format_turns(attempts, max_attempts)
|
|
212
|
+
if summary:
|
|
213
|
+
summary = f"{summary} {turns}"
|
|
214
|
+
else:
|
|
215
|
+
summary = turns
|
|
216
|
+
status = _STATUS_SUCCESS if success else _STATUS_FAILED
|
|
217
|
+
final_line = f"{status} {item} | {summary}"
|
|
218
|
+
|
|
219
|
+
with lock:
|
|
220
|
+
lines[index] = final_line
|
|
221
|
+
_write_lines(list_file, lines, ends_with_newline)
|
|
222
|
+
counts["running"] -= 1
|
|
223
|
+
if success:
|
|
224
|
+
counts["success"] += 1
|
|
225
|
+
else:
|
|
226
|
+
counts["failed"] += 1
|
|
227
|
+
results.append((item, success, summary))
|
|
228
|
+
progress.update(1)
|
|
229
|
+
progress.set_postfix_str(_status_text(counts))
|
|
230
|
+
tqdm.write(final_line, file=sys.stdout)
|
|
@@ -15,7 +15,7 @@ _PROMISE_RE = re.compile(r"<promise>(.*?)</promise>", re.DOTALL)
|
|
|
15
15
|
def run_ralph_loop(
|
|
16
16
|
prompt,
|
|
17
17
|
cwd=None,
|
|
18
|
-
yolo=
|
|
18
|
+
yolo=True,
|
|
19
19
|
flags=None,
|
|
20
20
|
max_iterations=0,
|
|
21
21
|
completion_promise=None,
|
|
@@ -135,7 +135,7 @@ def run_ralph_loop(
|
|
|
135
135
|
elif runner is None:
|
|
136
136
|
runner = Agent(cwd, yolo, None, flags)
|
|
137
137
|
|
|
138
|
-
message = runner(prompt + '\nIf there are multiple paths forward,
|
|
138
|
+
message = runner(prompt + '\nIf there are multiple paths forward, you MUST use your own best judgement as to which to try first! Do not ask the user to choose an option, they hereby give you explciit permission to pick the best one yourself.\n')
|
|
139
139
|
print(message)
|
|
140
140
|
last_message = message
|
|
141
141
|
|
|
@@ -10,8 +10,9 @@ _logger = logging.getLogger(__name__)
|
|
|
10
10
|
|
|
11
11
|
_CHECK_PREFIX = (
|
|
12
12
|
"You are a verification agent. Explore this workspace and carefully evaluate it "
|
|
13
|
-
"against the
|
|
13
|
+
"against the task below. Collect evidence by running any tests and/or reading "
|
|
14
14
|
"and tracing through code, but do not change any of the code.\n"
|
|
15
|
+
"Act as a collaborator who wants to give the task owner all the information they need to succeed.\n"
|
|
15
16
|
"Return only JSON with keys: success (boolean) and reason (string).\n"
|
|
16
17
|
"Set success to true only if everything matches the intent."
|
|
17
18
|
)
|
|
@@ -141,9 +142,11 @@ def _print_progress(
|
|
|
141
142
|
|
|
142
143
|
def _fix_prompt(error):
|
|
143
144
|
return (
|
|
144
|
-
"
|
|
145
|
+
"Thanks for your work. An automated verifier reported these issues:\n"
|
|
145
146
|
f"{error}\n\n"
|
|
146
|
-
"
|
|
147
|
+
"Take another look and see whether you agree and, if so, please take this "
|
|
148
|
+
"feedback into consideration and use it to continue to make progress "
|
|
149
|
+
"towards our original goal and intent."
|
|
147
150
|
)
|
|
148
151
|
|
|
149
152
|
|
|
@@ -176,7 +179,7 @@ def task(
|
|
|
176
179
|
check=None,
|
|
177
180
|
n=10,
|
|
178
181
|
cwd=None,
|
|
179
|
-
yolo=
|
|
182
|
+
yolo=True,
|
|
180
183
|
flags=None,
|
|
181
184
|
progress=False,
|
|
182
185
|
):
|
|
@@ -209,7 +212,7 @@ def task_result(
|
|
|
209
212
|
check=None,
|
|
210
213
|
n=10,
|
|
211
214
|
cwd=None,
|
|
212
|
-
yolo=
|
|
215
|
+
yolo=True,
|
|
213
216
|
flags=None,
|
|
214
217
|
progress=False,
|
|
215
218
|
):
|
|
@@ -319,7 +322,7 @@ class Task:
|
|
|
319
322
|
prompt,
|
|
320
323
|
max_attempts=10,
|
|
321
324
|
cwd=None,
|
|
322
|
-
yolo=
|
|
325
|
+
yolo=True,
|
|
323
326
|
thread_id=None,
|
|
324
327
|
flags=None,
|
|
325
328
|
):
|
|
@@ -328,6 +331,7 @@ class Task:
|
|
|
328
331
|
self.prompt = prompt
|
|
329
332
|
self.max_attempts = max_attempts
|
|
330
333
|
self.cwd = cwd
|
|
334
|
+
self.last_output = None
|
|
331
335
|
self.agent = Agent(
|
|
332
336
|
cwd,
|
|
333
337
|
yolo,
|
|
@@ -341,8 +345,9 @@ class Task:
|
|
|
341
345
|
def tear_down(self):
|
|
342
346
|
"""Delete the directory etc."""
|
|
343
347
|
|
|
344
|
-
def check(self):
|
|
348
|
+
def check(self, output=None):
|
|
345
349
|
""" Check if the task is done, return a string describing the problems if not.
|
|
350
|
+
The output argument is the last agent response.
|
|
346
351
|
This can be any combination of running tests, python code or running an agent
|
|
347
352
|
with a specific prompt in self.cwd.
|
|
348
353
|
"""
|
|
@@ -356,9 +361,11 @@ class Task:
|
|
|
356
361
|
def fix_prompt(self, error):
|
|
357
362
|
"""Build a prompt that asks the agent to fix checker failures."""
|
|
358
363
|
return (
|
|
359
|
-
"
|
|
364
|
+
"Thanks for your work. An automated verifier reported these issues:\n"
|
|
360
365
|
f"{error}\n\n"
|
|
361
|
-
"
|
|
366
|
+
"Take another look and see whether you agree and, if so, please take "
|
|
367
|
+
"this feedback into consideration and use it to continue to make "
|
|
368
|
+
"progress towards our original goal and intent."
|
|
362
369
|
)
|
|
363
370
|
|
|
364
371
|
def success_prompt(self):
|
|
@@ -382,18 +389,20 @@ class Task:
|
|
|
382
389
|
|
|
383
390
|
# Start with the initial prompt
|
|
384
391
|
output = self.agent(self.prompt)
|
|
392
|
+
self.last_output = output
|
|
385
393
|
if debug:
|
|
386
394
|
_logger.debug("Initial output: %s", output)
|
|
387
395
|
|
|
388
396
|
# Try correcting it up to max_attempts times
|
|
389
397
|
for attempt in range(self.max_attempts):
|
|
390
|
-
error = self.check()
|
|
398
|
+
error = self.check(self.last_output)
|
|
391
399
|
if debug:
|
|
392
400
|
_logger.debug("Check error: %s", error)
|
|
393
401
|
|
|
394
402
|
if error:
|
|
395
403
|
# if there were errors, tell the agent to fix them
|
|
396
404
|
output = self.agent(self.fix_prompt(error))
|
|
405
|
+
self.last_output = output
|
|
397
406
|
if debug:
|
|
398
407
|
_logger.debug("Fix output: %s", output)
|
|
399
408
|
else:
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Load YAML task files and map them onto Task hooks."""
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
from .agent import agent
|
|
6
|
+
from .task import Task
|
|
7
|
+
|
|
8
|
+
_ITEM_TOKEN = "{{item}}"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_task_file(path):
    """Read a YAML task file and return its normalized task definition.

    The document must be a mapping with a non-empty string ``prompt``.
    The optional hook entries (``set_up``, ``tear_down``, ``check``,
    ``on_success``, ``on_failure``) are normalized by ``_optional_str``.

    Raises:
        ValueError: if ``path`` is empty, the document is not a mapping,
            ``prompt`` is missing or blank, or a hook value is not a string.
    """
    if not path:
        raise ValueError("task file path is required")

    with open(path, "r", encoding="utf-8") as stream:
        loaded = yaml.safe_load(stream)
    # Any falsy document (for example an empty file) is treated as an
    # empty mapping, so it fails the prompt check below.
    document = loaded or {}
    if not isinstance(document, dict):
        raise ValueError("Task file must be a YAML mapping.")

    prompt_text = document.get("prompt")
    has_prompt = isinstance(prompt_text, str) and bool(prompt_text.strip())
    if not has_prompt:
        raise ValueError("Task file missing non-empty 'prompt'.")

    definition = {"prompt": prompt_text}
    for hook in ("set_up", "tear_down", "check", "on_success", "on_failure"):
        definition[hook] = _optional_str(document.get(hook))
    return definition
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _optional_str(value):
    """Normalize an optional task-file value.

    Returns ``None`` for ``None`` and for empty or whitespace-only strings,
    and returns any other string unchanged.

    Raises:
        ValueError: if ``value`` is neither ``None`` nor a string.
    """
    if value is not None and not isinstance(value, str):
        raise ValueError("Task file values must be strings.")
    if value is None or not value.strip():
        return None
    return value
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _render(text, item):
    """Return ``text`` with every ``_ITEM_TOKEN`` occurrence replaced by ``item``.

    ``text`` is returned untouched when either ``text`` or ``item`` is
    ``None`` (so a ``None`` text stays ``None``).
    """
    if text is None or item is None:
        return text
    return text.replace(_ITEM_TOKEN, item)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AutoTask(Task):
    """Task whose hooks are driven by the strings of a task definition dict.

    Each hook (``set_up``, ``tear_down``, ``check``, ``on_success``,
    ``on_failure``) looks up the config entry of the same name, renders it
    for the current item and, when the result is non-empty, sends it to
    ``agent`` in ``self.cwd``.
    """

    def __init__(
        self,
        config,
        item=None,
        max_attempts=10,
        cwd=None,
        yolo=True,
        thread_id=None,
        flags=None,
    ):
        """Store the definition and start the base Task with the rendered prompt.

        Raises:
            TypeError: if ``config`` is not a dict.
        """
        if not isinstance(config, dict):
            raise TypeError("config must be a task definition dict")
        self._config = config
        # The item is always kept as a string; a missing item renders as "".
        self._item = str(item) if item is not None else ""
        self._yolo = yolo
        self._flags = flags
        super().__init__(
            _render(config.get("prompt"), self._item),
            max_attempts,
            cwd,
            yolo,
            thread_id,
            flags,
        )

    def _hook(self, name):
        """Return the config entry ``name`` rendered for the current item."""
        template = self._config.get(name)
        return _render(template, self._item)

    def _run_hook(self, name):
        """Send the rendered hook text to ``agent`` when it is non-empty."""
        rendered = self._hook(name)
        if rendered:
            agent(rendered, self.cwd, self._yolo, self._flags)

    def set_up(self):
        """Run the ``set_up`` hook prompt, if one is configured."""
        self._run_hook("set_up")

    def tear_down(self):
        """Run the ``tear_down`` hook prompt, if one is configured."""
        self._run_hook("tear_down")

    def check(self, output=None):
        """Run the ``check`` prompt and return the agent's reply, or None.

        ``output`` is the last agent response; when it is ``None`` the
        stored ``self.last_output`` is used instead. A non-empty response is
        appended to the check prompt under an ``AGENT OUTPUT:`` heading.
        Returns ``None`` when no check is configured or when the reply is
        not a non-blank string.
        """
        instructions = self._hook("check")
        if not instructions:
            return None
        transcript = (self.last_output if output is None else output) or ""
        prompt = instructions
        if transcript:
            prompt = f"{instructions}\n\nAGENT OUTPUT:\n{transcript}"
        verdict = agent(prompt, self.cwd, self._yolo, self._flags)
        if isinstance(verdict, str) and verdict.strip():
            return verdict
        return None

    def on_success(self, result):
        """Run the ``on_success`` hook prompt; ``result`` is not used."""
        self._run_hook("on_success")

    def on_failure(self, result):
        """Run the ``on_failure`` hook prompt; ``result`` is not used."""
        self._run_hook("on_failure")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -9,6 +9,8 @@ Classifier: Operating System :: OS Independent
|
|
|
9
9
|
Requires-Python: >=3.8
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
|
+
Requires-Dist: PyYAML>=6.0
|
|
13
|
+
Requires-Dist: tqdm>=4.64
|
|
12
14
|
|
|
13
15
|
# CodexAPI
|
|
14
16
|
|
|
@@ -70,6 +72,7 @@ echo "Say hello." | codexapi run
|
|
|
70
72
|
|
|
71
73
|
```bash
|
|
72
74
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
75
|
+
codexapi task -f task.yaml
|
|
73
76
|
```
|
|
74
77
|
|
|
75
78
|
Show running sessions and their latest activity:
|
|
@@ -85,6 +88,8 @@ Resume a session and print the thread id to stderr:
|
|
|
85
88
|
codexapi run --thread-id THREAD_ID --print-thread-id "Continue where we left off."
|
|
86
89
|
```
|
|
87
90
|
|
|
91
|
+
Use `--no-yolo` to run Codex with `--full-auto` instead.
|
|
92
|
+
|
|
88
93
|
Ralph loop mode repeats the same prompt until a completion promise or a max
|
|
89
94
|
iteration cap is hit (0 means unlimited). Cancel by deleting
|
|
90
95
|
`.codexapi/ralph-loop.local.md` or running `codexapi ralph --cancel`.
|
|
@@ -95,29 +100,36 @@ codexapi ralph --ralph-fresh "Try again from scratch." --max-iterations 3
|
|
|
95
100
|
codexapi ralph --cancel --cwd /path/to/project
|
|
96
101
|
```
|
|
97
102
|
|
|
103
|
+
Run a task file once for each item in a list file:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
codexapi foreach list.txt task.yaml
|
|
107
|
+
codexapi foreach list.txt task.yaml -n 4
|
|
108
|
+
```
|
|
109
|
+
|
|
98
110
|
## API
|
|
99
111
|
|
|
100
|
-
### `agent(prompt, cwd=None, yolo=
|
|
112
|
+
### `agent(prompt, cwd=None, yolo=True, flags=None) -> str`
|
|
101
113
|
|
|
102
114
|
Runs a single Codex turn and returns only the agent's message. Any reasoning
|
|
103
115
|
items are filtered out.
|
|
104
116
|
|
|
105
117
|
- `prompt` (str): prompt to send to Codex.
|
|
106
118
|
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
107
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
119
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
108
120
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
109
121
|
|
|
110
|
-
### `Agent(cwd=None, yolo=
|
|
122
|
+
### `Agent(cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
111
123
|
|
|
112
124
|
Creates a stateful session wrapper. Calling the instance sends the prompt into
|
|
113
125
|
the same conversation and returns only the agent's message.
|
|
114
126
|
|
|
115
127
|
- `__call__(prompt) -> str`: send a prompt to Codex and return the message.
|
|
116
128
|
- `thread_id -> str | None`: expose the underlying session id once created.
|
|
117
|
-
- `yolo` (bool): pass `--yolo` to Codex when true.
|
|
129
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
118
130
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
119
131
|
|
|
120
|
-
### `task(prompt, check=None, n=10, cwd=None, yolo=
|
|
132
|
+
### `task(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> str`
|
|
121
133
|
|
|
122
134
|
Runs a task with checker-driven retries and returns the success summary.
|
|
123
135
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
@@ -125,12 +137,12 @@ Raises `TaskFailed` when the maximum attempts are reached.
|
|
|
125
137
|
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
126
138
|
- `n` (int): maximum number of retries after a failed check.
|
|
127
139
|
|
|
128
|
-
### `task_result(prompt, check=None, n=10, cwd=None, yolo=
|
|
140
|
+
### `task_result(prompt, check=None, n=10, cwd=None, yolo=True, flags=None) -> TaskResult`
|
|
129
141
|
|
|
130
142
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
131
143
|
raising `TaskFailed`.
|
|
132
144
|
|
|
133
|
-
### `Task(prompt, max_attempts=10, cwd=None, yolo=
|
|
145
|
+
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
134
146
|
|
|
135
147
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
136
148
|
`check()` to return an error string when the task is incomplete, or return
|
|
@@ -139,7 +151,7 @@ Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
|
139
151
|
- `__call__() -> TaskResult`: run the task.
|
|
140
152
|
- `set_up()`: optional setup hook.
|
|
141
153
|
- `tear_down()`: optional cleanup hook.
|
|
142
|
-
- `check() -> str | None`: return an error description or `None`/`""`.
|
|
154
|
+
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
143
155
|
- `on_success(result)`: optional success hook.
|
|
144
156
|
- `on_failure(result)`: optional failure hook.
|
|
145
157
|
|
|
@@ -161,12 +173,31 @@ Exception raised by `task()` when retries are exhausted.
|
|
|
161
173
|
- `attempts` (int | None): attempts made when the task failed.
|
|
162
174
|
- `errors` (str | None): last checker error, if any.
|
|
163
175
|
|
|
176
|
+
### `foreach(list_file, task_file, n=None, cwd=None, yolo=True, flags=None) -> ForeachResult`
|
|
177
|
+
|
|
178
|
+
Runs a task file over a list of items, updating the list file in place.
|
|
179
|
+
|
|
180
|
+
- `list_file` (str | PathLike): path to the list file to process.
|
|
181
|
+
- `task_file` (str | PathLike): YAML task file (must include `prompt`).
|
|
182
|
+
- `n` (int | None): limit parallelism to N (default: run all items in parallel).
|
|
183
|
+
- `cwd` (str | PathLike | None): working directory for the Codex session.
|
|
184
|
+
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
185
|
+
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
186
|
+
|
|
187
|
+
### `ForeachResult(succeeded, failed, skipped, results)`
|
|
188
|
+
|
|
189
|
+
Simple result object returned by `foreach()`.
|
|
190
|
+
|
|
191
|
+
- `succeeded` (int): number of successful items.
|
|
192
|
+
- `failed` (int): number of failed items.
|
|
193
|
+
- `skipped` (int): number of items skipped (already marked in the list file).
|
|
194
|
+
- `results` (list[tuple]): `(item, success, summary)` entries for items that ran.
|
|
195
|
+
|
|
164
196
|
## Behavior notes
|
|
165
197
|
|
|
166
198
|
- Uses `codex exec --json` and parses JSONL events for `agent_message` items.
|
|
167
199
|
- Automatically passes `--skip-git-repo-check` so it can run outside a git repo.
|
|
168
|
-
- Passes `--
|
|
169
|
-
- Passes `--yolo` when enabled (use with care).
|
|
200
|
+
- Passes `--yolo` by default (use `--no-yolo` or `yolo=False` for `--full-auto`).
|
|
170
201
|
- Raises `RuntimeError` if Codex exits non-zero or returns no agent message.
|
|
171
202
|
|
|
172
203
|
## Configuration
|
|
@@ -5,10 +5,13 @@ src/codexapi/__init__.py
|
|
|
5
5
|
src/codexapi/__main__.py
|
|
6
6
|
src/codexapi/agent.py
|
|
7
7
|
src/codexapi/cli.py
|
|
8
|
+
src/codexapi/foreach.py
|
|
8
9
|
src/codexapi/ralph.py
|
|
9
10
|
src/codexapi/task.py
|
|
11
|
+
src/codexapi/taskfile.py
|
|
10
12
|
src/codexapi.egg-info/PKG-INFO
|
|
11
13
|
src/codexapi.egg-info/SOURCES.txt
|
|
12
14
|
src/codexapi.egg-info/dependency_links.txt
|
|
13
15
|
src/codexapi.egg-info/entry_points.txt
|
|
16
|
+
src/codexapi.egg-info/requires.txt
|
|
14
17
|
src/codexapi.egg-info/top_level.txt
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|