codexapi 0.5.2__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codexapi-0.5.2/src/codexapi.egg-info → codexapi-0.5.4}/PKG-INFO +17 -7
- {codexapi-0.5.2 → codexapi-0.5.4}/README.md +16 -6
- {codexapi-0.5.2 → codexapi-0.5.4}/pyproject.toml +1 -1
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/__init__.py +1 -1
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/cli.py +18 -17
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/foreach.py +9 -11
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/ralph.py +2 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/task.py +223 -108
- codexapi-0.5.4/src/codexapi/taskfile.py +112 -0
- {codexapi-0.5.2 → codexapi-0.5.4/src/codexapi.egg-info}/PKG-INFO +17 -7
- codexapi-0.5.2/src/codexapi/taskfile.py +0 -108
- {codexapi-0.5.2 → codexapi-0.5.4}/LICENSE +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/setup.cfg +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/__main__.py +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi/agent.py +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi.egg-info/SOURCES.txt +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi.egg-info/dependency_links.txt +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi.egg-info/entry_points.txt +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi.egg-info/requires.txt +0 -0
- {codexapi-0.5.2 → codexapi-0.5.4}/src/codexapi.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -74,6 +74,11 @@ echo "Say hello." | codexapi run
|
|
|
74
74
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
75
75
|
codexapi task -f task.yaml
|
|
76
76
|
```
|
|
77
|
+
Progress is shown by default for `codexapi task`; use `--quiet` to suppress it.
|
|
78
|
+
|
|
79
|
+
Task files default to using the standard check prompt for the task. Set `check: "None"` to skip verification.
|
|
80
|
+
Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
|
|
81
|
+
Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
|
|
77
82
|
|
|
78
83
|
Show running sessions and their latest activity:
|
|
79
84
|
|
|
@@ -139,26 +144,31 @@ the same conversation and returns only the agent's message.
|
|
|
139
144
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
140
145
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
141
146
|
|
|
142
|
-
### `task(prompt, check=None,
|
|
147
|
+
### `task(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> str`
|
|
143
148
|
|
|
144
149
|
Runs a task with checker-driven retries and returns the success summary.
|
|
145
150
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
146
151
|
|
|
147
|
-
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
148
|
-
- `
|
|
152
|
+
- `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
|
|
153
|
+
- `max_iterations` (int): maximum number of task attempts (0 means unlimited).
|
|
154
|
+
- `progress` (bool): print progress after each verification round.
|
|
155
|
+
- `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
|
|
149
156
|
|
|
150
|
-
### `task_result(prompt, check=None,
|
|
157
|
+
### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
|
|
151
158
|
|
|
152
159
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
153
160
|
raising `TaskFailed`.
|
|
161
|
+
Arguments mirror `task()` (including hooks).
|
|
154
162
|
|
|
155
163
|
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
156
164
|
|
|
157
165
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
158
166
|
`check()` to return an error string when the task is incomplete, or return
|
|
159
167
|
`None`/`""` when the task passes.
|
|
168
|
+
If you do not override `check()`, the default verifier wrapper runs with the
|
|
169
|
+
default check prompt and includes the agent output.
|
|
160
170
|
|
|
161
|
-
- `__call__() -> TaskResult`: run the task.
|
|
171
|
+
- `__call__(debug=False, progress=False) -> TaskResult`: run the task.
|
|
162
172
|
- `set_up()`: optional setup hook.
|
|
163
173
|
- `tear_down()`: optional cleanup hook.
|
|
164
174
|
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
@@ -177,7 +187,7 @@ Simple result object returned by `Task.__call__`.
|
|
|
177
187
|
|
|
178
188
|
### `TaskFailed`
|
|
179
189
|
|
|
180
|
-
Exception raised by `task()` when
|
|
190
|
+
Exception raised by `task()` when attempts are exhausted.
|
|
181
191
|
|
|
182
192
|
- `summary` (str): failure summary text.
|
|
183
193
|
- `attempts` (int | None): attempts made when the task failed.
|
|
@@ -60,6 +60,11 @@ echo "Say hello." | codexapi run
|
|
|
60
60
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
61
61
|
codexapi task -f task.yaml
|
|
62
62
|
```
|
|
63
|
+
Progress is shown by default for `codexapi task`; use `--quiet` to suppress it.
|
|
64
|
+
|
|
65
|
+
Task files default to using the standard check prompt for the task. Set `check: "None"` to skip verification.
|
|
66
|
+
Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
|
|
67
|
+
Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
|
|
63
68
|
|
|
64
69
|
Show running sessions and their latest activity:
|
|
65
70
|
|
|
@@ -125,26 +130,31 @@ the same conversation and returns only the agent's message.
|
|
|
125
130
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
126
131
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
127
132
|
|
|
128
|
-
### `task(prompt, check=None,
|
|
133
|
+
### `task(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> str`
|
|
129
134
|
|
|
130
135
|
Runs a task with checker-driven retries and returns the success summary.
|
|
131
136
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
132
137
|
|
|
133
|
-
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
134
|
-
- `
|
|
138
|
+
- `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
|
|
139
|
+
- `max_iterations` (int): maximum number of task attempts (0 means unlimited).
|
|
140
|
+
- `progress` (bool): print progress after each verification round.
|
|
141
|
+
- `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
|
|
135
142
|
|
|
136
|
-
### `task_result(prompt, check=None,
|
|
143
|
+
### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
|
|
137
144
|
|
|
138
145
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
139
146
|
raising `TaskFailed`.
|
|
147
|
+
Arguments mirror `task()` (including hooks).
|
|
140
148
|
|
|
141
149
|
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
142
150
|
|
|
143
151
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
144
152
|
`check()` to return an error string when the task is incomplete, or return
|
|
145
153
|
`None`/`""` when the task passes.
|
|
154
|
+
If you do not override `check()`, the default verifier wrapper runs with the
|
|
155
|
+
default check prompt and includes the agent output.
|
|
146
156
|
|
|
147
|
-
- `__call__() -> TaskResult`: run the task.
|
|
157
|
+
- `__call__(debug=False, progress=False) -> TaskResult`: run the task.
|
|
148
158
|
- `set_up()`: optional setup hook.
|
|
149
159
|
- `tear_down()`: optional cleanup hook.
|
|
150
160
|
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
@@ -163,7 +173,7 @@ Simple result object returned by `Task.__call__`.
|
|
|
163
173
|
|
|
164
174
|
### `TaskFailed`
|
|
165
175
|
|
|
166
|
-
Exception raised by `task()` when
|
|
176
|
+
Exception raised by `task()` when attempts are exhausted.
|
|
167
177
|
|
|
168
178
|
- `summary` (str): failure summary text.
|
|
169
179
|
- `attempts` (int | None): attempts made when the task failed.
|
|
@@ -14,8 +14,8 @@ from pathlib import Path
|
|
|
14
14
|
from .agent import Agent, agent
|
|
15
15
|
from .foreach import foreach
|
|
16
16
|
from .ralph import cancel_ralph_loop, run_ralph_loop
|
|
17
|
-
from .task import TaskFailed, task
|
|
18
|
-
from .taskfile import
|
|
17
|
+
from .task import DEFAULT_MAX_ITERATIONS, TaskFailed, task
|
|
18
|
+
from .taskfile import TaskFile
|
|
19
19
|
|
|
20
20
|
_SESSION_ID_RE = re.compile(
|
|
21
21
|
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
|
|
@@ -1008,7 +1008,10 @@ def main(argv=None):
|
|
|
1008
1008
|
"--max-iterations",
|
|
1009
1009
|
type=int,
|
|
1010
1010
|
default=None,
|
|
1011
|
-
help=
|
|
1011
|
+
help=(
|
|
1012
|
+
"Max agent attempts (0 means unlimited). "
|
|
1013
|
+
f"Defaults to {DEFAULT_MAX_ITERATIONS}."
|
|
1014
|
+
),
|
|
1012
1015
|
)
|
|
1013
1016
|
task_parser.add_argument("--cwd", help="Working directory for the Codex session.")
|
|
1014
1017
|
task_parser.add_argument(
|
|
@@ -1022,9 +1025,9 @@ def main(argv=None):
|
|
|
1022
1025
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
1023
1026
|
)
|
|
1024
1027
|
task_parser.add_argument(
|
|
1025
|
-
"--
|
|
1028
|
+
"--quiet",
|
|
1026
1029
|
action="store_true",
|
|
1027
|
-
help="
|
|
1030
|
+
help="Suppress progress output during verification.",
|
|
1028
1031
|
)
|
|
1029
1032
|
|
|
1030
1033
|
ralph_parser = subparsers.add_parser(
|
|
@@ -1226,17 +1229,15 @@ def main(argv=None):
|
|
|
1226
1229
|
raise SystemExit("--check is not allowed with -f.")
|
|
1227
1230
|
if args.max_iterations is not None:
|
|
1228
1231
|
raise SystemExit("--max-iterations is not allowed with -f.")
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
task_def,
|
|
1232
|
-
None,
|
|
1233
|
-
10,
|
|
1234
|
-
args.cwd,
|
|
1235
|
-
args.yolo,
|
|
1232
|
+
task_runner = TaskFile(
|
|
1233
|
+
args.task_file,
|
|
1236
1234
|
None,
|
|
1237
|
-
args.
|
|
1235
|
+
cwd=args.cwd,
|
|
1236
|
+
yolo=args.yolo,
|
|
1237
|
+
thread_id=None,
|
|
1238
|
+
flags=args.flags,
|
|
1238
1239
|
)
|
|
1239
|
-
result = task_runner()
|
|
1240
|
+
result = task_runner(progress=not args.quiet)
|
|
1240
1241
|
print(result.summary)
|
|
1241
1242
|
if not result.success:
|
|
1242
1243
|
raise SystemExit(1)
|
|
@@ -1279,10 +1280,10 @@ def main(argv=None):
|
|
|
1279
1280
|
return
|
|
1280
1281
|
if args.command == "task":
|
|
1281
1282
|
if args.max_iterations is None:
|
|
1282
|
-
args.max_iterations =
|
|
1283
|
+
args.max_iterations = DEFAULT_MAX_ITERATIONS
|
|
1283
1284
|
if args.max_iterations < 0:
|
|
1284
1285
|
raise SystemExit("--max-iterations must be >= 0.")
|
|
1285
|
-
check = args.check
|
|
1286
|
+
check = args.check
|
|
1286
1287
|
try:
|
|
1287
1288
|
message = task(
|
|
1288
1289
|
prompt,
|
|
@@ -1291,7 +1292,7 @@ def main(argv=None):
|
|
|
1291
1292
|
args.cwd,
|
|
1292
1293
|
args.yolo,
|
|
1293
1294
|
args.flags,
|
|
1294
|
-
args.
|
|
1295
|
+
not args.quiet,
|
|
1295
1296
|
)
|
|
1296
1297
|
except TaskFailed as exc:
|
|
1297
1298
|
message = exc.summary
|
|
@@ -6,7 +6,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
6
6
|
|
|
7
7
|
from tqdm import tqdm
|
|
8
8
|
|
|
9
|
-
from .taskfile import
|
|
9
|
+
from .taskfile import TaskFile
|
|
10
10
|
|
|
11
11
|
_STATUS_RUNNING = "⏳"
|
|
12
12
|
_STATUS_SUCCESS = "✅"
|
|
@@ -43,7 +43,6 @@ def foreach(
|
|
|
43
43
|
flags=None,
|
|
44
44
|
):
|
|
45
45
|
"""Run a task file over each item in list_file and update the file."""
|
|
46
|
-
task_def = load_task_file(task_file)
|
|
47
46
|
lines, ends_with_newline = _read_lines(list_file)
|
|
48
47
|
items, skipped = _collect_items(lines)
|
|
49
48
|
|
|
@@ -69,7 +68,7 @@ def foreach(
|
|
|
69
68
|
_run_item,
|
|
70
69
|
index,
|
|
71
70
|
item,
|
|
72
|
-
|
|
71
|
+
task_file,
|
|
73
72
|
lines,
|
|
74
73
|
ends_with_newline,
|
|
75
74
|
list_file,
|
|
@@ -165,7 +164,7 @@ def _format_turns(used, total):
|
|
|
165
164
|
def _run_item(
|
|
166
165
|
index,
|
|
167
166
|
item,
|
|
168
|
-
|
|
167
|
+
task_file,
|
|
169
168
|
lines,
|
|
170
169
|
ends_with_newline,
|
|
171
170
|
list_file,
|
|
@@ -189,14 +188,13 @@ def _run_item(
|
|
|
189
188
|
attempts = None
|
|
190
189
|
max_attempts = None
|
|
191
190
|
try:
|
|
192
|
-
task =
|
|
193
|
-
|
|
191
|
+
task = TaskFile(
|
|
192
|
+
task_file,
|
|
194
193
|
item,
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
flags,
|
|
194
|
+
cwd=cwd,
|
|
195
|
+
yolo=yolo,
|
|
196
|
+
thread_id=None,
|
|
197
|
+
flags=flags,
|
|
200
198
|
)
|
|
201
199
|
max_attempts = task.max_attempts
|
|
202
200
|
result = task()
|
|
@@ -47,6 +47,8 @@ def run_ralph_loop(
|
|
|
47
47
|
raise TypeError("completion_promise must be a string or None")
|
|
48
48
|
if max_iterations < 0:
|
|
49
49
|
raise ValueError("max_iterations must be >= 0")
|
|
50
|
+
if hasattr(sys.stdout, "reconfigure"):
|
|
51
|
+
sys.stdout.reconfigure(line_buffering=True)
|
|
50
52
|
|
|
51
53
|
state_path = _state_path(cwd)
|
|
52
54
|
_ensure_state_dir(state_path)
|
|
@@ -10,9 +10,12 @@ _logger = logging.getLogger(__name__)
|
|
|
10
10
|
|
|
11
11
|
_CHECK_PREFIX = (
|
|
12
12
|
"You are a verification agent. Explore this workspace and carefully evaluate it "
|
|
13
|
-
"against the
|
|
14
|
-
"and tracing through code, but do not change any of the code.\n"
|
|
15
|
-
"
|
|
13
|
+
"against the instructions below. Collect evidence by running any tests and/or "
|
|
14
|
+
"reading and tracing through code, but do not change any of the code.\n"
|
|
15
|
+
"You will receive the task or check instructions first, then the agent output "
|
|
16
|
+
"under the heading 'AGENT OUTPUT', which is provided for context and does not "
|
|
17
|
+
"replace or supersede collecting your own evidence unless it is clear from the "
|
|
18
|
+
"instructions that the agent's output IS the expected output of the task.\n"
|
|
16
19
|
"Return only JSON with keys: success (boolean) and reason (string).\n"
|
|
17
20
|
"Set success to true only if everything matches the intent."
|
|
18
21
|
)
|
|
@@ -23,6 +26,7 @@ _PROGRESS_PROMPT = (
|
|
|
23
26
|
"Each value must be a single line with no newlines.\n"
|
|
24
27
|
"Do not run commands or change any files."
|
|
25
28
|
)
|
|
29
|
+
DEFAULT_MAX_ITERATIONS = 10
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
def _default_check(prompt):
|
|
@@ -35,8 +39,27 @@ def _default_check(prompt):
|
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
|
|
38
|
-
def _build_check_prompt(check):
|
|
39
|
-
|
|
42
|
+
def _build_check_prompt(check, agent_output):
|
|
43
|
+
output = agent_output or ""
|
|
44
|
+
return (
|
|
45
|
+
f"{_CHECK_PREFIX}\n\n"
|
|
46
|
+
f"{check}\n\n"
|
|
47
|
+
"AGENT OUTPUT:\n"
|
|
48
|
+
f"{output}\n\n"
|
|
49
|
+
f"{_CHECK_SUFFIX}"
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _resolve_check_text(prompt, check):
|
|
54
|
+
if check is False:
|
|
55
|
+
return None, True
|
|
56
|
+
if check is None:
|
|
57
|
+
return _default_check(prompt), False
|
|
58
|
+
if not isinstance(check, str):
|
|
59
|
+
raise TypeError("check must be a string or False")
|
|
60
|
+
if check.strip() == "None":
|
|
61
|
+
return None, True
|
|
62
|
+
return check, False
|
|
40
63
|
|
|
41
64
|
|
|
42
65
|
def _build_progress_prompt(agent_output, check_output):
|
|
@@ -123,17 +146,23 @@ def _print_progress(
|
|
|
123
146
|
):
|
|
124
147
|
elapsed = time.monotonic() - start_time
|
|
125
148
|
remaining = 0
|
|
126
|
-
|
|
127
|
-
|
|
149
|
+
remaining_text = "unknown"
|
|
150
|
+
if total:
|
|
151
|
+
if attempt:
|
|
152
|
+
remaining = (elapsed / attempt) * (total - attempt)
|
|
153
|
+
remaining_text = _format_duration(remaining)
|
|
128
154
|
|
|
129
155
|
summary_prompt = _build_progress_prompt(agent_output, check_output)
|
|
130
156
|
summary = agent(summary_prompt, cwd, yolo, flags)
|
|
131
157
|
agent_summary, check_summary = _progress_result(summary)
|
|
132
158
|
|
|
133
159
|
elapsed_text = _format_duration(elapsed)
|
|
134
|
-
|
|
160
|
+
if not total:
|
|
161
|
+
round_text = f"Round {attempt}/unlimited"
|
|
162
|
+
else:
|
|
163
|
+
round_text = f"Round {attempt}/{total}"
|
|
135
164
|
print(
|
|
136
|
-
f"
|
|
165
|
+
f"{round_text} ({elapsed_text} elapsed, {remaining_text} remaining)",
|
|
137
166
|
flush=True,
|
|
138
167
|
)
|
|
139
168
|
print(f"Agent: {agent_summary}", flush=True)
|
|
@@ -174,26 +203,42 @@ class TaskFailed(RuntimeError):
|
|
|
174
203
|
self.errors = errors
|
|
175
204
|
|
|
176
205
|
|
|
206
|
+
def _validate_hook(name, value):
|
|
207
|
+
if value is None:
|
|
208
|
+
return None
|
|
209
|
+
if isinstance(value, str):
|
|
210
|
+
return value
|
|
211
|
+
raise TypeError(f"{name} must be a string or None")
|
|
212
|
+
|
|
213
|
+
|
|
177
214
|
def task(
|
|
178
215
|
prompt,
|
|
179
216
|
check=None,
|
|
180
|
-
|
|
217
|
+
max_iterations=DEFAULT_MAX_ITERATIONS,
|
|
181
218
|
cwd=None,
|
|
182
219
|
yolo=True,
|
|
183
220
|
flags=None,
|
|
184
221
|
progress=False,
|
|
222
|
+
set_up=None,
|
|
223
|
+
tear_down=None,
|
|
224
|
+
on_success=None,
|
|
225
|
+
on_failure=None,
|
|
185
226
|
):
|
|
186
227
|
"""Run a prompt with optional checker-driven retries.
|
|
187
228
|
|
|
188
229
|
Args:
|
|
189
230
|
prompt: The task prompt to run.
|
|
190
231
|
check: False to skip verification, None for the default check, or
|
|
191
|
-
a string check prompt.
|
|
192
|
-
|
|
232
|
+
a string check prompt. The string "None" skips verification.
|
|
233
|
+
max_iterations: Maximum number of task attempts (0 means unlimited).
|
|
193
234
|
cwd: Optional working directory for the Codex session.
|
|
194
235
|
yolo: Whether to pass --yolo to Codex.
|
|
195
236
|
flags: Additional raw CLI flags to pass to Codex.
|
|
196
237
|
progress: Whether to print progress after each verification round.
|
|
238
|
+
set_up: Optional setup prompt to run before the task.
|
|
239
|
+
tear_down: Optional cleanup prompt to run after the task.
|
|
240
|
+
on_success: Optional prompt to run after a successful task.
|
|
241
|
+
on_failure: Optional prompt to run after a failed task.
|
|
197
242
|
|
|
198
243
|
Returns:
|
|
199
244
|
The agent's response text when the task succeeds.
|
|
@@ -201,7 +246,19 @@ def task(
|
|
|
201
246
|
Raises:
|
|
202
247
|
TaskFailed: when the task reaches the maximum attempts without success.
|
|
203
248
|
"""
|
|
204
|
-
result = task_result(
|
|
249
|
+
result = task_result(
|
|
250
|
+
prompt,
|
|
251
|
+
check,
|
|
252
|
+
max_iterations,
|
|
253
|
+
cwd,
|
|
254
|
+
yolo,
|
|
255
|
+
flags,
|
|
256
|
+
progress,
|
|
257
|
+
set_up,
|
|
258
|
+
tear_down,
|
|
259
|
+
on_success,
|
|
260
|
+
on_failure,
|
|
261
|
+
)
|
|
205
262
|
if result.success:
|
|
206
263
|
return result.summary
|
|
207
264
|
raise TaskFailed(result.summary, result.attempts, result.errors)
|
|
@@ -210,78 +267,46 @@ def task(
|
|
|
210
267
|
def task_result(
|
|
211
268
|
prompt,
|
|
212
269
|
check=None,
|
|
213
|
-
|
|
270
|
+
max_iterations=DEFAULT_MAX_ITERATIONS,
|
|
214
271
|
cwd=None,
|
|
215
272
|
yolo=True,
|
|
216
273
|
flags=None,
|
|
217
274
|
progress=False,
|
|
275
|
+
set_up=None,
|
|
276
|
+
tear_down=None,
|
|
277
|
+
on_success=None,
|
|
278
|
+
on_failure=None,
|
|
218
279
|
):
|
|
219
280
|
"""Run a prompt with optional checker-driven retries and return TaskResult.
|
|
220
281
|
|
|
221
282
|
The runner keeps a single session. Each verification attempt uses a fresh,
|
|
222
283
|
stateless agent call. When progress is True, print a summary each round.
|
|
284
|
+
|
|
285
|
+
Hook strings mirror task file keys: set_up, tear_down, on_success, on_failure.
|
|
223
286
|
"""
|
|
224
|
-
if
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
summary = runner(prompt)
|
|
228
|
-
if progress:
|
|
229
|
-
_print_progress(
|
|
230
|
-
1,
|
|
231
|
-
1,
|
|
232
|
-
start_time,
|
|
233
|
-
summary,
|
|
234
|
-
"Verification skipped.",
|
|
235
|
-
cwd,
|
|
236
|
-
yolo,
|
|
237
|
-
flags,
|
|
238
|
-
)
|
|
239
|
-
return TaskResult(True, summary, 1, None, runner.thread_id)
|
|
240
|
-
if check is None:
|
|
241
|
-
check = _default_check(prompt)
|
|
242
|
-
if not isinstance(check, str):
|
|
287
|
+
if max_iterations < 0:
|
|
288
|
+
raise ValueError("max_iterations must be >= 0")
|
|
289
|
+
if not (check is None or check is False or isinstance(check, str)):
|
|
243
290
|
raise TypeError("check must be a string or False")
|
|
244
|
-
if n < 0:
|
|
245
|
-
raise ValueError("n must be >= 0")
|
|
246
|
-
|
|
247
|
-
runner = Agent(cwd, yolo, None, flags)
|
|
248
|
-
start_time = time.monotonic()
|
|
249
|
-
last_output = runner(prompt)
|
|
250
|
-
check_prompt = _build_check_prompt(check)
|
|
251
291
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
summary,
|
|
271
|
-
attempt + 1,
|
|
272
|
-
None,
|
|
273
|
-
runner.thread_id,
|
|
274
|
-
)
|
|
275
|
-
if attempt == n:
|
|
276
|
-
summary = runner(_failure_prompt(reason))
|
|
277
|
-
return TaskResult(
|
|
278
|
-
False,
|
|
279
|
-
summary,
|
|
280
|
-
attempt + 1,
|
|
281
|
-
reason,
|
|
282
|
-
runner.thread_id,
|
|
283
|
-
)
|
|
284
|
-
last_output = runner(_fix_prompt(reason))
|
|
292
|
+
set_up_text = _validate_hook("set_up", set_up)
|
|
293
|
+
tear_down_text = _validate_hook("tear_down", tear_down)
|
|
294
|
+
on_success_text = _validate_hook("on_success", on_success)
|
|
295
|
+
on_failure_text = _validate_hook("on_failure", on_failure)
|
|
296
|
+
runner = AutoTask(
|
|
297
|
+
prompt,
|
|
298
|
+
check,
|
|
299
|
+
max_iterations,
|
|
300
|
+
cwd,
|
|
301
|
+
yolo,
|
|
302
|
+
None,
|
|
303
|
+
flags,
|
|
304
|
+
set_up=set_up_text,
|
|
305
|
+
tear_down=tear_down_text,
|
|
306
|
+
on_success=on_success_text,
|
|
307
|
+
on_failure=on_failure_text,
|
|
308
|
+
)
|
|
309
|
+
return runner(progress=progress)
|
|
285
310
|
|
|
286
311
|
|
|
287
312
|
class TaskResult:
|
|
@@ -320,18 +345,23 @@ class Task:
|
|
|
320
345
|
def __init__(
|
|
321
346
|
self,
|
|
322
347
|
prompt,
|
|
323
|
-
max_attempts=10,
|
|
348
|
+
max_attempts=DEFAULT_MAX_ITERATIONS,
|
|
324
349
|
cwd=None,
|
|
325
350
|
yolo=True,
|
|
326
351
|
thread_id=None,
|
|
327
352
|
flags=None,
|
|
328
353
|
):
|
|
329
|
-
if max_attempts <
|
|
330
|
-
raise ValueError("max_attempts must be >=
|
|
354
|
+
if max_attempts < 0:
|
|
355
|
+
raise ValueError("max_attempts must be >= 0")
|
|
331
356
|
self.prompt = prompt
|
|
332
357
|
self.max_attempts = max_attempts
|
|
333
358
|
self.cwd = cwd
|
|
334
359
|
self.last_output = None
|
|
360
|
+
self.last_check_output = None
|
|
361
|
+
self.check_skipped = False
|
|
362
|
+
self.check_text = None
|
|
363
|
+
self._yolo = yolo
|
|
364
|
+
self._flags = flags
|
|
335
365
|
self.agent = Agent(
|
|
336
366
|
cwd,
|
|
337
367
|
yolo,
|
|
@@ -346,11 +376,26 @@ class Task:
|
|
|
346
376
|
"""Delete the directory etc."""
|
|
347
377
|
|
|
348
378
|
def check(self, output=None):
|
|
349
|
-
"""
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
379
|
+
"""Check if the task is done, return a string describing problems if not.
|
|
380
|
+
|
|
381
|
+
The default implementation runs the verifier agent with the standard
|
|
382
|
+
check wrapper and expects JSON output.
|
|
383
|
+
"""
|
|
384
|
+
self.last_check_output = None
|
|
385
|
+
self.check_skipped = False
|
|
386
|
+
check_text, skip = _resolve_check_text(self.prompt, self.check_text)
|
|
387
|
+
if skip:
|
|
388
|
+
self.check_skipped = True
|
|
389
|
+
return None
|
|
390
|
+
last_output = output if output is not None else self.last_output
|
|
391
|
+
last_output = last_output or ""
|
|
392
|
+
check_prompt = _build_check_prompt(check_text, last_output)
|
|
393
|
+
check_output = agent(check_prompt, self.cwd, self._yolo, self._flags)
|
|
394
|
+
self.last_check_output = check_output
|
|
395
|
+
success, reason = _check_result(check_output)
|
|
396
|
+
if success:
|
|
397
|
+
return None
|
|
398
|
+
return reason
|
|
354
399
|
|
|
355
400
|
def on_success(self, result):
|
|
356
401
|
"""Hook called after a successful task, e.g. commit the changes."""
|
|
@@ -365,23 +410,22 @@ class Task:
|
|
|
365
410
|
f"{error}\n\n"
|
|
366
411
|
"Take another look and see whether you agree and, if so, please take "
|
|
367
412
|
"this feedback into consideration and use it to continue to make "
|
|
368
|
-
"progress towards our original goal and intent."
|
|
413
|
+
"progress towards our original goal and intent. Don't propose next steps, "
|
|
414
|
+
"use your best judgement and work towards the goal!"
|
|
369
415
|
)
|
|
370
416
|
|
|
371
417
|
def success_prompt(self):
|
|
372
418
|
"""Ask the agent to summarize what it did."""
|
|
373
|
-
return
|
|
419
|
+
return _success_prompt()
|
|
374
420
|
|
|
375
421
|
def failure_prompt(self, error):
|
|
376
422
|
"""Ask the agent to summarize remaining issues after retries."""
|
|
377
|
-
return (
|
|
378
|
-
"We ran out of attempts. Can you please look back at everything you tried and summarize what it was that made this task too hard to complete, including anything you wish you'd known at the start that would have helped improve things?\n\n"
|
|
379
|
-
f"Outstanding issues:\n{error}"
|
|
380
|
-
)
|
|
423
|
+
return _failure_prompt(error)
|
|
381
424
|
|
|
382
|
-
def __call__(self, debug=False):
|
|
425
|
+
def __call__(self, debug=False, progress=False):
|
|
383
426
|
"""Run the task with checker-driven retries.
|
|
384
427
|
If debug is True, log debug messages.
|
|
428
|
+
If progress is True, print progress after each verification round.
|
|
385
429
|
"""
|
|
386
430
|
try:
|
|
387
431
|
# If this fails in the middle we will still try to tear down
|
|
@@ -392,35 +436,106 @@ class Task:
|
|
|
392
436
|
self.last_output = output
|
|
393
437
|
if debug:
|
|
394
438
|
_logger.debug("Initial output: %s", output)
|
|
395
|
-
|
|
439
|
+
|
|
396
440
|
# Try correcting it up to max_attempts times
|
|
397
|
-
|
|
441
|
+
start_time = time.monotonic()
|
|
442
|
+
error = None
|
|
443
|
+
attempt = 0
|
|
444
|
+
while True:
|
|
445
|
+
attempt += 1
|
|
398
446
|
error = self.check(self.last_output)
|
|
399
447
|
if debug:
|
|
400
448
|
_logger.debug("Check error: %s", error)
|
|
401
|
-
|
|
402
|
-
if
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
449
|
+
|
|
450
|
+
if progress:
|
|
451
|
+
check_output = self.last_check_output
|
|
452
|
+
if self.check_skipped:
|
|
453
|
+
check_output = "Verification skipped."
|
|
454
|
+
_print_progress(
|
|
455
|
+
attempt,
|
|
456
|
+
self.max_attempts,
|
|
457
|
+
start_time,
|
|
458
|
+
self.last_output,
|
|
459
|
+
check_output or "",
|
|
460
|
+
self.cwd,
|
|
461
|
+
self._yolo,
|
|
462
|
+
self._flags,
|
|
463
|
+
)
|
|
464
|
+
if not error:
|
|
410
465
|
summary = self.agent(self.success_prompt())
|
|
411
466
|
if debug:
|
|
412
467
|
_logger.debug("Success summary: %s", summary)
|
|
413
|
-
result = TaskResult(
|
|
468
|
+
result = TaskResult(
|
|
469
|
+
True,
|
|
470
|
+
summary,
|
|
471
|
+
attempt,
|
|
472
|
+
None,
|
|
473
|
+
self.agent.thread_id,
|
|
474
|
+
)
|
|
414
475
|
self.on_success(result)
|
|
415
476
|
return result
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
477
|
+
if self.max_attempts and attempt >= self.max_attempts:
|
|
478
|
+
summary = self.agent(self.failure_prompt(error))
|
|
479
|
+
if debug:
|
|
480
|
+
_logger.debug("Failure summary: %s", summary)
|
|
481
|
+
result = TaskResult(
|
|
482
|
+
False,
|
|
483
|
+
summary,
|
|
484
|
+
attempt,
|
|
485
|
+
error,
|
|
486
|
+
self.agent.thread_id,
|
|
487
|
+
)
|
|
488
|
+
self.on_failure(result)
|
|
489
|
+
return result
|
|
490
|
+
output = self.agent(self.fix_prompt(error))
|
|
491
|
+
self.last_output = output
|
|
492
|
+
if debug:
|
|
493
|
+
_logger.debug("Fix output: %s", output)
|
|
424
494
|
finally:
|
|
425
495
|
# No matter what, once we have set_up we will always tear_down
|
|
426
496
|
self.tear_down()
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
class AutoTask(Task):
|
|
500
|
+
"""Task subclass that maps prompt strings onto Task hooks."""
|
|
501
|
+
|
|
502
|
+
def __init__(
|
|
503
|
+
self,
|
|
504
|
+
prompt,
|
|
505
|
+
check=None,
|
|
506
|
+
max_attempts=DEFAULT_MAX_ITERATIONS,
|
|
507
|
+
cwd=None,
|
|
508
|
+
yolo=True,
|
|
509
|
+
thread_id=None,
|
|
510
|
+
flags=None,
|
|
511
|
+
set_up=None,
|
|
512
|
+
tear_down=None,
|
|
513
|
+
on_success=None,
|
|
514
|
+
on_failure=None,
|
|
515
|
+
):
|
|
516
|
+
if not (check is None or check is False or isinstance(check, str)):
|
|
517
|
+
raise TypeError("check must be a string or False")
|
|
518
|
+
if max_attempts < 0:
|
|
519
|
+
raise ValueError("max_attempts must be >= 0")
|
|
520
|
+
super().__init__(prompt, max_attempts, cwd, yolo, thread_id, flags)
|
|
521
|
+
self.check_text = check
|
|
522
|
+
self._set_up = _validate_hook("set_up", set_up)
|
|
523
|
+
self._tear_down = _validate_hook("tear_down", tear_down)
|
|
524
|
+
self._on_success = _validate_hook("on_success", on_success)
|
|
525
|
+
self._on_failure = _validate_hook("on_failure", on_failure)
|
|
526
|
+
|
|
527
|
+
def _run_hook(self, text):
|
|
528
|
+
if text:
|
|
529
|
+
agent(text, self.cwd, self._yolo, self._flags)
|
|
530
|
+
|
|
531
|
+
def set_up(self):
|
|
532
|
+
self._run_hook(self._set_up)
|
|
533
|
+
|
|
534
|
+
def tear_down(self):
|
|
535
|
+
self._run_hook(self._tear_down)
|
|
536
|
+
|
|
537
|
+
def on_success(self, result):
|
|
538
|
+
self._run_hook(self._on_success)
|
|
539
|
+
|
|
540
|
+
def on_failure(self, result):
|
|
541
|
+
self._run_hook(self._on_failure)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Load YAML task files and map them onto Task hooks."""
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
from .task import AutoTask
|
|
6
|
+
|
|
7
|
+
_ITEM_TOKEN = "{{item}}"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def load_task_file(path):
|
|
11
|
+
"""Load a YAML task file and return a normalized task definition."""
|
|
12
|
+
if not path:
|
|
13
|
+
raise ValueError("task file path is required")
|
|
14
|
+
with open(path, "r", encoding="utf-8") as handle:
|
|
15
|
+
data = yaml.safe_load(handle) or {}
|
|
16
|
+
if not isinstance(data, dict):
|
|
17
|
+
raise ValueError("Task file must be a YAML mapping.")
|
|
18
|
+
|
|
19
|
+
prompt = data.get("prompt")
|
|
20
|
+
if not isinstance(prompt, str) or not prompt.strip():
|
|
21
|
+
raise ValueError("Task file missing non-empty 'prompt'.")
|
|
22
|
+
|
|
23
|
+
max_iterations = data.get("max_iterations")
|
|
24
|
+
if max_iterations is not None:
|
|
25
|
+
if not isinstance(max_iterations, int):
|
|
26
|
+
raise ValueError("Task file max_iterations must be an integer.")
|
|
27
|
+
if max_iterations < 0:
|
|
28
|
+
raise ValueError("Task file max_iterations must be >= 0.")
|
|
29
|
+
|
|
30
|
+
return {
|
|
31
|
+
"prompt": prompt,
|
|
32
|
+
"set_up": _optional_str(data.get("set_up")),
|
|
33
|
+
"tear_down": _optional_str(data.get("tear_down")),
|
|
34
|
+
"check": _optional_str(data.get("check")),
|
|
35
|
+
"on_success": _optional_str(data.get("on_success")),
|
|
36
|
+
"on_failure": _optional_str(data.get("on_failure")),
|
|
37
|
+
"max_iterations": max_iterations,
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _optional_str(value):
|
|
42
|
+
if value is None:
|
|
43
|
+
return None
|
|
44
|
+
if isinstance(value, str):
|
|
45
|
+
return value if value.strip() else None
|
|
46
|
+
raise ValueError("Task file values must be strings.")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _render(text, item):
|
|
50
|
+
if text is None:
|
|
51
|
+
return None
|
|
52
|
+
if item is None:
|
|
53
|
+
return text
|
|
54
|
+
return text.replace(_ITEM_TOKEN, item)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TaskFile(AutoTask):
|
|
58
|
+
"""Task subclass that maps a YAML task file onto Task hooks."""
|
|
59
|
+
|
|
60
|
+
def __init__(
|
|
61
|
+
self,
|
|
62
|
+
path,
|
|
63
|
+
item=None,
|
|
64
|
+
max_iterations=None,
|
|
65
|
+
cwd=None,
|
|
66
|
+
yolo=True,
|
|
67
|
+
thread_id=None,
|
|
68
|
+
flags=None,
|
|
69
|
+
):
|
|
70
|
+
task_def = load_task_file(path)
|
|
71
|
+
if max_iterations is None:
|
|
72
|
+
max_iterations = task_def.get("max_iterations")
|
|
73
|
+
elif not isinstance(max_iterations, int):
|
|
74
|
+
raise ValueError("max_iterations must be an integer.")
|
|
75
|
+
elif max_iterations < 0:
|
|
76
|
+
raise ValueError("max_iterations must be >= 0.")
|
|
77
|
+
item_text = "" if item is None else str(item)
|
|
78
|
+
rendered = {
|
|
79
|
+
"prompt": _render(task_def.get("prompt"), item_text),
|
|
80
|
+
"set_up": _render(task_def.get("set_up"), item_text),
|
|
81
|
+
"tear_down": _render(task_def.get("tear_down"), item_text),
|
|
82
|
+
"check": _render(task_def.get("check"), item_text),
|
|
83
|
+
"on_success": _render(task_def.get("on_success"), item_text),
|
|
84
|
+
"on_failure": _render(task_def.get("on_failure"), item_text),
|
|
85
|
+
}
|
|
86
|
+
if max_iterations is None:
|
|
87
|
+
super().__init__(
|
|
88
|
+
rendered["prompt"],
|
|
89
|
+
rendered["check"],
|
|
90
|
+
cwd=cwd,
|
|
91
|
+
yolo=yolo,
|
|
92
|
+
thread_id=thread_id,
|
|
93
|
+
flags=flags,
|
|
94
|
+
set_up=rendered["set_up"],
|
|
95
|
+
tear_down=rendered["tear_down"],
|
|
96
|
+
on_success=rendered["on_success"],
|
|
97
|
+
on_failure=rendered["on_failure"],
|
|
98
|
+
)
|
|
99
|
+
return
|
|
100
|
+
super().__init__(
|
|
101
|
+
rendered["prompt"],
|
|
102
|
+
rendered["check"],
|
|
103
|
+
max_iterations,
|
|
104
|
+
cwd,
|
|
105
|
+
yolo,
|
|
106
|
+
thread_id,
|
|
107
|
+
flags,
|
|
108
|
+
set_up=rendered["set_up"],
|
|
109
|
+
tear_down=rendered["tear_down"],
|
|
110
|
+
on_success=rendered["on_success"],
|
|
111
|
+
on_failure=rendered["on_failure"],
|
|
112
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: codexapi
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Minimal Python API for running the Codex CLI.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: codex,agent,cli,openai
|
|
@@ -74,6 +74,11 @@ echo "Say hello." | codexapi run
|
|
|
74
74
|
codexapi task "Fix the failing tests." --max-iterations 5
|
|
75
75
|
codexapi task -f task.yaml
|
|
76
76
|
```
|
|
77
|
+
Progress is shown by default for `codexapi task`; use `--quiet` to suppress it.
|
|
78
|
+
|
|
79
|
+
Task files default to using the standard check prompt for the task. Set `check: "None"` to skip verification.
|
|
80
|
+
Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
|
|
81
|
+
Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
|
|
77
82
|
|
|
78
83
|
Show running sessions and their latest activity:
|
|
79
84
|
|
|
@@ -139,26 +144,31 @@ the same conversation and returns only the agent's message.
|
|
|
139
144
|
- `yolo` (bool): pass `--yolo` to Codex when true (defaults to true).
|
|
140
145
|
- `flags` (str | None): extra CLI flags to pass to Codex.
|
|
141
146
|
|
|
142
|
-
### `task(prompt, check=None,
|
|
147
|
+
### `task(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> str`
|
|
143
148
|
|
|
144
149
|
Runs a task with checker-driven retries and returns the success summary.
|
|
145
150
|
Raises `TaskFailed` when the maximum attempts are reached.
|
|
146
151
|
|
|
147
|
-
- `check` (str | None | False): custom check prompt, default checker, or `False` to skip.
|
|
148
|
-
- `
|
|
152
|
+
- `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
|
|
153
|
+
- `max_iterations` (int): maximum number of task attempts (0 means unlimited).
|
|
154
|
+
- `progress` (bool): print progress after each verification round.
|
|
155
|
+
- `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
|
|
149
156
|
|
|
150
|
-
### `task_result(prompt, check=None,
|
|
157
|
+
### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
|
|
151
158
|
|
|
152
159
|
Runs a task with checker-driven retries and returns a `TaskResult` without
|
|
153
160
|
raising `TaskFailed`.
|
|
161
|
+
Arguments mirror `task()` (including hooks).
|
|
154
162
|
|
|
155
163
|
### `Task(prompt, max_attempts=10, cwd=None, yolo=True, thread_id=None, flags=None)`
|
|
156
164
|
|
|
157
165
|
Runs a Codex task with checker-driven retries. Subclass it and implement
|
|
158
166
|
`check()` to return an error string when the task is incomplete, or return
|
|
159
167
|
`None`/`""` when the task passes.
|
|
168
|
+
If you do not override `check()`, the default verifier wrapper runs with the
|
|
169
|
+
default check prompt and includes the agent output.
|
|
160
170
|
|
|
161
|
-
- `__call__() -> TaskResult`: run the task.
|
|
171
|
+
- `__call__(debug=False, progress=False) -> TaskResult`: run the task.
|
|
162
172
|
- `set_up()`: optional setup hook.
|
|
163
173
|
- `tear_down()`: optional cleanup hook.
|
|
164
174
|
- `check(output=None) -> str | None`: return an error description or `None`/`""`. `output` is the last agent response.
|
|
@@ -177,7 +187,7 @@ Simple result object returned by `Task.__call__`.
|
|
|
177
187
|
|
|
178
188
|
### `TaskFailed`
|
|
179
189
|
|
|
180
|
-
Exception raised by `task()` when
|
|
190
|
+
Exception raised by `task()` when attempts are exhausted.
|
|
181
191
|
|
|
182
192
|
- `summary` (str): failure summary text.
|
|
183
193
|
- `attempts` (int | None): attempts made when the task failed.
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
"""Load YAML task files and map them onto Task hooks."""
|
|
2
|
-
|
|
3
|
-
import yaml
|
|
4
|
-
|
|
5
|
-
from .agent import agent
|
|
6
|
-
from .task import Task
|
|
7
|
-
|
|
8
|
-
_ITEM_TOKEN = "{{item}}"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def load_task_file(path):
|
|
12
|
-
"""Load a YAML task file and return a normalized task definition."""
|
|
13
|
-
if not path:
|
|
14
|
-
raise ValueError("task file path is required")
|
|
15
|
-
with open(path, "r", encoding="utf-8") as handle:
|
|
16
|
-
data = yaml.safe_load(handle) or {}
|
|
17
|
-
if not isinstance(data, dict):
|
|
18
|
-
raise ValueError("Task file must be a YAML mapping.")
|
|
19
|
-
|
|
20
|
-
prompt = data.get("prompt")
|
|
21
|
-
if not isinstance(prompt, str) or not prompt.strip():
|
|
22
|
-
raise ValueError("Task file missing non-empty 'prompt'.")
|
|
23
|
-
|
|
24
|
-
return {
|
|
25
|
-
"prompt": prompt,
|
|
26
|
-
"set_up": _optional_str(data.get("set_up")),
|
|
27
|
-
"tear_down": _optional_str(data.get("tear_down")),
|
|
28
|
-
"check": _optional_str(data.get("check")),
|
|
29
|
-
"on_success": _optional_str(data.get("on_success")),
|
|
30
|
-
"on_failure": _optional_str(data.get("on_failure")),
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def _optional_str(value):
|
|
35
|
-
if value is None:
|
|
36
|
-
return None
|
|
37
|
-
if isinstance(value, str):
|
|
38
|
-
return value if value.strip() else None
|
|
39
|
-
raise ValueError("Task file values must be strings.")
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _render(text, item):
|
|
43
|
-
if text is None:
|
|
44
|
-
return None
|
|
45
|
-
if item is None:
|
|
46
|
-
return text
|
|
47
|
-
return text.replace(_ITEM_TOKEN, item)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class AutoTask(Task):
|
|
51
|
-
"""Task subclass that maps YAML strings onto Task hooks."""
|
|
52
|
-
|
|
53
|
-
def __init__(
|
|
54
|
-
self,
|
|
55
|
-
config,
|
|
56
|
-
item=None,
|
|
57
|
-
max_attempts=10,
|
|
58
|
-
cwd=None,
|
|
59
|
-
yolo=True,
|
|
60
|
-
thread_id=None,
|
|
61
|
-
flags=None,
|
|
62
|
-
):
|
|
63
|
-
if not isinstance(config, dict):
|
|
64
|
-
raise TypeError("config must be a task definition dict")
|
|
65
|
-
self._config = config
|
|
66
|
-
self._item = "" if item is None else str(item)
|
|
67
|
-
self._yolo = yolo
|
|
68
|
-
self._flags = flags
|
|
69
|
-
prompt = _render(config.get("prompt"), self._item)
|
|
70
|
-
super().__init__(prompt, max_attempts, cwd, yolo, thread_id, flags)
|
|
71
|
-
|
|
72
|
-
def _hook(self, name):
|
|
73
|
-
return _render(self._config.get(name), self._item)
|
|
74
|
-
|
|
75
|
-
def set_up(self):
|
|
76
|
-
text = self._hook("set_up")
|
|
77
|
-
if text:
|
|
78
|
-
agent(text, self.cwd, self._yolo, self._flags)
|
|
79
|
-
|
|
80
|
-
def tear_down(self):
|
|
81
|
-
text = self._hook("tear_down")
|
|
82
|
-
if text:
|
|
83
|
-
agent(text, self.cwd, self._yolo, self._flags)
|
|
84
|
-
|
|
85
|
-
def check(self, output=None):
|
|
86
|
-
text = self._hook("check")
|
|
87
|
-
if not text:
|
|
88
|
-
return None
|
|
89
|
-
last_output = output if output is not None else self.last_output
|
|
90
|
-
last_output = last_output or ""
|
|
91
|
-
if last_output:
|
|
92
|
-
prompt = f"{text}\n\nAGENT OUTPUT:\n{last_output}"
|
|
93
|
-
else:
|
|
94
|
-
prompt = text
|
|
95
|
-
result = agent(prompt, self.cwd, self._yolo, self._flags)
|
|
96
|
-
if not isinstance(result, str) or not result.strip():
|
|
97
|
-
return None
|
|
98
|
-
return result
|
|
99
|
-
|
|
100
|
-
def on_success(self, result):
|
|
101
|
-
text = self._hook("on_success")
|
|
102
|
-
if text:
|
|
103
|
-
agent(text, self.cwd, self._yolo, self._flags)
|
|
104
|
-
|
|
105
|
-
def on_failure(self, result):
|
|
106
|
-
text = self._hook("on_failure")
|
|
107
|
-
if text:
|
|
108
|
-
agent(text, self.cwd, self._yolo, self._flags)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|