codexapi 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codexapi-0.3.2/src/codexapi.egg-info → codexapi-0.3.4}/PKG-INFO +1 -1
- {codexapi-0.3.2 → codexapi-0.3.4}/pyproject.toml +1 -1
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/__init__.py +1 -1
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/cli.py +6 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/ralph.py +1 -1
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/task.py +122 -5
- {codexapi-0.3.2 → codexapi-0.3.4/src/codexapi.egg-info}/PKG-INFO +1 -1
- {codexapi-0.3.2 → codexapi-0.3.4}/LICENSE +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/README.md +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/setup.cfg +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/__main__.py +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi/agent.py +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi.egg-info/SOURCES.txt +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi.egg-info/dependency_links.txt +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi.egg-info/entry_points.txt +0 -0
- {codexapi-0.3.2 → codexapi-0.3.4}/src/codexapi.egg-info/top_level.txt +0 -0
|
@@ -948,6 +948,11 @@ def main(argv=None):
|
|
|
948
948
|
"--flags",
|
|
949
949
|
help="Additional raw CLI flags to pass to Codex (quoted as needed).",
|
|
950
950
|
)
|
|
951
|
+
task_parser.add_argument(
|
|
952
|
+
"--progress",
|
|
953
|
+
action="store_true",
|
|
954
|
+
help="Print progress after each verification round.",
|
|
955
|
+
)
|
|
951
956
|
|
|
952
957
|
ralph_parser = subparsers.add_parser(
|
|
953
958
|
"ralph",
|
|
@@ -1039,6 +1044,7 @@ def main(argv=None):
|
|
|
1039
1044
|
args.cwd,
|
|
1040
1045
|
args.yolo,
|
|
1041
1046
|
args.flags,
|
|
1047
|
+
args.progress,
|
|
1042
1048
|
)
|
|
1043
1049
|
except TaskFailed as exc:
|
|
1044
1050
|
message = exc.summary
|
|
@@ -135,7 +135,7 @@ def run_ralph_loop(
|
|
|
135
135
|
elif runner is None:
|
|
136
136
|
runner = Agent(cwd, yolo, None, flags)
|
|
137
137
|
|
|
138
|
-
message = runner(prompt)
|
|
138
|
+
message = runner(prompt + '\nIf there are multiple paths forward, please use your own best judgement as to which to try first - I trust you!\n')
|
|
139
139
|
print(message)
|
|
140
140
|
last_message = message
|
|
141
141
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
5
|
+
import time
|
|
5
6
|
|
|
6
7
|
from .agent import Agent, agent
|
|
7
8
|
|
|
@@ -15,6 +16,12 @@ _CHECK_PREFIX = (
|
|
|
15
16
|
"Set success to true only if everything matches the intent."
|
|
16
17
|
)
|
|
17
18
|
_CHECK_SUFFIX = "JSON only. No markdown or extra text."
|
|
19
|
+
_PROGRESS_PROMPT = (
|
|
20
|
+
"Summarize the outputs below in one line each.\n"
|
|
21
|
+
"Return only JSON with keys: agent (string) and check (string).\n"
|
|
22
|
+
"Each value must be a single line with no newlines.\n"
|
|
23
|
+
"Do not run commands or change any files."
|
|
24
|
+
)
|
|
18
25
|
|
|
19
26
|
|
|
20
27
|
def _default_check(prompt):
|
|
@@ -31,6 +38,16 @@ def _build_check_prompt(check):
|
|
|
31
38
|
return f"{_CHECK_PREFIX}\n\n{check}\n\n{_CHECK_SUFFIX}"
|
|
32
39
|
|
|
33
40
|
|
|
41
|
+
def _build_progress_prompt(agent_output, check_output):
    """Assemble the prompt that asks for a progress summary.

    The result is the shared progress instructions followed by two labelled
    sections, each separated from the previous part by a blank line.

    Args:
        agent_output: Text produced by the working agent in this round.
        check_output: Text produced by the verification check in this round.

    Returns:
        The complete prompt string.
    """
    agent_section = f"AGENT OUTPUT:\n{agent_output}"
    check_section = f"CHECK OUTPUT:\n{check_output}"
    return "\n\n".join((_PROGRESS_PROMPT, agent_section, check_section))
|
|
49
|
+
|
|
50
|
+
|
|
34
51
|
def _check_result(output):
|
|
35
52
|
try:
|
|
36
53
|
data = json.loads(output)
|
|
@@ -50,6 +67,78 @@ def _check_result(output):
|
|
|
50
67
|
return success, reason.strip()
|
|
51
68
|
|
|
52
69
|
|
|
70
|
+
def _progress_result(output):
|
|
71
|
+
try:
|
|
72
|
+
data = json.loads(output)
|
|
73
|
+
except json.JSONDecodeError as exc:
|
|
74
|
+
raise RuntimeError(
|
|
75
|
+
f"Progress summary returned invalid JSON: {exc}"
|
|
76
|
+
) from exc
|
|
77
|
+
|
|
78
|
+
if not isinstance(data, dict):
|
|
79
|
+
raise RuntimeError("Progress summary JSON must be an object.")
|
|
80
|
+
|
|
81
|
+
agent_summary = data.get("agent")
|
|
82
|
+
check_summary = data.get("check")
|
|
83
|
+
if not isinstance(agent_summary, str):
|
|
84
|
+
raise RuntimeError("Progress summary JSON missing string 'agent'.")
|
|
85
|
+
if not isinstance(check_summary, str):
|
|
86
|
+
raise RuntimeError("Progress summary JSON missing string 'check'.")
|
|
87
|
+
|
|
88
|
+
return _single_line(agent_summary), _single_line(check_summary)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _single_line(text):
|
|
92
|
+
if not text:
|
|
93
|
+
return ""
|
|
94
|
+
return " ".join(text.replace("\r", " ").split())
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _format_duration(seconds):
|
|
98
|
+
if seconds < 0:
|
|
99
|
+
seconds = 0
|
|
100
|
+
seconds = int(round(seconds))
|
|
101
|
+
hours, remainder = divmod(seconds, 3600)
|
|
102
|
+
minutes, seconds = divmod(remainder, 60)
|
|
103
|
+
parts = []
|
|
104
|
+
if hours:
|
|
105
|
+
parts.append(f"{hours}h")
|
|
106
|
+
if minutes or hours:
|
|
107
|
+
parts.append(f"{minutes}m")
|
|
108
|
+
if not hours:
|
|
109
|
+
parts.append(f"{seconds}s")
|
|
110
|
+
return " ".join(parts)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _print_progress(
    attempt,
    total,
    start_time,
    agent_output,
    check_output,
    cwd,
    yolo,
    flags,
):
    """Print a progress report for the round that just finished.

    The report is a header with elapsed and estimated remaining time,
    followed by one-line summaries of the agent output and the check
    output, then a blank line. The summaries come from an extra agent()
    call.

    Args:
        attempt: Number of the round just completed (callers pass 1-based
            values).
        total: Maximum number of rounds.
        start_time: time.monotonic() reading taken when the task started.
        agent_output: Latest output from the working agent.
        check_output: Latest output from the verification check.
        cwd: Working directory forwarded to the summary agent() call.
        yolo: Forwarded to the summary agent() call.
        flags: Forwarded to the summary agent() call.
    """
    elapsed = time.monotonic() - start_time
    # Linear estimate: average time per completed round times the rounds
    # that could still run. The guard avoids dividing by zero.
    remaining = 0
    if attempt:
        remaining = (elapsed / attempt) * (total - attempt)

    summary_prompt = _build_progress_prompt(agent_output, check_output)
    summary = agent(summary_prompt, cwd, yolo, flags)
    try:
        agent_summary, check_summary = _progress_result(summary)
    except RuntimeError as exc:
        # Progress output is advisory only: a malformed summary reply must
        # not abort a task whose real work has already been done.
        agent_summary = check_summary = f"(summary unavailable: {exc})"

    elapsed_text = _format_duration(elapsed)
    remaining_text = _format_duration(remaining)
    print(
        f"Round {attempt}/{total} ({elapsed_text} elapsed, {remaining_text} remaining)",
        flush=True,
    )
    print(f"Agent: {agent_summary}", flush=True)
    print(f"Check: {check_summary}", flush=True)
    print("", flush=True)
|
|
141
|
+
|
|
53
142
|
def _fix_prompt(error):
|
|
54
143
|
return (
|
|
55
144
|
"The verification check failed:\n"
|
|
@@ -89,6 +178,7 @@ def task(
|
|
|
89
178
|
cwd=None,
|
|
90
179
|
yolo=False,
|
|
91
180
|
flags=None,
|
|
181
|
+
progress=False,
|
|
92
182
|
):
|
|
93
183
|
"""Run a prompt with optional checker-driven retries.
|
|
94
184
|
|
|
@@ -100,6 +190,7 @@ def task(
|
|
|
100
190
|
cwd: Optional working directory for the Codex session.
|
|
101
191
|
yolo: Whether to pass --yolo to Codex.
|
|
102
192
|
flags: Additional raw CLI flags to pass to Codex.
|
|
193
|
+
progress: Whether to print progress after each verification round.
|
|
103
194
|
|
|
104
195
|
Returns:
|
|
105
196
|
The agent's response text when the task succeeds.
|
|
@@ -107,7 +198,7 @@ def task(
|
|
|
107
198
|
Raises:
|
|
108
199
|
TaskFailed: when the task reaches the maximum attempts without success.
|
|
109
200
|
"""
|
|
110
|
-
result = task_result(prompt, check, n, cwd, yolo, flags)
|
|
201
|
+
result = task_result(prompt, check, n, cwd, yolo, flags, progress)
|
|
111
202
|
if result.success:
|
|
112
203
|
return result.summary
|
|
113
204
|
raise TaskFailed(result.summary, result.attempts, result.errors)
|
|
@@ -120,15 +211,28 @@ def task_result(
|
|
|
120
211
|
cwd=None,
|
|
121
212
|
yolo=False,
|
|
122
213
|
flags=None,
|
|
214
|
+
progress=False,
|
|
123
215
|
):
|
|
124
216
|
"""Run a prompt with optional checker-driven retries and return TaskResult.
|
|
125
217
|
|
|
126
218
|
The runner keeps a single session. Each verification attempt uses a fresh,
|
|
127
|
-
stateless agent call.
|
|
219
|
+
stateless agent call. When progress is True, print a summary each round.
|
|
128
220
|
"""
|
|
129
221
|
if check is False:
|
|
130
222
|
runner = Agent(cwd, yolo, None, flags)
|
|
223
|
+
start_time = time.monotonic()
|
|
131
224
|
summary = runner(prompt)
|
|
225
|
+
if progress:
|
|
226
|
+
_print_progress(
|
|
227
|
+
1,
|
|
228
|
+
1,
|
|
229
|
+
start_time,
|
|
230
|
+
summary,
|
|
231
|
+
"Verification skipped.",
|
|
232
|
+
cwd,
|
|
233
|
+
yolo,
|
|
234
|
+
flags,
|
|
235
|
+
)
|
|
132
236
|
return TaskResult(True, summary, 1, None, runner.thread_id)
|
|
133
237
|
if check is None:
|
|
134
238
|
check = _default_check(prompt)
|
|
@@ -138,11 +242,24 @@ def task_result(
|
|
|
138
242
|
raise ValueError("n must be >= 0")
|
|
139
243
|
|
|
140
244
|
runner = Agent(cwd, yolo, None, flags)
|
|
141
|
-
|
|
245
|
+
start_time = time.monotonic()
|
|
246
|
+
last_output = runner(prompt)
|
|
142
247
|
check_prompt = _build_check_prompt(check)
|
|
143
248
|
|
|
144
249
|
for attempt in range(n + 1):
|
|
145
|
-
|
|
250
|
+
check_output = agent(check_prompt, cwd, yolo, flags)
|
|
251
|
+
success, reason = _check_result(check_output)
|
|
252
|
+
if progress:
|
|
253
|
+
_print_progress(
|
|
254
|
+
attempt + 1,
|
|
255
|
+
n + 1,
|
|
256
|
+
start_time,
|
|
257
|
+
last_output,
|
|
258
|
+
check_output,
|
|
259
|
+
cwd,
|
|
260
|
+
yolo,
|
|
261
|
+
flags,
|
|
262
|
+
)
|
|
146
263
|
if success:
|
|
147
264
|
summary = runner(_success_prompt())
|
|
148
265
|
return TaskResult(
|
|
@@ -161,7 +278,7 @@ def task_result(
|
|
|
161
278
|
reason,
|
|
162
279
|
runner.thread_id,
|
|
163
280
|
)
|
|
164
|
-
runner(_fix_prompt(reason))
|
|
281
|
+
last_output = runner(_fix_prompt(reason))
|
|
165
282
|
|
|
166
283
|
|
|
167
284
|
class TaskResult:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|