codexapi 0.5.5__tar.gz → 0.5.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: codexapi
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Minimal Python API for running the Codex CLI.
5
5
  License: MIT
6
6
  Keywords: codex,agent,cli,openai
@@ -68,7 +68,7 @@ codexapi run --cwd /path/to/project "Fix the failing tests."
68
68
  echo "Say hello." | codexapi run
69
69
  ```
70
70
 
71
- `codexapi task` exits with code 0 on success and 1 on failure, printing the summary.
71
+ `codexapi task` exits with code 0 on success and 1 on failure.
72
72
 
73
73
  ```bash
74
74
  codexapi task "Fix the failing tests." --max-iterations 5
@@ -82,6 +82,12 @@ Task files default to using the standard check prompt for the task. Set `check:
82
82
  Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
83
83
  Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
84
84
 
85
+ Example task progress run:
86
+
87
+ ```bash
88
+ ./examples/example_task_progress.sh
89
+ ```
90
+
85
91
  Show running sessions and their latest activity:
86
92
 
87
93
  ```bash
@@ -155,7 +161,7 @@ Raises `TaskFailed` when the maximum attempts are reached.
155
161
 
156
162
  - `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
157
163
  - `max_iterations` (int): maximum number of task attempts (0 means unlimited).
158
- - `progress` (bool): print progress after each verification round.
164
+ - `progress` (bool): show a tqdm progress bar with a one-line status after each round.
159
165
  - `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
160
166
 
161
167
  ### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
@@ -54,7 +54,7 @@ codexapi run --cwd /path/to/project "Fix the failing tests."
54
54
  echo "Say hello." | codexapi run
55
55
  ```
56
56
 
57
- `codexapi task` exits with code 0 on success and 1 on failure, printing the summary.
57
+ `codexapi task` exits with code 0 on success and 1 on failure.
58
58
 
59
59
  ```bash
60
60
  codexapi task "Fix the failing tests." --max-iterations 5
@@ -68,6 +68,12 @@ Task files default to using the standard check prompt for the task. Set `check:
68
68
  Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
69
69
  Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
70
70
 
71
+ Example task progress run:
72
+
73
+ ```bash
74
+ ./examples/example_task_progress.sh
75
+ ```
76
+
71
77
  Show running sessions and their latest activity:
72
78
 
73
79
  ```bash
@@ -141,7 +147,7 @@ Raises `TaskFailed` when the maximum attempts are reached.
141
147
 
142
148
  - `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
143
149
  - `max_iterations` (int): maximum number of task attempts (0 means unlimited).
144
- - `progress` (bool): print progress after each verification round.
150
+ - `progress` (bool): show a tqdm progress bar with a one-line status after each round.
145
151
  - `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
146
152
 
147
153
  ### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codexapi"
7
- version = "0.5.5"
7
+ version = "0.5.6"
8
8
  description = "Minimal Python API for running the Codex CLI."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -15,4 +15,4 @@ __all__ = [
15
15
  "task",
16
16
  "task_result",
17
17
  ]
18
- __version__ = "0.5.5"
18
+ __version__ = "0.5.6"
@@ -1298,7 +1298,6 @@ def main(argv=None):
1298
1298
  flags=args.flags,
1299
1299
  )
1300
1300
  result = task_runner(progress=not args.quiet)
1301
- print(result.summary)
1302
1301
  if not result.success:
1303
1302
  raise SystemExit(1)
1304
1303
  return
@@ -1310,6 +1309,7 @@ def main(argv=None):
1310
1309
  prompt_source = args.task
1311
1310
  prompt = _read_prompt(prompt_source)
1312
1311
  exit_code = 0
1312
+ message = None
1313
1313
 
1314
1314
  if args.command == "ralph":
1315
1315
  if args.max_iterations < 0:
@@ -1347,7 +1347,7 @@ def main(argv=None):
1347
1347
  raise SystemExit("--max-iterations must be >= 0.")
1348
1348
  check = args.check
1349
1349
  try:
1350
- message = task(
1350
+ task(
1351
1351
  prompt,
1352
1352
  check,
1353
1353
  args.max_iterations,
@@ -1357,7 +1357,6 @@ def main(argv=None):
1357
1357
  not args.quiet,
1358
1358
  )
1359
1359
  except TaskFailed as exc:
1360
- message = exc.summary
1361
1360
  exit_code = 1
1362
1361
  else:
1363
1362
  use_session = args.thread_id or args.print_thread_id
@@ -1374,7 +1373,8 @@ def main(argv=None):
1374
1373
  else:
1375
1374
  message = agent(prompt, args.cwd, args.yolo, args.flags)
1376
1375
 
1377
- print(message)
1376
+ if message is not None:
1377
+ print(message)
1378
1378
  if exit_code:
1379
1379
  raise SystemExit(exit_code)
1380
1380
 
@@ -5,6 +5,7 @@ import logging
5
5
  import time
6
6
 
7
7
  from .agent import Agent, agent
8
+ from tqdm import tqdm
8
9
 
9
10
  _logger = logging.getLogger(__name__)
10
11
 
@@ -20,11 +21,13 @@ _CHECK_PREFIX = (
20
21
  "Set success to true only if everything matches the intent."
21
22
  )
22
23
  _CHECK_SUFFIX = "JSON only. No markdown or extra text."
23
- _PROGRESS_PROMPT = (
24
- "Summarize the outputs below in one line each.\n"
25
- "Return only JSON with keys: agent (string) and check (string).\n"
26
- "Each value must be a single line with no newlines.\n"
27
- "Do not run commands or change any files."
24
+ _ESTIMATE_PROMPT = (
25
+ "Estimate remaining work in story points for the task below.\n"
26
+ "You may inspect the repo (read files, git status/diff), but do not run tests.\n"
27
+ "Do not change any files.\n"
28
+ "Use the task prompt, current repo state, and latest agent/check outputs.\n"
29
+ "Return only JSON with keys: remaining (number) and summary (string).\n"
30
+ "summary must be a single line describing agent + verifier status."
28
31
  )
29
32
  DEFAULT_MAX_ITERATIONS = 10
30
33
 
@@ -62,14 +65,32 @@ def _resolve_check_text(prompt, check):
62
65
  return check, False
63
66
 
64
67
 
65
- def _build_progress_prompt(agent_output, check_output):
66
- return (
67
- f"{_PROGRESS_PROMPT}\n\n"
68
- "AGENT OUTPUT:\n"
69
- f"{agent_output}\n\n"
70
- "CHECK OUTPUT:\n"
71
- f"{check_output}"
68
+ def _build_estimate_prompt(prompt, agent_output, check_output, previous_total):
69
+ agent_text = agent_output.strip() or "(no agent output yet)"
70
+ check_text = check_output.strip() or "(no check output yet)"
71
+ lines = [
72
+ _ESTIMATE_PROMPT,
73
+ "",
74
+ "TASK:",
75
+ "```",
76
+ prompt,
77
+ "```",
78
+ ]
79
+ if previous_total is not None:
80
+ lines.append(
81
+ f"This task was previously estimated at about {previous_total} story points."
82
+ )
83
+ lines.extend(
84
+ [
85
+ "",
86
+ "AGENT OUTPUT:",
87
+ agent_text,
88
+ "",
89
+ "CHECK OUTPUT:",
90
+ check_text,
91
+ ]
72
92
  )
93
+ return "\n".join(lines)
73
94
 
74
95
 
75
96
  def _check_result(output):
@@ -91,25 +112,29 @@ def _check_result(output):
91
112
  return success, reason.strip()
92
113
 
93
114
 
94
- def _progress_result(output):
115
+ def _estimate_result(output):
95
116
  try:
96
117
  data = json.loads(output)
97
118
  except json.JSONDecodeError as exc:
98
119
  raise RuntimeError(
99
- f"Progress summary returned invalid JSON: {exc}"
120
+ f"Estimate returned invalid JSON: {exc}"
100
121
  ) from exc
101
122
 
102
123
  if not isinstance(data, dict):
103
- raise RuntimeError("Progress summary JSON must be an object.")
124
+ raise RuntimeError("Estimate JSON must be an object.")
104
125
 
105
- agent_summary = data.get("agent")
106
- check_summary = data.get("check")
107
- if not isinstance(agent_summary, str):
108
- raise RuntimeError("Progress summary JSON missing string 'agent'.")
109
- if not isinstance(check_summary, str):
110
- raise RuntimeError("Progress summary JSON missing string 'check'.")
126
+ remaining = data.get("remaining")
127
+ summary = data.get("summary")
128
+ if not isinstance(remaining, (int, float)):
129
+ raise RuntimeError("Estimate JSON missing numeric 'remaining'.")
130
+ if not isinstance(summary, str):
131
+ raise RuntimeError("Estimate JSON missing string 'summary'.")
111
132
 
112
- return _single_line(agent_summary), _single_line(check_summary)
133
+ remaining = int(round(remaining))
134
+ if remaining < 0:
135
+ remaining = 0
136
+
137
+ return remaining, _single_line(summary)
113
138
 
114
139
 
115
140
  def _single_line(text):
@@ -118,63 +143,36 @@ def _single_line(text):
118
143
  return " ".join(text.replace("\r", " ").split())
119
144
 
120
145
 
121
- def _format_duration(seconds):
146
+ def _format_elapsed(seconds):
122
147
  if seconds < 0:
123
148
  seconds = 0
124
149
  seconds = int(round(seconds))
125
150
  hours, remainder = divmod(seconds, 3600)
126
151
  minutes, seconds = divmod(remainder, 60)
127
- parts = []
128
- if hours:
129
- parts.append(f"{hours}h")
130
- if minutes or hours:
131
- parts.append(f"{minutes}m")
132
- if not hours:
133
- parts.append(f"{seconds}s")
134
- return " ".join(parts)
135
-
136
-
137
- def _progress_round_label(attempt, total):
138
- if not total:
139
- return f"Round {attempt}/unlimited"
140
- return f"Round {attempt}/{total}"
141
-
142
-
143
- def _print_progress_start(attempt, total):
144
- print(_progress_round_label(attempt, total), flush=True)
145
-
146
-
147
- def _print_progress_result(
148
- attempt,
149
- total,
150
- start_time,
151
- agent_output,
152
- check_output,
153
- cwd,
154
- yolo,
155
- flags,
156
- success,
157
- ):
158
- elapsed = time.monotonic() - start_time
159
- remaining = 0
160
- remaining_text = "unknown"
161
- if total and attempt:
162
- remaining = (elapsed / attempt) * (total - attempt)
163
- remaining_text = _format_duration(remaining)
164
-
165
- summary_prompt = _build_progress_prompt(agent_output, check_output)
166
- summary = agent(summary_prompt, cwd, yolo, flags)
167
- agent_summary, check_summary = _progress_result(summary)
168
-
169
- elapsed_text = _format_duration(elapsed)
170
- print(f"Agent: {agent_summary}", flush=True)
171
- print(f"Check: {check_summary}", flush=True)
172
- verdict = "success" if success else "failure"
173
- print(
174
- f"Verdict: {verdict} ({elapsed_text} elapsed, {remaining_text} remaining)",
175
- flush=True,
152
+ return f"{hours}h{minutes:02d}m{seconds:02d}s"
153
+
154
+
155
+ def _format_turns(attempt, total):
156
+ if total:
157
+ width = max(2, len(str(total)))
158
+ total_text = str(total)
159
+ else:
160
+ width = 2
161
+ total_text = "∞"
162
+ attempt_text = f"{attempt:0{width}d}"
163
+ return f"{attempt_text}/{total_text}"
164
+
165
+
166
+ def estimate(prompt, agent_output, check_output, cwd, yolo, flags, previous_total):
167
+ estimate_prompt = _build_estimate_prompt(
168
+ prompt,
169
+ agent_output or "",
170
+ check_output or "",
171
+ previous_total,
176
172
  )
177
- print("", flush=True)
173
+ output = agent(estimate_prompt, cwd, yolo, flags)
174
+ return _estimate_result(output)
175
+
178
176
 
179
177
  def _fix_prompt(error):
180
178
  return (
@@ -241,7 +239,7 @@ def task(
241
239
  cwd: Optional working directory for the Codex session.
242
240
  yolo: Whether to pass --yolo to Codex.
243
241
  flags: Additional raw CLI flags to pass to Codex.
244
- progress: Whether to print progress after each verification round.
242
+ progress: Whether to show a tqdm progress bar with status updates.
245
243
  set_up: Optional setup prompt to run before the task.
246
244
  tear_down: Optional cleanup prompt to run after the task.
247
245
  on_success: Optional prompt to run after a successful task.
@@ -287,7 +285,7 @@ def task_result(
287
285
  """Run a prompt with optional checker-driven retries and return TaskResult.
288
286
 
289
287
  The runner keeps a single session. Each verification attempt uses a fresh,
290
- stateless agent call. When progress is True, print a summary each round.
288
+ stateless agent call. When progress is True, show progress updates each round.
291
289
 
292
290
  Hook strings mirror task file keys: set_up, tear_down, on_success, on_failure.
293
291
  """
@@ -369,6 +367,9 @@ class Task:
369
367
  self.check_text = None
370
368
  self._yolo = yolo
371
369
  self._flags = flags
370
+ self._progress_enabled = False
371
+ self._progress_bar = None
372
+ self._progress_total = None
372
373
  self.agent = Agent(
373
374
  cwd,
374
375
  yolo,
@@ -410,6 +411,30 @@ class Task:
410
411
  def on_failure(self, result):
411
412
  """Hook called after a failed run, e.g. log the failure reason."""
412
413
 
414
+ def on_progress(
415
+ self,
416
+ turns,
417
+ max_turns,
418
+ total_estimate,
419
+ remaining_estimate,
420
+ status_line,
421
+ ):
422
+ """Hook called with progress updates."""
423
+ if not self._progress_enabled:
424
+ return
425
+ if self._progress_bar is None:
426
+ self._progress_bar = tqdm(total=total_estimate)
427
+ if total_estimate != self._progress_bar.total:
428
+ self._progress_bar.total = total_estimate
429
+ current = total_estimate - remaining_estimate
430
+ if current < 0:
431
+ current = 0
432
+ if self._progress_bar.n != current:
433
+ self._progress_bar.n = current
434
+ self._progress_bar.refresh()
435
+ if status_line:
436
+ tqdm.write(status_line, file=self._progress_bar.fp)
437
+
413
438
  def fix_prompt(self, error):
414
439
  """Build a prompt that asks the agent to fix checker failures."""
415
440
  return (
@@ -432,12 +457,35 @@ class Task:
432
457
  def __call__(self, debug=False, progress=False):
433
458
  """Run the task with checker-driven retries.
434
459
  If debug is True, log debug messages.
435
- If progress is True, print progress after each verification round.
460
+ If progress is True, show a tqdm progress bar with status updates.
436
461
  """
437
462
  try:
438
463
  # If this fails in the middle we will still try to tear down
439
464
  self.set_up()
440
465
 
466
+ self._progress_enabled = progress
467
+ if progress:
468
+ remaining, _summary = estimate(
469
+ self.prompt,
470
+ "",
471
+ "",
472
+ self.cwd,
473
+ self._yolo,
474
+ self._flags,
475
+ None,
476
+ )
477
+ self._progress_total = remaining
478
+ start_time = time.monotonic()
479
+ self.on_progress(
480
+ 0,
481
+ self.max_attempts,
482
+ self._progress_total,
483
+ remaining,
484
+ None,
485
+ )
486
+ else:
487
+ start_time = time.monotonic()
488
+
441
489
  # Start with the initial prompt
442
490
  output = self.agent(self.prompt)
443
491
  self.last_output = output
@@ -445,16 +493,10 @@ class Task:
445
493
  _logger.debug("Initial output: %s", output)
446
494
 
447
495
  # Try correcting it up to max_attempts times
448
- start_time = time.monotonic()
449
496
  error = None
450
497
  attempt = 0
451
498
  while True:
452
499
  attempt += 1
453
- if progress:
454
- _print_progress_start(
455
- attempt,
456
- self.max_attempts,
457
- )
458
500
  error = self.check(self.last_output)
459
501
  if debug:
460
502
  _logger.debug("Check error: %s", error)
@@ -463,16 +505,36 @@ class Task:
463
505
  check_output = self.last_check_output
464
506
  if self.check_skipped:
465
507
  check_output = "Verification skipped."
466
- _print_progress_result(
467
- attempt,
468
- self.max_attempts,
469
- start_time,
470
- self.last_output,
508
+ remaining, summary = estimate(
509
+ self.prompt,
510
+ self.last_output or "",
471
511
  check_output or "",
472
512
  self.cwd,
473
513
  self._yolo,
474
514
  self._flags,
475
- not error,
515
+ self._progress_total,
516
+ )
517
+ total_estimate = self._progress_total
518
+ if total_estimate is None or remaining > total_estimate:
519
+ total_estimate = remaining
520
+ self._progress_total = total_estimate
521
+ elapsed = _format_elapsed(time.monotonic() - start_time)
522
+ status_prefix = (
523
+ f"[{_format_turns(attempt, self.max_attempts)} @ {elapsed}]"
524
+ )
525
+ is_final = not error or (
526
+ self.max_attempts and attempt >= self.max_attempts
527
+ )
528
+ if is_final:
529
+ marker = "✅" if not error else "❌"
530
+ summary = f"{marker} {summary}".strip()
531
+ status_line = f"{status_prefix}: {summary}".rstrip()
532
+ self.on_progress(
533
+ attempt,
534
+ self.max_attempts,
535
+ total_estimate,
536
+ remaining,
537
+ status_line,
476
538
  )
477
539
  if not error:
478
540
  summary = self.agent(self.success_prompt())
@@ -507,6 +569,8 @@ class Task:
507
569
  finally:
508
570
  # No matter what, once we have set_up we will always tear_down
509
571
  self.tear_down()
572
+ if self._progress_bar is not None:
573
+ self._progress_bar.close()
510
574
 
511
575
 
512
576
  class AutoTask(Task):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: codexapi
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Minimal Python API for running the Codex CLI.
5
5
  License: MIT
6
6
  Keywords: codex,agent,cli,openai
@@ -68,7 +68,7 @@ codexapi run --cwd /path/to/project "Fix the failing tests."
68
68
  echo "Say hello." | codexapi run
69
69
  ```
70
70
 
71
- `codexapi task` exits with code 0 on success and 1 on failure, printing the summary.
71
+ `codexapi task` exits with code 0 on success and 1 on failure.
72
72
 
73
73
  ```bash
74
74
  codexapi task "Fix the failing tests." --max-iterations 5
@@ -82,6 +82,12 @@ Task files default to using the standard check prompt for the task. Set `check:
82
82
  Use `max_iterations` in the task file to override the default attempt cap (0 means unlimited).
83
83
  Checks are wrapped with the verifier prompt, include the agent output, and expect JSON with `success`/`reason`.
84
84
 
85
+ Example task progress run:
86
+
87
+ ```bash
88
+ ./examples/example_task_progress.sh
89
+ ```
90
+
85
91
  Show running sessions and their latest activity:
86
92
 
87
93
  ```bash
@@ -155,7 +161,7 @@ Raises `TaskFailed` when the maximum attempts are reached.
155
161
 
156
162
  - `check` (str | None | False): custom check prompt, default checker, or `False`/`"None"` to skip.
157
163
  - `max_iterations` (int): maximum number of task attempts (0 means unlimited).
158
- - `progress` (bool): print progress after each verification round.
164
+ - `progress` (bool): show a tqdm progress bar with a one-line status after each round.
159
165
  - `set_up`/`tear_down`/`on_success`/`on_failure` (str | None): optional hook prompts.
160
166
 
161
167
  ### `task_result(prompt, check=None, max_iterations=10, cwd=None, yolo=True, flags=None, progress=False, set_up=None, tear_down=None, on_success=None, on_failure=None) -> TaskResult`
File without changes
File without changes
File without changes
File without changes