@pushpalsdev/cli 1.1.35 → 1.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.35",
3
+ "version": "1.1.37",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -116,6 +116,7 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
116
116
  _BACKGROUND_NO_EDIT_WATCHDOG_S = 120
117
117
  _NO_EDIT_RECOVERY_WATCHDOG_S = 90
118
118
  _DEFAULT_NO_EDIT_RECHECK_S = 120
119
+ _DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
119
120
  _DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
120
121
  _RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
121
122
  _DEFAULT_ROLLOUT_WATCHDOG_S = 300
@@ -757,6 +758,27 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
757
758
  return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
758
759
 
759
760
 
761
def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
    """Resolve the grace period granted to the no-edit watchdog after command activity.

    Args:
        communicate_timeout_s: Overall Codex communicate timeout in seconds.

    Returns:
        ``None`` when ``communicate_timeout_s`` is falsy, or when
        ``WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S`` is exactly ``"0"``.
        Otherwise the override from that variable (or
        ``_DEFAULT_NO_EDIT_COMMAND_GRACE_S`` when the variable is unset or not a
        positive integer), clamped to ``[1, max(1, communicate_timeout_s - 1)]``.
    """
    if not communicate_timeout_s:
        return None

    # The grace must always end at least one second before the overall timeout.
    ceiling = max(1, communicate_timeout_s - 1)

    override = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S", "").strip()
    if override == "0":
        return None

    requested = _DEFAULT_NO_EDIT_COMMAND_GRACE_S
    if override:
        candidate = _to_positive_int(override)
        if candidate is not None:
            requested = candidate
        else:
            log.info(
                "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S="
                f"{override!r}; using default command-progress grace."
            )

    return max(1, min(requested, ceiling))
780
+
781
+
760
782
  def _resolve_startup_stall_watchdog_seconds(
761
783
  communicate_timeout_s: Optional[int],
762
784
  recovery_attempt: int = 0,
@@ -1339,10 +1361,94 @@ def _empty_codex_trace() -> Dict[str, Any]:
1339
1361
  "prompt_tokens": 0,
1340
1362
  "completion_tokens": 0,
1341
1363
  "total_tokens": 0,
1364
+ "active_command_ids": [],
1365
+ "command_event_count": 0,
1366
+ "last_command_activity_at": None,
1367
+ "last_command_summary": "",
1342
1368
  }
1343
1369
 
1344
1370
 
1345
- def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, Any]) -> None:
1371
+ def _looks_like_codex_command_item(value: Any) -> bool:
1372
+ if not isinstance(value, dict):
1373
+ return False
1374
+ type_text = " ".join(
1375
+ str(value.get(key) or "")
1376
+ for key in ("type", "item_type", "kind", "name", "tool_name")
1377
+ ).lower()
1378
+ if any(marker in type_text for marker in ("command_execution", "exec_command", "shell_command")):
1379
+ return True
1380
+ return any(key in value for key in ("command", "cmd", "exit_code", "aggregated_output"))
1381
+
1382
+
1383
def _record_codex_command_activity(
    parsed: Dict[str, Any],
    event_type: str,
    trace: Dict[str, Any],
    now: float,
) -> None:
    """Fold one parsed Codex JSON event into the command-tracking fields of ``trace``.

    Events that do not look like command executions (judged on ``parsed["item"]``
    first, then on ``parsed`` itself) are ignored. For command events this
    updates ``active_command_ids``, bumps ``command_event_count``, and stores
    ``last_command_activity_at`` / ``last_command_summary``.

    Args:
        parsed: Decoded JSON event.
        event_type: Event type label for ``parsed``; matched case-insensitively.
        trace: Mutable live trace dict, updated in place.
        now: Timestamp recorded as the last command activity.
    """
    candidate = parsed.get("item")
    source: Any = candidate if _looks_like_codex_command_item(candidate) else parsed
    if not _looks_like_codex_command_item(source):
        return
    # From here on ``source`` is a dict: the helper rejects everything else.

    # First non-blank textual field becomes the human-readable summary.
    summary = ""
    for field in ("command", "cmd", "name"):
        text = source.get(field)
        if isinstance(text, str) and text.strip():
            summary = _truncate_inline(text.strip(), 160)
            break

    # Prefer an explicit identifier; fall back to the summary, then a constant.
    identifier = str(
        source.get("id")
        or source.get("call_id")
        or source.get("item_id")
        or summary
        or "command"
    ).strip() or "command"

    running = trace.setdefault("active_command_ids", [])
    if not isinstance(running, list):
        running = []
        trace["active_command_ids"] = running

    status_blob = " ".join(
        str(source.get(field) or "") for field in ("status", "state", "outcome")
    ).lower()
    event_name = event_type.lower()

    def _mentions(haystack: str, needles: tuple) -> bool:
        return any(needle in haystack for needle in needles)

    finished = _mentions(event_name, ("completed", "failed", "error")) or _mentions(
        status_blob, ("completed", "failed", "cancelled", "canceled", "exited")
    )
    began = _mentions(event_name, ("started", "updated")) or _mentions(
        status_blob, ("running", "in_progress", "started")
    )

    # Completion wins over start when an event matches both.
    if finished:
        trace["active_command_ids"] = [
            entry for entry in running if str(entry) != identifier
        ]
    elif began and identifier not in running:
        running.append(identifier)

    trace["command_event_count"] = to_int(trace.get("command_event_count"), 0) + 1
    trace["last_command_activity_at"] = float(now)
    trace["last_command_summary"] = summary or event_type
1444
+
1445
+
1446
+ def _record_live_codex_stdout_line(
1447
+ line: str,
1448
+ use_json: bool,
1449
+ trace: Dict[str, Any],
1450
+ now: Optional[float] = None,
1451
+ ) -> None:
1346
1452
  stripped = line.strip()
1347
1453
  if not stripped:
1348
1454
  return
@@ -1369,6 +1475,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
1369
1475
  return
1370
1476
 
1371
1477
  if isinstance(parsed, dict):
1478
+ observed_at = float(now if now is not None else time.monotonic())
1372
1479
  usage = _extract_usage_counts(parsed)
1373
1480
  if usage is not None:
1374
1481
  trace["prompt_tokens"] = max(
@@ -1385,6 +1492,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
1385
1492
  .strip()
1386
1493
  or "event"
1387
1494
  )
1495
+ _record_codex_command_activity(parsed, event_type, trace, observed_at)
1388
1496
  event_type_counts[event_type] = to_int(event_type_counts.get(event_type), 0) + 1
1389
1497
  summary = _summarize_json_event(parsed)
1390
1498
  # Reasoning can arrive under generic event types (for example item.updated).
@@ -1449,10 +1557,13 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
1449
1557
  prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
1450
1558
  completion_tokens = to_int(trace.get("completion_tokens"), 0)
1451
1559
  total_tokens = to_int(trace.get("total_tokens"), 0)
1560
+ command_event_count = to_int(trace.get("command_event_count"), 0)
1452
1561
  if reasoning_events > 0:
1453
1562
  log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
1454
1563
  elif use_json and valid_json > 0:
1455
1564
  log.info("[codex] No reasoning-like events observed in this run.")
1565
+ if command_event_count > 0:
1566
+ log.info(f"[codex] Command execution event(s): {command_event_count}")
1456
1567
  if total_tokens > 0:
1457
1568
  log.info(
1458
1569
  f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
@@ -1473,6 +1584,7 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
1473
1584
  "prompt_tokens": prompt_tokens,
1474
1585
  "completion_tokens": completion_tokens,
1475
1586
  "total_tokens": total_tokens,
1587
+ "command_event_count": command_event_count,
1476
1588
  }
1477
1589
 
1478
1590
 
@@ -2299,8 +2411,14 @@ def _run_codex_task(
2299
2411
  if not line:
2300
2412
  continue
2301
2413
  with trace_lock:
2302
- last_activity_at["ts"] = time.monotonic()
2303
- _record_live_codex_stdout_line(line, use_json, stdout_trace_state)
2414
+ observed_at = time.monotonic()
2415
+ last_activity_at["ts"] = observed_at
2416
+ _record_live_codex_stdout_line(
2417
+ line,
2418
+ use_json,
2419
+ stdout_trace_state,
2420
+ observed_at,
2421
+ )
2304
2422
  except Exception:
2305
2423
  pass
2306
2424
  finally:
@@ -2377,6 +2495,7 @@ def _run_codex_task(
2377
2495
  else None
2378
2496
  )
2379
2497
  no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
2498
+ no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
2380
2499
  startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
2381
2500
  communicate_timeout_s,
2382
2501
  recovery_attempt=startup_stall_recovery_attempt,
@@ -2405,6 +2524,9 @@ def _run_codex_task(
2405
2524
  if rollout_watchdog_s is not None
2406
2525
  else None
2407
2526
  )
2527
+ publishable_progress_seen_at: Optional[float] = None
2528
+ publishable_progress_finalized = False
2529
+ publishable_progress_paths: List[str] = []
2408
2530
 
2409
2531
  while proc.poll() is None:
2410
2532
  now = time.monotonic()
@@ -2457,6 +2579,44 @@ def _run_codex_task(
2457
2579
  "before startup-stall recovery."
2458
2580
  )
2459
2581
  continue
2582
+ command_event_count = to_int(live_trace.get("command_event_count"), 0)
2583
+ active_commands_raw = live_trace.get("active_command_ids")
2584
+ active_command_count = (
2585
+ len(active_commands_raw)
2586
+ if isinstance(active_commands_raw, list)
2587
+ else 0
2588
+ )
2589
+ last_command_activity_at = 0.0
2590
+ try:
2591
+ last_command_activity_at = float(
2592
+ live_trace.get("last_command_activity_at") or 0.0
2593
+ )
2594
+ except Exception:
2595
+ last_command_activity_at = 0.0
2596
+ if command_event_count > 0 and no_edit_command_grace_s is not None:
2597
+ command_grace_deadline = 0.0
2598
+ if active_command_count > 0:
2599
+ # Do not kill while Codex is actively running a tool command; poll
2600
+ # again soon, but keep the total grace bounded by the hard cap below.
2601
+ command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
2602
+ elif last_command_activity_at > 0:
2603
+ command_grace_deadline = last_command_activity_at + float(
2604
+ no_edit_command_grace_s
2605
+ )
2606
+ if command_grace_deadline > now:
2607
+ no_edit_deadline = command_grace_deadline
2608
+ remaining_s = int(max(1.0, command_grace_deadline - now))
2609
+ command_detail = (
2610
+ f"{active_command_count} active command(s)"
2611
+ if active_command_count > 0
2612
+ else "recent command completion"
2613
+ )
2614
+ log.info(
2615
+ "No-edit watchdog observed Codex tool progress "
2616
+ f"({command_detail}); allowing {remaining_s}s for a "
2617
+ "publishable patch before recovery."
2618
+ )
2619
+ continue
2460
2620
  no_edit_artifact_only_paths = _describe_non_publishable_paths(
2461
2621
  changed_paths,
2462
2622
  baseline_snapshot,
@@ -2478,6 +2638,22 @@ def _run_codex_task(
2478
2638
  )
2479
2639
  _terminate_active_child()
2480
2640
  break
2641
+ if publishable_progress_seen_at is None:
2642
+ publishable_progress_seen_at = now
2643
+ publishable_progress_paths = list(effective_paths)
2644
+ elif _has_credible_shell_wrapper_progress(effective_paths):
2645
+ publishable_progress_paths = list(effective_paths)
2646
+ publishable_age_s = now - publishable_progress_seen_at
2647
+ if publishable_age_s >= float(no_edit_recheck_s):
2648
+ publishable_progress_finalized = True
2649
+ log.info(
2650
+ "No-edit watchdog observed durable publishable file changes "
2651
+ f"({_describe_publishable_paths(effective_paths)}) for "
2652
+ f"{int(publishable_age_s)}s; stopping Codex early so "
2653
+ "QualityGate/ValidationGate can use the remaining budget."
2654
+ )
2655
+ _terminate_active_child()
2656
+ break
2481
2657
  no_edit_deadline = now + float(no_edit_recheck_s)
2482
2658
  log.info(
2483
2659
  "No-edit watchdog observed publishable-looking file changes "
@@ -2639,6 +2815,35 @@ def _run_codex_task(
2639
2815
  "cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
2640
2816
  }
2641
2817
 
2818
+ if publishable_progress_finalized:
2819
+ changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2820
+ effective_paths = effective_paths or publishable_progress_paths
2821
+ last_message = _read_text_if_exists(last_message_path)
2822
+ log_git_status(repo, log)
2823
+ prefix = (
2824
+ "Codex produced durable publishable file changes. PushPals stopped the "
2825
+ "Codex child early to preserve validation and revision budget; the normal "
2826
+ "QualityGate/ValidationGate will catch any incomplete edit."
2827
+ )
2828
+ return {
2829
+ "ok": True,
2830
+ "summary": (
2831
+ "openai_codex stopped after durable publishable progress "
2832
+ f"({len(effective_paths)} file(s))"
2833
+ ),
2834
+ "stdout": _truncate(
2835
+ _build_success_stdout(
2836
+ effective_paths=effective_paths,
2837
+ last_message=last_message,
2838
+ trace_excerpt=trace_excerpt,
2839
+ prefix=prefix,
2840
+ )
2841
+ ),
2842
+ "stderr": _truncate(stderr),
2843
+ "exitCode": 0,
2844
+ "usage": usage,
2845
+ }
2846
+
2642
2847
  if no_edit_watchdog_fired:
2643
2848
  startup_stall = _codex_trace_is_startup_stall(stdout_trace)
2644
2849
  if startup_stall and startup_stall_recovery_attempt < _MAX_STARTUP_STALL_RECOVERY_ATTEMPTS:
@@ -1347,6 +1347,259 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1347
1347
  self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
1348
1348
  self.assertIn("src/", str(result.get("stdout") or ""))
1349
1349
 
1350
def _run_codex_task_against_stub(
    self,
    *,
    temp_prefix: str,
    readme_text: str,
    commit_message: str,
    stub_name: str,
    stub_body_lines: list,
    api_key: str,
    extra_env: dict,
    instruction: str,
) -> dict:
    """Run ``_run_codex_task`` against a throwaway git repo and a fake Codex script.

    Shared scaffolding for the watchdog tests below: seeds a one-commit repo,
    writes a stub executable made of a common preamble plus ``stub_body_lines``,
    points the runtime at it through environment overrides, and returns the
    task result dict.

    Args:
        temp_prefix: Prefix for the temporary directory.
        readme_text: Contents of the seeded ``README.md``.
        commit_message: Message for the seed commit.
        stub_name: File name of the fake Codex script inside the temp dir.
        stub_body_lines: Scenario-specific lines appended after the preamble.
        api_key: Value used for ``OPENAI_API_KEY``.
        extra_env: Scenario-specific environment overrides.
        instruction: Instruction text passed to ``_run_codex_task``.
    """
    with tempfile.TemporaryDirectory(prefix=temp_prefix) as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text(readme_text, encoding="utf-8")
        for git_args in (
            ["init"],
            ["config", "user.name", "PushPals Test"],
            ["config", "user.email", "pushpals-tests@example.com"],
            ["add", "README.md"],
            ["commit", "-m", commit_message],
        ):
            subprocess.run(
                ["git", *git_args],
                cwd=repo,
                check=True,
                capture_output=True,
                text=True,
            )

        # Every stub parses --output-last-message, drains stdin, and announces
        # the thread/turn before running its scenario-specific body.
        stub_preamble = [
            "from pathlib import Path",
            "import json",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "sys.stdin.read()",
            "print(json.dumps({'type': 'thread.started'}), flush=True)",
            "print(json.dumps({'type': 'turn.started'}), flush=True)",
        ]
        stub_path = Path(temp_dir) / stub_name
        stub_path.write_text(
            "\n".join(stub_preamble + list(stub_body_lines)),
            encoding="utf-8",
        )

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": api_key,
            "WORKERPALS_OPENAI_CODEX_JSON": "true",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        env_overrides.update(extra_env)
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            return _run_codex_task(str(repo), instruction, [])

def test_run_codex_task_no_edit_watchdog_allows_command_backed_discovery(self) -> None:
    """A running/just-finished command postpones the 1s no-edit watchdog until the patch lands."""
    result = self._run_codex_task_against_stub(
        temp_prefix="pushpals-codex-no-edit-command-grace-",
        readme_text="# command grace repo\n",
        commit_message="chore: seed command grace repo",
        stub_name="fake_codex_no_edit_command_grace.py",
        stub_body_lines=[
            "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'in_progress'}}), flush=True)",
            "time.sleep(1.4)",
            "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# command grace repo'}}), flush=True)",
            "time.sleep(1.6)",
            "Path('src').mkdir(exist_ok=True)",
            "Path('src/command-grace.txt').write_text('patched after command-backed discovery\\n', encoding='utf-8')",
            "if last_message_path:",
            "    Path(last_message_path).write_text('Patched after command-backed discovery.', encoding='utf-8')",
            "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after command-backed discovery.'}}), flush=True)",
        ],
        api_key="pushpals-no-edit-command-grace-test-key",
        extra_env={"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5"},
        instruction="Add one focused contract assertion after inspecting the hinted test.",
    )

    self.assertTrue(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 0)
    self.assertIn("Patched after command-backed discovery", str(result.get("stdout") or ""))
    self.assertIn("src/", str(result.get("stdout") or ""))

def test_run_codex_task_no_edit_watchdog_extends_after_later_command_progress(self) -> None:
    """A second command issued after the first grace window renews the grace period."""
    result = self._run_codex_task_against_stub(
        temp_prefix="pushpals-codex-no-edit-late-command-",
        readme_text="# late command grace repo\n",
        commit_message="chore: seed late command repo",
        stub_name="fake_codex_late_command_grace.py",
        stub_body_lines=[
            "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
            "time.sleep(0.2)",
            "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0}}), flush=True)",
            "time.sleep(2.2)",
            "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'in_progress'}}), flush=True)",
            "time.sleep(0.2)",
            "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'completed', 'exit_code': 0}}), flush=True)",
            "time.sleep(2.0)",
            "Path('src').mkdir(exist_ok=True)",
            "Path('src/late-command-grace.txt').write_text('patched after later command progress\\n', encoding='utf-8')",
            "if last_message_path:",
            "    Path(last_message_path).write_text('Patched after later command progress.', encoding='utf-8')",
            "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after later command progress.'}}), flush=True)",
        ],
        api_key="pushpals-no-edit-late-command-test-key",
        extra_env={"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "3"},
        instruction="Add one focused contract assertion after a later targeted read.",
    )

    self.assertTrue(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 0)
    self.assertIn("Patched after later command progress", str(result.get("stdout") or ""))
    self.assertIn("src/", str(result.get("stdout") or ""))

def test_run_codex_task_finalizes_after_durable_publishable_progress(self) -> None:
    """Codex is stopped early, and reported as ok, once a publishable edit has persisted."""
    result = self._run_codex_task_against_stub(
        temp_prefix="pushpals-codex-durable-progress-",
        readme_text="# durable progress repo\n",
        commit_message="chore: seed durable progress repo",
        stub_name="fake_codex_durable_progress.py",
        stub_body_lines=[
            "Path('src').mkdir(exist_ok=True)",
            "Path('src/durable-progress.txt').write_text('durable patch\\n', encoding='utf-8')",
            "if last_message_path:",
            "    Path(last_message_path).write_text('Created durable patch and kept thinking.', encoding='utf-8')",
            "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Created durable patch and kept thinking.'}}), flush=True)",
            # Keep the child alive well past the recheck window so the watchdog must stop it.
            "time.sleep(10)",
        ],
        api_key="pushpals-durable-progress-test-key",
        extra_env={"WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1"},
        instruction="Make a focused patch and stop once it is durable.",
    )

    self.assertTrue(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 0)
    self.assertIn("stopped after durable publishable progress", str(result.get("summary") or ""))
    self.assertIn("src/", str(result.get("stdout") or ""))
1602
+
1350
1603
  def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
1351
1604
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
1352
1605
  repo = Path(temp_dir) / "repo"
@@ -335,6 +335,22 @@ export function shouldSkipCriticForDeterministicValidationRevision(opts: {
335
335
  return opts.validationRuns.some(isDeterministicFastValidationFailure);
336
336
  }
337
337
 
338
/**
 * Decides whether the critic should be skipped so the remaining job budget is
 * kept for the revision that deterministic quality checks already require.
 *
 * The critic is skipped when the remaining budget cannot cover both the
 * minimum revision budget and the critic's worst-case runtime (its timeout,
 * doubled when the timeout behavior is `"retry_once"`).
 *
 * All millisecond inputs are floored and clamped to be non-negative. `NaN` is
 * treated as `0`: an unknown remaining budget counts as exhausted instead of
 * silently making every comparison false and letting the critic run.
 *
 * @param opts.deterministicRequiresRevision Whether a revision is already required; when false the critic is never skipped by this rule.
 * @param opts.remainingBudgetMs Budget left for the job, in milliseconds.
 * @param opts.minimumRevisionBudgetMs Budget that must remain for a revision, in milliseconds.
 * @param opts.criticTimeoutMs Timeout of a single critic attempt, in milliseconds.
 * @param opts.criticTimeoutBehavior Critic timeout policy; only `"retry_once"` changes the attempt count.
 * @returns `true` when the critic should be skipped.
 */
export function shouldSkipCriticToPreserveRevisionBudget(opts: {
  deterministicRequiresRevision: boolean;
  remainingBudgetMs: number;
  minimumRevisionBudgetMs: number;
  criticTimeoutMs: number;
  criticTimeoutBehavior: "skip" | "retry_once" | "block" | string;
}): boolean {
  if (!opts.deterministicRequiresRevision) return false;

  // Math.max(0, NaN) is NaN, so NaN has to be handled before clamping.
  const toWholeNonNegativeMs = (value: number): number =>
    Number.isNaN(value) ? 0 : Math.max(0, Math.floor(value));

  const remainingBudgetMs = toWholeNonNegativeMs(opts.remainingBudgetMs);
  const minimumRevisionBudgetMs = toWholeNonNegativeMs(opts.minimumRevisionBudgetMs);
  const criticTimeoutMs = toWholeNonNegativeMs(opts.criticTimeoutMs);

  const criticAttempts = opts.criticTimeoutBehavior === "retry_once" ? 2 : 1;
  const criticWorstCaseMs = criticTimeoutMs * criticAttempts;
  return remainingBudgetMs < minimumRevisionBudgetMs + criticWorstCaseMs;
}
353
+
338
354
  export function workerAttemptRolloutScore(params: {
339
355
  executorElapsedMs: number;
340
356
  qualityElapsedMs: number;
@@ -7975,11 +7991,23 @@ export async function executeJob(
7975
7991
  validationOutsideTaskScope,
7976
7992
  validationRuns: quality.validationRuns,
7977
7993
  });
7994
+ const preCriticRevisionBudget = qualityRevisionBudgetDecision({
7995
+ jobElapsedMs: Date.now() - jobStartedAt,
7996
+ executionBudgetMs,
7997
+ });
7998
+ const skipCriticForRevisionBudget = shouldSkipCriticToPreserveRevisionBudget({
7999
+ deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
8000
+ remainingBudgetMs: preCriticRevisionBudget.remainingBudgetMs,
8001
+ minimumRevisionBudgetMs: preCriticRevisionBudget.minimumRevisionBudgetMs,
8002
+ criticTimeoutMs: resolveQualityCriticTimeoutMs(runtimeConfig),
8003
+ criticTimeoutBehavior: resolveQualityCriticTimeoutBehavior(runtimeConfig),
8004
+ });
7978
8005
  const critic =
7979
8006
  quality.skipped ||
7980
8007
  !qualityGatePolicy.criticGateEnabled ||
7981
8008
  skipCriticAfterExecutorTimeout ||
7982
- skipCriticForDeterministicValidationRevision
8009
+ skipCriticForDeterministicValidationRevision ||
8010
+ skipCriticForRevisionBudget
7983
8011
  ? null
7984
8012
  : executor === "openai_codex"
7985
8013
  ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -8020,6 +8048,11 @@ export async function executeJob(
8020
8048
  "stdout",
8021
8049
  "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
8022
8050
  );
8051
+ } else if (skipCriticForRevisionBudget) {
8052
+ onLog?.(
8053
+ "stdout",
8054
+ `[CriticGate] Skipping critic because deterministic quality already requires revision and remaining budget (${preCriticRevisionBudget.remainingBudgetMs}ms) must be reserved for the next worker turn.`,
8055
+ );
8023
8056
  }
8024
8057
  const rolloutScore = workerAttemptRolloutScore({
8025
8058
  executorElapsedMs,