@pushpalsdev/cli 1.1.34 → 1.1.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.34",
3
+ "version": "1.1.36",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -116,6 +116,9 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
116
116
  _BACKGROUND_NO_EDIT_WATCHDOG_S = 120
117
117
  _NO_EDIT_RECOVERY_WATCHDOG_S = 90
118
118
  _DEFAULT_NO_EDIT_RECHECK_S = 120
119
+ _DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
120
+ _DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
121
+ _RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
119
122
  _DEFAULT_ROLLOUT_WATCHDOG_S = 300
120
123
  _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
121
124
  _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
@@ -755,6 +758,65 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
755
758
  return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
756
759
 
757
760
 
761
+ def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
762
+ if not communicate_timeout_s:
763
+ return None
764
+
765
+ raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S", "").strip()
766
+ if raw:
767
+ if raw == "0":
768
+ return None
769
+ parsed = _to_positive_int(raw)
770
+ if parsed is None:
771
+ log.info(
772
+ "Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S="
773
+ f"{raw!r}; using default command-progress grace."
774
+ )
775
+ else:
776
+ return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
777
+
778
+ upper = max(1, communicate_timeout_s - 1)
779
+ return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
780
+
781
+
782
+ def _resolve_startup_stall_watchdog_seconds(
783
+ communicate_timeout_s: Optional[int],
784
+ recovery_attempt: int = 0,
785
+ ) -> Optional[int]:
786
+ if not communicate_timeout_s:
787
+ return None
788
+
789
+ raw = os.environ.get("WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S", "").strip()
790
+ if raw:
791
+ if raw == "0":
792
+ return None
793
+ parsed = _to_positive_int(raw)
794
+ if parsed is None:
795
+ log.info(
796
+ "Invalid WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S="
797
+ f"{raw!r}; using default startup-stall watchdog."
798
+ )
799
+ else:
800
+ return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
801
+
802
+ default_s = (
803
+ _RECOVERY_STARTUP_STALL_WATCHDOG_S
804
+ if recovery_attempt > 0
805
+ else _DEFAULT_STARTUP_STALL_WATCHDOG_S
806
+ )
807
+ floor_s = 60
808
+ return max(floor_s, min(default_s, max(floor_s, communicate_timeout_s - 1)))
809
+
810
+
811
+ def _startup_stall_recovery_model(current_model: str) -> str:
812
+ normalized = str(current_model or "").strip()
813
+ if not normalized:
814
+ return LEGACY_CODEX_MODEL_FALLBACK
815
+ if normalized.lower() == LEGACY_CODEX_MODEL_FALLBACK.lower():
816
+ return normalized
817
+ return LEGACY_CODEX_MODEL_FALLBACK
818
+
819
+
758
820
  def _looks_like_web_review_prompt(prompt: str) -> bool:
759
821
  text = str(prompt or "").lower()
760
822
  return "repo-native web review" in text or "web review path" in text
@@ -1299,10 +1361,94 @@ def _empty_codex_trace() -> Dict[str, Any]:
1299
1361
  "prompt_tokens": 0,
1300
1362
  "completion_tokens": 0,
1301
1363
  "total_tokens": 0,
1364
+ "active_command_ids": [],
1365
+ "command_event_count": 0,
1366
+ "last_command_activity_at": None,
1367
+ "last_command_summary": "",
1302
1368
  }
1303
1369
 
1304
1370
 
1305
- def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, Any]) -> None:
1371
+ def _looks_like_codex_command_item(value: Any) -> bool:
1372
+ if not isinstance(value, dict):
1373
+ return False
1374
+ type_text = " ".join(
1375
+ str(value.get(key) or "")
1376
+ for key in ("type", "item_type", "kind", "name", "tool_name")
1377
+ ).lower()
1378
+ if any(marker in type_text for marker in ("command_execution", "exec_command", "shell_command")):
1379
+ return True
1380
+ return any(key in value for key in ("command", "cmd", "exit_code", "aggregated_output"))
1381
+
1382
+
1383
+ def _record_codex_command_activity(
1384
+ parsed: Dict[str, Any],
1385
+ event_type: str,
1386
+ trace: Dict[str, Any],
1387
+ now: float,
1388
+ ) -> None:
1389
+ item = parsed.get("item")
1390
+ command_source: Any = item if _looks_like_codex_command_item(item) else parsed
1391
+ if not _looks_like_codex_command_item(command_source):
1392
+ return
1393
+
1394
+ command_text = ""
1395
+ if isinstance(command_source, dict):
1396
+ for key in ("command", "cmd", "name"):
1397
+ raw = command_source.get(key)
1398
+ if isinstance(raw, str) and raw.strip():
1399
+ command_text = _truncate_inline(raw.strip(), 160)
1400
+ break
1401
+ command_id = ""
1402
+ if isinstance(command_source, dict):
1403
+ command_id = str(
1404
+ command_source.get("id")
1405
+ or command_source.get("call_id")
1406
+ or command_source.get("item_id")
1407
+ or command_text
1408
+ or "command"
1409
+ ).strip()
1410
+ command_id = command_id or "command"
1411
+
1412
+ active = trace.setdefault("active_command_ids", [])
1413
+ if not isinstance(active, list):
1414
+ active = []
1415
+ trace["active_command_ids"] = active
1416
+
1417
+ status_text = ""
1418
+ if isinstance(command_source, dict):
1419
+ status_text = " ".join(
1420
+ str(command_source.get(key) or "")
1421
+ for key in ("status", "state", "outcome")
1422
+ ).lower()
1423
+ event_lower = event_type.lower()
1424
+ completed = (
1425
+ "completed" in event_lower
1426
+ or "failed" in event_lower
1427
+ or "error" in event_lower
1428
+ or any(marker in status_text for marker in ("completed", "failed", "cancelled", "canceled", "exited"))
1429
+ )
1430
+ started = (
1431
+ "started" in event_lower
1432
+ or "updated" in event_lower
1433
+ or any(marker in status_text for marker in ("running", "in_progress", "started"))
1434
+ )
1435
+
1436
+ if completed:
1437
+ trace["active_command_ids"] = [item for item in active if str(item) != command_id]
1438
+ elif started and command_id not in active:
1439
+ active.append(command_id)
1440
+
1441
+ trace["command_event_count"] = to_int(trace.get("command_event_count"), 0) + 1
1442
+ trace["last_command_activity_at"] = float(now)
1443
+ trace["last_command_summary"] = command_text or event_type
1444
+
1445
+
1446
+ def _record_live_codex_stdout_line(
1447
+ line: str,
1448
+ use_json: bool,
1449
+ trace: Dict[str, Any],
1450
+ now: Optional[float] = None,
1451
+ ) -> None:
1306
1452
  stripped = line.strip()
1307
1453
  if not stripped:
1308
1454
  return
@@ -1329,6 +1475,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
1329
1475
  return
1330
1476
 
1331
1477
  if isinstance(parsed, dict):
1478
+ observed_at = float(now if now is not None else time.monotonic())
1332
1479
  usage = _extract_usage_counts(parsed)
1333
1480
  if usage is not None:
1334
1481
  trace["prompt_tokens"] = max(
@@ -1345,6 +1492,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
1345
1492
  .strip()
1346
1493
  or "event"
1347
1494
  )
1495
+ _record_codex_command_activity(parsed, event_type, trace, observed_at)
1348
1496
  event_type_counts[event_type] = to_int(event_type_counts.get(event_type), 0) + 1
1349
1497
  summary = _summarize_json_event(parsed)
1350
1498
  # Reasoning can arrive under generic event types (for example item.updated).
@@ -1409,10 +1557,13 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
1409
1557
  prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
1410
1558
  completion_tokens = to_int(trace.get("completion_tokens"), 0)
1411
1559
  total_tokens = to_int(trace.get("total_tokens"), 0)
1560
+ command_event_count = to_int(trace.get("command_event_count"), 0)
1412
1561
  if reasoning_events > 0:
1413
1562
  log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
1414
1563
  elif use_json and valid_json > 0:
1415
1564
  log.info("[codex] No reasoning-like events observed in this run.")
1565
+ if command_event_count > 0:
1566
+ log.info(f"[codex] Command execution event(s): {command_event_count}")
1416
1567
  if total_tokens > 0:
1417
1568
  log.info(
1418
1569
  f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
@@ -1433,6 +1584,7 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
1433
1584
  "prompt_tokens": prompt_tokens,
1434
1585
  "completion_tokens": completion_tokens,
1435
1586
  "total_tokens": total_tokens,
1587
+ "command_event_count": command_event_count,
1436
1588
  }
1437
1589
 
1438
1590
 
@@ -2259,8 +2411,14 @@ def _run_codex_task(
2259
2411
  if not line:
2260
2412
  continue
2261
2413
  with trace_lock:
2262
- last_activity_at["ts"] = time.monotonic()
2263
- _record_live_codex_stdout_line(line, use_json, stdout_trace_state)
2414
+ observed_at = time.monotonic()
2415
+ last_activity_at["ts"] = observed_at
2416
+ _record_live_codex_stdout_line(
2417
+ line,
2418
+ use_json,
2419
+ stdout_trace_state,
2420
+ observed_at,
2421
+ )
2264
2422
  except Exception:
2265
2423
  pass
2266
2424
  finally:
@@ -2337,6 +2495,16 @@ def _run_codex_task(
2337
2495
  else None
2338
2496
  )
2339
2497
  no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
2498
+ no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
2499
+ startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
2500
+ communicate_timeout_s,
2501
+ recovery_attempt=startup_stall_recovery_attempt,
2502
+ )
2503
+ startup_stall_deadline = (
2504
+ started_at + float(startup_stall_watchdog_s)
2505
+ if startup_stall_watchdog_s is not None
2506
+ else None
2507
+ )
2340
2508
  rollout_watchdog_s = (
2341
2509
  _resolve_rollout_watchdog_seconds(
2342
2510
  prompt,
@@ -2351,6 +2519,11 @@ def _run_codex_task(
2351
2519
  if no_edit_watchdog_s is not None
2352
2520
  else None
2353
2521
  )
2522
+ no_edit_command_grace_cap_deadline = (
2523
+ started_at + float(no_edit_watchdog_s + no_edit_command_grace_s)
2524
+ if no_edit_watchdog_s is not None and no_edit_command_grace_s is not None
2525
+ else None
2526
+ )
2354
2527
  rollout_deadline = (
2355
2528
  started_at + float(rollout_watchdog_s)
2356
2529
  if rollout_watchdog_s is not None
@@ -2364,9 +2537,93 @@ def _run_codex_task(
2364
2537
  _terminate_active_child()
2365
2538
  break
2366
2539
 
2540
+ if startup_stall_deadline is not None and now >= startup_stall_deadline:
2541
+ with trace_lock:
2542
+ live_trace = dict(stdout_trace_state)
2543
+ summaries = stdout_trace_state.get("summaries")
2544
+ if isinstance(summaries, list):
2545
+ live_trace["summaries"] = list(summaries)
2546
+ if _codex_trace_is_startup_stall(live_trace):
2547
+ changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2548
+ if not effective_paths:
2549
+ no_edit_artifact_only_paths = _describe_non_publishable_paths(
2550
+ changed_paths,
2551
+ baseline_snapshot,
2552
+ )
2553
+ no_edit_watchdog_fired = True
2554
+ elapsed_s = int(max(0.0, now - started_at))
2555
+ log.info(
2556
+ f"Startup-stall watchdog fired after {elapsed_s}s with no assistant/tool progress."
2557
+ )
2558
+ _terminate_active_child()
2559
+ break
2560
+ startup_stall_deadline = None
2561
+
2367
2562
  if no_edit_deadline is not None and now >= no_edit_deadline:
2368
2563
  changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2369
2564
  if not effective_paths:
2565
+ with trace_lock:
2566
+ live_trace = dict(stdout_trace_state)
2567
+ summaries = stdout_trace_state.get("summaries")
2568
+ if isinstance(summaries, list):
2569
+ live_trace["summaries"] = list(summaries)
2570
+ startup_only = _codex_trace_is_startup_stall(live_trace)
2571
+ if (
2572
+ startup_only
2573
+ and startup_stall_deadline is not None
2574
+ and now < startup_stall_deadline
2575
+ ):
2576
+ no_edit_deadline = startup_stall_deadline
2577
+ remaining_s = int(max(1.0, startup_stall_deadline - now))
2578
+ log.info(
2579
+ "No-edit watchdog observed only Codex startup events; "
2580
+ f"allowing {remaining_s}s for first assistant/tool progress "
2581
+ "before startup-stall recovery."
2582
+ )
2583
+ continue
2584
+ command_event_count = to_int(live_trace.get("command_event_count"), 0)
2585
+ active_commands_raw = live_trace.get("active_command_ids")
2586
+ active_command_count = (
2587
+ len(active_commands_raw)
2588
+ if isinstance(active_commands_raw, list)
2589
+ else 0
2590
+ )
2591
+ last_command_activity_at = 0.0
2592
+ try:
2593
+ last_command_activity_at = float(
2594
+ live_trace.get("last_command_activity_at") or 0.0
2595
+ )
2596
+ except Exception:
2597
+ last_command_activity_at = 0.0
2598
+ if command_event_count > 0 and no_edit_command_grace_s is not None:
2599
+ command_grace_deadline = 0.0
2600
+ if active_command_count > 0:
2601
+ # Do not kill while Codex is actively running a tool command; poll
2602
+ # again soon, but keep the total grace bounded by the hard cap below.
2603
+ command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
2604
+ elif last_command_activity_at > 0:
2605
+ command_grace_deadline = last_command_activity_at + float(
2606
+ no_edit_command_grace_s
2607
+ )
2608
+ if no_edit_command_grace_cap_deadline is not None:
2609
+ command_grace_deadline = min(
2610
+ command_grace_deadline,
2611
+ no_edit_command_grace_cap_deadline,
2612
+ )
2613
+ if command_grace_deadline > now:
2614
+ no_edit_deadline = command_grace_deadline
2615
+ remaining_s = int(max(1.0, command_grace_deadline - now))
2616
+ command_detail = (
2617
+ f"{active_command_count} active command(s)"
2618
+ if active_command_count > 0
2619
+ else "recent command completion"
2620
+ )
2621
+ log.info(
2622
+ "No-edit watchdog observed Codex tool progress "
2623
+ f"({command_detail}); allowing {remaining_s}s for a "
2624
+ "publishable patch before recovery."
2625
+ )
2626
+ continue
2370
2627
  no_edit_artifact_only_paths = _describe_non_publishable_paths(
2371
2628
  changed_paths,
2372
2629
  baseline_snapshot,
@@ -2377,9 +2634,15 @@ def _run_codex_task(
2377
2634
  if no_edit_artifact_only_paths
2378
2635
  else ""
2379
2636
  )
2380
- log.info(
2381
- f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
2382
- )
2637
+ if startup_only:
2638
+ elapsed_s = int(max(0.0, now - started_at))
2639
+ log.info(
2640
+ f"Startup-stall watchdog fired after {elapsed_s}s with no assistant/tool progress."
2641
+ )
2642
+ else:
2643
+ log.info(
2644
+ f"No-edit watchdog fired after {int(no_edit_watchdog_s or 0)}s with no publishable file changes.{artifact_detail} Retrying with patch-first guidance."
2645
+ )
2383
2646
  _terminate_active_child()
2384
2647
  break
2385
2648
  no_edit_deadline = now + float(no_edit_recheck_s)
@@ -2550,9 +2813,15 @@ def _run_codex_task(
2550
2813
  *supplemental_guidance,
2551
2814
  _build_startup_stall_recovery_guidance(trace_excerpt),
2552
2815
  ]
2816
+ recovery_model = _startup_stall_recovery_model(model)
2817
+ recovery_detail = (
2818
+ f" using fallback model {recovery_model!r}"
2819
+ if recovery_model and recovery_model != model
2820
+ else ""
2821
+ )
2553
2822
  log.warning(
2554
2823
  "Codex emitted only startup events before the no-edit watchdog; "
2555
- "restarting Codex once before classifying the job terminally."
2824
+ f"restarting Codex once{recovery_detail} before classifying the job terminally."
2556
2825
  )
2557
2826
  retry_result = _run_codex_task(
2558
2827
  repo,
@@ -2563,7 +2832,7 @@ def _run_codex_task(
2563
2832
  startup_stall_recovery_attempt=startup_stall_recovery_attempt + 1,
2564
2833
  no_edit_recovery_attempt=no_edit_recovery_attempt,
2565
2834
  rollout_recovery_attempt=rollout_recovery_attempt,
2566
- model_override=model_override,
2835
+ model_override=recovery_model or model_override,
2567
2836
  baseline_changes=baseline_snapshot,
2568
2837
  )
2569
2838
  retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
@@ -49,6 +49,7 @@ from openai_codex_executor import (
49
49
  _resolve_codex_command_prefix,
50
50
  _resolve_no_edit_watchdog_seconds,
51
51
  _resolve_rollout_watchdog_seconds,
52
+ _resolve_startup_stall_watchdog_seconds,
52
53
  _unwrap_shell_wrapper_command,
53
54
  _usage_from_trace_or_estimate,
54
55
  )
@@ -372,6 +373,63 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
372
373
  self.assertEqual(task.repo, str(repo.resolve()))
373
374
  self.assertEqual(task.instruction, "Make one small publishable change")
374
375
 
376
+ def test_parse_payload_accepts_positional_payload_file_path(self) -> None:
377
+ with tempfile.TemporaryDirectory(prefix="pushpals-payload-file-positional-") as temp_dir:
378
+ repo = Path(temp_dir) / "repo"
379
+ repo.mkdir(parents=True, exist_ok=True)
380
+ payload = {
381
+ "kind": "task.execute",
382
+ "repo": str(repo),
383
+ "params": {"instruction": "Recover from a direct-worker payload handoff"},
384
+ }
385
+ encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
386
+ payload_file = Path(temp_dir) / "payload.b64"
387
+ payload_file.write_text(encoded, encoding="utf-8")
388
+
389
+ task = parse_task_execute_payload(
390
+ ["executor", str(payload_file)],
391
+ logger=Logger("[test]"),
392
+ )
393
+
394
+ self.assertEqual(task.kind, "task.execute")
395
+ self.assertEqual(task.repo, str(repo.resolve()))
396
+ self.assertEqual(task.instruction, "Recover from a direct-worker payload handoff")
397
+
398
+ def test_parse_payload_accepts_unpadded_base64_payload(self) -> None:
399
+ with tempfile.TemporaryDirectory(prefix="pushpals-payload-unpadded-") as temp_dir:
400
+ repo = Path(temp_dir) / "repo"
401
+ repo.mkdir(parents=True, exist_ok=True)
402
+ payload = {
403
+ "kind": "task.execute",
404
+ "repo": str(repo),
405
+ "params": {"instruction": "Accept wrapper-normalized payload padding"},
406
+ }
407
+ encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")
408
+ unpadded = encoded.rstrip("=")
409
+
410
+ task = parse_task_execute_payload(["executor", unpadded], logger=Logger("[test]"))
411
+
412
+ self.assertEqual(task.kind, "task.execute")
413
+ self.assertEqual(task.repo, str(repo.resolve()))
414
+ self.assertEqual(task.instruction, "Accept wrapper-normalized payload padding")
415
+
416
+ def test_parse_payload_accepts_raw_json_payload(self) -> None:
417
+ with tempfile.TemporaryDirectory(prefix="pushpals-payload-raw-json-") as temp_dir:
418
+ repo = Path(temp_dir) / "repo"
419
+ repo.mkdir(parents=True, exist_ok=True)
420
+ payload = {
421
+ "kind": "task.execute",
422
+ "repo": str(repo),
423
+ "params": {"instruction": "Accept raw JSON from a recovery wrapper"},
424
+ }
425
+ raw_json = json.dumps(payload)
426
+
427
+ task = parse_task_execute_payload(["executor", raw_json], logger=Logger("[test]"))
428
+
429
+ self.assertEqual(task.kind, "task.execute")
430
+ self.assertEqual(task.repo, str(repo.resolve()))
431
+ self.assertEqual(task.instruction, "Accept raw JSON from a recovery wrapper")
432
+
375
433
  def test_parse_payload_prefers_helper_tests_for_visual_derivation_tasks(self) -> None:
376
434
  with tempfile.TemporaryDirectory(prefix="pushpals-visual-guidance-") as temp_dir:
377
435
  repo = Path(temp_dir) / "repo"
@@ -1091,13 +1149,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1091
1149
  "",
1092
1150
  "argv = sys.argv[1:]",
1093
1151
  "last_message_path = None",
1152
+ "model = ''",
1094
1153
  "for index, arg in enumerate(argv):",
1095
1154
  " if arg == '--output-last-message' and index + 1 < len(argv):",
1096
1155
  " last_message_path = argv[index + 1]",
1156
+ " if arg == '-m' and index + 1 < len(argv):",
1157
+ " model = argv[index + 1]",
1097
1158
  " break",
1098
1159
  "",
1099
1160
  "prompt = sys.stdin.read()",
1100
- "if 'Codex startup-stall recovery' in prompt:",
1161
+ "if 'Codex startup-stall recovery' in prompt and model == 'gpt-5.4':",
1101
1162
  " Path('src').mkdir(exist_ok=True)",
1102
1163
  " Path('src/startup-stall-recovered.txt').write_text('patched after restart\\n', encoding='utf-8')",
1103
1164
  " if last_message_path:",
@@ -1119,7 +1180,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1119
1180
  "OPENAI_API_KEY": "pushpals-startup-stall-test-key",
1120
1181
  "WORKERPALS_OPENAI_CODEX_JSON": "true",
1121
1182
  "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
1122
- "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
1183
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
1184
+ "WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "1",
1123
1185
  "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1124
1186
  }
1125
1187
  with mock.patch.dict(os.environ, env_overrides, clear=False):
@@ -1189,6 +1251,7 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1189
1251
  "WORKERPALS_OPENAI_CODEX_JSON": "true",
1190
1252
  "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
1191
1253
  "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
1254
+ "WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "1",
1192
1255
  "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1193
1256
  }
1194
1257
  with mock.patch.dict(os.environ, env_overrides, clear=False):
@@ -1284,6 +1347,90 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1284
1347
  self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
1285
1348
  self.assertIn("src/", str(result.get("stdout") or ""))
1286
1349
 
1350
+ def test_run_codex_task_no_edit_watchdog_allows_command_backed_discovery(self) -> None:
1351
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-command-grace-") as temp_dir:
1352
+ repo = Path(temp_dir) / "repo"
1353
+ repo.mkdir(parents=True, exist_ok=True)
1354
+ (repo / "README.md").write_text("# command grace repo\n", encoding="utf-8")
1355
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
1356
+ subprocess.run(
1357
+ ["git", "config", "user.name", "PushPals Test"],
1358
+ cwd=repo,
1359
+ check=True,
1360
+ capture_output=True,
1361
+ text=True,
1362
+ )
1363
+ subprocess.run(
1364
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
1365
+ cwd=repo,
1366
+ check=True,
1367
+ capture_output=True,
1368
+ text=True,
1369
+ )
1370
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
1371
+ subprocess.run(
1372
+ ["git", "commit", "-m", "chore: seed command grace repo"],
1373
+ cwd=repo,
1374
+ check=True,
1375
+ capture_output=True,
1376
+ text=True,
1377
+ )
1378
+
1379
+ stub_path = Path(temp_dir) / "fake_codex_no_edit_command_grace.py"
1380
+ stub_path.write_text(
1381
+ "\n".join(
1382
+ [
1383
+ "from pathlib import Path",
1384
+ "import json",
1385
+ "import sys",
1386
+ "import time",
1387
+ "",
1388
+ "argv = sys.argv[1:]",
1389
+ "last_message_path = None",
1390
+ "for index, arg in enumerate(argv):",
1391
+ " if arg == '--output-last-message' and index + 1 < len(argv):",
1392
+ " last_message_path = argv[index + 1]",
1393
+ " break",
1394
+ "",
1395
+ "sys.stdin.read()",
1396
+ "print(json.dumps({'type': 'thread.started'}), flush=True)",
1397
+ "print(json.dumps({'type': 'turn.started'}), flush=True)",
1398
+ "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'in_progress'}}), flush=True)",
1399
+ "time.sleep(1.4)",
1400
+ "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# command grace repo'}}), flush=True)",
1401
+ "time.sleep(1.6)",
1402
+ "Path('src').mkdir(exist_ok=True)",
1403
+ "Path('src/command-grace.txt').write_text('patched after command-backed discovery\\n', encoding='utf-8')",
1404
+ "if last_message_path:",
1405
+ " Path(last_message_path).write_text('Patched after command-backed discovery.', encoding='utf-8')",
1406
+ "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after command-backed discovery.'}}), flush=True)",
1407
+ ]
1408
+ ),
1409
+ encoding="utf-8",
1410
+ )
1411
+
1412
+ env_overrides = {
1413
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
1414
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
1415
+ "OPENAI_API_KEY": "pushpals-no-edit-command-grace-test-key",
1416
+ "WORKERPALS_OPENAI_CODEX_JSON": "true",
1417
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
1418
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
1419
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5",
1420
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1421
+ }
1422
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
1423
+ result = _run_codex_task(
1424
+ str(repo),
1425
+ "Add one focused contract assertion after inspecting the hinted test.",
1426
+ [],
1427
+ )
1428
+
1429
+ self.assertTrue(result.get("ok"), result)
1430
+ self.assertEqual(result.get("exitCode"), 0)
1431
+ self.assertIn("Patched after command-backed discovery", str(result.get("stdout") or ""))
1432
+ self.assertIn("src/", str(result.get("stdout") or ""))
1433
+
1287
1434
  def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
1288
1435
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
1289
1436
  repo = Path(temp_dir) / "repo"
@@ -1587,6 +1734,31 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1587
1734
 
1588
1735
  self.assertEqual(watchdog_s, 180)
1589
1736
 
1737
+ def test_startup_stall_watchdog_allows_slower_first_response_than_no_edit_watchdog(self) -> None:
1738
+ with mock.patch.dict(
1739
+ os.environ,
1740
+ {"WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": ""},
1741
+ clear=False,
1742
+ ):
1743
+ watchdog_s = _resolve_startup_stall_watchdog_seconds(1200)
1744
+ recovery_watchdog_s = _resolve_startup_stall_watchdog_seconds(
1745
+ 1200,
1746
+ recovery_attempt=1,
1747
+ )
1748
+
1749
+ self.assertEqual(watchdog_s, 210)
1750
+ self.assertEqual(recovery_watchdog_s, 150)
1751
+
1752
+ def test_explicit_startup_stall_watchdog_override_is_bounded(self) -> None:
1753
+ with mock.patch.dict(
1754
+ os.environ,
1755
+ {"WORKERPALS_OPENAI_CODEX_STARTUP_STALL_WATCHDOG_S": "500"},
1756
+ clear=False,
1757
+ ):
1758
+ watchdog_s = _resolve_startup_stall_watchdog_seconds(120)
1759
+
1760
+ self.assertEqual(watchdog_s, 119)
1761
+
1590
1762
  def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
1591
1763
  prompt = (
1592
1764
  "Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
@@ -155,14 +155,39 @@ def fail(summary: str, stderr: Optional[str] = None, exit_code: int = 1) -> int:
155
155
  return exit_code
156
156
 
157
157
 
158
- def decode_payload(raw: str) -> Dict[str, Any]:
159
- decoded = base64.b64decode(raw).decode("utf-8")
160
- payload = json.loads(decoded)
158
+ def _parse_payload_json(raw: str) -> Dict[str, Any]:
159
+ payload = json.loads(raw)
161
160
  if not isinstance(payload, dict):
162
161
  raise ValueError("payload must be a JSON object")
163
162
  return payload
164
163
 
165
164
 
165
+ def decode_payload(raw: str) -> Dict[str, Any]:
166
+ stripped = str(raw or "").strip()
167
+ if not stripped:
168
+ raise ValueError("empty job payload")
169
+
170
+ # Direct workers normally receive a file-backed base64 payload, but this
171
+ # parser intentionally accepts the safe adjacent encodings too. That keeps
172
+ # executor startup resilient if an outer wrapper normalizes padding, uses
173
+ # url-safe base64, or hands through raw JSON during recovery.
174
+ if stripped.startswith("{"):
175
+ return _parse_payload_json(stripped)
176
+
177
+ compact = "".join(stripped.split())
178
+ padded = compact + ("=" * ((4 - len(compact) % 4) % 4))
179
+ decode_errors: List[str] = []
180
+ for decoder in (base64.b64decode, base64.urlsafe_b64decode):
181
+ try:
182
+ decoded = decoder(padded).decode("utf-8")
183
+ return _parse_payload_json(decoded)
184
+ except Exception as exc:
185
+ decode_errors.append(str(exc))
186
+
187
+ detail = "; ".join(error for error in decode_errors if error) or "unknown decode error"
188
+ raise ValueError(f"invalid base64/JSON job payload: {detail}")
189
+
190
+
166
191
  def read_encoded_payload_arg(argv: List[str]) -> str:
167
192
  if len(argv) < 2:
168
193
  raise ValueError("missing base64 job payload")
@@ -174,6 +199,13 @@ def read_encoded_payload_arg(argv: List[str]) -> str:
174
199
  return path.read_text(encoding="utf-8").strip()
175
200
  if mode == "--payload-stdin":
176
201
  return sys.stdin.read().strip()
202
+ if len(mode) < 4096:
203
+ try:
204
+ path = Path(mode).expanduser()
205
+ if path.is_file():
206
+ return path.read_text(encoding="utf-8").strip()
207
+ except OSError:
208
+ pass
177
209
  return mode
178
210
 
179
211
 
@@ -1918,8 +1918,6 @@ export class DockerExecutor {
1918
1918
 
1919
1919
  private matchesRetryablePattern(text: string): boolean {
1920
1920
  const transientPatterns: RegExp[] = [
1921
- /\bstalled before first response\b/i,
1922
- /\bstartup stall\b/i,
1923
1921
  /warm .*runtime/i,
1924
1922
  /failed to start warm container/i,
1925
1923
  /docker execution error/i,