@pushpalsdev/cli 1.1.36 → 1.1.37

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.36",
3
+ "version": "1.1.37",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -2519,16 +2519,14 @@ def _run_codex_task(
2519
2519
  if no_edit_watchdog_s is not None
2520
2520
  else None
2521
2521
  )
2522
- no_edit_command_grace_cap_deadline = (
2523
- started_at + float(no_edit_watchdog_s + no_edit_command_grace_s)
2524
- if no_edit_watchdog_s is not None and no_edit_command_grace_s is not None
2525
- else None
2526
- )
2527
2522
  rollout_deadline = (
2528
2523
  started_at + float(rollout_watchdog_s)
2529
2524
  if rollout_watchdog_s is not None
2530
2525
  else None
2531
2526
  )
2527
+ publishable_progress_seen_at: Optional[float] = None
2528
+ publishable_progress_finalized = False
2529
+ publishable_progress_paths: List[str] = []
2532
2530
 
2533
2531
  while proc.poll() is None:
2534
2532
  now = time.monotonic()
@@ -2605,11 +2603,6 @@ def _run_codex_task(
2605
2603
  command_grace_deadline = last_command_activity_at + float(
2606
2604
  no_edit_command_grace_s
2607
2605
  )
2608
- if no_edit_command_grace_cap_deadline is not None:
2609
- command_grace_deadline = min(
2610
- command_grace_deadline,
2611
- no_edit_command_grace_cap_deadline,
2612
- )
2613
2606
  if command_grace_deadline > now:
2614
2607
  no_edit_deadline = command_grace_deadline
2615
2608
  remaining_s = int(max(1.0, command_grace_deadline - now))
@@ -2645,6 +2638,22 @@ def _run_codex_task(
2645
2638
  )
2646
2639
  _terminate_active_child()
2647
2640
  break
2641
+ if publishable_progress_seen_at is None:
2642
+ publishable_progress_seen_at = now
2643
+ publishable_progress_paths = list(effective_paths)
2644
+ elif _has_credible_shell_wrapper_progress(effective_paths):
2645
+ publishable_progress_paths = list(effective_paths)
2646
+ publishable_age_s = now - publishable_progress_seen_at
2647
+ if publishable_age_s >= float(no_edit_recheck_s):
2648
+ publishable_progress_finalized = True
2649
+ log.info(
2650
+ "No-edit watchdog observed durable publishable file changes "
2651
+ f"({_describe_publishable_paths(effective_paths)}) for "
2652
+ f"{int(publishable_age_s)}s; stopping Codex early so "
2653
+ "QualityGate/ValidationGate can use the remaining budget."
2654
+ )
2655
+ _terminate_active_child()
2656
+ break
2648
2657
  no_edit_deadline = now + float(no_edit_recheck_s)
2649
2658
  log.info(
2650
2659
  "No-edit watchdog observed publishable-looking file changes "
@@ -2806,6 +2815,35 @@ def _run_codex_task(
2806
2815
  "cooldownMs": _NO_PUBLISHABLE_FAILURE_COOLDOWN_MS,
2807
2816
  }
2808
2817
 
2818
+ if publishable_progress_finalized:
2819
+ changed_paths, _, effective_paths = _codex_changed_paths(repo, baseline_snapshot)
2820
+ effective_paths = effective_paths or publishable_progress_paths
2821
+ last_message = _read_text_if_exists(last_message_path)
2822
+ log_git_status(repo, log)
2823
+ prefix = (
2824
+ "Codex produced durable publishable file changes. PushPals stopped the "
2825
+ "Codex child early to preserve validation and revision budget; the normal "
2826
+ "QualityGate/ValidationGate will catch any incomplete edit."
2827
+ )
2828
+ return {
2829
+ "ok": True,
2830
+ "summary": (
2831
+ "openai_codex stopped after durable publishable progress "
2832
+ f"({len(effective_paths)} file(s))"
2833
+ ),
2834
+ "stdout": _truncate(
2835
+ _build_success_stdout(
2836
+ effective_paths=effective_paths,
2837
+ last_message=last_message,
2838
+ trace_excerpt=trace_excerpt,
2839
+ prefix=prefix,
2840
+ )
2841
+ ),
2842
+ "stderr": _truncate(stderr),
2843
+ "exitCode": 0,
2844
+ "usage": usage,
2845
+ }
2846
+
2809
2847
  if no_edit_watchdog_fired:
2810
2848
  startup_stall = _codex_trace_is_startup_stall(stdout_trace)
2811
2849
  if startup_stall and startup_stall_recovery_attempt < _MAX_STARTUP_STALL_RECOVERY_ATTEMPTS:
@@ -1431,6 +1431,175 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1431
1431
  self.assertIn("Patched after command-backed discovery", str(result.get("stdout") or ""))
1432
1432
  self.assertIn("src/", str(result.get("stdout") or ""))
1433
1433
 
1434
+ def test_run_codex_task_no_edit_watchdog_extends_after_later_command_progress(self) -> None:
1435
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-late-command-") as temp_dir:
1436
+ repo = Path(temp_dir) / "repo"
1437
+ repo.mkdir(parents=True, exist_ok=True)
1438
+ (repo / "README.md").write_text("# late command grace repo\n", encoding="utf-8")
1439
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
1440
+ subprocess.run(
1441
+ ["git", "config", "user.name", "PushPals Test"],
1442
+ cwd=repo,
1443
+ check=True,
1444
+ capture_output=True,
1445
+ text=True,
1446
+ )
1447
+ subprocess.run(
1448
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
1449
+ cwd=repo,
1450
+ check=True,
1451
+ capture_output=True,
1452
+ text=True,
1453
+ )
1454
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
1455
+ subprocess.run(
1456
+ ["git", "commit", "-m", "chore: seed late command repo"],
1457
+ cwd=repo,
1458
+ check=True,
1459
+ capture_output=True,
1460
+ text=True,
1461
+ )
1462
+
1463
+ stub_path = Path(temp_dir) / "fake_codex_late_command_grace.py"
1464
+ stub_path.write_text(
1465
+ "\n".join(
1466
+ [
1467
+ "from pathlib import Path",
1468
+ "import json",
1469
+ "import sys",
1470
+ "import time",
1471
+ "",
1472
+ "argv = sys.argv[1:]",
1473
+ "last_message_path = None",
1474
+ "for index, arg in enumerate(argv):",
1475
+ " if arg == '--output-last-message' and index + 1 < len(argv):",
1476
+ " last_message_path = argv[index + 1]",
1477
+ " break",
1478
+ "",
1479
+ "sys.stdin.read()",
1480
+ "print(json.dumps({'type': 'thread.started'}), flush=True)",
1481
+ "print(json.dumps({'type': 'turn.started'}), flush=True)",
1482
+ "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
1483
+ "time.sleep(0.2)",
1484
+ "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-one', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0}}), flush=True)",
1485
+ "time.sleep(2.2)",
1486
+ "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'in_progress'}}), flush=True)",
1487
+ "time.sleep(0.2)",
1488
+ "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-two', 'type': 'command_execution', 'command': 'ls', 'status': 'completed', 'exit_code': 0}}), flush=True)",
1489
+ "time.sleep(2.0)",
1490
+ "Path('src').mkdir(exist_ok=True)",
1491
+ "Path('src/late-command-grace.txt').write_text('patched after later command progress\\n', encoding='utf-8')",
1492
+ "if last_message_path:",
1493
+ " Path(last_message_path).write_text('Patched after later command progress.', encoding='utf-8')",
1494
+ "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after later command progress.'}}), flush=True)",
1495
+ ]
1496
+ ),
1497
+ encoding="utf-8",
1498
+ )
1499
+
1500
+ env_overrides = {
1501
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
1502
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
1503
+ "OPENAI_API_KEY": "pushpals-no-edit-late-command-test-key",
1504
+ "WORKERPALS_OPENAI_CODEX_JSON": "true",
1505
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
1506
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
1507
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "3",
1508
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1509
+ }
1510
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
1511
+ result = _run_codex_task(
1512
+ str(repo),
1513
+ "Add one focused contract assertion after a later targeted read.",
1514
+ [],
1515
+ )
1516
+
1517
+ self.assertTrue(result.get("ok"), result)
1518
+ self.assertEqual(result.get("exitCode"), 0)
1519
+ self.assertIn("Patched after later command progress", str(result.get("stdout") or ""))
1520
+ self.assertIn("src/", str(result.get("stdout") or ""))
1521
+
1522
+ def test_run_codex_task_finalizes_after_durable_publishable_progress(self) -> None:
1523
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-durable-progress-") as temp_dir:
1524
+ repo = Path(temp_dir) / "repo"
1525
+ repo.mkdir(parents=True, exist_ok=True)
1526
+ (repo / "README.md").write_text("# durable progress repo\n", encoding="utf-8")
1527
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
1528
+ subprocess.run(
1529
+ ["git", "config", "user.name", "PushPals Test"],
1530
+ cwd=repo,
1531
+ check=True,
1532
+ capture_output=True,
1533
+ text=True,
1534
+ )
1535
+ subprocess.run(
1536
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
1537
+ cwd=repo,
1538
+ check=True,
1539
+ capture_output=True,
1540
+ text=True,
1541
+ )
1542
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
1543
+ subprocess.run(
1544
+ ["git", "commit", "-m", "chore: seed durable progress repo"],
1545
+ cwd=repo,
1546
+ check=True,
1547
+ capture_output=True,
1548
+ text=True,
1549
+ )
1550
+
1551
+ stub_path = Path(temp_dir) / "fake_codex_durable_progress.py"
1552
+ stub_path.write_text(
1553
+ "\n".join(
1554
+ [
1555
+ "from pathlib import Path",
1556
+ "import json",
1557
+ "import sys",
1558
+ "import time",
1559
+ "",
1560
+ "argv = sys.argv[1:]",
1561
+ "last_message_path = None",
1562
+ "for index, arg in enumerate(argv):",
1563
+ " if arg == '--output-last-message' and index + 1 < len(argv):",
1564
+ " last_message_path = argv[index + 1]",
1565
+ " break",
1566
+ "",
1567
+ "sys.stdin.read()",
1568
+ "print(json.dumps({'type': 'thread.started'}), flush=True)",
1569
+ "print(json.dumps({'type': 'turn.started'}), flush=True)",
1570
+ "Path('src').mkdir(exist_ok=True)",
1571
+ "Path('src/durable-progress.txt').write_text('durable patch\\n', encoding='utf-8')",
1572
+ "if last_message_path:",
1573
+ " Path(last_message_path).write_text('Created durable patch and kept thinking.', encoding='utf-8')",
1574
+ "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Created durable patch and kept thinking.'}}), flush=True)",
1575
+ "time.sleep(10)",
1576
+ ]
1577
+ ),
1578
+ encoding="utf-8",
1579
+ )
1580
+
1581
+ env_overrides = {
1582
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
1583
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
1584
+ "OPENAI_API_KEY": "pushpals-durable-progress-test-key",
1585
+ "WORKERPALS_OPENAI_CODEX_JSON": "true",
1586
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
1587
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
1588
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_RECHECK_S": "1",
1589
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1590
+ }
1591
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
1592
+ result = _run_codex_task(
1593
+ str(repo),
1594
+ "Make a focused patch and stop once it is durable.",
1595
+ [],
1596
+ )
1597
+
1598
+ self.assertTrue(result.get("ok"), result)
1599
+ self.assertEqual(result.get("exitCode"), 0)
1600
+ self.assertIn("stopped after durable publishable progress", str(result.get("summary") or ""))
1601
+ self.assertIn("src/", str(result.get("stdout") or ""))
1602
+
1434
1603
  def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
1435
1604
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
1436
1605
  repo = Path(temp_dir) / "repo"
@@ -335,6 +335,22 @@ export function shouldSkipCriticForDeterministicValidationRevision(opts: {
335
335
  return opts.validationRuns.some(isDeterministicFastValidationFailure);
336
336
  }
337
337
 
338
+ export function shouldSkipCriticToPreserveRevisionBudget(opts: {
339
+ deterministicRequiresRevision: boolean;
340
+ remainingBudgetMs: number;
341
+ minimumRevisionBudgetMs: number;
342
+ criticTimeoutMs: number;
343
+ criticTimeoutBehavior: "skip" | "retry_once" | "block" | string;
344
+ }): boolean {
345
+ if (!opts.deterministicRequiresRevision) return false;
346
+ const remainingBudgetMs = Math.max(0, Math.floor(opts.remainingBudgetMs));
347
+ const minimumRevisionBudgetMs = Math.max(0, Math.floor(opts.minimumRevisionBudgetMs));
348
+ const criticTimeoutMs = Math.max(0, Math.floor(opts.criticTimeoutMs));
349
+ const criticAttempts = opts.criticTimeoutBehavior === "retry_once" ? 2 : 1;
350
+ const criticWorstCaseMs = criticTimeoutMs * criticAttempts;
351
+ return remainingBudgetMs < minimumRevisionBudgetMs + criticWorstCaseMs;
352
+ }
353
+
338
354
  export function workerAttemptRolloutScore(params: {
339
355
  executorElapsedMs: number;
340
356
  qualityElapsedMs: number;
@@ -7975,11 +7991,23 @@ export async function executeJob(
7975
7991
  validationOutsideTaskScope,
7976
7992
  validationRuns: quality.validationRuns,
7977
7993
  });
7994
+ const preCriticRevisionBudget = qualityRevisionBudgetDecision({
7995
+ jobElapsedMs: Date.now() - jobStartedAt,
7996
+ executionBudgetMs,
7997
+ });
7998
+ const skipCriticForRevisionBudget = shouldSkipCriticToPreserveRevisionBudget({
7999
+ deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
8000
+ remainingBudgetMs: preCriticRevisionBudget.remainingBudgetMs,
8001
+ minimumRevisionBudgetMs: preCriticRevisionBudget.minimumRevisionBudgetMs,
8002
+ criticTimeoutMs: resolveQualityCriticTimeoutMs(runtimeConfig),
8003
+ criticTimeoutBehavior: resolveQualityCriticTimeoutBehavior(runtimeConfig),
8004
+ });
7978
8005
  const critic =
7979
8006
  quality.skipped ||
7980
8007
  !qualityGatePolicy.criticGateEnabled ||
7981
8008
  skipCriticAfterExecutorTimeout ||
7982
- skipCriticForDeterministicValidationRevision
8009
+ skipCriticForDeterministicValidationRevision ||
8010
+ skipCriticForRevisionBudget
7983
8011
  ? null
7984
8012
  : executor === "openai_codex"
7985
8013
  ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -8020,6 +8048,11 @@ export async function executeJob(
8020
8048
  "stdout",
8021
8049
  "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
8022
8050
  );
8051
+ } else if (skipCriticForRevisionBudget) {
8052
+ onLog?.(
8053
+ "stdout",
8054
+ `[CriticGate] Skipping critic because deterministic quality already requires revision and remaining budget (${preCriticRevisionBudget.remainingBudgetMs}ms) must be reserved for the next worker turn.`,
8055
+ );
8023
8056
  }
8024
8057
  const rolloutScore = workerAttemptRolloutScore({
8025
8058
  executorElapsedMs,