@pushpalsdev/cli 1.1.22 → 1.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.22",
3
+ "version": "1.1.24",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -8285,6 +8285,7 @@ function buildWorkerSpawnCommand(options) {
8285
8285
  }
8286
8286
 
8287
8287
  // apps/remotebuddy/src/remotebuddy_main.ts
8288
+ var AUTONOMY_TASK_DEDUPE_COOLDOWN_MS = 6 * 60 * 60 * 1000;
8288
8289
  var CONFIG = loadPushPalsConfig();
8289
8290
  function parseArgs() {
8290
8291
  const args = process.argv.slice(2);
@@ -8464,6 +8465,11 @@ function buildTaskExecuteDedupeKey(sessionId, params) {
8464
8465
  }
8465
8466
  return `task.execute:${normalizedOrigin}:${normalizedSessionId}:${uniqueTargets.join("|")}`.toLowerCase();
8466
8467
  }
8468
+ function resolveTaskExecuteDedupeCooldownMs(params, dedupeKey) {
8469
+ if (!dedupeKey)
8470
+ return 0;
8471
+ return params.origin === "autonomy" ? AUTONOMY_TASK_DEDUPE_COOLDOWN_MS : 0;
8472
+ }
8467
8473
  function parseAutonomyRequestMetadata(value) {
8468
8474
  let root = asObject2(value);
8469
8475
  if (!root && typeof value === "string") {
@@ -9509,6 +9515,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
9509
9515
  const dedupeKey = buildTaskExecuteDedupeKey(sessionId, params);
9510
9516
  if (dedupeKey)
9511
9517
  payload.dedupeKey = dedupeKey;
9518
+ const dedupeCooldownMs = resolveTaskExecuteDedupeCooldownMs(params, dedupeKey);
9519
+ if (dedupeCooldownMs > 0)
9520
+ payload.dedupeCooldownMs = dedupeCooldownMs;
9512
9521
  if (targetWorkerId)
9513
9522
  payload.targetWorkerId = targetWorkerId;
9514
9523
  const res = await this.fetchImpl(`${this.server}/jobs/enqueue`, {
@@ -10603,6 +10612,7 @@ if (import.meta.main) {
10603
10612
  });
10604
10613
  }
10605
10614
  export {
10615
+ resolveTaskExecuteDedupeCooldownMs,
10606
10616
  extractRequiredValidationStepsFromVisionMarkdown,
10607
10617
  buildTaskExecuteDedupeKey,
10608
10618
  RemoteBuddyOrchestrator
@@ -8,6 +8,7 @@ that the TypeScript host parses.
8
8
  from __future__ import annotations
9
9
 
10
10
  import json
11
+ import hashlib
11
12
  import os
12
13
  import re
13
14
  from shutil import rmtree, which
@@ -108,11 +109,13 @@ _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
108
109
  _MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
109
110
  _MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
110
111
  _DEFAULT_NO_EDIT_WATCHDOG_S = 480
111
- _SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
112
+ _SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
113
+ _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
112
114
  _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
113
115
  _DEFAULT_NO_EDIT_RECHECK_S = 120
114
116
  _DEFAULT_ROLLOUT_WATCHDOG_S = 300
115
117
  _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
118
+ _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
116
119
  _WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
117
120
 
118
121
 
@@ -590,6 +593,21 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
590
593
  "browser smoke",
591
594
  "web delivery",
592
595
  "navigation trustworthy",
596
+ "test-only",
597
+ "test only",
598
+ "contract test",
599
+ "contract coverage",
600
+ "ranking contract",
601
+ "focused scenario",
602
+ "targeted test",
603
+ "one-file",
604
+ "one file",
605
+ "single-file",
606
+ "single file",
607
+ "max_files_to_edit: 1",
608
+ "max_files_to_edit=1",
609
+ "maxfilestoedit: 1",
610
+ "maxfilestoedit=1",
593
611
  )
594
612
  heavy_markers = (
595
613
  "merge-conflict",
@@ -606,6 +624,34 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
606
624
  )
607
625
 
608
626
 
627
+ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
628
+ text = str(prompt or "").lower()
629
+ if not text:
630
+ return False
631
+ narrow_markers = (
632
+ "contract test",
633
+ "contract coverage",
634
+ "ranking contract",
635
+ "test-only",
636
+ "test only",
637
+ "targeted test",
638
+ "focused scenario",
639
+ )
640
+ if not any(marker in text for marker in narrow_markers):
641
+ return False
642
+ broad_markers = (
643
+ "full render harness",
644
+ "full-surface",
645
+ "full surface",
646
+ "e2e",
647
+ "browser validation",
648
+ "browser smoke",
649
+ "migration",
650
+ "broad refactor",
651
+ )
652
+ return not any(marker in text for marker in broad_markers)
653
+
654
+
609
655
  def _resolve_task_reasoning_effort(
610
656
  configured_effort: str,
611
657
  prompt: str,
@@ -651,7 +697,9 @@ def _resolve_no_edit_watchdog_seconds(
651
697
  return None
652
698
 
653
699
  prompt_text = str(prompt or "").lower()
654
- if "repo-native web review" in prompt_text or "web review path" in prompt_text:
700
+ if _looks_like_narrow_test_task_prompt(prompt):
701
+ default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
702
+ elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
655
703
  default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
656
704
  else:
657
705
  default_s = (
@@ -702,7 +750,9 @@ def _resolve_rollout_watchdog_seconds(
702
750
  else:
703
751
  return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
704
752
 
705
- if _looks_like_web_review_prompt(prompt):
753
+ if _looks_like_narrow_test_task_prompt(prompt):
754
+ default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
755
+ elif _looks_like_web_review_prompt(prompt):
706
756
  default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
707
757
  elif _looks_like_small_task_prompt(prompt):
708
758
  default_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
@@ -713,9 +763,39 @@ def _resolve_rollout_watchdog_seconds(
713
763
  return max(90, min(default_s, max(90, communicate_timeout_s - 60)))
714
764
 
715
765
 
716
- def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: List[str]) -> str:
717
- delta = [p for p in changed_paths if p not in baseline_snapshot]
718
- inspected = delta if delta else changed_paths
766
+ def _baseline_snapshot_paths(baseline_snapshot: Any) -> List[str]:
767
+ if isinstance(baseline_snapshot, dict):
768
+ return [str(path) for path in baseline_snapshot.keys()]
769
+ if isinstance(baseline_snapshot, list):
770
+ return [str(path) for path in baseline_snapshot]
771
+ return []
772
+
773
+
774
+ def _paths_changed_after_baseline(
775
+ repo: str,
776
+ changed_paths: List[str],
777
+ baseline_snapshot: Any,
778
+ ) -> List[str]:
779
+ baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
780
+ if not baseline_paths:
781
+ return list(changed_paths)
782
+
783
+ delta: List[str] = []
784
+ baseline_fingerprints = baseline_snapshot if isinstance(baseline_snapshot, dict) else {}
785
+ for path in changed_paths:
786
+ if path not in baseline_paths:
787
+ delta.append(path)
788
+ continue
789
+ if baseline_fingerprints:
790
+ current_fingerprint = _changed_path_fingerprint(repo, path)
791
+ if current_fingerprint != str(baseline_fingerprints.get(path) or ""):
792
+ delta.append(path)
793
+ return delta
794
+
795
+
796
+ def _describe_non_publishable_paths(changed_paths: List[str], baseline_snapshot: Any) -> str:
797
+ baseline_paths = set(_baseline_snapshot_paths(baseline_snapshot))
798
+ inspected = [p for p in changed_paths if p not in baseline_paths] if baseline_paths else changed_paths
719
799
  non_publishable = [p for p in inspected if not _is_publishable_changed_path(p)]
720
800
  if not non_publishable:
721
801
  return ""
@@ -735,6 +815,8 @@ def _describe_publishable_paths(paths: List[str]) -> str:
735
815
  def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
736
816
  lines = [
737
817
  "No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
818
+ "This recovery attempt has a patch-first contract: make one publishable edit before any further broad discovery. If you need one narrow read of the hinted file to place the edit, do that once, then patch immediately.",
819
+ "Do not repeat the same read/search sequence from the previous attempt. Re-reading the target without editing is a failed recovery.",
738
820
  "Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
739
821
  "Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
740
822
  "Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
@@ -1686,10 +1768,99 @@ def _is_publishable_changed_path(path: str) -> bool:
1686
1768
  return not re.search(r"(^|/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(/|$)", normalized)
1687
1769
 
1688
1770
 
1689
- def _codex_changed_paths(repo: str, baseline_snapshot: List[str]) -> Tuple[List[str], List[str], List[str]]:
1771
+ def _filesystem_fingerprint(repo: str, raw_path: str) -> str:
1772
+ root = Path(repo)
1773
+ target = (root / raw_path).resolve()
1774
+ try:
1775
+ root_resolved = root.resolve()
1776
+ common = os.path.commonpath([str(root_resolved), str(target)])
1777
+ if common != str(root_resolved):
1778
+ return "outside-repo"
1779
+ except Exception:
1780
+ return "unresolved"
1781
+ digest = hashlib.sha256()
1782
+ if not target.exists():
1783
+ return "missing"
1784
+ if target.is_file():
1785
+ digest.update(b"file\0")
1786
+ try:
1787
+ digest.update(str(target.stat().st_size).encode("utf-8"))
1788
+ with target.open("rb") as handle:
1789
+ while True:
1790
+ chunk = handle.read(1024 * 1024)
1791
+ if not chunk:
1792
+ break
1793
+ digest.update(chunk)
1794
+ except Exception as exc:
1795
+ digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
1796
+ return digest.hexdigest()
1797
+ if target.is_dir():
1798
+ digest.update(b"dir\0")
1799
+ files_seen = 0
1800
+ try:
1801
+ for dirpath, dirnames, filenames in os.walk(target):
1802
+ dirnames.sort()
1803
+ filenames.sort()
1804
+ for filename in filenames:
1805
+ if files_seen >= 128:
1806
+ digest.update(b"\0truncated")
1807
+ return digest.hexdigest()
1808
+ child = Path(dirpath) / filename
1809
+ try:
1810
+ rel = child.relative_to(root_resolved).as_posix()
1811
+ except Exception:
1812
+ rel = child.name
1813
+ digest.update(rel.encode("utf-8", errors="replace"))
1814
+ digest.update(b"\0")
1815
+ digest.update(str(child.stat().st_size).encode("utf-8"))
1816
+ digest.update(b"\0")
1817
+ try:
1818
+ with child.open("rb") as handle:
1819
+ digest.update(handle.read(64 * 1024))
1820
+ except Exception as exc:
1821
+ digest.update(f"read-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
1822
+ files_seen += 1
1823
+ except Exception as exc:
1824
+ digest.update(f"walk-error:{type(exc).__name__}:{exc}".encode("utf-8", errors="replace"))
1825
+ return digest.hexdigest()
1826
+ return "special"
1827
+
1828
+
1829
+ def _changed_path_fingerprint(repo: str, path: str) -> str:
1830
+ normalized = str(path or "").strip()
1831
+ if not normalized:
1832
+ return ""
1833
+ digest = hashlib.sha256()
1834
+ digest.update(normalized.replace("\\", "/").encode("utf-8", errors="replace"))
1835
+ digest.update(b"\0fs\0")
1836
+ digest.update(_filesystem_fingerprint(repo, normalized).encode("utf-8", errors="replace"))
1837
+ return digest.hexdigest()
1838
+
1839
+
1840
+ def _capture_git_change_snapshot(repo: str) -> Dict[str, str]:
1841
+ return {path: _changed_path_fingerprint(repo, path) for path in summarize_git_changes(repo)}
1842
+
1843
+
1844
+ def _normalize_baseline_snapshot(repo: str, baseline_changes: Any) -> Dict[str, str]:
1845
+ if isinstance(baseline_changes, dict):
1846
+ return {
1847
+ str(path): str(fingerprint)
1848
+ for path, fingerprint in baseline_changes.items()
1849
+ if str(path or "").strip()
1850
+ }
1851
+ if isinstance(baseline_changes, list):
1852
+ return {
1853
+ str(path): _changed_path_fingerprint(repo, str(path))
1854
+ for path in baseline_changes
1855
+ if str(path or "").strip()
1856
+ }
1857
+ return _capture_git_change_snapshot(repo)
1858
+
1859
+
1860
+ def _codex_changed_paths(repo: str, baseline_snapshot: Any) -> Tuple[List[str], List[str], List[str]]:
1690
1861
  changed_paths = summarize_git_changes(repo)
1691
- delta = [p for p in changed_paths if p not in baseline_snapshot]
1692
- effective = [p for p in (delta if delta else changed_paths) if _is_publishable_changed_path(p)]
1862
+ delta = _paths_changed_after_baseline(repo, changed_paths, baseline_snapshot)
1863
+ effective = [p for p in delta if _is_publishable_changed_path(p)]
1693
1864
  return changed_paths, delta, effective
1694
1865
 
1695
1866
 
@@ -1851,7 +2022,7 @@ def _run_codex_task(
1851
2022
  prompt,
1852
2023
  model,
1853
2024
  )
1854
- baseline_snapshot = list(baseline_changes) if baseline_changes is not None else summarize_git_changes(repo)
2025
+ baseline_snapshot = _normalize_baseline_snapshot(repo, baseline_changes)
1855
2026
 
1856
2027
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-") as tmp_dir:
1857
2028
  last_message_path = Path(tmp_dir) / "codex-last-message.txt"
@@ -36,6 +36,7 @@ from openai_codex_executor import (
36
36
  _build_rollout_recovery_guidance,
37
37
  _collect_disallowed_shell_wrapper_rejections,
38
38
  _codex_changed_paths,
39
+ _capture_git_change_snapshot,
39
40
  _describe_non_publishable_paths,
40
41
  _detect_offtrack_rollout,
41
42
  _detect_codex_workaround_signal,
@@ -944,6 +945,75 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
944
945
  self.assertIn("too broad/noisy", str(result.get("stderr") or ""))
945
946
  self.assertIn("area0", str(result.get("stderr") or ""))
946
947
 
948
+ def test_run_codex_task_timeout_ignores_broad_dirty_baseline(self) -> None:
949
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-timeout-dirty-baseline-") as temp_dir:
950
+ repo = Path(temp_dir) / "repo"
951
+ repo.mkdir(parents=True, exist_ok=True)
952
+ (repo / "README.md").write_text("# timeout dirty baseline repo\n", encoding="utf-8")
953
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
954
+ subprocess.run(
955
+ ["git", "config", "user.name", "PushPals Test"],
956
+ cwd=repo,
957
+ check=True,
958
+ capture_output=True,
959
+ text=True,
960
+ )
961
+ subprocess.run(
962
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
963
+ cwd=repo,
964
+ check=True,
965
+ capture_output=True,
966
+ text=True,
967
+ )
968
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
969
+ subprocess.run(
970
+ ["git", "commit", "-m", "chore: seed timeout dirty baseline repo"],
971
+ cwd=repo,
972
+ check=True,
973
+ capture_output=True,
974
+ text=True,
975
+ )
976
+ for index in range(5):
977
+ root = repo / f"area{index}"
978
+ root.mkdir(exist_ok=True)
979
+ (root / "changed.txt").write_text("pre-existing dirty change\n", encoding="utf-8")
980
+
981
+ stub_path = Path(temp_dir) / "fake_codex_timeout_dirty_baseline.py"
982
+ stub_path.write_text(
983
+ "\n".join(
984
+ [
985
+ "import sys",
986
+ "import time",
987
+ "",
988
+ "sys.stdin.read()",
989
+ "print('item.completed | Still thinking without changing baseline files.', flush=True)",
990
+ "time.sleep(5)",
991
+ ]
992
+ ),
993
+ encoding="utf-8",
994
+ )
995
+
996
+ env_overrides = {
997
+ "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
998
+ "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
999
+ "OPENAI_API_KEY": "pushpals-timeout-dirty-baseline-test-key",
1000
+ "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "1",
1001
+ "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "0",
1002
+ "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
1003
+ }
1004
+ with mock.patch.dict(os.environ, env_overrides, clear=False):
1005
+ result = _run_codex_task(
1006
+ str(repo),
1007
+ "Make a compact scoped patch, then continue thinking too long.",
1008
+ [],
1009
+ )
1010
+
1011
+ self.assertFalse(result.get("ok"), result)
1012
+ self.assertEqual(result.get("exitCode"), 124)
1013
+ self.assertIn("execution timed out", str(result.get("summary") or ""))
1014
+ self.assertNotIn("broad/noisy", str(result.get("summary") or ""))
1015
+ self.assertNotIn("too broad/noisy", str(result.get("stderr") or ""))
1016
+
947
1017
  def test_run_codex_task_retries_once_when_no_edit_watchdog_fires(self) -> None:
948
1018
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-") as temp_dir:
949
1019
  repo = Path(temp_dir) / "repo"
@@ -1215,6 +1285,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1215
1285
  self.assertGreaterEqual(len(delta), 2)
1216
1286
  self.assertEqual(effective, [])
1217
1287
 
1288
+ def test_codex_changed_paths_ignores_publishable_paths_dirty_at_baseline(self) -> None:
1289
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-") as temp_dir:
1290
+ repo = Path(temp_dir) / "repo"
1291
+ repo.mkdir(parents=True, exist_ok=True)
1292
+ (repo / "README.md").write_text("# dirty baseline repo\n", encoding="utf-8")
1293
+ (repo / "src").mkdir()
1294
+ (repo / "src" / "existing.ts").write_text("export const value = 1;\n", encoding="utf-8")
1295
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
1296
+ subprocess.run(
1297
+ ["git", "config", "user.name", "PushPals Test"],
1298
+ cwd=repo,
1299
+ check=True,
1300
+ capture_output=True,
1301
+ text=True,
1302
+ )
1303
+ subprocess.run(
1304
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
1305
+ cwd=repo,
1306
+ check=True,
1307
+ capture_output=True,
1308
+ text=True,
1309
+ )
1310
+ subprocess.run(["git", "add", "."], cwd=repo, check=True, capture_output=True, text=True)
1311
+ subprocess.run(
1312
+ ["git", "commit", "-m", "chore: seed dirty baseline repo"],
1313
+ cwd=repo,
1314
+ check=True,
1315
+ capture_output=True,
1316
+ text=True,
1317
+ )
1318
+ (repo / "README.md").write_text("# dirty baseline repo\n\npre-existing edit\n", encoding="utf-8")
1319
+ (repo / "src" / "existing.ts").write_text("export const value = 2;\n", encoding="utf-8")
1320
+ baseline = _capture_git_change_snapshot(str(repo))
1321
+
1322
+ changed_paths, delta, effective = _codex_changed_paths(str(repo), baseline)
1323
+
1324
+ self.assertIn("README.md", changed_paths)
1325
+ self.assertEqual(delta, [])
1326
+ self.assertEqual(effective, [])
1327
+
1328
+ def test_codex_changed_paths_counts_worker_edits_to_dirty_baseline_paths(self) -> None:
1329
+ with tempfile.TemporaryDirectory(prefix="pushpals-codex-dirty-baseline-mutated-") as temp_dir:
1330
+ repo = Path(temp_dir) / "repo"
1331
+ repo.mkdir(parents=True, exist_ok=True)
1332
+ (repo / "README.md").write_text("# dirty baseline mutation repo\n", encoding="utf-8")
1333
+ subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
1334
+ subprocess.run(
1335
+ ["git", "config", "user.name", "PushPals Test"],
1336
+ cwd=repo,
1337
+ check=True,
1338
+ capture_output=True,
1339
+ text=True,
1340
+ )
1341
+ subprocess.run(
1342
+ ["git", "config", "user.email", "pushpals-tests@example.com"],
1343
+ cwd=repo,
1344
+ check=True,
1345
+ capture_output=True,
1346
+ text=True,
1347
+ )
1348
+ subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
1349
+ subprocess.run(
1350
+ ["git", "commit", "-m", "chore: seed dirty baseline mutation repo"],
1351
+ cwd=repo,
1352
+ check=True,
1353
+ capture_output=True,
1354
+ text=True,
1355
+ )
1356
+ (repo / "README.md").write_text("# dirty baseline mutation repo\n\npre-existing edit\n", encoding="utf-8")
1357
+ baseline = _capture_git_change_snapshot(str(repo))
1358
+ (repo / "README.md").write_text(
1359
+ "# dirty baseline mutation repo\n\npre-existing edit\nworker edit\n",
1360
+ encoding="utf-8",
1361
+ )
1362
+
1363
+ _, delta, effective = _codex_changed_paths(str(repo), baseline)
1364
+
1365
+ self.assertEqual(delta, ["README.md"])
1366
+ self.assertEqual(effective, ["README.md"])
1367
+
1218
1368
  def test_non_publishable_path_summary_names_artifact_only_dirty_paths(self) -> None:
1219
1369
  changed_paths = [
1220
1370
  "node_modules/react/index.js",
@@ -1236,6 +1386,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1236
1386
 
1237
1387
  self.assertEqual(watchdog_s, 240)
1238
1388
 
1389
+ def test_narrow_contract_tests_use_fast_no_edit_watchdog(self) -> None:
1390
+ prompt = (
1391
+ "Update app/__tests__/opportunity-graph.contract.test.ts to tighten the "
1392
+ "ranking contract test. Keep this test-only and preserve existing behavior."
1393
+ )
1394
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
1395
+ watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
1396
+
1397
+ self.assertEqual(watchdog_s, 180)
1398
+
1239
1399
  def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
1240
1400
  guidance = _build_no_edit_recovery_guidance(
1241
1401
  "item.completed | still inspecting",
@@ -1243,6 +1403,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1243
1403
  )
1244
1404
 
1245
1405
  self.assertIn("node_modules", guidance)
1406
+ self.assertIn("patch-first contract", guidance)
1407
+ self.assertIn("Re-reading the target without editing is a failed recovery", guidance)
1246
1408
  self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
1247
1409
  self.assertIn("Previous Codex event trace excerpt", guidance)
1248
1410
 
@@ -1261,6 +1423,15 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1261
1423
  self.assertEqual(no_edit_s, 240)
1262
1424
  self.assertEqual(rollout_s, 180)
1263
1425
 
1426
+ def test_narrow_contract_rollout_watchdog_is_earlier_than_no_edit_watchdog(self) -> None:
1427
+ prompt = "Tighten the focused contract test for one ranking behavior."
1428
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
1429
+ no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
1430
+ rollout_s = _resolve_rollout_watchdog_seconds(prompt, 1200, no_edit_s)
1431
+
1432
+ self.assertEqual(no_edit_s, 180)
1433
+ self.assertEqual(rollout_s, 120)
1434
+
1264
1435
  def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
1265
1436
  trace = {
1266
1437
  "summaries": [
@@ -1785,9 +1785,10 @@ async function workerLoop(
1785
1785
  terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
1786
1786
  executorBackend: resolveExecutor(CONFIG),
1787
1787
  summary: result.summary,
1788
- watchdogFired: /timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
1789
- `${result.summary}\n${result.stderr ?? ""}`,
1790
- ),
1788
+ watchdogFired:
1789
+ /watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
1790
+ `${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
1791
+ ),
1791
1792
  metadata: {
1792
1793
  workerId: opts.workerId,
1793
1794
  docker: Boolean(dockerExecutor),