@pushpalsdev/cli 1.1.23 → 1.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +10 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +54 -3
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +21 -0
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +4 -3
package/package.json
CHANGED
|
package/runtime/sandbox/.pushpals-remotebuddy-fallback.js
CHANGED
@@ -8285,6 +8285,7 @@ function buildWorkerSpawnCommand(options) {
|
|
|
8285
8285
|
}
|
|
8286
8286
|
|
|
8287
8287
|
// apps/remotebuddy/src/remotebuddy_main.ts
|
|
8288
|
+
var AUTONOMY_TASK_DEDUPE_COOLDOWN_MS = 6 * 60 * 60 * 1000;
|
|
8288
8289
|
var CONFIG = loadPushPalsConfig();
|
|
8289
8290
|
function parseArgs() {
|
|
8290
8291
|
const args = process.argv.slice(2);
|
|
@@ -8464,6 +8465,11 @@ function buildTaskExecuteDedupeKey(sessionId, params) {
|
|
|
8464
8465
|
}
|
|
8465
8466
|
return `task.execute:${normalizedOrigin}:${normalizedSessionId}:${uniqueTargets.join("|")}`.toLowerCase();
|
|
8466
8467
|
}
|
|
8468
|
+
function resolveTaskExecuteDedupeCooldownMs(params, dedupeKey) {
|
|
8469
|
+
if (!dedupeKey)
|
|
8470
|
+
return 0;
|
|
8471
|
+
return params.origin === "autonomy" ? AUTONOMY_TASK_DEDUPE_COOLDOWN_MS : 0;
|
|
8472
|
+
}
|
|
8467
8473
|
function parseAutonomyRequestMetadata(value) {
|
|
8468
8474
|
let root = asObject2(value);
|
|
8469
8475
|
if (!root && typeof value === "string") {
|
|
@@ -9509,6 +9515,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9509
9515
|
const dedupeKey = buildTaskExecuteDedupeKey(sessionId, params);
|
|
9510
9516
|
if (dedupeKey)
|
|
9511
9517
|
payload.dedupeKey = dedupeKey;
|
|
9518
|
+
const dedupeCooldownMs = resolveTaskExecuteDedupeCooldownMs(params, dedupeKey);
|
|
9519
|
+
if (dedupeCooldownMs > 0)
|
|
9520
|
+
payload.dedupeCooldownMs = dedupeCooldownMs;
|
|
9512
9521
|
if (targetWorkerId)
|
|
9513
9522
|
payload.targetWorkerId = targetWorkerId;
|
|
9514
9523
|
const res = await this.fetchImpl(`${this.server}/jobs/enqueue`, {
|
|
@@ -10603,6 +10612,7 @@ if (import.meta.main) {
|
|
|
10603
10612
|
});
|
|
10604
10613
|
}
|
|
10605
10614
|
export {
|
|
10615
|
+
resolveTaskExecuteDedupeCooldownMs,
|
|
10606
10616
|
extractRequiredValidationStepsFromVisionMarkdown,
|
|
10607
10617
|
buildTaskExecuteDedupeKey,
|
|
10608
10618
|
RemoteBuddyOrchestrator
|
|
package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py
CHANGED
@@ -109,11 +109,13 @@ _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
|
109
109
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
110
110
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
111
111
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
112
|
-
_SMALL_TASK_NO_EDIT_WATCHDOG_S =
|
|
112
|
+
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
|
|
113
|
+
_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
|
|
113
114
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
114
115
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
115
116
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
116
117
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
118
|
+
_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
117
119
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
118
120
|
|
|
119
121
|
|
|
@@ -591,6 +593,21 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
591
593
|
"browser smoke",
|
|
592
594
|
"web delivery",
|
|
593
595
|
"navigation trustworthy",
|
|
596
|
+
"test-only",
|
|
597
|
+
"test only",
|
|
598
|
+
"contract test",
|
|
599
|
+
"contract coverage",
|
|
600
|
+
"ranking contract",
|
|
601
|
+
"focused scenario",
|
|
602
|
+
"targeted test",
|
|
603
|
+
"one-file",
|
|
604
|
+
"one file",
|
|
605
|
+
"single-file",
|
|
606
|
+
"single file",
|
|
607
|
+
"max_files_to_edit: 1",
|
|
608
|
+
"max_files_to_edit=1",
|
|
609
|
+
"maxfilestoedit: 1",
|
|
610
|
+
"maxfilestoedit=1",
|
|
594
611
|
)
|
|
595
612
|
heavy_markers = (
|
|
596
613
|
"merge-conflict",
|
|
@@ -607,6 +624,34 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
607
624
|
)
|
|
608
625
|
|
|
609
626
|
|
|
627
|
+
def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
628
|
+
text = str(prompt or "").lower()
|
|
629
|
+
if not text:
|
|
630
|
+
return False
|
|
631
|
+
narrow_markers = (
|
|
632
|
+
"contract test",
|
|
633
|
+
"contract coverage",
|
|
634
|
+
"ranking contract",
|
|
635
|
+
"test-only",
|
|
636
|
+
"test only",
|
|
637
|
+
"targeted test",
|
|
638
|
+
"focused scenario",
|
|
639
|
+
)
|
|
640
|
+
if not any(marker in text for marker in narrow_markers):
|
|
641
|
+
return False
|
|
642
|
+
broad_markers = (
|
|
643
|
+
"full render harness",
|
|
644
|
+
"full-surface",
|
|
645
|
+
"full surface",
|
|
646
|
+
"e2e",
|
|
647
|
+
"browser validation",
|
|
648
|
+
"browser smoke",
|
|
649
|
+
"migration",
|
|
650
|
+
"broad refactor",
|
|
651
|
+
)
|
|
652
|
+
return not any(marker in text for marker in broad_markers)
|
|
653
|
+
|
|
654
|
+
|
|
610
655
|
def _resolve_task_reasoning_effort(
|
|
611
656
|
configured_effort: str,
|
|
612
657
|
prompt: str,
|
|
@@ -652,7 +697,9 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
652
697
|
return None
|
|
653
698
|
|
|
654
699
|
prompt_text = str(prompt or "").lower()
|
|
655
|
-
if "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
700
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
701
|
+
default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
|
|
702
|
+
elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
656
703
|
default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
|
|
657
704
|
else:
|
|
658
705
|
default_s = (
|
|
@@ -703,7 +750,9 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
703
750
|
else:
|
|
704
751
|
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
705
752
|
|
|
706
|
-
if _looks_like_web_review_prompt(prompt):
|
|
753
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
754
|
+
default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
|
|
755
|
+
elif _looks_like_web_review_prompt(prompt):
|
|
707
756
|
default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
|
|
708
757
|
elif _looks_like_small_task_prompt(prompt):
|
|
709
758
|
default_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
|
|
@@ -766,6 +815,8 @@ def _describe_publishable_paths(paths: List[str]) -> str:
|
|
|
766
815
|
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
767
816
|
lines = [
|
|
768
817
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
818
|
+
"This recovery attempt has a patch-first contract: make one publishable edit before any further broad discovery. If you need one narrow read of the hinted file to place the edit, do that once, then patch immediately.",
|
|
819
|
+
"Do not repeat the same read/search sequence from the previous attempt. Re-reading the target without editing is a failed recovery.",
|
|
769
820
|
"Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
|
|
770
821
|
"Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
|
|
771
822
|
"Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
|
|
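package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py
CHANGED
@@ -1386,6 +1386,16 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):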
|
|
|
1386
1386
|
|
|
1387
1387
|
self.assertEqual(watchdog_s, 240)
|
|
1388
1388
|
|
|
1389
|
+
def test_narrow_contract_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1390
|
+
prompt = (
|
|
1391
|
+
"Update app/__tests__/opportunity-graph.contract.test.ts to tighten the "
|
|
1392
|
+
"ranking contract test. Keep this test-only and preserve existing behavior."
|
|
1393
|
+
)
|
|
1394
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1395
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1396
|
+
|
|
1397
|
+
self.assertEqual(watchdog_s, 180)
|
|
1398
|
+
|
|
1389
1399
|
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
1390
1400
|
guidance = _build_no_edit_recovery_guidance(
|
|
1391
1401
|
"item.completed | still inspecting",
|
|
@@ -1393,6 +1403,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1393
1403
|
)
|
|
1394
1404
|
|
|
1395
1405
|
self.assertIn("node_modules", guidance)
|
|
1406
|
+
self.assertIn("patch-first contract", guidance)
|
|
1407
|
+
self.assertIn("Re-reading the target without editing is a failed recovery", guidance)
|
|
1396
1408
|
self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
|
|
1397
1409
|
self.assertIn("Previous Codex event trace excerpt", guidance)
|
|
1398
1410
|
|
|
@@ -1411,6 +1423,15 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1411
1423
|
self.assertEqual(no_edit_s, 240)
|
|
1412
1424
|
self.assertEqual(rollout_s, 180)
|
|
1413
1425
|
|
|
1426
|
+
def test_narrow_contract_rollout_watchdog_is_earlier_than_no_edit_watchdog(self) -> None:
|
|
1427
|
+
prompt = "Tighten the focused contract test for one ranking behavior."
|
|
1428
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
|
|
1429
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1430
|
+
rollout_s = _resolve_rollout_watchdog_seconds(prompt, 1200, no_edit_s)
|
|
1431
|
+
|
|
1432
|
+
self.assertEqual(no_edit_s, 180)
|
|
1433
|
+
self.assertEqual(rollout_s, 120)
|
|
1434
|
+
|
|
1414
1435
|
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
|
|
1415
1436
|
trace = {
|
|
1416
1437
|
"summaries": [
|
|
package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts
CHANGED
@@ -1785,9 +1785,10 @@ async function workerLoop(
|
|
|
1785
1785
|
terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
|
|
1786
1786
|
executorBackend: resolveExecutor(CONFIG),
|
|
1787
1787
|
summary: result.summary,
|
|
1788
|
-
watchdogFired:
|
|
1789
|
-
|
|
1790
|
-
|
|
1788
|
+
watchdogFired:
|
|
1789
|
+
/watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
|
|
1790
|
+
`${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
|
|
1791
|
+
),
|
|
1791
1792
|
metadata: {
|
|
1792
1793
|
workerId: opts.workerId,
|
|
1793
1794
|
docker: Boolean(dockerExecutor),
|