@pushpalsdev/cli 1.1.41 → 1.1.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -7773,20 +7773,29 @@ ${JSON.stringify(input.messages ?? [])}`),
|
|
|
7773
7773
|
return;
|
|
7774
7774
|
}
|
|
7775
7775
|
this.setPhase("scoring");
|
|
7776
|
-
|
|
7777
|
-
|
|
7778
|
-
|
|
7779
|
-
|
|
7780
|
-
|
|
7781
|
-
|
|
7782
|
-
|
|
7783
|
-
|
|
7784
|
-
|
|
7785
|
-
|
|
7786
|
-
|
|
7787
|
-
|
|
7788
|
-
|
|
7789
|
-
|
|
7776
|
+
let scoringJson = { scores: [] };
|
|
7777
|
+
try {
|
|
7778
|
+
const scoringPhase = await this.llmPhase("scoring", runId, snapshot.snapshot_id, {
|
|
7779
|
+
system: SCORING_SYSTEM_PROMPT,
|
|
7780
|
+
json: true,
|
|
7781
|
+
maxTokens: 1400,
|
|
7782
|
+
temperature: 0.1,
|
|
7783
|
+
messages: [
|
|
7784
|
+
{
|
|
7785
|
+
role: "user",
|
|
7786
|
+
content: JSON.stringify({ candidates: normalizedCandidates, top_k: this.cfg.topK })
|
|
7787
|
+
}
|
|
7788
|
+
]
|
|
7789
|
+
});
|
|
7790
|
+
llmCalls.push(scoringPhase.llmCall);
|
|
7791
|
+
scoringJson = scoringPhase.json;
|
|
7792
|
+
} catch (error) {
|
|
7793
|
+
if (error instanceof Error && error.message === "autonomy scoring phase timeout") {
|
|
7794
|
+
console.warn(`[RemoteBuddyAutonomousEngine] tick ${runId}: scoring timed out; continuing with deterministic candidate scoring.`);
|
|
7795
|
+
} else {
|
|
7796
|
+
throw error;
|
|
7797
|
+
}
|
|
7798
|
+
}
|
|
7790
7799
|
if (this.isSnapshotExpired(snapshot) || Date.now() > cycleDeadline) {
|
|
7791
7800
|
this.setPhase("record_snapshot_expired");
|
|
7792
7801
|
await this.recordSnapshotExpired(runId, snapshot.snapshot_id, llmCalls, candidatesPayload);
|
|
@@ -2720,7 +2720,7 @@ def _run_codex_task(
|
|
|
2720
2720
|
)
|
|
2721
2721
|
startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
|
|
2722
2722
|
communicate_timeout_s,
|
|
2723
|
-
recovery_attempt=
|
|
2723
|
+
recovery_attempt=recovery_depth,
|
|
2724
2724
|
)
|
|
2725
2725
|
startup_stall_deadline = (
|
|
2726
2726
|
started_at + float(startup_stall_watchdog_s)
|
|
@@ -2853,6 +2853,11 @@ def _run_codex_task(
|
|
|
2853
2853
|
first_no_edit_command_progress_at
|
|
2854
2854
|
+ float(no_edit_command_progress_cap_s),
|
|
2855
2855
|
)
|
|
2856
|
+
if deadline is not None and command_grace_deadline > 0:
|
|
2857
|
+
command_grace_deadline = min(
|
|
2858
|
+
command_grace_deadline,
|
|
2859
|
+
max(now, deadline - 1.0),
|
|
2860
|
+
)
|
|
2856
2861
|
if command_progress_cap_reached:
|
|
2857
2862
|
log.info(
|
|
2858
2863
|
"No-edit watchdog observed Codex tool progress for "
|
|
@@ -1794,6 +1794,86 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1794
1794
|
self.assertIn("no publishable changes", str(result.get("summary") or ""))
|
|
1795
1795
|
self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1796
1796
|
|
|
1797
|
+
def test_run_codex_task_recovery_command_grace_stops_before_child_timeout(self) -> None:
|
|
1798
|
+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-before-timeout-") as temp_dir:
|
|
1799
|
+
repo = Path(temp_dir) / "repo"
|
|
1800
|
+
repo.mkdir(parents=True, exist_ok=True)
|
|
1801
|
+
(repo / "README.md").write_text("# no edit before timeout repo\n", encoding="utf-8")
|
|
1802
|
+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1803
|
+
subprocess.run(
|
|
1804
|
+
["git", "config", "user.name", "PushPals Test"],
|
|
1805
|
+
cwd=repo,
|
|
1806
|
+
check=True,
|
|
1807
|
+
capture_output=True,
|
|
1808
|
+
text=True,
|
|
1809
|
+
)
|
|
1810
|
+
subprocess.run(
|
|
1811
|
+
["git", "config", "user.email", "pushpals-tests@example.com"],
|
|
1812
|
+
cwd=repo,
|
|
1813
|
+
check=True,
|
|
1814
|
+
capture_output=True,
|
|
1815
|
+
text=True,
|
|
1816
|
+
)
|
|
1817
|
+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
|
|
1818
|
+
subprocess.run(
|
|
1819
|
+
["git", "commit", "-m", "chore: seed no-edit before timeout repo"],
|
|
1820
|
+
cwd=repo,
|
|
1821
|
+
check=True,
|
|
1822
|
+
capture_output=True,
|
|
1823
|
+
text=True,
|
|
1824
|
+
)
|
|
1825
|
+
|
|
1826
|
+
stub_path = Path(temp_dir) / "fake_codex_no_edit_before_timeout.py"
|
|
1827
|
+
stub_path.write_text(
|
|
1828
|
+
"\n".join(
|
|
1829
|
+
[
|
|
1830
|
+
"import json",
|
|
1831
|
+
"import sys",
|
|
1832
|
+
"import time",
|
|
1833
|
+
"",
|
|
1834
|
+
"prompt = sys.stdin.read()",
|
|
1835
|
+
"print(json.dumps({'type': 'thread.started'}), flush=True)",
|
|
1836
|
+
"print(json.dumps({'type': 'turn.started'}), flush=True)",
|
|
1837
|
+
"if 'No-edit watchdog recovery' in prompt:",
|
|
1838
|
+
" print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'in_progress'}}), flush=True)",
|
|
1839
|
+
" time.sleep(0.1)",
|
|
1840
|
+
" print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read', 'type': 'command_execution', 'command': 'cat README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# no edit before timeout repo'}}), flush=True)",
|
|
1841
|
+
" time.sleep(6)",
|
|
1842
|
+
" raise SystemExit(0)",
|
|
1843
|
+
"",
|
|
1844
|
+
"print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Still inspecting without a patch.'}}), flush=True)",
|
|
1845
|
+
"time.sleep(6)",
|
|
1846
|
+
]
|
|
1847
|
+
),
|
|
1848
|
+
encoding="utf-8",
|
|
1849
|
+
)
|
|
1850
|
+
|
|
1851
|
+
env_overrides = {
|
|
1852
|
+
"PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
|
|
1853
|
+
"PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
|
|
1854
|
+
"OPENAI_API_KEY": "pushpals-no-edit-before-timeout-test-key",
|
|
1855
|
+
"WORKERPALS_OPENAI_CODEX_JSON": "true",
|
|
1856
|
+
"WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "6",
|
|
1857
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
|
|
1858
|
+
"WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5",
|
|
1859
|
+
"WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
|
|
1860
|
+
}
|
|
1861
|
+
with mock.patch.dict(os.environ, env_overrides, clear=False):
|
|
1862
|
+
result = _run_codex_task(
|
|
1863
|
+
str(repo),
|
|
1864
|
+
"Make one focused test edit after the hinted file read.",
|
|
1865
|
+
[],
|
|
1866
|
+
)
|
|
1867
|
+
|
|
1868
|
+
self.assertFalse(result.get("ok"), result)
|
|
1869
|
+
self.assertEqual(result.get("exitCode"), 124)
|
|
1870
|
+
self.assertEqual(
|
|
1871
|
+
result.get("summary"),
|
|
1872
|
+
"openai_codex made no publishable changes before the no-edit watchdog",
|
|
1873
|
+
)
|
|
1874
|
+
self.assertNotIn("execution timed out", str(result.get("summary") or ""))
|
|
1875
|
+
self.assertEqual(result.get("cooldownMs"), 600000)
|
|
1876
|
+
|
|
1797
1877
|
def test_run_codex_task_no_edit_watchdog_rechecks_transient_publishable_progress(self) -> None:
|
|
1798
1878
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-recheck-") as temp_dir:
|
|
1799
1879
|
repo = Path(temp_dir) / "repo"
|