@pushpalsdev/cli 1.1.9 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pushpals-cli.js +107 -9
- package/package.json +1 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +6 -1
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +126 -0
- package/runtime/sandbox/apps/workerpals/src/backends/shared/executor_base.py +177 -0
- package/runtime/sandbox/apps/workerpals/src/docker_executor.ts +8 -7
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +513 -7
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +168 -41
package/dist/pushpals-cli.js
CHANGED
|
@@ -1647,6 +1647,7 @@ var DEFAULT_STARTUP_GIT_REMOTE_TIMEOUT_MS = 1e4;
|
|
|
1647
1647
|
var DEFAULT_EMBEDDED_SERVICE_LAUNCH_WARN_MS = 5000;
|
|
1648
1648
|
var EMBEDDED_SERVICE_RESTART_MAX_ATTEMPTS = 4;
|
|
1649
1649
|
var WORKERPAL_STARTUP_READINESS_PROBE_MAX_MS = 15000;
|
|
1650
|
+
var CLI_SESSION_JOB_LOG_MAX_CHARS = 700;
|
|
1650
1651
|
var EMBEDDED_RUNTIME_SAFETY_CAP_DISABLE_ENV = "PUSHPALS_DISABLE_EMBEDDED_SAFETY_CAPS";
|
|
1651
1652
|
var EMBEDDED_RUNTIME_WINDOWS_SAFETY_CAPS = {
|
|
1652
1653
|
REMOTEBUDDY_WORKERPAL_STARTUP_TIMEOUT_MS: "120000",
|
|
@@ -3035,6 +3036,17 @@ async function downloadBinaryAssetWithWindowsCurlFallback(url, outPath, cause) {
|
|
|
3035
3036
|
renameSync(tmpPath, outPath);
|
|
3036
3037
|
return true;
|
|
3037
3038
|
}
|
|
3039
|
+
/**
 * Runs `worker(item, index)` for every element of `items`, keeping at most
 * `concurrency` invocations in flight at the same time.
 *
 * Items are handed out in index order from a shared cursor; each lane pulls
 * the next unclaimed index as soon as its previous call settles.
 *
 * @param items Array of work items.
 * @param concurrency Desired number of parallel lanes. It is floored and
 *   clamped to the range [1, items.length]; a value that is not a number
 *   falls back to a single lane.
 * @param worker Async callback invoked with `(item, index)`.
 * @returns A promise that resolves once every lane has drained the queue and
 *   rejects as soon as any worker call rejects.
 */
async function runWithConcurrency(items, concurrency, worker) {
  // Math.min/Math.max propagate NaN, and Array.from({ length: NaN }) builds an
  // empty array, which would start zero lanes and silently skip every item.
  const requested = Math.floor(concurrency);
  const laneLimit = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(items.length, laneLimit));
  let nextIndex = 0;
  const lanes = Array.from({ length: workerCount }, async () => {
    while (nextIndex < items.length) {
      // Claim the index synchronously, before awaiting, so no two lanes can
      // pick up the same item.
      const currentIndex = nextIndex;
      nextIndex += 1;
      await worker(items[currentIndex], currentIndex);
    }
  });
  await Promise.all(lanes);
}
|
|
3038
3050
|
async function ensureRuntimeBinaries(runtimeRoot, runtimeTag) {
|
|
3039
3051
|
const platformKey = resolveRuntimePlatformKey();
|
|
3040
3052
|
console.log(`[pushpals] Preparing embedded runtime binaries for ${runtimeTag} (${platformKey})...`);
|
|
@@ -3056,14 +3068,15 @@ async function ensureRuntimeBinaries(runtimeRoot, runtimeTag) {
|
|
|
3056
3068
|
runtimeBinaries.sourceControlManager
|
|
3057
3069
|
];
|
|
3058
3070
|
const shouldRefreshAll = installedTag !== runtimeTag;
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3071
|
+
const assetsToDownload = requiredAssets.filter((binaryPath) => shouldRefreshAll || !existsSync5(binaryPath));
|
|
3072
|
+
if (assetsToDownload.length > 1) {
|
|
3073
|
+
console.log(`[pushpals] Downloading ${assetsToDownload.length} runtime binary asset(s) with bounded parallelism...`);
|
|
3074
|
+
}
|
|
3075
|
+
await runWithConcurrency(assetsToDownload, 3, async (binaryPath) => {
|
|
3063
3076
|
const assetName = binaryPath.split(/[\\/]/).pop() || "";
|
|
3064
3077
|
await downloadBinaryAsset(runtimeTag, assetName, binaryPath);
|
|
3065
|
-
|
|
3066
|
-
|
|
3078
|
+
});
|
|
3079
|
+
const downloadedCount = assetsToDownload.length;
|
|
3067
3080
|
writeFileSync(tagMarkerPath, `${runtimeTag}
|
|
3068
3081
|
`, "utf8");
|
|
3069
3082
|
cleanupLegacyRuntimeBinaryLayouts(runtimeRoot, platformKey, binDir);
|
|
@@ -4694,6 +4707,8 @@ ${tail}` : ""}`);
|
|
|
4694
4707
|
const deadline = Date.now() + DEFAULT_RUNTIME_BOOT_TIMEOUT_MS;
|
|
4695
4708
|
const readinessPhaseStartedAt = Date.now();
|
|
4696
4709
|
const optionalServiceExitWarned = new Set;
|
|
4710
|
+
let lastReadinessWaitLogAt = 0;
|
|
4711
|
+
let lastReadinessWaitDetail = "";
|
|
4697
4712
|
while (Date.now() < deadline) {
|
|
4698
4713
|
reportRemoteBuddyAutonomousEngineState();
|
|
4699
4714
|
if (maybeActivateRemoteBuddyWindowsFallback("silent_startup")) {
|
|
@@ -4737,6 +4752,17 @@ ${tail}` : ""}`);
|
|
|
4737
4752
|
}
|
|
4738
4753
|
const health = localBuddyEnabled ? await probeLocalBuddy(opts.localAgentUrl) : null;
|
|
4739
4754
|
const remoteBuddyHealth2 = await probeRemoteBuddySessionConsumer(opts.serverUrl, opts.sessionId);
|
|
4755
|
+
if (localBuddyEnabled && !health?.ok || !remoteBuddyHealth2.ok) {
|
|
4756
|
+
const localBuddyDetail = localBuddyEnabled ? health?.ok ? "LocalBuddy ready" : "LocalBuddy not ready" : "LocalBuddy skipped";
|
|
4757
|
+
const readinessDetail = `${localBuddyDetail}; ${remoteBuddyHealth2.detail}`;
|
|
4758
|
+
const now = Date.now();
|
|
4759
|
+
if (readinessDetail !== lastReadinessWaitDetail || now - lastReadinessWaitLogAt >= 5000) {
|
|
4760
|
+
console.log(`[pushpals] Waiting for embedded runtime readiness: ${readinessDetail}`);
|
|
4761
|
+
appendRuntimeServicesLogLine(runtimeServicesLogPath, `[pushpals] waiting for embedded runtime readiness: ${readinessDetail}`);
|
|
4762
|
+
lastReadinessWaitDetail = readinessDetail;
|
|
4763
|
+
lastReadinessWaitLogAt = now;
|
|
4764
|
+
}
|
|
4765
|
+
}
|
|
4740
4766
|
if ((!localBuddyEnabled || health?.ok) && remoteBuddyHealth2.ok) {
|
|
4741
4767
|
reportRemoteBuddyAutonomousEngineState();
|
|
4742
4768
|
const stabilityDeadline = Date.now() + DEFAULT_SERVICE_STABILITY_GRACE_MS;
|
|
@@ -5192,6 +5218,31 @@ function formatSessionEventLine(event) {
|
|
|
5192
5218
|
const type = String(event.type ?? "").toLowerCase();
|
|
5193
5219
|
const from = String(event.from ?? "");
|
|
5194
5220
|
const payload = event.payload ?? {};
|
|
5221
|
+
if (type === "job_enqueued") {
|
|
5222
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5223
|
+
const kind = String(payload.kind ?? "").trim();
|
|
5224
|
+
const taskId = String(payload.taskId ?? "").slice(0, 8);
|
|
5225
|
+
const detail = kind || (taskId ? `task ${taskId}` : "queued");
|
|
5226
|
+
return `[job ${jobId}] queued: ${detail}`;
|
|
5227
|
+
}
|
|
5228
|
+
if (type === "job_claimed") {
|
|
5229
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5230
|
+
const workerId = String(payload.workerId ?? "").trim();
|
|
5231
|
+
return `[job ${jobId}] claimed${workerId ? ` by ${workerId}` : ""}`;
|
|
5232
|
+
}
|
|
5233
|
+
if (type === "job_log") {
|
|
5234
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5235
|
+
const stream = String(payload.stream ?? "").toLowerCase() === "stderr" ? " stderr" : "";
|
|
5236
|
+
const phase = compactCliSessionJobLogLine(String(payload.phase ?? "").trim());
|
|
5237
|
+
const phaseLabel = phase ? ` phase:${phase}` : "";
|
|
5238
|
+
const line = formatCliSessionJobLogLine(String(payload.line ?? "").trim());
|
|
5239
|
+
return line ? `[job ${jobId}${stream}${phaseLabel}] ${line}` : null;
|
|
5240
|
+
}
|
|
5241
|
+
if (type === "job_failed") {
|
|
5242
|
+
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5243
|
+
const message = String(payload.message ?? "").trim();
|
|
5244
|
+
return `[job ${jobId}] failed: ${message || "unknown"}`;
|
|
5245
|
+
}
|
|
5195
5246
|
if (!shouldDisplayInteractiveSessionEvent(event))
|
|
5196
5247
|
return null;
|
|
5197
5248
|
if (type === "message")
|
|
@@ -5217,10 +5268,10 @@ function formatSessionEventLine(event) {
|
|
|
5217
5268
|
const summary = String(payload.summary ?? "").trim();
|
|
5218
5269
|
return `[task ${taskId}] completed${summary ? `: ${summary}` : ""}`;
|
|
5219
5270
|
}
|
|
5220
|
-
if (type === "
|
|
5271
|
+
if (type === "job_completed") {
|
|
5221
5272
|
const jobId = String(payload.jobId ?? "").slice(0, 8);
|
|
5222
|
-
const
|
|
5223
|
-
return `[job ${jobId}]
|
|
5273
|
+
const summary = String(payload.summary ?? "").trim();
|
|
5274
|
+
return `[job ${jobId}] completed${summary ? `: ${summary}` : ""}`;
|
|
5224
5275
|
}
|
|
5225
5276
|
if (type === "error") {
|
|
5226
5277
|
const message = String(payload.message ?? "").trim();
|
|
@@ -5234,6 +5285,53 @@ function formatSessionEventLine(event) {
|
|
|
5234
5285
|
}
|
|
5235
5286
|
return null;
|
|
5236
5287
|
}
|
|
5288
|
+
/**
 * Collapses every whitespace run in `line` to a single space, trims the
 * result, and caps it at CLI_SESSION_JOB_LOG_MAX_CHARS characters.
 *
 * When the text is too long it is cut and suffixed with "..." so the total
 * length never exceeds the cap.
 *
 * @param line Raw log text (callers in this file pass a string).
 * @returns The compacted, possibly truncated line.
 */
function compactCliSessionJobLogLine(line) {
  const compacted = line.replace(/\s+/g, " ").trim();
  if (compacted.length <= CLI_SESSION_JOB_LOG_MAX_CHARS)
    return compacted;
  let cut = CLI_SESSION_JOB_LOG_MAX_CHARS - 3;
  // slice() counts UTF-16 code units; if the cut lands right after a high
  // surrogate, drop it too so the output never ends in half of a character.
  const lastUnit = compacted.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
    cut -= 1;
  return `${compacted.slice(0, cut)}...`;
}
|
|
5294
|
+
/**
 * Turns one raw job log line into the text shown in the CLI session view.
 *
 * The line is compacted first; empty or suppressed lines yield `null`.
 * Codex "item.completed | ..." / "item.updated | ..." lines are reduced to
 * "[codex] <payload>"; anything else is returned in compacted form.
 *
 * @param line Raw log line.
 * @returns The display string, or `null` when nothing should be printed.
 */
function formatCliSessionJobLogLine(line) {
  const normalized = compactCliSessionJobLogLine(line);
  if (!normalized || shouldSuppressCliSessionJobLogLine(normalized)) {
    return null;
  }
  // Capture group 1 is whatever follows the "|" separator of a codex item event.
  const codexPayload = /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.(?:completed|updated)\s+\|\s+(.+)$/i.exec(normalized)?.[1];
  return codexPayload ? `[codex] ${compactCliSessionJobLogLine(codexPayload)}` : normalized;
}
|
|
5306
|
+
/**
 * Decides whether a job log line is noise that the CLI session view should hide.
 *
 * Blank input is always suppressed. Otherwise the trimmed text is tested
 * against a fixed list of prefixes: result sentinels, DockerExecutor
 * artifact-link notices, and OpenAICodexExecutor bookkeeping messages.
 *
 * @param line Candidate log line; `null`/`undefined` are treated as empty.
 * @returns `true` when the line should not be displayed.
 */
function shouldSuppressCliSessionJobLogLine(line) {
  const candidate = String(line ?? "").trim();
  if (candidate === "") {
    return true;
  }
  // None of these patterns use the `g` flag, so `.test` keeps no state between calls.
  const noisePatterns = [
    /^(___RESULT___|__PUSHPALS_OH_RESULT__)\b/,
    /^\[DockerExecutor\]\s+Linked worktree dependency artifact/i,
    /^\[Openai_codexExecutor\]\s+Spawning openai_codex executor/i,
    /^\[OpenAICodexExecutor\]\s+(?:Planner guidance|Codex auth mode|ChatGPT auth mode|Starting codex exec|codex exec finished|Codex JSON stream captured|Codex stdout captured|No reasoning-like|Reasoning-like event|Usage observed|Temporarily masked repo-local)/i,
    /^\[OpenAICodexExecutor\]\s+codex exec still running\b/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:thread|turn)\.started\b/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.started\b/i,
    // Bare item events carry no payload after the event name, so they add nothing.
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.completed\s*$/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.updated\s*$/i,
    /^\[OpenAICodexExecutor\]\s+\[stderr\].*codex_core::tools::router: error=exec_command failed/i
  ];
  return noisePatterns.some((pattern) => pattern.test(candidate));
}
|
|
5237
5335
|
function buildSessionEventReplayFingerprint(event) {
|
|
5238
5336
|
const type = String(event.type ?? "").trim().toLowerCase();
|
|
5239
5337
|
if (type !== "status")
|
package/package.json
CHANGED
|
@@ -1432,10 +1432,15 @@ def _merge_usage_records(first: Any, second: Any) -> Dict[str, Any]:
|
|
|
1432
1432
|
return merged
|
|
1433
1433
|
|
|
1434
1434
|
|
|
1435
|
+
def _is_publishable_changed_path(path: str) -> bool:
|
|
1436
|
+
normalized = str(path or "").replace("\\", "/").lower()
|
|
1437
|
+
return not re.search(r"(^|/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(/|$)", normalized)
|
|
1438
|
+
|
|
1439
|
+
|
|
1435
1440
|
def _codex_changed_paths(repo: str, baseline_snapshot: List[str]) -> Tuple[List[str], List[str], List[str]]:
    """Classify the repo's changed paths relative to a baseline snapshot.

    Args:
        repo: Repository path handed to ``summarize_git_changes``.
        baseline_snapshot: Paths that were already reported as changed before
            the run (presumably captured by the caller; confirm there).

    Returns:
        A ``(changed_paths, delta, effective)`` tuple where ``changed_paths``
        is everything ``summarize_git_changes`` reports, ``delta`` is the
        subset not present in the baseline, and ``effective`` is ``delta``
        (or ``changed_paths`` when ``delta`` is empty) with non-publishable
        artifact paths filtered out.
    """
    changed_paths = summarize_git_changes(repo)
    # Set lookup keeps the delta computation linear instead of rescanning the
    # baseline list once per changed path.
    baseline = set(baseline_snapshot)
    delta = [p for p in changed_paths if p not in baseline]
    candidates = delta if delta else changed_paths
    effective = [p for p in candidates if _is_publishable_changed_path(p)]
    return changed_paths, delta, effective
|
|
1440
1445
|
|
|
1441
1446
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import base64
|
|
1
2
|
import os
|
|
2
3
|
import re
|
|
3
4
|
import json
|
|
@@ -19,6 +20,7 @@ from executor_base import (
|
|
|
19
20
|
Logger,
|
|
20
21
|
SettingsResolver,
|
|
21
22
|
config_dir_for_runtime_config,
|
|
23
|
+
parse_task_execute_payload,
|
|
22
24
|
runtime_config,
|
|
23
25
|
)
|
|
24
26
|
from openai_codex_executor import (
|
|
@@ -30,6 +32,7 @@ from openai_codex_executor import (
|
|
|
30
32
|
_resolve_reasoning_effort,
|
|
31
33
|
_build_instruction,
|
|
32
34
|
_collect_disallowed_shell_wrapper_rejections,
|
|
35
|
+
_codex_changed_paths,
|
|
33
36
|
_detect_codex_workaround_signal,
|
|
34
37
|
_extract_usage_counts,
|
|
35
38
|
_load_prompt_template,
|
|
@@ -257,6 +260,90 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
257
260
|
self.assertIn("Keep assertions strict", prompt)
|
|
258
261
|
self.assertIn("bun test tests/localbuddy.request-status.test.ts", prompt)
|
|
259
262
|
|
|
263
|
+
def test_parse_payload_adds_structured_planning_guidance(self) -> None:
    """A payload with a full ``planning`` section yields the expected guidance text."""
    planning = {
        "intent": "code_change",
        "riskLevel": "medium",
        "queuePriority": "normal",
        "queueWaitBudgetMs": 90_000,
        "executionBudgetMs": 1_800_000,
        "finalizationBudgetMs": 120_000,
        "scope": {
            "readAnywhere": True,
            "writeAllowed": True,
            "writeGlobs": ["app/**", "scripts/**"],
        },
        "targetPaths": ["app/__tests__/_layout.autonomy.test.ts"],
        "discovery": {
            "ripgrepQueries": ['rg "home-screen|web:e2e" app scripts'],
            "likelyDirs": ["app", "scripts"],
            "keywords": ["home-screen", "web:e2e"],
        },
        "acceptanceCriteria": ["Home shell startup is assertable"],
        "validationSteps": ["bun test", "bun run web:e2e"],
        "requiredValidationSteps": ["bun run web:e2e"],
    }
    expected_fragments = (
        "Worker speed/convergence contract",
        "roughly 20 minutes",
        "Task planning contract from PushPals",
        "Worker phase contract",
        "Write globs are relevance hints, not hard limits",
        "app/__tests__/_layout.autonomy.test.ts",
        "Home shell startup is assertable",
        "bun run web:e2e",
    )
    with tempfile.TemporaryDirectory(prefix="pushpals-planning-guidance-") as workdir:
        repo_dir = Path(workdir) / "repo"
        repo_dir.mkdir(parents=True, exist_ok=True)
        job = {
            "kind": "task.execute",
            "repo": str(repo_dir),
            "params": {
                "instruction": "Improve the game startup smoke path",
                "schemaVersion": 2,
                "planning": planning,
            },
        }
        # The payload is passed as base64-encoded JSON in the second argv slot.
        blob = base64.b64encode(json.dumps(job).encode("utf-8")).decode("ascii")

        parsed = parse_task_execute_payload(["executor", blob], logger=Logger("[test]"))
        combined = "\n".join(parsed.supplemental_guidance)

        for fragment in expected_fragments:
            self.assertIn(fragment, combined)
|
|
310
|
+
|
|
311
|
+
def test_parse_payload_prefers_helper_tests_for_visual_derivation_tasks(self) -> None:
    """A visually-worded instruction adds the visual/rendering testing rule to the guidance."""
    instruction = (
        "Improve battlefield readability by making planet ownership rings, "
        "projectile trails, and danger cues clearer."
    )
    planning = {
        "intent": "code_change",
        "riskLevel": "medium",
        "queuePriority": "normal",
        "queueWaitBudgetMs": 90_000,
        "executionBudgetMs": 1_800_000,
        "finalizationBudgetMs": 120_000,
        "scope": {"readAnywhere": True, "writeAllowed": True},
        "targetPaths": ["app/game.tsx"],
        "acceptanceCriteria": ["Projectile and ownership readability improve"],
        "validationSteps": ["bun test app/__tests__/battlefieldReadability.test.ts"],
    }
    with tempfile.TemporaryDirectory(prefix="pushpals-visual-guidance-") as workdir:
        repo_dir = Path(workdir) / "repo"
        repo_dir.mkdir(parents=True, exist_ok=True)
        job = {
            "kind": "task.execute",
            "repo": str(repo_dir),
            "params": {
                "instruction": instruction,
                "schemaVersion": 2,
                "planning": planning,
            },
        }
        # The payload is passed as base64-encoded JSON in the second argv slot.
        blob = base64.b64encode(json.dumps(job).encode("utf-8")).decode("ascii")

        parsed = parse_task_execute_payload(["executor", blob], logger=Logger("[test]"))
        combined = "\n".join(parsed.supplemental_guidance)

        for fragment in (
            "Visual/rendering task rule",
            "prefer pure helper/state/style-prop tests",
            "full React Native/component render regression",
        ):
            self.assertIn(fragment, combined)
|
|
346
|
+
|
|
260
347
|
def test_detects_codex_workaround_signals(self) -> None:
|
|
261
348
|
signal = _detect_codex_workaround_signal(
|
|
262
349
|
"Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
|
|
@@ -523,6 +610,45 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
523
610
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
524
611
|
self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
|
|
525
612
|
|
|
613
|
+
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
    """Untracked files under node_modules/ and outputs/ are reported as changes but never as publishable."""
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as workdir:
        repo_dir = Path(workdir) / "repo"
        repo_dir.mkdir(parents=True, exist_ok=True)
        (repo_dir / "README.md").write_text("# artifact delta test\n", encoding="utf-8")

        def git(*args: str) -> None:
            # Every git invocation in this test shares the same cwd and strictness.
            subprocess.run(["git", *args], cwd=repo_dir, check=True, capture_output=True, text=True)

        git("init")
        git("config", "user.name", "PushPals Test")
        git("config", "user.email", "pushpals-tests@example.com")
        git("add", "README.md")
        git("commit", "-m", "chore: seed artifact test")

        # Create only artifact-style files after the seed commit.
        for folder, filename in (("node_modules", "linked.txt"), ("outputs", "runtime.log")):
            (repo_dir / folder).mkdir()
            (repo_dir / folder / filename).write_text("artifact\n", encoding="utf-8")

        all_changed, new_changes, publishable = _codex_changed_paths(str(repo_dir), [])

        self.assertGreaterEqual(len(all_changed), 2)
        self.assertGreaterEqual(len(new_changes), 2)
        self.assertEqual(publishable, [])
|
|
651
|
+
|
|
526
652
|
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
|
|
527
653
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
|
|
528
654
|
repo = Path(temp_dir) / "repo"
|
|
@@ -717,6 +717,179 @@ def _is_non_actionable_planner_guidance(text: str) -> bool:
|
|
|
717
717
|
return any(marker in lower for marker in blocked_markers)
|
|
718
718
|
|
|
719
719
|
|
|
720
|
+
def _string_list(value: Any, *, limit: int = 12, max_chars: int = 220) -> List[str]:
    """Normalise a list of arbitrary items into at most ``limit`` non-empty strings.

    Args:
        value: Candidate list; anything that is not a ``list`` yields ``[]``.
        limit: Maximum number of entries to return. A non-positive limit
            yields ``[]``.
        max_chars: Forwarded to ``to_single_line`` as its second argument
            (presumably a per-entry length cap; confirm against that helper).

    Returns:
        The non-empty results of ``to_single_line`` in input order.
    """
    # Check the limit up front: testing it only after an append would let a
    # limit of 0 still return one entry.
    if not isinstance(value, list) or limit <= 0:
        return []
    out: List[str] = []
    for item in value:
        text = to_single_line(item, max_chars)
        if not text:
            continue
        out.append(text)
        if len(out) >= limit:
            break
    return out
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def _append_list_guidance(lines: List[str], label: str, values: List[str]) -> None:
|
|
734
|
+
if not values:
|
|
735
|
+
return
|
|
736
|
+
lines.append(f"- {label}:")
|
|
737
|
+
for value in values:
|
|
738
|
+
lines.append(f" - {value}")
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _joined_task_text(params: Dict[str, Any]) -> str:
|
|
742
|
+
pieces: List[str] = []
|
|
743
|
+
|
|
744
|
+
def collect(value: Any) -> None:
|
|
745
|
+
if isinstance(value, str):
|
|
746
|
+
pieces.append(value)
|
|
747
|
+
elif isinstance(value, list):
|
|
748
|
+
for item in value:
|
|
749
|
+
collect(item)
|
|
750
|
+
elif isinstance(value, dict):
|
|
751
|
+
for item in value.values():
|
|
752
|
+
collect(item)
|
|
753
|
+
|
|
754
|
+
collect(params.get("instruction"))
|
|
755
|
+
collect(params.get("plannerWorkerInstruction"))
|
|
756
|
+
collect(params.get("qualityRevisionHint"))
|
|
757
|
+
planning = params.get("planning")
|
|
758
|
+
if isinstance(planning, dict):
|
|
759
|
+
collect(planning.get("targetPaths"))
|
|
760
|
+
collect(planning.get("acceptanceCriteria"))
|
|
761
|
+
collect(planning.get("validationSteps"))
|
|
762
|
+
collect(planning.get("requiredValidationSteps"))
|
|
763
|
+
collect(planning.get("discovery"))
|
|
764
|
+
return "\n".join(pieces).lower()
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def _looks_like_visual_derivation_task(params: Dict[str, Any]) -> bool:
    """Heuristically decide whether a task is about visual/rendering behaviour.

    Returns True when the lower-cased task text from ``_joined_task_text``
    contains any of the marker substrings below.
    """
    haystack = _joined_task_text(params)
    # NOTE(review): this is plain substring matching, so a short marker such as
    # "ring" also hits words like "string"; confirm that breadth is intended.
    for marker in (
        "visual",
        "readability",
        "battlefield",
        "render",
        "rendering",
        "projectile",
        "planet",
        "ship",
        "ring",
        "danger",
        "threat",
        "ownership",
        "dense action",
        "ui surface",
        "style",
        "styles",
    ):
        if marker in haystack:
            return True
    return False
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def _build_efficiency_guidance(params: Dict[str, Any]) -> str:
    """Build the speed/convergence guidance block for a worker task.

    The three base rules are always included. Two further rules about
    testing visual work are appended when
    ``_looks_like_visual_derivation_task(params)`` is true. Lines are joined
    with newlines.
    """
    base_rules = (
        "Worker speed/convergence contract from PushPals:",
        "- Target useful completion in roughly 20 minutes for small or medium repo tasks; optimize for the smallest coherent patch over exhaustive exploration.",
        "- Phase soft budgets: discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
        "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state coverage.",
    )
    visual_rules = (
        "- Visual/rendering task rule: prefer pure helper/state/style-prop tests for derived visual cues. Use a full React Native/component render regression only if the repo already has a stable harness for that exact surface.",
        "- Full-surface React Native tests are a last resort for visual derivation work; do not spend the job constructing broad mocks just to assert pixels or nested component trees.",
    )
    sections: List[str] = list(base_rules)
    if _looks_like_visual_derivation_task(params):
        sections.extend(visual_rules)
    return "\n".join(sections)
|
|
805
|
+
|
|
806
|
+
|
|
807
|
+
def _build_planning_guidance(params: Dict[str, Any]) -> str:
    """Render the ``planning`` section of a task payload as prompt guidance.

    Returns an empty string when ``params["planning"]`` is not a dict.
    Otherwise the text contains, in order: an optional intent/risk/priority
    summary, the fixed worker phase contract, repo access and write-scope
    hints from ``planning["scope"]``, target paths, discovery hints,
    acceptance criteria and validation steps. Text longer than 4000
    characters is cut at 4000 and a truncation notice is appended.
    """
    planning = params.get("planning")
    if not isinstance(planning, dict):
        return ""

    lines: List[str] = ["Task planning contract from PushPals:"]

    # One-line summary of whichever scalar planning fields are present.
    summary_parts = []
    for label, key in (("intent", "intent"), ("risk", "riskLevel"), ("priority", "queuePriority")):
        rendered = to_single_line(planning.get(key), 80)
        if rendered:
            summary_parts.append(f"{label}={rendered}")
    if summary_parts:
        lines.append(f"- Planning summary: {', '.join(summary_parts)}")

    # Fixed phase contract; identical for every task that has a planning section.
    lines.extend(
        [
            "- Worker phase contract: discovering -> editing -> focused validation -> full validation handoff -> final diff review.",
            " - discovering: inspect relevant files/artifacts and state the current hypothesis before editing.",
            " - editing: make the smallest behavior-owning patch.",
            " - focused validation: run targeted fast checks for the changed surface.",
            " - full validation: let PushPals ValidationGate own long required/browser checks unless one local confirmation is explicitly useful.",
            " - final diff review: remove unrelated churn before returning.",
            "- Phase soft budget: aim for discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m; if test harness setup starts consuming the budget, reduce to simpler helper/state coverage.",
        ]
    )

    scope = planning.get("scope")
    if isinstance(scope, dict):
        # Only real booleans are reported; missing or non-bool flags are left out.
        scope_parts = []
        for label, key in (("read_anywhere", "readAnywhere"), ("write_allowed", "writeAllowed")):
            flag = scope.get(key)
            if isinstance(flag, bool):
                scope_parts.append(f"{label}={str(flag).lower()}")
        if scope_parts:
            lines.append(f"- Repo access: {', '.join(scope_parts)}")

        write_globs = _string_list(scope.get("writeGlobs"), limit=10)
        if write_globs:
            lines.append("- Write globs are relevance hints, not hard limits; edit behavior-owning files as needed.")
            _append_list_guidance(lines, "Write-scope hints", write_globs)
        _append_list_guidance(lines, "Forbidden path hints", _string_list(scope.get("forbiddenGlobs"), limit=8))

    _append_list_guidance(lines, "Target path hints", _string_list(planning.get("targetPaths"), limit=12))

    discovery = planning.get("discovery")
    if isinstance(discovery, dict):
        for label, key, cap in (
            ("Suggested discovery commands", "ripgrepQueries", 8),
            ("Likely directories", "likelyDirs", 8),
            ("Search keywords", "keywords", 12),
        ):
            _append_list_guidance(lines, label, _string_list(discovery.get(key), limit=cap))

    for label, key, cap in (
        ("Acceptance criteria", "acceptanceCriteria", 10),
        ("Planned validation steps", "validationSteps", 8),
        ("Required vision.md validation steps", "requiredValidationSteps", 8),
    ):
        _append_list_guidance(lines, label, _string_list(planning.get(key), limit=cap, max_chars=260))

    guidance = "\n".join(lines).strip()
    if len(guidance) > 4000:
        # The notice is added after the cut, so the result can slightly exceed 4000 characters.
        guidance = guidance[:4000].rstrip() + "\n- Planning guidance truncated to stay within worker prompt budget."
    return guidance
|
|
891
|
+
|
|
892
|
+
|
|
720
893
|
def parse_task_execute_payload(
|
|
721
894
|
argv: List[str],
|
|
722
895
|
*,
|
|
@@ -765,6 +938,10 @@ def parse_task_execute_payload(
|
|
|
765
938
|
quality_revision_hint = str(params.get("qualityRevisionHint") or "").strip()
|
|
766
939
|
|
|
767
940
|
supplemental_guidance: List[str] = []
|
|
941
|
+
supplemental_guidance.append(_build_efficiency_guidance(params))
|
|
942
|
+
planning_guidance = _build_planning_guidance(params)
|
|
943
|
+
if planning_guidance:
|
|
944
|
+
supplemental_guidance.append(planning_guidance)
|
|
768
945
|
if planner_instruction and planner_instruction != instruction:
|
|
769
946
|
if _is_non_actionable_planner_guidance(planner_instruction):
|
|
770
947
|
log.info(
|
|
@@ -43,10 +43,10 @@ const WORKERPAL_SANDBOX_COMPONENT_LABEL = "pushpals.component=workerpals-sandbox
|
|
|
43
43
|
const DOCKER_IMAGE_INSPECT_TIMEOUT_MS = 15_000;
|
|
44
44
|
const DOCKER_IMAGE_BUILD_TIMEOUT_MS = 10 * 60_000;
|
|
45
45
|
const DOCKER_IMAGE_PULL_TIMEOUT_MS = 10 * 60_000;
|
|
46
|
-
const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS =
|
|
47
|
-
const BROWSER_VALIDATION_JOB_OVERHEAD_MS =
|
|
48
|
-
const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS =
|
|
49
|
-
const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS =
|
|
46
|
+
const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS = 3;
|
|
47
|
+
const BROWSER_VALIDATION_JOB_OVERHEAD_MS = 5 * 60_000;
|
|
48
|
+
const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS = 20 * 60_000;
|
|
49
|
+
const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS = 45 * 60_000;
|
|
50
50
|
|
|
51
51
|
function parseClampedInt(value: unknown, defaultValue: number, min: number, max: number): number {
|
|
52
52
|
const parsed =
|
|
@@ -312,7 +312,7 @@ export function resolveDockerJobTimeoutMs(
|
|
|
312
312
|
BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS,
|
|
313
313
|
Math.max(BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS, estimatedTimeoutMs),
|
|
314
314
|
);
|
|
315
|
-
return Math.max(baseTimeoutMs, boundedTimeoutMs);
|
|
315
|
+
return Math.max(Math.min(baseTimeoutMs, boundedTimeoutMs), BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS);
|
|
316
316
|
}
|
|
317
317
|
|
|
318
318
|
export class DockerExecutor {
|
|
@@ -1221,7 +1221,8 @@ export class DockerExecutor {
|
|
|
1221
1221
|
});
|
|
1222
1222
|
const timeoutMs = resolveDockerJobTimeoutMs(this.options.timeoutMs, job);
|
|
1223
1223
|
if (timeoutMs !== this.options.timeoutMs) {
|
|
1224
|
-
const
|
|
1224
|
+
const verb = timeoutMs > this.options.timeoutMs ? "Extended" : "Capped";
|
|
1225
|
+
const note = `[DockerExecutor] ${verb} job timeout for browser validation convergence: ${timeoutMs}ms (configured ${this.options.timeoutMs}ms).`;
|
|
1225
1226
|
console.log(note);
|
|
1226
1227
|
onLog?.("stdout", note);
|
|
1227
1228
|
}
|
|
@@ -1246,7 +1247,7 @@ export class DockerExecutor {
|
|
|
1246
1247
|
const timer = setTimeout(() => {
|
|
1247
1248
|
timedOutByDocker = true;
|
|
1248
1249
|
const elapsedMs = Math.max(1, Date.now() - startedAtMs);
|
|
1249
|
-
const timeoutMsg = `[DockerExecutor] Job timeout in warm container after ${elapsedMs}ms (limit ${
|
|
1250
|
+
const timeoutMsg = `[DockerExecutor] Job timeout in warm container after ${elapsedMs}ms (limit ${timeoutMs}ms): ${this.warmContainerName}`;
|
|
1250
1251
|
console.log(timeoutMsg);
|
|
1251
1252
|
onLog?.("stderr", timeoutMsg);
|
|
1252
1253
|
try {
|