@pushpalsdev/cli 1.1.23 → 1.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/runtime/sandbox/.pushpals-remotebuddy-fallback.js +10 -0
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py +70 -4
- package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py +65 -0
- package/runtime/sandbox/apps/workerpals/src/execute_job.ts +78 -3
- package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts +4 -3
package/package.json
CHANGED
|
@@ -8285,6 +8285,7 @@ function buildWorkerSpawnCommand(options) {
|
|
|
8285
8285
|
}
|
|
8286
8286
|
|
|
8287
8287
|
// apps/remotebuddy/src/remotebuddy_main.ts
|
|
8288
|
+
var AUTONOMY_TASK_DEDUPE_COOLDOWN_MS = 6 * 60 * 60 * 1000;
|
|
8288
8289
|
var CONFIG = loadPushPalsConfig();
|
|
8289
8290
|
function parseArgs() {
|
|
8290
8291
|
const args = process.argv.slice(2);
|
|
@@ -8464,6 +8465,11 @@ function buildTaskExecuteDedupeKey(sessionId, params) {
|
|
|
8464
8465
|
}
|
|
8465
8466
|
return `task.execute:${normalizedOrigin}:${normalizedSessionId}:${uniqueTargets.join("|")}`.toLowerCase();
|
|
8466
8467
|
}
|
|
8468
|
+
function resolveTaskExecuteDedupeCooldownMs(params, dedupeKey) {
|
|
8469
|
+
if (!dedupeKey)
|
|
8470
|
+
return 0;
|
|
8471
|
+
return params.origin === "autonomy" ? AUTONOMY_TASK_DEDUPE_COOLDOWN_MS : 0;
|
|
8472
|
+
}
|
|
8467
8473
|
function parseAutonomyRequestMetadata(value) {
|
|
8468
8474
|
let root = asObject2(value);
|
|
8469
8475
|
if (!root && typeof value === "string") {
|
|
@@ -9509,6 +9515,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
|
|
|
9509
9515
|
const dedupeKey = buildTaskExecuteDedupeKey(sessionId, params);
|
|
9510
9516
|
if (dedupeKey)
|
|
9511
9517
|
payload.dedupeKey = dedupeKey;
|
|
9518
|
+
const dedupeCooldownMs = resolveTaskExecuteDedupeCooldownMs(params, dedupeKey);
|
|
9519
|
+
if (dedupeCooldownMs > 0)
|
|
9520
|
+
payload.dedupeCooldownMs = dedupeCooldownMs;
|
|
9512
9521
|
if (targetWorkerId)
|
|
9513
9522
|
payload.targetWorkerId = targetWorkerId;
|
|
9514
9523
|
const res = await this.fetchImpl(`${this.server}/jobs/enqueue`, {
|
|
@@ -10603,6 +10612,7 @@ if (import.meta.main) {
|
|
|
10603
10612
|
});
|
|
10604
10613
|
}
|
|
10605
10614
|
export {
|
|
10615
|
+
resolveTaskExecuteDedupeCooldownMs,
|
|
10606
10616
|
extractRequiredValidationStepsFromVisionMarkdown,
|
|
10607
10617
|
buildTaskExecuteDedupeKey,
|
|
10608
10618
|
RemoteBuddyOrchestrator
|
|
@@ -109,11 +109,14 @@ _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
|
|
|
109
109
|
_MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
|
|
110
110
|
_MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
|
|
111
111
|
_DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
112
|
-
_SMALL_TASK_NO_EDIT_WATCHDOG_S =
|
|
112
|
+
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
|
|
113
|
+
_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
|
|
113
114
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
115
|
+
_NO_EDIT_RECOVERY_WATCHDOG_S = 180
|
|
114
116
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
115
117
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
116
118
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
119
|
+
_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
|
|
117
120
|
_WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
|
|
118
121
|
|
|
119
122
|
|
|
@@ -591,6 +594,26 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
591
594
|
"browser smoke",
|
|
592
595
|
"web delivery",
|
|
593
596
|
"navigation trustworthy",
|
|
597
|
+
"test-only",
|
|
598
|
+
"test only",
|
|
599
|
+
"contract test",
|
|
600
|
+
"contract-level test",
|
|
601
|
+
"contract-level tests",
|
|
602
|
+
"contract around",
|
|
603
|
+
"contract coverage",
|
|
604
|
+
"ranking contract",
|
|
605
|
+
"regression coverage",
|
|
606
|
+
"focused regression",
|
|
607
|
+
"focused scenario",
|
|
608
|
+
"targeted test",
|
|
609
|
+
"one-file",
|
|
610
|
+
"one file",
|
|
611
|
+
"single-file",
|
|
612
|
+
"single file",
|
|
613
|
+
"max_files_to_edit: 1",
|
|
614
|
+
"max_files_to_edit=1",
|
|
615
|
+
"maxfilestoedit: 1",
|
|
616
|
+
"maxfilestoedit=1",
|
|
594
617
|
)
|
|
595
618
|
heavy_markers = (
|
|
596
619
|
"merge-conflict",
|
|
@@ -607,6 +630,36 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
607
630
|
)
|
|
608
631
|
|
|
609
632
|
|
|
633
|
+
def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
634
|
+
text = str(prompt or "").lower()
|
|
635
|
+
if not text:
|
|
636
|
+
return False
|
|
637
|
+
narrow_markers = (
|
|
638
|
+
"contract test",
|
|
639
|
+
"contract-level test",
|
|
640
|
+
"contract-level tests",
|
|
641
|
+
"contract around",
|
|
642
|
+
"contract coverage",
|
|
643
|
+
"ranking contract",
|
|
644
|
+
"regression coverage",
|
|
645
|
+
"focused regression",
|
|
646
|
+
"test-only",
|
|
647
|
+
"test only",
|
|
648
|
+
"targeted test",
|
|
649
|
+
"focused scenario",
|
|
650
|
+
)
|
|
651
|
+
if not any(marker in text for marker in narrow_markers):
|
|
652
|
+
return False
|
|
653
|
+
broad_markers = (
|
|
654
|
+
"full render harness",
|
|
655
|
+
"full-surface",
|
|
656
|
+
"full surface",
|
|
657
|
+
"migration",
|
|
658
|
+
"broad refactor",
|
|
659
|
+
)
|
|
660
|
+
return not any(marker in text for marker in broad_markers)
|
|
661
|
+
|
|
662
|
+
|
|
610
663
|
def _resolve_task_reasoning_effort(
|
|
611
664
|
configured_effort: str,
|
|
612
665
|
prompt: str,
|
|
@@ -632,6 +685,7 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
|
|
|
632
685
|
def _resolve_no_edit_watchdog_seconds(
|
|
633
686
|
prompt: str,
|
|
634
687
|
communicate_timeout_s: Optional[int],
|
|
688
|
+
recovery_attempt: int = 0,
|
|
635
689
|
) -> Optional[int]:
|
|
636
690
|
if not communicate_timeout_s:
|
|
637
691
|
return None
|
|
@@ -652,7 +706,9 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
652
706
|
return None
|
|
653
707
|
|
|
654
708
|
prompt_text = str(prompt or "").lower()
|
|
655
|
-
if
|
|
709
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
710
|
+
default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
|
|
711
|
+
elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
|
|
656
712
|
default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
|
|
657
713
|
else:
|
|
658
714
|
default_s = (
|
|
@@ -660,6 +716,8 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
660
716
|
if _looks_like_small_task_prompt(prompt)
|
|
661
717
|
else _DEFAULT_NO_EDIT_WATCHDOG_S
|
|
662
718
|
)
|
|
719
|
+
if recovery_attempt > 0:
|
|
720
|
+
default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
|
|
663
721
|
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
664
722
|
|
|
665
723
|
|
|
@@ -703,7 +761,9 @@ def _resolve_rollout_watchdog_seconds(
|
|
|
703
761
|
else:
|
|
704
762
|
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
705
763
|
|
|
706
|
-
if
|
|
764
|
+
if _looks_like_narrow_test_task_prompt(prompt):
|
|
765
|
+
default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
|
|
766
|
+
elif _looks_like_web_review_prompt(prompt):
|
|
707
767
|
default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
|
|
708
768
|
elif _looks_like_small_task_prompt(prompt):
|
|
709
769
|
default_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
|
|
@@ -766,6 +826,8 @@ def _describe_publishable_paths(paths: List[str]) -> str:
|
|
|
766
826
|
def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
|
|
767
827
|
lines = [
|
|
768
828
|
"No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
|
|
829
|
+
"This recovery attempt has a patch-first contract: make one publishable edit before any further broad discovery. If you need one narrow read of the hinted file to place the edit, do that once, then patch immediately.",
|
|
830
|
+
"Do not repeat the same read/search sequence from the previous attempt. Re-reading the target without editing is a failed recovery.",
|
|
769
831
|
"Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
|
|
770
832
|
"Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
|
|
771
833
|
"Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
|
|
@@ -2201,7 +2263,11 @@ def _run_codex_task(
|
|
|
2201
2263
|
rollout_watchdog_retryable = True
|
|
2202
2264
|
command_policy_rejection_loop = False
|
|
2203
2265
|
no_edit_watchdog_s = (
|
|
2204
|
-
_resolve_no_edit_watchdog_seconds(
|
|
2266
|
+
_resolve_no_edit_watchdog_seconds(
|
|
2267
|
+
prompt,
|
|
2268
|
+
communicate_timeout_s,
|
|
2269
|
+
recovery_attempt=no_edit_recovery_attempt,
|
|
2270
|
+
)
|
|
2205
2271
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
2206
2272
|
else None
|
|
2207
2273
|
)
|
|
@@ -1386,6 +1386,60 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1386
1386
|
|
|
1387
1387
|
self.assertEqual(watchdog_s, 240)
|
|
1388
1388
|
|
|
1389
|
+
def test_narrow_contract_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1390
|
+
prompt = (
|
|
1391
|
+
"Update app/__tests__/opportunity-graph.contract.test.ts to tighten the "
|
|
1392
|
+
"ranking contract test. Keep this test-only and preserve existing behavior."
|
|
1393
|
+
)
|
|
1394
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1395
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1396
|
+
|
|
1397
|
+
self.assertEqual(watchdog_s, 180)
|
|
1398
|
+
|
|
1399
|
+
def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
|
|
1400
|
+
prompt = (
|
|
1401
|
+
"Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
|
|
1402
|
+
"Add focused regression coverage in app/__tests__/opportunity-graph.contract.test.ts. "
|
|
1403
|
+
"Required vision.md testing criteria: bun test | bun x tsc --noEmit | bun run lint | bun run web:e2e."
|
|
1404
|
+
)
|
|
1405
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1406
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1407
|
+
|
|
1408
|
+
self.assertEqual(watchdog_s, 180)
|
|
1409
|
+
|
|
1410
|
+
def test_no_edit_recovery_attempt_uses_patch_first_watchdog(self) -> None:
|
|
1411
|
+
prompt = "Investigate a broad reliability issue and make the smallest safe fix."
|
|
1412
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1413
|
+
first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1414
|
+
recovery_attempt_s = _resolve_no_edit_watchdog_seconds(
|
|
1415
|
+
prompt,
|
|
1416
|
+
1200,
|
|
1417
|
+
recovery_attempt=1,
|
|
1418
|
+
)
|
|
1419
|
+
|
|
1420
|
+
self.assertEqual(first_attempt_s, 480)
|
|
1421
|
+
self.assertEqual(recovery_attempt_s, 180)
|
|
1422
|
+
|
|
1423
|
+
def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
|
|
1424
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
|
|
1425
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(
|
|
1426
|
+
"Investigate a broad reliability issue.",
|
|
1427
|
+
1200,
|
|
1428
|
+
recovery_attempt=1,
|
|
1429
|
+
)
|
|
1430
|
+
|
|
1431
|
+
self.assertEqual(watchdog_s, 300)
|
|
1432
|
+
|
|
1433
|
+
def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1434
|
+
prompt = (
|
|
1435
|
+
"Restore exact score assertions for contract-level tests where score is part "
|
|
1436
|
+
"of the public output. Keep this as a test-only patch in app/__tests__."
|
|
1437
|
+
)
|
|
1438
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1439
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1440
|
+
|
|
1441
|
+
self.assertEqual(watchdog_s, 180)
|
|
1442
|
+
|
|
1389
1443
|
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
1390
1444
|
guidance = _build_no_edit_recovery_guidance(
|
|
1391
1445
|
"item.completed | still inspecting",
|
|
@@ -1393,6 +1447,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1393
1447
|
)
|
|
1394
1448
|
|
|
1395
1449
|
self.assertIn("node_modules", guidance)
|
|
1450
|
+
self.assertIn("patch-first contract", guidance)
|
|
1451
|
+
self.assertIn("Re-reading the target without editing is a failed recovery", guidance)
|
|
1396
1452
|
self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
|
|
1397
1453
|
self.assertIn("Previous Codex event trace excerpt", guidance)
|
|
1398
1454
|
|
|
@@ -1411,6 +1467,15 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1411
1467
|
self.assertEqual(no_edit_s, 240)
|
|
1412
1468
|
self.assertEqual(rollout_s, 180)
|
|
1413
1469
|
|
|
1470
|
+
def test_narrow_contract_rollout_watchdog_is_earlier_than_no_edit_watchdog(self) -> None:
|
|
1471
|
+
prompt = "Tighten the focused contract test for one ranking behavior."
|
|
1472
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
|
|
1473
|
+
no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1474
|
+
rollout_s = _resolve_rollout_watchdog_seconds(prompt, 1200, no_edit_s)
|
|
1475
|
+
|
|
1476
|
+
self.assertEqual(no_edit_s, 180)
|
|
1477
|
+
self.assertEqual(rollout_s, 120)
|
|
1478
|
+
|
|
1414
1479
|
def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
|
|
1415
1480
|
trace = {
|
|
1416
1481
|
"summaries": [
|
|
@@ -74,6 +74,7 @@ export interface TaskExecutePlanning {
|
|
|
74
74
|
validationSteps: string[];
|
|
75
75
|
requiredValidationSteps?: string[];
|
|
76
76
|
repoHintDiagnostics?: string[];
|
|
77
|
+
repoHintStalePaths?: string[];
|
|
77
78
|
queuePriority: TaskExecutePriority;
|
|
78
79
|
queueWaitBudgetMs: number;
|
|
79
80
|
executionBudgetMs: number;
|
|
@@ -325,6 +326,15 @@ export function shouldSkipCriticAfterExecutorTimeout(opts: {
|
|
|
325
326
|
return /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i.test(opts.executorText);
|
|
326
327
|
}
|
|
327
328
|
|
|
329
|
+
export function shouldSkipCriticForDeterministicValidationRevision(opts: {
|
|
330
|
+
deterministicRequiresRevision: boolean;
|
|
331
|
+
validationOutsideTaskScope: boolean;
|
|
332
|
+
validationRuns: ValidationExecutionResult[];
|
|
333
|
+
}): boolean {
|
|
334
|
+
if (!opts.deterministicRequiresRevision || opts.validationOutsideTaskScope) return false;
|
|
335
|
+
return opts.validationRuns.some(isDeterministicFastValidationFailure);
|
|
336
|
+
}
|
|
337
|
+
|
|
328
338
|
export function workerAttemptRolloutScore(params: {
|
|
329
339
|
executorElapsedMs: number;
|
|
330
340
|
qualityElapsedMs: number;
|
|
@@ -6883,6 +6893,7 @@ function sanitizeStalePathHints(
|
|
|
6883
6893
|
repo: string,
|
|
6884
6894
|
values: unknown,
|
|
6885
6895
|
taskText: string,
|
|
6896
|
+
opts: { dropMissingParentHints?: boolean } = {},
|
|
6886
6897
|
): { values: string[]; stale: string[]; diagnostics: string[] } {
|
|
6887
6898
|
const stale: string[] = [];
|
|
6888
6899
|
const diagnostics: string[] = [];
|
|
@@ -6897,7 +6908,12 @@ function sanitizeStalePathHints(
|
|
|
6897
6908
|
continue;
|
|
6898
6909
|
}
|
|
6899
6910
|
if (!pathParentExists(repo, raw) && !taskTextAllowsCreatingMissingPaths(taskText)) {
|
|
6900
|
-
|
|
6911
|
+
const diagnostic = `Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`;
|
|
6912
|
+
diagnostics.push(diagnostic);
|
|
6913
|
+
if (opts.dropMissingParentHints) {
|
|
6914
|
+
stale.push(raw);
|
|
6915
|
+
continue;
|
|
6916
|
+
}
|
|
6901
6917
|
}
|
|
6902
6918
|
out.push(raw);
|
|
6903
6919
|
}
|
|
@@ -6964,7 +6980,9 @@ export function sanitizeTaskExecutePlanningPathHints(
|
|
|
6964
6980
|
const normalizedDiscovery: Record<string, unknown> = { ...discovery };
|
|
6965
6981
|
if (isStringArray(discovery.likelyDirs)) {
|
|
6966
6982
|
const sanitized = repo
|
|
6967
|
-
? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText
|
|
6983
|
+
? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText, {
|
|
6984
|
+
dropMissingParentHints: true,
|
|
6985
|
+
})
|
|
6968
6986
|
: { values: toStringArray(discovery.likelyDirs), stale: [], diagnostics: [] };
|
|
6969
6987
|
normalizedDiscovery.likelyDirs = sanitized.values;
|
|
6970
6988
|
staleHints.push(...sanitized.stale);
|
|
@@ -6986,10 +7004,40 @@ export function sanitizeTaskExecutePlanningPathHints(
|
|
|
6986
7004
|
if (repoDiagnostics.length > 0) {
|
|
6987
7005
|
out.repoHintDiagnostics = Array.from(new Set(repoDiagnostics)).slice(0, 8);
|
|
6988
7006
|
}
|
|
7007
|
+
if (staleHints.length > 0) {
|
|
7008
|
+
out.repoHintStalePaths = Array.from(new Set(staleHints)).slice(0, 16);
|
|
7009
|
+
}
|
|
6989
7010
|
|
|
6990
7011
|
return out;
|
|
6991
7012
|
}
|
|
6992
7013
|
|
|
7014
|
+
export function sanitizePlannerWorkerInstructionPathHints(
|
|
7015
|
+
value: unknown,
|
|
7016
|
+
staleHints: unknown,
|
|
7017
|
+
): string | undefined {
|
|
7018
|
+
const text = String(value ?? "").trim();
|
|
7019
|
+
if (!text) return undefined;
|
|
7020
|
+
const normalizedHints = toStringArray(staleHints)
|
|
7021
|
+
.map((hint) => normalizeStagePath(hint))
|
|
7022
|
+
.filter((hint): hint is string => Boolean(hint))
|
|
7023
|
+
.map((hint) => hint.toLowerCase());
|
|
7024
|
+
if (normalizedHints.length === 0) return text;
|
|
7025
|
+
|
|
7026
|
+
const uniqueHints = Array.from(new Set(normalizedHints));
|
|
7027
|
+
const hasStaleHint = (line: string): boolean => {
|
|
7028
|
+
const lower = line.replace(/\\/g, "/").toLowerCase();
|
|
7029
|
+
return uniqueHints.some((hint) => lower.includes(hint));
|
|
7030
|
+
};
|
|
7031
|
+
const lines = text.split(/\r?\n/);
|
|
7032
|
+
const kept = lines.filter((line) => !hasStaleHint(line)).map((line) => line.trim()).filter(Boolean);
|
|
7033
|
+
if (kept.length === lines.length) return text;
|
|
7034
|
+
|
|
7035
|
+
return [
|
|
7036
|
+
"Planner path guidance was sanitized because it referenced paths absent from this checkout; rely on the Task planning contract target path hints and existing repo owners instead.",
|
|
7037
|
+
...kept,
|
|
7038
|
+
].join("\n");
|
|
7039
|
+
}
|
|
7040
|
+
|
|
6993
7041
|
function validateTaskExecutePlanning(
|
|
6994
7042
|
value: unknown,
|
|
6995
7043
|
options?: {
|
|
@@ -7546,6 +7594,13 @@ export async function executeJob(
|
|
|
7546
7594
|
planning: sanitizedPlanning,
|
|
7547
7595
|
instruction,
|
|
7548
7596
|
};
|
|
7597
|
+
const sanitizedPlannerWorkerInstruction = sanitizePlannerWorkerInstructionPathHints(
|
|
7598
|
+
params.plannerWorkerInstruction,
|
|
7599
|
+
planning.repoHintStalePaths ?? [],
|
|
7600
|
+
);
|
|
7601
|
+
if (sanitizedPlannerWorkerInstruction !== undefined) {
|
|
7602
|
+
normalizedParams.plannerWorkerInstruction = sanitizedPlannerWorkerInstruction;
|
|
7603
|
+
}
|
|
7549
7604
|
const executionBudgetMs = Number(planning.executionBudgetMs);
|
|
7550
7605
|
const finalizationBudgetMs = Number(planning.finalizationBudgetMs);
|
|
7551
7606
|
const mergeConflictContext = extractMergeConflictReviewContext(normalizedParams);
|
|
@@ -7902,8 +7957,23 @@ export async function executeJob(
|
|
|
7902
7957
|
qualityIssues: qualityForCritic.issues,
|
|
7903
7958
|
changedPaths: quality.changedPaths,
|
|
7904
7959
|
});
|
|
7960
|
+
const preCriticEffectiveQualityIssues = validationOutsideTaskScope
|
|
7961
|
+
? quality.issues.filter((issue) => !issue.startsWith("ValidationGate:"))
|
|
7962
|
+
: quality.issues;
|
|
7963
|
+
const preCriticDeterministicRequiresRevision =
|
|
7964
|
+
preCriticEffectiveQualityIssues.length > 0 ||
|
|
7965
|
+
(quality.blocker !== null && !validationOutsideTaskScope);
|
|
7966
|
+
const skipCriticForDeterministicValidationRevision =
|
|
7967
|
+
shouldSkipCriticForDeterministicValidationRevision({
|
|
7968
|
+
deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
|
|
7969
|
+
validationOutsideTaskScope,
|
|
7970
|
+
validationRuns: quality.validationRuns,
|
|
7971
|
+
});
|
|
7905
7972
|
const critic =
|
|
7906
|
-
quality.skipped ||
|
|
7973
|
+
quality.skipped ||
|
|
7974
|
+
!qualityGatePolicy.criticGateEnabled ||
|
|
7975
|
+
skipCriticAfterExecutorTimeout ||
|
|
7976
|
+
skipCriticForDeterministicValidationRevision
|
|
7907
7977
|
? null
|
|
7908
7978
|
: executor === "openai_codex"
|
|
7909
7979
|
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
@@ -7939,6 +8009,11 @@ export async function executeJob(
|
|
|
7939
8009
|
"stdout",
|
|
7940
8010
|
"[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
|
|
7941
8011
|
);
|
|
8012
|
+
} else if (skipCriticForDeterministicValidationRevision) {
|
|
8013
|
+
onLog?.(
|
|
8014
|
+
"stdout",
|
|
8015
|
+
"[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
|
|
8016
|
+
);
|
|
7942
8017
|
}
|
|
7943
8018
|
const rolloutScore = workerAttemptRolloutScore({
|
|
7944
8019
|
executorElapsedMs,
|
|
@@ -1785,9 +1785,10 @@ async function workerLoop(
|
|
|
1785
1785
|
terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
|
|
1786
1786
|
executorBackend: resolveExecutor(CONFIG),
|
|
1787
1787
|
summary: result.summary,
|
|
1788
|
-
watchdogFired:
|
|
1789
|
-
|
|
1790
|
-
|
|
1788
|
+
watchdogFired:
|
|
1789
|
+
/watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
|
|
1790
|
+
`${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
|
|
1791
|
+
),
|
|
1791
1792
|
metadata: {
|
|
1792
1793
|
workerId: opts.workerId,
|
|
1793
1794
|
docker: Boolean(dockerExecutor),
|