@pushpalsdev/cli 1.1.24 → 1.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -112,6 +112,7 @@ _DEFAULT_NO_EDIT_WATCHDOG_S = 480
|
|
|
112
112
|
_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
|
|
113
113
|
_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
|
|
114
114
|
_WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
115
|
+
_NO_EDIT_RECOVERY_WATCHDOG_S = 180
|
|
115
116
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
116
117
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
117
118
|
_SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
|
|
@@ -596,8 +597,13 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
|
|
|
596
597
|
"test-only",
|
|
597
598
|
"test only",
|
|
598
599
|
"contract test",
|
|
600
|
+
"contract-level test",
|
|
601
|
+
"contract-level tests",
|
|
602
|
+
"contract around",
|
|
599
603
|
"contract coverage",
|
|
600
604
|
"ranking contract",
|
|
605
|
+
"regression coverage",
|
|
606
|
+
"focused regression",
|
|
601
607
|
"focused scenario",
|
|
602
608
|
"targeted test",
|
|
603
609
|
"one-file",
|
|
@@ -630,8 +636,13 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
|
630
636
|
return False
|
|
631
637
|
narrow_markers = (
|
|
632
638
|
"contract test",
|
|
639
|
+
"contract-level test",
|
|
640
|
+
"contract-level tests",
|
|
641
|
+
"contract around",
|
|
633
642
|
"contract coverage",
|
|
634
643
|
"ranking contract",
|
|
644
|
+
"regression coverage",
|
|
645
|
+
"focused regression",
|
|
635
646
|
"test-only",
|
|
636
647
|
"test only",
|
|
637
648
|
"targeted test",
|
|
@@ -643,9 +654,6 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
|
|
|
643
654
|
"full render harness",
|
|
644
655
|
"full-surface",
|
|
645
656
|
"full surface",
|
|
646
|
-
"e2e",
|
|
647
|
-
"browser validation",
|
|
648
|
-
"browser smoke",
|
|
649
657
|
"migration",
|
|
650
658
|
"broad refactor",
|
|
651
659
|
)
|
|
@@ -677,6 +685,7 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
|
|
|
677
685
|
def _resolve_no_edit_watchdog_seconds(
|
|
678
686
|
prompt: str,
|
|
679
687
|
communicate_timeout_s: Optional[int],
|
|
688
|
+
recovery_attempt: int = 0,
|
|
680
689
|
) -> Optional[int]:
|
|
681
690
|
if not communicate_timeout_s:
|
|
682
691
|
return None
|
|
@@ -707,6 +716,8 @@ def _resolve_no_edit_watchdog_seconds(
|
|
|
707
716
|
if _looks_like_small_task_prompt(prompt)
|
|
708
717
|
else _DEFAULT_NO_EDIT_WATCHDOG_S
|
|
709
718
|
)
|
|
719
|
+
if recovery_attempt > 0:
|
|
720
|
+
default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
|
|
710
721
|
return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
|
|
711
722
|
|
|
712
723
|
|
|
@@ -2252,7 +2263,11 @@ def _run_codex_task(
|
|
|
2252
2263
|
rollout_watchdog_retryable = True
|
|
2253
2264
|
command_policy_rejection_loop = False
|
|
2254
2265
|
no_edit_watchdog_s = (
|
|
2255
|
-
_resolve_no_edit_watchdog_seconds(
|
|
2266
|
+
_resolve_no_edit_watchdog_seconds(
|
|
2267
|
+
prompt,
|
|
2268
|
+
communicate_timeout_s,
|
|
2269
|
+
recovery_attempt=no_edit_recovery_attempt,
|
|
2270
|
+
)
|
|
2256
2271
|
if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
|
|
2257
2272
|
else None
|
|
2258
2273
|
)
|
|
@@ -1396,6 +1396,50 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1396
1396
|
|
|
1397
1397
|
self.assertEqual(watchdog_s, 180)
|
|
1398
1398
|
|
|
1399
|
+
def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
|
|
1400
|
+
prompt = (
|
|
1401
|
+
"Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
|
|
1402
|
+
"Add focused regression coverage in app/__tests__/opportunity-graph.contract.test.ts. "
|
|
1403
|
+
"Required vision.md testing criteria: bun test | bun x tsc --noEmit | bun run lint | bun run web:e2e."
|
|
1404
|
+
)
|
|
1405
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1406
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1407
|
+
|
|
1408
|
+
self.assertEqual(watchdog_s, 180)
|
|
1409
|
+
|
|
1410
|
+
def test_no_edit_recovery_attempt_uses_patch_first_watchdog(self) -> None:
|
|
1411
|
+
prompt = "Investigate a broad reliability issue and make the smallest safe fix."
|
|
1412
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1413
|
+
first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1414
|
+
recovery_attempt_s = _resolve_no_edit_watchdog_seconds(
|
|
1415
|
+
prompt,
|
|
1416
|
+
1200,
|
|
1417
|
+
recovery_attempt=1,
|
|
1418
|
+
)
|
|
1419
|
+
|
|
1420
|
+
self.assertEqual(first_attempt_s, 480)
|
|
1421
|
+
self.assertEqual(recovery_attempt_s, 180)
|
|
1422
|
+
|
|
1423
|
+
def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
|
|
1424
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
|
|
1425
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(
|
|
1426
|
+
"Investigate a broad reliability issue.",
|
|
1427
|
+
1200,
|
|
1428
|
+
recovery_attempt=1,
|
|
1429
|
+
)
|
|
1430
|
+
|
|
1431
|
+
self.assertEqual(watchdog_s, 300)
|
|
1432
|
+
|
|
1433
|
+
def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
|
|
1434
|
+
prompt = (
|
|
1435
|
+
"Restore exact score assertions for contract-level tests where score is part "
|
|
1436
|
+
"of the public output. Keep this as a test-only patch in app/__tests__."
|
|
1437
|
+
)
|
|
1438
|
+
with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
|
|
1439
|
+
watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
|
|
1440
|
+
|
|
1441
|
+
self.assertEqual(watchdog_s, 180)
|
|
1442
|
+
|
|
1399
1443
|
def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
|
|
1400
1444
|
guidance = _build_no_edit_recovery_guidance(
|
|
1401
1445
|
"item.completed | still inspecting",
|
|
@@ -74,6 +74,7 @@ export interface TaskExecutePlanning {
|
|
|
74
74
|
validationSteps: string[];
|
|
75
75
|
requiredValidationSteps?: string[];
|
|
76
76
|
repoHintDiagnostics?: string[];
|
|
77
|
+
repoHintStalePaths?: string[];
|
|
77
78
|
queuePriority: TaskExecutePriority;
|
|
78
79
|
queueWaitBudgetMs: number;
|
|
79
80
|
executionBudgetMs: number;
|
|
@@ -325,6 +326,15 @@ export function shouldSkipCriticAfterExecutorTimeout(opts: {
|
|
|
325
326
|
return /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i.test(opts.executorText);
|
|
326
327
|
}
|
|
327
328
|
|
|
329
|
+
export function shouldSkipCriticForDeterministicValidationRevision(opts: {
|
|
330
|
+
deterministicRequiresRevision: boolean;
|
|
331
|
+
validationOutsideTaskScope: boolean;
|
|
332
|
+
validationRuns: ValidationExecutionResult[];
|
|
333
|
+
}): boolean {
|
|
334
|
+
if (!opts.deterministicRequiresRevision || opts.validationOutsideTaskScope) return false;
|
|
335
|
+
return opts.validationRuns.some(isDeterministicFastValidationFailure);
|
|
336
|
+
}
|
|
337
|
+
|
|
328
338
|
export function workerAttemptRolloutScore(params: {
|
|
329
339
|
executorElapsedMs: number;
|
|
330
340
|
qualityElapsedMs: number;
|
|
@@ -6883,6 +6893,7 @@ function sanitizeStalePathHints(
|
|
|
6883
6893
|
repo: string,
|
|
6884
6894
|
values: unknown,
|
|
6885
6895
|
taskText: string,
|
|
6896
|
+
opts: { dropMissingParentHints?: boolean } = {},
|
|
6886
6897
|
): { values: string[]; stale: string[]; diagnostics: string[] } {
|
|
6887
6898
|
const stale: string[] = [];
|
|
6888
6899
|
const diagnostics: string[] = [];
|
|
@@ -6897,7 +6908,12 @@ function sanitizeStalePathHints(
|
|
|
6897
6908
|
continue;
|
|
6898
6909
|
}
|
|
6899
6910
|
if (!pathParentExists(repo, raw) && !taskTextAllowsCreatingMissingPaths(taskText)) {
|
|
6900
|
-
|
|
6911
|
+
const diagnostic = `Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`;
|
|
6912
|
+
diagnostics.push(diagnostic);
|
|
6913
|
+
if (opts.dropMissingParentHints) {
|
|
6914
|
+
stale.push(raw);
|
|
6915
|
+
continue;
|
|
6916
|
+
}
|
|
6901
6917
|
}
|
|
6902
6918
|
out.push(raw);
|
|
6903
6919
|
}
|
|
@@ -6964,7 +6980,9 @@ export function sanitizeTaskExecutePlanningPathHints(
|
|
|
6964
6980
|
const normalizedDiscovery: Record<string, unknown> = { ...discovery };
|
|
6965
6981
|
if (isStringArray(discovery.likelyDirs)) {
|
|
6966
6982
|
const sanitized = repo
|
|
6967
|
-
? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText
|
|
6983
|
+
? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText, {
|
|
6984
|
+
dropMissingParentHints: true,
|
|
6985
|
+
})
|
|
6968
6986
|
: { values: toStringArray(discovery.likelyDirs), stale: [], diagnostics: [] };
|
|
6969
6987
|
normalizedDiscovery.likelyDirs = sanitized.values;
|
|
6970
6988
|
staleHints.push(...sanitized.stale);
|
|
@@ -6986,10 +7004,40 @@ export function sanitizeTaskExecutePlanningPathHints(
|
|
|
6986
7004
|
if (repoDiagnostics.length > 0) {
|
|
6987
7005
|
out.repoHintDiagnostics = Array.from(new Set(repoDiagnostics)).slice(0, 8);
|
|
6988
7006
|
}
|
|
7007
|
+
if (staleHints.length > 0) {
|
|
7008
|
+
out.repoHintStalePaths = Array.from(new Set(staleHints)).slice(0, 16);
|
|
7009
|
+
}
|
|
6989
7010
|
|
|
6990
7011
|
return out;
|
|
6991
7012
|
}
|
|
6992
7013
|
|
|
7014
|
+
export function sanitizePlannerWorkerInstructionPathHints(
|
|
7015
|
+
value: unknown,
|
|
7016
|
+
staleHints: unknown,
|
|
7017
|
+
): string | undefined {
|
|
7018
|
+
const text = String(value ?? "").trim();
|
|
7019
|
+
if (!text) return undefined;
|
|
7020
|
+
const normalizedHints = toStringArray(staleHints)
|
|
7021
|
+
.map((hint) => normalizeStagePath(hint))
|
|
7022
|
+
.filter((hint): hint is string => Boolean(hint))
|
|
7023
|
+
.map((hint) => hint.toLowerCase());
|
|
7024
|
+
if (normalizedHints.length === 0) return text;
|
|
7025
|
+
|
|
7026
|
+
const uniqueHints = Array.from(new Set(normalizedHints));
|
|
7027
|
+
const hasStaleHint = (line: string): boolean => {
|
|
7028
|
+
const lower = line.replace(/\\/g, "/").toLowerCase();
|
|
7029
|
+
return uniqueHints.some((hint) => lower.includes(hint));
|
|
7030
|
+
};
|
|
7031
|
+
const lines = text.split(/\r?\n/);
|
|
7032
|
+
const kept = lines.filter((line) => !hasStaleHint(line)).map((line) => line.trim()).filter(Boolean);
|
|
7033
|
+
if (kept.length === lines.length) return text;
|
|
7034
|
+
|
|
7035
|
+
return [
|
|
7036
|
+
"Planner path guidance was sanitized because it referenced paths absent from this checkout; rely on the Task planning contract target path hints and existing repo owners instead.",
|
|
7037
|
+
...kept,
|
|
7038
|
+
].join("\n");
|
|
7039
|
+
}
|
|
7040
|
+
|
|
6993
7041
|
function validateTaskExecutePlanning(
|
|
6994
7042
|
value: unknown,
|
|
6995
7043
|
options?: {
|
|
@@ -7546,6 +7594,13 @@ export async function executeJob(
|
|
|
7546
7594
|
planning: sanitizedPlanning,
|
|
7547
7595
|
instruction,
|
|
7548
7596
|
};
|
|
7597
|
+
const sanitizedPlannerWorkerInstruction = sanitizePlannerWorkerInstructionPathHints(
|
|
7598
|
+
params.plannerWorkerInstruction,
|
|
7599
|
+
planning.repoHintStalePaths ?? [],
|
|
7600
|
+
);
|
|
7601
|
+
if (sanitizedPlannerWorkerInstruction !== undefined) {
|
|
7602
|
+
normalizedParams.plannerWorkerInstruction = sanitizedPlannerWorkerInstruction;
|
|
7603
|
+
}
|
|
7549
7604
|
const executionBudgetMs = Number(planning.executionBudgetMs);
|
|
7550
7605
|
const finalizationBudgetMs = Number(planning.finalizationBudgetMs);
|
|
7551
7606
|
const mergeConflictContext = extractMergeConflictReviewContext(normalizedParams);
|
|
@@ -7902,8 +7957,23 @@ export async function executeJob(
|
|
|
7902
7957
|
qualityIssues: qualityForCritic.issues,
|
|
7903
7958
|
changedPaths: quality.changedPaths,
|
|
7904
7959
|
});
|
|
7960
|
+
const preCriticEffectiveQualityIssues = validationOutsideTaskScope
|
|
7961
|
+
? quality.issues.filter((issue) => !issue.startsWith("ValidationGate:"))
|
|
7962
|
+
: quality.issues;
|
|
7963
|
+
const preCriticDeterministicRequiresRevision =
|
|
7964
|
+
preCriticEffectiveQualityIssues.length > 0 ||
|
|
7965
|
+
(quality.blocker !== null && !validationOutsideTaskScope);
|
|
7966
|
+
const skipCriticForDeterministicValidationRevision =
|
|
7967
|
+
shouldSkipCriticForDeterministicValidationRevision({
|
|
7968
|
+
deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
|
|
7969
|
+
validationOutsideTaskScope,
|
|
7970
|
+
validationRuns: quality.validationRuns,
|
|
7971
|
+
});
|
|
7905
7972
|
const critic =
|
|
7906
|
-
quality.skipped ||
|
|
7973
|
+
quality.skipped ||
|
|
7974
|
+
!qualityGatePolicy.criticGateEnabled ||
|
|
7975
|
+
skipCriticAfterExecutorTimeout ||
|
|
7976
|
+
skipCriticForDeterministicValidationRevision
|
|
7907
7977
|
? null
|
|
7908
7978
|
: executor === "openai_codex"
|
|
7909
7979
|
? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
|
|
@@ -7939,6 +8009,11 @@ export async function executeJob(
|
|
|
7939
8009
|
"stdout",
|
|
7940
8010
|
"[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
|
|
7941
8011
|
);
|
|
8012
|
+
} else if (skipCriticForDeterministicValidationRevision) {
|
|
8013
|
+
onLog?.(
|
|
8014
|
+
"stdout",
|
|
8015
|
+
"[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
|
|
8016
|
+
);
|
|
7942
8017
|
}
|
|
7943
8018
|
const rolloutScore = workerAttemptRolloutScore({
|
|
7944
8019
|
executorElapsedMs,
|