@pushpalsdev/cli 1.1.24 → 1.1.25

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as they appear in the respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.24",
3
+ "version": "1.1.25",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -112,6 +112,7 @@ _DEFAULT_NO_EDIT_WATCHDOG_S = 480
112
112
  _SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
113
113
  _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
114
114
  _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
115
+ _NO_EDIT_RECOVERY_WATCHDOG_S = 180
115
116
  _DEFAULT_NO_EDIT_RECHECK_S = 120
116
117
  _DEFAULT_ROLLOUT_WATCHDOG_S = 300
117
118
  _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
@@ -596,8 +597,13 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
596
597
  "test-only",
597
598
  "test only",
598
599
  "contract test",
600
+ "contract-level test",
601
+ "contract-level tests",
602
+ "contract around",
599
603
  "contract coverage",
600
604
  "ranking contract",
605
+ "regression coverage",
606
+ "focused regression",
601
607
  "focused scenario",
602
608
  "targeted test",
603
609
  "one-file",
@@ -630,8 +636,13 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
630
636
  return False
631
637
  narrow_markers = (
632
638
  "contract test",
639
+ "contract-level test",
640
+ "contract-level tests",
641
+ "contract around",
633
642
  "contract coverage",
634
643
  "ranking contract",
644
+ "regression coverage",
645
+ "focused regression",
635
646
  "test-only",
636
647
  "test only",
637
648
  "targeted test",
@@ -643,9 +654,6 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
643
654
  "full render harness",
644
655
  "full-surface",
645
656
  "full surface",
646
- "e2e",
647
- "browser validation",
648
- "browser smoke",
649
657
  "migration",
650
658
  "broad refactor",
651
659
  )
@@ -677,6 +685,7 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
677
685
  def _resolve_no_edit_watchdog_seconds(
678
686
  prompt: str,
679
687
  communicate_timeout_s: Optional[int],
688
+ recovery_attempt: int = 0,
680
689
  ) -> Optional[int]:
681
690
  if not communicate_timeout_s:
682
691
  return None
@@ -707,6 +716,8 @@ def _resolve_no_edit_watchdog_seconds(
707
716
  if _looks_like_small_task_prompt(prompt)
708
717
  else _DEFAULT_NO_EDIT_WATCHDOG_S
709
718
  )
719
+ if recovery_attempt > 0:
720
+ default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
710
721
  return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
711
722
 
712
723
 
@@ -2252,7 +2263,11 @@ def _run_codex_task(
2252
2263
  rollout_watchdog_retryable = True
2253
2264
  command_policy_rejection_loop = False
2254
2265
  no_edit_watchdog_s = (
2255
- _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
2266
+ _resolve_no_edit_watchdog_seconds(
2267
+ prompt,
2268
+ communicate_timeout_s,
2269
+ recovery_attempt=no_edit_recovery_attempt,
2270
+ )
2256
2271
  if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
2257
2272
  else None
2258
2273
  )
@@ -1396,6 +1396,50 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
1396
1396
 
1397
1397
  self.assertEqual(watchdog_s, 180)
1398
1398
 
1399
+ def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
1400
+ prompt = (
1401
+ "Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
1402
+ "Add focused regression coverage in app/__tests__/opportunity-graph.contract.test.ts. "
1403
+ "Required vision.md testing criteria: bun test | bun x tsc --noEmit | bun run lint | bun run web:e2e."
1404
+ )
1405
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
1406
+ watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
1407
+
1408
+ self.assertEqual(watchdog_s, 180)
1409
+
1410
+ def test_no_edit_recovery_attempt_uses_patch_first_watchdog(self) -> None:
1411
+ prompt = "Investigate a broad reliability issue and make the smallest safe fix."
1412
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
1413
+ first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
1414
+ recovery_attempt_s = _resolve_no_edit_watchdog_seconds(
1415
+ prompt,
1416
+ 1200,
1417
+ recovery_attempt=1,
1418
+ )
1419
+
1420
+ self.assertEqual(first_attempt_s, 480)
1421
+ self.assertEqual(recovery_attempt_s, 180)
1422
+
1423
+ def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
1424
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
1425
+ watchdog_s = _resolve_no_edit_watchdog_seconds(
1426
+ "Investigate a broad reliability issue.",
1427
+ 1200,
1428
+ recovery_attempt=1,
1429
+ )
1430
+
1431
+ self.assertEqual(watchdog_s, 300)
1432
+
1433
+ def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
1434
+ prompt = (
1435
+ "Restore exact score assertions for contract-level tests where score is part "
1436
+ "of the public output. Keep this as a test-only patch in app/__tests__."
1437
+ )
1438
+ with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
1439
+ watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
1440
+
1441
+ self.assertEqual(watchdog_s, 180)
1442
+
1399
1443
  def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
1400
1444
  guidance = _build_no_edit_recovery_guidance(
1401
1445
  "item.completed | still inspecting",
@@ -74,6 +74,7 @@ export interface TaskExecutePlanning {
74
74
  validationSteps: string[];
75
75
  requiredValidationSteps?: string[];
76
76
  repoHintDiagnostics?: string[];
77
+ repoHintStalePaths?: string[];
77
78
  queuePriority: TaskExecutePriority;
78
79
  queueWaitBudgetMs: number;
79
80
  executionBudgetMs: number;
@@ -325,6 +326,15 @@ export function shouldSkipCriticAfterExecutorTimeout(opts: {
325
326
  return /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i.test(opts.executorText);
326
327
  }
327
328
 
329
+ export function shouldSkipCriticForDeterministicValidationRevision(opts: {
330
+ deterministicRequiresRevision: boolean;
331
+ validationOutsideTaskScope: boolean;
332
+ validationRuns: ValidationExecutionResult[];
333
+ }): boolean {
334
+ if (!opts.deterministicRequiresRevision || opts.validationOutsideTaskScope) return false;
335
+ return opts.validationRuns.some(isDeterministicFastValidationFailure);
336
+ }
337
+
328
338
  export function workerAttemptRolloutScore(params: {
329
339
  executorElapsedMs: number;
330
340
  qualityElapsedMs: number;
@@ -6883,6 +6893,7 @@ function sanitizeStalePathHints(
6883
6893
  repo: string,
6884
6894
  values: unknown,
6885
6895
  taskText: string,
6896
+ opts: { dropMissingParentHints?: boolean } = {},
6886
6897
  ): { values: string[]; stale: string[]; diagnostics: string[] } {
6887
6898
  const stale: string[] = [];
6888
6899
  const diagnostics: string[] = [];
@@ -6897,7 +6908,12 @@ function sanitizeStalePathHints(
6897
6908
  continue;
6898
6909
  }
6899
6910
  if (!pathParentExists(repo, raw) && !taskTextAllowsCreatingMissingPaths(taskText)) {
6900
- diagnostics.push(`Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`);
6911
+ const diagnostic = `Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`;
6912
+ diagnostics.push(diagnostic);
6913
+ if (opts.dropMissingParentHints) {
6914
+ stale.push(raw);
6915
+ continue;
6916
+ }
6901
6917
  }
6902
6918
  out.push(raw);
6903
6919
  }
@@ -6964,7 +6980,9 @@ export function sanitizeTaskExecutePlanningPathHints(
6964
6980
  const normalizedDiscovery: Record<string, unknown> = { ...discovery };
6965
6981
  if (isStringArray(discovery.likelyDirs)) {
6966
6982
  const sanitized = repo
6967
- ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText)
6983
+ ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText, {
6984
+ dropMissingParentHints: true,
6985
+ })
6968
6986
  : { values: toStringArray(discovery.likelyDirs), stale: [], diagnostics: [] };
6969
6987
  normalizedDiscovery.likelyDirs = sanitized.values;
6970
6988
  staleHints.push(...sanitized.stale);
@@ -6986,10 +7004,40 @@ export function sanitizeTaskExecutePlanningPathHints(
6986
7004
  if (repoDiagnostics.length > 0) {
6987
7005
  out.repoHintDiagnostics = Array.from(new Set(repoDiagnostics)).slice(0, 8);
6988
7006
  }
7007
+ if (staleHints.length > 0) {
7008
+ out.repoHintStalePaths = Array.from(new Set(staleHints)).slice(0, 16);
7009
+ }
6989
7010
 
6990
7011
  return out;
6991
7012
  }
6992
7013
 
7014
+ export function sanitizePlannerWorkerInstructionPathHints(
7015
+ value: unknown,
7016
+ staleHints: unknown,
7017
+ ): string | undefined {
7018
+ const text = String(value ?? "").trim();
7019
+ if (!text) return undefined;
7020
+ const normalizedHints = toStringArray(staleHints)
7021
+ .map((hint) => normalizeStagePath(hint))
7022
+ .filter((hint): hint is string => Boolean(hint))
7023
+ .map((hint) => hint.toLowerCase());
7024
+ if (normalizedHints.length === 0) return text;
7025
+
7026
+ const uniqueHints = Array.from(new Set(normalizedHints));
7027
+ const hasStaleHint = (line: string): boolean => {
7028
+ const lower = line.replace(/\\/g, "/").toLowerCase();
7029
+ return uniqueHints.some((hint) => lower.includes(hint));
7030
+ };
7031
+ const lines = text.split(/\r?\n/);
7032
+ const kept = lines.filter((line) => !hasStaleHint(line)).map((line) => line.trim()).filter(Boolean);
7033
+ if (kept.length === lines.length) return text;
7034
+
7035
+ return [
7036
+ "Planner path guidance was sanitized because it referenced paths absent from this checkout; rely on the Task planning contract target path hints and existing repo owners instead.",
7037
+ ...kept,
7038
+ ].join("\n");
7039
+ }
7040
+
6993
7041
  function validateTaskExecutePlanning(
6994
7042
  value: unknown,
6995
7043
  options?: {
@@ -7546,6 +7594,13 @@ export async function executeJob(
7546
7594
  planning: sanitizedPlanning,
7547
7595
  instruction,
7548
7596
  };
7597
+ const sanitizedPlannerWorkerInstruction = sanitizePlannerWorkerInstructionPathHints(
7598
+ params.plannerWorkerInstruction,
7599
+ planning.repoHintStalePaths ?? [],
7600
+ );
7601
+ if (sanitizedPlannerWorkerInstruction !== undefined) {
7602
+ normalizedParams.plannerWorkerInstruction = sanitizedPlannerWorkerInstruction;
7603
+ }
7549
7604
  const executionBudgetMs = Number(planning.executionBudgetMs);
7550
7605
  const finalizationBudgetMs = Number(planning.finalizationBudgetMs);
7551
7606
  const mergeConflictContext = extractMergeConflictReviewContext(normalizedParams);
@@ -7902,8 +7957,23 @@ export async function executeJob(
7902
7957
  qualityIssues: qualityForCritic.issues,
7903
7958
  changedPaths: quality.changedPaths,
7904
7959
  });
7960
+ const preCriticEffectiveQualityIssues = validationOutsideTaskScope
7961
+ ? quality.issues.filter((issue) => !issue.startsWith("ValidationGate:"))
7962
+ : quality.issues;
7963
+ const preCriticDeterministicRequiresRevision =
7964
+ preCriticEffectiveQualityIssues.length > 0 ||
7965
+ (quality.blocker !== null && !validationOutsideTaskScope);
7966
+ const skipCriticForDeterministicValidationRevision =
7967
+ shouldSkipCriticForDeterministicValidationRevision({
7968
+ deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
7969
+ validationOutsideTaskScope,
7970
+ validationRuns: quality.validationRuns,
7971
+ });
7905
7972
  const critic =
7906
- quality.skipped || !qualityGatePolicy.criticGateEnabled || skipCriticAfterExecutorTimeout
7973
+ quality.skipped ||
7974
+ !qualityGatePolicy.criticGateEnabled ||
7975
+ skipCriticAfterExecutorTimeout ||
7976
+ skipCriticForDeterministicValidationRevision
7907
7977
  ? null
7908
7978
  : executor === "openai_codex"
7909
7979
  ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -7939,6 +8009,11 @@ export async function executeJob(
7939
8009
  "stdout",
7940
8010
  "[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
7941
8011
  );
8012
+ } else if (skipCriticForDeterministicValidationRevision) {
8013
+ onLog?.(
8014
+ "stdout",
8015
+ "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
8016
+ );
7942
8017
  }
7943
8018
  const rolloutScore = workerAttemptRolloutScore({
7944
8019
  executorElapsedMs,