npm - @pushpalsdev/cli - Versions diffs - 1.1.24 → 1.1.25 - Mend

@pushpalsdev/cli 1.1.24 → 1.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.24",
+  "version": "1.1.25",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -112,6 +112,7 @@ _DEFAULT_NO_EDIT_WATCHDOG_S = 480
 _SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
 _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
 _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
+_NO_EDIT_RECOVERY_WATCHDOG_S = 180
 _DEFAULT_NO_EDIT_RECHECK_S = 120
 _DEFAULT_ROLLOUT_WATCHDOG_S = 300
 _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
@@ -596,8 +597,13 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
         "test-only",
         "test only",
         "contract test",
+        "contract-level test",
+        "contract-level tests",
+        "contract around",
         "contract coverage",
         "ranking contract",
+        "regression coverage",
+        "focused regression",
         "focused scenario",
         "targeted test",
         "one-file",
@@ -630,8 +636,13 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
         return False
     narrow_markers = (
         "contract test",
+        "contract-level test",
+        "contract-level tests",
+        "contract around",
         "contract coverage",
         "ranking contract",
+        "regression coverage",
+        "focused regression",
         "test-only",
         "test only",
         "targeted test",
@@ -643,9 +654,6 @@ def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
         "full render harness",
         "full-surface",
         "full surface",
-        "e2e",
-        "browser validation",
-        "browser smoke",
         "migration",
         "broad refactor",
     )
@@ -677,6 +685,7 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
 def _resolve_no_edit_watchdog_seconds(
     prompt: str,
     communicate_timeout_s: Optional[int],
+    recovery_attempt: int = 0,
 ) -> Optional[int]:
     if not communicate_timeout_s:
         return None
@@ -707,6 +716,8 @@ def _resolve_no_edit_watchdog_seconds(
             if _looks_like_small_task_prompt(prompt)
             else _DEFAULT_NO_EDIT_WATCHDOG_S
         )
+    if recovery_attempt > 0:
+        default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
     return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
@@ -2252,7 +2263,11 @@ def _run_codex_task(
             rollout_watchdog_retryable = True
             command_policy_rejection_loop = False
             no_edit_watchdog_s = (
-                _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
+                _resolve_no_edit_watchdog_seconds(
+                    prompt,
+                    communicate_timeout_s,
+                    recovery_attempt=no_edit_recovery_attempt,
+                )
                 if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
                 else None
             )

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -1396,6 +1396,50 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(watchdog_s, 180)
+    def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
+            "Add focused regression coverage in app/__tests__/opportunity-graph.contract.test.ts. "
+            "Required vision.md testing criteria: bun test | bun x tsc --noEmit | bun run lint | bun run web:e2e."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
+    def test_no_edit_recovery_attempt_uses_patch_first_watchdog(self) -> None:
+        prompt = "Investigate a broad reliability issue and make the smallest safe fix."
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+            recovery_attempt_s = _resolve_no_edit_watchdog_seconds(
+                prompt,
+                1200,
+                recovery_attempt=1,
+            )
+        self.assertEqual(first_attempt_s, 480)
+        self.assertEqual(recovery_attempt_s, 180)
+    def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(
+                "Investigate a broad reliability issue.",
+                1200,
+                recovery_attempt=1,
+            )
+        self.assertEqual(watchdog_s, 300)
+    def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Restore exact score assertions for contract-level tests where score is part "
+            "of the public output. Keep this as a test-only patch in app/__tests__."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
     def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
         guidance = _build_no_edit_recovery_guidance(
             "item.completed | still inspecting",

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -74,6 +74,7 @@ export interface TaskExecutePlanning {
   validationSteps: string[];
   requiredValidationSteps?: string[];
   repoHintDiagnostics?: string[];
+  repoHintStalePaths?: string[];
   queuePriority: TaskExecutePriority;
   queueWaitBudgetMs: number;
   executionBudgetMs: number;
@@ -325,6 +326,15 @@ export function shouldSkipCriticAfterExecutorTimeout(opts: {
   return /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i.test(opts.executorText);
 }
+export function shouldSkipCriticForDeterministicValidationRevision(opts: {
+  deterministicRequiresRevision: boolean;
+  validationOutsideTaskScope: boolean;
+  validationRuns: ValidationExecutionResult[];
+}): boolean {
+  if (!opts.deterministicRequiresRevision || opts.validationOutsideTaskScope) return false;
+  return opts.validationRuns.some(isDeterministicFastValidationFailure);
+}
 export function workerAttemptRolloutScore(params: {
   executorElapsedMs: number;
   qualityElapsedMs: number;
@@ -6883,6 +6893,7 @@ function sanitizeStalePathHints(
   repo: string,
   values: unknown,
   taskText: string,
+  opts: { dropMissingParentHints?: boolean } = {},
 ): { values: string[]; stale: string[]; diagnostics: string[] } {
   const stale: string[] = [];
   const diagnostics: string[] = [];
@@ -6897,7 +6908,12 @@ function sanitizeStalePathHints(
       continue;
     }
     if (!pathParentExists(repo, raw) && !taskTextAllowsCreatingMissingPaths(taskText)) {
-      diagnostics.push(`Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`);
+      const diagnostic = `Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`;
+      diagnostics.push(diagnostic);
+      if (opts.dropMissingParentHints) {
+        stale.push(raw);
+        continue;
+      }
     }
     out.push(raw);
   }
@@ -6964,7 +6980,9 @@ export function sanitizeTaskExecutePlanningPathHints(
     const normalizedDiscovery: Record<string, unknown> = { ...discovery };
     if (isStringArray(discovery.likelyDirs)) {
       const sanitized = repo
-        ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText)
+        ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText, {
+            dropMissingParentHints: true,
+          })
         : { values: toStringArray(discovery.likelyDirs), stale: [], diagnostics: [] };
       normalizedDiscovery.likelyDirs = sanitized.values;
       staleHints.push(...sanitized.stale);
@@ -6986,10 +7004,40 @@ export function sanitizeTaskExecutePlanningPathHints(
   if (repoDiagnostics.length > 0) {
     out.repoHintDiagnostics = Array.from(new Set(repoDiagnostics)).slice(0, 8);
   }
+  if (staleHints.length > 0) {
+    out.repoHintStalePaths = Array.from(new Set(staleHints)).slice(0, 16);
+  }
   return out;
 }
+export function sanitizePlannerWorkerInstructionPathHints(
+  value: unknown,
+  staleHints: unknown,
+): string | undefined {
+  const text = String(value ?? "").trim();
+  if (!text) return undefined;
+  const normalizedHints = toStringArray(staleHints)
+    .map((hint) => normalizeStagePath(hint))
+    .filter((hint): hint is string => Boolean(hint))
+    .map((hint) => hint.toLowerCase());
+  if (normalizedHints.length === 0) return text;
+  const uniqueHints = Array.from(new Set(normalizedHints));
+  const hasStaleHint = (line: string): boolean => {
+    const lower = line.replace(/\\/g, "/").toLowerCase();
+    return uniqueHints.some((hint) => lower.includes(hint));
+  };
+  const lines = text.split(/\r?\n/);
+  const kept = lines.filter((line) => !hasStaleHint(line)).map((line) => line.trim()).filter(Boolean);
+  if (kept.length === lines.length) return text;
+  return [
+    "Planner path guidance was sanitized because it referenced paths absent from this checkout; rely on the Task planning contract target path hints and existing repo owners instead.",
+    ...kept,
+  ].join("\n");
+}
 function validateTaskExecutePlanning(
   value: unknown,
   options?: {
@@ -7546,6 +7594,13 @@ export async function executeJob(
     planning: sanitizedPlanning,
     instruction,
   };
+  const sanitizedPlannerWorkerInstruction = sanitizePlannerWorkerInstructionPathHints(
+    params.plannerWorkerInstruction,
+    planning.repoHintStalePaths ?? [],
+  );
+  if (sanitizedPlannerWorkerInstruction !== undefined) {
+    normalizedParams.plannerWorkerInstruction = sanitizedPlannerWorkerInstruction;
+  }
   const executionBudgetMs = Number(planning.executionBudgetMs);
   const finalizationBudgetMs = Number(planning.finalizationBudgetMs);
   const mergeConflictContext = extractMergeConflictReviewContext(normalizedParams);
@@ -7902,8 +7957,23 @@ export async function executeJob(
       qualityIssues: qualityForCritic.issues,
       changedPaths: quality.changedPaths,
     });
+    const preCriticEffectiveQualityIssues = validationOutsideTaskScope
+      ? quality.issues.filter((issue) => !issue.startsWith("ValidationGate:"))
+      : quality.issues;
+    const preCriticDeterministicRequiresRevision =
+      preCriticEffectiveQualityIssues.length > 0 ||
+      (quality.blocker !== null && !validationOutsideTaskScope);
+    const skipCriticForDeterministicValidationRevision =
+      shouldSkipCriticForDeterministicValidationRevision({
+        deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
+        validationOutsideTaskScope,
+        validationRuns: quality.validationRuns,
+      });
     const critic =
-      quality.skipped || !qualityGatePolicy.criticGateEnabled || skipCriticAfterExecutorTimeout
+      quality.skipped ||
+      !qualityGatePolicy.criticGateEnabled ||
+      skipCriticAfterExecutorTimeout ||
+      skipCriticForDeterministicValidationRevision
         ? null
         : executor === "openai_codex"
           ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -7939,6 +8009,11 @@ export async function executeJob(
         "stdout",
         "[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
       );
+    } else if (skipCriticForDeterministicValidationRevision) {
+      onLog?.(
+        "stdout",
+        "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
+      );
     }
     const rolloutScore = workerAttemptRolloutScore({
       executorElapsedMs,