npm - @pushpalsdev/cli - Versions diffs - 1.1.23 → 1.1.25 - Mend

@pushpalsdev/cli 1.1.23 → 1.1.25

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@pushpalsdev/cli",
-  "version": "1.1.23",
+  "version": "1.1.25",
   "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
   "license": "MIT",
   "repository": {

package/runtime/sandbox/.pushpals-remotebuddy-fallback.js CHANGED Viewed

@@ -8285,6 +8285,7 @@ function buildWorkerSpawnCommand(options) {
 }
 // apps/remotebuddy/src/remotebuddy_main.ts
+var AUTONOMY_TASK_DEDUPE_COOLDOWN_MS = 6 * 60 * 60 * 1000;
 var CONFIG = loadPushPalsConfig();
 function parseArgs() {
   const args = process.argv.slice(2);
@@ -8464,6 +8465,11 @@ function buildTaskExecuteDedupeKey(sessionId, params) {
   }
   return `task.execute:${normalizedOrigin}:${normalizedSessionId}:${uniqueTargets.join("|")}`.toLowerCase();
 }
+function resolveTaskExecuteDedupeCooldownMs(params, dedupeKey) {
+  if (!dedupeKey)
+    return 0;
+  return params.origin === "autonomy" ? AUTONOMY_TASK_DEDUPE_COOLDOWN_MS : 0;
+}
 function parseAutonomyRequestMetadata(value) {
   let root = asObject2(value);
   if (!root && typeof value === "string") {
@@ -9509,6 +9515,9 @@ Please reply with the missing details and I will enqueue a follow-up request.` :
       const dedupeKey = buildTaskExecuteDedupeKey(sessionId, params);
       if (dedupeKey)
         payload.dedupeKey = dedupeKey;
+      const dedupeCooldownMs = resolveTaskExecuteDedupeCooldownMs(params, dedupeKey);
+      if (dedupeCooldownMs > 0)
+        payload.dedupeCooldownMs = dedupeCooldownMs;
       if (targetWorkerId)
         payload.targetWorkerId = targetWorkerId;
       const res = await this.fetchImpl(`${this.server}/jobs/enqueue`, {
@@ -10603,6 +10612,7 @@ if (import.meta.main) {
   });
 }
 export {
+  resolveTaskExecuteDedupeCooldownMs,
   extractRequiredValidationStepsFromVisionMarkdown,
   buildTaskExecuteDedupeKey,
   RemoteBuddyOrchestrator

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py CHANGED Viewed

@@ -109,11 +109,14 @@ _MAX_CREDIBLE_WRAPPER_LOOP_TOP_LEVELS = 4
 _MAX_NO_EDIT_RECOVERY_ATTEMPTS = 1
 _MAX_ROLLOUT_RECOVERY_ATTEMPTS = 1
 _DEFAULT_NO_EDIT_WATCHDOG_S = 480
-_SMALL_TASK_NO_EDIT_WATCHDOG_S = 360
+_SMALL_TASK_NO_EDIT_WATCHDOG_S = 240
+_NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S = 180
 _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
+_NO_EDIT_RECOVERY_WATCHDOG_S = 180
 _DEFAULT_NO_EDIT_RECHECK_S = 120
 _DEFAULT_ROLLOUT_WATCHDOG_S = 300
 _SMALL_TASK_ROLLOUT_WATCHDOG_S = 240
+_NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S = 150
 _WEB_REVIEW_ROLLOUT_WATCHDOG_S = 180
@@ -591,6 +594,26 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
         "browser smoke",
         "web delivery",
         "navigation trustworthy",
+        "test-only",
+        "test only",
+        "contract test",
+        "contract-level test",
+        "contract-level tests",
+        "contract around",
+        "contract coverage",
+        "ranking contract",
+        "regression coverage",
+        "focused regression",
+        "focused scenario",
+        "targeted test",
+        "one-file",
+        "one file",
+        "single-file",
+        "single file",
+        "max_files_to_edit: 1",
+        "max_files_to_edit=1",
+        "maxfilestoedit: 1",
+        "maxfilestoedit=1",
     )
     heavy_markers = (
         "merge-conflict",
@@ -607,6 +630,36 @@ def _looks_like_small_task_prompt(prompt: str) -> bool:
     )
+def _looks_like_narrow_test_task_prompt(prompt: str) -> bool:
+    text = str(prompt or "").lower()
+    if not text:
+        return False
+    narrow_markers = (
+        "contract test",
+        "contract-level test",
+        "contract-level tests",
+        "contract around",
+        "contract coverage",
+        "ranking contract",
+        "regression coverage",
+        "focused regression",
+        "test-only",
+        "test only",
+        "targeted test",
+        "focused scenario",
+    )
+    if not any(marker in text for marker in narrow_markers):
+        return False
+    broad_markers = (
+        "full render harness",
+        "full-surface",
+        "full surface",
+        "migration",
+        "broad refactor",
+    )
+    return not any(marker in text for marker in broad_markers)
 def _resolve_task_reasoning_effort(
     configured_effort: str,
     prompt: str,
@@ -632,6 +685,7 @@ def _resolve_progress_log_interval_seconds(config: OpenAICodexRuntimeConfig) ->
 def _resolve_no_edit_watchdog_seconds(
     prompt: str,
     communicate_timeout_s: Optional[int],
+    recovery_attempt: int = 0,
 ) -> Optional[int]:
     if not communicate_timeout_s:
         return None
@@ -652,7 +706,9 @@ def _resolve_no_edit_watchdog_seconds(
         return None
     prompt_text = str(prompt or "").lower()
-    if "repo-native web review" in prompt_text or "web review path" in prompt_text:
+    if _looks_like_narrow_test_task_prompt(prompt):
+        default_s = _NARROW_TEST_TASK_NO_EDIT_WATCHDOG_S
+    elif "repo-native web review" in prompt_text or "web review path" in prompt_text:
         default_s = _WEB_REVIEW_NO_EDIT_WATCHDOG_S
     else:
         default_s = (
@@ -660,6 +716,8 @@ def _resolve_no_edit_watchdog_seconds(
             if _looks_like_small_task_prompt(prompt)
             else _DEFAULT_NO_EDIT_WATCHDOG_S
         )
+    if recovery_attempt > 0:
+        default_s = min(default_s, _NO_EDIT_RECOVERY_WATCHDOG_S)
     return max(120, min(default_s, max(120, communicate_timeout_s - 60)))
@@ -703,7 +761,9 @@ def _resolve_rollout_watchdog_seconds(
         else:
             return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
-    if _looks_like_web_review_prompt(prompt):
+    if _looks_like_narrow_test_task_prompt(prompt):
+        default_s = _NARROW_TEST_TASK_ROLLOUT_WATCHDOG_S
+    elif _looks_like_web_review_prompt(prompt):
         default_s = _WEB_REVIEW_ROLLOUT_WATCHDOG_S
     elif _looks_like_small_task_prompt(prompt):
         default_s = _SMALL_TASK_ROLLOUT_WATCHDOG_S
@@ -766,6 +826,8 @@ def _describe_publishable_paths(paths: List[str]) -> str:
 def _build_no_edit_recovery_guidance(trace_excerpt: str, artifact_only_paths: str = "") -> str:
     lines = [
         "No-edit watchdog recovery: the previous Codex attempt spent too much of the execution budget without producing publishable file changes.",
+        "This recovery attempt has a patch-first contract: make one publishable edit before any further broad discovery. If you need one narrow read of the hinted file to place the edit, do that once, then patch immediately.",
+        "Do not repeat the same read/search sequence from the previous attempt. Re-reading the target without editing is a failed recovery.",
         "Start from the already inspected context. Do not re-read broad repo topology, route wrappers, or missing test infrastructure unless that is the blocker.",
         "Runtime/dependency artifacts such as node_modules, outputs, .worktrees, .codex, dist, build, and coverage do not count as progress.",
         "Within the first response/action, edit the smallest behavior-owning file that satisfies the task. If the hinted file is a thin wrapper, patch the owner you already identified.",
@@ -2201,7 +2263,11 @@ def _run_codex_task(
             rollout_watchdog_retryable = True
             command_policy_rejection_loop = False
             no_edit_watchdog_s = (
-                _resolve_no_edit_watchdog_seconds(prompt, communicate_timeout_s)
+                _resolve_no_edit_watchdog_seconds(
+                    prompt,
+                    communicate_timeout_s,
+                    recovery_attempt=no_edit_recovery_attempt,
+                )
                 if no_edit_recovery_attempt <= _MAX_NO_EDIT_RECOVERY_ATTEMPTS
                 else None
             )

package/runtime/sandbox/apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py CHANGED Viewed

@@ -1386,6 +1386,60 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(watchdog_s, 240)
+    def test_narrow_contract_tests_use_fast_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Update app/__tests__/opportunity-graph.contract.test.ts to tighten the "
+            "ranking contract test. Keep this test-only and preserve existing behavior."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
+    def test_narrow_contract_regression_with_required_e2e_uses_fast_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Harden the opportunity graph contract around autonomous delivery-loop failure signals. "
+            "Add focused regression coverage in app/__tests__/opportunity-graph.contract.test.ts. "
+            "Required vision.md testing criteria: bun test | bun x tsc --noEmit | bun run lint | bun run web:e2e."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
+    def test_no_edit_recovery_attempt_uses_patch_first_watchdog(self) -> None:
+        prompt = "Investigate a broad reliability issue and make the smallest safe fix."
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            first_attempt_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+            recovery_attempt_s = _resolve_no_edit_watchdog_seconds(
+                prompt,
+                1200,
+                recovery_attempt=1,
+            )
+        self.assertEqual(first_attempt_s, 480)
+        self.assertEqual(recovery_attempt_s, 180)
+    def test_explicit_no_edit_watchdog_override_still_controls_recovery_attempts(self) -> None:
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "300"}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(
+                "Investigate a broad reliability issue.",
+                1200,
+                recovery_attempt=1,
+            )
+        self.assertEqual(watchdog_s, 300)
+    def test_review_fix_contract_level_tests_use_fast_no_edit_watchdog(self) -> None:
+        prompt = (
+            "Restore exact score assertions for contract-level tests where score is part "
+            "of the public output. Keep this as a test-only patch in app/__tests__."
+        )
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": ""}, clear=False):
+            watchdog_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+        self.assertEqual(watchdog_s, 180)
     def test_no_edit_recovery_guidance_warns_against_artifact_only_progress(self) -> None:
         guidance = _build_no_edit_recovery_guidance(
             "item.completed | still inspecting",
@@ -1393,6 +1447,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         )
         self.assertIn("node_modules", guidance)
+        self.assertIn("patch-first contract", guidance)
+        self.assertIn("Re-reading the target without editing is a failed recovery", guidance)
         self.assertIn("do not invent PushPals/autonomy-specific files", guidance)
         self.assertIn("Previous Codex event trace excerpt", guidance)
@@ -1411,6 +1467,15 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
         self.assertEqual(no_edit_s, 240)
         self.assertEqual(rollout_s, 180)
+    def test_narrow_contract_rollout_watchdog_is_earlier_than_no_edit_watchdog(self) -> None:
+        prompt = "Tighten the focused contract test for one ranking behavior."
+        with mock.patch.dict(os.environ, {"WORKERPALS_OPENAI_CODEX_ROLLOUT_WATCHDOG_S": ""}, clear=False):
+            no_edit_s = _resolve_no_edit_watchdog_seconds(prompt, 1200)
+            rollout_s = _resolve_rollout_watchdog_seconds(prompt, 1200, no_edit_s)
+        self.assertEqual(no_edit_s, 180)
+        self.assertEqual(rollout_s, 120)
     def test_offtrack_rollout_detects_missing_path_and_harness_drift(self) -> None:
         trace = {
             "summaries": [

package/runtime/sandbox/apps/workerpals/src/execute_job.ts CHANGED Viewed

@@ -74,6 +74,7 @@ export interface TaskExecutePlanning {
   validationSteps: string[];
   requiredValidationSteps?: string[];
   repoHintDiagnostics?: string[];
+  repoHintStalePaths?: string[];
   queuePriority: TaskExecutePriority;
   queueWaitBudgetMs: number;
   executionBudgetMs: number;
@@ -325,6 +326,15 @@ export function shouldSkipCriticAfterExecutorTimeout(opts: {
   return /\b(openai_codex|codex(?: exec)?)\b[^\r\n]*\btimed out\b/i.test(opts.executorText);
 }
+export function shouldSkipCriticForDeterministicValidationRevision(opts: {
+  deterministicRequiresRevision: boolean;
+  validationOutsideTaskScope: boolean;
+  validationRuns: ValidationExecutionResult[];
+}): boolean {
+  if (!opts.deterministicRequiresRevision || opts.validationOutsideTaskScope) return false;
+  return opts.validationRuns.some(isDeterministicFastValidationFailure);
+}
 export function workerAttemptRolloutScore(params: {
   executorElapsedMs: number;
   qualityElapsedMs: number;
@@ -6883,6 +6893,7 @@ function sanitizeStalePathHints(
   repo: string,
   values: unknown,
   taskText: string,
+  opts: { dropMissingParentHints?: boolean } = {},
 ): { values: string[]; stale: string[]; diagnostics: string[] } {
   const stale: string[] = [];
   const diagnostics: string[] = [];
@@ -6897,7 +6908,12 @@ function sanitizeStalePathHints(
       continue;
     }
     if (!pathParentExists(repo, raw) && !taskTextAllowsCreatingMissingPaths(taskText)) {
-      diagnostics.push(`Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`);
+      const diagnostic = `Path hint "${raw}" has a missing parent directory; verify the existing repo owner before editing.`;
+      diagnostics.push(diagnostic);
+      if (opts.dropMissingParentHints) {
+        stale.push(raw);
+        continue;
+      }
     }
     out.push(raw);
   }
@@ -6964,7 +6980,9 @@ export function sanitizeTaskExecutePlanningPathHints(
     const normalizedDiscovery: Record<string, unknown> = { ...discovery };
     if (isStringArray(discovery.likelyDirs)) {
       const sanitized = repo
-        ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText)
+        ? sanitizeStalePathHints(repo, discovery.likelyDirs, taskText, {
+            dropMissingParentHints: true,
+          })
         : { values: toStringArray(discovery.likelyDirs), stale: [], diagnostics: [] };
       normalizedDiscovery.likelyDirs = sanitized.values;
       staleHints.push(...sanitized.stale);
@@ -6986,10 +7004,40 @@ export function sanitizeTaskExecutePlanningPathHints(
   if (repoDiagnostics.length > 0) {
     out.repoHintDiagnostics = Array.from(new Set(repoDiagnostics)).slice(0, 8);
   }
+  if (staleHints.length > 0) {
+    out.repoHintStalePaths = Array.from(new Set(staleHints)).slice(0, 16);
+  }
   return out;
 }
+export function sanitizePlannerWorkerInstructionPathHints(
+  value: unknown,
+  staleHints: unknown,
+): string | undefined {
+  const text = String(value ?? "").trim();
+  if (!text) return undefined;
+  const normalizedHints = toStringArray(staleHints)
+    .map((hint) => normalizeStagePath(hint))
+    .filter((hint): hint is string => Boolean(hint))
+    .map((hint) => hint.toLowerCase());
+  if (normalizedHints.length === 0) return text;
+  const uniqueHints = Array.from(new Set(normalizedHints));
+  const hasStaleHint = (line: string): boolean => {
+    const lower = line.replace(/\\/g, "/").toLowerCase();
+    return uniqueHints.some((hint) => lower.includes(hint));
+  };
+  const lines = text.split(/\r?\n/);
+  const kept = lines.filter((line) => !hasStaleHint(line)).map((line) => line.trim()).filter(Boolean);
+  if (kept.length === lines.length) return text;
+  return [
+    "Planner path guidance was sanitized because it referenced paths absent from this checkout; rely on the Task planning contract target path hints and existing repo owners instead.",
+    ...kept,
+  ].join("\n");
+}
 function validateTaskExecutePlanning(
   value: unknown,
   options?: {
@@ -7546,6 +7594,13 @@ export async function executeJob(
     planning: sanitizedPlanning,
     instruction,
   };
+  const sanitizedPlannerWorkerInstruction = sanitizePlannerWorkerInstructionPathHints(
+    params.plannerWorkerInstruction,
+    planning.repoHintStalePaths ?? [],
+  );
+  if (sanitizedPlannerWorkerInstruction !== undefined) {
+    normalizedParams.plannerWorkerInstruction = sanitizedPlannerWorkerInstruction;
+  }
   const executionBudgetMs = Number(planning.executionBudgetMs);
   const finalizationBudgetMs = Number(planning.finalizationBudgetMs);
   const mergeConflictContext = extractMergeConflictReviewContext(normalizedParams);
@@ -7902,8 +7957,23 @@ export async function executeJob(
       qualityIssues: qualityForCritic.issues,
       changedPaths: quality.changedPaths,
     });
+    const preCriticEffectiveQualityIssues = validationOutsideTaskScope
+      ? quality.issues.filter((issue) => !issue.startsWith("ValidationGate:"))
+      : quality.issues;
+    const preCriticDeterministicRequiresRevision =
+      preCriticEffectiveQualityIssues.length > 0 ||
+      (quality.blocker !== null && !validationOutsideTaskScope);
+    const skipCriticForDeterministicValidationRevision =
+      shouldSkipCriticForDeterministicValidationRevision({
+        deterministicRequiresRevision: preCriticDeterministicRequiresRevision,
+        validationOutsideTaskScope,
+        validationRuns: quality.validationRuns,
+      });
     const critic =
-      quality.skipped || !qualityGatePolicy.criticGateEnabled || skipCriticAfterExecutorTimeout
+      quality.skipped ||
+      !qualityGatePolicy.criticGateEnabled ||
+      skipCriticAfterExecutorTimeout ||
+      skipCriticForDeterministicValidationRevision
         ? null
         : executor === "openai_codex"
           ? await runCodexCriticReview(repo, attemptParams, qualityForCritic, runtimeConfig, onLog)
@@ -7939,6 +8009,11 @@ export async function executeJob(
         "stdout",
         "[CriticGate] Skipping Codex critic after primary Codex executor timeout because deterministic quality and validation are clean.",
       );
+    } else if (skipCriticForDeterministicValidationRevision) {
+      onLog?.(
+        "stdout",
+        "[CriticGate] Skipping critic because deterministic fast validation already requires a quality revision.",
+      );
     }
     const rolloutScore = workerAttemptRolloutScore({
       executorElapsedMs,

package/runtime/sandbox/apps/workerpals/src/workerpals_main.ts CHANGED Viewed

@@ -1785,9 +1785,10 @@ async function workerLoop(
                   terminalStage: currentJobPhase ?? (result.ok ? "completed" : "worker"),
                   executorBackend: resolveExecutor(CONFIG),
                   summary: result.summary,
-                  watchdogFired: /timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
-                    `${result.summary}\n${result.stderr ?? ""}`,
-                  ),
+                  watchdogFired:
+                    /watchdog|rollout coach|timed out|timeout|signal 15|terminated|exit 143|exit 137/i.test(
+                      `${result.summary}\n${result.stderr ?? ""}\n${result.stdout ?? ""}`,
+                    ),
                   metadata: {
                     workerId: opts.workerId,
                     docker: Boolean(dockerExecutor),