@pushpalsdev/cli 1.1.10 → 1.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3036,6 +3036,17 @@ async function downloadBinaryAssetWithWindowsCurlFallback(url, outPath, cause) {
3036
3036
  renameSync(tmpPath, outPath);
3037
3037
  return true;
3038
3038
  }
3039
/**
 * Runs `worker(item, index)` for every entry of `items`, keeping at most
 * `concurrency` invocations in flight at once.
 *
 * Runners pull the next unclaimed index from a shared cursor, so each item is
 * handed to `worker` exactly once. The returned promise rejects as soon as any
 * `worker` call rejects (Promise.all semantics).
 *
 * @param items Array of work items.
 * @param concurrency Desired parallelism; floored, clamped to [1, items.length].
 * @param worker Async callback invoked with (item, index).
 */
async function runWithConcurrency(items, concurrency, worker) {
  if (items.length === 0)
    return;
  // A NaN limit (e.g. undefined or a non-numeric string) would propagate through
  // Math.min/Math.max and make Array.from create zero runners, silently skipping
  // every item. Fall back to sequential execution instead.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.max(1, Math.min(items.length, limit));
  let nextIndex = 0;
  const runner = async () => {
    while (nextIndex < items.length) {
      // Claim the index synchronously, before awaiting, so runners never share an item.
      const currentIndex = nextIndex;
      nextIndex += 1;
      await worker(items[currentIndex], currentIndex);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => runner()));
}
3039
3050
  async function ensureRuntimeBinaries(runtimeRoot, runtimeTag) {
3040
3051
  const platformKey = resolveRuntimePlatformKey();
3041
3052
  console.log(`[pushpals] Preparing embedded runtime binaries for ${runtimeTag} (${platformKey})...`);
@@ -3057,14 +3068,15 @@ async function ensureRuntimeBinaries(runtimeRoot, runtimeTag) {
3057
3068
  runtimeBinaries.sourceControlManager
3058
3069
  ];
3059
3070
  const shouldRefreshAll = installedTag !== runtimeTag;
3060
- let downloadedCount = 0;
3061
- for (const binaryPath of requiredAssets) {
3062
- if (!shouldRefreshAll && existsSync5(binaryPath))
3063
- continue;
3071
+ const assetsToDownload = requiredAssets.filter((binaryPath) => shouldRefreshAll || !existsSync5(binaryPath));
3072
+ if (assetsToDownload.length > 1) {
3073
+ console.log(`[pushpals] Downloading ${assetsToDownload.length} runtime binary asset(s) with bounded parallelism...`);
3074
+ }
3075
+ await runWithConcurrency(assetsToDownload, 3, async (binaryPath) => {
3064
3076
  const assetName = binaryPath.split(/[\\/]/).pop() || "";
3065
3077
  await downloadBinaryAsset(runtimeTag, assetName, binaryPath);
3066
- downloadedCount++;
3067
- }
3078
+ });
3079
+ const downloadedCount = assetsToDownload.length;
3068
3080
  writeFileSync(tagMarkerPath, `${runtimeTag}
3069
3081
  `, "utf8");
3070
3082
  cleanupLegacyRuntimeBinaryLayouts(runtimeRoot, platformKey, binDir);
@@ -4695,6 +4707,8 @@ ${tail}` : ""}`);
4695
4707
  const deadline = Date.now() + DEFAULT_RUNTIME_BOOT_TIMEOUT_MS;
4696
4708
  const readinessPhaseStartedAt = Date.now();
4697
4709
  const optionalServiceExitWarned = new Set;
4710
+ let lastReadinessWaitLogAt = 0;
4711
+ let lastReadinessWaitDetail = "";
4698
4712
  while (Date.now() < deadline) {
4699
4713
  reportRemoteBuddyAutonomousEngineState();
4700
4714
  if (maybeActivateRemoteBuddyWindowsFallback("silent_startup")) {
@@ -4738,6 +4752,17 @@ ${tail}` : ""}`);
4738
4752
  }
4739
4753
  const health = localBuddyEnabled ? await probeLocalBuddy(opts.localAgentUrl) : null;
4740
4754
  const remoteBuddyHealth2 = await probeRemoteBuddySessionConsumer(opts.serverUrl, opts.sessionId);
4755
+ if (localBuddyEnabled && !health?.ok || !remoteBuddyHealth2.ok) {
4756
+ const localBuddyDetail = localBuddyEnabled ? health?.ok ? "LocalBuddy ready" : "LocalBuddy not ready" : "LocalBuddy skipped";
4757
+ const readinessDetail = `${localBuddyDetail}; ${remoteBuddyHealth2.detail}`;
4758
+ const now = Date.now();
4759
+ if (readinessDetail !== lastReadinessWaitDetail || now - lastReadinessWaitLogAt >= 5000) {
4760
+ console.log(`[pushpals] Waiting for embedded runtime readiness: ${readinessDetail}`);
4761
+ appendRuntimeServicesLogLine(runtimeServicesLogPath, `[pushpals] waiting for embedded runtime readiness: ${readinessDetail}`);
4762
+ lastReadinessWaitDetail = readinessDetail;
4763
+ lastReadinessWaitLogAt = now;
4764
+ }
4765
+ }
4741
4766
  if ((!localBuddyEnabled || health?.ok) && remoteBuddyHealth2.ok) {
4742
4767
  reportRemoteBuddyAutonomousEngineState();
4743
4768
  const stabilityDeadline = Date.now() + DEFAULT_SERVICE_STABILITY_GRACE_MS;
@@ -5208,8 +5233,10 @@ function formatSessionEventLine(event) {
5208
5233
  if (type === "job_log") {
5209
5234
  const jobId = String(payload.jobId ?? "").slice(0, 8);
5210
5235
  const stream = String(payload.stream ?? "").toLowerCase() === "stderr" ? " stderr" : "";
5211
- const line = compactCliSessionJobLogLine(String(payload.line ?? "").trim());
5212
- return line ? `[job ${jobId}${stream}] ${line}` : null;
5236
+ const phase = compactCliSessionJobLogLine(String(payload.phase ?? "").trim());
5237
+ const phaseLabel = phase ? ` phase:${phase}` : "";
5238
+ const line = formatCliSessionJobLogLine(String(payload.line ?? "").trim());
5239
+ return line ? `[job ${jobId}${stream}${phaseLabel}] ${line}` : null;
5213
5240
  }
5214
5241
  if (type === "job_failed") {
5215
5242
  const jobId = String(payload.jobId ?? "").slice(0, 8);
@@ -5264,6 +5291,47 @@ function compactCliSessionJobLogLine(line) {
5264
5291
  return compacted;
5265
5292
  return `${compacted.slice(0, CLI_SESSION_JOB_LOG_MAX_CHARS - 3)}...`;
5266
5293
  }
5294
/**
 * Prepares a raw job log line for display in the CLI session view.
 *
 * Returns null when the compacted line is empty or matches a suppression rule.
 * Codex `item.completed` / `item.updated` lines that carry a payload after the
 * `|` separator are rewritten to `[codex] <payload>`; every other line is
 * returned in its compacted form.
 */
function formatCliSessionJobLogLine(line) {
  const compacted = compactCliSessionJobLogLine(line);
  if (!compacted || shouldSuppressCliSessionJobLogLine(compacted)) {
    return null;
  }
  const codexItemPattern = /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.(?:completed|updated)\s+\|\s+(.+)$/i;
  const itemPayload = compacted.match(codexItemPattern)?.[1];
  return itemPayload ? `[codex] ${compactCliSessionJobLogLine(itemPayload)}` : compacted;
}
5306
/**
 * Decides whether a job log line is noise that the CLI should not print.
 *
 * Blank input is always suppressed. Otherwise the trimmed line is suppressed
 * when it matches any of the patterns below (result sentinels, executor
 * bookkeeping messages, and payload-less Codex stream events).
 */
function shouldSuppressCliSessionJobLogLine(line) {
  const text = String(line ?? "").trim();
  if (!text) {
    return true;
  }
  const noisePatterns = [
    // Machine-readable result sentinels.
    /^(___RESULT___|__PUSHPALS_OH_RESULT__)\b/,
    // Executor bookkeeping.
    /^\[DockerExecutor\]\s+Linked worktree dependency artifact/i,
    /^\[Openai_codexExecutor\]\s+Spawning openai_codex executor/i,
    /^\[OpenAICodexExecutor\]\s+(?:Planner guidance|Codex auth mode|ChatGPT auth mode|Starting codex exec|codex exec finished|Codex JSON stream captured|Codex stdout captured|No reasoning-like|Reasoning-like event|Usage observed|Temporarily masked repo-local)/i,
    /^\[OpenAICodexExecutor\]\s+codex exec still running\b/i,
    // Codex stream events that carry no payload.
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+(?:thread|turn)\.started\b/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.started\b/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.completed\s*$/i,
    /^\[OpenAICodexExecutor\]\s+\[codex\]\s+item\.updated\s*$/i,
    // Router stderr echo for failed exec_command calls.
    /^\[OpenAICodexExecutor\]\s+\[stderr\].*codex_core::tools::router: error=exec_command failed/i
  ];
  return noisePatterns.some((pattern) => pattern.test(text));
}
5267
5335
  function buildSessionEventReplayFingerprint(event) {
5268
5336
  const type = String(event.type ?? "").trim().toLowerCase();
5269
5337
  if (type !== "status")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.10",
3
+ "version": "1.1.11",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -1432,10 +1432,15 @@ def _merge_usage_records(first: Any, second: Any) -> Dict[str, Any]:
1432
1432
  return merged
1433
1433
 
1434
1434
 
1435
+ def _is_publishable_changed_path(path: str) -> bool:
1436
+ normalized = str(path or "").replace("\\", "/").lower()
1437
+ return not re.search(r"(^|/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(/|$)", normalized)
1438
+
1439
+
1435
1440
def _codex_changed_paths(repo: str, baseline_snapshot: List[str]) -> Tuple[List[str], List[str], List[str]]:
    """Summarize what changed in ``repo`` relative to a pre-run baseline.

    Returns a ``(changed_paths, delta, effective)`` tuple:

    * ``changed_paths`` - everything ``summarize_git_changes`` reports for ``repo``.
    * ``delta`` - the entries of ``changed_paths`` not present in ``baseline_snapshot``.
    * ``effective`` - ``delta`` (or ``changed_paths`` when ``delta`` is empty) with
      non-publishable artifact paths filtered out.
    """
    changed_paths = summarize_git_changes(repo)
    # Set membership keeps the baseline comparison linear instead of rescanning
    # the baseline list once per changed path.
    baseline = set(baseline_snapshot)
    delta = [p for p in changed_paths if p not in baseline]
    candidates = delta if delta else changed_paths
    effective = [p for p in candidates if _is_publishable_changed_path(p)]
    return changed_paths, delta, effective
1440
1445
 
1441
1446
 
@@ -32,6 +32,7 @@ from openai_codex_executor import (
32
32
  _resolve_reasoning_effort,
33
33
  _build_instruction,
34
34
  _collect_disallowed_shell_wrapper_rejections,
35
+ _codex_changed_paths,
35
36
  _detect_codex_workaround_signal,
36
37
  _extract_usage_counts,
37
38
  _load_prompt_template,
@@ -298,6 +299,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
298
299
  task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
299
300
  guidance = "\n".join(task.supplemental_guidance)
300
301
 
302
+ self.assertIn("Worker speed/convergence contract", guidance)
303
+ self.assertIn("roughly 20 minutes", guidance)
301
304
  self.assertIn("Task planning contract from PushPals", guidance)
302
305
  self.assertIn("Worker phase contract", guidance)
303
306
  self.assertIn("Write globs are relevance hints, not hard limits", guidance)
@@ -305,6 +308,42 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
305
308
  self.assertIn("Home shell startup is assertable", guidance)
306
309
  self.assertIn("bun run web:e2e", guidance)
307
310
 
311
def test_parse_payload_prefers_helper_tests_for_visual_derivation_tasks(self) -> None:
    """A visual/readability instruction must yield the visual-task testing guidance."""
    instruction = (
        "Improve battlefield readability by making planet ownership rings, "
        "projectile trails, and danger cues clearer."
    )
    planning = {
        "intent": "code_change",
        "riskLevel": "medium",
        "queuePriority": "normal",
        "queueWaitBudgetMs": 90_000,
        "executionBudgetMs": 1_800_000,
        "finalizationBudgetMs": 120_000,
        "scope": {"readAnywhere": True, "writeAllowed": True},
        "targetPaths": ["app/game.tsx"],
        "acceptanceCriteria": ["Projectile and ownership readability improve"],
        "validationSteps": ["bun test app/__tests__/battlefieldReadability.test.ts"],
    }
    expected_phrases = (
        "Visual/rendering task rule",
        "prefer pure helper/state/style-prop tests",
        "full React Native/component render regression",
    )
    with tempfile.TemporaryDirectory(prefix="pushpals-visual-guidance-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        payload = {
            "kind": "task.execute",
            "repo": str(repo),
            "params": {
                "instruction": instruction,
                "schemaVersion": 2,
                "planning": planning,
            },
        }
        # The executor receives its payload as base64-encoded JSON on argv.
        encoded = base64.b64encode(json.dumps(payload).encode("utf-8")).decode("ascii")

        task = parse_task_execute_payload(["executor", encoded], logger=Logger("[test]"))
        guidance = "\n".join(task.supplemental_guidance)

        for phrase in expected_phrases:
            self.assertIn(phrase, guidance)
346
+
308
347
  def test_detects_codex_workaround_signals(self) -> None:
309
348
  signal = _detect_codex_workaround_signal(
310
349
  "Adapting test to avoid external Codex calls because Codex CLI isn't available in this environment.",
@@ -571,6 +610,45 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
571
610
  self.assertIn("src/", str(result.get("stdout") or ""))
572
611
  self.assertNotIn("Recovered after Codex attempts", str(result.get("stdout") or ""))
573
612
 
613
def test_codex_changed_paths_filters_dependency_artifacts_from_publishable_delta(self) -> None:
    """Artifact-only changes are reported as changed/delta but never as effective paths."""
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-artifact-delta-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# artifact delta test\n", encoding="utf-8")

        def run_git(*args: str) -> None:
            # Every git step must succeed; output is captured to keep test logs quiet.
            subprocess.run(["git", *args], cwd=repo, check=True, capture_output=True, text=True)

        run_git("init")
        run_git("config", "user.name", "PushPals Test")
        run_git("config", "user.email", "pushpals-tests@example.com")
        run_git("add", "README.md")
        run_git("commit", "-m", "chore: seed artifact test")

        # Create untracked files that live only under artifact directories.
        for directory, file_name in (("node_modules", "linked.txt"), ("outputs", "runtime.log")):
            artifact_dir = repo / directory
            artifact_dir.mkdir()
            (artifact_dir / file_name).write_text("artifact\n", encoding="utf-8")
        changed_paths, delta, effective = _codex_changed_paths(str(repo), [])

        self.assertGreaterEqual(len(changed_paths), 2)
        self.assertGreaterEqual(len(delta), 2)
        self.assertEqual(effective, [])
651
+
574
652
  def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
575
653
  with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
576
654
  repo = Path(temp_dir) / "repo"
@@ -738,6 +738,72 @@ def _append_list_guidance(lines: List[str], label: str, values: List[str]) -> No
738
738
  lines.append(f" - {value}")
739
739
 
740
740
 
741
+ def _joined_task_text(params: Dict[str, Any]) -> str:
742
+ pieces: List[str] = []
743
+
744
+ def collect(value: Any) -> None:
745
+ if isinstance(value, str):
746
+ pieces.append(value)
747
+ elif isinstance(value, list):
748
+ for item in value:
749
+ collect(item)
750
+ elif isinstance(value, dict):
751
+ for item in value.values():
752
+ collect(item)
753
+
754
+ collect(params.get("instruction"))
755
+ collect(params.get("plannerWorkerInstruction"))
756
+ collect(params.get("qualityRevisionHint"))
757
+ planning = params.get("planning")
758
+ if isinstance(planning, dict):
759
+ collect(planning.get("targetPaths"))
760
+ collect(planning.get("acceptanceCriteria"))
761
+ collect(planning.get("validationSteps"))
762
+ collect(planning.get("requiredValidationSteps"))
763
+ collect(planning.get("discovery"))
764
+ return "\n".join(pieces).lower()
765
+
766
+
767
def _looks_like_visual_derivation_task(params: Dict[str, Any]) -> bool:
    """Heuristically decide whether the task text describes visual/rendering work.

    Returns True when any marker below occurs at the start of a word in the
    joined, lower-cased task text. Anchoring at a word start keeps inflected
    forms ("rings", "rendered", "dangerous") matching while avoiding hits buried
    inside unrelated words - plain substring matching treated "string",
    "during" (ring) or "relationship" (ship) as visual markers.
    """
    import re  # local import: this block cannot see the module's import section

    text = _joined_task_text(params)
    visual_markers = (
        "visual",
        "readability",
        "battlefield",
        "render",
        "rendering",
        "projectile",
        "planet",
        "ship",
        "ring",
        "danger",
        "threat",
        "ownership",
        "dense action",
        "ui surface",
        "style",
        "styles",
    )
    marker_pattern = r"\b(?:" + "|".join(re.escape(marker) for marker in visual_markers) + r")"
    return re.search(marker_pattern, text) is not None
788
+
789
+
790
def _build_efficiency_guidance(params: Dict[str, Any]) -> str:
    """Build the worker speed/convergence guidance text for a task.

    The base contract is always included; two extra visual-testing rules are
    appended when ``_looks_like_visual_derivation_task(params)`` is true. Lines
    are joined with newlines.
    """
    base_contract = (
        "Worker speed/convergence contract from PushPals:",
        "- Target useful completion in roughly 20 minutes for small or medium repo tasks; optimize for the smallest coherent patch over exhaustive exploration.",
        "- Phase soft budgets: discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m. If a phase runs long, narrow scope rather than expanding the harness.",
        "- Test-harness soft budget: if setting up a focused test requires multiple new shared mocks, broad React Native shims, or repeated import fixes, stop building that harness and switch to smaller pure helper/state coverage.",
    )
    visual_rules = (
        "- Visual/rendering task rule: prefer pure helper/state/style-prop tests for derived visual cues. Use a full React Native/component render regression only if the repo already has a stable harness for that exact surface.",
        "- Full-surface React Native tests are a last resort for visual derivation work; do not spend the job constructing broad mocks just to assert pixels or nested component trees.",
    )
    if _looks_like_visual_derivation_task(params):
        return "\n".join(base_contract + visual_rules)
    return "\n".join(base_contract)
805
+
806
+
741
807
  def _build_planning_guidance(params: Dict[str, Any]) -> str:
742
808
  planning = params.get("planning")
743
809
  if not isinstance(planning, dict):
@@ -768,6 +834,9 @@ def _build_planning_guidance(params: Dict[str, Any]) -> str:
768
834
  " - full validation: let PushPals ValidationGate own long required/browser checks unless one local confirmation is explicitly useful."
769
835
  )
770
836
  lines.append(" - final diff review: remove unrelated churn before returning.")
837
+ lines.append(
838
+ "- Phase soft budget: aim for discovery <= 5m, editing <= 10m, focused validation <= 5m, final diff review <= 2m; if test harness setup starts consuming the budget, reduce to simpler helper/state coverage."
839
+ )
771
840
 
772
841
  scope = planning.get("scope")
773
842
  if isinstance(scope, dict):
@@ -869,6 +938,7 @@ def parse_task_execute_payload(
869
938
  quality_revision_hint = str(params.get("qualityRevisionHint") or "").strip()
870
939
 
871
940
  supplemental_guidance: List[str] = []
941
+ supplemental_guidance.append(_build_efficiency_guidance(params))
872
942
  planning_guidance = _build_planning_guidance(params)
873
943
  if planning_guidance:
874
944
  supplemental_guidance.append(planning_guidance)
@@ -43,10 +43,10 @@ const WORKERPAL_SANDBOX_COMPONENT_LABEL = "pushpals.component=workerpals-sandbox
43
43
  const DOCKER_IMAGE_INSPECT_TIMEOUT_MS = 15_000;
44
44
  const DOCKER_IMAGE_BUILD_TIMEOUT_MS = 10 * 60_000;
45
45
  const DOCKER_IMAGE_PULL_TIMEOUT_MS = 10 * 60_000;
46
- const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS = 8;
47
- const BROWSER_VALIDATION_JOB_OVERHEAD_MS = 15 * 60_000;
48
- const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS = 4 * 60 * 60_000;
49
- const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS = 8 * 60 * 60_000;
46
+ const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS = 3;
47
+ const BROWSER_VALIDATION_JOB_OVERHEAD_MS = 5 * 60_000;
48
+ const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS = 20 * 60_000;
49
+ const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS = 45 * 60_000;
50
50
 
51
51
  function parseClampedInt(value: unknown, defaultValue: number, min: number, max: number): number {
52
52
  const parsed =
@@ -312,7 +312,7 @@ export function resolveDockerJobTimeoutMs(
312
312
  BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS,
313
313
  Math.max(BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS, estimatedTimeoutMs),
314
314
  );
315
- return Math.max(baseTimeoutMs, boundedTimeoutMs);
315
+ return Math.max(Math.min(baseTimeoutMs, boundedTimeoutMs), BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS);
316
316
  }
317
317
 
318
318
  export class DockerExecutor {
@@ -1221,7 +1221,8 @@ export class DockerExecutor {
1221
1221
  });
1222
1222
  const timeoutMs = resolveDockerJobTimeoutMs(this.options.timeoutMs, job);
1223
1223
  if (timeoutMs !== this.options.timeoutMs) {
1224
- const note = `[DockerExecutor] Extended job timeout for browser validation convergence: ${timeoutMs}ms (configured ${this.options.timeoutMs}ms).`;
1224
+ const verb = timeoutMs > this.options.timeoutMs ? "Extended" : "Capped";
1225
+ const note = `[DockerExecutor] ${verb} job timeout for browser validation convergence: ${timeoutMs}ms (configured ${this.options.timeoutMs}ms).`;
1225
1226
  console.log(note);
1226
1227
  onLog?.("stdout", note);
1227
1228
  }
@@ -1246,7 +1247,7 @@ export class DockerExecutor {
1246
1247
  const timer = setTimeout(() => {
1247
1248
  timedOutByDocker = true;
1248
1249
  const elapsedMs = Math.max(1, Date.now() - startedAtMs);
1249
- const timeoutMsg = `[DockerExecutor] Job timeout in warm container after ${elapsedMs}ms (limit ${this.options.timeoutMs}ms): ${this.warmContainerName}`;
1250
+ const timeoutMsg = `[DockerExecutor] Job timeout in warm container after ${elapsedMs}ms (limit ${timeoutMs}ms): ${this.warmContainerName}`;
1250
1251
  console.log(timeoutMsg);
1251
1252
  onLog?.("stderr", timeoutMsg);
1252
1253
  try {
@@ -176,7 +176,7 @@ export interface QualityGatePolicy {
176
176
  criticMinScore: number;
177
177
  }
178
178
 
179
- const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 8;
179
+ const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 3;
180
180
 
181
181
  export function qualityRevisionLoopUpperBound(policy: {
182
182
  maxAutoRevisions: number;
@@ -378,6 +378,97 @@ function buildDiffBudgetWarning(
378
378
  .join(", ")}${meaningfulChangedPaths.length > 12 ? ", ..." : ""}`;
379
379
  }
380
380
 
381
/**
 * Reports whether `path` contains a dependency/runtime artifact directory
 * segment (outputs, node_modules, .worktrees, .codex, dist, build, coverage).
 * Backslashes are treated as path separators and matching is case-insensitive.
 */
function isNonPublishableArtifactPath(path: string): boolean {
  const forwardSlashed = path.replace(/\\/g, "/");
  const artifactSegment =
    /(^|\/)(outputs|node_modules|\.worktrees|\.codex|dist|build|coverage)(\/|$)/i;
  return artifactSegment.test(forwardSlashed);
}
386
+
387
/**
 * Returns the entries of `changedPaths` that are not artifact paths according
 * to `isNonPublishableArtifactPath`, preserving their original order.
 */
export function publishableChangedPaths(changedPaths: string[]): string[] {
  const publishable: string[] = [];
  for (const candidate of changedPaths) {
    if (!isNonPublishableArtifactPath(candidate)) {
      publishable.push(candidate);
    }
  }
  return publishable;
}
390
+
391
/**
 * Joins the textual planning fields into one lower-cased, newline-separated
 * string: intent, risk level, then target paths, acceptance criteria,
 * validation steps, required validation steps and the discovery keywords,
 * likely dirs and ripgrep queries. Missing lists contribute nothing; nullish
 * scalar entries contribute an empty line.
 */
function collectPlanningText(planning: TaskExecutePlanning): string {
  const discovery = planning.discovery;
  const parts: unknown[] = [planning.intent, planning.riskLevel];
  const listFields = [
    planning.targetPaths,
    planning.acceptanceCriteria,
    planning.validationSteps,
    planning.requiredValidationSteps,
    discovery?.keywords,
    discovery?.likelyDirs,
    discovery?.ripgrepQueries,
  ];
  for (const field of listFields) {
    parts.push(...(field ?? []));
  }
  const joined = parts.map((part) => String(part ?? "")).join("\n");
  return joined.toLowerCase();
}
407
+
408
/**
 * Heuristic: true when the collected planning text contains one of the
 * visual/rendering marker words as a whole word.
 */
function planningLooksLikeVisualDerivationTask(planning: TaskExecutePlanning): boolean {
  const visualMarker =
    /\b(visual|readability|battlefield|render(?:ing)?|projectile|planet|ship|ring|danger|threat|ownership|dense action|style|ui surface)\b/i;
  return visualMarker.test(collectPlanningText(planning));
}
414
+
415
/**
 * Produces a revision hint when validation appears to be failing inside
 * mock/import setup rather than product behavior.
 *
 * The issues plus every validation run's command, stdout and stderr are
 * searched for BOTH an import/export failure phrase and a mock/harness
 * keyword. Returns null unless both are present. For plans that look like
 * visual-derivation work an extra sentence is inserted into the warning.
 */
function buildTestHarnessConvergenceWarning(
  planning: TaskExecutePlanning,
  issues: string[],
  validationRuns: ValidationExecutionResult[],
): string | null {
  const evidence: unknown[] = [...issues];
  for (const run of validationRuns) {
    evidence.push(run.command, run.stdout, run.stderr);
  }
  const combined = evidence.map((part) => String(part ?? "")).join("\n");

  const importFailure =
    /\bCannot find module\b|\bdoes not provide an export\b|\bno exported member\b|\bimport error\b|\bundefined is not a function\b/i;
  if (!importFailure.test(combined)) return null;

  const harnessKeyword =
    /\b(react[- ]native|reactNativeMock|Animated\.View|expo-secure-store|SettingsContext|skin validator|mock|test helper|__mocks__)\b/i;
  if (!harnessKeyword.test(combined)) return null;

  let visualHint = "";
  if (planningLooksLikeVisualDerivationTask(planning)) {
    visualHint =
      " For this visual/rendering task, prefer pure helper/state/style-prop tests over a full React Native surface render.";
  }
  return [
    "Test harness convergence warning: validation is failing in mock/import setup rather than product behavior.",
    visualHint,
    " Do not keep expanding broad shared mocks to rescue an over-scoped component render test. If the repo does not already have stable React Native render-test infrastructure for this surface, replace the full-surface regression with smaller deterministic helper/state coverage and one focused assertion on the behavior-owning API.",
  ].join("");
}
443
+
444
/**
 * Produces a revision hint when a focused task has started editing shared
 * mock / test-harness files.
 *
 * Returns null when:
 *  - no publishable changed path looks like a mock/test-setup file,
 *  - the task is neither "small" (not high risk, and at most 2 target paths or
 *    at most 3 acceptance criteria) nor a visual-derivation task, or
 *  - the planning text itself mentions mocks / test harness / render tests,
 *    i.e. the harness work was explicitly requested.
 *
 * @param planning Planning metadata for the task.
 * @param changedPaths Paths changed in the working tree.
 */
function buildBroadSharedMockWarning(
  planning: TaskExecutePlanning,
  changedPaths: string[],
): string | null {
  // Reuse the shared artifact filter so this warning agrees with the rest of the
  // gate (covers `.codex` and backslash-separated paths, which the previous
  // inline copy of the pattern missed).
  const meaningfulChangedPaths = publishableChangedPaths(changedPaths);
  const broadMockPaths = meaningfulChangedPaths.filter((path) =>
    /(^|\/)(__mocks__|tests\/.*mock|test.*mock|reactNativeMock|setupTests?|jest\.|vitest\.|mock)(\.|\/|$)/i.test(
      path,
    ),
  );
  if (broadMockPaths.length === 0) return null;
  // Both list fields are treated as optional, matching collectPlanningText; a
  // missing list counts as empty instead of throwing on `.length`.
  const targetPathCount = planning.targetPaths?.length ?? 0;
  const acceptanceCriteriaCount = planning.acceptanceCriteria?.length ?? 0;
  const smallTask =
    planning.riskLevel !== "high" && (targetPathCount <= 2 || acceptanceCriteriaCount <= 3);
  if (!smallTask && !planningLooksLikeVisualDerivationTask(planning)) return null;
  const explicitlyRequested = /mock|test harness|react native test|component render/i.test(
    collectPlanningText(planning),
  );
  if (explicitlyRequested) return null;
  // List at most six offending paths and mark truncation with an ellipsis.
  const listedPaths = `${broadMockPaths.slice(0, 6).join(", ")}${
    broadMockPaths.length > 6 ? ", ..." : ""
  }`;
  return `Broad mock warning: this focused task now changes shared mock/test-harness file(s): ${listedPaths}. Before continuing, prefer behavior-owned helper/state tests or existing stable render-test infrastructure; do not add broad React Native mocks for a small visual/control change unless the task explicitly requires harness repair.`;
}
471
+
381
472
  const TEST_ASSERTION_BALANCE_ISSUE =
382
473
  "Changed test files do not show both positive and negative assertion coverage (expected both).";
383
474
 
@@ -3527,6 +3618,22 @@ export function buildQualityRevisionHint(
3527
3618
  );
3528
3619
  const diffBudgetWarning = buildDiffBudgetWarning(planning, changedPaths, focusedBrowserRepair);
3529
3620
  if (diffBudgetWarning) lines.push(diffBudgetWarning);
3621
+ const broadSharedMockWarning = buildBroadSharedMockWarning(planning, changedPaths);
3622
+ if (broadSharedMockWarning) lines.push(broadSharedMockWarning);
3623
+ const testHarnessConvergenceWarning = buildTestHarnessConvergenceWarning(
3624
+ planning,
3625
+ issues,
3626
+ validationRuns,
3627
+ );
3628
+ if (testHarnessConvergenceWarning) lines.push(testHarnessConvergenceWarning);
3629
+ if (planningLooksLikeVisualDerivationTask(planning)) {
3630
+ lines.push(
3631
+ "Visual derivation testing rule: prefer pure helper/state/style-prop tests for planet/projectile/ownership/readability cues. Only add a full React Native render regression when this repo already has a stable harness for that exact surface; otherwise keep render-visible behavior covered through the derived inputs that drive it.",
3632
+ );
3633
+ }
3634
+ lines.push(
3635
+ "Phase soft-budget reminder: if discovery, test-harness setup, or validation repair is running long, reduce the approach before spending more time. Small/medium tasks should converge toward a useful patch within roughly 20 minutes.",
3636
+ );
3530
3637
  const validationAlreadyPassed =
3531
3638
  validationRuns.length > 0 && validationRuns.every((run) => run.ok);
3532
3639
  if (validationAlreadyPassed && !focusedBrowserRepair) {
@@ -6645,6 +6752,53 @@ export async function executeJob(
6645
6752
  };
6646
6753
  }
6647
6754
 
6755
+ const preQualityStatus = await git(repo, ["status", "--porcelain"]);
6756
+ const preQualityChangedPaths = preQualityStatus.ok
6757
+ ? parseChangedPathsFromStatus(preQualityStatus.stdout)
6758
+ : [];
6759
+ const preQualityPublishablePaths = publishableChangedPaths(preQualityChangedPaths);
6760
+ const executorText = `${result.summary ?? ""}\n${result.stdout ?? ""}\n${result.stderr ?? ""}`;
6761
+ const shellWrapperReturn =
6762
+ /shell-wrapper command rejections|command-router shell-wrapper|command policy rejection/i.test(
6763
+ executorText,
6764
+ );
6765
+ if (preQualityChangedPaths.length > 0 && preQualityPublishablePaths.length === 0) {
6766
+ const detail = `Executor changed only non-publishable dependency/runtime artifact path(s): ${preQualityChangedPaths
6767
+ .slice(0, 12)
6768
+ .join(", ")}${preQualityChangedPaths.length > 12 ? ", ..." : ""}.`;
6769
+ onLog?.(
6770
+ "stderr",
6771
+ `[QualityGate] ${detail} Skipping ValidationGate/CriticGate because there is no PR-worthy patch to validate.`,
6772
+ );
6773
+ return {
6774
+ ok: false,
6775
+ summary: "Executor produced no publishable code changes",
6776
+ stdout: result.stdout,
6777
+ stderr: [result.stderr ?? "", detail].filter(Boolean).join("\n"),
6778
+ exitCode: 4,
6779
+ };
6780
+ }
6781
+ if (
6782
+ preQualityPublishablePaths.length === 0 &&
6783
+ (qualityGatePolicy.mode === "review_fix" || shellWrapperReturn)
6784
+ ) {
6785
+ const reason =
6786
+ qualityGatePolicy.mode === "review_fix"
6787
+ ? "Review-fix executor returned without publishable code changes."
6788
+ : "Codex hit shell-wrapper command rejections without leaving a publishable patch.";
6789
+ onLog?.(
6790
+ "stderr",
6791
+ `[QualityGate] ${reason} Skipping ValidationGate/CriticGate and failing fast.`,
6792
+ );
6793
+ return {
6794
+ ok: false,
6795
+ summary: reason,
6796
+ stdout: result.stdout,
6797
+ stderr: [result.stderr ?? "", reason].filter(Boolean).join("\n"),
6798
+ exitCode: 4,
6799
+ };
6800
+ }
6801
+
6648
6802
  const quality = await runDeterministicQualityGate(
6649
6803
  repo,
6650
6804
  attemptParams,
@@ -342,6 +342,70 @@ function isNoisyProgressLine(line: string): boolean {
342
342
  return /^(📦 Installing \[\d+\/\d+\]|🔍 Resolving\.\.\.|🔒 Saving lockfile\.\.\.)$/.test(line);
343
343
  }
344
344
 
345
/** Coarse phase label attached to forwarded job log lines and progress notes. */
type WorkerJobPhase =
  // investigation and implementation
  | "discovering"
  | "editing"
  // testing
  | "test harness repair"
  | "focused validation"
  | "full validation"
  // wrap-up
  | "final diff review"
  | "publishing"
  | "quality revision";
354
+
355
/**
 * Guesses which job phase a log line belongs to by keyword matching.
 *
 * Rules are evaluated top to bottom and the first matching pattern wins, so
 * the order of the table is significant. Returns null for blank lines and for
 * lines that match no rule.
 */
function inferWorkerJobPhaseFromLogLine(line: string): WorkerJobPhase | null {
  const text = String(line ?? "").trim();
  if (!text) return null;

  const orderedRules: ReadonlyArray<readonly [RegExp, WorkerJobPhase]> = [
    [
      /Quality gate requested revision|Quality revision required|revision guidance/i,
      "quality revision",
    ],
    [
      /test harness|React Native package|reactNativeMock|mock helper|mock was missing|expo-secure-store|import error|Cannot find module|does not provide an export|no exported member|Animated\.View|SettingsContext|skin validator/i,
      "test harness repair",
    ],
    [
      /focused validation|focused checks|targeted test|focused test|new regression|focused regression|fast checks|rerunning .*regression|node --check/i,
      "focused validation",
    ],
    [
      /ValidationGate|required validation|full .*test suite|whole Bun test|repo-level|bun test\b|bunx? tsc|typecheck|type check|bun run lint|web:e2e|browser smoke/i,
      "full validation",
    ],
    [
      /creating commit|Publish blocked|publish-blocked|completion ref|enqueueCompletion/i,
      "publishing",
    ],
    [
      /final diff|diff review|git diff|git status|whitespace|line-ending|line ending|pruning|remove unrelated|remaining diff|changed files/i,
      "final diff review",
    ],
    [
      /editing|patch|implemented|adding|fixing|updating|wiring|in place|changes are in place|making .*change|tightening|restore|normalizing/i,
      "editing",
    ],
    [
      /read|inspect|checking|locating|opening|artifact|screenshot|README|context|discover|search|rg |current checkout|worktree/i,
      "discovering",
    ],
  ];

  for (const [pattern, phase] of orderedRules) {
    if (pattern.test(text)) return phase;
  }
  return null;
}
408
+
345
409
  export function shouldEmitDirectSessionJobEvent(options: {
346
410
  ok: boolean;
347
411
  statusPersistedToServer: boolean;
@@ -1352,6 +1416,7 @@ async function workerLoop(
1352
1416
  let lastCleanLog = "";
1353
1417
  let lastCleanLogAt = 0;
1354
1418
  let lastForwardedJobLogAt = Date.now();
1419
+ let currentJobPhase: WorkerJobPhase | null = null;
1355
1420
 
1356
1421
  const emitJobLog = job.sessionId
1357
1422
  ? (stream: "stdout" | "stderr", line: string): boolean => {
@@ -1367,6 +1432,7 @@ async function workerLoop(
1367
1432
  lastCleanLog = cleaned;
1368
1433
  lastCleanLogAt = now;
1369
1434
  lastForwardedJobLogAt = now;
1435
+ currentJobPhase = inferWorkerJobPhaseFromLogLine(cleaned) ?? currentJobPhase;
1370
1436
  const logTs = new Date(now).toISOString();
1371
1437
 
1372
1438
  const seq = stream === "stdout" ? ++stdoutSeq : ++stderrSeq;
@@ -1374,7 +1440,14 @@ async function workerLoop(
1374
1440
  job.sessionId,
1375
1441
  {
1376
1442
  type: "job_log",
1377
- payload: { jobId: job.id, stream, seq, line: cleaned, ts: logTs },
1443
+ payload: {
1444
+ jobId: job.id,
1445
+ stream,
1446
+ seq,
1447
+ line: cleaned,
1448
+ ts: logTs,
1449
+ phase: currentJobPhase,
1450
+ },
1378
1451
  from: `worker:${opts.workerId}`,
1379
1452
  },
1380
1453
  { droppable: true },
@@ -1409,9 +1482,9 @@ async function workerLoop(
1409
1482
  "stdout",
1410
1483
  `[WorkerPals] Job ${job.id} still running after ${formatDurationMs(
1411
1484
  now - jobClaimedAtMs,
1412
- )} (kind=${job.kind}, worker=${opts.workerId}, quiet_for=${formatDurationMs(
1413
- quietForMs,
1414
- )}).`,
1485
+ )} (kind=${job.kind}, worker=${opts.workerId}, phase=${
1486
+ currentJobPhase ?? "unknown"
1487
+ }, quiet_for=${formatDurationMs(quietForMs)}).`,
1415
1488
  );
1416
1489
  }, jobProgressLogEveryMs)
1417
1490
  : null;