@pushpalsdev/cli 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pushpalsdev/cli",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "description": "PushPals terminal CLI for LocalBuddy -> RemoteBuddy orchestration",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -16,7 +16,7 @@ Execution rules:
16
16
  - If the hinted file is a thin wrapper or the behavior lives elsewhere, edit the behavior-owning file(s) needed to solve the task and explain the scope expansion in your final response.
17
17
  - Avoid irrelevant sprawl; the review agent will judge whether changed files are necessary for the requested outcome.
18
18
  - Read relevant files before editing, then run focused validation.
19
- - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not spend the main Codex execution budget repeatedly running long browser/e2e smoke commands such as `bun run web:e2e`; run them only when the task is specifically about the browser harness or when you need a final targeted confirmation and can stop promptly on a clear failure.
19
+ - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not run long browser/e2e smoke commands such as `bun run web:e2e` by default from the Codex executor; ValidationGate is the authoritative browser runner and has the provisioned browser/runtime environment. For browser-harness tasks, inspect existing artifacts, run fast non-browser checks, and only run the full browser command once when a quick local startup probe shows it can run here and you need one targeted confirmation.
20
20
  - Use direct commands without shell wrappers. Prefer plain commands like `git diff -- path`, `git add <path>`, `git status --porcelain`, and `pwd`.
21
21
  - Do not wrap commands in `/bin/bash -lc`, `sh -lc`, `cmd /c`, or `powershell -Command`, and avoid pipelines, `awk`, heredocs, or multi-command shell snippets unless they are truly unavoidable.
22
22
  - If the command router rejects a command, simplify it to a single direct command instead of retrying more shell wrappers.
@@ -295,6 +295,8 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
295
295
  template = _load_prompt_template("workerpals/openai_codex_task_execute_system_prompt.md")
296
296
  self.assertIn("Codex CLI is required infrastructure", template)
297
297
  self.assertIn("Use direct commands without shell wrappers", template)
298
+ self.assertIn("ValidationGate is the authoritative browser runner", template)
299
+ self.assertIn("Do not run long browser/e2e smoke commands", template)
298
300
 
299
301
  def test_extracts_usage_counts_from_nested_json_event(self) -> None:
300
302
  usage = _extract_usage_counts(
@@ -43,6 +43,10 @@ const WORKERPAL_SANDBOX_COMPONENT_LABEL = "pushpals.component=workerpals-sandbox
43
43
  const DOCKER_IMAGE_INSPECT_TIMEOUT_MS = 15_000;
44
44
  const DOCKER_IMAGE_BUILD_TIMEOUT_MS = 10 * 60_000;
45
45
  const DOCKER_IMAGE_PULL_TIMEOUT_MS = 10 * 60_000;
46
+ const BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS = 8;
47
+ const BROWSER_VALIDATION_JOB_OVERHEAD_MS = 15 * 60_000;
48
+ const BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS = 4 * 60 * 60_000;
49
+ const BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS = 8 * 60 * 60_000;
46
50
 
47
51
  function parseClampedInt(value: unknown, defaultValue: number, min: number, max: number): number {
48
52
  const parsed =
@@ -237,6 +241,75 @@ export interface Job {
237
241
  sessionId: string;
238
242
  }
239
243
 
244
/**
 * Coerces a loosely-typed value into a strictly positive whole number.
 *
 * Numbers are used as-is and strings are parsed as base-10 integers (leading
 * digits only, per `Number.parseInt`). Every other input type is rejected.
 *
 * The value is floored BEFORE the positivity check so that fractional inputs
 * in the open interval (0, 1) are rejected instead of collapsing to `0`. A `0`
 * result would slip past a `?? defaultValue` fallback at the call site and be
 * treated as a real budget.
 *
 * @param value - Raw value, typically read from untyped job parameters.
 * @returns The floored positive integer, or `null` when the value is missing,
 *          non-numeric, non-finite, or not at least 1 after flooring.
 */
function readPositiveNumber(value: unknown): number | null {
  let parsed: number;
  if (typeof value === "number") {
    parsed = value;
  } else if (typeof value === "string") {
    parsed = Number.parseInt(value, 10);
  } else {
    return null;
  }
  if (!Number.isFinite(parsed)) return null;
  const whole = Math.floor(parsed);
  return whole > 0 ? whole : null;
}
254
+
255
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @param value - Any value.
 * @returns The same reference typed as a record when it is a non-null,
 *          non-array object; otherwise `null`.
 */
function maybeRecord(value: unknown): Record<string, unknown> | null {
  // Guard clauses: falsy values, primitives/functions and arrays are not records.
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
260
+
261
/**
 * Gathers every string in the job parameters that may name a validation command.
 *
 * Inspected fields, in order: `instruction`, `plannerWorkerInstruction`,
 * `validationSteps`, `requiredValidationSteps`, then the same two step lists
 * under `planning`. A string field contributes itself; an array field
 * contributes its string elements (one level deep, non-strings dropped);
 * anything else contributes nothing.
 *
 * @param params - Untyped job parameters.
 * @returns The collected strings in field order.
 */
function collectValidationCommandHints(params: Record<string, unknown>): string[] {
  const planning = maybeRecord(params.planning);
  const sources: unknown[] = [
    params.instruction,
    params.plannerWorkerInstruction,
    params.validationSteps,
    params.requiredValidationSteps,
    planning?.validationSteps,
    planning?.requiredValidationSteps,
  ];
  return sources.flatMap((source): string[] => {
    if (typeof source === "string") return [source];
    if (Array.isArray(source)) {
      return source.filter((item): item is string => typeof item === "string");
    }
    return [];
  });
}
283
+
284
/**
 * Reports whether a job's parameters mention a browser/e2e validation command.
 *
 * Only `task.execute` jobs are considered. A hint matches when it contains one
 * of the known script or tool names as a whole word, case-insensitively.
 *
 * @param job - The job's kind and parameters.
 * @returns `true` when any collected hint matches the browser-command pattern.
 */
function hasBrowserValidationCommand(job: Pick<Job, "kind" | "params">): boolean {
  if (job.kind !== "task.execute") return false;
  const browserCommandPattern =
    /\b(web:e2e|e2e:web|browser:e2e|smoke:web|web:smoke|browser:smoke|playwright|cypress)\b/i;
  for (const hint of collectValidationCommandHints(job.params)) {
    if (browserCommandPattern.test(hint)) return true;
  }
  return false;
}
292
+
293
/**
 * Picks the Docker job timeout, extending it for jobs that run browser validation.
 *
 * The configured timeout is floored and raised to at least 10 seconds. For
 * jobs without a browser validation command that value is returned as-is.
 * Otherwise the timeout is estimated as
 * `(repair attempts + 1) * (execution budget + finalization budget + overhead)`,
 * clamped to the browser-validation min/max constants, and the larger of that
 * and the base timeout is returned.
 *
 * @param configuredTimeoutMs - Timeout from executor options, in milliseconds.
 * @param job - The job's kind and parameters; `params.planning` may carry
 *              `executionBudgetMs` / `finalizationBudgetMs` overrides.
 * @returns The timeout to apply to this job, in milliseconds.
 */
export function resolveDockerJobTimeoutMs(
  configuredTimeoutMs: number,
  job: Pick<Job, "kind" | "params">,
): number {
  const baseTimeoutMs = Math.max(10_000, Math.floor(configuredTimeoutMs));
  if (!hasBrowserValidationCommand(job)) {
    return baseTimeoutMs;
  }

  const planning = maybeRecord(job.params.planning);
  // Defaults: 30 minutes of execution and 2 minutes of finalization per attempt.
  const perAttemptMs =
    (readPositiveNumber(planning?.executionBudgetMs) ?? 1_800_000) +
    (readPositiveNumber(planning?.finalizationBudgetMs) ?? 120_000) +
    BROWSER_VALIDATION_JOB_OVERHEAD_MS;
  // One initial attempt plus every allowed repair attempt.
  const totalAttempts = BROWSER_VALIDATION_JOB_REPAIR_ATTEMPTS + 1;
  const estimateMs = totalAttempts * perAttemptMs;

  const atLeastMinMs = Math.max(BROWSER_VALIDATION_JOB_MIN_TIMEOUT_MS, estimateMs);
  const clampedMs = Math.min(BROWSER_VALIDATION_JOB_MAX_TIMEOUT_MS, atLeastMinMs);
  return Math.max(baseTimeoutMs, clampedMs);
}
312
+
240
313
  export class DockerExecutor {
241
314
  private options: Required<Omit<DockerExecutorOptions, "config">>;
242
315
  private worktreeDir: string;
@@ -1141,9 +1214,15 @@ export class DockerExecutor {
1141
1214
  stdout: "pipe",
1142
1215
  stderr: "pipe",
1143
1216
  });
1217
+ const timeoutMs = resolveDockerJobTimeoutMs(this.options.timeoutMs, job);
1218
+ if (timeoutMs !== this.options.timeoutMs) {
1219
+ const note = `[DockerExecutor] Extended job timeout for browser validation convergence: ${timeoutMs}ms (configured ${this.options.timeoutMs}ms).`;
1220
+ console.log(note);
1221
+ onLog?.("stdout", note);
1222
+ }
1144
1223
 
1145
1224
  const { leadMs: warningLeadMs, delayMs: warningDelayMs } = computeTimeoutWarningWindow(
1146
- this.options.timeoutMs,
1225
+ timeoutMs,
1147
1226
  );
1148
1227
  const warningTimer = setTimeout(() => {
1149
1228
  const warning = `[DockerExecutor] Job nearing timeout in warm container (${Math.round(
@@ -1172,7 +1251,7 @@ export class DockerExecutor {
1172
1251
  } catch {
1173
1252
  // Ignore kill errors
1174
1253
  }
1175
- }, this.options.timeoutMs);
1254
+ }, timeoutMs);
1176
1255
 
1177
1256
  // Process streams
1178
1257
  const stdoutLines: string[] = [];
@@ -1192,6 +1271,7 @@ export class DockerExecutor {
1192
1271
  const result = this.parseResult(stdoutLines, stderrLines, exitCode, {
1193
1272
  timedOutByDocker,
1194
1273
  elapsedMs,
1274
+ timeoutMs,
1195
1275
  });
1196
1276
 
1197
1277
  return result;
@@ -1445,7 +1525,7 @@ export class DockerExecutor {
1445
1525
  stdoutLines: string[],
1446
1526
  stderrLines: string[],
1447
1527
  exitCode: number,
1448
- context: { timedOutByDocker: boolean; elapsedMs: number },
1528
+ context: { timedOutByDocker: boolean; elapsedMs: number; timeoutMs: number },
1449
1529
  ): DockerJobResult {
1450
1530
  let sawSentinel = false;
1451
1531
  let sentinelParseError = "";
@@ -1487,7 +1567,7 @@ export class DockerExecutor {
1487
1567
  if (context.timedOutByDocker) {
1488
1568
  return {
1489
1569
  ok: false,
1490
- summary: `Job timed out in Docker executor after ${context.elapsedMs}ms (limit ${this.options.timeoutMs}ms; terminated before structured result).`,
1570
+ summary: `Job timed out in Docker executor after ${context.elapsedMs}ms (limit ${context.timeoutMs}ms; terminated before structured result).`,
1491
1571
  stdout,
1492
1572
  stderr,
1493
1573
  exitCode,
@@ -3,7 +3,15 @@
3
3
  * Used by both the host Worker (direct mode) and the Docker job runner.
4
4
  */
5
5
 
6
- import { existsSync, lstatSync, readFileSync, renameSync, rmSync, unlinkSync } from "fs";
6
+ import {
7
+ existsSync,
8
+ lstatSync,
9
+ readdirSync,
10
+ readFileSync,
11
+ renameSync,
12
+ rmSync,
13
+ unlinkSync,
14
+ } from "fs";
7
15
  import { resolve } from "path";
8
16
  import {
9
17
  buildGitCommitArgs as buildSourceControlGitCommitArgs,
@@ -138,7 +146,41 @@ export interface QualityGatePolicy {
138
146
  criticMinScore: number;
139
147
  }
140
148
 
141
- const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 5;
149
+ const BROWSER_VALIDATION_MAX_AUTO_REVISIONS = 8;
150
+
151
/**
 * Computes the maximum number of iterations for the quality revision loop.
 *
 * @param policy - Revision limits from the quality gate policy.
 * @param opts - When `browserValidation` is truthy, the browser-validation
 *               revision cap also acts as a lower bound on the result.
 * @returns The largest of the applicable revision limits.
 */
export function qualityRevisionLoopUpperBound(
  policy: {
    maxAutoRevisions: number;
    validationMaxAutoRevisions: number;
  },
  opts: {
    browserValidation?: boolean;
  } = {},
): number {
  const browserFloor = opts.browserValidation ? BROWSER_VALIDATION_MAX_AUTO_REVISIONS : 0;
  const { maxAutoRevisions, validationMaxAutoRevisions } = policy;
  return Math.max(maxAutoRevisions, validationMaxAutoRevisions, browserFloor);
}
163
+
164
/**
 * Reports whether the task parameters reference a long-running browser
 * validation command.
 *
 * Fields are checked in this order: `planning.requiredValidationSteps`,
 * `planning.validationSteps`, `requiredValidationSteps`, `validationSteps`,
 * `instruction`. Strings are tested directly and arrays are searched
 * recursively; other value types are ignored.
 *
 * @param params - Untyped (normalized) job parameters.
 * @returns `true` as soon as any string is classified as a long-running
 *          browser validation command.
 */
function taskRequestsBrowserValidation(params: Record<string, unknown>): boolean {
  const planning =
    params.planning && typeof params.planning === "object"
      ? (params.planning as Record<string, unknown>)
      : {};
  const sources: unknown[] = [
    planning.requiredValidationSteps,
    planning.validationSteps,
    params.requiredValidationSteps,
    params.validationSteps,
    params.instruction,
  ];
  // Depth-first, left-to-right walk so strings are tested in the original order.
  const mentionsBrowserCommand = (value: unknown): boolean => {
    if (typeof value === "string") return isLongRunningBrowserValidationCommand(value);
    return Array.isArray(value) && value.some((item) => mentionsBrowserCommand(item));
  };
  return sources.some((source) => mentionsBrowserCommand(source));
}
142
184
 
143
185
  function shouldSoftPassValidationBlocker(
144
186
  policy: QualityGatePolicy,
@@ -1760,13 +1802,102 @@ function extractBrowserValidationArtifacts(text: string): string[] {
1760
1802
  return out;
1761
1803
  }
1762
1804
 
1805
/**
 * Lists the most recently modified browser-validation output files in a repo.
 *
 * Walks `outputs/web-e2e`, `test-results` and `playwright-report` under `repo`
 * (only those that exist) down to four levels below each root, keeps regular
 * files whose name matches `extensions`, and returns the newest `limit` paths
 * ordered by modification time, newest first.
 *
 * Changes from the previous version:
 * - The scan budget is enforced per entry, not only when a directory is
 *   entered, so one very large directory can no longer be stat'ed in full.
 * - A non-positive `limit` returns an empty list. A negative value used to
 *   reach `slice(0, limit)` and return "all but the last N".
 * - `extensions.lastIndex` is reset before each test so a regex with the `g`
 *   or `y` flag does not skip matches.
 *
 * @param repo - Repository root; `undefined` or empty yields an empty result.
 * @param extensions - Pattern tested against each file name (not the full path).
 * @param limit - Maximum number of paths to return.
 * @param maxScannedFiles - Upper bound on matching files collected before the
 *                          walk stops; bounds the work on huge artifact trees.
 * @returns Absolute file paths, newest first.
 */
function collectRecentBrowserValidationFiles(
  repo: string | undefined,
  extensions: RegExp,
  limit = 8,
  maxScannedFiles = 2_000,
): string[] {
  if (!repo || limit <= 0) return [];
  const roots = ["outputs/web-e2e", "test-results", "playwright-report"]
    .map((entry) => resolve(repo, entry))
    .filter((entry) => existsSync(entry));
  const files: Array<{ path: string; mtimeMs: number }> = [];
  const budgetSpent = (): boolean => files.length >= maxScannedFiles;
  const matchesExtension = (name: string): boolean => {
    // Global/sticky regexes carry state between test() calls; start fresh each time.
    extensions.lastIndex = 0;
    return extensions.test(name);
  };
  const visit = (dir: string, depth: number): void => {
    if (depth > 4 || budgetSpent()) return;
    let entries: Array<{ name: unknown; isDirectory(): boolean; isFile(): boolean }>;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable or vanished directory: best-effort scan, skip it.
      return;
    }
    for (const entry of entries) {
      if (budgetSpent()) return;
      const entryName = String(entry.name);
      const path = resolve(dir, entryName);
      if (entry.isDirectory()) {
        visit(path, depth + 1);
        continue;
      }
      if (!entry.isFile() || !matchesExtension(entryName)) continue;
      try {
        files.push({ path, mtimeMs: lstatSync(path).mtimeMs });
      } catch {
        // Ignore files that disappear while a validation command is cleaning up.
      }
    }
  };
  for (const root of roots) visit(root, 0);
  return files
    .sort((a, b) => b.mtimeMs - a.mtimeMs)
    .slice(0, limit)
    .map((entry) => entry.path);
}
1845
+
1846
/**
 * Returns up to six of the newest browser-validation artifact paths, each
 * passed through `toSingleLine` with a 220 limit.
 *
 * Artifacts are files ending in png, jpg/jpeg, webp, zip, json, txt, log or
 * webm (case-insensitive).
 *
 * @param repo - Repository root; `undefined` yields an empty list.
 * @returns Normalized artifact paths, newest first.
 */
function collectRecentBrowserValidationArtifacts(repo: string | undefined): string[] {
  const artifactPattern = /\.(?:png|jpe?g|webp|zip|json|txt|log|webm)$/i;
  const newestFiles = collectRecentBrowserValidationFiles(repo, artifactPattern, 6);
  return newestFiles.map((filePath) => toSingleLine(filePath, 220));
}
1853
+
1854
/**
 * Builds a one-line digest of failure signals from the three newest
 * browser-validation `.log` / `.txt` files.
 *
 * Each file is read as UTF-8 and stripped of ANSI control sequences. Its
 * non-empty trimmed lines are kept when they contain a known failure or
 * progress marker, and the last 18 kept lines are joined as
 * `<path>: a | b | ...`. Unreadable files and files without signal lines are
 * skipped. The per-file summaries are joined with ` | ` and passed through
 * `toSingleLine` with a 1,400 limit.
 *
 * @param repo - Repository root; `undefined` yields an empty summary.
 * @returns The combined digest (empty when nothing matched).
 */
function summarizeRecentBrowserValidationLogs(repo: string | undefined): string {
  const signalPattern =
    /\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for |Call log:|Verified:|Saved screenshot|Saved trace|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i;
  const perFileSummaries: string[] = [];
  for (const logPath of collectRecentBrowserValidationFiles(repo, /\.(?:log|txt)$/i, 3)) {
    let raw: string;
    try {
      raw = readFileSync(logPath, "utf8");
    } catch {
      // The log may be removed between listing and reading; skip it.
      continue;
    }
    const signalLines: string[] = [];
    for (const rawLine of stripAnsiControlSequences(raw).split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line && signalPattern.test(line)) signalLines.push(line);
    }
    if (signalLines.length > 0) {
      // Keep only the tail: the latest lines carry the failing stage.
      perFileSummaries.push(`${logPath}: ${signalLines.slice(-18).join(" | ")}`);
    }
  }
  return toSingleLine(perFileSummaries.join(" | "), 1_400);
}
1878
+
1879
/**
 * Merges several artifact lists into one de-duplicated list of at most eight
 * entries.
 *
 * Each entry is passed through `toSingleLine` with a 220 limit; empty results
 * and repeats are dropped. First-seen order is preserved across the sources,
 * and merging stops as soon as eight distinct entries are collected.
 *
 * @param sources - Artifact lists in priority order; `undefined` lists are skipped.
 * @returns Up to eight unique, normalized artifact strings.
 */
function mergeBrowserValidationArtifacts(...sources: Array<string[] | undefined>): string[] {
  // A Set keeps insertion order, so it serves as both the dedupe index and the output.
  const unique = new Set<string>();
  for (const list of sources) {
    if (!list) continue;
    for (const rawArtifact of list) {
      const normalized = toSingleLine(rawArtifact, 220);
      if (!normalized) continue;
      unique.add(normalized);
      if (unique.size >= 8) return [...unique];
    }
  }
  return [...unique];
}
1893
+
1763
1894
  function summarizeBrowserValidationOutput(text: string): string {
1764
1895
  const lines = stripAnsiControlSequences(text)
1765
1896
  .split(/\r?\n/)
1766
1897
  .map((line) => line.trim())
1767
1898
  .filter(Boolean)
1768
1899
  .filter((line) =>
1769
- /\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for getBy|Call log:|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)\b/i.test(
1900
+ /\b(Web end-to-end smoke test failed|Browser validation failed|Expected |locator\.|page\.|waiting for getBy|Call log:|ERR_SOCKET_BAD_PORT|EADDRINUSE|EPERM|EACCES|browserType\.launch|Executable doesn't exist|Expo exited early|freeport|net::ERR_|Validation command timed out|terminated by signal|SIGTERM|timed out after \d+ms)/i.test(
1770
1901
  line,
1771
1902
  ),
1772
1903
  );
@@ -1776,6 +1907,7 @@ function summarizeBrowserValidationOutput(text: string): string {
1776
1907
  export function buildBrowserValidationRepairPacket(
1777
1908
  validationRuns: ValidationExecutionResult[],
1778
1909
  previousFailureDigests: Map<string, string> = new Map(),
1910
+ repo?: string,
1779
1911
  ): BrowserValidationRepairPacket | null {
1780
1912
  for (const run of validationRuns) {
1781
1913
  if (run.ok || !isLongRunningBrowserValidationCommand(run.command)) continue;
@@ -1784,6 +1916,8 @@ export function buildBrowserValidationRepairPacket(
1784
1916
  const failureKind = classifyBrowserValidationFailureKindFromText(`${digest}\n${combined}`);
1785
1917
  if (failureKind === "unknown") continue;
1786
1918
  const previousDigest = previousFailureDigests.get(validationCommandKey(run.command)) ?? null;
1919
+ const recentLogSummary = summarizeRecentBrowserValidationLogs(repo);
1920
+ const enrichedBrowserContext = [combined, recentLogSummary].filter(Boolean).join("\n");
1787
1921
  const progress =
1788
1922
  previousDigest == null
1789
1923
  ? "first_failure"
@@ -1793,17 +1927,25 @@ export function buildBrowserValidationRepairPacket(
1793
1927
  return {
1794
1928
  command: run.command,
1795
1929
  failureKind,
1796
- stage: extractBrowserValidationStage(combined),
1797
- selector: extractBrowserValidationSelector(combined),
1798
- expected: extractBrowserValidationExpectedUi(combined),
1930
+ stage: extractBrowserValidationStage(enrichedBrowserContext),
1931
+ selector: extractBrowserValidationSelector(enrichedBrowserContext),
1932
+ expected: extractBrowserValidationExpectedUi(enrichedBrowserContext),
1799
1933
  digest,
1800
1934
  previousDigest,
1801
1935
  previousStage: previousDigest ? extractBrowserValidationStage(previousDigest) : null,
1802
1936
  previousSelector: previousDigest ? extractBrowserValidationSelector(previousDigest) : null,
1803
1937
  previousExpected: previousDigest ? extractBrowserValidationExpectedUi(previousDigest) : null,
1804
1938
  progress,
1805
- artifacts: extractBrowserValidationArtifacts(combined),
1806
- output: summarizeBrowserValidationOutput(combined) || digest,
1939
+ artifacts: mergeBrowserValidationArtifacts(
1940
+ extractBrowserValidationArtifacts(combined),
1941
+ collectRecentBrowserValidationArtifacts(repo),
1942
+ ),
1943
+ output: [
1944
+ summarizeBrowserValidationOutput(combined) || digest,
1945
+ recentLogSummary,
1946
+ ]
1947
+ .filter(Boolean)
1948
+ .join(" | "),
1807
1949
  };
1808
1950
  }
1809
1951
  return null;
@@ -2663,7 +2805,10 @@ export function buildQualityRevisionHint(
2663
2805
  "Convergence rule: preserve stages that already passed, repair only the current failing browser stage, and stop after one targeted browser confirmation so the next ValidationGate run gets a clean signal.",
2664
2806
  );
2665
2807
  lines.push(
2666
- `Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch. During the edit turn, run focused fast checks first; only run the full browser command for one targeted confirmation and stop on the first clear stage failure.`,
2808
+ "Executor sandbox rule: if the full browser command cannot run inside this edit turn because local server binding is denied or Expo/Playwright reports ERR_SOCKET_BAD_PORT, listen EPERM, EACCES, or a local port bind/freeport failure before reaching the app, treat that as a Codex executor verification limitation. Do not change app startup, ports, or browser provisioning for that local-only signal unless the ValidationGate failure above is also a startup/setup failure. Use the captured artifacts plus fast checks, then let ValidationGate perform the authoritative browser run.",
2809
+ );
2810
+ lines.push(
2811
+ `Validation rerun rule: PushPals ValidationGate will rerun "${browserRepairPacket.command}" after the patch. During a focused browser repair turn, run fast non-browser checks and inspect captured artifacts first; do not run the full browser command from the Codex executor by default. Only run the full browser command for one targeted confirmation if artifacts are missing and a quick local bind/startup probe shows the browser server can actually run in this executor. Otherwise stop after fast checks so ValidationGate gets the clean authoritative signal.`,
2667
2812
  );
2668
2813
  }
2669
2814
  if (reviewFixContext) {
@@ -5421,10 +5566,9 @@ export async function executeJob(
5421
5566
  const qualityGatePolicy = deriveQualityGatePolicy(normalizedParams, runtimeConfig);
5422
5567
  const qualityMaxAutoRevisions = qualityGatePolicy.maxAutoRevisions;
5423
5568
  const qualityValidationMaxAutoRevisions = qualityGatePolicy.validationMaxAutoRevisions;
5424
- const qualityRevisionLoopMax = Math.max(
5425
- qualityMaxAutoRevisions,
5426
- qualityValidationMaxAutoRevisions,
5427
- );
5569
+ const qualityRevisionLoopMax = qualityRevisionLoopUpperBound(qualityGatePolicy, {
5570
+ browserValidation: taskRequestsBrowserValidation(normalizedParams),
5571
+ });
5428
5572
  const qualitySoftPassOnExhausted = qualityGatePolicy.softPassOnExhausted;
5429
5573
  const qualityCriticMinScore = qualityGatePolicy.criticMinScore;
5430
5574
 
@@ -5565,6 +5709,7 @@ export async function executeJob(
5565
5709
  const browserRepairPacket = buildBrowserValidationRepairPacket(
5566
5710
  quality.validationRuns,
5567
5711
  previousValidationFailureDigests,
5712
+ repo,
5568
5713
  );
5569
5714
  for (const run of quality.validationRuns) {
5570
5715
  if (run.ok) continue;
@@ -5722,7 +5867,7 @@ export async function executeJob(
5722
5867
  requiredValidationFailures: quality.requiredValidationFailures,
5723
5868
  blocker: quality.blocker,
5724
5869
  revisionAttempt,
5725
- maxAutoRevisions: qualityValidationMaxAutoRevisions,
5870
+ maxAutoRevisions: activeMaxAutoRevisions,
5726
5871
  outsideTaskScope: validationOutsideTaskScope,
5727
5872
  });
5728
5873
  if (requiredValidationCanRevise) {
@@ -16,7 +16,7 @@ Execution rules:
16
16
  - If the hinted file is a thin wrapper or the behavior lives elsewhere, edit the behavior-owning file(s) needed to solve the task and explain the scope expansion in your final response.
17
17
  - Avoid irrelevant sprawl; the review agent will judge whether changed files are necessary for the requested outcome.
18
18
  - Read relevant files before editing, then run focused validation.
19
- - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not spend the main Codex execution budget repeatedly running long browser/e2e smoke commands such as `bun run web:e2e`; run them only when the task is specifically about the browser harness or when you need a final targeted confirmation and can stop promptly on a clear failure.
19
+ - PushPals runs the deterministic ValidationGate after your edit, including any repo-required `vision.md` commands. During the editing turn, prefer focused/fast validation. Do not run long browser/e2e smoke commands such as `bun run web:e2e` by default from the Codex executor; ValidationGate is the authoritative browser runner and has the provisioned browser/runtime environment. For browser-harness tasks, inspect existing artifacts, run fast non-browser checks, and only run the full browser command once when a quick local startup probe shows it can run here and you need one targeted confirmation.
20
20
  - Use direct commands without shell wrappers. Prefer plain commands like `git diff -- path`, `git add <path>`, `git status --porcelain`, and `pwd`.
21
21
  - Do not wrap commands in `/bin/bash -lc`, `sh -lc`, `cmd /c`, or `powershell -Command`, and avoid pipelines, `awk`, heredocs, or multi-command shell snippets unless they are truly unavoidable.
22
22
  - If the command router rejects a command, simplify it to a single direct command instead of retrying more shell wrappers.