@pushpalsdev/cli 1.0.79 → 1.0.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,422 @@
1
/**
 * Category assigned to a tool. `resolveToolKind` returns the kind of a matching
 * registry adapter, `"known"` for names in the built-in known-tool set, and the
 * registry's fallback kind otherwise.
 */
export type ToolKind =
  | "known"
  | "discovered"
  | "shell";
2
+
3
/** Failure categories that `classifyToolFailure` can assign to a failed tool run. */
export type ToolFailureClass =
  | "missing_binary" | "missing_runtime"
  | "auth" | "network"
  | "permission" | "policy_denied"
  | "timeout" | "nonzero_exit"
  | "repo_state" | "sandbox_mount"
  | "unknown";
15
+
16
/** Kinds of side effect a tool may be allowed to have (see `ToolAdapter.defaultEffects`). */
export type ToolEffect =
  | "read"
  | "write"
  | "network"
  | "git"
  | "process";
17
+
18
/** Result of classifying a failed tool run. */
export interface ToolFailureClassification {
  /** Category the failure was matched to. */
  failureClass: ToolFailureClass;
  /** Whether the classifier considers a plain retry worthwhile. */
  retryable: boolean;
  /** Human-readable suggestion for resolving the failure. */
  remediation: string;
}
23
+
24
/**
 * Raw evidence about a failed tool invocation. Every field is optional; the
 * classifier concatenates whatever text is present and pattern-matches on it.
 */
export interface ToolFailureInput {
  /** Tool name or executable path, if known. */
  tool?: string | null;
  /** Argument vector of the invocation. */
  argv?: string[] | null;
  /** Full command line as a single string. */
  commandLine?: string | null;
  /** Captured standard output. */
  stdout?: string | null;
  /** Captured standard error. */
  stderr?: string | null;
  /** Short description of the failure. */
  summary?: string | null;
  /** Longer description of the failure. */
  detail?: string | null;
  /** Process exit code, when one is available. */
  exitCode?: number | null;
  /** True when the caller already knows the run timed out. */
  timedOut?: boolean;
}
35
+
36
/** Structured record describing one tool invocation and its outcome. */
export interface ToolRunRecord {
  /** Caller-supplied identifier of this record. */
  id: string;
  jobId?: string | null;
  workerId?: string | null;
  sessionId?: string | null;
  phase?: string | null;
  /** Normalized tool name. */
  tool: string;
  /** Category of the tool (see `ToolKind`). */
  kind: ToolKind;
  capability?: string | null;
  envProfile?: string | null;
  /** Working directory of the invocation, if recorded. */
  cwd?: string | null;
  /** Argument vector of the invocation. */
  argv: string[];
  /** Full command line as a single string, if recorded. */
  commandLine?: string | null;
  /** Effects the invocation was permitted to have. */
  allowedEffects: ToolEffect[];
  /** Whether the run succeeded. */
  ok: boolean;
  exitCode?: number | null;
  /** Failure category; expected to be set when `ok` is false. */
  failureClass?: ToolFailureClass | null;
  retryable: boolean;
  remediation?: string | null;
  /** Start timestamp (an ISO-8601 string when built by `createToolRunRecordFromFailure`). */
  startedAt: string;
  /** Finish timestamp (an ISO-8601 string when built by `createToolRunRecordFromFailure`). */
  finishedAt: string;
  /** Run duration in milliseconds. */
  durationMs: number;
  /** Trailing portion of standard output. */
  stdoutTail?: string | null;
  /** Trailing portion of standard error. */
  stderrTail?: string | null;
  /** Free-form additional data attached by the caller. */
  metadata?: Record<string, unknown>;
}
62
+
63
/** Registry entry describing one tool. */
export interface ToolAdapter {
  /** Tool name; compared after normalization when resolving a tool's kind. */
  tool: string;
  /** Kind reported for invocations that match this adapter. */
  kind: ToolKind;
  /** Executable names or launcher commands associated with the tool. */
  executableHints?: string[];
  /** Effects associated with the tool by default. */
  defaultEffects?: ToolEffect[];
}
69
+
70
/** Collection of tool adapters plus the kind to use for tools that match none of them. */
export interface ToolRegistry {
  /** Registered adapters; `resolveToolKind` uses the first one whose name matches. */
  adapters: ToolAdapter[];
  /** Kind returned for tools that match no adapter and are not a built-in known name. */
  fallbackKind: ToolKind;
}
74
+
75
/** Normalized tool names that resolve to the "known" kind even without a registry adapter. */
const KNOWN_TOOL_NAMES = new Set<string>(
  ["bun", "codex", "docker", "gh", "git", "node", "npm", "python", "shell"],
);
86
+
87
/**
 * Built-in registry used when callers do not supply their own. Tools that match
 * no adapter and no built-in known name fall back to the "discovered" kind.
 */
export const DEFAULT_TOOL_REGISTRY: ToolRegistry = {
  fallbackKind: "discovered",
  adapters: [
    {
      tool: "git",
      kind: "known",
      executableHints: ["git"],
      defaultEffects: ["read", "write", "git"],
    },
    {
      tool: "codex",
      kind: "known",
      executableHints: ["codex", "bunx @openai/codex"],
      defaultEffects: ["read", "write", "network", "process"],
    },
    {
      tool: "bun",
      kind: "known",
      executableHints: ["bun"],
      defaultEffects: ["read", "write", "process"],
    },
    {
      tool: "docker",
      kind: "known",
      executableHints: ["docker"],
      defaultEffects: ["read", "write", "network", "process"],
    },
    {
      tool: "gh",
      kind: "known",
      executableHints: ["gh"],
      defaultEffects: ["read", "write", "network"],
    },
    {
      tool: "node",
      kind: "known",
      executableHints: ["node"],
      defaultEffects: ["read", "write", "process"],
    },
    {
      tool: "shell",
      kind: "shell",
      executableHints: ["sh", "bash", "cmd", "powershell"],
      defaultEffects: ["read", "write", "process"],
    },
  ],
};
99
+
100
/** Default number of trailing characters kept by `truncateToolText`. */
export const TOOL_RUN_TAIL_CHARS = 8000;
101
+
102
/**
 * Converts any value to a trimmed string.
 *
 * @param value - Arbitrary input; `null` and `undefined` become the empty string.
 * @returns `String(value)` with leading and trailing whitespace removed.
 */
function cleanText(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value).trim();
}
105
+
106
/**
 * Returns the last path segment of a command, accepting both `/` and `\` as
 * separators.
 *
 * @param command - Command or executable path, optionally wrapped in quotes.
 * @returns The text after the last separator, or the whole unquoted command
 *   when that text is empty (for example when the path ends in a separator).
 */
function basename(command: string): string {
  // Strip one leading and one trailing quote character, if present.
  const unquoted = command.trim().replace(/^["']|["']$/g, "");
  const lastSeparator = Math.max(unquoted.lastIndexOf("/"), unquoted.lastIndexOf("\\"));
  const leaf = unquoted.slice(lastSeparator + 1);
  return leaf || unquoted;
}
112
+
113
/**
 * Keeps only the tail of a (possibly very long) tool output.
 *
 * @param value - Text to truncate; it is stringified and trimmed first.
 * @param maxChars - Maximum number of trailing characters to keep. Fractions
 *   are floored, negative values are treated as 0, and `NaN` falls back to
 *   `TOOL_RUN_TAIL_CHARS`.
 * @returns The text unchanged when it fits; otherwise a `...[truncated]...`
 *   marker followed by a newline and the retained tail. When nothing is
 *   retained, only the marker is returned.
 */
export function truncateToolText(value: unknown, maxChars = TOOL_RUN_TAIL_CHARS): string {
  const text = cleanText(value);
  if (!text) return "";

  const limit = Number.isNaN(maxChars) ? TOOL_RUN_TAIL_CHARS : Math.max(0, Math.floor(maxChars));
  if (text.length <= limit) return text;

  const marker = "...[truncated]...";
  // `slice(-0)` is `slice(0)` and would return the entire text, so a zero
  // budget must be handled explicitly.
  if (limit === 0) return marker;

  let cut = text.length - limit;
  // Do not start the tail in the middle of a surrogate pair: if the cut falls
  // between a high and a low surrogate, skip the orphaned low surrogate.
  const before = text.charCodeAt(cut - 1);
  const at = text.charCodeAt(cut);
  if (before >= 0xd800 && before <= 0xdbff && at >= 0xdc00 && at <= 0xdfff) {
    cut += 1;
  }

  const tail = text.slice(cut);
  return tail ? `${marker}\n${tail}` : marker;
}
119
+
120
/**
 * Masks well-known secret formats in tool output before it is stored or shown.
 *
 * Redacted patterns:
 * - passwords embedded in URLs (`scheme://user:password@host`), as seen in git
 *   remote errors;
 * - `NAME=value` assignments for OPENAI_API_KEY, GITHUB_TOKEN, GH_TOKEN and
 *   PUSHPALS_AUTH_TOKEN;
 * - `Bearer <token>` values;
 * - GitHub tokens with the `ghp_`, `gho_`, `ghu_`, `ghs_`, `ghr_` and
 *   `github_pat_` prefixes;
 * - OpenAI-style `sk-` keys.
 *
 * @param value - Text to sanitize; it is stringified and trimmed first.
 * @returns The sanitized text, or the empty string for empty input.
 */
export function redactToolText(value: unknown): string {
  const text = cleanText(value);
  if (!text) return "";
  return text
    // Runs first so the user name and host of a credentialed URL stay readable.
    .replace(/\b([a-z][a-z0-9+.-]*:\/\/[^\s/:@]+:)[^\s/@]+@/gi, "$1[redacted]@")
    .replace(/\b(OPENAI_API_KEY|GITHUB_TOKEN|GH_TOKEN|PUSHPALS_AUTH_TOKEN)=([^\s]+)/gi, "$1=[redacted]")
    .replace(/\b(Bearer\s+)[A-Za-z0-9._~+/=-]{16,}/gi, "$1[redacted]")
    .replace(/\b(gh[pousr]|github_pat)_[A-Za-z0-9_]{20,}/g, "[redacted-github-token]")
    .replace(/\bsk-[A-Za-z0-9_-]{20,}/g, "[redacted-openai-key]");
}
129
+
130
/**
 * Maps a tool name, executable path or launcher command to a canonical
 * lower-case tool name.
 *
 * @param tool - Raw tool identifier; any value is stringified and trimmed.
 * @returns `"codex"` for anything mentioning the OpenAI Codex package, `"bun"`
 *   for `bunx`, `"python"` for `python3`, `"shell"` for common shells and for
 *   empty input, otherwise the executable's base name without a Windows
 *   script/binary extension.
 */
export function normalizeToolName(tool: unknown): string {
  const lowered = cleanText(tool).toLowerCase();
  if (lowered === "") return "shell";

  const codexMarkers = ["@openai/codex", "openai_codex"];
  if (codexMarkers.some((marker) => lowered.includes(marker))) return "codex";

  const stem = basename(lowered).replace(/\.(exe|cmd|bat|ps1)$/i, "");
  switch (stem) {
    case "bunx":
      return "bun";
    case "python3":
      return "python";
    case "pwsh":
    case "powershell":
    case "bash":
    case "sh":
    case "cmd":
    case "":
      return "shell";
    default:
      return stem;
  }
}
142
+
143
/**
 * Determines the kind of a tool.
 *
 * @param tool - Tool name or executable; normalized before lookup.
 * @param registry - Registry to consult; defaults to `DEFAULT_TOOL_REGISTRY`.
 * @returns The kind of the first adapter whose normalized name matches,
 *   `"known"` for built-in known names, otherwise the registry's fallback kind.
 */
export function resolveToolKind(tool: string, registry = DEFAULT_TOOL_REGISTRY): ToolKind {
  const wanted = normalizeToolName(tool);
  for (const candidate of registry.adapters) {
    if (normalizeToolName(candidate.tool) === wanted) {
      return candidate.kind;
    }
  }
  if (KNOWN_TOOL_NAMES.has(wanted)) {
    return "known";
  }
  return registry.fallbackKind;
}
149
+
150
/**
 * Works out which tool a failure belongs to.
 *
 * An explicit `input.tool` wins whenever it normalizes to something other than
 * `"shell"`. Otherwise the command line, argv and captured output are searched
 * (case-insensitively) for tool-specific phrases. The checks run in a fixed
 * order, so git branch-sync phrases win over a mention of codex, which in turn
 * wins over generic git keywords.
 *
 * @param input - Failure evidence.
 * @returns A normalized tool name, or `"shell"` when nothing more specific matches.
 */
export function inferToolNameFromFailureText(input: ToolFailureInput): string {
  const declared = normalizeToolName(input.tool);
  if (declared !== "shell") return declared;

  const argvText = Array.isArray(input.argv) ? input.argv.join(" ") : "";
  const fragments: string[] = [];
  for (const part of [
    input.commandLine,
    argvText,
    input.summary,
    input.detail,
    input.stdout,
    input.stderr,
  ]) {
    const normalized = cleanText(part).toLowerCase();
    if (normalized) fragments.push(normalized);
  }
  const text = fragments.join("\n");
  const has = (needle: string): boolean => text.includes(needle);

  // Phrases emitted by git branch sync/rebase failures; checked first so they
  // are attributed to git even when the text also mentions other tools.
  const gitSyncPhrases = [
    "failed to sync branch before push",
    "tracked .codex path blocks branch sync",
    "untracked working tree files would be overwritten",
    "git pull --rebase",
    "could not detach head",
    "could not apply",
  ];
  if (gitSyncPhrases.some(has)) return "git";

  if (has("@openai/codex") || has("openai_codex") || /\bcodex\b/.test(text)) {
    return "codex";
  }

  const mentionsGit = /\bgit\b/.test(text);
  if (mentionsGit || /\b(rebase|cherry-pick|checkout|merge conflict)\b/.test(text)) {
    return "git";
  }

  if (/\bdocker\b/.test(text) || has("docker_engine")) return "docker";
  if (/\bgh\b/.test(text) || has("github api")) return "gh";
  if (/\bbun\b/.test(text)) return "bun";
  if (/\bnode\b/.test(text)) return "node";
  return "shell";
}
190
+
191
/**
 * Joins every textual field of a failure into one newline-separated string.
 *
 * @param input - Failure evidence.
 * @returns The trimmed, non-empty fields in the order tool, argv, command
 *   line, summary, detail, stdout, stderr. Original casing is preserved.
 */
function combinedFailureText(input: ToolFailureInput): string {
  const { tool, argv, commandLine, summary, detail, stdout, stderr } = input;
  const lines: string[] = [];
  for (const field of [tool, argv?.join(" "), commandLine, summary, detail, stdout, stderr]) {
    const cleaned = cleanText(field);
    if (cleaned) lines.push(cleaned);
  }
  return lines.join("\n");
}
205
+
206
/**
 * Detects error output indicating that the `node` runtime could not be found.
 *
 * @param text - Combined failure text.
 * @returns True when the text matches an `env: node: No such file or directory`
 *   style message (with optional straight or curly quotes around `node`),
 *   `node: not found`, or a `node.exe ... not found` line.
 */
function hasNodeEnvRuntimeFailure(text: string): boolean {
  const signatures: RegExp[] = [
    /env:\s*[`'"\u2018\u2019\u201c\u201d]?node[`'"\u2018\u2019\u201c\u201d]?:?\s+no such file or directory/i,
    /\bnode:\s+not found\b/i,
    /\bnode\.exe.*not found\b/i,
  ];
  return signatures.some((signature) => signature.test(text));
}
213
+
214
/**
 * Classifies a failed tool run by pattern-matching its combined text.
 *
 * Checks run in a fixed priority order and the first match wins: timeout,
 * missing node runtime, outdated Codex, Docker daemon unavailable, command
 * policy denial, authentication, network, filesystem permission / read-only
 * mount, repository state, missing binary, non-zero exit code, and finally
 * `"unknown"`. Matching is a case-insensitive substring search, except the
 * node-runtime check, which uses regular expressions.
 *
 * @param input - Failure evidence.
 * @returns The failure class, whether a retry is worthwhile, and a remediation hint.
 */
export function classifyToolFailure(input: ToolFailureInput): ToolFailureClassification {
  const tool = inferToolNameFromFailureText(input);
  const text = combinedFailureText(input);
  const lower = text.toLowerCase();

  // True when the lower-cased failure text contains any of the given phrases.
  const mentions = (...needles: string[]): boolean =>
    needles.some((needle) => lower.includes(needle));
  // Builds the result object.
  const verdict = (
    failureClass: ToolFailureClass,
    retryable: boolean,
    remediation: string,
  ): ToolFailureClassification => ({ failureClass, retryable, remediation });

  if (input.timedOut || mentions("timed out", "timeout")) {
    return verdict(
      "timeout",
      true,
      "Retry with a larger tool budget or reduce the command scope.",
    );
  }

  if (hasNodeEnvRuntimeFailure(text)) {
    const remediation =
      tool === "codex"
        ? "Codex was invoked through a launcher that requires node, but node is absent in this environment. Use a Bun-backed Codex launcher or install node in the sandbox image."
        : "Install the missing node runtime or invoke the tool through a runtime available in this environment.";
    return verdict("missing_runtime", false, remediation);
  }

  const codexTooOld =
    mentions("requires a newer version of codex") ||
    (mentions("requires newer") && mentions("codex"));
  if (codexTooOld) {
    return verdict(
      "missing_runtime",
      false,
      "Upgrade the Codex CLI/runtime used by PushPals before retrying this model.",
    );
  }

  const dockerUnavailable =
    mentions(
      "docker_engine",
      "cannot connect to the docker daemon",
      "docker daemon is not running",
    ) ||
    (mentions("failed to connect to the docker api") && mentions("docker"));
  if (dockerUnavailable) {
    return verdict(
      "missing_runtime",
      false,
      "Start Docker Desktop/the Docker daemon, then retry the Docker-backed operation.",
    );
  }

  if (
    mentions(
      "command-router",
      "policy rejection",
      "policy denied",
      "disallowed command",
      "command policy",
    )
  ) {
    return verdict(
      "policy_denied",
      false,
      "Adjust the tool invocation to comply with the configured command policy.",
    );
  }

  if (
    mentions(
      "login is required",
      "not logged in",
      "authentication",
      "unauthorized",
      "invalid api key",
      "api_key auth requires",
    )
  ) {
    return verdict(
      "auth",
      false,
      `Authenticate ${tool} or provide the required token before retrying.`,
    );
  }

  if (
    mentions(
      "econnrefused",
      "enotfound",
      "etimedout",
      "failed to connect",
      "connection reset",
      "network is unreachable",
    )
  ) {
    return verdict(
      "network",
      true,
      "Retry after the dependent service or network path is available.",
    );
  }

  if (
    mentions(
      "read-only file system",
      "mounted read-only",
      "operation not permitted",
      "permission denied",
      "eacces",
      "eperm",
    )
  ) {
    // Any mention of a read-only or mounted filesystem is treated as a sandbox
    // mount problem rather than a plain permission error.
    if (mentions("read-only", "mounted")) {
      return verdict(
        "sandbox_mount",
        false,
        "Remount the sandbox/worktree with writable metadata or move mutable tool state outside the read-only mount.",
      );
    }
    return verdict("permission", false, "Fix filesystem or process permissions before retrying.");
  }

  if (
    mentions(
      "rebase in progress",
      "merge conflict",
      "tracked .codex path blocks branch sync",
      "untracked working tree files would be overwritten",
      "could not apply",
      "please move or remove them before you switch branches",
    )
  ) {
    return verdict(
      "repo_state",
      false,
      "Resolve the repository state conflict before retrying the same publish/sync step.",
    );
  }

  if (
    mentions(
      "command not found",
      "not recognized as an internal or external command",
      "neither bunx nor codex was found",
      "no such file or directory",
    )
  ) {
    return verdict(
      "missing_binary",
      false,
      `Install ${tool} or configure its executable path before retrying.`,
    );
  }

  if (typeof input.exitCode === "number" && input.exitCode !== 0) {
    return verdict(
      "nonzero_exit",
      false,
      `Inspect ${tool} stdout/stderr and fix the command-specific failure before retrying.`,
    );
  }

  return verdict(
    "unknown",
    false,
    "Inspect the tool output and add a classifier if this failure mode recurs.",
  );
}
366
+
367
/**
 * Builds a failed `ToolRunRecord` (`ok: false`) from raw failure evidence.
 *
 * - The tool name is inferred from the input and the failure is classified
 *   from the unredacted text.
 * - `finishedAt` defaults to the current time. `startedAt` defaults to
 *   `finishedAt - durationMs`, or to the current time when `finishedAt` cannot
 *   be parsed.
 * - `durationMs` is rounded; missing, negative or non-finite values become 0.
 * - `stdout`, `stderr` (falling back to `detail` when `stderr` is null or
 *   undefined), `commandLine` and every `argv` entry are passed through
 *   `redactToolText` before being stored, so secrets supplied on the command
 *   line are masked like secrets in the output. The output tails are also
 *   truncated.
 *
 * @param input - Failure evidence plus record identity and context fields.
 * @returns The populated record.
 */
export function createToolRunRecordFromFailure(input: ToolFailureInput & {
  id: string;
  jobId?: string | null;
  workerId?: string | null;
  sessionId?: string | null;
  phase?: string | null;
  kind?: ToolKind;
  capability?: string | null;
  envProfile?: string | null;
  cwd?: string | null;
  allowedEffects?: ToolEffect[];
  durationMs?: number | null;
  startedAt?: string | null;
  finishedAt?: string | null;
  metadata?: Record<string, unknown>;
}): ToolRunRecord {
  const finishedAt = cleanText(input.finishedAt) || new Date().toISOString();
  const durationMs =
    typeof input.durationMs === "number" && Number.isFinite(input.durationMs) && input.durationMs >= 0
      ? Math.round(input.durationMs)
      : 0;
  const finishedMs = Date.parse(finishedAt);
  // Derive a start time from the finish time when the caller did not give one.
  const fallbackStartedAt = Number.isFinite(finishedMs)
    ? new Date(Math.max(0, finishedMs - durationMs)).toISOString()
    : new Date().toISOString();
  const startedAt = cleanText(input.startedAt) || fallbackStartedAt;

  // Classification runs on the original text; redaction applies only to what is stored.
  const tool = inferToolNameFromFailureText(input);
  const classification = classifyToolFailure({ ...input, tool });

  return {
    id: input.id,
    jobId: input.jobId ?? null,
    workerId: input.workerId ?? null,
    sessionId: input.sessionId ?? null,
    phase: input.phase ?? null,
    tool,
    kind: input.kind ?? resolveToolKind(tool),
    capability: input.capability ?? null,
    envProfile: input.envProfile ?? null,
    cwd: input.cwd ?? null,
    // redactToolText trims as well, so empty arguments are still dropped.
    argv: Array.isArray(input.argv) ? input.argv.map((arg) => redactToolText(arg)).filter(Boolean) : [],
    commandLine: redactToolText(input.commandLine) || null,
    allowedEffects: Array.isArray(input.allowedEffects) ? input.allowedEffects : [],
    ok: false,
    exitCode: typeof input.exitCode === "number" && Number.isFinite(input.exitCode) ? input.exitCode : null,
    failureClass: classification.failureClass,
    retryable: classification.retryable,
    remediation: classification.remediation,
    startedAt,
    finishedAt,
    durationMs,
    stdoutTail: truncateToolText(redactToolText(input.stdout)),
    stderrTail: truncateToolText(redactToolText(input.stderr ?? input.detail)),
    metadata: input.metadata ?? {},
  };
}
@@ -24,6 +24,7 @@ export type VisionKeyItems = {
24
24
  constraints: string[];
25
25
  nonGoals: string[];
26
26
  metrics: string[];
27
+ testingCriteria: string[];
27
28
  riskPolicy: string[];
28
29
  operatingModel: string[];
29
30
  governance: string[];
@@ -107,6 +108,15 @@ function classifyHeadingBucket(heading: string): keyof VisionKeyItems | null {
107
108
  if (text.includes("non-goal") || text.includes("out of scope") || text.includes("not ")) {
108
109
  return "nonGoals";
109
110
  }
111
+ if (
112
+ text.includes("testing criteria") ||
113
+ text.includes("test criteria") ||
114
+ text.includes("required tests") ||
115
+ text.includes("required validation") ||
116
+ text.includes("validation criteria")
117
+ ) {
118
+ return "testingCriteria";
119
+ }
110
120
  if (text.includes("measure") || text.includes("metric") || text.includes("good looks like")) {
111
121
  return "metrics";
112
122
  }
@@ -199,6 +209,7 @@ export function extractVisionKeyItems(markdown: string): VisionKeyItems {
199
209
  constraints: [],
200
210
  nonGoals: [],
201
211
  metrics: [],
212
+ testingCriteria: [],
202
213
  riskPolicy: [],
203
214
  operatingModel: [],
204
215
  governance: [],
@@ -226,6 +237,7 @@ export function extractVisionKeyItems(markdown: string): VisionKeyItems {
226
237
  constraints: dedupeAndClamp(buckets.constraints),
227
238
  nonGoals: dedupeAndClamp(buckets.nonGoals),
228
239
  metrics: dedupeAndClamp(buckets.metrics),
240
+ testingCriteria: dedupeAndClamp(buckets.testingCriteria),
229
241
  riskPolicy: dedupeAndClamp(buckets.riskPolicy),
230
242
  operatingModel: dedupeAndClamp(buckets.operatingModel),
231
243
  governance: dedupeAndClamp(buckets.governance),
@@ -1,4 +1,5 @@
1
1
  You are PushPals WorkerPal running via the OpenAI Codex CLI backend.
2
2
  Codex CLI is required infrastructure in this environment.
3
+ Do not self-check PushPals infrastructure by running `codex --version` or `codex login status` inside the task workspace; the WorkerPals executor has already launched you through Codex.
3
4
  Do not modify tests or product code to bypass, stub, or avoid Codex CLI usage due to assumed environment limits.
4
5
  If Codex CLI auth/execution is unavailable, fail loudly with a clear error and stop; do not apply non-Codex workarounds.
@@ -1,6 +1,7 @@
1
1
  Runtime policy guardrails (mandatory):
2
2
 
3
3
  - Codex CLI is required infrastructure in this environment.
4
+ - Do not self-check PushPals infrastructure by running `codex --version` or `codex login status` inside the task workspace; the WorkerPals executor has already launched you through Codex.
4
5
  - Never bypass Codex usage by changing tests/code expectations.
5
6
  - If Codex CLI auth/execution is unavailable, hard-fail and stop.
6
7
  - Do not apply fallback/workaround execution paths when Codex is unavailable.
@@ -3,6 +3,7 @@ You are PushPals WorkerPal running via the OpenAI Codex CLI backend.
3
3
  Non-negotiable runtime invariants:
4
4
 
5
5
  - Codex CLI is required infrastructure in this environment.
6
+ - Do not self-check PushPals infrastructure by running `codex --version` or `codex login status` inside the task workspace; the WorkerPals executor has already launched you through Codex.
6
7
  - Do not modify tests or production code to bypass, stub, or remove Codex CLI usage due to assumed environment limitations.
7
8
  - Do not "adapt around" missing Codex access by rewriting coverage or behavior expectations.
8
9
  - If Codex CLI authentication/execution is unavailable, fail loudly with a clear error and stop.