pi-crew 0.9.8 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +311 -0
  2. package/README.md +2 -2
  3. package/docs/fixes/v0.9.10/locks-fix-verify.md +3 -0
  4. package/docs/fixes/v0.9.10/smoke-test.md +12 -0
  5. package/package.json +1 -1
  6. package/src/extension/register.ts +94 -21
  7. package/src/extension/registration/subagent-helpers.ts +1 -0
  8. package/src/extension/registration/subagent-tools.ts +9 -0
  9. package/src/extension/team-tool/doctor.ts +41 -18
  10. package/src/runtime/batch-barrier.ts +145 -0
  11. package/src/runtime/child-pi.ts +135 -22
  12. package/src/runtime/compact-pipeline.ts +56 -0
  13. package/src/runtime/compact-stages/ansi-strip-stage.ts +25 -0
  14. package/src/runtime/compact-stages/blank-collapse-stage.ts +31 -0
  15. package/src/runtime/compact-stages/deduplicate-stage.ts +34 -0
  16. package/src/runtime/compact-stages/head-snap-stage.ts +57 -0
  17. package/src/runtime/compact-stages/index.ts +13 -0
  18. package/src/runtime/compact-stages/tail-capture-stage.ts +72 -0
  19. package/src/runtime/compact-stages/truncation-stage.ts +71 -0
  20. package/src/runtime/crash-classification.ts +208 -0
  21. package/src/runtime/custom-tools/irc-tool.ts +47 -7
  22. package/src/runtime/handoff-manager.ts +10 -0
  23. package/src/runtime/important-line-classifier.ts +130 -0
  24. package/src/runtime/iteration-hooks.ts +7 -19
  25. package/src/runtime/live-agent-manager.ts +185 -0
  26. package/src/runtime/live-session-runtime.ts +50 -1
  27. package/src/runtime/model-fallback.ts +29 -1
  28. package/src/runtime/process-lifecycle.ts +481 -0
  29. package/src/runtime/role-permission.ts +2 -2
  30. package/src/runtime/stream-preview.ts +9 -2
  31. package/src/runtime/subagent-manager.ts +6 -0
  32. package/src/runtime/task-output-context.ts +209 -24
  33. package/src/runtime/task-runner.ts +76 -15
  34. package/src/runtime/tool-output-pruner.ts +334 -0
  35. package/src/state/locks.ts +16 -0
  36. package/src/state/state-store.ts +8 -2
  37. package/src/state/types.ts +5 -0
  38. package/src/ui/live-run-sidebar.ts +6 -1
  39. package/src/ui/loaders.ts +24 -4
  40. package/src/ui/run-dashboard.ts +6 -1
  41. package/src/ui/run-event-bus.ts +1 -1
  42. package/src/ui/run-snapshot-cache.ts +50 -16
  43. package/src/ui/widget/index.ts +27 -5
  44. package/src/ui/widget/widget-renderer.ts +43 -13
  45. package/src/utils/redaction.ts +17 -1
  46. package/src/utils/visual.ts +6 -0
  47. package/src/ui/crew-widget.ts +0 -544
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Important-Line Classifier (P0-B) — scan middle slice of a truncated value
3
+ * for diagnostic lines worth preserving between head and tail.
4
+ *
5
+ * Ported from Hypa's `ImportantLineClassifier.cs` (5 regexes) and the
6
+ * middle-scanning portion of `Stages/TruncationStage.cs:24-46`, adapted to TS
7
+ * (no `[GeneratedRegex]` AOT) and to pi-crew's head(75%)/tail(25%) split.
8
+ *
9
+ * Design rationale:
10
+ * - Patterns are intentionally OVER-INCLUSIVE. False positives preserve
11
+ * harmless lines; false negatives drop critical diagnostics, which is
12
+ * unacceptable (the whole point of this module). Hypa uses the same
13
+ * over-inclusive design.
14
+ * - Patterns are evaluated against a WHOLE line, not against the raw
15
+ * truncated slice, so a match at a line boundary is reliable.
16
+ * - The `splitWithImportantLines` helper performs the head/tail split AND
17
+ * greedily picks whole important lines from the middle that fit inside
18
+ * `slackFactor * maxChars` (default 15% slack). Callers compose their own
19
+ * marker using the returned parts — keeping `compactString` (marker
20
+ * "compacted ... chars, head+tail preserved") and `readIfSmall` (marker
21
+ * "truncated ... bytes, head+tail preserved") backward-compatible when no
22
+ * important lines are present.
23
+ */
24
+
25
// Error-ish keywords, case-insensitive. "warning" is deliberately absent: it
// gets its own case-sensitive pattern so lowercase prose does not match.
const ERROR_KEYWORD_PATTERN = /\b(error|failed|exception|fatal|panic)\b/i;
// `file.ext:LINE:` locations such as `child-pi.ts:383:` or `App.tsx:42:`.
const FILE_LINE_PATTERN = /\w+\.\w+:\d+:/;
// Stand-alone three-digit numbers 400-599 (HTTP 4xx / 5xx). Only word
// boundaries limit the match, so other isolated numbers in that range
// (e.g. the `555` in `555-1234`) match as well.
const HTTP_ERROR_STATUS_PATTERN = /\b[45]\d{2}\b/;
// Capitalised "Warning" (k8s events, linters); case-sensitive on purpose.
const CAPITAL_WARNING_PATTERN = /\bWarning\b/;
// Compiler / linter diagnostic ids: 2-4 capitals followed by 3-5 digits,
// e.g. `TS2304`, `CS0246`, `ES1234`.
const DIAGNOSTIC_ID_PATTERN = /\b[A-Z]{2,4}\d{3,5}\b/;

/**
 * Patterns that mark a line as diagnostic. A line counts as important when
 * any one of them matches. None of the patterns carries the `g` or `y` flag,
 * so `test()` keeps no state between calls.
 */
export const IMPORTANT_LINE_PATTERNS: readonly RegExp[] = [
  ERROR_KEYWORD_PATTERN,
  FILE_LINE_PATTERN,
  HTTP_ERROR_STATUS_PATTERN,
  CAPITAL_WARNING_PATTERN,
  DIAGNOSTIC_ID_PATTERN,
];
40
+
41
/**
 * Report whether `line` looks like a diagnostic line.
 *
 * @param line - A single line of text (no line terminator expected).
 * @returns `true` when at least one entry of `IMPORTANT_LINE_PATTERNS`
 *   matches; always `false` for an empty string.
 */
export function isImportantLine(line: string): boolean {
  // Empty lines can never be diagnostics; skip the regex work entirely.
  if (!line) return false;
  return IMPORTANT_LINE_PATTERNS.some((pattern) => pattern.test(line));
}
49
+
50
/**
 * Collect the first `maxLines` diagnostic lines found in `text`.
 *
 * Lines are separated on `\n` or `\r\n`. Source order is kept and repeated
 * lines are NOT de-duplicated, so a diagnostic that occurs twice is returned
 * twice.
 *
 * @param text - Multi-line text to scan.
 * @param maxLines - Upper bound on the number of returned lines (default 30).
 * @returns The matching lines; `[]` when `text` is empty or `maxLines <= 0`.
 */
export function extractImportantLines(text: string, maxLines = 30): string[] {
  const picked: string[] = [];
  if (!text || maxLines <= 0) return picked;

  for (const candidate of text.split(/\r?\n/)) {
    if (!isImportantLine(candidate)) continue;
    picked.push(candidate);
    // Stop scanning as soon as the cap is reached.
    if (picked.length >= maxLines) break;
  }
  return picked;
}
65
+
66
/** Parts produced by `splitWithImportantLines`. */
export interface TruncationSplit {
  /**
   * Leading slice of the value, verbatim: the first `floor(0.75 * maxChars)`
   * characters, or the entire value when it already fits in `maxChars`.
   */
  head: string;
  /**
   * Trailing slice of the value, verbatim: the last `maxChars - head.length`
   * characters, or `""` when the value already fits in `maxChars`.
   */
  tail: string;
  /**
   * Diagnostic lines taken (as whole lines, in source order) from the region
   * between `head` and `tail`, selected so that their newline-joined length
   * stays within `slackFactor * maxChars`. `[]` when `preserveImportant` is
   * `false` or when nothing was selected.
   */
  importantLines: string[];
  /**
   * `value.length - maxChars` for a truncated value, i.e. the number of
   * characters dropped if no important lines are re-inserted; `0` when the
   * value already fits.
   */
  baseDropped: number;
}
81
+
82
/** Tuning knobs accepted by `splitWithImportantLines`; every field is optional. */
export interface SplitOptions {
  /**
   * Pass `false` to skip scanning the middle for diagnostic lines
   * (assistant-text mode). Any other value, including omission, keeps
   * scanning enabled.
   */
  preserveImportant?: boolean;
  /**
   * Maximum number of candidate lines collected from the middle before the
   * slack-budget selection runs. Defaults to 30.
   */
  maxImportantLines?: number;
  /**
   * Share of `maxChars` that the selected important lines may occupy.
   * Defaults to 0.15.
   */
  slackFactor?: number;
}
90
+
91
/**
 * Split `value` into a verbatim head, a verbatim tail, and a selection of
 * diagnostic lines taken from the region in between.
 *
 * Only the parts are returned. The caller composes the final result (marker
 * + glue) because the marker wording differs between `compactString` and
 * `readIfSmall`. When no important lines are selected, `importantLines` is
 * `[]`, so callers can keep emitting their existing marker unchanged.
 *
 * @param value - Text to split.
 * @param maxChars - Character budget shared by head (75%, rounded down) and
 *   tail (the remainder). A negative or NaN budget is treated as `0`.
 * @param opts - See `SplitOptions`; defaults: scanning on, 30 candidate
 *   lines, 15% slack.
 * @returns The head/tail slices, the selected important lines and the number
 *   of characters not covered by head + tail. When `value` already fits,
 *   `head` is the whole value and everything else is empty / `0`.
 */
export function splitWithImportantLines(value: string, maxChars: number, opts: SplitOptions = {}): TruncationSplit {
  if (value.length <= maxChars) {
    return { head: value, tail: "", importantLines: [], baseDropped: 0 };
  }

  // `String.prototype.slice` interprets negative offsets as "from the end",
  // so an unclamped negative budget would return almost the whole value as
  // `head`. `NaN > 0` is false, so NaN is clamped to 0 as well.
  const budget = maxChars > 0 ? maxChars : 0;
  const headLen = Math.floor(budget * 0.75);
  const tailLen = budget - headLen;
  const head = value.slice(0, headLen);
  const tail = value.slice(value.length - tailLen);
  const baseDropped = value.length - budget;

  if (opts.preserveImportant === false) {
    return { head, tail, importantLines: [], baseDropped };
  }

  const slackFactor = opts.slackFactor ?? 0.15;
  const slackChars = Math.max(0, Math.floor(budget * slackFactor));
  const maxCandidates = opts.maxImportantLines ?? 30;
  const middle = value.slice(headLen, value.length - tailLen);
  const candidates = extractImportantLines(middle, maxCandidates);

  // Greedily pick whole lines that fit in the slack budget. A line that is
  // too long is skipped rather than ending the selection, so one oversized
  // diagnostic cannot hide shorter ones that follow it. Order is preserved.
  const chosen: string[] = [];
  let used = 0;
  for (const line of candidates) {
    const addLen = (chosen.length > 0 ? 1 : 0) + line.length; // '\n' separator
    if (used + addLen > slackChars) continue;
    chosen.push(line);
    used += addLen;
  }

  return { head, tail, importantLines: chosen, baseDropped };
}
@@ -8,6 +8,7 @@ import { spawn } from "node:child_process";
8
8
  import * as fs from "node:fs";
9
9
  import * as path from "node:path";
10
10
  import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
11
+ import { HeadSnapStage } from "./compact-stages/index.ts";
11
12
  import { resolveShellForScript } from "../utils/resolve-shell.ts";
12
13
  import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
13
14
  import { DENIED_METRIC_NAMES } from "./metric-parser.ts";
@@ -98,23 +99,6 @@ function notFiredResult(): HookResult {
98
99
  };
99
100
  }
100
101
 
101
- /**
102
- * Truncate a buffer to the given byte limit, snapping to the last newline
103
- * boundary for UTF-8 safety.
104
- */
105
- function truncateToLimit(buf: Buffer, limit: number): Buffer {
106
- if (buf.byteLength <= limit) return buf;
107
-
108
- const slice = buf.subarray(0, limit);
109
- // Find the last newline within the truncated region
110
- const lastNewline = slice.lastIndexOf("\n");
111
- if (lastNewline >= 0) {
112
- return slice.subarray(0, lastNewline);
113
- }
114
- // No newline found — return the full slice
115
- return slice;
116
- }
117
-
118
102
  /**
119
103
  * Check if a script path exists and is executable.
120
104
  */
@@ -196,13 +180,17 @@ export async function runIterationHook(
196
180
  const durationMs = Date.now() - startTime;
197
181
 
198
182
  const rawStdout = Buffer.concat(stdoutChunks);
199
- const truncatedStdout = truncateToLimit(rawStdout, MAX_STDOUT_BYTES);
183
+ // Sprint 5: refactored onto HeadSnapStage. Convert to UTF-8 string once,
184
+ // then apply the byte-cap stage with newline-snap so partial lines
185
+ // never appear in the captured preview. HeadSnapStage is byte-cap-safe
186
+ // (walks back partial UTF-8 sequences at the cut boundary).
187
+ const stdoutText = new HeadSnapStage({ maxBytes: MAX_STDOUT_BYTES }).apply(rawStdout.toString("utf-8"));
200
188
 
201
189
  const rawStderr = Buffer.concat(stderrChunks);
202
190
 
203
191
  resolve({
204
192
  fired: true,
205
- stdout: truncatedStdout.toString("utf-8"),
193
+ stdout: stdoutText,
206
194
  stderr: rawStderr.toString("utf-8"),
207
195
  exitCode: code,
208
196
  timedOut: killed,
@@ -416,3 +416,188 @@ function drainIrcMessages(agentIdOrTaskId: string): IrcMessage[] {
416
416
  handle.pendingMessages.length = 0;
417
417
  return messages;
418
418
  }
419
+
420
+ /* ── IRC reply support (side-channel Q&A) ─────────────────────────── */
421
+
422
/** How long a side-channel reply is awaited when the caller gives no `timeoutMs`: 60 seconds. */
const DEFAULT_REPLY_TIMEOUT_MS = 60 * 1_000;
424
+
425
/** Outcome reported by `respondAsBackground` and by a settled pending reply. */
export interface BackgroundReplyResult {
  /** `true` when delivery (and, if requested, the reply) succeeded. */
  ok: boolean;
  /** Correlation id of the exchange; absent when the target could not be reached at all. */
  corrId?: string;
  /** Prose reply from the recipient; set only when a reply was awaited and arrived. */
  replyContent?: string;
  /** Human-readable description of what went wrong. */
  error?: string;
  /** `true` when the wait ended because the timeout elapsed before a reply arrived. */
  timedOut?: boolean;
}
437
+
438
/** Bookkeeping record for one reply that is still being awaited. */
interface PendingReply {
  /** Correlation id under which the record is stored. */
  corrId: string;
  /** Agent whose reply is awaited. */
  targetAgentId: string;
  /** Sender of the original message. */
  fromId: string;
  /** Epoch milliseconds at which the wait times out. */
  deadline: number;
  /** Settles the waiter's promise with the given result. */
  resolve: (result: BackgroundReplyResult) => void;
  /** Timeout timer guarding the wait, when one is armed. */
  timer?: ReturnType<typeof setTimeout>;
}
446
+
447
/** Replies still being awaited in this process, keyed by correlation id. */
const pendingReplies: Map<string, PendingReply> = new Map();
/** Reverse lookup: target agent id → correlation ids still waiting on that agent. */
const pendingRepliesByTarget: Map<string, Set<string>> = new Map();
451
+
452
/**
 * Build a correlation id of the form `irc_reply_<time>_<nonce>`, where
 * `<time>` is the current epoch milliseconds in base 36 and `<nonce>` is up
 * to eight base-36 characters derived from `Math.random()`.
 */
function makeCorrelationId(): string {
  const stamp = Date.now().toString(36);
  // Drop the leading "0." of the base-36 fraction and keep at most 8 chars.
  const nonce = Math.random().toString(36).slice(2, 10);
  return ["irc_reply", stamp, nonce].join("_");
}
455
+
456
/**
 * Hand a message to a live agent without waiting for that agent's main loop,
 * and optionally wait for a prose reply on the in-memory side-channel.
 *
 * Delivery order:
 *  1. `session.sendCustomMessage(...)` with `triggerTurn: false`, when the
 *     session exposes it and the call does not throw;
 *  2. otherwise a fire-and-forget `session.prompt(...)` whose rejection is
 *     only logged. When a reply is awaited, the correlation id is appended
 *     to the prompt text.
 * The recipient's turn is never awaited. With `awaitReply` set, the returned
 * promise instead settles through `awaitPendingReply`: on `resolveIrcReply`,
 * on timeout, or when `signal` aborts.
 *
 * @param targetAgentId - Live agent that should receive the message.
 * @param fromId - Sender id, shown in the `[DM from …]` prefix.
 * @param message - Message text.
 * @param opts - `awaitReply` (default `false`), `timeoutMs` (default
 *   `DEFAULT_REPLY_TIMEOUT_MS`) and an optional abort `signal`.
 * @returns `{ ok: false, error }` when the agent is unknown or has no usable
 *   channel; `{ ok: true, corrId }` after delivery when no reply is awaited;
 *   otherwise the result of the awaited reply.
 */
export async function respondAsBackground(
  targetAgentId: string,
  fromId: string,
  message: string,
  opts?: { awaitReply?: boolean; timeoutMs?: number; signal?: AbortSignal },
): Promise<BackgroundReplyResult> {
  const handle = getLiveAgent(targetAgentId);
  if (!handle) return { ok: false, error: `Live agent '${targetAgentId}' not found.` };

  const wantsReply = opts?.awaitReply ?? false;
  const replyTimeoutMs = opts?.timeoutMs ?? DEFAULT_REPLY_TIMEOUT_MS;
  const corrId = makeCorrelationId();
  const taggedMessage = `[DM from ${fromId}] ${message}`;

  // Channel 1: custom message that does not trigger a turn on the recipient.
  const liveSession = handle.session as Record<string, unknown>;
  let channelUsed = false;
  if (typeof liveSession.sendCustomMessage === "function") {
    try {
      (liveSession.sendCustomMessage as (msg: unknown, o?: unknown) => void)(
        { customType: "irc", content: taggedMessage, display: "collapsed", corrId },
        { deliverAs: "followUp", triggerTurn: false },
      );
      channelUsed = true;
    } catch {
      // Custom-message delivery failed; try the prompt channel below.
    }
  }

  // Channel 2: fire-and-forget prompt. Failures are logged, never awaited.
  if (!channelUsed) {
    if (typeof handle.session.prompt !== "function") {
      return { ok: false, error: `Target '${targetAgentId}' has no message channel.` };
    }
    const correlationHint = wantsReply ? ` (reply correlation: ${corrId})` : "";
    void handle.session
      .prompt(`${taggedMessage}${correlationHint}`, { source: "api", expandPromptTemplates: false })
      .catch((error) => logInternalError("live-agent-manager.respondAsBackground", error, `agentId=${handle.agentId}`));
  }

  handle.updatedAt = new Date().toISOString();

  if (wantsReply) {
    // Event-driven wait, bounded by the timeout and the caller's abort signal.
    return awaitPendingReply(corrId, targetAgentId, fromId, replyTimeoutMs, opts?.signal);
  }
  return { ok: true, corrId };
}
515
+
516
/**
 * Register a waiter for `corrId` and return a promise that settles exactly
 * once: with the reply (via the stored `resolve`), with `timedOut: true`
 * after `timeoutMs`, or with `error: "cancelled"` when `signal` aborts.
 * The promise never rejects.
 *
 * Settling clears the timeout, detaches the abort listener and removes the
 * waiter from both `pendingReplies` and `pendingRepliesByTarget`.
 *
 * @param corrId - Correlation id to wait on.
 * @param targetAgentId - Agent expected to answer.
 * @param fromId - Sender of the original message.
 * @param timeoutMs - Maximum wait in milliseconds.
 * @param signal - Optional abort signal; an already-aborted signal settles
 *   the wait immediately without registering it.
 * @internal exported for testing
 */
export function awaitPendingReply(
  corrId: string,
  targetAgentId: string,
  fromId: string,
  timeoutMs: number,
  signal?: AbortSignal,
): Promise<BackgroundReplyResult> {
  return new Promise((resolve) => {
    const deadline = Date.now() + timeoutMs;
    let done = false;
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    let onAbort: (() => void) | undefined;

    // Single exit point: idempotent, and undoes every registration made below.
    const settle = (result: BackgroundReplyResult): void => {
      if (done) return;
      done = true;
      if (timeoutHandle) clearTimeout(timeoutHandle);
      if (onAbort && signal) signal.removeEventListener("abort", onAbort);
      pendingReplies.delete(corrId);
      const waiting = pendingRepliesByTarget.get(targetAgentId);
      if (waiting) {
        waiting.delete(corrId);
        if (waiting.size === 0) pendingRepliesByTarget.delete(targetAgentId);
      }
      resolve(result);
    };

    timeoutHandle = setTimeout(() => settle({ ok: false, corrId, timedOut: true }), timeoutMs);

    if (signal) {
      if (signal.aborted) {
        settle({ ok: false, corrId, error: "cancelled" });
        return;
      }
      onAbort = () => settle({ ok: false, corrId, error: "cancelled" });
      signal.addEventListener("abort", onAbort, { once: true });
    }

    pendingReplies.set(corrId, { corrId, targetAgentId, fromId, deadline, resolve: settle, timer: timeoutHandle });
    let waiting = pendingRepliesByTarget.get(targetAgentId);
    if (!waiting) {
      waiting = new Set<string>();
      pendingRepliesByTarget.set(targetAgentId, waiting);
    }
    waiting.add(corrId);
  });
}
564
+
565
/**
 * Complete a pending side-channel reply with the recipient's answer.
 * Intended for the reply-routing layer (e.g. irc-tool when the recipient
 * answers with the correlation id) and for tests that simulate a response.
 *
 * @param corrId - Correlation id of the pending reply.
 * @param replyContent - Reply text handed to the waiter.
 * @returns `true` when a waiter was found and settled; `false` when no
 *   waiter is registered under `corrId` (unknown id, or already settled by
 *   timeout / cancellation).
 */
export function resolveIrcReply(corrId: string, replyContent: string): boolean {
  const waiter = pendingReplies.get(corrId);
  waiter?.resolve({ ok: true, corrId, replyContent });
  return waiter !== undefined;
}
579
+
580
/**
 * Abandon a pending side-channel reply, e.g. because the sender gave up.
 *
 * @param corrId - Correlation id of the pending reply.
 * @param reason - Text reported as the waiter's `error` (default `"cancelled"`).
 * @returns `true` when a waiter was found and settled with `ok: false`;
 *   `false` when nothing is registered under `corrId`.
 */
export function cancelIrcReply(corrId: string, reason = "cancelled"): boolean {
  const waiter = pendingReplies.get(corrId);
  waiter?.resolve({ ok: false, corrId, error: reason });
  return waiter !== undefined;
}
590
+
591
/**
 * List the correlation ids that are still waiting for a reply from
 * `targetAgentId`. The returned array is a fresh copy; `[]` when none wait.
 */
export function pendingReplyCorrIdsForTarget(targetAgentId: string): string[] {
  const waiting = pendingRepliesByTarget.get(targetAgentId);
  return waiting ? Array.from(waiting) : [];
}
595
+
596
/**
 * Test helper: drop every pending side-channel reply.
 *
 * Each waiter is settled with `{ ok: false, error: "cleared" }` through its
 * own resolver instead of being silently forgotten. The resolver registered
 * by `awaitPendingReply` clears the timeout, detaches the abort listener and
 * resolves the promise, so no awaiting caller is left hanging and no
 * listener stays attached to a caller-owned `AbortSignal`.
 */
export function clearPendingRepliesForTest(): void {
  // Snapshot first: settling a waiter removes its entry from `pendingReplies`.
  for (const pending of [...pendingReplies.values()]) {
    if (pending.timer) clearTimeout(pending.timer);
    pending.resolve({ ok: false, corrId: pending.corrId, error: "cleared" });
  }
  // Belt and braces: guarantee both indexes are empty afterwards.
  pendingReplies.clear();
  pendingRepliesByTarget.clear();
}
@@ -241,6 +241,54 @@ function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined):
241
241
  }
242
242
  }
243
243
 
244
/**
 * Choose the parent model an agent should inherit, guarding against a saved
 * session model that the registry can no longer resolve (the original note
 * cites a worker failing with "No API key found" on such a stale model).
 *
 * Resolution order:
 *  1. The saved model, when `modelFromRegistry` can resolve it. A bare id
 *     (no `/`) is first expanded to the registry entry's `fullId`.
 *  2. Otherwise `provider/id` of the first entry returned by
 *     `modelRegistry.getAvailable()`, when that call exists, does not throw
 *     and the entry carries string `provider` and `id` fields.
 *  3. Otherwise the trimmed saved model unchanged, leaving it to the caller
 *     to surface the failure.
 *
 * @param modelRegistry - Registry object; only `getAvailable` is used
 *   directly, everything else goes through `modelFromRegistry`.
 * @param rawParentModel - Saved model id; non-string values count as absent.
 * @returns The chosen model id, or `undefined` when `rawParentModel` is not
 *   a string and no available model could be determined.
 */
export function resolveParentModelFromRegistry(
  modelRegistry: unknown,
  rawParentModel: unknown,
): string | undefined {
  const saved = typeof rawParentModel === "string" ? rawParentModel.trim() : undefined;

  if (saved) {
    let qualified: string | undefined;
    if (saved.includes("/")) {
      qualified = saved;
    } else {
      // Bare id: ask the registry for its fully qualified form.
      const entry = modelFromRegistry(modelRegistry, saved);
      if (entry && typeof entry === "object" && "fullId" in entry) {
        qualified = String((entry as { fullId?: unknown }).fullId ?? saved);
      }
    }
    if (qualified && modelFromRegistry(modelRegistry, qualified)) return qualified;
  }

  const registry = modelRegistry as { getAvailable?: () => unknown[] } | undefined;
  if (!registry || typeof registry.getAvailable !== "function") return saved;

  try {
    const available = registry.getAvailable();
    const first = Array.isArray(available)
      ? (available[0] as { provider?: unknown; id?: unknown } | undefined)
      : undefined;
    if (first && typeof first.provider === "string" && typeof first.id === "string") {
      return `${first.provider}/${first.id}`;
    }
  } catch {
    // Registry lookup failed; fall back to the saved model below.
  }
  return saved;
}
291
+
244
292
  /** Communication intensity by role (caveman-inspired token optimization) */
245
293
  const ROLE_INTENSITY: Record<string, "lite" | "full" | "ultra"> = {
246
294
  explorer: "ultra",
@@ -473,7 +521,8 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
473
521
  });
474
522
  await (resourceLoader as { reload?: () => Promise<void> }).reload?.();
475
523
  }
476
- const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd, scopeModelsPatterns: await resolveScopeModelsPatterns(input.manifest.cwd) });
524
+ const effectiveParentModel = resolveParentModelFromRegistry(input.modelRegistry, input.parentModel);
525
+ const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: effectiveParentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd, scopeModelsPatterns: await resolveScopeModelsPatterns(input.manifest.cwd) });
477
526
  const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
478
527
  // Phase 4: MCP proxy — will be determined after session creation
479
528
  // (we check parent's MCP tools and share connections when available)
@@ -209,6 +209,25 @@ const RETRYABLE_MODEL_FAILURE_PATTERNS = [
209
209
  /internal(?:_server)?[ _]error/i,
210
210
  /server error/i,
211
211
  /bad gateway/i,
212
+ //
213
+ // Broader retryable patterns (added 2026-06-25, FIX 2):
214
+ // - `/provider[_ ]?error/i`: OpenAI-compatible "Provider error" generic fault.
215
+ // - `/context[_ ]?length[_ ]?exceeded/i`: "context_length_exceeded" from
216
+ // OpenAI/Anthropic — when the configured model is the bottleneck, a
217
+ // different model in the fallback chain may have a larger window.
218
+ // - `/safety/i`: Anthropic safety blocks — typically retryable on a
219
+ // different model in the fallback chain.
220
+ // - `/is[_ ]?overloaded/i`: alias to the existing `/overloaded/i` pattern
221
+ // to catch phrasings like "upstream is overloaded".
222
+ // - `/\b408\b/`: HTTP 408 Request Timeout — transient, provider-side.
223
+ //
224
+ // Intentionally NOT added: `/bad_request/` — can mean bad input (e.g.
225
+ // invalid schema), which is non-retryable.
226
+ /provider[_ ]?error/i,
227
+ /context[_ ]?length[_ ]?exceeded/i,
228
+ /safety/i,
229
+ /is[_ ]?overloaded/i,
230
+ /\b408\b/,
212
231
  ];
213
232
 
214
233
  // These patterns indicate auth/key/billing issues that will never succeed on retry.
@@ -313,9 +332,18 @@ export function buildConfiguredModelRouting(input: {
313
332
  const rawModels = availableModels
314
333
  ? [input.overrideModel, input.stepModel, input.teamRoleModel, effectiveAgentModel, ...(input.fallbackModels ?? []), ...availableModels.map((model) => model.fullId)]
315
334
  : [input.overrideModel, input.stepModel, input.teamRoleModel, effectiveAgentModel, ...(input.fallbackModels ?? []), parentModel];
335
+ // Fix (Round 18): when an agent has `model: false` (frontmatter) the
336
+ // inherited `parentModel` (= the main session's model, e.g. minimax-M3) IS the
337
+ // desired primary. It must NOT be filtered out by isAvailableModel — which
338
+ // only knows about models from models.json / registry, NOT builtin Pi models.
339
+ // Pin the inherited parentModel at index 0 regardless of availability.
340
+ const parentModelRaw = effectiveAgentModel?.trim() || undefined;
316
341
  const configuredModels = rawModels
317
342
  .filter((model): model is string => Boolean(model?.trim()))
318
- .filter((model) => isAvailableModel(model.trim(), availableModels));
343
+ .filter((model, idx) => {
344
+ if (parentModelRaw && idx === 0 && model.trim() === parentModelRaw) return true;
345
+ return isAvailableModel(model.trim(), availableModels);
346
+ });
319
347
  const candidates = buildModelCandidates(configuredModels[0], configuredModels.slice(1), availableModels, preferredProvider);
320
348
  const reason = requested && candidates[0] && resolveModelCandidate(requested, availableModels, preferredProvider) !== candidates[0]
321
349
  ? "requested model unavailable; selected configured Pi fallback"