@gajae-code/coding-agent 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -450,6 +450,52 @@ function activeFlag(value: unknown): boolean {
450
450
  return isPlainObject(value) && value.active !== false;
451
451
  }
452
452
 
453
/**
 * Read the `phase` recorded on an active entry or active-snapshot value.
 *
 * @param value - Raw parsed value; anything that is not a plain object yields `undefined`.
 * @returns The phase with surrounding whitespace removed, or `undefined` when the field is
 * missing, is not a string, or is blank after trimming.
 */
function phaseFromActiveValue(value: unknown): string | undefined {
	if (!isPlainObject(value)) return undefined;
	const rawPhase = value.phase;
	if (typeof rawPhase !== "string") return undefined;
	const trimmed = rawPhase.trim();
	// A whitespace-only phase is treated the same as a missing one.
	return trimmed.length > 0 ? trimmed : undefined;
}
458
+
459
/**
 * Mode-state phases that are still reported by {@link modeStatePhase} when the state
 * carries `active: false`. Any other phase on an inactive state is ignored there.
 */
const RALPLAN_CANONICAL_PHASE_OVERRIDES = new Set<string>([
	"final",
	"handoff",
	"complete",
	"completed",
	"failed",
	"cancelled",
	"canceled",
	"inactive",
]);
469
+
470
/**
 * Read the canonical phase (`current_phase`) from a mode-state value.
 *
 * @param value - Raw parsed mode-state; non-objects yield `undefined`.
 * @returns The trimmed `current_phase`, or `undefined` when it is missing, not a string,
 * blank, or when the state is explicitly inactive (`active === false`) and the phase is
 * not one of `RALPLAN_CANONICAL_PHASE_OVERRIDES`.
 */
function modeStatePhase(value: unknown): string | undefined {
	if (!isPlainObject(value)) return undefined;
	const rawPhase = value.current_phase;
	if (typeof rawPhase !== "string") return undefined;
	const phase = rawPhase.trim();
	if (phase.length === 0) return undefined;
	// Only an explicit `false` counts as inactive; a missing flag leaves the phase visible.
	const explicitlyInactive = value.active === false;
	if (explicitlyInactive && !RALPLAN_CANONICAL_PHASE_OVERRIDES.has(phase)) return undefined;
	return phase;
}
477
+
478
/**
 * Append a `stale_active_state` problem when an active entry/snapshot phase disagrees
 * with the canonical mode-state phase.
 *
 * Nothing is pushed when either phase is missing/empty or when both are equal.
 *
 * @param options.problems - Accumulator that receives the new problem (mutated in place).
 * @param options.pathValue - Path reported on the problem.
 * @param options.skill - Canonical skill; used for the suggested `gjc state <skill> clear` fix.
 * @param options.entryKind - Which record the phase came from; used as the message prefix.
 * @param options.entrySkill - Skill name as written on the entry; used in the message.
 * @param options.entryPhase - Phase read from the active entry/snapshot.
 * @param options.statePhase - Phase read from the mode-state file.
 */
function pushPhaseDriftProblem(options: {
	problems: DoctorProblem[];
	pathValue: string;
	skill: CanonicalGjcWorkflowSkill;
	entryKind: "active entry" | "active snapshot";
	entrySkill: string;
	entryPhase: string | undefined;
	statePhase: string | undefined;
}): void {
	const { problems, pathValue, skill, entryKind, entrySkill, entryPhase, statePhase } = options;
	// Drift can only be judged when both sides actually carry a phase.
	if (!entryPhase || !statePhase) return;
	if (entryPhase === statePhase) return;
	const message = `${entryKind} for ${entrySkill} phase ${entryPhase} differs from canonical mode-state phase ${statePhase}`;
	problems.push(doctorProblem("stale_active_state", pathValue, message, `gjc state ${skill} clear`, skill));
}
498
+
453
499
  async function collectDoctorSummary(
454
500
  cwd: string,
455
501
  skill: CanonicalGjcWorkflowSkill | undefined,
@@ -460,6 +506,7 @@ async function collectDoctorSummary(
460
506
  const problems: DoctorProblem[] = [];
461
507
  let filesScanned = 0;
462
508
  let journalsScanned = 0;
509
+ const invalidModeStates = new Set<string>();
463
510
 
464
511
  for (const currentSkill of skills) {
465
512
  const filePath = modeStateFile(cwd, currentSkill, sessionId);
@@ -476,6 +523,7 @@ async function collectDoctorSummary(
476
523
  currentSkill,
477
524
  ),
478
525
  );
526
+ invalidModeStates.add(currentSkill);
479
527
  continue;
480
528
  }
481
529
  const validation = validateWorkflowStateEnvelope(currentSkill, raw.value);
@@ -489,6 +537,7 @@ async function collectDoctorSummary(
489
537
  currentSkill,
490
538
  ),
491
539
  );
540
+ invalidModeStates.add(currentSkill);
492
541
  }
493
542
  const mismatch = await detectWorkflowEnvelopeIntegrityMismatch(filePath);
494
543
  if (mismatch) {
@@ -501,6 +550,7 @@ async function collectDoctorSummary(
501
550
  currentSkill,
502
551
  ),
503
552
  );
553
+ invalidModeStates.add(currentSkill);
504
554
  }
505
555
  }
506
556
 
@@ -553,6 +603,17 @@ async function collectDoctorSummary(
553
603
  ),
554
604
  );
555
605
  }
606
+ if (canonical && activeFlag(entry.value) && !invalidModeStates.has(canonical)) {
607
+ pushPhaseDriftProblem({
608
+ problems,
609
+ pathValue: entryPath,
610
+ skill: canonical,
611
+ entryKind: "active entry",
612
+ entrySkill,
613
+ entryPhase: phaseFromActiveValue(entry.value),
614
+ statePhase: modeStatePhase(state.value),
615
+ });
616
+ }
556
617
  }
557
618
  if (isPlainObject(snapshot.value)) {
558
619
  const activeSkills = Array.isArray(snapshot.value.active_skills) ? snapshot.value.active_skills : [];
@@ -572,6 +633,18 @@ async function collectDoctorSummary(
572
633
  ),
573
634
  );
574
635
  }
636
+ if (canonical && activeFlag(entry) && !invalidModeStates.has(canonical)) {
637
+ const state = await readRawJson(modeStateFile(cwd, canonical, scopeSessionId));
638
+ pushPhaseDriftProblem({
639
+ problems,
640
+ pathValue: snapshotPath,
641
+ skill: canonical,
642
+ entryKind: "active snapshot",
643
+ entrySkill,
644
+ entryPhase: phaseFromActiveValue(entry),
645
+ statePhase: modeStatePhase(state.value),
646
+ });
647
+ }
575
648
  }
576
649
  }
577
650
  };
@@ -1247,7 +1247,7 @@ const CLI_REPLAY_MAX_OUTPUT_BYTES = 1024 * 1024;
1247
1247
  const CLI_REPLAY_DEFAULT_TIMEOUT_MS = 10_000;
1248
1248
  const CLI_REPLAY_MIN_TIMEOUT_MS = 1_000;
1249
1249
  const CLI_REPLAY_MAX_TIMEOUT_MS = 30_000;
1250
- const CLI_REPLAY_EXEMPT_REASON_CODES = new Set([
1250
+ const CLI_REPLAY_EXEMPT_REASON_CODES = [
1251
1251
  "unsafe_side_effect",
1252
1252
  "requires_credentials",
1253
1253
  "requires_network",
@@ -1255,8 +1255,10 @@ const CLI_REPLAY_EXEMPT_REASON_CODES = new Set([
1255
1255
  "destructive",
1256
1256
  "interactive_only",
1257
1257
  "platform_unavailable",
1258
- ]);
1258
+ ] as const;
1259
+ const CLI_REPLAY_EXEMPT_REASON_CODE_SET = new Set<string>(CLI_REPLAY_EXEMPT_REASON_CODES);
1259
1260
  const CLI_REPLAY_ENV_BASE: Record<string, string> = { CI: "1", NO_COLOR: "1", GJC_ULTRAGOAL_REPLAY: "1" };
1261
+ const CLI_REPLAY_EXEMPT_REASON_CODE_LIST = CLI_REPLAY_EXEMPT_REASON_CODES.join(", ");
1260
1262
  const CLI_REPLAY_SAFE_ENV_NAMES = new Set(["LANG", "LC_ALL", "LC_CTYPE", "TZ"]);
1261
1263
  const CLI_REPLAY_DANGEROUS_ENV_NAME_PATTERN =
1262
1264
  /^(?:NODE_OPTIONS|GIT_EXTERNAL_DIFF|GIT_SSH|GIT_SSH_COMMAND|GIT_PAGER|PATH|LD_PRELOAD|LD_LIBRARY_PATH)$|^(?:GIT_CONFIG|DYLD_|BUN_|NPM_CONFIG_)|(?:^|_)OPTIONS$|PRELOAD$/;
@@ -1568,8 +1570,10 @@ async function validateReplayExemptFallback(
1568
1570
  const exempt = qualityGateObject(record.replayExempt);
1569
1571
  if (!exempt) return false;
1570
1572
  const reasonCode = requiredStringField(exempt, "reasonCode", `${fieldName}.replayExempt`);
1571
- if (!CLI_REPLAY_EXEMPT_REASON_CODES.has(reasonCode))
1572
- throw new Error(`qualityGate ${fieldName}.replayExempt.reasonCode is not recognized`);
1573
+ if (!CLI_REPLAY_EXEMPT_REASON_CODE_SET.has(reasonCode))
1574
+ throw new Error(
1575
+ `qualityGate ${fieldName}.replayExempt.reasonCode must be one of: ${CLI_REPLAY_EXEMPT_REASON_CODE_LIST}`,
1576
+ );
1573
1577
  const reason = requiredStringField(exempt, "reason", `${fieldName}.replayExempt`);
1574
1578
  if (!isSubstantiveEvidence(reason) || reason.length < 30)
1575
1579
  throw new Error(`qualityGate ${fieldName}.replayExempt.reason must be audited and substantive`);
@@ -84,6 +84,20 @@ export class InputController {
84
84
  }
85
85
  this.#steerConsumePending = false;
86
86
  }
87
+ // Normal input state with user-typed text: Esc must not interrupt a
88
+ // running task (streaming turn, bash/eval). A double Esc within the
89
+ // 500ms window clears the composer instead. Bash/Python input modes
90
+ // keep their own Esc handling in the chain below.
91
+ if (!this.ctx.isBashMode && !this.ctx.isPythonMode && this.ctx.editor.getText().trim()) {
92
+ const now = Date.now();
93
+ if (now - this.ctx.lastComposerClearEscapeTime < 500) {
94
+ this.ctx.clearEditor();
95
+ this.ctx.lastComposerClearEscapeTime = 0;
96
+ } else {
97
+ this.ctx.lastComposerClearEscapeTime = now;
98
+ }
99
+ return;
100
+ }
87
101
  if (this.ctx.loadingAnimation) {
88
102
  if (this.ctx.cancelPendingSubmission()) {
89
103
  return;
@@ -292,6 +292,7 @@ export class InteractiveMode implements InteractiveModeContext {
292
292
  #pendingSubmissionDispose: (() => void) | undefined;
293
293
  lastSigintTime = 0;
294
294
  lastEscapeTime = 0;
295
+ lastComposerClearEscapeTime = 0;
295
296
  shutdownRequested = false;
296
297
  #isShuttingDown = false;
297
298
  hookSelector: HookSelectorComponent | undefined = undefined;
@@ -306,6 +307,7 @@ export class InteractiveMode implements InteractiveModeContext {
306
307
  #baseSlashCommands: SlashCommand[] = [];
307
308
  #baseReservedSlashCommandNames: Set<string> = new Set();
308
309
  #cleanupUnsubscribe?: () => void;
310
+ #subprocessTeardownUnsubscribe?: () => void;
309
311
  readonly #version: string;
310
312
  readonly #changelogMarkdown: string | undefined;
311
313
  #planModePreviousTools: string[] | undefined;
@@ -447,6 +449,14 @@ export class InteractiveMode implements InteractiveModeContext {
447
449
  // Register session manager flush for signal handlers (SIGINT, SIGTERM, SIGHUP)
448
450
  this.#cleanupUnsubscribe = postmortem.register("session-manager-flush", () => this.sessionManager.flush());
449
451
 
452
+ // Tear down subprocess-spawning tools (browser Chrome, Python eval kernel) on a
453
+ // signal kill (SIGINT/SIGTERM/SIGHUP) so they aren't reparented to PID 1 (#698).
454
+ // The graceful /quit path already releases these via session.dispose(); this hook
455
+ // is the bounded, idempotent fallback for an external kill that bypasses it.
456
+ this.#subprocessTeardownUnsubscribe = postmortem.register("session-subprocess-teardown", () =>
457
+ this.session.disposeChildSubprocesses(),
458
+ );
459
+
450
460
  await logger.time(
451
461
  "InteractiveMode.init:slashCommands",
452
462
  this.refreshSlashCommandState.bind(this),
@@ -1908,6 +1918,9 @@ export class InteractiveMode implements InteractiveModeContext {
1908
1918
  if (this.#cleanupUnsubscribe) {
1909
1919
  this.#cleanupUnsubscribe();
1910
1920
  }
1921
+ if (this.#subprocessTeardownUnsubscribe) {
1922
+ this.#subprocessTeardownUnsubscribe();
1923
+ }
1911
1924
  if (this.isInitialized) {
1912
1925
  this.ui.stop();
1913
1926
  this.isInitialized = false;
@@ -116,6 +116,7 @@ export interface InteractiveModeContext {
116
116
  locallySubmittedUserSignatures: Set<string>;
117
117
  lastSigintTime: number;
118
118
  lastEscapeTime: number;
119
+ lastComposerClearEscapeTime: number;
119
120
  shutdownRequested: boolean;
120
121
  hookSelector: HookSelectorComponent | undefined;
121
122
  hookInput: HookInputComponent | undefined;
@@ -37,7 +37,7 @@ This mode activates only when the assignment explicitly labels Executor as Ultra
37
37
  When active:
38
38
  - Start from the approved plan/spec/acceptance criteria, then user-facing contracts, then implementation code only as supporting evidence. Treat plan/code mismatches as blockers.
39
39
  - Exercise the real user-facing invocation rather than inspecting internals alone. Live artifacts must be runtime-valid: GUI/web needs a real automation transcript plus non-uniform screenshot; CLI needs executed argv-only replay; native/desktop/TUI needs a real screenshot, PTY capture with control codes, or app-automation transcript. `inlineEvidence` is supplemental only and is never sole proof for live surfaces.
40
- - For CLI evidence, emit argv-only replay JSON with `schemaVersion: 1`, `kind: "cli-replay"`, `replaySafe: true`, and `command` as a string array. Use only allowlisted deterministic executables/arguments, or mark unsafe/non-deterministic commands with audited `replayExempt` metadata plus a valid structural fallback artifact.
40
+ - For CLI evidence, emit argv-only replay JSON with `schemaVersion: 1`, `kind: "cli-replay"`, `replaySafe: true`, and `command` as a string array. Use only allowlisted deterministic executables/arguments, or mark unsafe/non-deterministic commands with audited `replayExempt` metadata plus a valid structural fallback artifact. `replayExempt` must use exact fields `reasonCode`, `reason`, `approvedBy`, and `fallbackArtifactRefs`; allowed `reasonCode` values are exactly `unsafe_side_effect`, `requires_credentials`, `requires_network`, `non_deterministic_external`, `destructive`, `interactive_only`, and `platform_unavailable`.
41
41
  - Native/TUI evidence must be structural, not prose-only: screenshot, app transcript, or PTY artifact with terminal control codes.
42
42
  - Do not call the `ask` tool while an Ultragoal run is active; record unresolved decisions with `gjc ultragoal record-review-blockers`.
43
43
  - Try to break the work with adversarial cases, not just happy-path confirmations.
@@ -58,8 +58,8 @@ type TrackedPromise<T> = {
58
58
  };
59
59
 
60
60
  const STARTUP_TIMEOUT_MS = 250;
61
- const STARTUP_TIMEOUT_GRACE_MS = 250;
62
- const MAX_STARTUP_TIMEOUT_MS = 1_500;
61
+ const STARTUP_TIMEOUT_GRACE_MS = 500;
62
+ const MAX_STARTUP_TIMEOUT_MS = 1_750;
63
63
 
64
64
  function resolveStartupTimeoutMs(configs: MCPServerConfig[]): number {
65
65
  const configuredTimeouts = configs
@@ -549,6 +549,13 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
549
549
 
550
550
  const IRC_REPLY_MAX_BYTES = 4096;
551
551
 
552
/**
 * Hard cap, in milliseconds, for {@link AgentSession.disposeChildSubprocesses}. A
 * `SIGINT`/`SIGTERM` handler awaits that teardown before exiting, so it must never block
 * longer than this even if a subprocess (wedged Chrome renderer, stuck Python cell)
 * refuses to settle.
 */
const SIGNAL_TEARDOWN_TIMEOUT_MS = 5_000;
558
+
552
559
  /**
553
560
  * Collapse degenerate IRC ephemeral replies before they hit the relay.
554
561
  * Models occasionally loop on a single line (~16 reports of N-times-repeated
@@ -3221,6 +3228,36 @@ export class AgentSession {
3221
3228
  this.#eventListeners = [];
3222
3229
  }
3223
3230
 
3231
/**
 * Bounded, best-effort teardown of the subprocess-spawning resources this session
 * owns: the browser tool's headless/spawned Chrome and the Python eval kernel + JS VM
 * contexts. Unlike {@link dispose}, this touches only child processes and is time-boxed,
 * so a top-level `SIGINT`/`SIGTERM`/`SIGHUP` handler can run it without hanging — without
 * it, an external kill bypasses `dispose()` and orphans Chrome/Python to PID 1 (#698).
 *
 * Idempotent: every step is a no-op once the graceful {@link dispose} path has released
 * the resources.
 *
 * Never throws: a step that rejects *or* throws synchronously is logged as a warning, as
 * is a failure while resolving the owner ids. The whole run is capped at `timeoutMs`, and
 * the deadline timer is cleared as soon as the race settles so it cannot linger.
 *
 * @param timeoutMs - Upper bound, in milliseconds, to wait for the teardown steps.
 * @returns Resolves once all steps settle or the deadline passes, whichever is first.
 */
async disposeChildSubprocesses(timeoutMs = SIGNAL_TEARDOWN_TIMEOUT_MS): Promise<void> {
	// Run one step; the step is invoked synchronously, so all steps still start concurrently.
	const runStep = async (label: string, step: () => Promise<unknown>): Promise<void> => {
		try {
			await step();
		} catch (error: unknown) {
			logger.warn(`signal teardown: ${label} failed`, { error });
		}
	};

	let deadlineTimer: ReturnType<typeof setTimeout> | undefined;
	try {
		const sessionId = this.sessionManager.getSessionId();
		const kernelOwnerId = this.#evalKernelOwnerId;
		const work = Promise.allSettled([
			// kill:true so a forced exit also reaps spawned-app Chrome we own (headless
			// always closes; connected/attached browsers only disconnect — never killed).
			runStep("releaseTabsForOwner", () => releaseTabsForOwner(sessionId, { kill: true })),
			runStep("disposeKernelSessionsByOwner", () => disposeKernelSessionsByOwner(kernelOwnerId)),
			runStep("disposeVmContextsByOwner", () => disposeVmContextsByOwner(kernelOwnerId)),
		]);
		const deadline = new Promise<void>(resolve => {
			deadlineTimer = setTimeout(resolve, timeoutMs);
		});
		await Promise.race([work, deadline]);
	} catch (error: unknown) {
		// Reached only if resolving the owner ids (or logging) throws; keep the no-throw contract.
		logger.warn("signal teardown: disposeChildSubprocesses failed", { error });
	} finally {
		if (deadlineTimer !== undefined) clearTimeout(deadlineTimer);
	}
}
3260
+
3224
3261
  #closeAllProviderSessions(reason: string): void {
3225
3262
  for (const [providerKey, state] of this.#providerSessionState) {
3226
3263
  try {
@@ -6445,6 +6482,7 @@ export class AgentSession {
6445
6482
  model,
6446
6483
  apiKey,
6447
6484
  {
6485
+ ...this.#maintenanceProviderTransport(),
6448
6486
  systemPrompt: this.#baseSystemPrompt,
6449
6487
  tools: this.agent.state.tools,
6450
6488
  customInstructions,
@@ -7376,6 +7414,25 @@ export class AgentSession {
7376
7414
  );
7377
7415
  }
7378
7416
 
7417
/**
 * Transport-affinity fields forwarded into local maintenance one-shot LLM
 * calls (compaction, handoff, branch summary) so they reuse the live turn's
 * provider session state and configured WebSocket transport preference
 * instead of falling back to a fresh HTTP/SSE session. Mirrors the
 * `providerSessionId ?? sessionId` affinity the agent loop sends per turn.
 *
 * @returns An object meant to be spread into the call options: the affinity
 * `sessionId`, the session's shared `providerSessionState` map (same instance,
 * not a copy), and the agent's `preferWebsockets` setting.
 */
#maintenanceProviderTransport(): {
	sessionId: string | undefined;
	providerSessionState: Map<string, ProviderSessionState>;
	preferWebsockets: boolean | undefined;
} {
	const affinitySessionId = this.agent.providerSessionId ?? this.agent.sessionId;
	return {
		sessionId: affinitySessionId,
		providerSessionState: this.#providerSessionState,
		preferWebsockets: this.agent.preferWebsockets,
	};
}
7435
+
7379
7436
  async #compactWithFallbackModel(
7380
7437
  preparation: CompactionPreparation,
7381
7438
  customInstructions: string | undefined,
@@ -7392,6 +7449,7 @@ export class AgentSession {
7392
7449
  try {
7393
7450
  return await compact(preparation, candidate, apiKey, customInstructions, signal, {
7394
7451
  ...options,
7452
+ ...this.#maintenanceProviderTransport(),
7395
7453
  metadata: this.agent.metadataForProvider(candidate.provider),
7396
7454
  convertToLlm,
7397
7455
  telemetry,
@@ -7681,6 +7739,7 @@ export class AgentSession {
7681
7739
  while (true) {
7682
7740
  try {
7683
7741
  compactResult = await compact(preparation, candidate, apiKey, undefined, autoCompactionSignal, {
7742
+ ...this.#maintenanceProviderTransport(),
7684
7743
  promptOverride: compactionPrep.hookPrompt,
7685
7744
  extraContext: compactionPrep.hookContext,
7686
7745
  remoteInstructions: this.#baseSystemPrompt.join("\n\n"),
@@ -7906,7 +7965,12 @@ export class AgentSession {
7906
7965
  */
7907
7966
  #isRetryableError(message: AssistantMessage): boolean {
7908
7967
  const classification = this.#classifyErrorForRetry(message);
7909
- return classification === "usage_limit" || classification === "transient" || classification === "unknown";
7968
+ return (
7969
+ classification === "usage_limit" ||
7970
+ classification === "transient" ||
7971
+ classification === "unknown" ||
7972
+ classification === "first_event_timeout"
7973
+ );
7910
7974
  }
7911
7975
 
7912
7976
  #isTransientErrorMessage(errorMessage: string): boolean {
@@ -7932,6 +7996,33 @@ export class AgentSession {
7932
7996
  );
7933
7997
  }
7934
7998
 
7999
/**
 * Whether an error message describes a first-event timeout: the stream watchdog aborted
 * because no event arrived within the first-event window. Matches the shared lazy-stream
 * message and the per-provider variants
 * ("<Provider> stream timed out while waiting for the first event").
 *
 * @param errorMessage - Error text to inspect (matched case-insensitively).
 */
#isFirstEventTimeoutErrorMessage(errorMessage: string): boolean {
	const firstEventTimeoutPattern =
		/timed?\s*out while waiting for the first event|timeout waiting for first/i;
	return firstEventTimeoutPattern.test(errorMessage);
}
8006
+
8007
/**
 * Whether a first-event timeout on the error's provider should fail closed —
 * i.e. retry a bounded number of times (capped at retry.maxRetries) and then
 * surface, instead of joining the unbounded transient-retry class.
 *
 * Targets the ollama-chat API, which is exclusively ollama-cloud (local
 * Ollama uses the openai-responses API). That remote, queued backend can
 * stall before its first token even for tiny prompts; an unbounded
 * continuation retry re-issues the full request on every attempt and can
 * silently spike upstream usage (#713). First-party providers keep their
 * existing unbounded first-event-timeout retry behavior.
 *
 * @param message - The errored assistant message.
 * @returns `true` when either the active session model's API or the errored
 * message's API is `ollama-chat`.
 */
#shouldFailClosedOnFirstEventTimeout(message: AssistantMessage): boolean {
	// The active model is checked first; the message's own API also counts, which covers
	// the rare case where the session model has already moved on.
	if (this.model?.api === "ollama-chat") return true;
	return message.api === "ollama-chat";
}
8025
+
7935
8026
  #isTerminalErrorMessage(errorMessage: string): boolean {
7936
8027
  // Errors that will never succeed on retry (auth/permission, malformed
7937
8028
  // request, unknown/unsupported model). These surface immediately rather
@@ -7953,11 +8044,12 @@ export class AgentSession {
7953
8044
 
7954
8045
  /**
7955
8046
  * Ordered retry classification: overflow (compaction) -> terminal (surface)
7956
- * -> usage_limit (rotation) -> transient (retry) -> unknown (retry).
8047
+ * -> usage_limit (rotation) -> first_event_timeout (bounded retry) ->
8048
+ * transient (retry) -> unknown (retry).
7957
8049
  */
7958
8050
  #classifyErrorForRetry(
7959
8051
  message: AssistantMessage,
7960
- ): "none" | "overflow" | "terminal" | "usage_limit" | "transient" | "unknown" {
8052
+ ): "none" | "overflow" | "terminal" | "usage_limit" | "first_event_timeout" | "transient" | "unknown" {
7961
8053
  if (message.stopReason !== "error" || !message.errorMessage) return "none";
7962
8054
  const contextWindow = this.model?.contextWindow ?? 0;
7963
8055
  if (isContextOverflow(message, contextWindow)) return "overflow";
@@ -7985,6 +8077,13 @@ export class AgentSession {
7985
8077
  if (isTerminalHttp4xx && (explicitStatus !== undefined || !/rate.?limit|too many requests/i.test(err))) {
7986
8078
  return "terminal";
7987
8079
  }
8080
+ // A first-event timeout on ollama-cloud (the ollama-chat API) must not
8081
+ // join the unbounded transient class: each continuation retry re-issues
8082
+ // the full request to a remote, billable backend, so an unbounded loop
8083
+ // can silently spike usage (#713). Bound it to retry.maxRetries instead.
8084
+ if (this.#isFirstEventTimeoutErrorMessage(err) && this.#shouldFailClosedOnFirstEventTimeout(message)) {
8085
+ return "first_event_timeout";
8086
+ }
7988
8087
  if (this.#isTransientErrorMessage(err)) return "transient";
7989
8088
  return "unknown";
7990
8089
  }
@@ -9488,6 +9587,7 @@ export class AgentSession {
9488
9587
  }
9489
9588
  const branchSummarySettings = this.settings.getGroup("branchSummary");
9490
9589
  const result = await generateBranchSummary(entriesToSummarize, {
9590
+ ...this.#maintenanceProviderTransport(),
9491
9591
  model,
9492
9592
  apiKey,
9493
9593
  signal: this.#branchSummaryAbortController.signal,
@@ -8,6 +8,13 @@ function sanitizeOutputChunk(rawChunk: string): string {
8
8
  return sanitizeWithOptionalSixelPassthrough(rawChunk, sanitizeText);
9
9
  }
10
10
 
11
/**
 * Flush threshold for the opt-in sanitize-coalescing path (F21). When coalescing is enabled, raw
 * chunks accumulate until the buffer reaches this many chars (string length, not bytes), then are
 * sanitized + delivered as one batch, so many-small-chunk output pays one sanitize pass per batch
 * instead of one per tiny chunk.
 */
const COALESCE_FLUSH_CHARS = 64 * 1024;
17
+
11
18
  // =============================================================================
12
19
  // Constants
13
20
  // =============================================================================
@@ -80,6 +87,13 @@ export interface OutputSinkOptions {
80
87
  * relative to the sink (the sink does not catch errors from this callback).
81
88
  */
82
89
  onRawChunk?: (chunk: string) => void;
90
+ /**
91
+ * Opt-in (F21): when true, sanitization + live callback delivery + retention are coalesced over
92
+ * batched raw chunks instead of run per chunk, bounding sync CPU for many-small-chunk output. The
93
+ * raw artifact mirror stays byte-correct. Defaults to the PI_OUTPUT_SANITIZE_COALESCE env flag
94
+ * (default OFF — the per-chunk path is byte-identical to historical behavior).
95
+ */
96
+ coalesceSanitize?: boolean;
83
97
  }
84
98
 
85
99
  export interface TruncationResult {
@@ -706,6 +720,8 @@ export class OutputSink {
706
720
  readonly #chunkThrottleMs: number;
707
721
  readonly #maxColumns: number;
708
722
  readonly #artifactMaxBytes: number;
723
+ readonly #coalesceSanitize: boolean;
724
+ #coalesceBuf = "";
709
725
 
710
726
  constructor(options?: OutputSinkOptions) {
711
727
  const {
@@ -718,6 +734,7 @@ export class OutputSink {
718
734
  chunkThrottleMs = 0,
719
735
  onRawChunk,
720
736
  artifactMaxBytes = DEFAULT_ARTIFACT_MAX_BYTES,
737
+ coalesceSanitize = process.env.PI_OUTPUT_SANITIZE_COALESCE === "1",
721
738
  } = options ?? {};
722
739
  this.#artifactPath = artifactPath;
723
740
  this.#artifactId = artifactId;
@@ -728,6 +745,7 @@ export class OutputSink {
728
745
  this.#onRawChunk = onRawChunk;
729
746
  this.#chunkThrottleMs = chunkThrottleMs;
730
747
  this.#artifactMaxBytes = Math.max(0, artifactMaxBytes);
748
+ this.#coalesceSanitize = coalesceSanitize;
731
749
  }
732
750
 
733
751
  #headText(): string {
@@ -765,7 +783,28 @@ export class OutputSink {
765
783
  * visible retention windows are selected from the sanitized/column-capped
766
784
  * stream so production-default display matches the historical processed view.
767
785
  */
786
// F21: with coalescing enabled, raw chunks are appended to a buffer and only handed to #ingest
// once the buffer reaches COALESCE_FLUSH_CHARS; with it disabled (the default) each chunk goes
// straight to #ingest, which is byte-identical to the historical per-chunk path.
push(chunk: string): void {
	if (this.#coalesceSanitize) {
		this.#coalesceBuf += chunk;
		if (this.#coalesceBuf.length >= COALESCE_FLUSH_CHARS) {
			this.#flushCoalesced();
		}
		return;
	}
	this.#ingest(chunk);
}
798
+
799
/**
 * Process any buffered coalesced chunks as a single batch (F21).
 *
 * The buffer is reset before the batch is ingested, so the buffer is already empty if
 * ingestion throws or re-enters. A no-op when nothing is buffered.
 */
#flushCoalesced(): void {
	const pending = this.#coalesceBuf;
	if (pending.length === 0) return;
	this.#coalesceBuf = "";
	this.#ingest(pending);
}
806
+
807
+ #ingest(chunk: string): void {
769
808
  const rawChunk = chunk;
770
809
 
771
810
  // Live callbacks historically observe sanitized, uncapped chunks. The same
@@ -1046,6 +1085,7 @@ export class OutputSink {
1046
1085
  * branch in `dump()` against stale totals.
1047
1086
  */
1048
1087
  replace(text: string): void {
1088
+ this.#coalesceBuf = "";
1049
1089
  this.#setTail(text);
1050
1090
  this.#head = "";
1051
1091
  this.#headBytes = 0;
@@ -1063,6 +1103,7 @@ export class OutputSink {
1063
1103
  }
1064
1104
 
1065
1105
  async dump(notice?: string): Promise<OutputSummary> {
1106
+ this.#flushCoalesced();
1066
1107
  const noticeLine = notice ? `[${notice}]\n` : "";
1067
1108
  const totalLines = this.#sawData ? this.#totalLines + 1 : 0;
1068
1109
 
@@ -1,3 +1,5 @@
1
+ import { formatProviderCredentialHint } from "@gajae-code/ai/stream";
2
+
1
3
  export const MODEL_ONBOARDING_API_PROVIDER_COMMAND =
2
4
  "/provider add --compat <openai|anthropic> --provider <id> --base-url <url> --api-key-env <ENV> --model <model>";
3
5
  export const MODEL_ONBOARDING_PROVIDER_PRESET_COMMAND = "/provider add --preset <minimax|minimax-cn|glm>";
@@ -26,14 +28,19 @@ export function formatNoModelOnboardingError(): string {
26
28
  }
27
29
 
28
30
  export function formatNoCredentialOnboardingError(providerId: string): string {
29
- return [
31
+ const lines = [
30
32
  `No credentials found for ${providerId}.`,
31
33
  "",
32
34
  `For MiniMax/GLM presets, configure credentials with ${MODEL_ONBOARDING_PROVIDER_PRESET_COMMAND} (or ${MODEL_ONBOARDING_SETUP_COMMAND} --preset <preset>).`,
33
35
  `For custom API-compatible providers, use ${MODEL_ONBOARDING_API_PROVIDER_COMMAND}.`,
34
- `For OAuth/subscription providers, use ${MODEL_ONBOARDING_OAUTH_COMMAND}.`,
36
+ `For OAuth/subscription providers, use ${MODEL_ONBOARDING_OAUTH_COMMAND} (interactive; not available in headless/print mode).`,
37
+ ];
38
+ const headlessHint = formatProviderCredentialHint(providerId);
39
+ if (headlessHint) lines.push(headlessHint);
40
+ lines.push(
35
41
  "Then run /model to select a configured model or assign it to DEFAULT, EXECUTOR, ARCHITECT, PLANNER, or CRITIC.",
36
- ].join("\n");
42
+ );
43
+ return lines.join("\n");
37
44
  }
38
45
 
39
46
  export function formatNoModelsAvailableFallback(): string {