npm - @gajae-code/coding-agent - Versions diffs - 0.5.3 → 0.5.4 - Mend

@gajae-code/coding-agent 0.5.3 → 0.5.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Files changed (30)

package/CHANGELOG.md +9 -0
package/dist/types/config/model-profiles.d.ts +10 -0
package/dist/types/modes/interactive-mode.d.ts +1 -0
package/dist/types/modes/types.d.ts +1 -0
package/dist/types/session/agent-session.d.ts +12 -0
package/dist/types/session/streaming-output.d.ts +7 -0
package/dist/types/web/search/providers/codex.d.ts +4 -4
package/package.json +7 -7
package/src/async/job-manager.ts +30 -6
package/src/config/model-profile-activation.ts +71 -3
package/src/config/model-profiles.ts +39 -14
package/src/defaults/gjc/skills/deep-interview/SKILL.md +11 -2
package/src/defaults/gjc/skills/ralplan/SKILL.md +2 -2
package/src/defaults/gjc/skills/ultragoal/SKILL.md +2 -2
package/src/gjc-runtime/deep-interview-runtime.ts +14 -13
package/src/gjc-runtime/ralplan-runtime.ts +10 -0
package/src/gjc-runtime/state-runtime.ts +73 -0
package/src/gjc-runtime/ultragoal-runtime.ts +8 -4
package/src/modes/controllers/input-controller.ts +14 -0
package/src/modes/interactive-mode.ts +13 -0
package/src/modes/types.ts +1 -0
package/src/prompts/agents/executor.md +1 -1
package/src/runtime-mcp/manager.ts +2 -2
package/src/session/agent-session.ts +103 -3
package/src/session/streaming-output.ts +41 -0
package/src/setup/model-onboarding-guidance.ts +10 -3
package/src/skill-state/active-state.ts +79 -7
package/src/tools/browser/registry.ts +17 -1
package/src/tools/cron.ts +2 -6
package/src/web/search/providers/codex.ts +6 -5

package/src/gjc-runtime/state-runtime.ts CHANGED

@@ -450,6 +450,52 @@ function activeFlag(value: unknown): boolean {
 	return isPlainObject(value) && value.active !== false;
 }
+function phaseFromActiveValue(value: unknown): string | undefined {
+	if (!isPlainObject(value) || typeof value.phase !== "string") return undefined;
+	const phase = value.phase.trim();
+	return phase || undefined;
+}
+const RALPLAN_CANONICAL_PHASE_OVERRIDES = new Set([
+	"final",
+	"handoff",
+	"complete",
+	"completed",
+	"failed",
+	"cancelled",
+	"canceled",
+	"inactive",
+]);
+function modeStatePhase(value: unknown): string | undefined {
+	if (!isPlainObject(value) || typeof value.current_phase !== "string") return undefined;
+	const phase = value.current_phase.trim();
+	if (!phase) return undefined;
+	if (value.active === false && !RALPLAN_CANONICAL_PHASE_OVERRIDES.has(phase)) return undefined;
+	return phase;
+}
+function pushPhaseDriftProblem(options: {
+	problems: DoctorProblem[];
+	pathValue: string;
+	skill: CanonicalGjcWorkflowSkill;
+	entryKind: "active entry" | "active snapshot";
+	entrySkill: string;
+	entryPhase: string | undefined;
+	statePhase: string | undefined;
+}): void {
+	if (!options.entryPhase || !options.statePhase || options.entryPhase === options.statePhase) return;
+	options.problems.push(
+		doctorProblem(
+			"stale_active_state",
+			options.pathValue,
+			`${options.entryKind} for ${options.entrySkill} phase ${options.entryPhase} differs from canonical mode-state phase ${options.statePhase}`,
+			`gjc state ${options.skill} clear`,
+			options.skill,
+		),
+	);
+}
 async function collectDoctorSummary(
 	cwd: string,
 	skill: CanonicalGjcWorkflowSkill | undefined,
@@ -460,6 +506,7 @@ async function collectDoctorSummary(
 	const problems: DoctorProblem[] = [];
 	let filesScanned = 0;
 	let journalsScanned = 0;
+	const invalidModeStates = new Set<string>();
 	for (const currentSkill of skills) {
 		const filePath = modeStateFile(cwd, currentSkill, sessionId);
@@ -476,6 +523,7 @@ async function collectDoctorSummary(
 					currentSkill,
 				),
 			);
+			invalidModeStates.add(currentSkill);
 			continue;
 		}
 		const validation = validateWorkflowStateEnvelope(currentSkill, raw.value);
@@ -489,6 +537,7 @@ async function collectDoctorSummary(
 					currentSkill,
 				),
 			);
+			invalidModeStates.add(currentSkill);
 		}
 		const mismatch = await detectWorkflowEnvelopeIntegrityMismatch(filePath);
 		if (mismatch) {
@@ -501,6 +550,7 @@ async function collectDoctorSummary(
 					currentSkill,
 				),
 			);
+			invalidModeStates.add(currentSkill);
 		}
 	}
@@ -553,6 +603,17 @@ async function collectDoctorSummary(
 					),
 				);
 			}
+			if (canonical && activeFlag(entry.value) && !invalidModeStates.has(canonical)) {
+				pushPhaseDriftProblem({
+					problems,
+					pathValue: entryPath,
+					skill: canonical,
+					entryKind: "active entry",
+					entrySkill,
+					entryPhase: phaseFromActiveValue(entry.value),
+					statePhase: modeStatePhase(state.value),
+				});
+			}
 		}
 		if (isPlainObject(snapshot.value)) {
 			const activeSkills = Array.isArray(snapshot.value.active_skills) ? snapshot.value.active_skills : [];
@@ -572,6 +633,18 @@ async function collectDoctorSummary(
 						),
 					);
 				}
+				if (canonical && activeFlag(entry) && !invalidModeStates.has(canonical)) {
+					const state = await readRawJson(modeStateFile(cwd, canonical, scopeSessionId));
+					pushPhaseDriftProblem({
+						problems,
+						pathValue: snapshotPath,
+						skill: canonical,
+						entryKind: "active snapshot",
+						entrySkill,
+						entryPhase: phaseFromActiveValue(entry),
+						statePhase: modeStatePhase(state.value),
+					});
+				}
 			}
 		}
 	};

package/src/gjc-runtime/ultragoal-runtime.ts CHANGED

@@ -1247,7 +1247,7 @@ const CLI_REPLAY_MAX_OUTPUT_BYTES = 1024 * 1024;
 const CLI_REPLAY_DEFAULT_TIMEOUT_MS = 10_000;
 const CLI_REPLAY_MIN_TIMEOUT_MS = 1_000;
 const CLI_REPLAY_MAX_TIMEOUT_MS = 30_000;
-const CLI_REPLAY_EXEMPT_REASON_CODES = new Set([
+const CLI_REPLAY_EXEMPT_REASON_CODES = [
 	"unsafe_side_effect",
 	"requires_credentials",
 	"requires_network",
@@ -1255,8 +1255,10 @@ const CLI_REPLAY_EXEMPT_REASON_CODES = new Set([
 	"destructive",
 	"interactive_only",
 	"platform_unavailable",
-]);
+] as const;
+const CLI_REPLAY_EXEMPT_REASON_CODE_SET = new Set<string>(CLI_REPLAY_EXEMPT_REASON_CODES);
 const CLI_REPLAY_ENV_BASE: Record<string, string> = { CI: "1", NO_COLOR: "1", GJC_ULTRAGOAL_REPLAY: "1" };
+const CLI_REPLAY_EXEMPT_REASON_CODE_LIST = CLI_REPLAY_EXEMPT_REASON_CODES.join(", ");
 const CLI_REPLAY_SAFE_ENV_NAMES = new Set(["LANG", "LC_ALL", "LC_CTYPE", "TZ"]);
 const CLI_REPLAY_DANGEROUS_ENV_NAME_PATTERN =
 	/^(?:NODE_OPTIONS|GIT_EXTERNAL_DIFF|GIT_SSH|GIT_SSH_COMMAND|GIT_PAGER|PATH|LD_PRELOAD|LD_LIBRARY_PATH)$|^(?:GIT_CONFIG|DYLD_|BUN_|NPM_CONFIG_)|(?:^|_)OPTIONS$|PRELOAD$/;
@@ -1568,8 +1570,10 @@ async function validateReplayExemptFallback(
 	const exempt = qualityGateObject(record.replayExempt);
 	if (!exempt) return false;
 	const reasonCode = requiredStringField(exempt, "reasonCode", `${fieldName}.replayExempt`);
-	if (!CLI_REPLAY_EXEMPT_REASON_CODES.has(reasonCode))
-		throw new Error(`qualityGate ${fieldName}.replayExempt.reasonCode is not recognized`);
+	if (!CLI_REPLAY_EXEMPT_REASON_CODE_SET.has(reasonCode))
+		throw new Error(
+			`qualityGate ${fieldName}.replayExempt.reasonCode must be one of: ${CLI_REPLAY_EXEMPT_REASON_CODE_LIST}`,
+		);
 	const reason = requiredStringField(exempt, "reason", `${fieldName}.replayExempt`);
 	if (!isSubstantiveEvidence(reason) || reason.length < 30)
 		throw new Error(`qualityGate ${fieldName}.replayExempt.reason must be audited and substantive`);

package/src/modes/controllers/input-controller.ts CHANGED

@@ -84,6 +84,20 @@ export class InputController {
 				}
 				this.#steerConsumePending = false;
 			}
+			// Normal input state with user-typed text: Esc must not interrupt a
+			// running task (streaming turn, bash/eval). A double Esc within the
+			// 500ms window clears the composer instead. Bash/Python input modes
+			// keep their own Esc handling in the chain below.
+			if (!this.ctx.isBashMode && !this.ctx.isPythonMode && this.ctx.editor.getText().trim()) {
+				const now = Date.now();
+				if (now - this.ctx.lastComposerClearEscapeTime < 500) {
+					this.ctx.clearEditor();
+					this.ctx.lastComposerClearEscapeTime = 0;
+				} else {
+					this.ctx.lastComposerClearEscapeTime = now;
+				}
+				return;
+			}
 			if (this.ctx.loadingAnimation) {
 				if (this.ctx.cancelPendingSubmission()) {
 					return;

package/src/modes/interactive-mode.ts CHANGED

@@ -292,6 +292,7 @@ export class InteractiveMode implements InteractiveModeContext {
 	#pendingSubmissionDispose: (() => void) | undefined;
 	lastSigintTime = 0;
 	lastEscapeTime = 0;
+	lastComposerClearEscapeTime = 0;
 	shutdownRequested = false;
 	#isShuttingDown = false;
 	hookSelector: HookSelectorComponent | undefined = undefined;
@@ -306,6 +307,7 @@ export class InteractiveMode implements InteractiveModeContext {
 	#baseSlashCommands: SlashCommand[] = [];
 	#baseReservedSlashCommandNames: Set<string> = new Set();
 	#cleanupUnsubscribe?: () => void;
+	#subprocessTeardownUnsubscribe?: () => void;
 	readonly #version: string;
 	readonly #changelogMarkdown: string | undefined;
 	#planModePreviousTools: string[] | undefined;
@@ -447,6 +449,14 @@ export class InteractiveMode implements InteractiveModeContext {
 		// Register session manager flush for signal handlers (SIGINT, SIGTERM, SIGHUP)
 		this.#cleanupUnsubscribe = postmortem.register("session-manager-flush", () => this.sessionManager.flush());
+		// Tear down subprocess-spawning tools (browser Chrome, Python eval kernel) on a
+		// signal kill (SIGINT/SIGTERM/SIGHUP) so they aren't reparented to PID 1 (#698).
+		// The graceful /quit path already releases these via session.dispose(); this hook
+		// is the bounded, idempotent fallback for an external kill that bypasses it.
+		this.#subprocessTeardownUnsubscribe = postmortem.register("session-subprocess-teardown", () =>
+			this.session.disposeChildSubprocesses(),
+		);
 		await logger.time(
 			"InteractiveMode.init:slashCommands",
 			this.refreshSlashCommandState.bind(this),
@@ -1908,6 +1918,9 @@ export class InteractiveMode implements InteractiveModeContext {
 		if (this.#cleanupUnsubscribe) {
 			this.#cleanupUnsubscribe();
 		}
+		if (this.#subprocessTeardownUnsubscribe) {
+			this.#subprocessTeardownUnsubscribe();
+		}
 		if (this.isInitialized) {
 			this.ui.stop();
 			this.isInitialized = false;

package/src/modes/types.ts CHANGED

@@ -116,6 +116,7 @@ export interface InteractiveModeContext {
 	locallySubmittedUserSignatures: Set<string>;
 	lastSigintTime: number;
 	lastEscapeTime: number;
+	lastComposerClearEscapeTime: number;
 	shutdownRequested: boolean;
 	hookSelector: HookSelectorComponent | undefined;
 	hookInput: HookInputComponent | undefined;

package/src/prompts/agents/executor.md CHANGED

@@ -37,7 +37,7 @@ This mode activates only when the assignment explicitly labels Executor as Ultra
 When active:
 - Start from the approved plan/spec/acceptance criteria, then user-facing contracts, then implementation code only as supporting evidence. Treat plan/code mismatches as blockers.
 - Exercise the real user-facing invocation rather than inspecting internals alone. Live artifacts must be runtime-valid: GUI/web needs a real automation transcript plus non-uniform screenshot; CLI needs executed argv-only replay; native/desktop/TUI needs a real screenshot, PTY capture with control codes, or app-automation transcript. `inlineEvidence` is supplemental only and is never sole proof for live surfaces.
-- For CLI evidence, emit argv-only replay JSON with `schemaVersion: 1`, `kind: "cli-replay"`, `replaySafe: true`, and `command` as a string array. Use only allowlisted deterministic executables/arguments, or mark unsafe/non-deterministic commands with audited `replayExempt` metadata plus a valid structural fallback artifact.
+- For CLI evidence, emit argv-only replay JSON with `schemaVersion: 1`, `kind: "cli-replay"`, `replaySafe: true`, and `command` as a string array. Use only allowlisted deterministic executables/arguments, or mark unsafe/non-deterministic commands with audited `replayExempt` metadata plus a valid structural fallback artifact. `replayExempt` must use exact fields `reasonCode`, `reason`, `approvedBy`, and `fallbackArtifactRefs`; allowed `reasonCode` values are exactly `unsafe_side_effect`, `requires_credentials`, `requires_network`, `non_deterministic_external`, `destructive`, `interactive_only`, and `platform_unavailable`.
 - Native/TUI evidence must be structural, not prose-only: screenshot, app transcript, or PTY artifact with terminal control codes.
 - Do not call the `ask` tool while an Ultragoal run is active; record unresolved decisions with `gjc ultragoal record-review-blockers`.
 - Try to break the work with adversarial cases, not just happy-path confirmations.

package/src/runtime-mcp/manager.ts CHANGED

@@ -58,8 +58,8 @@ type TrackedPromise<T> = {
 };
 const STARTUP_TIMEOUT_MS = 250;
-const STARTUP_TIMEOUT_GRACE_MS = 250;
-const MAX_STARTUP_TIMEOUT_MS = 1_500;
+const STARTUP_TIMEOUT_GRACE_MS = 500;
+const MAX_STARTUP_TIMEOUT_MS = 1_750;
 function resolveStartupTimeoutMs(configs: MCPServerConfig[]): number {
 	const configuredTimeouts = configs

package/src/session/agent-session.ts CHANGED

@@ -549,6 +549,13 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
 const IRC_REPLY_MAX_BYTES = 4096;
+/**
+ * Hard cap for {@link AgentSession.disposeChildSubprocesses}. A `SIGINT`/`SIGTERM` handler
+ * awaits this teardown before exiting, so it must never block longer than this even if a
+ * subprocess (wedged Chrome renderer, stuck Python cell) refuses to settle.
+ */
+const SIGNAL_TEARDOWN_TIMEOUT_MS = 5_000;
 /**
  * Collapse degenerate IRC ephemeral replies before they hit the relay.
  * Models occasionally loop on a single line (~16 reports of N-times-repeated
@@ -3221,6 +3228,36 @@ export class AgentSession {
 		this.#eventListeners = [];
 	}
+	/**
+	 * Bounded, best-effort teardown of the subprocess-spawning resources this session
+	 * owns: the browser tool's headless/spawned Chrome and the Python eval kernel + JS VM
+	 * contexts. Unlike {@link dispose}, this touches only child processes and is time-boxed,
+	 * so a top-level `SIGINT`/`SIGTERM`/`SIGHUP` handler can run it without hanging — without
+	 * it, an external kill bypasses `dispose()` and orphans Chrome/Python to PID 1 (#698).
+	 *
+	 * Idempotent: every step is a no-op once the graceful {@link dispose} path has released
+	 * the resources. Never throws; per-step failures are logged and the whole run is capped
+	 * at `timeoutMs` so a wedged subprocess can't stall process exit.
+	 */
+	async disposeChildSubprocesses(timeoutMs = SIGNAL_TEARDOWN_TIMEOUT_MS): Promise<void> {
+		const sessionId = this.sessionManager.getSessionId();
+		const kernelOwnerId = this.#evalKernelOwnerId;
+		const work = Promise.allSettled([
+			// kill:true so a forced exit also reaps spawned-app Chrome we own (headless
+			// always closes; connected/attached browsers only disconnect — never killed).
+			releaseTabsForOwner(sessionId, { kill: true }).catch((error: unknown) =>
+				logger.warn("signal teardown: releaseTabsForOwner failed", { error }),
+			),
+			disposeKernelSessionsByOwner(kernelOwnerId).catch((error: unknown) =>
+				logger.warn("signal teardown: disposeKernelSessionsByOwner failed", { error }),
+			),
+			disposeVmContextsByOwner(kernelOwnerId).catch((error: unknown) =>
+				logger.warn("signal teardown: disposeVmContextsByOwner failed", { error }),
+			),
+		]);
+		await Promise.race([work, Bun.sleep(timeoutMs)]);
+	}
 	#closeAllProviderSessions(reason: string): void {
 		for (const [providerKey, state] of this.#providerSessionState) {
 			try {
@@ -6445,6 +6482,7 @@ export class AgentSession {
 				model,
 				apiKey,
 				{
+					...this.#maintenanceProviderTransport(),
 					systemPrompt: this.#baseSystemPrompt,
 					tools: this.agent.state.tools,
 					customInstructions,
@@ -7376,6 +7414,25 @@ export class AgentSession {
 		);
 	}
+	/**
+	 * Transport-affinity fields forwarded into local maintenance one-shot LLM
+	 * calls (compaction, handoff, branch summary) so they reuse the live turn's
+	 * provider session state and configured WebSocket transport preference
+	 * instead of falling back to a fresh HTTP/SSE session. Mirrors the
+	 * `providerSessionId ?? sessionId` affinity the agent loop sends per turn.
+	 */
+	#maintenanceProviderTransport(): {
+		sessionId: string | undefined;
+		providerSessionState: Map<string, ProviderSessionState>;
+		preferWebsockets: boolean | undefined;
+	} {
+		return {
+			sessionId: this.agent.providerSessionId ?? this.agent.sessionId,
+			providerSessionState: this.#providerSessionState,
+			preferWebsockets: this.agent.preferWebsockets,
+		};
+	}
 	async #compactWithFallbackModel(
 		preparation: CompactionPreparation,
 		customInstructions: string | undefined,
@@ -7392,6 +7449,7 @@ export class AgentSession {
 			try {
 				return await compact(preparation, candidate, apiKey, customInstructions, signal, {
 					...options,
+					...this.#maintenanceProviderTransport(),
 					metadata: this.agent.metadataForProvider(candidate.provider),
 					convertToLlm,
 					telemetry,
@@ -7681,6 +7739,7 @@ export class AgentSession {
 					while (true) {
 						try {
 							compactResult = await compact(preparation, candidate, apiKey, undefined, autoCompactionSignal, {
+								...this.#maintenanceProviderTransport(),
 								promptOverride: compactionPrep.hookPrompt,
 								extraContext: compactionPrep.hookContext,
 								remoteInstructions: this.#baseSystemPrompt.join("\n\n"),
@@ -7906,7 +7965,12 @@ export class AgentSession {
 	 */
 	#isRetryableError(message: AssistantMessage): boolean {
 		const classification = this.#classifyErrorForRetry(message);
-		return classification === "usage_limit" || classification === "transient" || classification === "unknown";
+		return (
+			classification === "usage_limit" ||
+			classification === "transient" ||
+			classification === "unknown" ||
+			classification === "first_event_timeout"
+		);
 	}
 	#isTransientErrorMessage(errorMessage: string): boolean {
@@ -7932,6 +7996,33 @@ export class AgentSession {
 		);
 	}
+	#isFirstEventTimeoutErrorMessage(errorMessage: string): boolean {
+		// First-event timeout: the stream watchdog aborted because no event
+		// arrived within the first-event window. Matches the shared lazy-stream
+		// message and the per-provider variants
+		// ("<Provider> stream timed out while waiting for the first event").
+		return /timed?\s*out while waiting for the first event|timeout waiting for first/i.test(errorMessage);
+	}
+	/**
+	 * Whether a first-event timeout on the error's provider should fail closed —
+	 * i.e. retry a bounded number of times (capped at retry.maxRetries) and then
+	 * surface, instead of joining the unbounded transient-retry class.
+	 *
+	 * Targets the ollama-chat API, which is exclusively ollama-cloud (local
+	 * Ollama uses the openai-responses API). That remote, queued backend can
+	 * stall before its first token even for tiny prompts; an unbounded
+	 * continuation retry re-issues the full request on every attempt and can
+	 * silently spike upstream usage (#713). First-party providers keep their
+	 * existing unbounded first-event-timeout retry behavior.
+	 */
+	#shouldFailClosedOnFirstEventTimeout(message: AssistantMessage): boolean {
+		// Prefer the active model's API (the model that produced the error);
+		// the errored message's API is a fallback for the rare case where the
+		// session model has already moved on.
+		return this.model?.api === "ollama-chat" || message.api === "ollama-chat";
+	}
 	#isTerminalErrorMessage(errorMessage: string): boolean {
 		// Errors that will never succeed on retry (auth/permission, malformed
 		// request, unknown/unsupported model). These surface immediately rather
@@ -7953,11 +8044,12 @@ export class AgentSession {
 	/**
 	 * Ordered retry classification: overflow (compaction) -> terminal (surface)
-	 * -> usage_limit (rotation) -> transient (retry) -> unknown (retry).
+	 * -> usage_limit (rotation) -> first_event_timeout (bounded retry) ->
+	 * transient (retry) -> unknown (retry).
 	 */
 	#classifyErrorForRetry(
 		message: AssistantMessage,
-	): "none" | "overflow" | "terminal" | "usage_limit" | "transient" | "unknown" {
+	): "none" | "overflow" | "terminal" | "usage_limit" | "first_event_timeout" | "transient" | "unknown" {
 		if (message.stopReason !== "error" || !message.errorMessage) return "none";
 		const contextWindow = this.model?.contextWindow ?? 0;
 		if (isContextOverflow(message, contextWindow)) return "overflow";
@@ -7985,6 +8077,13 @@ export class AgentSession {
 		if (isTerminalHttp4xx && (explicitStatus !== undefined || !/rate.?limit|too many requests/i.test(err))) {
 			return "terminal";
 		}
+		// A first-event timeout on ollama-cloud (the ollama-chat API) must not
+		// join the unbounded transient class: each continuation retry re-issues
+		// the full request to a remote, billable backend, so an unbounded loop
+		// can silently spike usage (#713). Bound it to retry.maxRetries instead.
+		if (this.#isFirstEventTimeoutErrorMessage(err) && this.#shouldFailClosedOnFirstEventTimeout(message)) {
+			return "first_event_timeout";
+		}
 		if (this.#isTransientErrorMessage(err)) return "transient";
 		return "unknown";
 	}
@@ -9488,6 +9587,7 @@ export class AgentSession {
 			}
 			const branchSummarySettings = this.settings.getGroup("branchSummary");
 			const result = await generateBranchSummary(entriesToSummarize, {
+				...this.#maintenanceProviderTransport(),
 				model,
 				apiKey,
 				signal: this.#branchSummaryAbortController.signal,

package/src/session/streaming-output.ts CHANGED

@@ -8,6 +8,13 @@ function sanitizeOutputChunk(rawChunk: string): string {
 	return sanitizeWithOptionalSixelPassthrough(rawChunk, sanitizeText);
 }
+/**
+ * Flush threshold for the opt-in sanitize-coalescing path (F21). When coalescing is enabled, raw
+ * chunks accumulate until they reach this many chars, then are sanitized + delivered as one batch,
+ * so many-small-chunk output pays one sanitize pass per batch instead of one per tiny chunk.
+ */
+const COALESCE_FLUSH_CHARS = 64 * 1024;
 // =============================================================================
 // Constants
 // =============================================================================
@@ -80,6 +87,13 @@ export interface OutputSinkOptions {
 	 * relative to the sink (the sink does not catch errors from this callback).
 	 */
 	onRawChunk?: (chunk: string) => void;
+	/**
+	 * Opt-in (F21): when true, sanitization + live callback delivery + retention are coalesced over
+	 * batched raw chunks instead of run per chunk, bounding sync CPU for many-small-chunk output. The
+	 * raw artifact mirror stays byte-correct. Defaults to the PI_OUTPUT_SANITIZE_COALESCE env flag
+	 * (default OFF — the per-chunk path is byte-identical to historical behavior).
+	 */
+	coalesceSanitize?: boolean;
 }
 export interface TruncationResult {
@@ -706,6 +720,8 @@ export class OutputSink {
 	readonly #chunkThrottleMs: number;
 	readonly #maxColumns: number;
 	readonly #artifactMaxBytes: number;
+	readonly #coalesceSanitize: boolean;
+	#coalesceBuf = "";
 	constructor(options?: OutputSinkOptions) {
 		const {
@@ -718,6 +734,7 @@ export class OutputSink {
 			chunkThrottleMs = 0,
 			onRawChunk,
 			artifactMaxBytes = DEFAULT_ARTIFACT_MAX_BYTES,
+			coalesceSanitize = process.env.PI_OUTPUT_SANITIZE_COALESCE === "1",
 		} = options ?? {};
 		this.#artifactPath = artifactPath;
 		this.#artifactId = artifactId;
@@ -728,6 +745,7 @@ export class OutputSink {
 		this.#onRawChunk = onRawChunk;
 		this.#chunkThrottleMs = chunkThrottleMs;
 		this.#artifactMaxBytes = Math.max(0, artifactMaxBytes);
+		this.#coalesceSanitize = coalesceSanitize;
 	}
 	#headText(): string {
@@ -765,7 +783,28 @@ export class OutputSink {
 	 * visible retention windows are selected from the sanitized/column-capped
 	 * stream so production-default display matches the historical processed view.
 	 */
+	// F21: with coalescing enabled, accumulate raw chunks and process them in batches; the default
+	// (disabled) path calls #ingest directly and is byte-identical to the historical per-chunk path.
 	push(chunk: string): void {
+		if (!this.#coalesceSanitize) {
+			this.#ingest(chunk);
+			return;
+		}
+		this.#coalesceBuf += chunk;
+		if (this.#coalesceBuf.length >= COALESCE_FLUSH_CHARS) {
+			this.#flushCoalesced();
+		}
+	}
+	/** Process any buffered coalesced chunks as a single batch (F21). */
+	#flushCoalesced(): void {
+		if (this.#coalesceBuf.length === 0) return;
+		const batch = this.#coalesceBuf;
+		this.#coalesceBuf = "";
+		this.#ingest(batch);
+	}
+	#ingest(chunk: string): void {
 		const rawChunk = chunk;
 		// Live callbacks historically observe sanitized, uncapped chunks. The same
@@ -1046,6 +1085,7 @@ export class OutputSink {
 	 * branch in `dump()` against stale totals.
 	 */
 	replace(text: string): void {
+		this.#coalesceBuf = "";
 		this.#setTail(text);
 		this.#head = "";
 		this.#headBytes = 0;
@@ -1063,6 +1103,7 @@ export class OutputSink {
 	}
 	async dump(notice?: string): Promise<OutputSummary> {
+		this.#flushCoalesced();
 		const noticeLine = notice ? `[${notice}]\n` : "";
 		const totalLines = this.#sawData ? this.#totalLines + 1 : 0;

package/src/setup/model-onboarding-guidance.ts CHANGED

@@ -1,3 +1,5 @@
+import { formatProviderCredentialHint } from "@gajae-code/ai/stream";
 export const MODEL_ONBOARDING_API_PROVIDER_COMMAND =
 	"/provider add --compat <openai|anthropic> --provider <id> --base-url <url> --api-key-env <ENV> --model <model>";
 export const MODEL_ONBOARDING_PROVIDER_PRESET_COMMAND = "/provider add --preset <minimax|minimax-cn|glm>";
@@ -26,14 +28,19 @@ export function formatNoModelOnboardingError(): string {
 }
 export function formatNoCredentialOnboardingError(providerId: string): string {
-	return [
+	const lines = [
 		`No credentials found for ${providerId}.`,
 		"",
 		`For MiniMax/GLM presets, configure credentials with ${MODEL_ONBOARDING_PROVIDER_PRESET_COMMAND} (or ${MODEL_ONBOARDING_SETUP_COMMAND} --preset <preset>).`,
 		`For custom API-compatible providers, use ${MODEL_ONBOARDING_API_PROVIDER_COMMAND}.`,
-		`For OAuth/subscription providers, use ${MODEL_ONBOARDING_OAUTH_COMMAND}.`,
+		`For OAuth/subscription providers, use ${MODEL_ONBOARDING_OAUTH_COMMAND} (interactive; not available in headless/print mode).`,
+	];
+	const headlessHint = formatProviderCredentialHint(providerId);
+	if (headlessHint) lines.push(headlessHint);
+	lines.push(
 		"Then run /model to select a configured model or assign it to DEFAULT, EXECUTOR, ARCHITECT, PLANNER, or CRITIC.",
-	].join("\n");
+	);
+	return lines.join("\n");
 }
 export function formatNoModelsAvailableFallback(): string {