npm - @gajae-code/coding-agent - Versions diffs - 0.5.2 → 0.5.4 - Mend

@gajae-code/coding-agent 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

package/CHANGELOG.md +23 -0
package/dist/types/async/job-manager.d.ts +6 -0
package/dist/types/config/model-profiles.d.ts +10 -0
package/dist/types/dap/client.d.ts +2 -1
package/dist/types/edit/read-file.d.ts +6 -0
package/dist/types/eval/js/context-manager.d.ts +3 -0
package/dist/types/eval/js/executor.d.ts +1 -0
package/dist/types/exec/bash-executor.d.ts +2 -0
package/dist/types/gjc-runtime/tmux-sessions.d.ts +7 -1
package/dist/types/lsp/types.d.ts +2 -0
package/dist/types/modes/bridge/bridge-mode.d.ts +1 -0
package/dist/types/modes/components/model-selector.d.ts +2 -0
package/dist/types/modes/components/oauth-selector.d.ts +1 -0
package/dist/types/modes/components/runtime-mcp-add-wizard.d.ts +1 -0
package/dist/types/modes/components/tool-execution.d.ts +1 -0
package/dist/types/modes/interactive-mode.d.ts +1 -0
package/dist/types/modes/types.d.ts +1 -0
package/dist/types/runtime/process-lifecycle.d.ts +108 -0
package/dist/types/runtime-mcp/transports/stdio.d.ts +1 -0
package/dist/types/runtime-mcp/types.d.ts +2 -0
package/dist/types/session/agent-session.d.ts +29 -1
package/dist/types/session/artifacts.d.ts +4 -1
package/dist/types/session/streaming-output.d.ts +12 -0
package/dist/types/slash-commands/helpers/fast-status-report.d.ts +76 -0
package/dist/types/tools/bash.d.ts +1 -0
package/dist/types/tools/browser/tab-supervisor.d.ts +9 -0
package/dist/types/tools/sqlite-reader.d.ts +2 -1
package/dist/types/web/search/providers/codex.d.ts +4 -4
package/package.json +7 -7
package/src/async/job-manager.ts +181 -43
package/src/config/file-lock.ts +9 -1
package/src/config/model-profile-activation.ts +71 -3
package/src/config/model-profiles.ts +39 -14
package/src/dap/client.ts +105 -64
package/src/dap/session.ts +44 -7
package/src/defaults/gjc/skills/deep-interview/SKILL.md +11 -2
package/src/defaults/gjc/skills/ralplan/SKILL.md +2 -2
package/src/defaults/gjc/skills/ultragoal/SKILL.md +2 -2
package/src/edit/read-file.ts +19 -1
package/src/eval/js/context-manager.ts +228 -65
package/src/eval/js/executor.ts +2 -0
package/src/eval/js/index.ts +1 -0
package/src/eval/js/worker-core.ts +10 -6
package/src/eval/py/executor.ts +68 -19
package/src/eval/py/kernel.ts +46 -22
package/src/eval/py/runner.py +68 -14
package/src/exec/bash-executor.ts +49 -13
package/src/gjc-runtime/deep-interview-runtime.ts +14 -13
package/src/gjc-runtime/ralplan-runtime.ts +10 -0
package/src/gjc-runtime/state-runtime.ts +73 -0
package/src/gjc-runtime/tmux-gc.ts +86 -37
package/src/gjc-runtime/tmux-sessions.ts +44 -6
package/src/gjc-runtime/ultragoal-runtime.ts +8 -4
package/src/internal-urls/artifact-protocol.ts +10 -1
package/src/internal-urls/docs-index.generated.ts +2 -2
package/src/lsp/client.ts +64 -26
package/src/lsp/index.ts +2 -1
package/src/lsp/lspmux.ts +33 -9
package/src/lsp/types.ts +2 -0
package/src/modes/bridge/bridge-mode.ts +21 -0
package/src/modes/components/assistant-message.ts +10 -2
package/src/modes/components/bash-execution.ts +5 -1
package/src/modes/components/eval-execution.ts +5 -1
package/src/modes/components/model-selector.ts +34 -2
package/src/modes/components/oauth-selector.ts +5 -0
package/src/modes/components/runtime-mcp-add-wizard.ts +58 -7
package/src/modes/components/skill-message.ts +24 -16
package/src/modes/components/tool-execution.ts +6 -0
package/src/modes/controllers/extension-ui-controller.ts +33 -6
package/src/modes/controllers/input-controller.ts +19 -0
package/src/modes/controllers/selector-controller.ts +6 -1
package/src/modes/interactive-mode.ts +13 -0
package/src/modes/types.ts +1 -0
package/src/modes/utils/ui-helpers.ts +5 -2
package/src/prompts/agents/executor.md +1 -1
package/src/runtime/process-lifecycle.ts +400 -0
package/src/runtime-mcp/manager.ts +164 -50
package/src/runtime-mcp/transports/http.ts +12 -11
package/src/runtime-mcp/transports/stdio.ts +64 -38
package/src/runtime-mcp/types.ts +3 -0
package/src/sdk.ts +27 -0
package/src/session/agent-session.ts +271 -25
package/src/session/artifacts.ts +17 -2
package/src/session/blob-store.ts +36 -2
package/src/session/session-manager.ts +29 -13
package/src/session/streaming-output.ts +95 -3
package/src/setup/model-onboarding-guidance.ts +10 -3
package/src/skill-state/active-state.ts +79 -7
package/src/slash-commands/builtin-registry.ts +30 -3
package/src/slash-commands/helpers/fast-status-report.ts +111 -0
package/src/tools/archive-reader.ts +10 -1
package/src/tools/bash.ts +11 -4
package/src/tools/browser/registry.ts +17 -1
package/src/tools/browser/tab-supervisor.ts +22 -0
package/src/tools/browser.ts +38 -4
package/src/tools/cron.ts +2 -6
package/src/tools/read.ts +11 -12
package/src/tools/sqlite-reader.ts +19 -5
package/src/web/search/providers/codex.ts +6 -5

package/src/session/agent-session.ts CHANGED Viewed

@@ -41,6 +41,8 @@ import {
 	calculatePromptTokens,
 	collectEntriesForBranchSummary,
 	compact,
+	type EmergencyCompactionSample,
+	emergencyCompactionReason,
 	estimateMessageTokensHeuristic,
 	estimateTokens,
 	generateBranchSummary,
@@ -142,6 +144,7 @@ import { onAppendOnlyModeChanged } from "../config/settings";
 import { RawSseDebugBuffer } from "../debug/raw-sse-buffer";
 import { loadCapability } from "../discovery";
 import { expandApplyPatchToEntries, normalizeDiff, normalizeToLF, ParseError, previewPatch, stripBom } from "../edit";
+import { disposeVmContextsByOwner } from "../eval/js/context-manager";
 import {
 	disposeKernelSessionsByOwner,
 	executePython as executePythonCommand,
@@ -234,6 +237,7 @@ import {
 import type { ToolSession } from "../tools";
 import { AskTool } from "../tools/ask";
 import { assertEditableFile } from "../tools/auto-generated-guard";
+import { releaseTabsForOwner } from "../tools/browser/tab-supervisor";
 import type { CheckpointState } from "../tools/checkpoint";
 import { outputMeta, wrapToolWithMetaNotice } from "../tools/output-meta";
 import { normalizeLocalScheme, resolveToCwd } from "../tools/path-utils";
@@ -545,6 +549,13 @@ function formatRetryFallbackBaseSelector(selector: RetryFallbackSelector): strin
 const IRC_REPLY_MAX_BYTES = 4096;
+/**
+ * Hard cap for {@link AgentSession.disposeChildSubprocesses}. A `SIGINT`/`SIGTERM` handler
+ * awaits this teardown before exiting, so it must never block longer than this even if a
+ * subprocess (wedged Chrome renderer, stuck Python cell) refuses to settle.
+ */
+const SIGNAL_TEARDOWN_TIMEOUT_MS = 5_000;
 /**
  * Collapse degenerate IRC ephemeral replies before they hit the relay.
  * Models occasionally loop on a single line (~16 reports of N-times-repeated
@@ -907,6 +918,7 @@ export class AgentSession {
 	// Compaction state
 	#compactionAbortController: AbortController | undefined = undefined;
 	#autoCompactionAbortController: AbortController | undefined = undefined;
+	#resourceSampler: () => EmergencyCompactionSample = () => this.#defaultResourceSample();
 	#prePromptContextCheckPromise: Promise<void> | undefined = undefined;
 	// Branch summarization state
@@ -3187,6 +3199,13 @@ export class AgentSession {
 			}
 		}
 		await shutdownAllLspClients();
+		// F13: release only THIS session's browser tabs on dispose (kill:false → remote
+		// browsers disconnect, headless close gracefully). Scoped by the session id the
+		// browser tool tagged tabs with, so other live sessions' tabs are untouched.
+		// No-op when this session opened no tabs. Failure is logged, not thrown.
+		await releaseTabsForOwner(this.sessionManager.getSessionId()).catch((error: unknown) =>
+			logger.warn("session dispose: releaseTabsForOwner failed", { error }),
+		);
 		const pythonExecutionsSettled = await this.#prepareEvalExecutionsForDispose();
 		if (!pythonExecutionsSettled) {
 			logger.warn(
@@ -3194,6 +3213,7 @@ export class AgentSession {
 			);
 		}
 		await disposeKernelSessionsByOwner(this.#evalKernelOwnerId);
+		await disposeVmContextsByOwner(this.#evalKernelOwnerId);
 		this.#releasePowerAssertion();
 		await this.sessionManager.close();
 		this.#closeAllProviderSessions("dispose");
@@ -3208,6 +3228,36 @@ export class AgentSession {
 		this.#eventListeners = [];
 	}
+	/**
+	 * Bounded, best-effort teardown of the subprocess-spawning resources this session
+	 * owns: the browser tool's headless/spawned Chrome and the Python eval kernel + JS VM
+	 * contexts. Unlike {@link dispose}, this touches only child processes and is time-boxed,
+	 * so a top-level `SIGINT`/`SIGTERM`/`SIGHUP` handler can run it without hanging — without
+	 * it, an external kill bypasses `dispose()` and orphans Chrome/Python to PID 1 (#698).
+	 *
+	 * Idempotent: every step is a no-op once the graceful {@link dispose} path has released
+	 * the resources. Per-step failures are logged instead of thrown, and the whole run is
+	 * capped at `timeoutMs` so a wedged subprocess can't stall process exit. The deadline
+	 * timer is cancelled as soon as the race settles, so a teardown that finishes early does
+	 * not leave a pending timer behind for the remainder of `timeoutMs`.
+	 *
+	 * @param timeoutMs Maximum time, in milliseconds, to wait for the teardown steps.
+	 */
+	async disposeChildSubprocesses(timeoutMs = SIGNAL_TEARDOWN_TIMEOUT_MS): Promise<void> {
+		const sessionId = this.sessionManager.getSessionId();
+		const kernelOwnerId = this.#evalKernelOwnerId;
+		const work = Promise.allSettled([
+			// kill:true so a forced exit also reaps spawned-app Chrome we own (headless
+			// always closes; connected/attached browsers only disconnect — never killed).
+			releaseTabsForOwner(sessionId, { kill: true }).catch((error: unknown) =>
+				logger.warn("signal teardown: releaseTabsForOwner failed", { error }),
+			),
+			disposeKernelSessionsByOwner(kernelOwnerId).catch((error: unknown) =>
+				logger.warn("signal teardown: disposeKernelSessionsByOwner failed", { error }),
+			),
+			disposeVmContextsByOwner(kernelOwnerId).catch((error: unknown) =>
+				logger.warn("signal teardown: disposeVmContextsByOwner failed", { error }),
+			),
+		]);
+		// A clearable timer (rather than an uncancellable sleep) lets the finally block
+		// drop the deadline the moment the teardown steps have all settled.
+		let timer: ReturnType<typeof setTimeout> | undefined;
+		const deadline = new Promise<"timeout">(resolve => {
+			timer = setTimeout(() => resolve("timeout"), timeoutMs);
+		});
+		try {
+			const outcome = await Promise.race([work, deadline]);
+			if (outcome === "timeout") {
+				// Surface the cut-off so an orphaned subprocess can be traced back to it.
+				logger.warn("signal teardown: timed out before child subprocesses settled", { timeoutMs });
+			}
+		} finally {
+			clearTimeout(timer);
+		}
+	}
 	#closeAllProviderSessions(reason: string): void {
 		for (const [providerKey, state] of this.#providerSessionState) {
 			try {
@@ -6016,6 +6066,44 @@ export class AgentSession {
 		);
 	}
+	/**
+	 * Canonical provider-aware fast-mode predicate: reports whether the session's
+	 * configured `serviceTier` resolves to `"priority"` for the given model `provider`.
+	 *
+	 * Yields false when a scoped tier does not match the provider (e.g. `"openai-only"`
+	 * on an anthropic provider) and whenever `provider` is undefined.
+	 *
+	 * @param provider Provider of the model being asked about, if any.
+	 * @returns True only when a provider is given and its resolved tier is `"priority"`.
+	 */
+	isFastForProvider(provider?: string): boolean {
+		// No provider means no model is selected, so fast mode cannot apply — not even for
+		// an unscoped `priority` tier that `resolveServiceTier` would otherwise pass through.
+		// The short-circuit keeps the resolver from being consulted in that case.
+		return provider !== undefined && resolveServiceTier(this.serviceTier, provider) === "priority";
+	}
+	/**
+	 * Resolves the service tier that task-tool subagent sessions
+	 * (executor/architect/planner/critic) run under, mirroring `createSubagentSettings`.
+	 *
+	 * The `task.serviceTier` setting is used as-is, with two special values:
+	 * `"inherit"` defers to the main session tier and `"none"` means no tier at all.
+	 *
+	 * @returns The effective subagent tier, or undefined when none applies.
+	 */
+	#subagentServiceTier(): ServiceTier | undefined {
+		const taskTier = this.settings.get("task.serviceTier");
+		switch (taskTier) {
+			case "inherit":
+				// Follow whatever the main session is currently configured with.
+				return this.serviceTier;
+			case "none":
+				return undefined;
+			default:
+				return taskTier;
+		}
+	}
+	/**
+	 * Fast-mode predicate for task-tool subagent roles. It is provider-aware like
+	 * {@link isFastForProvider}, but is evaluated against the effective subagent tier
+	 * (`task.serviceTier`) instead of the main session tier. Intended for
+	 * `task.agentModelOverrides` role rows, so the ⚡ glyph reflects the tier the
+	 * subagent actually runs under.
+	 *
+	 * @param provider Provider of the subagent's model, if any.
+	 * @returns True only when a provider is given and the subagent tier resolves to `"priority"`.
+	 */
+	isFastForSubagentProvider(provider?: string): boolean {
+		// Without a provider there is nothing to resolve against; the subagent tier is
+		// only looked up once a provider is known.
+		return provider !== undefined && resolveServiceTier(this.#subagentServiceTier(), provider) === "priority";
+	}
 	/**
 	 * True when the configured `serviceTier` resolves to `"priority"` for the
 	 * *currently selected model's provider*. Returns false for scoped tiers
@@ -6023,7 +6111,7 @@ export class AgentSession {
 	 * no model is selected.
 	 */
 	isFastModeActive(): boolean {
-		return resolveServiceTier(this.serviceTier, this.model?.provider) === "priority";
+		return this.isFastForProvider(this.model?.provider);
 	}
 	setServiceTier(serviceTier: ServiceTier | undefined): void {
@@ -6394,6 +6482,7 @@ export class AgentSession {
 				model,
 				apiKey,
 				{
+					...this.#maintenanceProviderTransport(),
 					systemPrompt: this.#baseSystemPrompt,
 					tools: this.agent.state.tools,
 					customInstructions,
@@ -6587,11 +6676,55 @@ export class AgentSession {
 		}
 	}
+	/**
+	 * Test seam: override the emergency-compaction resource sampler so tests never read
+	 * real process memory (the default sampler reads `process.memoryUsage().heapUsed`).
+	 *
+	 * @param sampler Function stored as the session's sampler; it replaces the default and
+	 * is what later resource checks invoke to obtain an {@link EmergencyCompactionSample}.
+	 */
+	setResourceSampler(sampler: () => EmergencyCompactionSample): void {
+		this.#resourceSampler = sampler;
+	}
+	/**
+	 * Default emergency-compaction sampler: walks the current message list and reports
+	 * the live heap usage alongside rough payload sizes.
+	 *
+	 * Sizes are string lengths (UTF-16 code units), not encoded byte counts: every string
+	 * `content`, block `text`, and block `data` contributes its `.length` to
+	 * `providerBytes`, and block `data` additionally contributes to `imageBytes`.
+	 *
+	 * @returns Heap usage, accumulated provider/image sizes, and the message count.
+	 */
+	#defaultResourceSample(): EmergencyCompactionSample {
+		let providerBytes = 0;
+		let imageBytes = 0;
+		// Adds one content block's text/data lengths to the running totals.
+		const tally = (part: unknown): void => {
+			if (!part || typeof part !== "object") return;
+			const { text, data } = part as { text?: unknown; data?: unknown };
+			if (typeof text === "string") providerBytes += text.length;
+			if (typeof data === "string") {
+				// Block data counts toward both the image total and the overall total.
+				imageBytes += data.length;
+				providerBytes += data.length;
+			}
+		};
+		for (const message of this.state.messages) {
+			const content = (message as { content?: unknown }).content;
+			if (typeof content === "string") {
+				providerBytes += content.length;
+				continue;
+			}
+			if (!Array.isArray(content)) continue;
+			for (const part of content) tally(part);
+		}
+		return {
+			heapUsedBytes: process.memoryUsage().heapUsed,
+			providerBytes,
+			messageCount: this.state.messages.length,
+			imageBytes,
+		};
+	}
 	async #checkEstimatedContextBeforePromptOnce(pendingMessages: readonly AgentMessage[]): Promise<void> {
 		const model = this.model;
 		if (!model) return;
 		const contextWindow = model.contextWindow ?? 0;
 		if (contextWindow <= 0) return;
+		// F6: non-disableable emergency floor — compact before OOM even when token-based
+		// compaction is disabled or its threshold is set too high (weak-hardware protection).
+		const emergencyReason = emergencyCompactionReason(this.#resourceSampler());
+		if (emergencyReason) {
+			logger.warn("Emergency compaction triggered (resource floor exceeded)", { reason: emergencyReason });
+			await this.#runAutoCompaction("overflow", false, false, {
+				continueAfterMaintenance: false,
+				deferHandoffMaintenance: false,
+				force: true,
+			});
+			return;
+		}
 		const compactionSettings = this.settings.getGroup("compaction");
 		if (!compactionSettings.enabled || compactionSettings.strategy === "off") return;
@@ -7243,7 +7376,17 @@ export class AgentSession {
 			addCandidate(this.#resolveRoleModelFull(role, availableModels, currentModel).model);
 		}
-		const sortedByContext = [...availableModels].sort((a, b) => b.contextWindow - a.contextWindow);
+		// Last-resort fallback: the largest-context model that shares the ACTIVE
+		// model's provider. Scoping this to the current provider keeps auto-
+		// compaction on the user's configured/custom route instead of silently
+		// defaulting to an unrelated provider (e.g. a stray OpenAI credential
+		// with no remaining credit) just because it happens to be in the bundled
+		// catalog. Cross-provider compaction stays possible, but only when the
+		// user opts in explicitly via modelRoles (handled by the loop above).
+		const fallbackProvider = currentModel?.provider;
+		const sortedByContext = [...availableModels]
+			.filter(model => fallbackProvider === undefined || model.provider === fallbackProvider)
+			.sort((a, b) => b.contextWindow - a.contextWindow);
 		for (const model of sortedByContext) {
 			if (!seen.has(this.#getModelKey(model))) {
 				addCandidate(model);
@@ -7271,6 +7414,25 @@ export class AgentSession {
 		);
 	}
+	/**
+	 * Builds the transport-affinity fields that local maintenance one-shot LLM calls
+	 * (compaction, handoff, branch summary) spread into their options, so those calls
+	 * reuse the live turn's provider session state and configured WebSocket transport
+	 * preference instead of falling back to a fresh HTTP/SSE session.
+	 *
+	 * The session id mirrors the `providerSessionId ?? sessionId` affinity the agent
+	 * loop sends per turn.
+	 *
+	 * @returns The session id, the shared provider session state map, and the agent's
+	 * WebSocket preference.
+	 */
+	#maintenanceProviderTransport(): {
+		sessionId: string | undefined;
+		providerSessionState: Map<string, ProviderSessionState>;
+		preferWebsockets: boolean | undefined;
+	} {
+		const agent = this.agent;
+		// Provider-scoped id wins; the plain agent session id is the fallback.
+		const sessionId = agent.providerSessionId ?? agent.sessionId;
+		const preferWebsockets = agent.preferWebsockets;
+		return { sessionId, providerSessionState: this.#providerSessionState, preferWebsockets };
+	}
 	async #compactWithFallbackModel(
 		preparation: CompactionPreparation,
 		customInstructions: string | undefined,
@@ -7287,6 +7449,7 @@ export class AgentSession {
 			try {
 				return await compact(preparation, candidate, apiKey, customInstructions, signal, {
 					...options,
+					...this.#maintenanceProviderTransport(),
 					metadata: this.agent.metadataForProvider(candidate.provider),
 					convertToLlm,
 					telemetry,
@@ -7367,11 +7530,13 @@ export class AgentSession {
 		reason: "overflow" | "threshold" | "idle",
 		willRetry: boolean,
 		deferred = false,
-		options?: { continueAfterMaintenance?: boolean; deferHandoffMaintenance?: boolean },
+		options?: { continueAfterMaintenance?: boolean; deferHandoffMaintenance?: boolean; force?: boolean },
 	): Promise<void> {
 		const compactionSettings = this.settings.getGroup("compaction");
-		if (compactionSettings.strategy === "off") return;
-		if (reason !== "idle" && !compactionSettings.enabled) return;
+		// `force` is the non-disableable emergency floor (F6): it bypasses the user's
+		// disabled/off settings so a resource-floor breach still compacts before OOM.
+		if (!options?.force && compactionSettings.strategy === "off") return;
+		if (!options?.force && reason !== "idle" && !compactionSettings.enabled) return;
 		const generation = this.#promptGeneration;
 		if (
 			options?.deferHandoffMaintenance !== false &&
@@ -7574,6 +7739,7 @@ export class AgentSession {
 					while (true) {
 						try {
 							compactResult = await compact(preparation, candidate, apiKey, undefined, autoCompactionSignal, {
+								...this.#maintenanceProviderTransport(),
 								promptOverride: compactionPrep.hookPrompt,
 								extraContext: compactionPrep.hookContext,
 								remoteInstructions: this.#baseSystemPrompt.join("\n\n"),
@@ -7799,7 +7965,12 @@ export class AgentSession {
 	 */
 	#isRetryableError(message: AssistantMessage): boolean {
 		const classification = this.#classifyErrorForRetry(message);
-		return classification === "usage_limit" || classification === "transient" || classification === "unknown";
+		return (
+			classification === "usage_limit" ||
+			classification === "transient" ||
+			classification === "unknown" ||
+			classification === "first_event_timeout"
+		);
 	}
 	#isTransientErrorMessage(errorMessage: string): boolean {
@@ -7825,6 +7996,33 @@ export class AgentSession {
 		);
 	}
+	/**
+	 * Reports whether `errorMessage` describes a first-event timeout, i.e. the stream
+	 * watchdog aborted because no event arrived within the first-event window.
+	 *
+	 * Matching is case-insensitive and covers the shared lazy-stream message as well as
+	 * the per-provider variants
+	 * ("<Provider> stream timed out while waiting for the first event").
+	 *
+	 * @param errorMessage Error text taken from the failed assistant message.
+	 */
+	#isFirstEventTimeoutErrorMessage(errorMessage: string): boolean {
+		const firstEventTimeout = /timed?\s*out while waiting for the first event|timeout waiting for first/i;
+		return firstEventTimeout.test(errorMessage);
+	}
+	/**
+	 * Decides whether a first-event timeout should fail closed: retry a bounded number
+	 * of times (capped at retry.maxRetries) and then surface, rather than joining the
+	 * unbounded transient-retry class.
+	 *
+	 * This targets the ollama-chat API, which is exclusively ollama-cloud (local Ollama
+	 * uses the openai-responses API). That remote, queued backend can stall before its
+	 * first token even for tiny prompts; an unbounded continuation retry re-issues the
+	 * full request on every attempt and can silently spike upstream usage (#713).
+	 * First-party providers keep their existing unbounded first-event-timeout retry
+	 * behavior.
+	 *
+	 * @param message The errored assistant message being classified.
+	 * @returns True when either the active model or the errored message uses `ollama-chat`.
+	 */
+	#shouldFailClosedOnFirstEventTimeout(message: AssistantMessage): boolean {
+		// The active model is checked first; it is normally the one that produced the error.
+		const activeApi = this.model?.api;
+		if (activeApi === "ollama-chat") return true;
+		// Otherwise the errored message's own API still counts, covering the rare case
+		// where the session model has already moved on.
+		return message.api === "ollama-chat";
+	}
 	#isTerminalErrorMessage(errorMessage: string): boolean {
 		// Errors that will never succeed on retry (auth/permission, malformed
 		// request, unknown/unsupported model). These surface immediately rather
@@ -7846,11 +8044,12 @@ export class AgentSession {
 	/**
 	 * Ordered retry classification: overflow (compaction) -> terminal (surface)
-	 * -> usage_limit (rotation) -> transient (retry) -> unknown (retry).
+	 * -> usage_limit (rotation) -> first_event_timeout (bounded retry) ->
+	 * transient (retry) -> unknown (retry).
 	 */
 	#classifyErrorForRetry(
 		message: AssistantMessage,
-	): "none" | "overflow" | "terminal" | "usage_limit" | "transient" | "unknown" {
+	): "none" | "overflow" | "terminal" | "usage_limit" | "first_event_timeout" | "transient" | "unknown" {
 		if (message.stopReason !== "error" || !message.errorMessage) return "none";
 		const contextWindow = this.model?.contextWindow ?? 0;
 		if (isContextOverflow(message, contextWindow)) return "overflow";
@@ -7878,6 +8077,13 @@ export class AgentSession {
 		if (isTerminalHttp4xx && (explicitStatus !== undefined || !/rate.?limit|too many requests/i.test(err))) {
 			return "terminal";
 		}
+		// A first-event timeout on ollama-cloud (the ollama-chat API) must not
+		// join the unbounded transient class: each continuation retry re-issues
+		// the full request to a remote, billable backend, so an unbounded loop
+		// can silently spike usage (#713). Bound it to retry.maxRetries instead.
+		if (this.#isFirstEventTimeoutErrorMessage(err) && this.#shouldFailClosedOnFirstEventTimeout(message)) {
+			return "first_event_timeout";
+		}
 		if (this.#isTransientErrorMessage(err)) return "transient";
 		return "unknown";
 	}
@@ -9381,6 +9587,7 @@ export class AgentSession {
 			}
 			const branchSummarySettings = this.settings.getGroup("branchSummary");
 			const result = await generateBranchSummary(entriesToSummarize, {
+				...this.#maintenanceProviderTransport(),
 				model,
 				apiKey,
 				signal: this.#branchSummaryAbortController.signal,
@@ -9508,17 +9715,15 @@ export class AgentSession {
 	 */
 	getSessionStats(): SessionStats {
 		const state = this.state;
-		const userMessages = state.messages.filter(m => m.role === "user").length;
-		const assistantMessages = state.messages.filter(m => m.role === "assistant").length;
-		const toolResults = state.messages.filter(m => m.role === "toolResult").length;
+		let userMessages = 0;
+		let assistantMessages = 0;
+		let toolResults = 0;
 		let toolCalls = 0;
 		let totalInput = 0;
 		let totalOutput = 0;
 		let totalCacheRead = 0;
 		let totalCacheWrite = 0;
 		let totalCost = 0;
 		let totalPremiumRequests = 0;
 		const getTaskToolUsage = (details: unknown): Usage | undefined => {
 			if (!details || typeof details !== "object") return undefined;
@@ -9528,8 +9733,13 @@ export class AgentSession {
 			return usage as Usage;
 		};
+		// Single pass over messages (replaces three role filters plus a separate usage
+		// loop) so per-turn stats stay O(messages + assistant content blocks), not O(4N).
 		for (const message of state.messages) {
-			if (message.role === "assistant") {
+			if (message.role === "user") {
+				userMessages += 1;
+			} else if (message.role === "assistant") {
+				assistantMessages += 1;
 				const assistantMsg = message as AssistantMessage;
 				toolCalls += assistantMsg.content.filter(c => c.type === "toolCall").length;
 				totalInput += assistantMsg.usage.input;
@@ -9538,17 +9748,18 @@ export class AgentSession {
 				totalCacheWrite += assistantMsg.usage.cacheWrite;
 				totalPremiumRequests += assistantMsg.usage.premiumRequests ?? 0;
 				totalCost += assistantMsg.usage.cost.total;
-			}
-			if (message.role === "toolResult" && message.toolName === "task") {
-				const usage = getTaskToolUsage(message.details);
-				if (usage) {
-					totalInput += usage.input;
-					totalOutput += usage.output;
-					totalCacheRead += usage.cacheRead;
-					totalCacheWrite += usage.cacheWrite;
-					totalPremiumRequests += usage.premiumRequests ?? 0;
-					totalCost += usage.cost.total;
+			} else if (message.role === "toolResult") {
+				toolResults += 1;
+				if (message.toolName === "task") {
+					const usage = getTaskToolUsage(message.details);
+					if (usage) {
+						totalInput += usage.input;
+						totalOutput += usage.output;
+						totalCacheRead += usage.cacheRead;
+						totalCacheWrite += usage.cacheWrite;
+						totalPremiumRequests += usage.premiumRequests ?? 0;
+						totalCost += usage.cost.total;
+					}
 				}
 			}
 		}
@@ -9709,11 +9920,46 @@ export class AgentSession {
 		return tokens;
 	}
+	#nativeTokenCache = new WeakMap<AgentMessage, { len: number; tokens: number }>();
+	/**
+	 * Cheap content-size signal used to invalidate the native token cache when a message
+	 * is mutated. Recursively sums string lengths across the whole message, so it covers
+	 * every provider-visible shape (text/thinking/tool args, toolResult output, tool names,
+	 * etc.) without allocating a serialized copy.
+	 *
+	 * Numbers and booleans count as a fixed 8, anything else that is not a string, array,
+	 * or object counts as 0, and recursion stops contributing past depth 6. A
+	 * size-preserving in-place edit yields only a benign estimate drift.
+	 *
+	 * @param value Message, or a nested part of one, to measure.
+	 * @param depth Current recursion depth; callers start at the default of 0.
+	 * @returns The accumulated size signal for `value`.
+	 */
+	#messageTokenSize(value: unknown, depth = 0): number {
+		if (depth > 6) return 0;
+		switch (typeof value) {
+			case "string":
+				return value.length;
+			case "number":
+			case "boolean":
+				return 8;
+			case "object": {
+				if (value === null) return 0;
+				// Arrays are measured element by element, plain objects by their own values.
+				const children: unknown[] = Array.isArray(value) ? value : Object.values(value);
+				let total = 0;
+				for (const child of children) total += this.#messageTokenSize(child, depth + 1);
+				return total;
+			}
+			default:
+				return 0;
+		}
+	}
 	#estimateMessageNativeContextTokens(message: AgentMessage): number {
+		// F10/F22: cache the expensive native token count per message object, invalidated by a
+		// cheap content-size signal, so unchanged (stable-size) messages are not re-tokenized on
+		// every pre-prompt estimate. A rare size-preserving in-place edit yields only a benign
+		// token-estimate drift, never wrong output.
+		const len = this.#messageTokenSize(message);
+		const cached = this.#nativeTokenCache.get(message);
+		if (cached && cached.len === len) return cached.tokens;
 		let tokens = 0;
 		for (const llmMessage of convertToLlm([message])) {
 			tokens += estimateTokens(llmMessage);
 		}
+		this.#nativeTokenCache.set(message, { len, tokens });
 		return tokens;
 	}

package/src/session/artifacts.ts CHANGED Viewed

@@ -7,6 +7,11 @@
 import * as fs from "node:fs/promises";
 import * as path from "node:path";
+import { DEFAULT_ARTIFACT_MAX_BYTES, truncateHeadBytes } from "./streaming-output";
+export interface ArtifactSaveOptions {
+	maxBytes?: number;
+}
 /**
  * Manages artifact storage for a session.
  *
@@ -94,9 +99,19 @@ export class ArtifactManager {
 	 * @param toolType Tool name for file extension (e.g., "bash", "read")
 	 * @returns Artifact ID (numeric string)
 	 */
-	async save(content: string, toolType: string): Promise<string> {
+	async save(content: string, toolType: string, options: ArtifactSaveOptions = {}): Promise<string> {
 		const { id, path } = await this.allocatePath(toolType);
-		await Bun.write(path, content);
+		const maxBytes = Math.max(0, options.maxBytes ?? DEFAULT_ARTIFACT_MAX_BYTES);
+		const contentBytes = Buffer.byteLength(content, "utf-8");
+		if (contentBytes > maxBytes) {
+			const truncated = truncateHeadBytes(content, maxBytes);
+			await Bun.write(
+				path,
+				`${truncated.text}\n[artifact truncated after ${truncated.bytes} bytes; omitted at least ${contentBytes - truncated.bytes} bytes]\n`,
+			);
+		} else {
+			await Bun.write(path, content);
+		}
 		return id;
 	}

package/src/session/blob-store.ts CHANGED Viewed

@@ -167,19 +167,49 @@ export class EphemeralBlobStore extends BlobStore {
 }
 export class MemoryBlobStore extends BlobStore {
+	/**
+	 * Generous byte/count LRU bound (F8). Content-addressed resident blobs are fail-closed
+	 * on miss (callers raise/handle {@link ResidentBlobMissingError}), so evicting the
+	 * least-recently-used entry on an extremely large session is preferable to unbounded
+	 * RAM growth. The caps sit well above normal usage and only trip on pathological sizes.
+	 */
+	static readonly #MAX_BYTES = 64 * 1024 * 1024;
+	static readonly #MAX_COUNT = 4096;
 	#blobs = new Map<string, Buffer>();
+	#bytes = 0;
 	constructor() {
 		super(":memory:");
 	}
+	/**
+	 * Inserts `data` under `hash` as the most recently used entry, then evicts from the
+	 * oldest end of the map until both the byte and count caps are satisfied. At least
+	 * one entry is always kept, so a single blob larger than the byte cap stays resident.
+	 *
+	 * @param hash Content hash used as the map key.
+	 * @param data Buffer stored for that hash; its `byteLength` is added to the running total.
+	 */
+	#store(hash: string, data: Buffer): void {
+		const previous = this.#blobs.get(hash);
+		if (previous) {
+			// Re-inserting moves the key to the newest position; drop the old size first.
+			this.#bytes -= previous.byteLength;
+			this.#blobs.delete(hash);
+		}
+		this.#blobs.set(hash, data);
+		this.#bytes += data.byteLength;
+		// Map iteration follows insertion order, so the first entries seen are the oldest.
+		for (const [key, blob] of this.#blobs) {
+			const withinCaps =
+				this.#bytes <= MemoryBlobStore.#MAX_BYTES && this.#blobs.size <= MemoryBlobStore.#MAX_COUNT;
+			if (withinCaps || this.#blobs.size <= 1) break;
+			this.#blobs.delete(key);
+			this.#bytes -= blob.byteLength;
+		}
+	}
 	async put(data: Buffer): Promise<BlobPutResult> {
 		return this.putSync(data);
 	}
 	putSync(data: Buffer): BlobPutResult {
 		const hash = new Bun.SHA256().update(data).digest("hex");
-		this.#blobs.set(hash, Buffer.from(data));
+		this.#store(hash, Buffer.from(data));
 		return {
 			hash,
 			path: `memory:${hash}`,
@@ -195,7 +225,11 @@ export class MemoryBlobStore extends BlobStore {
 	getSync(hash: string): Buffer | null {
 		const data = this.#blobs.get(hash);
-		return data ? Buffer.from(data) : null;
+		if (!data) return null;
+		// Refresh LRU recency on hit so hot blobs survive eviction.
+		this.#blobs.delete(hash);
+		this.#blobs.set(hash, data);
+		return Buffer.from(data);
 	}
 	async has(hash: string): Promise<boolean> {

package/src/session/session-manager.ts CHANGED Viewed

@@ -889,8 +889,27 @@ async function resolvePersistedBlobRefs(value: unknown, blobStore: BlobStore, ke
 	);
 }
+/** Number of blob-resolution tasks allowed to be in flight at the same time. */
+const BLOB_RESOLVE_CONCURRENCY = 8;
+/**
+ * Runs async tasks with bounded concurrency so an image-heavy resume never materializes
+ * every blob's base64 simultaneously (F8: avoids the transient OOM spike of an unbounded
+ * Promise.all over all historical images).
+ *
+ * A fixed set of workers pulls tasks from a shared cursor in array order; each worker
+ * starts its next task only after its current one resolves. The returned promise
+ * rejects as soon as any task rejects.
+ *
+ * @param tasks Thunks to execute; each is started at most once.
+ * @param limit Upper bound on concurrently running tasks (at least one worker is used).
+ */
+async function runWithConcurrency(tasks: Array<() => Promise<void>>, limit: number): Promise<void> {
+	let cursor = 0;
+	const drain = async (): Promise<void> => {
+		for (;;) {
+			if (cursor >= tasks.length) return;
+			// Claim the slot before awaiting so no two workers pick the same task.
+			const index = cursor;
+			cursor += 1;
+			await tasks[index]!();
+		}
+	};
+	const poolSize = Math.max(1, Math.min(limit, tasks.length));
+	const pool = Array.from({ length: poolSize }, () => drain());
+	await Promise.all(pool);
+}
 async function resolveBlobRefsInEntries(entries: FileEntry[], blobStore: BlobStore): Promise<void> {
-	const promises: Promise<void>[] = [];
+	const tasks: Array<() => Promise<void>> = [];
 	for (const entry of entries) {
 		if (entry.type === "session") continue;
@@ -902,22 +921,19 @@ async function resolveBlobRefsInEntries(entries: FileEntry[], blobStore: BlobSto
 			contentArray = entry.content;
 		}
-		if (contentArray) {
-			for (const block of contentArray) {
-				if (isImageBlock(block) && isBlobRef(block.data)) {
-					promises.push(
-						resolveImageData(blobStore, block.data).then(resolved => {
-							block.data = resolved;
-						}),
-					);
+		tasks.push(async () => {
+			if (contentArray) {
+				for (const block of contentArray) {
+					if (isImageBlock(block) && isBlobRef(block.data)) {
+						block.data = await resolveImageData(blobStore, block.data);
+					}
 				}
 			}
-		}
-		promises.push(resolvePersistedBlobRefs(entry, blobStore));
+			await resolvePersistedBlobRefs(entry, blobStore);
+		});
 	}
-	await Promise.all(promises);
+	await runWithConcurrency(tasks, BLOB_RESOLVE_CONCURRENCY);
 }
 /**