npm - @oh-my-pi/pi-coding-agent - Versions diffs - 16.1.2 → 16.1.3 - Mend

@oh-my-pi/pi-coding-agent 16.1.2 → 16.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/CHANGELOG.md +30 -1
package/dist/cli.js +3046 -3047
package/dist/types/config/model-resolver.d.ts +3 -3
package/dist/types/mnemopi/embed-client.d.ts +70 -0
package/dist/types/mnemopi/embed-protocol.d.ts +52 -0
package/dist/types/mnemopi/embed-worker.d.ts +12 -0
package/dist/types/mnemopi/state.d.ts +9 -1
package/dist/types/session/agent-storage.d.ts +2 -0
package/dist/types/session/auth-broker-config.d.ts +3 -2
package/dist/types/session/history-storage.d.ts +1 -1
package/dist/types/tools/image-gen.d.ts +2 -2
package/dist/types/utils/image-loading.d.ts +1 -1
package/dist/types/utils/ipc.d.ts +22 -0
package/dist/types/web/search/providers/perplexity-auth.d.ts +37 -0
package/package.json +12 -12
package/src/cli.ts +8 -0
package/src/commands/token.ts +52 -33
package/src/config/append-only-context-mode.ts +45 -0
package/src/config/model-discovery.ts +3 -0
package/src/config/model-registry.ts +21 -3
package/src/config/model-resolver.ts +31 -8
package/src/discovery/builtin-rules/ts-no-return-type.md +0 -1
package/src/lsp/client.ts +24 -0
package/src/mnemopi/backend.ts +49 -3
package/src/mnemopi/embed-client.ts +401 -0
package/src/mnemopi/embed-protocol.ts +35 -0
package/src/mnemopi/embed-worker.ts +113 -0
package/src/mnemopi/state.ts +29 -1
package/src/modes/components/custom-editor.ts +1 -1
package/src/modes/components/model-selector.ts +2 -2
package/src/modes/components/welcome.ts +1 -1
package/src/modes/controllers/event-controller.ts +8 -0
package/src/modes/controllers/selector-controller.ts +2 -2
package/src/modes/theme/theme.ts +69 -0
package/src/sdk.ts +4 -0
package/src/session/agent-session.ts +8 -0
package/src/session/agent-storage.ts +14 -0
package/src/session/auth-broker-config.ts +2 -1
package/src/session/history-storage.ts +13 -1
package/src/stt/asr-client.ts +2 -7
package/src/tiny/title-client.ts +2 -7
package/src/tools/image-gen.ts +4 -8
package/src/tools/render-utils.ts +4 -1
package/src/tts/tts-client.ts +2 -7
package/src/utils/image-loading.ts +12 -2
package/src/utils/ipc.ts +38 -0
package/src/web/search/providers/perplexity-auth.ts +133 -0
package/src/web/search/providers/perplexity.ts +2 -125

package/dist/types/config/model-resolver.d.ts CHANGED Viewed

@@ -201,9 +201,9 @@ export declare function resolveModelScope(patterns: string[], modelRegistry: Pic
  * the result to models matching those patterns.
  *
  * Returns the unfiltered available list when `enabledModels` is empty.
- * Returns an empty list when `enabledModels` is configured but no available
- * model matches any pattern — callers MUST treat this as "no usable model"
- * rather than falling back to the global default (see issue #1022).
+ * Returns an empty list when `enabledModels` is configured but no model matches
+ * any pattern — callers MUST treat this as "no usable model" rather than
+ * falling back to the global default (see issue #1022).
  */
 export declare function resolveAllowedModels(modelRegistry: Pick<ModelRegistry, "getAvailable" | "getCanonicalVariants">, settings: Settings | undefined, preferences?: ModelMatchPreferences): Promise<Model<Api>[]>;
 /**

package/dist/types/mnemopi/embed-client.d.ts ADDED Viewed

@@ -0,0 +1,70 @@
+import type { Subprocess } from "bun";
+import type { MnemopiEmbedModelId, MnemopiEmbedWorkerInbound, MnemopiEmbedWorkerOutbound } from "./embed-protocol";
+/**
+ * Abstraction over the mnemopi embeddings subprocess. The runtime
+ * implementation is a Bun child process so `onnxruntime-node`'s NAPI
+ * constructor + finalizer never run inside the main agent address space —
+ * those destructors segfault Bun on Windows when mnemopi's local embedding
+ * provider loads fastembed in the main process (issue #3031; the mnemopi
+ * sibling of the tiny-model fix from #1606 / #1607).
+ */
+export interface MnemopiEmbedWorkerHandle {
+    send(message: MnemopiEmbedWorkerInbound): void;
+    onMessage(handler: (message: MnemopiEmbedWorkerOutbound) => void): () => void;
+    onError(handler: (error: Error) => void): () => void;
+    terminate(): Promise<void>;
+}
+/**
+ * Hidden subcommand on the main CLI that boots the mnemopi embeddings worker
+ * in the spawned subprocess. Kept in sync with the dispatch in `cli.ts`.
+ */
+export declare const MNEMOPI_EMBED_WORKER_ARG = "__omp_worker_mnemopi_embed";
+interface SpawnedSubprocess {
+    proc: Subprocess<"ignore", "ignore", "ignore">;
+    inbound: Set<(message: MnemopiEmbedWorkerOutbound) => void>;
+    errors: Set<(error: Error) => void>;
+    /**
+     * Flipped to `true` right before the deliberate SIGKILL so `onExit` can
+     * distinguish the expected hard-kill from a crash (SIGSEGV from a native
+     * fault, OOM SIGKILL, operator `kill -9`). Only the latter surfaces as a
+     * worker error so callers don't await forever.
+     */
+    intentionalExit: {
+        value: boolean;
+    };
+}
+/**
+ * Spawn the mnemopi embeddings worker as a subprocess. Exported for tests and
+ * the smoke probe; production callers go through {@link spawnMnemopiEmbedWorker}.
+ */
+export declare function createMnemopiEmbedSubprocess(): SpawnedSubprocess;
+/**
+ * Per-model wrapper produced by {@link MnemopiEmbedClient.initialize}.
+ * `embed()` round-trips one batch of texts through the worker subprocess and
+ * yields the resulting vectors in a single asynchronous batch — fastembed's
+ * own iterator was emitting batches that we collect on the child side anyway,
+ * and serializing per-batch over IPC would not improve throughput.
+ */
+export interface MnemopiSubprocessEmbeddingModel {
+    embed(texts: string[], batchSize?: number): AsyncIterable<number[][]>;
+}
+export declare class MnemopiEmbedClient {
+    #private;
+    constructor(spawnWorker?: () => MnemopiEmbedWorkerHandle);
+    /**
+     * Load the named fastembed model inside the subprocess. Resolves to a
+     * thin wrapper whose `embed()` round-trips through the same worker, or
+     * `null` when the worker cannot init the model (missing peer, native
+     * load failure, etc.). Multiple calls with the same model reuse the
+     * single in-flight worker; calling with a different model loads it on
+     * the child without restarting the process.
+     */
+    initialize(model: MnemopiEmbedModelId, cacheDir: string | undefined): Promise<MnemopiSubprocessEmbeddingModel | null>;
+    terminate(): Promise<void>;
+}
+export declare const mnemopiEmbedClient: MnemopiEmbedClient;
+export declare function shutdownMnemopiEmbedClient(): Promise<void>;
+export declare function smokeTestMnemopiEmbedWorker({ timeoutMs, }?: {
+    timeoutMs?: number;
+}): Promise<void>;
+export {};

package/dist/types/mnemopi/embed-protocol.d.ts ADDED Viewed

@@ -0,0 +1,52 @@
+/**
+ * Wire types between the parent (`MnemopiEmbedClient`) and the local
+ * embeddings subprocess. The parent owns the subprocess lifecycle (graceful
+ * work, hard `SIGKILL` on shutdown); the protocol carries no explicit close
+ * handshake — once the parent decides to terminate, it signals the OS to reap
+ * the child so `onnxruntime-node`'s NAPI finalizer never runs in the main
+ * agent address space (it crashes Bun on Windows shutdown — issue #3031, the
+ * mnemopi sibling of the tiny-model fix from #1606/#1607). See
+ * `embed-client.ts` for the spawn/kill glue.
+ */
+/** Identifier of the fastembed model the worker should load (e.g. `fast-bge-base-en-v1.5`). */
+export type MnemopiEmbedModelId = string;
+export type MnemopiEmbedWorkerInbound = {
+    type: "ping";
+    id: string;
+} | {
+    type: "init";
+    id: string;
+    model: MnemopiEmbedModelId;
+    cacheDir?: string;
+} | {
+    type: "embed";
+    id: string;
+    model: MnemopiEmbedModelId;
+    cacheDir?: string;
+    texts: string[];
+    batchSize?: number;
+};
+export type MnemopiEmbedWorkerOutbound = {
+    type: "pong";
+    id: string;
+} | {
+    type: "ready";
+    id: string;
+} | {
+    type: "vectors";
+    id: string;
+    vectors: number[][];
+} | {
+    type: "error";
+    id: string;
+    error: string;
+} | {
+    type: "log";
+    level: "debug" | "warn" | "error";
+    msg: string;
+    meta?: Record<string, unknown>;
+};
+export interface MnemopiEmbedTransport {
+    send(message: MnemopiEmbedWorkerOutbound): void;
+    onMessage(handler: (message: MnemopiEmbedWorkerInbound) => void): () => void;
+}

package/dist/types/mnemopi/embed-worker.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * Mnemopi local-embeddings worker. Loaded inside the dedicated subprocess
+ * spawned by `embed-client.ts` (re-entered through the agent CLI's hidden
+ * `__omp_worker_mnemopi_embed` selector). The whole point of this module is
+ * that `loadFastembed()` — and therefore `onnxruntime-node`'s NAPI
+ * constructor + finalizer — only ever runs in this child address space. The
+ * parent `SIGKILL`s us on shutdown so the destructor that crashes Bun on
+ * Windows shutdown (issue #3031, mnemopi sibling of #1606/#1607) never runs
+ * in either process.
+ */
+import type { MnemopiEmbedTransport } from "./embed-protocol";
+export declare function startMnemopiEmbedWorker(transport: MnemopiEmbedTransport): void;

package/dist/types/mnemopi/state.d.ts CHANGED Viewed

@@ -4,7 +4,15 @@ import type { Mnemopi, RecallResult } from "@oh-my-pi/pi-mnemopi";
 import type * as MnemopiCoreNs from "@oh-my-pi/pi-mnemopi/core";
 import type { AgentSession } from "../session/agent-session";
 import type { MnemopiBackendConfig } from "./config";
-/** Lazily load `@oh-my-pi/pi-mnemopi` (memoized). */
+/**
+ * Lazily load `@oh-my-pi/pi-mnemopi` (memoized) and route fastembed loads
+ * through the dedicated embeddings subprocess. The override is installed once
+ * — before any consumer gets the chance to call `embed()` — so
+ * `onnxruntime-node`'s NAPI constructor + finalizer never run inside the
+ * agent's address space (issue #3031). Test seams that swap the initializer
+ * with `setLocalModelInitializerForTests` still win because both go through
+ * the same module-level slot.
+ */
 export declare function loadMnemopi(): Promise<typeof MnemopiNs>;
 /** Lazily load `@oh-my-pi/pi-mnemopi/core` (memoized). */
 export declare function loadMnemopiCore(): Promise<typeof MnemopiCoreNs>;

package/dist/types/session/agent-storage.d.ts CHANGED Viewed

@@ -16,6 +16,8 @@ export declare class AgentStorage {
      * @returns AgentStorage instance for the given path
      */
     static open(dbPath?: string): Promise<AgentStorage>;
+    /** @internal Reset all singletons and close their databases — test-only. */
+    static resetInstance(): void;
     /**
      * Reads legacy settings persisted in the agent.db `settings` table.
      * The canonical settings store is `config.yml`; this accessor only

package/dist/types/session/auth-broker-config.d.ts CHANGED Viewed

@@ -20,7 +20,8 @@
  * `runRootCommand`, and we want hand-edited config entries to be honoured at
  * boot without forcing a startup reorder.
  */
-import { type AuthBrokerClientConfig, type DiscoverAuthStorageOptions, discoverAuthStorage as discoverAuthStorageShared, getAuthBrokerTokenFilePath } from "@oh-my-pi/pi-ai/auth-broker/discover";
+import { type AuthBrokerClientConfig, type DiscoverAuthStorageOptions, getAuthBrokerTokenFilePath } from "@oh-my-pi/pi-ai/auth-broker/discover";
+import type { AuthStorage } from "./auth-storage";
 export { type AuthBrokerClientConfig, getAuthBrokerTokenFilePath };
 /**
  * Read broker configuration. Returns null when the URL is missing
@@ -41,4 +42,4 @@ export declare function resolveAuthBrokerConfig(): Promise<AuthBrokerClientConfi
  *
  * Default `agentDir` is the current configured agent directory.
  */
-export declare function discoverAuthStorage(agentDir?: string, options?: Omit<DiscoverAuthStorageOptions, "agentDir" | "configValueResolver">): ReturnType<typeof discoverAuthStorageShared>;
+export declare function discoverAuthStorage(agentDir?: string, options?: Omit<DiscoverAuthStorageOptions, "agentDir" | "configValueResolver">): Promise<AuthStorage>;

package/dist/types/session/history-storage.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export declare class HistoryStorage {
     #private;
     private constructor();
     static open(dbPath?: string): HistoryStorage;
-    /** @internal Reset the singleton — test-only. */
+    /** @internal Reset the singleton and close its database — test-only. */
     static resetInstance(): void;
     /**
      * Register a resolver that supplies the current session ID for prompts added

package/dist/types/tools/image-gen.d.ts CHANGED Viewed

@@ -61,6 +61,6 @@ export declare function isImageProviderPreference(value: unknown): value is Imag
 /** Set the preferred image provider from settings */
 export declare function setPreferredImageProvider(provider: ImageProviderPreference): void;
 export declare const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails>;
-export declare function getImageGenTools(modelRegistry?: ModelRegistry, activeModel?: Model): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>>;
-export declare function getImageGenToolsWithRegistry(modelRegistry: ModelRegistry, activeModel?: Model): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>>;
+export declare function getImageGenTools(_modelRegistry?: ModelRegistry, _activeModel?: Model): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>>;
+export declare function getImageGenToolsWithRegistry(_modelRegistry: ModelRegistry, _activeModel?: Model): Promise<Array<CustomTool<typeof imageGenSchema, ImageGenToolDetails>>>;
 export {};

package/dist/types/utils/image-loading.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@ export declare const SUPPORTED_INPUT_IMAGE_MIME_TYPES: Set<string>;
  * with an opaque HTTP 400. Detect those models so the resize pipeline encodes
  * to PNG/JPEG instead — the automatic equivalent of `OMP_NO_WEBP=1`.
  */
-export declare function modelLacksWebpSupport(model: Pick<Model, "provider" | "api"> | undefined): boolean;
+export declare function modelLacksWebpSupport(model: Pick<Model, "provider" | "api" | "imageInputDecoder"> | undefined): boolean;
 /**
  * `true` when `model` cannot decode WebP, otherwise `undefined` so the
  * `OMP_NO_WEBP` env fallback in {@link resizeImage} still applies. Feed straight

package/dist/types/utils/ipc.d.ts ADDED Viewed

@@ -0,0 +1,22 @@
+/**
+ * Narrow a value to a thenable so a rejection handler can be attached.
+ *
+ * Mirrors the local helper in `mcp/transports/stdio.ts` (kept separate because
+ * that copy serves the FileSink stdin-write path and is battle-tested there).
+ * This shared copy is the home for the IPC `send()` sites.
+ */
+export declare function isThenable(value: unknown): value is PromiseLike<unknown>;
+/**
+ * Send a message to a Bun subprocess over IPC, neutralizing both the
+ * synchronous throw ("cannot be used after the process has exited") and any
+ * asynchronous rejection (EPIPE from a pipe that broke between exit being
+ * observed and the next `send()`). The dead worker is detected separately via
+ * `onExit`/`onError` and respawned or disabled by the owning client; an
+ * un-awaited EPIPE rejection must not escape as a fatal unhandled rejection
+ * that takes down the whole session. See issue #2997.
+ *
+ * `label` prefixes the debug log on synchronous failure (e.g. "tts").
+ */
+export declare function safeSend(proc: {
+    send(message: unknown): unknown;
+}, message: unknown, label: string): void;

package/dist/types/web/search/providers/perplexity-auth.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import type { AuthStorage, OAuthAccess } from "@oh-my-pi/pi-ai";
+export declare const PERPLEXITY_CHAT_BASE_URL = "https://api.perplexity.ai";
+export declare const PERPLEXITY_RESPONSES_BASE_URL = "https://api.perplexity.ai/v1";
+export declare const OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1";
+export declare const OAUTH_EXPIRY_BUFFER_MS: number;
+export interface ApiConfig {
+    type: "api_key";
+    apiKey: string;
+    provider: "perplexity" | "openrouter";
+    chatBaseUrl: string;
+    responsesBaseUrl: string;
+    modelPrefix: string;
+    useResponses: boolean;
+}
+export type PerplexityAuth = ApiConfig | {
+    type: "oauth";
+    access: OAuthAccess;
+} | {
+    type: "cookies";
+    cookies: string;
+} | {
+    type: "anonymous";
+};
+export interface PerplexityAuthOptions {
+    signal?: AbortSignal;
+    forceRefresh?: boolean;
+}
+/** Detect API-key endpoints to try in priority order (Perplexity direct, then OpenRouter). */
+export declare function getApiConfigs(authStorage: AuthStorage, sessionId: string | undefined, options?: PerplexityAuthOptions): Promise<ApiConfig[]>;
+/**
+ * Decode a Perplexity JWT's `exp` claim, in ms. Returns `undefined` when the
+ * token has no `exp` (which is the common case — Perplexity sessions are
+ * server-side and effectively non-expiring from the client's POV).
+ */
+export declare function jwtExpiryMs(token: string): number | undefined;
+/** Collect all available auth methods to try in priority order */
+export declare function getAvailableAuthMethods(authStorage: AuthStorage, sessionId: string | undefined, options?: PerplexityAuthOptions): Promise<PerplexityAuth[]>;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-coding-agent",
-	"version": "16.1.2",
+	"version": "16.1.3",
 	"description": "Coding agent CLI with read, bash, edit, write tools and session management",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -48,17 +48,17 @@
 		"@agentclientprotocol/sdk": "0.25.0",
 		"@babel/parser": "^7.29.7",
 		"@mozilla/readability": "^0.6.0",
-		"@oh-my-pi/hashline": "16.1.2",
-		"@oh-my-pi/omp-stats": "16.1.2",
-		"@oh-my-pi/pi-agent-core": "16.1.2",
-		"@oh-my-pi/pi-ai": "16.1.2",
-		"@oh-my-pi/pi-catalog": "16.1.2",
-		"@oh-my-pi/pi-mnemopi": "16.1.2",
-		"@oh-my-pi/pi-natives": "16.1.2",
-		"@oh-my-pi/pi-tui": "16.1.2",
-		"@oh-my-pi/pi-utils": "16.1.2",
-		"@oh-my-pi/pi-wire": "16.1.2",
-		"@oh-my-pi/snapcompact": "16.1.2",
+		"@oh-my-pi/hashline": "16.1.3",
+		"@oh-my-pi/omp-stats": "16.1.3",
+		"@oh-my-pi/pi-agent-core": "16.1.3",
+		"@oh-my-pi/pi-ai": "16.1.3",
+		"@oh-my-pi/pi-catalog": "16.1.3",
+		"@oh-my-pi/pi-mnemopi": "16.1.3",
+		"@oh-my-pi/pi-natives": "16.1.3",
+		"@oh-my-pi/pi-tui": "16.1.3",
+		"@oh-my-pi/pi-utils": "16.1.3",
+		"@oh-my-pi/pi-wire": "16.1.3",
+		"@oh-my-pi/snapcompact": "16.1.3",
 		"@opentelemetry/api": "^1.9.1",
 		"@opentelemetry/context-async-hooks": "^2.7.1",
 		"@opentelemetry/exporter-trace-otlp-proto": "^0.218.0",

package/src/cli.ts CHANGED Viewed

@@ -68,6 +68,7 @@ async function runSmokeTest(): Promise<void> {
 	const { smokeTestTinyTitleWorker } = await import("./tiny/title-client");
 	const { smokeTestSttWorker } = await import("./stt/asr-client");
 	const { smokeTestTtsWorker } = await import("./tts/tts-client");
+	const { smokeTestMnemopiEmbedWorker } = await import("./mnemopi/embed-client");
 	const { smokeTestJsEvalWorker } = await import("./eval/js/context-manager");
 	await smokeTestSyncWorker();
@@ -87,6 +88,7 @@ async function runSmokeTest(): Promise<void> {
 	await smokeTestSttWorker();
 	await smokeTestJsEvalWorker();
 	await smokeTestTtsWorker();
+	await smokeTestMnemopiEmbedWorker();
 	process.stdout.write("smoke-test: ok\n");
 }
@@ -96,6 +98,7 @@ const TAB_WORKER_ARG = "__omp_worker_tab";
 const JS_EVAL_WORKER_ARG = "__omp_worker_js_eval";
 const STT_WORKER_ARG = "__omp_worker_stt";
 const TTS_WORKER_ARG = "__omp_worker_tts";
+const MNEMOPI_EMBED_WORKER_ARG = "__omp_worker_mnemopi_embed";
 async function runWorkerEntrypoint(arg: string | undefined): Promise<boolean> {
 	if (arg === TINY_WORKER_ARG) {
@@ -151,6 +154,11 @@ async function runWorkerEntrypoint(arg: string | undefined): Promise<boolean> {
 		await runIpcSubprocessWorker(startTtsWorker);
 		return true;
 	}
+	if (arg === MNEMOPI_EMBED_WORKER_ARG) {
+		const { startMnemopiEmbedWorker } = await import("./mnemopi/embed-worker");
+		await runIpcSubprocessWorker(startMnemopiEmbedWorker);
+		return true;
+	}
 	return false;
 }

package/src/commands/token.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { Args, Command, Flags } from "@oh-my-pi/pi-utils/cli";
 import chalk from "chalk";
 import { isAuthenticated, ModelRegistry } from "../config/model-registry";
 import { discoverAuthStorage } from "../sdk";
+import { getAvailableAuthMethods } from "../web/search/providers/perplexity-auth";
 export default class Token extends Command {
 	static description = "Get the API key or OAuth token for a provider";
@@ -41,49 +42,67 @@ export default class Token extends Command {
 		const provider = providerName.toLowerCase();
 		const authStorage = await discoverAuthStorage();
-		const modelRegistry = new ModelRegistry(authStorage);
+		try {
+			const modelRegistry = new ModelRegistry(authStorage);
-		// Resolve the API key / token
-		const apiKey = await modelRegistry.getApiKeyForProvider(provider, undefined, {
-			forceRefresh: flags["force-refresh"],
-		});
+			// Resolve the API key / token
+			let apiKey: string | undefined;
-		if (!isAuthenticated(apiKey)) {
-			// Find all active/configured providers
-			const activeProviders = new Set<string>();
-			for (const p of PROVIDER_REGISTRY) {
-				if (authStorage.hasAuth(p.id)) {
-					activeProviders.add(p.id);
+			if (provider === "perplexity") {
+				const methods = await getAvailableAuthMethods(authStorage, undefined, {
+					forceRefresh: flags["force-refresh"],
+				});
+				const printable = methods.find(m => m.type === "oauth" || m.type === "api_key");
+				if (printable) {
+					apiKey = printable.type === "oauth" ? printable.access.accessToken : printable.apiKey;
 				}
 			}
-			const all = authStorage.getAll();
-			for (const p in all) {
-				if (authStorage.hasAuth(p)) {
-					activeProviders.add(p);
-				}
+			if (!apiKey) {
+				apiKey = await modelRegistry.getApiKeyForProvider(provider, undefined, {
+					forceRefresh: flags["force-refresh"],
+				});
 			}
-			const msg = `No active credential found for provider "${providerName}".`;
-			process.stderr.write(`${chalk.red(msg)}\n`);
-			if (activeProviders.size > 0) {
-				process.stderr.write(`Configured providers: ${Array.from(activeProviders).sort().join(", ")}\n`);
+			if (!isAuthenticated(apiKey)) {
+				// Find all active/configured providers
+				const activeProviders = new Set<string>();
+				for (const p of PROVIDER_REGISTRY) {
+					if (authStorage.hasAuth(p.id)) {
+						activeProviders.add(p.id);
+					}
+				}
+				const all = authStorage.getAll();
+				for (const p in all) {
+					if (authStorage.hasAuth(p)) {
+						activeProviders.add(p);
+					}
+				}
+				const msg = `No active credential found for provider "${providerName}".`;
+				process.stderr.write(`${chalk.red(msg)}\n`);
+				if (activeProviders.size > 0) {
+					process.stderr.write(`Configured providers: ${Array.from(activeProviders).sort().join(", ")}\n`);
+				}
+				process.exitCode = 1;
+				return;
 			}
-			process.exitCode = 1;
-			return;
-		}
-		if (!flags.raw) {
-			try {
-				const parsed = JSON.parse(apiKey);
-				if (parsed && typeof parsed === "object" && typeof parsed.token === "string") {
-					process.stdout.write(`${parsed.token}\n`);
-					return;
+			if (!flags.raw) {
+				try {
+					const parsed = JSON.parse(apiKey);
+					if (parsed && typeof parsed === "object" && typeof parsed.token === "string") {
+						process.stdout.write(`${parsed.token}\n`);
+						return;
+					}
+				} catch {
+					// Not a JSON string, print as-is
 				}
-			} catch {
-				// Not a JSON string, print as-is
 			}
-		}
-		process.stdout.write(`${apiKey}\n`);
+			process.stdout.write(`${apiKey}\n`);
+		} finally {
+			authStorage.close();
+		}
 	}
 }

package/src/config/append-only-context-mode.ts CHANGED Viewed

@@ -8,10 +8,55 @@ export interface AppendOnlyContextModel {
 	compatConfig?: object;
 }
+/**
+ * Local model servers (Ollama, LM Studio, llama.cpp, vLLM, sglang, …) all
+ * rely on llama.cpp-style prefix KV-cache reuse: identical leading tokens
+ * skip re-prefill on the next request. Append-only mode is the only way to
+ * guarantee byte-stable bytes across turns, since the live system prompt,
+ * tool catalogue, and message log all flow through fresh allocations every
+ * step (see `agent-loop.ts` `streamAssistantResponse` fallback path).
+ */
+const LOCAL_INFERENCE_PROVIDERS = new Set(["ollama", "ollama-cloud", "lm-studio", "llama.cpp"]);
+/** True when `baseUrl` resolves to a loopback or RFC1918 host — covers
+ * llama.cpp/vLLM/sglang servers registered under a user-defined provider id
+ * via `models.yaml`. Built-in local provider ids (`ollama`, `lm-studio`,
+ * `llama.cpp`) are already handled by `LOCAL_INFERENCE_PROVIDERS`.
+ * Substring match on the parsed hostname only; ports, paths, and unparseable
+ * URLs return false.
+ */
+function hasLocalLoopbackBaseUrl(baseUrl: string | undefined): boolean {
+	if (!baseUrl) return false;
+	let hostname: string;
+	try {
+		hostname = new URL(baseUrl).hostname.toLowerCase();
+	} catch {
+		return false;
+	}
+	if (
+		hostname === "localhost" ||
+		hostname === "127.0.0.1" ||
+		hostname === "0.0.0.0" ||
+		hostname === "::1" ||
+		hostname === "[::1]"
+	) {
+		return true;
+	}
+	// RFC1918 private IPv4 ranges.
+	if (/^10\./.test(hostname)) return true;
+	if (/^192\.168\./.test(hostname)) return true;
+	if (/^172\.(1[6-9]|2[0-9]|3[01])\./.test(hostname)) return true;
+	// Common ".local" mDNS hostnames used for home-LAN llama.cpp boxes.
+	if (hostname.endsWith(".local")) return true;
+	return false;
+}
 function shouldAutoEnableAppendOnlyContext(model: AppendOnlyContextModel | null | undefined): boolean {
 	if (!model) return false;
 	if (model.provider === "deepseek") return true;
+	if (LOCAL_INFERENCE_PROVIDERS.has(model.provider)) return true;
 	if (hostMatchesUrl(model.baseUrl, "xiaomi")) return true;
+	if (hasLocalLoopbackBaseUrl(model.baseUrl)) return true;
 	return !!model.compatConfig && "supportsStore" in model.compatConfig && model.compatConfig.supportsStore === true;
 }

package/src/config/model-discovery.ts CHANGED Viewed

@@ -275,6 +275,7 @@ export async function discoverOllamaModels(
 			baseUrl: `${endpoint}/v1`,
 			reasoning: metadata?.reasoning ?? false,
 			input: metadata?.input ?? ["text"],
+			imageInputDecoder: "stb",
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
 			contextWindow: metadata?.contextWindow ?? 128000,
 			maxTokens: Math.min(metadata?.contextWindow ?? Number.POSITIVE_INFINITY, DISCOVERY_DEFAULT_MAX_TOKENS),
@@ -352,6 +353,7 @@ export async function discoverLlamaCppModels(
 				baseUrl,
 				reasoning: false,
 				input: serverMetadata?.input ?? ["text"],
+				imageInputDecoder: "stb",
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
 				contextWindow: serverMetadata?.contextWindow ?? 128000,
 				maxTokens: Math.min(
@@ -424,6 +426,7 @@ export async function discoverOpenAIModelsList(
 				baseUrl,
 				reasoning: false,
 				input: nativeMetadataForModel?.input ?? ["text"],
+				...(providerConfig.discovery.type === "lm-studio" ? { imageInputDecoder: "stb" as const } : {}),
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
 				contextWindow,
 				maxTokens: Math.min(contextWindow, discoveryDefaultMaxTokens(providerConfig.api)),

package/src/config/model-registry.ts CHANGED Viewed

@@ -900,6 +900,7 @@ export class ModelRegistry {
 				...replacementModel,
 				contextWindow: replacementModel.contextWindow ?? existing.contextWindow,
 				maxTokens: replacementModel.maxTokens ?? existing.maxTokens,
+				omitMaxOutputTokens: replacementModel.omitMaxOutputTokens ?? existing.omitMaxOutputTokens,
 				...(supportsTools !== undefined ? { supportsTools } : {}),
 			};
 		});
@@ -1023,12 +1024,21 @@ export class ModelRegistry {
 	}
 	#normalizeDiscoverableModels(providerConfig: DiscoveryProviderConfig, models: Model<Api>[]): Model<Api>[] {
+		const withDecoderMetadata =
+			providerConfig.discovery.type === "ollama" ||
+			providerConfig.discovery.type === "llama.cpp" ||
+			providerConfig.discovery.type === "lm-studio"
+				? models.map(model =>
+						buildModel({ ...model, imageInputDecoder: "stb", compat: model.compatConfig } as ModelSpec<Api>),
+					)
+				: models;
 		if (providerConfig.provider !== "ollama" || providerConfig.api !== "openai-responses") {
-			return models;
+			return withDecoderMetadata;
 		}
 		const contextLengthOverride = getOllamaContextLengthOverride();
-		return models.map(model => {
+		return withDecoderMetadata.map(model => {
 			const normalized =
 				model.api === "openai-completions"
 					? buildModel({
@@ -1269,7 +1279,12 @@ export class ModelRegistry {
 					models: cached?.models.map(model => model.id) ?? [],
 				});
 				this.#lastDiscoveryWarnings.delete(providerConfig.provider);
-				return cached ? cached.models.map(model => buildModel(model)) : [];
+				return cached
+					? this.#normalizeDiscoverableModels(
+							providerConfig,
+							cached.models.map(model => buildModel(model)),
+						)
+					: [];
 			}
 		}
@@ -1569,6 +1584,9 @@ export class ModelRegistry {
 	}
 	#applyHardcodedModelPolicies(models: Model<Api>[]): Model<Api>[] {
 		return models.map(model => {
+			if (model.provider === "ollama-cloud" && model.omitMaxOutputTokens !== true) {
+				model = applyModelOverride(model, { omitMaxOutputTokens: true });
+			}
 			if (model.id !== "gpt-5.4" || model.provider === "github-copilot") {
 				return model;
 			}