npm - pi-omlx-picker - Versions diffs - 0.2.9 → 0.3.0 - Mend

pi-omlx-picker 0.2.9 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (11)

package/index.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import {
 } from "./src/catalog.ts";
 import {
 	DEFAULT_OMLX_BASE_URL,
+	hasOmlxTarget,
 	loadConfig,
 	type OmlxConfig,
 	resolveConfiguredApiKey,
@@ -115,9 +116,18 @@ function registerModels(
 	models: OmlxModel[],
 	modelSettingsPath?: string,
 ): void {
+	const keyless = !resolveConfiguredApiKey();
 	pi.registerProvider(PROVIDER, {
 		name: "OMLX",
-		...toProviderConfig(config.apiRoot, config.apiKeyEnvVar, models),
+		...toProviderConfig(
+			config.apiRoot,
+			config.apiKeyEnvVar,
+			models,
+			undefined,
+			{
+				keyless,
+			},
+		),
 	});
 	state.config = config;
 	state.catalog = models;
@@ -152,8 +162,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
 		apiRoot: DEFAULT_OMLX_BASE_URL,
 		apiKeyEnvVar: "OMLX_API_KEY",
 	};
+	const configured = resolveConfiguredApiKey() || hasOmlxTarget();
 	const cached = registrableCachedModels(readCatalogCache(config.apiRoot));
-	const fallbackCached = resolveConfiguredApiKey()
+	const fallbackCached = configured
 		? undefined
 		: registrableCachedModels(readLastCatalogCache());
 	const models = cached ?? fallbackCached;
@@ -161,7 +172,7 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
 		state.config = config;
 		state.catalog = [];
 		state.registered = false;
-		state.lastError = resolveConfiguredApiKey()
+		state.lastError = configured
 			? "No cached OMLX models with real max_context_window/max_tokens; waiting for live catalog refresh."
 			: "OMLX credentials are not set. Run /login and choose OMLX.";
 		state.lastRefreshAt = new Date().toISOString();
@@ -169,7 +180,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
 		return;
 	}
-	if (resolveConfiguredApiKey()) {
+	// A key OR a configured base URL (keyless server) is enough to register the
+	// real provider. Pi omits the auth header when the resolved key is empty.
+	if (configured) {
 		registerModels(pi, state, config, models);
 		return;
 	}
@@ -226,7 +239,7 @@ async function refreshProvider(
 ): Promise<RefreshResult> {
 	const config = loadConfig();
 	const apiKey = resolveConfiguredApiKey();
-	if (!apiKey) {
+	if (!apiKey && !hasOmlxTarget()) {
 		state.lastError = "OMLX credentials are not set";
 		return "not_configured";
 	}
@@ -237,7 +250,7 @@ async function refreshProvider(
 	let models: OmlxModel[];
 	try {
-		models = await fetchModels(config.apiRoot, apiKey, {
+		models = await fetchModels(config.apiRoot, apiKey ?? "", {
 			modelSettingsPath,
 			timeoutMs: opts.timeoutMs,
 		});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "pi-omlx-picker",
-	"version": "0.2.9",
+	"version": "0.3.0",
 	"type": "module",
 	"description": "Pi extension that discovers models from a local OMLX server and registers them as a native Pi provider.",
 	"license": "MIT",

package/src/catalog.ts CHANGED Viewed

@@ -10,6 +10,8 @@ export interface OmlxModel {
 	modelAlias?: string;
 	contextWindow?: number;
 	maxTokens?: number;
+	/** Model architectural ceiling (`max_model_len`). Prio-3 fallback and clamp limit. */
+	archContextWindow?: number;
 	thinkingDefault?: boolean | null;
 	taskBudgetTokens?: number;
 	maxToolResultTokens?: number;
@@ -36,7 +38,7 @@ export interface CatalogDebugEvent {
 interface OpenAIModelsResponse {
 	object: string;
-	data: Array<{ id: string; object?: string }>;
+	data: Array<{ id: string; object?: string; max_model_len?: number | null }>;
 }
 interface OmlxModelsStatusResponse {
@@ -92,7 +94,10 @@ export function parseModelsResponse(json: unknown): OmlxModel[] {
 		if (!entry || typeof entry.id !== "string" || !entry.id) continue;
 		if (seen.has(entry.id)) continue;
 		seen.add(entry.id);
-		out.push({ id: entry.id });
+		const m: OmlxModel = { id: entry.id };
+		if (typeof entry.max_model_len === "number" && entry.max_model_len > 0)
+			m.archContextWindow = entry.max_model_len;
+		out.push(m);
 	}
 	return out;
 }
@@ -208,13 +213,15 @@ export async function fetchModels(
 			opts.modelSettingsPath,
 			opts.onDebug,
 		);
-		return applyApiGlobalDefaultsIfNeeded(
-			models,
-			apiRoot,
-			apiKey,
-			opts.signal,
-			timeoutMs,
-			opts.onDebug,
+		return resolveArchContextLimits(
+			await applyApiGlobalDefaultsIfNeeded(
+				models,
+				apiRoot,
+				apiKey,
+				opts.signal,
+				timeoutMs,
+				opts.onDebug,
+			),
 		);
 	} catch (err) {
 		if (err instanceof Error && err.name === "AbortError") throw err;
@@ -245,16 +252,35 @@ export async function fetchModels(
 		opts.modelSettingsPath,
 		opts.onDebug,
 	);
-	return applyApiGlobalDefaultsIfNeeded(
-		models,
-		apiRoot,
-		apiKey,
-		opts.signal,
-		timeoutMs,
-		opts.onDebug,
+	return resolveArchContextLimits(
+		await applyApiGlobalDefaultsIfNeeded(
+			models,
+			apiRoot,
+			apiKey,
+			opts.signal,
+			timeoutMs,
+			opts.onDebug,
+		),
 	);
 }
+/**
+ * Final context-window resolution, applied after model-specific (prio 1) and
+ * global (prio 2) settings. The model's architectural ceiling
+ * (`archContextWindow`, from `max_model_len`) is the prio-3 fallback when no
+ * user setting exists, and the hard clamp when a user setting exceeds it.
+ */
+export function resolveArchContextLimits(models: OmlxModel[]): OmlxModel[] {
+	return models.map((model) => {
+		const arch = model.archContextWindow;
+		if (arch == null) return model;
+		const next: OmlxModel = { ...model };
+		if (next.contextWindow == null) next.contextWindow = arch;
+		else if (next.contextWindow > arch) next.contextWindow = arch;
+		return next;
+	});
+}
 async function applyApiGlobalDefaultsIfNeeded(
 	models: OmlxModel[],
 	apiRoot: string,
@@ -263,7 +289,8 @@ async function applyApiGlobalDefaultsIfNeeded(
 	timeoutMs: number,
 	onDebug?: (event: CatalogDebugEvent) => void,
 ): Promise<OmlxModel[]> {
-	if (!models.some((m) => !m.contextWindow || !m.maxTokens)) return models;
+	if (!models.some((m) => m.contextWindow == null || m.maxTokens == null))
+		return models;
 	let defaults: OmlxGlobalDefaults | undefined;
 	try {
 		defaults = await fetchGlobalDefaults(apiRoot, apiKey, signal, timeoutMs);
@@ -272,6 +299,7 @@ async function applyApiGlobalDefaultsIfNeeded(
 			details: { apiRoot, defaults },
 		});
 	} catch (err) {
+		if (signal?.aborted) throw err;
 		onDebug?.({
 			kind: "catalog_global_settings_failed",
 			details: {
@@ -281,12 +309,13 @@ async function applyApiGlobalDefaultsIfNeeded(
 		});
 		return models;
 	}
-	if (!defaults.contextWindow && !defaults.maxTokens) return models;
+	if (defaults.contextWindow == null && defaults.maxTokens == null)
+		return models;
 	return models.map((model) => {
 		const next: OmlxModel = { ...model };
-		if (!next.contextWindow && defaults.contextWindow)
+		if (next.contextWindow == null && defaults.contextWindow != null)
 			next.contextWindow = defaults.contextWindow;
-		if (!next.maxTokens && defaults.maxTokens)
+		if (next.maxTokens == null && defaults.maxTokens != null)
 			next.maxTokens = defaults.maxTokens;
 		return next;
 	});
@@ -325,8 +354,10 @@ async function getJson(
 	timeoutMs: number,
 ): Promise<unknown> {
 	const signal = withTimeout(parent, timeoutMs);
+	// Empty key => keyless server (skip_api_key_verification): omit the header.
+	const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
 	const res = await fetch(url, {
-		headers: { Authorization: `Bearer ${apiKey}` },
+		headers,
 		signal,
 	}).catch((err) => {
 		if (err instanceof Error && err.name === "AbortError") {

package/src/config.ts CHANGED Viewed

@@ -34,6 +34,18 @@ export function resolveConfiguredApiKey(
 	return loadOmlxCredential()?.apiKey;
 }
+/**
+ * True when the user has pointed us at a server even without an API key.
+ * OMLX servers run with `skip_api_key_verification: true` need no key; an
+ * explicit base URL (env or stored) is the signal that a keyless server is
+ * intended. With neither key nor base URL there is nothing to talk to.
+ */
+export function hasOmlxTarget(env: NodeJS.ProcessEnv = process.env): boolean {
+	if (env.OMLX_API_KEY || env.OMLX_BASE_URL) return true;
+	const stored = loadOmlxCredential();
+	return Boolean(stored?.apiKey || stored?.baseUrl);
+}
 // Legacy helper for older stored api_key credentials. Never fills only one side
 // of the env pair; partial shell overrides remain explicit shell state.
 export function applyStoredCredentialToEnv(

package/src/overflow.ts CHANGED Viewed

@@ -4,6 +4,7 @@ const OMLX_OVERFLOW_RE =
 	/prompt too long[:.]?\s*(\d[\d,]*)\s*tokens?\s*exceeds\s*max(?:imum)?\s*context window of\s*(\d[\d,]*)\s*tokens?/i;
 export function normalizeOverflowMessage(errorMessage: string): string {
+	if (errorMessage.startsWith("prompt is too long:")) return errorMessage;
 	const match = OMLX_OVERFLOW_RE.exec(errorMessage);
 	if (!match) return errorMessage;
 	const used = match[1];

package/src/provider.ts CHANGED Viewed

@@ -23,22 +23,27 @@ export function toProviderConfig(
 	apiKeyEnvVar: string,
 	models: OmlxModel[],
 	onStreamTimeout?: (event: StreamTimeoutEvent) => void,
+	options: { keyless?: boolean } = {},
 ): ProviderConfig {
-	return {
+	const config: ProviderConfig = {
 		baseUrl: apiRoot,
-		apiKey: `$${apiKeyEnvVar}`,
 		api: "openai-completions",
-		authHeader: true,
-		streamSimple: (model, context, options) =>
+		// Keyless server (skip_api_key_verification): no auth header. Pi rejects
+		// authHeader:true with no key, and resolveConfigValueOrThrow would throw
+		// on an unset $OMLX_API_KEY — so both apiKey and authHeader stay off.
+		authHeader: !options.keyless,
+		streamSimple: (model, context, streamOptions) =>
 			streamOmlxOpenAICompletions(
 				model,
 				context,
-				options,
+				streamOptions,
 				resolveFirstDeltaTimeoutMs(),
 				onStreamTimeout,
 			),
 		models: models.map(toProviderModel),
 	};
+	if (!options.keyless) config.apiKey = `$${apiKeyEnvVar}`;
+	return config;
 }
 function requirePositive(
@@ -59,11 +64,11 @@ function toProviderModel(m: OmlxModel): ProviderModelConfig {
 		name: m.displayName ?? m.id,
 		reasoning,
 		input: m.modelType === "vlm" ? ["text", "image"] : ["text"],
-		cost: FREE_COST,
+		cost: { ...FREE_COST },
 		contextWindow: requirePositive(m.contextWindow, m.id, "max_context_window"),
 		maxTokens: requirePositive(m.maxTokens, m.id, "max_tokens"),
 		compat: reasoning
 			? { ...BASE_COMPAT, thinkingFormat: thinkingFormatFor(m.reasoningParser) }
-			: BASE_COMPAT,
+			: { ...BASE_COMPAT },
 	};
 }

package/src/repeat-stop.ts CHANGED Viewed

@@ -29,11 +29,8 @@ function extractAssistantParts(message: AssistantMessage): AssistantParts {
 function lastAssistantMessage(
 	messages: Message[],
 ): AssistantMessage | undefined {
-	for (let i = messages.length - 1; i >= 0; i--) {
-		const m = messages[i];
-		if (m.role === "assistant") return m;
-	}
-	return undefined;
+	const last = messages.at(-1);
+	return last?.role === "assistant" ? last : undefined;
 }
 function bigramCounts(s: string): Map<string, number> {

package/src/stream-events.ts CHANGED Viewed

@@ -64,7 +64,7 @@ export function errorAssistantMessage(
 		api: model.api,
 		provider: model.provider,
 		model: model.id,
-		usage: ZERO_USAGE,
+		usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
 		stopReason,
 		errorMessage,
 		timestamp: Date.now(),

package/src/stream-writer.ts CHANGED Viewed

@@ -21,6 +21,13 @@ export class StreamWriter {
 	}
 	push(event: AssistantMessageEvent): void {
+		if (event.type === "start") {
+			if (!this.startPushed) {
+				this.stream.push(this.startEvent ?? event);
+				this.startPushed = true;
+			}
+			return;
+		}
 		if (!this.startPushed) {
 			this.stream.push(
 				this.startEvent ?? {

package/src/stream.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import {
 	type Model,
 	type SimpleStreamOptions,
 } from "@earendil-works/pi-ai";
-import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/api/openai-completions";
+import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/compat";
 import { normalizeErrorEvent } from "./overflow.ts";
 import { isRepeatStop } from "./repeat-stop.ts";
 import {
@@ -104,10 +104,17 @@ async function runAttempt(
 			writer.push(normalizeErrorEvent(event));
 			if (event.type === "done" || event.type === "error") break;
 		}
+	} catch (err) {
+		if (timedOut) return "timed-out";
+		throw err;
 	} finally {
 		clearTimeout(timer);
 	}
+	if (!timedOut) {
+		for (const held of bufferedThinking) writer.push(held);
+	}
 	return timedOut ? "timed-out" : "completed";
 }

package/src/thinking-format.ts CHANGED Viewed

@@ -19,6 +19,6 @@ export function thinkingFormatFor(
 	if (!reasoningParser) return NO_THINKING_FORMAT;
 	return (
 		REASONING_PARSER_FORMATS[reasoningParser.toLowerCase()] ??
-		OMLX_CHAT_TEMPLATE_FORMAT
+		NO_THINKING_FORMAT
 	);
 }