pi-omlx-picker 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -20,6 +20,7 @@ import {
20
20
  } from "./src/catalog.ts";
21
21
  import {
22
22
  DEFAULT_OMLX_BASE_URL,
23
+ hasOmlxTarget,
23
24
  loadConfig,
24
25
  type OmlxConfig,
25
26
  resolveConfiguredApiKey,
@@ -115,9 +116,18 @@ function registerModels(
115
116
  models: OmlxModel[],
116
117
  modelSettingsPath?: string,
117
118
  ): void {
119
+ const keyless = !resolveConfiguredApiKey();
118
120
  pi.registerProvider(PROVIDER, {
119
121
  name: "OMLX",
120
- ...toProviderConfig(config.apiRoot, config.apiKeyEnvVar, models),
122
+ ...toProviderConfig(
123
+ config.apiRoot,
124
+ config.apiKeyEnvVar,
125
+ models,
126
+ undefined,
127
+ {
128
+ keyless,
129
+ },
130
+ ),
121
131
  });
122
132
  state.config = config;
123
133
  state.catalog = models;
@@ -152,8 +162,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
152
162
  apiRoot: DEFAULT_OMLX_BASE_URL,
153
163
  apiKeyEnvVar: "OMLX_API_KEY",
154
164
  };
165
+ const configured = resolveConfiguredApiKey() || hasOmlxTarget();
155
166
  const cached = registrableCachedModels(readCatalogCache(config.apiRoot));
156
- const fallbackCached = resolveConfiguredApiKey()
167
+ const fallbackCached = configured
157
168
  ? undefined
158
169
  : registrableCachedModels(readLastCatalogCache());
159
170
  const models = cached ?? fallbackCached;
@@ -161,7 +172,7 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
161
172
  state.config = config;
162
173
  state.catalog = [];
163
174
  state.registered = false;
164
- state.lastError = resolveConfiguredApiKey()
175
+ state.lastError = configured
165
176
  ? "No cached OMLX models with real max_context_window/max_tokens; waiting for live catalog refresh."
166
177
  : "OMLX credentials are not set. Run /login and choose OMLX.";
167
178
  state.lastRefreshAt = new Date().toISOString();
@@ -169,7 +180,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
169
180
  return;
170
181
  }
171
182
 
172
- if (resolveConfiguredApiKey()) {
183
+ // A key OR a configured base URL (keyless server) is enough to register the
184
+ // real provider. Pi omits the auth header when the resolved key is empty.
185
+ if (configured) {
173
186
  registerModels(pi, state, config, models);
174
187
  return;
175
188
  }
@@ -226,7 +239,7 @@ async function refreshProvider(
226
239
  ): Promise<RefreshResult> {
227
240
  const config = loadConfig();
228
241
  const apiKey = resolveConfiguredApiKey();
229
- if (!apiKey) {
242
+ if (!apiKey && !hasOmlxTarget()) {
230
243
  state.lastError = "OMLX credentials are not set";
231
244
  return "not_configured";
232
245
  }
@@ -237,7 +250,7 @@ async function refreshProvider(
237
250
 
238
251
  let models: OmlxModel[];
239
252
  try {
240
- models = await fetchModels(config.apiRoot, apiKey, {
253
+ models = await fetchModels(config.apiRoot, apiKey ?? "", {
241
254
  modelSettingsPath,
242
255
  timeoutMs: opts.timeoutMs,
243
256
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-omlx-picker",
3
- "version": "0.2.9",
3
+ "version": "0.3.0",
4
4
  "type": "module",
5
5
  "description": "Pi extension that discovers models from a local OMLX server and registers them as a native Pi provider.",
6
6
  "license": "MIT",
package/src/catalog.ts CHANGED
@@ -10,6 +10,8 @@ export interface OmlxModel {
10
10
  modelAlias?: string;
11
11
  contextWindow?: number;
12
12
  maxTokens?: number;
13
+ /** Model architectural ceiling (`max_model_len`). Prio-3 fallback and clamp limit. */
14
+ archContextWindow?: number;
13
15
  thinkingDefault?: boolean | null;
14
16
  taskBudgetTokens?: number;
15
17
  maxToolResultTokens?: number;
@@ -36,7 +38,7 @@ export interface CatalogDebugEvent {
36
38
 
37
39
  interface OpenAIModelsResponse {
38
40
  object: string;
39
- data: Array<{ id: string; object?: string }>;
41
+ data: Array<{ id: string; object?: string; max_model_len?: number | null }>;
40
42
  }
41
43
 
42
44
  interface OmlxModelsStatusResponse {
@@ -92,7 +94,10 @@ export function parseModelsResponse(json: unknown): OmlxModel[] {
92
94
  if (!entry || typeof entry.id !== "string" || !entry.id) continue;
93
95
  if (seen.has(entry.id)) continue;
94
96
  seen.add(entry.id);
95
- out.push({ id: entry.id });
97
+ const m: OmlxModel = { id: entry.id };
98
+ if (typeof entry.max_model_len === "number" && entry.max_model_len > 0)
99
+ m.archContextWindow = entry.max_model_len;
100
+ out.push(m);
96
101
  }
97
102
  return out;
98
103
  }
@@ -208,13 +213,15 @@ export async function fetchModels(
208
213
  opts.modelSettingsPath,
209
214
  opts.onDebug,
210
215
  );
211
- return applyApiGlobalDefaultsIfNeeded(
212
- models,
213
- apiRoot,
214
- apiKey,
215
- opts.signal,
216
- timeoutMs,
217
- opts.onDebug,
216
+ return resolveArchContextLimits(
217
+ await applyApiGlobalDefaultsIfNeeded(
218
+ models,
219
+ apiRoot,
220
+ apiKey,
221
+ opts.signal,
222
+ timeoutMs,
223
+ opts.onDebug,
224
+ ),
218
225
  );
219
226
  } catch (err) {
220
227
  if (err instanceof Error && err.name === "AbortError") throw err;
@@ -245,16 +252,35 @@ export async function fetchModels(
245
252
  opts.modelSettingsPath,
246
253
  opts.onDebug,
247
254
  );
248
- return applyApiGlobalDefaultsIfNeeded(
249
- models,
250
- apiRoot,
251
- apiKey,
252
- opts.signal,
253
- timeoutMs,
254
- opts.onDebug,
255
+ return resolveArchContextLimits(
256
+ await applyApiGlobalDefaultsIfNeeded(
257
+ models,
258
+ apiRoot,
259
+ apiKey,
260
+ opts.signal,
261
+ timeoutMs,
262
+ opts.onDebug,
263
+ ),
255
264
  );
256
265
  }
257
266
 
267
+ /**
268
+ * Final context-window resolution, applied after model-specific (prio 1) and
269
+ * global (prio 2) settings. The model's architectural ceiling
270
+ * (`archContextWindow`, from `max_model_len`) is the prio-3 fallback when no
271
+ * user setting exists, and the hard clamp when a user setting exceeds it.
272
+ */
273
+ export function resolveArchContextLimits(models: OmlxModel[]): OmlxModel[] {
274
+ return models.map((model) => {
275
+ const arch = model.archContextWindow;
276
+ if (arch == null) return model;
277
+ const next: OmlxModel = { ...model };
278
+ if (next.contextWindow == null) next.contextWindow = arch;
279
+ else if (next.contextWindow > arch) next.contextWindow = arch;
280
+ return next;
281
+ });
282
+ }
283
+
258
284
  async function applyApiGlobalDefaultsIfNeeded(
259
285
  models: OmlxModel[],
260
286
  apiRoot: string,
@@ -263,7 +289,8 @@ async function applyApiGlobalDefaultsIfNeeded(
263
289
  timeoutMs: number,
264
290
  onDebug?: (event: CatalogDebugEvent) => void,
265
291
  ): Promise<OmlxModel[]> {
266
- if (!models.some((m) => !m.contextWindow || !m.maxTokens)) return models;
292
+ if (!models.some((m) => m.contextWindow == null || m.maxTokens == null))
293
+ return models;
267
294
  let defaults: OmlxGlobalDefaults | undefined;
268
295
  try {
269
296
  defaults = await fetchGlobalDefaults(apiRoot, apiKey, signal, timeoutMs);
@@ -272,6 +299,7 @@ async function applyApiGlobalDefaultsIfNeeded(
272
299
  details: { apiRoot, defaults },
273
300
  });
274
301
  } catch (err) {
302
+ if (signal?.aborted) throw err;
275
303
  onDebug?.({
276
304
  kind: "catalog_global_settings_failed",
277
305
  details: {
@@ -281,12 +309,13 @@ async function applyApiGlobalDefaultsIfNeeded(
281
309
  });
282
310
  return models;
283
311
  }
284
- if (!defaults.contextWindow && !defaults.maxTokens) return models;
312
+ if (defaults.contextWindow == null && defaults.maxTokens == null)
313
+ return models;
285
314
  return models.map((model) => {
286
315
  const next: OmlxModel = { ...model };
287
- if (!next.contextWindow && defaults.contextWindow)
316
+ if (next.contextWindow == null && defaults.contextWindow != null)
288
317
  next.contextWindow = defaults.contextWindow;
289
- if (!next.maxTokens && defaults.maxTokens)
318
+ if (next.maxTokens == null && defaults.maxTokens != null)
290
319
  next.maxTokens = defaults.maxTokens;
291
320
  return next;
292
321
  });
@@ -325,8 +354,10 @@ async function getJson(
325
354
  timeoutMs: number,
326
355
  ): Promise<unknown> {
327
356
  const signal = withTimeout(parent, timeoutMs);
357
+ // Empty key => keyless server (skip_api_key_verification): omit the header.
358
+ const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
328
359
  const res = await fetch(url, {
329
- headers: { Authorization: `Bearer ${apiKey}` },
360
+ headers,
330
361
  signal,
331
362
  }).catch((err) => {
332
363
  if (err instanceof Error && err.name === "AbortError") {
package/src/config.ts CHANGED
@@ -34,6 +34,18 @@ export function resolveConfiguredApiKey(
34
34
  return loadOmlxCredential()?.apiKey;
35
35
  }
36
36
 
37
+ /**
38
+ * True when the user has pointed us at a server even without an API key.
39
+ * OMLX servers run with `skip_api_key_verification: true` need no key; an
40
+ * explicit base URL (env or stored) is the signal that a keyless server is
41
+ * intended. With neither key nor base URL there is nothing to talk to.
42
+ */
43
+ export function hasOmlxTarget(env: NodeJS.ProcessEnv = process.env): boolean {
44
+ if (env.OMLX_API_KEY || env.OMLX_BASE_URL) return true;
45
+ const stored = loadOmlxCredential();
46
+ return Boolean(stored?.apiKey || stored?.baseUrl);
47
+ }
48
+
37
49
  // Legacy helper for older stored api_key credentials. Never fills only one side
38
50
  // of the env pair; partial shell overrides remain explicit shell state.
39
51
  export function applyStoredCredentialToEnv(
package/src/overflow.ts CHANGED
@@ -4,6 +4,7 @@ const OMLX_OVERFLOW_RE =
4
4
  /prompt too long[:.]?\s*(\d[\d,]*)\s*tokens?\s*exceeds\s*max(?:imum)?\s*context window of\s*(\d[\d,]*)\s*tokens?/i;
5
5
 
6
6
  export function normalizeOverflowMessage(errorMessage: string): string {
7
+ if (errorMessage.startsWith("prompt is too long:")) return errorMessage;
7
8
  const match = OMLX_OVERFLOW_RE.exec(errorMessage);
8
9
  if (!match) return errorMessage;
9
10
  const used = match[1];
package/src/provider.ts CHANGED
@@ -23,22 +23,27 @@ export function toProviderConfig(
23
23
  apiKeyEnvVar: string,
24
24
  models: OmlxModel[],
25
25
  onStreamTimeout?: (event: StreamTimeoutEvent) => void,
26
+ options: { keyless?: boolean } = {},
26
27
  ): ProviderConfig {
27
- return {
28
+ const config: ProviderConfig = {
28
29
  baseUrl: apiRoot,
29
- apiKey: `$${apiKeyEnvVar}`,
30
30
  api: "openai-completions",
31
- authHeader: true,
32
- streamSimple: (model, context, options) =>
31
+ // Keyless server (skip_api_key_verification): no auth header. Pi rejects
32
+ // authHeader:true with no key, and resolveConfigValueOrThrow would throw
33
+ // on an unset $OMLX_API_KEY — so both apiKey and authHeader stay off.
34
+ authHeader: !options.keyless,
35
+ streamSimple: (model, context, streamOptions) =>
33
36
  streamOmlxOpenAICompletions(
34
37
  model,
35
38
  context,
36
- options,
39
+ streamOptions,
37
40
  resolveFirstDeltaTimeoutMs(),
38
41
  onStreamTimeout,
39
42
  ),
40
43
  models: models.map(toProviderModel),
41
44
  };
45
+ if (!options.keyless) config.apiKey = `$${apiKeyEnvVar}`;
46
+ return config;
42
47
  }
43
48
 
44
49
  function requirePositive(
@@ -59,11 +64,11 @@ function toProviderModel(m: OmlxModel): ProviderModelConfig {
59
64
  name: m.displayName ?? m.id,
60
65
  reasoning,
61
66
  input: m.modelType === "vlm" ? ["text", "image"] : ["text"],
62
- cost: FREE_COST,
67
+ cost: { ...FREE_COST },
63
68
  contextWindow: requirePositive(m.contextWindow, m.id, "max_context_window"),
64
69
  maxTokens: requirePositive(m.maxTokens, m.id, "max_tokens"),
65
70
  compat: reasoning
66
71
  ? { ...BASE_COMPAT, thinkingFormat: thinkingFormatFor(m.reasoningParser) }
67
- : BASE_COMPAT,
72
+ : { ...BASE_COMPAT },
68
73
  };
69
74
  }
@@ -29,11 +29,8 @@ function extractAssistantParts(message: AssistantMessage): AssistantParts {
29
29
  function lastAssistantMessage(
30
30
  messages: Message[],
31
31
  ): AssistantMessage | undefined {
32
- for (let i = messages.length - 1; i >= 0; i--) {
33
- const m = messages[i];
34
- if (m.role === "assistant") return m;
35
- }
36
- return undefined;
32
+ const last = messages.at(-1);
33
+ return last?.role === "assistant" ? last : undefined;
37
34
  }
38
35
 
39
36
  function bigramCounts(s: string): Map<string, number> {
@@ -64,7 +64,7 @@ export function errorAssistantMessage(
64
64
  api: model.api,
65
65
  provider: model.provider,
66
66
  model: model.id,
67
- usage: ZERO_USAGE,
67
+ usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
68
68
  stopReason,
69
69
  errorMessage,
70
70
  timestamp: Date.now(),
@@ -21,6 +21,13 @@ export class StreamWriter {
21
21
  }
22
22
 
23
23
  push(event: AssistantMessageEvent): void {
24
+ if (event.type === "start") {
25
+ if (!this.startPushed) {
26
+ this.stream.push(this.startEvent ?? event);
27
+ this.startPushed = true;
28
+ }
29
+ return;
30
+ }
24
31
  if (!this.startPushed) {
25
32
  this.stream.push(
26
33
  this.startEvent ?? {
package/src/stream.ts CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  type Model,
8
8
  type SimpleStreamOptions,
9
9
  } from "@earendil-works/pi-ai";
10
- import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/api/openai-completions";
10
+ import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/compat";
11
11
  import { normalizeErrorEvent } from "./overflow.ts";
12
12
  import { isRepeatStop } from "./repeat-stop.ts";
13
13
  import {
@@ -104,10 +104,17 @@ async function runAttempt(
104
104
  writer.push(normalizeErrorEvent(event));
105
105
  if (event.type === "done" || event.type === "error") break;
106
106
  }
107
+ } catch (err) {
108
+ if (timedOut) return "timed-out";
109
+ throw err;
107
110
  } finally {
108
111
  clearTimeout(timer);
109
112
  }
110
113
 
114
+ if (!timedOut) {
115
+ for (const held of bufferedThinking) writer.push(held);
116
+ }
117
+
111
118
  return timedOut ? "timed-out" : "completed";
112
119
  }
113
120
 
@@ -19,6 +19,6 @@ export function thinkingFormatFor(
19
19
  if (!reasoningParser) return NO_THINKING_FORMAT;
20
20
  return (
21
21
  REASONING_PARSER_FORMATS[reasoningParser.toLowerCase()] ??
22
- OMLX_CHAT_TEMPLATE_FORMAT
22
+ NO_THINKING_FORMAT
23
23
  );
24
24
  }