pi-omlx-picker 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +19 -6
- package/package.json +1 -1
- package/src/catalog.ts +52 -21
- package/src/config.ts +12 -0
- package/src/overflow.ts +1 -0
- package/src/provider.ts +12 -7
- package/src/repeat-stop.ts +2 -5
- package/src/stream-events.ts +1 -1
- package/src/stream-writer.ts +7 -0
- package/src/stream.ts +8 -1
- package/src/thinking-format.ts +1 -1
package/index.ts
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
} from "./src/catalog.ts";
|
|
21
21
|
import {
|
|
22
22
|
DEFAULT_OMLX_BASE_URL,
|
|
23
|
+
hasOmlxTarget,
|
|
23
24
|
loadConfig,
|
|
24
25
|
type OmlxConfig,
|
|
25
26
|
resolveConfiguredApiKey,
|
|
@@ -115,9 +116,18 @@ function registerModels(
|
|
|
115
116
|
models: OmlxModel[],
|
|
116
117
|
modelSettingsPath?: string,
|
|
117
118
|
): void {
|
|
119
|
+
const keyless = !resolveConfiguredApiKey();
|
|
118
120
|
pi.registerProvider(PROVIDER, {
|
|
119
121
|
name: "OMLX",
|
|
120
|
-
...toProviderConfig(
|
|
122
|
+
...toProviderConfig(
|
|
123
|
+
config.apiRoot,
|
|
124
|
+
config.apiKeyEnvVar,
|
|
125
|
+
models,
|
|
126
|
+
undefined,
|
|
127
|
+
{
|
|
128
|
+
keyless,
|
|
129
|
+
},
|
|
130
|
+
),
|
|
121
131
|
});
|
|
122
132
|
state.config = config;
|
|
123
133
|
state.catalog = models;
|
|
@@ -152,8 +162,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
152
162
|
apiRoot: DEFAULT_OMLX_BASE_URL,
|
|
153
163
|
apiKeyEnvVar: "OMLX_API_KEY",
|
|
154
164
|
};
|
|
165
|
+
const configured = resolveConfiguredApiKey() || hasOmlxTarget();
|
|
155
166
|
const cached = registrableCachedModels(readCatalogCache(config.apiRoot));
|
|
156
|
-
const fallbackCached =
|
|
167
|
+
const fallbackCached = configured
|
|
157
168
|
? undefined
|
|
158
169
|
: registrableCachedModels(readLastCatalogCache());
|
|
159
170
|
const models = cached ?? fallbackCached;
|
|
@@ -161,7 +172,7 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
161
172
|
state.config = config;
|
|
162
173
|
state.catalog = [];
|
|
163
174
|
state.registered = false;
|
|
164
|
-
state.lastError =
|
|
175
|
+
state.lastError = configured
|
|
165
176
|
? "No cached OMLX models with real max_context_window/max_tokens; waiting for live catalog refresh."
|
|
166
177
|
: "OMLX credentials are not set. Run /login and choose OMLX.";
|
|
167
178
|
state.lastRefreshAt = new Date().toISOString();
|
|
@@ -169,7 +180,9 @@ function registerCachedOrSetupModels(pi: ExtensionAPI, state: State): void {
|
|
|
169
180
|
return;
|
|
170
181
|
}
|
|
171
182
|
|
|
172
|
-
|
|
183
|
+
// A key OR a configured base URL (keyless server) is enough to register the
|
|
184
|
+
// real provider. Pi omits the auth header when the resolved key is empty.
|
|
185
|
+
if (configured) {
|
|
173
186
|
registerModels(pi, state, config, models);
|
|
174
187
|
return;
|
|
175
188
|
}
|
|
@@ -226,7 +239,7 @@ async function refreshProvider(
|
|
|
226
239
|
): Promise<RefreshResult> {
|
|
227
240
|
const config = loadConfig();
|
|
228
241
|
const apiKey = resolveConfiguredApiKey();
|
|
229
|
-
if (!apiKey) {
|
|
242
|
+
if (!apiKey && !hasOmlxTarget()) {
|
|
230
243
|
state.lastError = "OMLX credentials are not set";
|
|
231
244
|
return "not_configured";
|
|
232
245
|
}
|
|
@@ -237,7 +250,7 @@ async function refreshProvider(
|
|
|
237
250
|
|
|
238
251
|
let models: OmlxModel[];
|
|
239
252
|
try {
|
|
240
|
-
models = await fetchModels(config.apiRoot, apiKey, {
|
|
253
|
+
models = await fetchModels(config.apiRoot, apiKey ?? "", {
|
|
241
254
|
modelSettingsPath,
|
|
242
255
|
timeoutMs: opts.timeoutMs,
|
|
243
256
|
});
|
package/package.json
CHANGED
package/src/catalog.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface OmlxModel {
|
|
|
10
10
|
modelAlias?: string;
|
|
11
11
|
contextWindow?: number;
|
|
12
12
|
maxTokens?: number;
|
|
13
|
+
/** Model architectural ceiling (`max_model_len`). Prio-3 fallback and clamp limit. */
|
|
14
|
+
archContextWindow?: number;
|
|
13
15
|
thinkingDefault?: boolean | null;
|
|
14
16
|
taskBudgetTokens?: number;
|
|
15
17
|
maxToolResultTokens?: number;
|
|
@@ -36,7 +38,7 @@ export interface CatalogDebugEvent {
|
|
|
36
38
|
|
|
37
39
|
interface OpenAIModelsResponse {
|
|
38
40
|
object: string;
|
|
39
|
-
data: Array<{ id: string; object?: string }>;
|
|
41
|
+
data: Array<{ id: string; object?: string; max_model_len?: number | null }>;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
interface OmlxModelsStatusResponse {
|
|
@@ -92,7 +94,10 @@ export function parseModelsResponse(json: unknown): OmlxModel[] {
|
|
|
92
94
|
if (!entry || typeof entry.id !== "string" || !entry.id) continue;
|
|
93
95
|
if (seen.has(entry.id)) continue;
|
|
94
96
|
seen.add(entry.id);
|
|
95
|
-
|
|
97
|
+
const m: OmlxModel = { id: entry.id };
|
|
98
|
+
if (typeof entry.max_model_len === "number" && entry.max_model_len > 0)
|
|
99
|
+
m.archContextWindow = entry.max_model_len;
|
|
100
|
+
out.push(m);
|
|
96
101
|
}
|
|
97
102
|
return out;
|
|
98
103
|
}
|
|
@@ -208,13 +213,15 @@ export async function fetchModels(
|
|
|
208
213
|
opts.modelSettingsPath,
|
|
209
214
|
opts.onDebug,
|
|
210
215
|
);
|
|
211
|
-
return
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
216
|
+
return resolveArchContextLimits(
|
|
217
|
+
await applyApiGlobalDefaultsIfNeeded(
|
|
218
|
+
models,
|
|
219
|
+
apiRoot,
|
|
220
|
+
apiKey,
|
|
221
|
+
opts.signal,
|
|
222
|
+
timeoutMs,
|
|
223
|
+
opts.onDebug,
|
|
224
|
+
),
|
|
218
225
|
);
|
|
219
226
|
} catch (err) {
|
|
220
227
|
if (err instanceof Error && err.name === "AbortError") throw err;
|
|
@@ -245,16 +252,35 @@ export async function fetchModels(
|
|
|
245
252
|
opts.modelSettingsPath,
|
|
246
253
|
opts.onDebug,
|
|
247
254
|
);
|
|
248
|
-
return
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
+
return resolveArchContextLimits(
|
|
256
|
+
await applyApiGlobalDefaultsIfNeeded(
|
|
257
|
+
models,
|
|
258
|
+
apiRoot,
|
|
259
|
+
apiKey,
|
|
260
|
+
opts.signal,
|
|
261
|
+
timeoutMs,
|
|
262
|
+
opts.onDebug,
|
|
263
|
+
),
|
|
255
264
|
);
|
|
256
265
|
}
|
|
257
266
|
|
|
267
|
+
/**
|
|
268
|
+
* Final context-window resolution, applied after model-specific (prio 1) and
|
|
269
|
+
* global (prio 2) settings. The model's architectural ceiling
|
|
270
|
+
* (`archContextWindow`, from `max_model_len`) is the prio-3 fallback when no
|
|
271
|
+
* user setting exists, and the hard clamp when a user setting exceeds it.
|
|
272
|
+
*/
|
|
273
|
+
export function resolveArchContextLimits(models: OmlxModel[]): OmlxModel[] {
|
|
274
|
+
return models.map((model) => {
|
|
275
|
+
const arch = model.archContextWindow;
|
|
276
|
+
if (arch == null) return model;
|
|
277
|
+
const next: OmlxModel = { ...model };
|
|
278
|
+
if (next.contextWindow == null) next.contextWindow = arch;
|
|
279
|
+
else if (next.contextWindow > arch) next.contextWindow = arch;
|
|
280
|
+
return next;
|
|
281
|
+
});
|
|
282
|
+
}
|
|
283
|
+
|
|
258
284
|
async function applyApiGlobalDefaultsIfNeeded(
|
|
259
285
|
models: OmlxModel[],
|
|
260
286
|
apiRoot: string,
|
|
@@ -263,7 +289,8 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
263
289
|
timeoutMs: number,
|
|
264
290
|
onDebug?: (event: CatalogDebugEvent) => void,
|
|
265
291
|
): Promise<OmlxModel[]> {
|
|
266
|
-
if (!models.some((m) =>
|
|
292
|
+
if (!models.some((m) => m.contextWindow == null || m.maxTokens == null))
|
|
293
|
+
return models;
|
|
267
294
|
let defaults: OmlxGlobalDefaults | undefined;
|
|
268
295
|
try {
|
|
269
296
|
defaults = await fetchGlobalDefaults(apiRoot, apiKey, signal, timeoutMs);
|
|
@@ -272,6 +299,7 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
272
299
|
details: { apiRoot, defaults },
|
|
273
300
|
});
|
|
274
301
|
} catch (err) {
|
|
302
|
+
if (signal?.aborted) throw err;
|
|
275
303
|
onDebug?.({
|
|
276
304
|
kind: "catalog_global_settings_failed",
|
|
277
305
|
details: {
|
|
@@ -281,12 +309,13 @@ async function applyApiGlobalDefaultsIfNeeded(
|
|
|
281
309
|
});
|
|
282
310
|
return models;
|
|
283
311
|
}
|
|
284
|
-
if (
|
|
312
|
+
if (defaults.contextWindow == null && defaults.maxTokens == null)
|
|
313
|
+
return models;
|
|
285
314
|
return models.map((model) => {
|
|
286
315
|
const next: OmlxModel = { ...model };
|
|
287
|
-
if (
|
|
316
|
+
if (next.contextWindow == null && defaults.contextWindow != null)
|
|
288
317
|
next.contextWindow = defaults.contextWindow;
|
|
289
|
-
if (
|
|
318
|
+
if (next.maxTokens == null && defaults.maxTokens != null)
|
|
290
319
|
next.maxTokens = defaults.maxTokens;
|
|
291
320
|
return next;
|
|
292
321
|
});
|
|
@@ -325,8 +354,10 @@ async function getJson(
|
|
|
325
354
|
timeoutMs: number,
|
|
326
355
|
): Promise<unknown> {
|
|
327
356
|
const signal = withTimeout(parent, timeoutMs);
|
|
357
|
+
// Empty key => keyless server (skip_api_key_verification): omit the header.
|
|
358
|
+
const headers = apiKey ? { Authorization: `Bearer ${apiKey}` } : undefined;
|
|
328
359
|
const res = await fetch(url, {
|
|
329
|
-
headers
|
|
360
|
+
headers,
|
|
330
361
|
signal,
|
|
331
362
|
}).catch((err) => {
|
|
332
363
|
if (err instanceof Error && err.name === "AbortError") {
|
package/src/config.ts
CHANGED
|
@@ -34,6 +34,18 @@ export function resolveConfiguredApiKey(
|
|
|
34
34
|
return loadOmlxCredential()?.apiKey;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
+
/**
|
|
38
|
+
* True when the user has pointed us at a server even without an API key.
|
|
39
|
+
* OMLX servers run with `skip_api_key_verification: true` need no key; an
|
|
40
|
+
* explicit base URL (env or stored) is the signal that a keyless server is
|
|
41
|
+
* intended. With neither key nor base URL there is nothing to talk to.
|
|
42
|
+
*/
|
|
43
|
+
export function hasOmlxTarget(env: NodeJS.ProcessEnv = process.env): boolean {
|
|
44
|
+
if (env.OMLX_API_KEY || env.OMLX_BASE_URL) return true;
|
|
45
|
+
const stored = loadOmlxCredential();
|
|
46
|
+
return Boolean(stored?.apiKey || stored?.baseUrl);
|
|
47
|
+
}
|
|
48
|
+
|
|
37
49
|
// Legacy helper for older stored api_key credentials. Never fills only one side
|
|
38
50
|
// of the env pair; partial shell overrides remain explicit shell state.
|
|
39
51
|
export function applyStoredCredentialToEnv(
|
package/src/overflow.ts
CHANGED
|
@@ -4,6 +4,7 @@ const OMLX_OVERFLOW_RE =
|
|
|
4
4
|
/prompt too long[:.]?\s*(\d[\d,]*)\s*tokens?\s*exceeds\s*max(?:imum)?\s*context window of\s*(\d[\d,]*)\s*tokens?/i;
|
|
5
5
|
|
|
6
6
|
export function normalizeOverflowMessage(errorMessage: string): string {
|
|
7
|
+
if (errorMessage.startsWith("prompt is too long:")) return errorMessage;
|
|
7
8
|
const match = OMLX_OVERFLOW_RE.exec(errorMessage);
|
|
8
9
|
if (!match) return errorMessage;
|
|
9
10
|
const used = match[1];
|
package/src/provider.ts
CHANGED
|
@@ -23,22 +23,27 @@ export function toProviderConfig(
|
|
|
23
23
|
apiKeyEnvVar: string,
|
|
24
24
|
models: OmlxModel[],
|
|
25
25
|
onStreamTimeout?: (event: StreamTimeoutEvent) => void,
|
|
26
|
+
options: { keyless?: boolean } = {},
|
|
26
27
|
): ProviderConfig {
|
|
27
|
-
|
|
28
|
+
const config: ProviderConfig = {
|
|
28
29
|
baseUrl: apiRoot,
|
|
29
|
-
apiKey: `$${apiKeyEnvVar}`,
|
|
30
30
|
api: "openai-completions",
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
// Keyless server (skip_api_key_verification): no auth header. Pi rejects
|
|
32
|
+
// authHeader:true with no key, and resolveConfigValueOrThrow would throw
|
|
33
|
+
// on an unset $OMLX_API_KEY — so both apiKey and authHeader stay off.
|
|
34
|
+
authHeader: !options.keyless,
|
|
35
|
+
streamSimple: (model, context, streamOptions) =>
|
|
33
36
|
streamOmlxOpenAICompletions(
|
|
34
37
|
model,
|
|
35
38
|
context,
|
|
36
|
-
|
|
39
|
+
streamOptions,
|
|
37
40
|
resolveFirstDeltaTimeoutMs(),
|
|
38
41
|
onStreamTimeout,
|
|
39
42
|
),
|
|
40
43
|
models: models.map(toProviderModel),
|
|
41
44
|
};
|
|
45
|
+
if (!options.keyless) config.apiKey = `$${apiKeyEnvVar}`;
|
|
46
|
+
return config;
|
|
42
47
|
}
|
|
43
48
|
|
|
44
49
|
function requirePositive(
|
|
@@ -59,11 +64,11 @@ function toProviderModel(m: OmlxModel): ProviderModelConfig {
|
|
|
59
64
|
name: m.displayName ?? m.id,
|
|
60
65
|
reasoning,
|
|
61
66
|
input: m.modelType === "vlm" ? ["text", "image"] : ["text"],
|
|
62
|
-
cost: FREE_COST,
|
|
67
|
+
cost: { ...FREE_COST },
|
|
63
68
|
contextWindow: requirePositive(m.contextWindow, m.id, "max_context_window"),
|
|
64
69
|
maxTokens: requirePositive(m.maxTokens, m.id, "max_tokens"),
|
|
65
70
|
compat: reasoning
|
|
66
71
|
? { ...BASE_COMPAT, thinkingFormat: thinkingFormatFor(m.reasoningParser) }
|
|
67
|
-
: BASE_COMPAT,
|
|
72
|
+
: { ...BASE_COMPAT },
|
|
68
73
|
};
|
|
69
74
|
}
|
package/src/repeat-stop.ts
CHANGED
|
@@ -29,11 +29,8 @@ function extractAssistantParts(message: AssistantMessage): AssistantParts {
|
|
|
29
29
|
function lastAssistantMessage(
|
|
30
30
|
messages: Message[],
|
|
31
31
|
): AssistantMessage | undefined {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
if (m.role === "assistant") return m;
|
|
35
|
-
}
|
|
36
|
-
return undefined;
|
|
32
|
+
const last = messages.at(-1);
|
|
33
|
+
return last?.role === "assistant" ? last : undefined;
|
|
37
34
|
}
|
|
38
35
|
|
|
39
36
|
function bigramCounts(s: string): Map<string, number> {
|
package/src/stream-events.ts
CHANGED
package/src/stream-writer.ts
CHANGED
|
@@ -21,6 +21,13 @@ export class StreamWriter {
|
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
push(event: AssistantMessageEvent): void {
|
|
24
|
+
if (event.type === "start") {
|
|
25
|
+
if (!this.startPushed) {
|
|
26
|
+
this.stream.push(this.startEvent ?? event);
|
|
27
|
+
this.startPushed = true;
|
|
28
|
+
}
|
|
29
|
+
return;
|
|
30
|
+
}
|
|
24
31
|
if (!this.startPushed) {
|
|
25
32
|
this.stream.push(
|
|
26
33
|
this.startEvent ?? {
|
package/src/stream.ts
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
type Model,
|
|
8
8
|
type SimpleStreamOptions,
|
|
9
9
|
} from "@earendil-works/pi-ai";
|
|
10
|
-
import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/
|
|
10
|
+
import { streamSimple as streamSimpleOpenAICompletions } from "@earendil-works/pi-ai/compat";
|
|
11
11
|
import { normalizeErrorEvent } from "./overflow.ts";
|
|
12
12
|
import { isRepeatStop } from "./repeat-stop.ts";
|
|
13
13
|
import {
|
|
@@ -104,10 +104,17 @@ async function runAttempt(
|
|
|
104
104
|
writer.push(normalizeErrorEvent(event));
|
|
105
105
|
if (event.type === "done" || event.type === "error") break;
|
|
106
106
|
}
|
|
107
|
+
} catch (err) {
|
|
108
|
+
if (timedOut) return "timed-out";
|
|
109
|
+
throw err;
|
|
107
110
|
} finally {
|
|
108
111
|
clearTimeout(timer);
|
|
109
112
|
}
|
|
110
113
|
|
|
114
|
+
if (!timedOut) {
|
|
115
|
+
for (const held of bufferedThinking) writer.push(held);
|
|
116
|
+
}
|
|
117
|
+
|
|
111
118
|
return timedOut ? "timed-out" : "completed";
|
|
112
119
|
}
|
|
113
120
|
|