pi-free 2.0.9 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,26 +2,24 @@
2
2
  * Ollama Cloud Provider Extension
3
3
  *
4
4
  * Provides access to Ollama's cloud-hosted models via ollama.com API.
5
- * All models use Ollama's usage-based pricing system:
6
- * - Free tier: Unlimited public models (session limits reset every 5 hours,
7
- * weekly limits reset every 7 days)
8
- * - Pro tier: 50x more cloud usage than Free
9
- * - Max tier: 5x more usage than Pro
5
+ * Fetches per-model capabilities via /api/show for accurate reasoning,
6
+ * vision, and context window detection.
10
7
  *
11
8
  * Requires OLLAMA_API_KEY with cloud access.
12
9
  * Get a free key at: https://ollama.com/settings/keys
13
10
  *
14
- * Responds to global free-only filter (shows models but warns they're freemium).
15
- *
16
11
  * Usage:
17
12
  * pi install git:github.com/apmantza/pi-free
18
13
  * # Set OLLAMA_API_KEY env var
19
14
  * # Models appear in /model selector
20
15
  * # Use /toggle-ollama to show all vs limited set
16
+ * # Use /probe-ollama to detect and hide 403 models
17
+ * # Use /ollama-cloud-refresh to re-fetch models live
21
18
  */
22
19
 
23
20
  import type {
24
21
  ExtensionAPI,
22
+ ExtensionCommandContext,
25
23
  ProviderModelConfig,
26
24
  } from "@earendil-works/pi-coding-agent";
27
25
  import {
@@ -37,12 +35,26 @@ import {
37
35
  PROVIDER_OLLAMA,
38
36
  } from "../../constants.ts";
39
37
  import { createLogger } from "../../lib/logger.ts";
38
+ import {
39
+ loadProviderCache,
40
+ saveProviderCache,
41
+ } from "../../lib/provider-cache.ts";
40
42
  import { registerWithGlobalToggle } from "../../lib/registry.ts";
41
43
  import { fetchWithRetry, fetchWithTimeout } from "../../lib/util.ts";
42
44
  import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
45
+ import { resolveThinkingMap } from "./thinking-levels.ts";
43
46
 
44
47
  const _logger = createLogger("ollama-cloud");
45
48
 
49
+ // =============================================================================
50
+ // Constants
51
+ // =============================================================================
52
+
53
+ /** Base URL for non-OpenAI-compatible endpoints (e.g. /api/show). */
54
+ const OLLAMA_API_BASE = BASE_URL_OLLAMA.replace(/\/v1\/?$/, "");
55
+ const DETAIL_FETCH_TIMEOUT_MS = 10000;
56
+ const DETAIL_CONCURRENCY = 8;
57
+
46
58
  // =============================================================================
47
59
  // Known 403 models (listed but return "access denied" on /v1/chat/completions)
48
60
  // These are models that appear in /v1/models but aren't provisioned for chat.
@@ -54,14 +66,120 @@ const OLLAMA_KNOWN_403_MODELS: ReadonlySet<string> = new Set([
54
66
  ]);
55
67
 
56
68
  // =============================================================================
57
- // Fetch + map
69
+ // Fallback models (used when API is unreachable and no cache exists)
58
70
  // =============================================================================
71
/**
 * Static model list registered at startup when no cached model list is
 * available. Every entry is text-only, zero-cost, capped at 32768 output
 * tokens and opts out of the developer role; entries flagged as reasoning
 * additionally carry a thinking-level map resolved for the
 * "thinking" + "tools" capability pair.
 */
const FALLBACK_MODELS: ProviderModelConfig[] = (
  [
    // [id, display name, context window, reasoning]
    ["glm-5.1", "GLM 5.1", 202752, false],
    ["gemma4:31b", "Gemma 4 31B", 262144, false],
    ["deepseek-v4-pro", "DeepSeek V4 Pro", 1000000, true],
    ["qwen3.5", "Qwen 3.5", 131072, true],
    ["kimi-k2.6", "Kimi K2.6", 131072, true],
  ] as const
).map(
  ([id, name, contextWindow, reasoning]): ProviderModelConfig => ({
    id,
    name,
    reasoning,
    // Only reasoning entries get a thinkingLevelMap key at all.
    ...(reasoning
      ? { thinkingLevelMap: resolveThinkingMap(id, ["thinking", "tools"]) }
      : {}),
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow,
    maxTokens: 32768,
    compat: { supportsDeveloperRole: false },
  }),
);
59
129
 
60
- async function fetchOllamaModels(
61
- apiKey: string,
62
- ): Promise<ProviderModelConfig[]> {
63
- // Use OpenAI-compatible /v1/models endpoint for consistency
64
- // The native /api/tags returns :cloud suffixes that may not work with /v1/chat/completions
130
+ // =============================================================================
131
+ // Types
132
+ // =============================================================================
133
+
134
/**
 * Shape of the JSON body returned by `POST /api/show`, as consumed in this
 * file.
 *
 * The body is cast from unvalidated JSON, so every field is declared
 * optional: consumers must guard with `?.` / `??` (as the assembly helpers
 * in this file already do) instead of trusting the server to send all keys.
 */
interface OllamaShowResponse {
  /** Descriptive metadata; `parameter_size` and `quantization_level` feed the display name. */
  details?: {
    parent_model?: string;
    format?: string;
    family?: string;
    families?: string[] | null;
    parameter_size?: string;
    quantization_level?: string;
  };
  /** Free-form key/value metadata; scanned for a `*.context_length` entry. */
  model_info?: Record<string, unknown>;
  /** Capability tags; this file checks for "tools", "thinking" and "vision". */
  capabilities?: string[];
  modified_at?: string;
}
148
+
149
+ // =============================================================================
150
+ // Utility: concurrent map with bounded parallelism
151
+ // =============================================================================
152
+
153
/**
 * Maps `items` through an async `fn` with at most `workers` calls in flight,
 * reporting each outcome in the same shape as `Promise.allSettled`.
 *
 * Results are positionally aligned with `items`. A failure inside `fn` is
 * captured as a `rejected` entry, so the returned promise does not reject
 * because of `fn`.
 *
 * @param items - Inputs to process.
 * @param workers - Upper bound on concurrent `fn` calls. `NaN` and values
 *   below 1 fall back to a single worker; values above `items.length`
 *   (including `Infinity`) are capped at `items.length`.
 * @param fn - Async mapper invoked once per item.
 * @returns One settled result per item, in input order.
 */
async function concurrentMap<T, R>(
  items: T[],
  workers: number,
  fn: (item: T) => Promise<R>,
): Promise<PromiseSettledResult<R>[]> {
  const results: PromiseSettledResult<R>[] = new Array(items.length);

  // Math.max(1, NaN) is NaN, which would create zero workers and leave every
  // result slot empty, so normalise NaN before clamping.
  const requested = Number.isNaN(workers) ? 1 : Math.floor(workers);
  // Never spawn more workers than there are items (also tames Infinity).
  const poolSize = Math.min(items.length, Math.max(1, requested));

  // Shared cursor: each worker claims the next unprocessed index.
  let next = 0;
  const runWorker = async (): Promise<void> => {
    while (next < items.length) {
      const index = next++;
      try {
        results[index] = {
          status: "fulfilled",
          value: await fn(items[index]),
        };
      } catch (reason) {
        results[index] = { status: "rejected", reason };
      }
    }
  };

  await Promise.all(Array.from({ length: poolSize }, () => runWorker()));
  return results;
}
177
+
178
+ // =============================================================================
179
+ // Fetch: /v1/models → list of model IDs
180
+ // =============================================================================
181
+
182
+ async function fetchModelIds(apiKey: string): Promise<string[]> {
65
183
  const response = await fetchWithRetry(
66
184
  `${BASE_URL_OLLAMA}/models`,
67
185
  {
@@ -77,63 +195,188 @@ async function fetchOllamaModels(
77
195
 
78
196
  if (!response.ok) {
79
197
  throw new Error(
80
- `Failed to fetch Ollama models: ${response.status} ${response.statusText}`,
198
+ `Failed to fetch Ollama model list: ${response.status} ${response.statusText}`,
81
199
  );
82
200
  }
83
201
 
84
202
  const json = (await response.json()) as {
85
203
  data?: Array<{ id: string; owned_by?: string }>;
86
204
  };
87
- const models = json.data ?? [];
205
+ return (json.data ?? []).map((m) => m.id);
206
+ }
88
207
 
89
- _logger.info(
90
- `[ollama-cloud] Fetched ${models.length} models from Ollama Cloud`,
208
+ // =============================================================================
209
+ // Fetch: /api/show per-model capabilities
210
+ // =============================================================================
211
+
212
/**
 * Fetches metadata for a single model by POSTing `{ model: modelId }` to
 * `${OLLAMA_API_BASE}/api/show` with a bearer token, bounded by
 * `DETAIL_FETCH_TIMEOUT_MS`.
 *
 * @param apiKey - Key sent as `Authorization: Bearer …`.
 * @param modelId - Model identifier placed in the request body.
 * @returns The parsed response body.
 * @throws Error when the HTTP status is not OK, or when the body is valid
 *   JSON but not a plain object (e.g. `null` or an array), so callers never
 *   receive a value whose property access would throw.
 */
async function fetchModelDetails(
  apiKey: string,
  modelId: string,
): Promise<OllamaShowResponse> {
  const response = await fetchWithTimeout(
    `${OLLAMA_API_BASE}/api/show`,
    {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ model: modelId }),
    },
    DETAIL_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `/api/show failed for ${modelId}: ${response.status} ${response.statusText}`,
    );
  }

  // Treat the body as unknown until its outer shape is confirmed; individual
  // fields remain unvalidated and must be read defensively by callers.
  const payload: unknown = await response.json();
  if (
    typeof payload !== "object" ||
    payload === null ||
    Array.isArray(payload)
  ) {
    throw new Error(`/api/show returned a non-object body for ${modelId}`);
  }

  return payload as OllamaShowResponse;
}
237
+
238
+ // =============================================================================
239
+ // Assembly: raw /api/show data → ProviderModelConfig[]
240
+ // =============================================================================
241
+
242
/**
 * Reads the context window size out of an `/api/show` `model_info` map.
 *
 * Scans the entries in insertion order and returns the first value whose key
 * ends in `.context_length` and which is a finite, positive number.
 * Non-numeric, zero, negative, `NaN` and infinite values are skipped so a
 * malformed entry cannot become the advertised context window.
 *
 * @param modelInfo - Key/value metadata from the show response.
 * @returns The detected context length, or 128000 when none is usable.
 */
function getContextLength(modelInfo: Record<string, unknown>): number {
  const fallbackContextLength = 128000;

  for (const [key, value] of Object.entries(modelInfo ?? {})) {
    if (!key.endsWith(".context_length")) continue;
    if (typeof value === "number" && Number.isFinite(value) && value > 0) {
      return value;
    }
  }

  return fallbackContextLength;
}
250
+
251
/**
 * Builds a human-readable display name from a model ID and its details.
 *
 * Dashes and colons in the ID become spaces. Whichever of parameter size and
 * quantization level are present (non-blank strings) are appended in
 * parentheses, comma-separated — so a model that reports only a quantization
 * level is still enriched rather than silently left bare.
 *
 * @param id - Raw model identifier, e.g. `gemma4:31b`.
 * @param details - `details` object from the show response; may be absent.
 * @returns The display name, e.g. `gemma4 31b (31B, Q4_K_M)`.
 */
function buildModelName(
  id: string,
  details: OllamaShowResponse["details"],
): string {
  // Convert dashes/colons to spaces for readability
  const base = id.replace(/[:-]/g, " ");

  const qualifiers = [
    details?.parameter_size,
    details?.quantization_level,
  ].filter(
    (part): part is string => typeof part === "string" && part.trim() !== "",
  );

  return qualifiers.length > 0 ? `${base} (${qualifiers.join(", ")})` : base;
}
108
274
 
109
- const result = applyHidden(
110
- chatModels.map(
111
- (m): ProviderModelConfig => ({
112
- id: m.id,
113
- name: m.id,
114
- // Try to infer reasoning from model name
115
- reasoning:
116
- m.id.toLowerCase().includes("reasoning") ||
117
- m.id.toLowerCase().includes("r1") ||
118
- m.id.toLowerCase().includes("thinking"),
119
- input: ["text"],
120
- // Ollama Cloud uses usage-based pricing (GPU time), not per-token
121
- // Free tier has limits but no direct cost per token
275
/**
 * Turns raw `/api/show` payloads (keyed by model ID) into provider model
 * configs.
 *
 * Models whose capability list does not include "tools" are dropped.
 * For the rest: "thinking" sets `reasoning`, "vision" adds image input, the
 * context window comes from `model_info`, and costs are all zero.
 *
 * @param raw - Show responses keyed by model ID.
 * @returns One config per tool-capable model, in `raw`'s entry order.
 */
function assembleModels(
  raw: Record<string, OllamaShowResponse>,
): ProviderModelConfig[] {
  return Object.entries(raw).flatMap(([id, data]) => {
    const capabilities = data.capabilities ?? [];
    if (!capabilities.includes("tools")) {
      return [];
    }

    const thinkingMap = resolveThinkingMap(id, capabilities);
    const acceptsImages = capabilities.includes("vision");

    return [
      {
        id,
        name: buildModelName(id, data.details),
        reasoning: capabilities.includes("thinking"),
        thinkingLevelMap: thinkingMap,
        input: (acceptsImages ? ["text", "image"] : ["text"]) as (
          | "text"
          | "image"
        )[],
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: getContextLength(data.model_info ?? {}),
        maxTokens: 32768,
        compat: {
          supportsDeveloperRole: false,
          // Flag is true exactly when a thinkingLevelMap was resolved.
          // NOTE(review): confirm against Pi's docs which way this flag
          // steers its built-in reasoning_effort handling.
          supportsReasoningEffort: thinkingMap != null,
        },
      },
    ];
  });
}
309
+
310
+ // =============================================================================
311
+ // Fetch all models (orchestrates /v1/models + /api/show)
312
+ // =============================================================================
313
+
314
/**
 * Fetches the full Ollama Cloud model list with per-model capabilities.
 *
 * Steps: list model IDs, drop known-403 and "embed" IDs, fetch `/api/show`
 * for each remaining ID with bounded concurrency, assemble the successful
 * responses into model configs, then apply the user's hidden-model filter.
 *
 * Individual detail failures are tolerated; each one is logged at debug
 * level together with its underlying reason.
 *
 * @param apiKey - Ollama API key used for every request.
 * @returns Model configs for all models whose details could be fetched.
 * @throws Error when the model list request fails or when no model details
 *   could be fetched at all.
 */
async function fetchAllModels(apiKey: string): Promise<ProviderModelConfig[]> {
  // Step 1: Get model IDs
  const modelIds = await fetchModelIds(apiKey);
  _logger.info(
    `[ollama-cloud] Found ${modelIds.length} model IDs, fetching details...`,
  );

  // Step 2: Filter out known-broken and embedding models early
  const candidateIds = modelIds.filter(
    (id) =>
      !OLLAMA_KNOWN_403_MODELS.has(id) && !id.toLowerCase().includes("embed"),
  );

  // Step 3: Fetch per-model details concurrently
  let succeeded = 0;
  let failed = 0;

  const detailResults = await concurrentMap(
    candidateIds,
    DETAIL_CONCURRENCY,
    async (id) => {
      try {
        const result = await fetchModelDetails(apiKey, id);
        succeeded++;
        return [id, result] as const;
      } catch (error) {
        failed++;
        // Keep the underlying reason: without it a failed model is
        // indistinguishable from any other in the logs.
        const reason = error instanceof Error ? error.message : String(error);
        _logger.debug(
          `[ollama-cloud] Detail fetch failed for ${id}: ${reason}`,
        );
        throw new Error(`detail fetch failed for ${id}: ${reason}`);
      } finally {
        const done = succeeded + failed;
        if (done % 10 === 0 || done === candidateIds.length) {
          _logger.debug(
            `[ollama-cloud] Detail progress: ${done}/${candidateIds.length} (${failed} failed)`,
          );
        }
      }
    },
  );

  // Step 4: Collect successful results
  const raw: Record<string, OllamaShowResponse> = {};
  for (const result of detailResults) {
    if (result.status === "fulfilled") {
      const [id, data] = result.value;
      raw[id] = data;
    }
  }

  const fetchedCount = Object.keys(raw).length;
  _logger.info(
    `[ollama-cloud] Fetched ${fetchedCount} model details` +
      (failed ? ` (${failed} failed)` : ""),
  );

  if (fetchedCount === 0) {
    throw new Error("Failed to fetch any model details");
  }

  // Step 5: Assemble into Pi model configs
  const models = assembleModels(raw);

  // Step 6: Apply user-configured hidden models
  return applyHidden(models, PROVIDER_OLLAMA);
}
138
381
 
139
382
  // =============================================================================
@@ -150,35 +393,35 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
150
393
  return;
151
394
  }
152
395
 
153
- // Fetch models
154
- let allModels: ProviderModelConfig[] = [];
396
+ // ── Try cache first for fast startup ────────────────────────────
397
+ let allModels: ProviderModelConfig[];
398
+ let fromCache = false;
155
399
 
156
- try {
157
- allModels = await fetchOllamaModels(apiKey);
158
- } catch (error) {
159
- _logger.error("[ollama-cloud] Failed to fetch models at startup", {
160
- error: error instanceof Error ? error.message : String(error),
161
- });
162
- return;
400
+ const cachedModels = loadProviderCache(PROVIDER_OLLAMA);
401
+ if (cachedModels && cachedModels.length > 0) {
402
+ allModels = cachedModels;
403
+ fromCache = true;
404
+ _logger.info(
405
+ `[ollama-cloud] Using ${cachedModels.length} cached models for fast startup`,
406
+ );
407
+ } else {
408
+ allModels = FALLBACK_MODELS;
409
+ _logger.info("[ollama-cloud] No cache available, using fallback models");
163
410
  }
164
411
 
165
- // For Ollama, all models share the same free tier
166
- // So "free" and "all" are the same set
412
+ // ── Register immediately with cached/fallback models ────────────
167
413
  const freeModels = allModels;
168
- const stored = { free: freeModels, all: allModels };
414
+ let stored = { free: freeModels, all: allModels };
169
415
  const hasKey = true;
170
416
 
171
- // Create re-register function
172
417
  const reRegister = createReRegister(pi, {
173
418
  providerId: PROVIDER_OLLAMA,
174
419
  baseUrl: BASE_URL_OLLAMA,
175
420
  apiKey,
176
421
  });
177
422
 
178
- // Register with global toggle system
179
423
  registerWithGlobalToggle(PROVIDER_OLLAMA, stored, reRegister, hasKey);
180
424
 
181
- // Register initial models
182
425
  const initialModels = getOllamaShowPaid() ? allModels : freeModels;
183
426
  pi.registerProvider(PROVIDER_OLLAMA, {
184
427
  baseUrl: BASE_URL_OLLAMA,
@@ -188,13 +431,55 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
188
431
  });
189
432
 
190
433
  _logger.info(
191
- `[ollama-cloud] Registered ${initialModels.length} models (usage-based free tier)`,
434
+ `[ollama-cloud] Registered ${initialModels.length} models` +
435
+ (fromCache ? " (from cache)" : " (fallback)") +
436
+ ", fetching fresh in background...",
192
437
  );
193
438
 
194
- // ── Probe command: test all registered models for 403s ─────────────
439
+ // ── Background refresh ─────────────────────────────────────────
440
+ async function refreshModels(): Promise<ProviderModelConfig[]> {
441
+ try {
442
+ const freshModels = await fetchAllModels(apiKey!);
443
+ saveProviderCache(PROVIDER_OLLAMA, freshModels);
444
+ return freshModels;
445
+ } catch (error) {
446
+ _logger.error("[ollama-cloud] Background refresh failed", {
447
+ error: error instanceof Error ? error.message : String(error),
448
+ });
449
+ // Return current models so we don't lose what we have
450
+ return allModels;
451
+ }
452
+ }
453
+
454
+ // ── /ollama-cloud-refresh command ───────────────────────────────
455
// Registers /ollama-cloud-refresh: re-fetches the model list, persists it to
// the provider cache and re-registers the provider with the fresh list.
pi.registerCommand("ollama-cloud-refresh", {
  description:
    "Re-fetch Ollama Cloud models from the API and update the provider live",
  handler: async (_args: string, ctx: ExtensionCommandContext) => {
    ctx.ui.notify("Refreshing Ollama Cloud models…", "info");
    try {
      const fresh = await fetchAllModels(apiKey!);
      saveProviderCache(PROVIDER_OLLAMA, fresh);
      allModels = fresh;
      // Update the existing object in place instead of rebinding `stored`:
      // this same object was passed to registerWithGlobalToggle earlier, so
      // a rebind would leave that reference pointing at the old lists.
      // NOTE(review): assumes the toggle registry keeps the reference —
      // confirm in lib/registry.ts.
      stored.free = fresh;
      stored.all = fresh;
      reRegister(fresh);
      ctx.ui.notify(
        `Registered ${fresh.length} Ollama Cloud models (refresh complete)`,
        "info",
      );
    } catch (error) {
      ctx.ui.notify(
        `Refresh failed: ${error instanceof Error ? error.message : String(error)}`,
        "error",
      );
    }
  },
});
478
+
479
+ // ── /probe-ollama command ───────────────────────────────────────
195
480
  pi.registerCommand("probe-ollama", {
196
481
  description: "Test all Ollama Cloud models for 403 'access denied' errors",
197
- handler: async (_args, ctx) => {
482
+ handler: async (_args: string, ctx: ExtensionCommandContext) => {
198
483
  if (!apiKey) {
199
484
  ctx.ui.notify("OLLAMA_API_KEY not set", "error");
200
485
  return;
@@ -228,13 +513,21 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
228
513
  const config = loadConfigFile();
229
514
  const existingHidden = new Set(config.hidden_models ?? []);
230
515
  for (const id of notFound) existingHidden.add(`${PROVIDER_OLLAMA}/${id}`);
231
- saveConfig({ hidden_models: Array.from(existingHidden) });
232
-
233
- // Re-register so hidden models disappear immediately
234
- const filtered = await fetchOllamaModels(apiKey);
235
- stored.free = filtered;
236
- stored.all = filtered;
237
- reRegister(filtered);
516
+ saveConfig({
517
+ hidden_models: Array.from(existingHidden),
518
+ });
519
+
520
+ // Re-fetch and re-register so hidden models disappear immediately
521
try {
  const fresh = await fetchAllModels(apiKey!);
  saveProviderCache(PROVIDER_OLLAMA, fresh);
  allModels = fresh;
  // Update the existing object in place instead of rebinding `stored`: this
  // same object was passed to registerWithGlobalToggle earlier.
  // NOTE(review): assumes the toggle registry keeps the reference — confirm
  // in lib/registry.ts.
  stored.free = fresh;
  stored.all = fresh;
  reRegister(fresh);
} catch (error) {
  _logger.error("[ollama-cloud] Refresh after probe failed", {
    error: error instanceof Error ? error.message : String(error),
  });
  // Refetch failed: fall back to the models we already have, but still run
  // them through the hidden-model filter so the IDs hidden just above drop
  // out. NOTE(review): relies on applyHidden seeing the config saved above,
  // the same assumption fetchAllModels makes on the success path.
  const visible = applyHidden(allModels, PROVIDER_OLLAMA);
  allModels = visible;
  stored.free = visible;
  stored.all = visible;
  reRegister(visible);
}
238
531
 
239
532
  ctx.ui.notify(
240
533
  `Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
@@ -243,22 +536,44 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
243
536
  },
244
537
  });
245
538
 
246
- // ── Status bar for provider selection ─────────────────────────
539
+ // ── Status bar for provider selection ───────────────────────────
247
540
 
248
- pi.on("model_select", (_event, ctx) => {
541
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
542
+ pi.on("model_select" as any, (_event: any, ctx: any) => {
249
543
  if (_event.model?.provider !== PROVIDER_OLLAMA) {
250
544
  ctx.ui.setStatus(`${PROVIDER_OLLAMA}-status`, undefined);
251
545
  return;
252
546
  }
253
547
 
254
548
  const count = allModels.length;
255
- ctx.ui.setStatus(
256
- `${PROVIDER_OLLAMA}-status`,
257
- `ollama: ${count} models (usage-based)`,
258
- );
549
+ ctx.ui.setStatus(`${PROVIDER_OLLAMA}-status`, `ollama: ${count} models`);
550
+ });
551
+
552
+ // ── Background refresh on session_start ─────────────────────────
553
// Guards the background refresh so it runs for the first session_start only.
let bgRefreshed = false;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
pi.on("session_start" as any, async (_event: any, ctx: any) => {
  if (bgRefreshed) {
    return;
  }
  bgRefreshed = true;

  try {
    // refreshModels() logs its own fetch failures and then resolves with the
    // current list, so `fresh` may simply be the models we already had.
    const fresh = await refreshModels();
    allModels = fresh;
    // Update the existing object in place instead of rebinding `stored`:
    // this same object was passed to registerWithGlobalToggle earlier.
    // NOTE(review): assumes the toggle registry keeps the reference —
    // confirm in lib/registry.ts.
    stored.free = fresh;
    stored.all = fresh;
    reRegister(fresh);
    ctx.ui.notify(`Ollama Cloud: ${fresh.length} models ready`, "info");
  } catch (error) {
    // Only re-registration or notification errors can reach this point, and
    // nothing else logs them.
    _logger.error("[ollama-cloud] Applying background refresh failed", {
      error: error instanceof Error ? error.message : String(error),
    });
  }
});
260
571
  }
261
572
 
573
+ // =============================================================================
574
+ // Probe helper
575
+ // =============================================================================
576
+
262
577
  /**
263
578
  * Probe a single Ollama model with a minimal chat request.
264
579
  * Returns true if the model is accessible (not 403), false if it 403s.
@@ -283,7 +598,7 @@ async function probeOllamaModel(
283
598
  max_tokens: 1,
284
599
  }),
285
600
  },
286
- 10000, // 10 second timeout
601
+ 10000,
287
602
  );
288
603
  // 403 = access denied (model not provisioned)
289
604
  // 200/400/401/etc = at least accessible