pi-free 2.0.2 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,280 +1,295 @@
1
- /**
2
- * Ollama Cloud Provider Extension
3
- *
4
- * Provides access to Ollama's cloud-hosted models via ollama.com API.
5
- * All models use Ollama's usage-based pricing system:
6
- * - Free tier: Unlimited public models (session limits reset every 5 hours,
7
- * weekly limits reset every 7 days)
8
- * - Pro tier: 50x more cloud usage than Free
9
- * - Max tier: 5x more usage than Pro
10
- *
11
- * Requires OLLAMA_API_KEY with cloud access.
12
- * Get a free key at: https://ollama.com/settings/keys
13
- *
14
- * Responds to global free-only filter (shows models but warns they're freemium).
15
- *
16
- * Usage:
17
- * pi install git:github.com/apmantza/pi-free
18
- * # Set OLLAMA_API_KEY env var
19
- * # Models appear in /model selector
20
- * # Use /toggle-ollama to show all vs limited set
21
- */
22
-
23
- import type {
24
- ExtensionAPI,
25
- ProviderModelConfig,
26
- } from "@mariozechner/pi-coding-agent";
27
- import {
28
- applyHidden,
29
- getOllamaApiKey,
30
- getOllamaShowPaid,
31
- loadConfigFile,
32
- saveConfig,
33
- } from "../../config.ts";
34
- import {
35
- BASE_URL_OLLAMA,
36
- DEFAULT_FETCH_TIMEOUT_MS,
37
- PROVIDER_OLLAMA,
38
- } from "../../constants.ts";
39
- import { createLogger } from "../../lib/logger.ts";
40
- import { registerWithGlobalToggle } from "../../lib/registry.ts";
41
- import { fetchWithRetry, fetchWithTimeout } from "../../lib/util.ts";
42
- import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
43
-
44
/** Logger for this provider, created under the "ollama-cloud" name. */
const _logger = createLogger("ollama-cloud");

/**
 * Model IDs that are listed by /v1/models but answer "access denied" (403)
 * on /v1/chat/completions because they are not provisioned for chat.
 *
 * Add new IDs here as the /probe-ollama command surfaces them.
 */
const OLLAMA_KNOWN_403_MODELS: ReadonlySet<string> = new Set<string>([
  // "model-id-that-403s",
]);
55
-
56
- // =============================================================================
57
- // Fetch + map
58
- // =============================================================================
59
-
60
/**
 * Fetch the Ollama Cloud model list and convert it into provider model configs.
 *
 * Uses the OpenAI-compatible `/models` endpoint rather than the native
 * `/api/tags`, whose `:cloud` suffixes may not work with `/v1/chat/completions`.
 * Embedding models and IDs in `OLLAMA_KNOWN_403_MODELS` are dropped, and the
 * remainder is passed through `applyHidden` for `PROVIDER_OLLAMA`.
 *
 * The response body is validated instead of trusted: a missing or non-array
 * `data` field yields an empty list and entries without a string `id` are
 * skipped, so a malformed payload cannot crash the mapping with a TypeError.
 *
 * @param apiKey - Ollama API key, sent as a Bearer token.
 * @returns The model configs after filtering and `applyHidden`.
 * @throws Error if the endpoint responds with a non-OK HTTP status.
 */
async function fetchOllamaModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  const response = await fetchWithRetry(
    `${BASE_URL_OLLAMA}/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    // NOTE(review): presumably retry count and delay in ms — confirm against fetchWithRetry.
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `Failed to fetch Ollama models: ${response.status} ${response.statusText}`,
    );
  }

  // Narrow the untyped JSON step by step rather than asserting its shape.
  const payload: unknown = await response.json();
  const listed: unknown =
    typeof payload === "object" && payload !== null
      ? (payload as { data?: unknown }).data
      : undefined;
  const models: unknown[] = Array.isArray(listed) ? listed : [];

  _logger.info(
    `[ollama-cloud] Fetched ${models.length} models from Ollama Cloud`,
  );

  const chatModelIds = models
    .map((entry) =>
      typeof entry === "object" && entry !== null
        ? (entry as { id?: unknown }).id
        : undefined,
    )
    .filter((id): id is string => typeof id === "string")
    .filter((id) => {
      // Embedding-only models typically have "embed" in their name.
      if (id.toLowerCase().includes("embed")) return false;
      // Listed by /models but known to 403 on chat completions.
      return !OLLAMA_KNOWN_403_MODELS.has(id);
    });

  return applyHidden(
    chatModelIds.map((id): ProviderModelConfig => {
      const lowered = id.toLowerCase();
      return {
        id,
        name: id,
        // Reasoning support is only inferred from the model name.
        reasoning:
          lowered.includes("reasoning") ||
          lowered.includes("r1") ||
          lowered.includes("thinking"),
        input: ["text"],
        // Ollama Cloud pricing is usage-based (GPU time), not per-token,
        // so every per-token cost is reported as 0.
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        // Defaults: /v1/models does not expose per-model limits.
        contextWindow: 32768,
        maxTokens: 4096,
      };
    }),
    PROVIDER_OLLAMA,
  );
}
138
-
139
- // =============================================================================
140
- // Extension Entry Point
141
- // =============================================================================
142
-
143
/**
 * Extension entry point: registers Ollama Cloud as a provider.
 *
 * Returns without registering anything when no API key is configured or when
 * the initial model fetch fails. Otherwise it registers the provider with the
 * global toggle system and with `pi`, and adds the `probe-ollama` command.
 *
 * The probe command tests every known model with a minimal chat request,
 * stores the ones that answer 403 in `hidden_models` (as
 * `<provider>/<model id>`), and re-registers the remaining models. The
 * in-memory model list is refreshed afterwards so a later probe does not
 * re-test models that were already hidden.
 *
 * @param pi - Extension API used to register the provider and the command.
 */
export default async function (pi: ExtensionAPI) {
  const apiKey = getOllamaApiKey();

  if (!apiKey) {
    _logger.info(
      "[ollama-cloud] Skipping - OLLAMA_API_KEY not set (env var or ~/.pi/free.json)",
    );
    return;
  }

  // Reassigned after a successful probe so later probes see the current list.
  let allModels: ProviderModelConfig[] = [];

  try {
    allModels = await fetchOllamaModels(apiKey);
  } catch (error) {
    _logger.error("[ollama-cloud] Failed to fetch models at startup", {
      error: error instanceof Error ? error.message : String(error),
    });
    return;
  }

  // All Ollama models share the same free tier, so "free" and "all" are one set.
  const freeModels = allModels;
  const stored = { free: freeModels, all: allModels };
  const hasKey = true;

  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_OLLAMA,
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
  });

  registerWithGlobalToggle(PROVIDER_OLLAMA, stored, reRegister, hasKey);

  const initialModels = getOllamaShowPaid() ? allModels : freeModels;
  pi.registerProvider(PROVIDER_OLLAMA, {
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
    api: "openai-completions" as const,
    models: enhanceWithCI(initialModels),
  });

  _logger.info(
    `[ollama-cloud] Registered ${initialModels.length} models (usage-based free tier)`,
  );

  // ── Probe command: test all registered models for 403s ─────────────
  pi.registerCommand("probe-ollama", {
    description: "Test all Ollama Cloud models for 403 'access denied' errors",
    handler: async (_args, ctx) => {
      // Defensive: startup already returned when the key was missing.
      if (!apiKey) {
        ctx.ui.notify("OLLAMA_API_KEY not set", "error");
        return;
      }

      const modelsToTest = allModels;
      ctx.ui.notify(`Probing ${modelsToTest.length} Ollama models…`, "info");

      const notFound: string[] = [];
      const batchSize = 5;

      // Probe in batches so at most `batchSize` requests are in flight at once.
      for (let i = 0; i < modelsToTest.length; i += batchSize) {
        const batch = modelsToTest.slice(i, i + batchSize);
        const results = await Promise.all(
          batch.map(async (m) => ({
            id: m.id,
            ok: await probeOllamaModel(apiKey, m.id),
          })),
        );
        for (const r of results) {
          if (!r.ok) notFound.push(r.id);
        }
      }

      if (notFound.length === 0) {
        ctx.ui.notify("All Ollama models are accessible ✅", "info");
        return;
      }

      // Auto-hide 403 models in config (provider-scoped keys).
      const config = loadConfigFile();
      const existingHidden = new Set(config.hidden_models ?? []);
      for (const id of notFound) existingHidden.add(`${PROVIDER_OLLAMA}/${id}`);
      saveConfig({ hidden_models: Array.from(existingHidden) });

      // Re-fetch so the hidden models disappear immediately. If the refresh
      // fails, fall back to dropping the broken IDs from the list we already
      // have, so the command still re-registers and reports its findings.
      let refreshed: ProviderModelConfig[];
      try {
        refreshed = await fetchOllamaModels(apiKey);
      } catch (error) {
        _logger.error("[ollama-cloud] Failed to refresh models after probe", {
          error: error instanceof Error ? error.message : String(error),
        });
        const broken = new Set(notFound);
        refreshed = allModels.filter((m) => !broken.has(m.id));
      }

      allModels = refreshed;
      stored.free = refreshed;
      stored.all = refreshed;
      reRegister(refreshed);

      ctx.ui.notify(
        `Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
        "warning",
      );
    },
  });
}
246
-
247
/**
 * Check whether a model can be used by sending it a one-token chat request.
 *
 * @param apiKey - Ollama API key, sent as a Bearer token.
 * @param modelId - ID of the model to probe.
 * @returns `false` only when the endpoint answers 403 (access denied);
 *   `true` for every other status and when the request itself throws,
 *   since network errors and timeouts are not "access denied".
 */
async function probeOllamaModel(
  apiKey: string,
  modelId: string,
): Promise<boolean> {
  const timeoutMs = 10000; // 10 second timeout
  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
      "User-Agent": "pi-free-providers",
    },
    body: JSON.stringify({
      model: modelId,
      messages: [{ role: "user", content: "hi" }],
      max_tokens: 1,
    }),
  };

  let status: number;
  try {
    ({ status } = await fetchWithTimeout(
      `${BASE_URL_OLLAMA}/chat/completions`,
      requestInit,
      timeoutMs,
    ));
  } catch {
    return true;
  }

  // 403 = model not provisioned; 200/400/401/etc. count as accessible.
  return status !== 403;
}
1
+ /**
2
+ * Ollama Cloud Provider Extension
3
+ *
4
+ * Provides access to Ollama's cloud-hosted models via ollama.com API.
5
+ * All models use Ollama's usage-based pricing system:
6
+ * - Free tier: Unlimited public models (session limits reset every 5 hours,
7
+ * weekly limits reset every 7 days)
8
+ * - Pro tier: 50x more cloud usage than Free
9
+ * - Max tier: 5x more usage than Pro
10
+ *
11
+ * Requires OLLAMA_API_KEY with cloud access.
12
+ * Get a free key at: https://ollama.com/settings/keys
13
+ *
14
+ * Responds to global free-only filter (shows models but warns they're freemium).
15
+ *
16
+ * Usage:
17
+ * pi install git:github.com/apmantza/pi-free
18
+ * # Set OLLAMA_API_KEY env var
19
+ * # Models appear in /model selector
20
+ * # Use /toggle-ollama to show all vs limited set
21
+ */
22
+
23
+ import type {
24
+ ExtensionAPI,
25
+ ProviderModelConfig,
26
+ } from "@mariozechner/pi-coding-agent";
27
+ import {
28
+ applyHidden,
29
+ getOllamaApiKey,
30
+ getOllamaShowPaid,
31
+ loadConfigFile,
32
+ saveConfig,
33
+ } from "../../config.ts";
34
+ import {
35
+ BASE_URL_OLLAMA,
36
+ DEFAULT_FETCH_TIMEOUT_MS,
37
+ PROVIDER_OLLAMA,
38
+ } from "../../constants.ts";
39
+ import { createLogger } from "../../lib/logger.ts";
40
+ import { registerWithGlobalToggle } from "../../lib/registry.ts";
41
+ import { fetchWithRetry, fetchWithTimeout } from "../../lib/util.ts";
42
+ import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
43
+
44
/** Logger for this provider, created under the "ollama-cloud" name. */
const _logger = createLogger("ollama-cloud");

/**
 * Model IDs that are listed by /v1/models but answer "access denied" (403)
 * on /v1/chat/completions because they are not provisioned for chat.
 *
 * Add new IDs here as the /probe-ollama command surfaces them.
 */
const OLLAMA_KNOWN_403_MODELS: ReadonlySet<string> = new Set<string>([
  // "model-id-that-403s",
]);
55
+
56
+ // =============================================================================
57
+ // Fetch + map
58
+ // =============================================================================
59
+
60
/**
 * Fetch the Ollama Cloud model list and convert it into provider model configs.
 *
 * Uses the OpenAI-compatible `/models` endpoint rather than the native
 * `/api/tags`, whose `:cloud` suffixes may not work with `/v1/chat/completions`.
 * Embedding models and IDs in `OLLAMA_KNOWN_403_MODELS` are dropped, and the
 * remainder is passed through `applyHidden` for `PROVIDER_OLLAMA`.
 *
 * The response body is validated instead of trusted: a missing or non-array
 * `data` field yields an empty list and entries without a string `id` are
 * skipped, so a malformed payload cannot crash the mapping with a TypeError.
 *
 * @param apiKey - Ollama API key, sent as a Bearer token.
 * @returns The model configs after filtering and `applyHidden`.
 * @throws Error if the endpoint responds with a non-OK HTTP status.
 */
async function fetchOllamaModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  const response = await fetchWithRetry(
    `${BASE_URL_OLLAMA}/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    // NOTE(review): presumably retry count and delay in ms — confirm against fetchWithRetry.
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `Failed to fetch Ollama models: ${response.status} ${response.statusText}`,
    );
  }

  // Narrow the untyped JSON step by step rather than asserting its shape.
  const payload: unknown = await response.json();
  const listed: unknown =
    typeof payload === "object" && payload !== null
      ? (payload as { data?: unknown }).data
      : undefined;
  const models: unknown[] = Array.isArray(listed) ? listed : [];

  _logger.info(
    `[ollama-cloud] Fetched ${models.length} models from Ollama Cloud`,
  );

  const chatModelIds = models
    .map((entry) =>
      typeof entry === "object" && entry !== null
        ? (entry as { id?: unknown }).id
        : undefined,
    )
    .filter((id): id is string => typeof id === "string")
    .filter((id) => {
      // Embedding-only models typically have "embed" in their name.
      if (id.toLowerCase().includes("embed")) return false;
      // Listed by /models but known to 403 on chat completions.
      return !OLLAMA_KNOWN_403_MODELS.has(id);
    });

  return applyHidden(
    chatModelIds.map((id): ProviderModelConfig => {
      const lowered = id.toLowerCase();
      return {
        id,
        name: id,
        // Reasoning support is only inferred from the model name.
        reasoning:
          lowered.includes("reasoning") ||
          lowered.includes("r1") ||
          lowered.includes("thinking"),
        input: ["text"],
        // Ollama Cloud pricing is usage-based (GPU time), not per-token,
        // so every per-token cost is reported as 0.
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        // Defaults: /v1/models does not expose per-model limits.
        contextWindow: 32768,
        maxTokens: 4096,
      };
    }),
    PROVIDER_OLLAMA,
  );
}
138
+
139
+ // =============================================================================
140
+ // Extension Entry Point
141
+ // =============================================================================
142
+
143
/**
 * Extension entry point: registers Ollama Cloud as a provider.
 *
 * Returns without registering anything when no API key is configured or when
 * the initial model fetch fails. Otherwise it registers the provider with the
 * global toggle system and with `pi`, adds the `probe-ollama` command, and
 * installs a `model_select` listener that maintains a status entry.
 *
 * The probe command tests every known model with a minimal chat request,
 * stores the ones that answer 403 in `hidden_models` (as
 * `<provider>/<model id>`), and re-registers the remaining models. The
 * in-memory model list is refreshed afterwards so the status count and any
 * later probe reflect the models that are actually registered.
 *
 * @param pi - Extension API used to register the provider, command and listener.
 */
export default async function (pi: ExtensionAPI) {
  const apiKey = getOllamaApiKey();

  if (!apiKey) {
    _logger.info(
      "[ollama-cloud] Skipping - OLLAMA_API_KEY not set (env var or ~/.pi/free.json)",
    );
    return;
  }

  // Reassigned after a successful probe so the status bar and later probes
  // see the current list instead of the startup snapshot.
  let allModels: ProviderModelConfig[] = [];

  try {
    allModels = await fetchOllamaModels(apiKey);
  } catch (error) {
    _logger.error("[ollama-cloud] Failed to fetch models at startup", {
      error: error instanceof Error ? error.message : String(error),
    });
    return;
  }

  // All Ollama models share the same free tier, so "free" and "all" are one set.
  const freeModels = allModels;
  const stored = { free: freeModels, all: allModels };
  const hasKey = true;

  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_OLLAMA,
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
  });

  registerWithGlobalToggle(PROVIDER_OLLAMA, stored, reRegister, hasKey);

  const initialModels = getOllamaShowPaid() ? allModels : freeModels;
  pi.registerProvider(PROVIDER_OLLAMA, {
    baseUrl: BASE_URL_OLLAMA,
    apiKey,
    api: "openai-completions" as const,
    models: enhanceWithCI(initialModels),
  });

  _logger.info(
    `[ollama-cloud] Registered ${initialModels.length} models (usage-based free tier)`,
  );

  // ── Probe command: test all registered models for 403s ─────────────
  pi.registerCommand("probe-ollama", {
    description: "Test all Ollama Cloud models for 403 'access denied' errors",
    handler: async (_args, ctx) => {
      // Defensive: startup already returned when the key was missing.
      if (!apiKey) {
        ctx.ui.notify("OLLAMA_API_KEY not set", "error");
        return;
      }

      const modelsToTest = allModels;
      ctx.ui.notify(`Probing ${modelsToTest.length} Ollama models…`, "info");

      const notFound: string[] = [];
      const batchSize = 5;

      // Probe in batches so at most `batchSize` requests are in flight at once.
      for (let i = 0; i < modelsToTest.length; i += batchSize) {
        const batch = modelsToTest.slice(i, i + batchSize);
        const results = await Promise.all(
          batch.map(async (m) => ({
            id: m.id,
            ok: await probeOllamaModel(apiKey, m.id),
          })),
        );
        for (const r of results) {
          if (!r.ok) notFound.push(r.id);
        }
      }

      if (notFound.length === 0) {
        ctx.ui.notify("All Ollama models are accessible ✅", "info");
        return;
      }

      // Auto-hide 403 models in config (provider-scoped keys).
      const config = loadConfigFile();
      const existingHidden = new Set(config.hidden_models ?? []);
      for (const id of notFound) existingHidden.add(`${PROVIDER_OLLAMA}/${id}`);
      saveConfig({ hidden_models: Array.from(existingHidden) });

      // Re-fetch so the hidden models disappear immediately. If the refresh
      // fails, fall back to dropping the broken IDs from the list we already
      // have, so the command still re-registers and reports its findings.
      let refreshed: ProviderModelConfig[];
      try {
        refreshed = await fetchOllamaModels(apiKey);
      } catch (error) {
        _logger.error("[ollama-cloud] Failed to refresh models after probe", {
          error: error instanceof Error ? error.message : String(error),
        });
        const broken = new Set(notFound);
        refreshed = allModels.filter((m) => !broken.has(m.id));
      }

      allModels = refreshed;
      stored.free = refreshed;
      stored.all = refreshed;
      reRegister(refreshed);

      ctx.ui.notify(
        `Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
        "warning",
      );
    },
  });

  // ── Status bar for provider selection ─────────────────────────
  pi.on("model_select", (event, ctx) => {
    const statusKey = `${PROVIDER_OLLAMA}-status`;

    // Clear our status entry whenever another provider's model is selected.
    if (event.model?.provider !== PROVIDER_OLLAMA) {
      ctx.ui.setStatus(statusKey, undefined);
      return;
    }

    ctx.ui.setStatus(
      statusKey,
      `ollama: ${allModels.length} models (usage-based)`,
    );
  });
}
261
+
262
/**
 * Check whether a model can be used by sending it a one-token chat request.
 *
 * @param apiKey - Ollama API key, sent as a Bearer token.
 * @param modelId - ID of the model to probe.
 * @returns `false` only when the endpoint answers 403 (access denied);
 *   `true` for every other status and when the request itself throws,
 *   since network errors and timeouts are not "access denied".
 */
async function probeOllamaModel(
  apiKey: string,
  modelId: string,
): Promise<boolean> {
  const timeoutMs = 10000; // 10 second timeout
  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
      "User-Agent": "pi-free-providers",
    },
    body: JSON.stringify({
      model: modelId,
      messages: [{ role: "user", content: "hi" }],
      max_tokens: 1,
    }),
  };

  let status: number;
  try {
    ({ status } = await fetchWithTimeout(
      `${BASE_URL_OLLAMA}/chat/completions`,
      requestInit,
      timeoutMs,
    ));
  } catch {
    return true;
  }

  // 403 = model not provisioned; 200/400/401/etc. count as accessible.
  return status !== 403;
}
@@ -1,3 +1,5 @@
1
+ import { randomUUID } from "node:crypto";
2
+
1
3
  /**
2
4
  * Shared OpenCode session/request tracking.
3
5
  *
@@ -9,10 +11,7 @@ export function createOpenCodeSessionTracker() {
9
11
  let requestCount = 0;
10
12
 
11
13
  function generateId(): string {
12
- return (
13
- Math.random().toString(36).substring(2, 15) +
14
- Math.random().toString(36).substring(2, 15)
15
- );
14
+ return randomUUID().replace(/-/g, "");
16
15
  }
17
16
 
18
17
  function getSessionId(): string {