pi-free 2.0.9 → 2.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +576 -544
- package/README.md +16 -0
- package/banner.svg +12 -10
- package/config.ts +86 -20
- package/constants.ts +3 -0
- package/index.ts +3 -0
- package/lib/util.ts +72 -8
- package/package.json +1 -1
- package/providers/crofai/crofai.ts +106 -15
- package/providers/deepinfra/deepinfra.ts +108 -11
- package/providers/ollama/ollama.ts +400 -85
- package/providers/ollama/thinking-levels.ts +96 -0
- package/providers/together/together.ts +197 -0
- package/providers/zenmux/zenmux.ts +32 -17
|
@@ -2,26 +2,24 @@
|
|
|
2
2
|
* Ollama Cloud Provider Extension
|
|
3
3
|
*
|
|
4
4
|
* Provides access to Ollama's cloud-hosted models via ollama.com API.
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
* weekly limits reset every 7 days)
|
|
8
|
-
* - Pro tier: 50x more cloud usage than Free
|
|
9
|
-
* - Max tier: 5x more usage than Pro
|
|
5
|
+
* Fetches per-model capabilities via /api/show for accurate reasoning,
|
|
6
|
+
* vision, and context window detection.
|
|
10
7
|
*
|
|
11
8
|
* Requires OLLAMA_API_KEY with cloud access.
|
|
12
9
|
* Get a free key at: https://ollama.com/settings/keys
|
|
13
10
|
*
|
|
14
|
-
* Responds to global free-only filter (shows models but warns they're freemium).
|
|
15
|
-
*
|
|
16
11
|
* Usage:
|
|
17
12
|
* pi install git:github.com/apmantza/pi-free
|
|
18
13
|
* # Set OLLAMA_API_KEY env var
|
|
19
14
|
* # Models appear in /model selector
|
|
20
15
|
* # Use /toggle-ollama to show all vs limited set
|
|
16
|
+
* # Use /probe-ollama to detect and hide 403 models
|
|
17
|
+
* # Use /ollama-cloud-refresh to re-fetch models live
|
|
21
18
|
*/
|
|
22
19
|
|
|
23
20
|
import type {
|
|
24
21
|
ExtensionAPI,
|
|
22
|
+
ExtensionCommandContext,
|
|
25
23
|
ProviderModelConfig,
|
|
26
24
|
} from "@earendil-works/pi-coding-agent";
|
|
27
25
|
import {
|
|
@@ -37,12 +35,26 @@ import {
|
|
|
37
35
|
PROVIDER_OLLAMA,
|
|
38
36
|
} from "../../constants.ts";
|
|
39
37
|
import { createLogger } from "../../lib/logger.ts";
|
|
38
|
+
import {
|
|
39
|
+
loadProviderCache,
|
|
40
|
+
saveProviderCache,
|
|
41
|
+
} from "../../lib/provider-cache.ts";
|
|
40
42
|
import { registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
41
43
|
import { fetchWithRetry, fetchWithTimeout } from "../../lib/util.ts";
|
|
42
44
|
import { createReRegister, enhanceWithCI } from "../../provider-helper.ts";
|
|
45
|
+
import { resolveThinkingMap } from "./thinking-levels.ts";
|
|
43
46
|
|
|
44
47
|
const _logger = createLogger("ollama-cloud");
|
|
45
48
|
|
|
49
|
+
// =============================================================================
|
|
50
|
+
// Constants
|
|
51
|
+
// =============================================================================
|
|
52
|
+
|
|
53
|
+
/** Base URL for non-OpenAI-compatible endpoints (e.g. /api/show). */
|
|
54
|
+
const OLLAMA_API_BASE = BASE_URL_OLLAMA.replace(/\/v1\/?$/, "");
|
|
55
|
+
const DETAIL_FETCH_TIMEOUT_MS = 10000;
|
|
56
|
+
const DETAIL_CONCURRENCY = 8;
|
|
57
|
+
|
|
46
58
|
// =============================================================================
|
|
47
59
|
// Known 403 models (listed but return "access denied" on /v1/chat/completions)
|
|
48
60
|
// These are models that appear in /v1/models but aren't provisioned for chat.
|
|
@@ -54,14 +66,120 @@ const OLLAMA_KNOWN_403_MODELS: ReadonlySet<string> = new Set([
|
|
|
54
66
|
]);
|
|
55
67
|
|
|
56
68
|
// =============================================================================
|
|
57
|
-
//
|
|
69
|
+
// Fallback models (used when API is unreachable and no cache exists)
|
|
58
70
|
// =============================================================================
|
|
71
|
+
/**
 * Built-in model list used when neither a cached list nor a live API result
 * is available. Every entry is text-only, zero-cost, capped at 32768 output
 * tokens and opts out of the developer role; only id, display name, reasoning
 * flag and context window vary per model.
 *
 * Reasoning-capable entries additionally carry a `thinkingLevelMap` resolved
 * from the model id with the "thinking" and "tools" capabilities; the
 * non-reasoning entries have no such key at all.
 */
const FALLBACK_MODELS: ProviderModelConfig[] = (
	[
		// [id, display name, reasoning, context window]
		["glm-5.1", "GLM 5.1", false, 202752],
		["gemma4:31b", "Gemma 4 31B", false, 262144],
		["deepseek-v4-pro", "DeepSeek V4 Pro", true, 1000000],
		["qwen3.5", "Qwen 3.5", true, 131072],
		["kimi-k2.6", "Kimi K2.6", true, 131072],
	] as [string, string, boolean, number][]
).map(
	([id, name, reasoning, contextWindow]): ProviderModelConfig => ({
		id,
		name,
		reasoning,
		// Only reasoning models get the key, so the others stay free of an
		// explicit `thinkingLevelMap: undefined` property.
		...(reasoning
			? { thinkingLevelMap: resolveThinkingMap(id, ["thinking", "tools"]) }
			: {}),
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow,
		maxTokens: 32768,
		compat: { supportsDeveloperRole: false },
	}),
);
|
|
59
129
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
130
|
+
// =============================================================================
|
|
131
|
+
// Types
|
|
132
|
+
// =============================================================================
|
|
133
|
+
|
|
134
|
+
/**
 * Response body of `POST /api/show` as consumed by this provider.
 *
 * The JSON is cast to this shape without runtime validation, so the code that
 * reads it still guards `details`, `model_info` and `capabilities` against
 * being absent.
 */
interface OllamaShowResponse {
	/** Descriptive metadata; `parameter_size` and `quantization_level` feed the display name. */
	details: {
		parent_model: string;
		format: string;
		family: string;
		families: Array<string> | null;
		parameter_size: string;
		quantization_level: string;
	};
	/** Free-form key/value metadata; scanned for a key ending in `.context_length`. */
	model_info: Record<string, unknown>;
	/** Capability tags; this file checks for "tools", "thinking" and "vision". */
	capabilities: Array<string>;
	modified_at: string;
}
|
|
148
|
+
|
|
149
|
+
// =============================================================================
|
|
150
|
+
// Utility: concurrent map with bounded parallelism
|
|
151
|
+
// =============================================================================
|
|
152
|
+
|
|
153
|
+
/**
 * Run `fn` over every item with at most `workers` calls in flight at once.
 *
 * Never rejects because of `fn`: each outcome is captured in the
 * `Promise.allSettled` result shape, at the same index as its input item.
 *
 * @param items   Inputs to process; results keep this order.
 * @param workers Desired parallelism. Clamped to an integer in
 *                `[1, items.length]`; `NaN` is treated as 1 and `Infinity` as
 *                "one worker per item", so a bad value can neither skip all
 *                work nor throw.
 * @param fn      Async operation applied to each item.
 * @returns One settled result per input item.
 */
async function concurrentMap<T, R>(
	items: T[],
	workers: number,
	fn: (item: T) => Promise<R>,
): Promise<PromiseSettledResult<R>[]> {
	const results: PromiseSettledResult<R>[] = new Array(items.length);

	// Math.max(1, NaN) is NaN, which would make Array.from create zero workers
	// and leave `results` full of holes; an infinite length would throw.
	const requested = Number.isNaN(workers) ? 1 : Math.floor(workers);
	const poolSize = Math.min(items.length, Math.max(1, requested));

	// Shared cursor: each worker claims the next unprocessed index. The
	// read-and-increment is synchronous, so no index is handed out twice.
	let next = 0;
	const runWorker = async (): Promise<void> => {
		while (next < items.length) {
			const index = next++;
			try {
				results[index] = {
					status: "fulfilled",
					value: await fn(items[index]),
				};
			} catch (reason) {
				results[index] = { status: "rejected", reason };
			}
		}
	};

	await Promise.all(Array.from({ length: poolSize }, () => runWorker()));
	return results;
}
|
|
177
|
+
|
|
178
|
+
// =============================================================================
|
|
179
|
+
// Fetch: /v1/models → list of model IDs
|
|
180
|
+
// =============================================================================
|
|
181
|
+
|
|
182
|
+
async function fetchModelIds(apiKey: string): Promise<string[]> {
|
|
65
183
|
const response = await fetchWithRetry(
|
|
66
184
|
`${BASE_URL_OLLAMA}/models`,
|
|
67
185
|
{
|
|
@@ -77,63 +195,188 @@ async function fetchOllamaModels(
|
|
|
77
195
|
|
|
78
196
|
if (!response.ok) {
|
|
79
197
|
throw new Error(
|
|
80
|
-
`Failed to fetch Ollama
|
|
198
|
+
`Failed to fetch Ollama model list: ${response.status} ${response.statusText}`,
|
|
81
199
|
);
|
|
82
200
|
}
|
|
83
201
|
|
|
84
202
|
const json = (await response.json()) as {
|
|
85
203
|
data?: Array<{ id: string; owned_by?: string }>;
|
|
86
204
|
};
|
|
87
|
-
|
|
205
|
+
return (json.data ?? []).map((m) => m.id);
|
|
206
|
+
}
|
|
88
207
|
|
|
89
|
-
|
|
90
|
-
|
|
208
|
+
// =============================================================================
|
|
209
|
+
// Fetch: /api/show → per-model capabilities
|
|
210
|
+
// =============================================================================
|
|
211
|
+
|
|
212
|
+
/**
 * Request the metadata of a single model from `POST {OLLAMA_API_BASE}/api/show`.
 *
 * @param apiKey  Bearer token sent in the Authorization header.
 * @param modelId Model identifier sent as `{ model }` in the JSON body.
 * @returns The parsed response body, cast (not validated) to OllamaShowResponse.
 * @throws Error when the response status is not OK; anything thrown by
 *         `fetchWithTimeout` propagates unchanged.
 */
async function fetchModelDetails(
	apiKey: string,
	modelId: string,
): Promise<OllamaShowResponse> {
	const endpoint = `${OLLAMA_API_BASE}/api/show`;
	const request = {
		method: "POST",
		headers: {
			Authorization: `Bearer ${apiKey}`,
			"Content-Type": "application/json",
		},
		body: JSON.stringify({ model: modelId }),
	};

	const res = await fetchWithTimeout(endpoint, request, DETAIL_FETCH_TIMEOUT_MS);
	if (res.ok) {
		return (await res.json()) as OllamaShowResponse;
	}

	throw new Error(
		`/api/show failed for ${modelId}: ${res.status} ${res.statusText}`,
	);
}
|
|
237
|
+
|
|
238
|
+
// =============================================================================
|
|
239
|
+
// Assembly: raw /api/show data → ProviderModelConfig[]
|
|
240
|
+
// =============================================================================
|
|
241
|
+
|
|
242
|
+
/**
 * Extract the context window size from an /api/show `model_info` map.
 *
 * Returns the first entry (in object order) whose key ends with
 * `.context_length` and whose value is a positive, finite number.
 * Non-numeric, zero, negative and NaN values are skipped so that a malformed
 * entry cannot become the advertised context window.
 *
 * @param modelInfo Key/value metadata; a nullish value is treated as empty.
 * @param fallback  Value returned when no usable entry exists (default 128000).
 * @returns The detected context length, or `fallback`.
 */
function getContextLength(
	modelInfo: Record<string, unknown>,
	fallback = 128000,
): number {
	for (const [key, value] of Object.entries(modelInfo ?? {})) {
		if (
			key.endsWith(".context_length") &&
			typeof value === "number" &&
			Number.isFinite(value) &&
			value > 0
		) {
			return value;
		}
	}
	return fallback;
}
|
|
250
|
+
|
|
251
|
+
/**
 * Derive a display name from a model id.
 *
 * Dashes and colons in the id become spaces. When `details.parameter_size` is
 * present it is appended in parentheses, together with
 * `details.quantization_level` if that is present as well. A quantization
 * level without a parameter size adds nothing.
 */
function buildModelName(
	id: string,
	details: OllamaShowResponse["details"],
): string {
	const readable = id.replace(/[:-]/g, " ");

	const size = details?.parameter_size;
	if (!size) {
		return readable;
	}

	const quantization = details?.quantization_level;
	const suffix = quantization ? `(${size}, ${quantization})` : `(${size})`;
	return `${readable} ${suffix}`;
}
|
|
108
274
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
275
|
+
/**
 * Convert raw /api/show responses (keyed by model id) into provider model
 * configs.
 *
 * Only models whose capabilities include "tools" are kept. For each kept
 * model: "thinking" sets the reasoning flag, "vision" adds image input, the
 * context window comes from `model_info`, and costs are all zero.
 */
function assembleModels(
	raw: Record<string, OllamaShowResponse>,
): ProviderModelConfig[] {
	const configs: ProviderModelConfig[] = [];

	for (const [id, data] of Object.entries(raw)) {
		// Models without tool support are not offered at all.
		if (!data.capabilities?.includes("tools")) {
			continue;
		}

		const thinkingMap = resolveThinkingMap(id, data.capabilities ?? []);
		const acceptsImages = data.capabilities?.includes("vision");
		const inputKinds = (acceptsImages ? ["text", "image"] : ["text"]) as (
			| "text"
			| "image"
		)[];

		configs.push({
			id,
			name: buildModelName(id, data.details),
			reasoning: data.capabilities?.includes("thinking") ?? false,
			thinkingLevelMap: thinkingMap,
			input: inputKinds,
			cost: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
			},
			contextWindow: getContextLength(data.model_info ?? {}),
			maxTokens: 32768,
			compat: {
				supportsDeveloperRole: false,
				// True exactly when a thinking-level map was resolved for this model.
				// NOTE(review): the previous comment described this as telling Pi
				// NOT to apply its own reasoning_effort logic — confirm the flag's
				// polarity against Pi's compat documentation.
				supportsReasoningEffort: thinkingMap != null,
			},
		});
	}

	return configs;
}
|
|
309
|
+
|
|
310
|
+
// =============================================================================
|
|
311
|
+
// Fetch all models (orchestrates /v1/models + /api/show)
|
|
312
|
+
// =============================================================================
|
|
313
|
+
|
|
314
|
+
/**
 * Fetch the full Ollama Cloud model list with per-model capabilities.
 *
 * Steps: list model ids, drop known-403 and embedding models, fetch
 * /api/show for the remainder with bounded concurrency, assemble the
 * successful responses into provider configs, then apply the user's hidden
 * model list.
 *
 * A failing detail request only excludes that one model; its cause is logged
 * at debug level instead of being discarded.
 *
 * @param apiKey Ollama API key used for every request.
 * @returns Model configs for all models whose details could be fetched.
 * @throws Error if the id listing fails, or if no model details at all could
 *         be fetched.
 */
async function fetchAllModels(apiKey: string): Promise<ProviderModelConfig[]> {
	// Step 1: Get model IDs
	const modelIds = await fetchModelIds(apiKey);
	_logger.info(
		`[ollama-cloud] Found ${modelIds.length} model IDs, fetching details...`,
	);

	// Step 2: Filter out known-broken and embedding models early
	const candidateIds = modelIds.filter(
		(id) =>
			!OLLAMA_KNOWN_403_MODELS.has(id) && !id.toLowerCase().includes("embed"),
	);

	// Step 3: Fetch per-model details concurrently
	let succeeded = 0;
	let failed = 0;

	const detailResults = await concurrentMap(
		candidateIds,
		DETAIL_CONCURRENCY,
		async (id) => {
			try {
				const result = await fetchModelDetails(apiKey, id);
				succeeded++;
				return [id, result] as const;
			} catch (error) {
				failed++;
				// Keep the underlying cause so skipped models can be diagnosed.
				const cause = error instanceof Error ? error.message : String(error);
				throw new Error(`detail fetch failed for ${id}: ${cause}`);
			} finally {
				// Report progress every 10 completions and once at the very end.
				const done = succeeded + failed;
				if (done % 10 === 0 || done === candidateIds.length) {
					_logger.debug(
						`[ollama-cloud] Detail progress: ${done}/${candidateIds.length} (${failed} failed)`,
					);
				}
			}
		},
	);

	// Step 4: Collect successful results, logging why the others were skipped
	const raw: Record<string, OllamaShowResponse> = {};
	for (const result of detailResults) {
		if (result.status === "fulfilled") {
			const [id, data] = result.value;
			raw[id] = data;
		} else {
			const why =
				result.reason instanceof Error
					? result.reason.message
					: String(result.reason);
			_logger.debug(`[ollama-cloud] Skipping model: ${why}`);
		}
	}

	const fetchedCount = Object.keys(raw).length;
	_logger.info(
		`[ollama-cloud] Fetched ${fetchedCount} model details` +
			(failed ? ` (${failed} failed)` : ""),
	);

	if (fetchedCount === 0) {
		throw new Error("Failed to fetch any model details");
	}

	// Step 5: Assemble into Pi model configs
	const models = assembleModels(raw);

	// Step 6: Apply user-configured hidden models
	return applyHidden(models, PROVIDER_OLLAMA);
}
|
|
138
381
|
|
|
139
382
|
// =============================================================================
|
|
@@ -150,35 +393,35 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
|
|
|
150
393
|
return;
|
|
151
394
|
}
|
|
152
395
|
|
|
153
|
-
//
|
|
154
|
-
let allModels: ProviderModelConfig[]
|
|
396
|
+
// ── Try cache first for fast startup ────────────────────────────
|
|
397
|
+
let allModels: ProviderModelConfig[];
|
|
398
|
+
let fromCache = false;
|
|
155
399
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
400
|
+
const cachedModels = loadProviderCache(PROVIDER_OLLAMA);
|
|
401
|
+
if (cachedModels && cachedModels.length > 0) {
|
|
402
|
+
allModels = cachedModels;
|
|
403
|
+
fromCache = true;
|
|
404
|
+
_logger.info(
|
|
405
|
+
`[ollama-cloud] Using ${cachedModels.length} cached models for fast startup`,
|
|
406
|
+
);
|
|
407
|
+
} else {
|
|
408
|
+
allModels = FALLBACK_MODELS;
|
|
409
|
+
_logger.info("[ollama-cloud] No cache available, using fallback models");
|
|
163
410
|
}
|
|
164
411
|
|
|
165
|
-
//
|
|
166
|
-
// So "free" and "all" are the same set
|
|
412
|
+
// ── Register immediately with cached/fallback models ────────────
|
|
167
413
|
const freeModels = allModels;
|
|
168
|
-
|
|
414
|
+
let stored = { free: freeModels, all: allModels };
|
|
169
415
|
const hasKey = true;
|
|
170
416
|
|
|
171
|
-
// Create re-register function
|
|
172
417
|
const reRegister = createReRegister(pi, {
|
|
173
418
|
providerId: PROVIDER_OLLAMA,
|
|
174
419
|
baseUrl: BASE_URL_OLLAMA,
|
|
175
420
|
apiKey,
|
|
176
421
|
});
|
|
177
422
|
|
|
178
|
-
// Register with global toggle system
|
|
179
423
|
registerWithGlobalToggle(PROVIDER_OLLAMA, stored, reRegister, hasKey);
|
|
180
424
|
|
|
181
|
-
// Register initial models
|
|
182
425
|
const initialModels = getOllamaShowPaid() ? allModels : freeModels;
|
|
183
426
|
pi.registerProvider(PROVIDER_OLLAMA, {
|
|
184
427
|
baseUrl: BASE_URL_OLLAMA,
|
|
@@ -188,13 +431,55 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
|
|
|
188
431
|
});
|
|
189
432
|
|
|
190
433
|
_logger.info(
|
|
191
|
-
`[ollama-cloud] Registered ${initialModels.length} models
|
|
434
|
+
`[ollama-cloud] Registered ${initialModels.length} models` +
|
|
435
|
+
(fromCache ? " (from cache)" : " (fallback)") +
|
|
436
|
+
", fetching fresh in background...",
|
|
192
437
|
);
|
|
193
438
|
|
|
194
|
-
// ──
|
|
439
|
+
// ── Background refresh ─────────────────────────────────────────
|
|
440
|
+
async function refreshModels(): Promise<ProviderModelConfig[]> {
|
|
441
|
+
try {
|
|
442
|
+
const freshModels = await fetchAllModels(apiKey!);
|
|
443
|
+
saveProviderCache(PROVIDER_OLLAMA, freshModels);
|
|
444
|
+
return freshModels;
|
|
445
|
+
} catch (error) {
|
|
446
|
+
_logger.error("[ollama-cloud] Background refresh failed", {
|
|
447
|
+
error: error instanceof Error ? error.message : String(error),
|
|
448
|
+
});
|
|
449
|
+
// Return current models so we don't lose what we have
|
|
450
|
+
return allModels;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
// ── /ollama-cloud-refresh command ───────────────────────────────
|
|
455
|
+
pi.registerCommand("ollama-cloud-refresh", {
|
|
456
|
+
description:
|
|
457
|
+
"Re-fetch Ollama Cloud models from the API and update the provider live",
|
|
458
|
+
handler: async (_args: string, ctx: ExtensionCommandContext) => {
|
|
459
|
+
ctx.ui.notify("Refreshing Ollama Cloud models…", "info");
|
|
460
|
+
try {
|
|
461
|
+
const fresh = await fetchAllModels(apiKey!);
|
|
462
|
+
saveProviderCache(PROVIDER_OLLAMA, fresh);
|
|
463
|
+
allModels = fresh;
|
|
464
|
+
stored = { free: fresh, all: fresh };
|
|
465
|
+
reRegister(fresh);
|
|
466
|
+
ctx.ui.notify(
|
|
467
|
+
`Registered ${fresh.length} Ollama Cloud models (refresh complete)`,
|
|
468
|
+
"info",
|
|
469
|
+
);
|
|
470
|
+
} catch (error) {
|
|
471
|
+
ctx.ui.notify(
|
|
472
|
+
`Refresh failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
473
|
+
"error",
|
|
474
|
+
);
|
|
475
|
+
}
|
|
476
|
+
},
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
// ── /probe-ollama command ───────────────────────────────────────
|
|
195
480
|
pi.registerCommand("probe-ollama", {
|
|
196
481
|
description: "Test all Ollama Cloud models for 403 'access denied' errors",
|
|
197
|
-
handler: async (_args, ctx) => {
|
|
482
|
+
handler: async (_args: string, ctx: ExtensionCommandContext) => {
|
|
198
483
|
if (!apiKey) {
|
|
199
484
|
ctx.ui.notify("OLLAMA_API_KEY not set", "error");
|
|
200
485
|
return;
|
|
@@ -228,13 +513,21 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
|
|
|
228
513
|
const config = loadConfigFile();
|
|
229
514
|
const existingHidden = new Set(config.hidden_models ?? []);
|
|
230
515
|
for (const id of notFound) existingHidden.add(`${PROVIDER_OLLAMA}/${id}`);
|
|
231
|
-
saveConfig({
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
516
|
+
saveConfig({
|
|
517
|
+
hidden_models: Array.from(existingHidden),
|
|
518
|
+
});
|
|
519
|
+
|
|
520
|
+
// Re-fetch and re-register so hidden models disappear immediately
|
|
521
|
+
try {
|
|
522
|
+
const fresh = await fetchAllModels(apiKey!);
|
|
523
|
+
saveProviderCache(PROVIDER_OLLAMA, fresh);
|
|
524
|
+
allModels = fresh;
|
|
525
|
+
stored = { free: fresh, all: fresh };
|
|
526
|
+
reRegister(fresh);
|
|
527
|
+
} catch {
|
|
528
|
+
// If refresh fails, just re-register current models
|
|
529
|
+
reRegister(allModels);
|
|
530
|
+
}
|
|
238
531
|
|
|
239
532
|
ctx.ui.notify(
|
|
240
533
|
`Found ${notFound.length} broken models (auto-hidden):\n${notFound.join("\n")}`,
|
|
@@ -243,22 +536,44 @@ export default async function ollamaProvider(pi: ExtensionAPI) {
|
|
|
243
536
|
},
|
|
244
537
|
});
|
|
245
538
|
|
|
246
|
-
// ── Status bar for provider selection
|
|
539
|
+
// ── Status bar for provider selection ───────────────────────────
|
|
247
540
|
|
|
248
|
-
|
|
541
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
542
|
+
pi.on("model_select" as any, (_event: any, ctx: any) => {
|
|
249
543
|
if (_event.model?.provider !== PROVIDER_OLLAMA) {
|
|
250
544
|
ctx.ui.setStatus(`${PROVIDER_OLLAMA}-status`, undefined);
|
|
251
545
|
return;
|
|
252
546
|
}
|
|
253
547
|
|
|
254
548
|
const count = allModels.length;
|
|
255
|
-
ctx.ui.setStatus(
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
549
|
+
ctx.ui.setStatus(`${PROVIDER_OLLAMA}-status`, `ollama: ${count} models`);
|
|
550
|
+
});
|
|
551
|
+
|
|
552
|
+
// ── Background refresh on session_start ─────────────────────────
|
|
553
|
+
let bgRefreshed = false;
|
|
554
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
555
|
+
pi.on("session_start" as any, async (_event: any, ctx: any) => {
|
|
556
|
+
if (bgRefreshed) {
|
|
557
|
+
return;
|
|
558
|
+
}
|
|
559
|
+
bgRefreshed = true;
|
|
560
|
+
|
|
561
|
+
try {
|
|
562
|
+
const fresh = await refreshModels();
|
|
563
|
+
allModels = fresh;
|
|
564
|
+
stored = { free: fresh, all: fresh };
|
|
565
|
+
reRegister(fresh);
|
|
566
|
+
ctx.ui.notify(`Ollama Cloud: ${fresh.length} models ready`, "info");
|
|
567
|
+
} catch {
|
|
568
|
+
// Already logged in refreshModels()
|
|
569
|
+
}
|
|
259
570
|
});
|
|
260
571
|
}
|
|
261
572
|
|
|
573
|
+
// =============================================================================
|
|
574
|
+
// Probe helper
|
|
575
|
+
// =============================================================================
|
|
576
|
+
|
|
262
577
|
/**
|
|
263
578
|
* Probe a single Ollama model with a minimal chat request.
|
|
264
579
|
* Returns true if the model is accessible (not 403), false if it 403s.
|
|
@@ -283,7 +598,7 @@ async function probeOllamaModel(
|
|
|
283
598
|
max_tokens: 1,
|
|
284
599
|
}),
|
|
285
600
|
},
|
|
286
|
-
10000,
|
|
601
|
+
10000,
|
|
287
602
|
);
|
|
288
603
|
// 403 = access denied (model not provisioned)
|
|
289
604
|
// 200/400/401/etc = at least accessible
|