pi-free 2.0.11 → 2.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Novita AI Provider Extension
3
+ *
4
+ * Novita AI deploys 100+ open-source models with an OpenAI-compatible API.
5
+ * Known for competitive pricing, globally distributed GPU infrastructure,
6
+ * and support for chat, vision, and Anthropic-compatible endpoints.
7
+ *
8
+ * API: https://api.novita.ai/openai/v1
9
+ * Models: /v1/models returns non-standard pricing fields (input_token_price_per_m,
10
+ * output_token_price_per_m) plus rich metadata (context_size, max_output_tokens,
11
+ * features for reasoning, input_modalities for vision).
12
+ *
13
+ * Setup:
14
+ * 1. Sign up at https://novita.ai
15
+ * 2. Get API key from dashboard
16
+ * 3. Set NOVITA_API_KEY env var or add to ~/.pi/free.json
17
+ *
18
+ * Usage:
19
+ * pi install git:github.com/apmantza/pi-free
20
+ * # Set NOVITA_API_KEY env var
21
+ * # Models appear in /model selector
22
+ */
23
+
24
+ import type {
25
+ ExtensionAPI,
26
+ ProviderModelConfig,
27
+ } from "@earendil-works/pi-coding-agent";
28
+ import { getNovitaApiKey, getNovitaShowPaid } from "../../config.ts";
29
+ import {
30
+ BASE_URL_NOVITA,
31
+ DEFAULT_FETCH_TIMEOUT_MS,
32
+ PROVIDER_NOVITA,
33
+ } from "../../constants.ts";
34
+ import { createLogger } from "../../lib/logger.ts";
35
+ import {
36
+ getProxyModelCompat,
37
+ isLikelyReasoningModel,
38
+ } from "../../lib/provider-compat.ts";
39
+ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
40
+ import { fetchWithRetry } from "../../lib/util.ts";
41
+ import { createReRegister, setupProvider } from "../../provider-helper.ts";
42
+
43
+ const _logger = createLogger("novita");
44
+
45
+ // =============================================================================
46
+ // Types
47
+ // =============================================================================
48
+
49
/**
 * One entry of the `data` array returned by Novita's `/models` endpoint.
 *
 * Only `id` is mandatory; every other field is optional and is read
 * defensively by `fetchNovitaModels`.
 */
interface NovitaModel {
  /** Model identifier; its last "/"-separated segment is the fallback display name. */
  id: string;
  /** Preferred human-readable name. */
  display_name?: string;
  /** Free-form description (not read by the code visible in this file). */
  description?: string;
  /** Input price per million tokens. */
  input_token_price_per_m?: number;
  /** Output price per million tokens. */
  output_token_price_per_m?: number;
  /** Context window size; 128_000 is assumed when absent. */
  context_size?: number;
  /** Maximum output tokens; 16_384 is assumed when absent. */
  max_output_tokens?: number;
  /** Capability tags; "reasoning" marks a reasoning model. */
  features?: string[];
  /** Accepted input kinds; "image" marks a vision-capable model. */
  input_modalities?: string[];
  /** Produced output kinds (not read by the code visible in this file). */
  output_modalities?: string[];
  /** Model category; only "chat" entries are kept. */
  model_type?: string;
  /** Endpoint list (not read by the code visible in this file). */
  endpoints?: string[];
  /** Status code; only entries with status 1 are kept (presumably "active" — confirm against the Novita API). */
  status?: number;
}
64
+
65
+ // =============================================================================
66
+ // Fetch
67
+ // =============================================================================
68
+
69
/**
 * Downloads the Novita model catalogue and converts it to Pi model configs.
 *
 * Requests `${BASE_URL_NOVITA}/models` with the key as a bearer token, keeps
 * the entries whose `status` is 1 and whose `model_type` is "chat", and turns
 * each of them into a `ProviderModelConfig` (tagged with `_pricingKnown`).
 *
 * @param apiKey - Novita API key sent in the Authorization header.
 * @returns The converted models, or an empty array when the request or the
 *   conversion fails (the failure is logged, never thrown).
 */
async function fetchNovitaModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  _logger.info("[novita] Fetching models from Novita API...");

  try {
    const response = await fetchWithRetry(
      `${BASE_URL_NOVITA}/models`,
      {
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
      },
      3,
      1000,
      DEFAULT_FETCH_TIMEOUT_MS,
    );
    if (!response.ok) {
      throw new Error(`Novita API error: ${response.status}`);
    }

    const payload = (await response.json()) as { data?: NovitaModel[] };
    const chatModels: NovitaModel[] = [];
    for (const entry of payload.data ?? []) {
      if (entry.status === 1 && entry.model_type === "chat") {
        chatModels.push(entry);
      }
    }

    _logger.info(`[novita] Fetched ${chatModels.length} models`);

    const configs: ProviderModelConfig[] = [];
    for (const entry of chatModels) {
      // Display name: explicit name, else the last path segment, else the id.
      const lastSegment = entry.id.slice(entry.id.lastIndexOf("/") + 1);
      const name = entry.display_name || lastSegment || entry.id;

      const reasoning =
        entry.features?.includes("reasoning") === true ||
        isLikelyReasoningModel({ id: entry.id, name });
      const acceptsImages = Boolean(entry.input_modalities?.includes("image"));

      // Novita prices are per MILLION tokens; Pi expects per-token costs.
      const perTokenInput = (entry.input_token_price_per_m ?? 0) / 1_000_000;
      const perTokenOutput = (entry.output_token_price_per_m ?? 0) / 1_000_000;
      const pricingKnown = !(
        entry.input_token_price_per_m === undefined &&
        entry.output_token_price_per_m === undefined
      );

      configs.push({
        id: entry.id,
        name,
        reasoning,
        input: acceptsImages ? ["text", "image"] : ["text"],
        cost: {
          input: perTokenInput,
          output: perTokenOutput,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: entry.context_size ?? 128_000,
        maxTokens: entry.max_output_tokens ?? 16_384,
        compat: getProxyModelCompat({ id: entry.id, name }),
        _pricingKnown: pricingKnown,
      } as ProviderModelConfig & { _pricingKnown?: boolean });
    }
    return configs;
  } catch (error) {
    // Best effort: a failed fetch simply yields no models.
    const reason = error instanceof Error ? error.message : String(error);
    _logger.error("[novita] Failed to fetch models:", { error: reason });
    return [];
  }
}
137
+
138
+ // =============================================================================
139
+ // Extension Entry Point
140
+ // =============================================================================
141
+
142
/**
 * Extension entry point: registers the Novita provider with Pi.
 *
 * Does nothing when no API key is configured or when no chat model could be
 * fetched. Otherwise it splits the catalogue into free and all models, wires
 * the provider into the global free/paid toggle, and performs the initial
 * registration according to the persisted "show paid" setting.
 *
 * @param pi - Extension API handed in by the Pi coding agent.
 */
export default async function novitaProvider(pi: ExtensionAPI) {
  const apiKey = getNovitaApiKey();
  if (!apiKey) {
    _logger.info(
      "[novita] Skipping — NOVITA_API_KEY not set. Sign up at https://novita.ai/",
    );
    return;
  }

  const allModels = await fetchNovitaModels(apiKey);
  if (allModels.length === 0) {
    _logger.warn("[novita] No chat models available");
    return;
  }

  // The whole catalogue is passed to isFreeModel alongside each candidate.
  const freeModels: typeof allModels = [];
  for (const candidate of allModels) {
    if (isFreeModel({ ...candidate, provider: PROVIDER_NOVITA }, allModels)) {
      freeModels.push(candidate);
    }
  }

  const stored = { free: freeModels, all: allModels };

  _logger.info(
    `[novita] Registered ${allModels.length} models (${freeModels.length} free)`,
  );

  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_NOVITA,
    baseUrl: BASE_URL_NOVITA,
    apiKey,
  });

  registerWithGlobalToggle(PROVIDER_NOVITA, stored, reRegister, true);

  setupProvider(
    pi,
    {
      providerId: PROVIDER_NOVITA,
      initialShowPaid: getNovitaShowPaid(),
      tosUrl: "https://novita.ai/terms",
      reRegister: (models, _stored) => {
        // Keep the local snapshot in sync when the helper supplies a new one.
        if (_stored) {
          stored.free = _stored.free;
          stored.all = _stored.all;
        }
        reRegister(models);
      },
    },
    stored,
  );

  // Initial registration honours the persisted toggle state.
  const showPaid = getNovitaShowPaid();
  if (showPaid && stored.all.length > 0) {
    reRegister(stored.all);
  } else {
    reRegister(freeModels);
  }
}
@@ -31,7 +31,7 @@ import {
31
31
  URL_MODELS_DEV,
32
32
  } from "../../constants.ts";
33
33
  import { createLogger } from "../../lib/logger.ts";
34
- import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
34
+ import { registerWithGlobalToggle } from "../../lib/registry.ts";
35
35
  import type { ModelsDevModel, ModelsDevProvider } from "../../lib/types.ts";
36
36
  import {
37
37
  fetchWithRetry,
@@ -382,11 +382,9 @@ export default async function nvidiaProvider(pi: ExtensionAPI) {
382
382
  return;
383
383
  }
384
384
 
385
- // Store both sets for global toggle using consistent isFreeModel helper
386
- // NVIDIA uses Route B (name-based): only models with "free" in name are marked free
387
- const freeModels = allModels.filter((m) =>
388
- isFreeModel({ ...m, provider: PROVIDER_NVIDIA }),
389
- );
385
+ // All NVIDIA NIM models are accessible via free credits (no payment method required).
386
+ // Same approach as Codestral/Ollama: all models shown as free-tier.
387
+ const freeModels = allModels;
390
388
  const stored = { free: freeModels, all: allModels };
391
389
 
392
390
  // Create re-register function
@@ -1,33 +1,371 @@
1
- import { randomUUID } from "node:crypto";
2
-
3
- /**
4
- * Shared OpenCode session/request tracking.
5
- *
6
- * OpenCode endpoints appear to behave more reliably when a stable session id
7
- * is included across requests in the same Pi session.
8
- */
9
- export function createOpenCodeSessionTracker() {
10
- let sessionId = "";
11
- let requestCount = 0;
12
-
13
- function generateId(): string {
14
- return randomUUID().replaceAll("-", "");
15
- }
16
-
17
- function getSessionId(): string {
18
- if (!sessionId) {
19
- sessionId = generateId();
20
- }
21
- return sessionId;
22
- }
23
-
24
- function nextRequestId(): string {
25
- requestCount++;
26
- return `${getSessionId()}-${requestCount}`;
27
- }
28
-
29
- return {
30
- getSessionId,
31
- nextRequestId,
32
- };
33
- }
1
+ import { existsSync, lstatSync, readFileSync } from "node:fs";
2
+ import { basename, dirname, join } from "node:path";
3
+ import { randomBytes } from "node:crypto";
4
+ import { createRequire } from "node:module";
5
+ import { pathToFileURL } from "node:url";
6
+ import type {
7
+ Api,
8
+ AssistantMessage,
9
+ AssistantMessageEvent,
10
+ AssistantMessageEventStream,
11
+ Context,
12
+ Model,
13
+ SimpleStreamOptions,
14
+ } from "@earendil-works/pi-ai";
15
+ import type { ProviderConfig } from "@earendil-works/pi-coding-agent";
16
+
17
/**
 * API identifier string for the OpenCode provider whose headers are rebuilt
 * per request (presumably used when registering the provider — confirm at
 * the call site).
 */
export const OPENCODE_DYNAMIC_API = "opencode-dynamic" as const;

/**
 * Fixed headers that `createOpenCodeHeaders` adds to every OpenCode request;
 * they take precedence over caller-supplied headers of the same name.
 */
export const OPENCODE_STATIC_HEADERS = {
  "User-Agent": "opencode/1.15.5",
  "x-opencode-client": "cli",
} as const;
23
+
24
/**
 * OpenCode-native identifier generation.
 *
 * OpenCode's server uses checkHeaders to distinguish native CLI requests from
 * third-party clients. Native identifiers use ULID-style prefixes:
 *
 *   Session: ses_<hex><base62>  (e.g. ses_019a3f5c2b7eQ4xZ81LmPa03TdKy)
 *   Request: msg_<hex><base62>  (e.g. msg_019a3f5c2b80h7VbN2cR5uW9JeXs)
 *
 * If the server does not see the expected prefix it applies a fallback rate
 * limit (~2 req/day) which causes models to "freeze" after a few prompts.
 *
 * @param prefix - Literal prefix placed in front of the identifier.
 * @returns `prefix`, then 12 lowercase hex digits encoding `Date.now()`, then
 *   14 random base62 characters.
 */
function generateOpenCodeId(prefix: string): string {
  // Millisecond timestamp as zero-padded hex, so ids sort by creation time.
  // Date.now() is a safe integer, so no BigInt round-trip is needed.
  const timeHex = Date.now().toString(16).padStart(12, "0");

  const randomLen = 14;
  const base62Chars =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  // 256 is not a multiple of 62, so `byte % 62` alone would favour the first
  // 8 characters. Bytes at or above the largest multiple of 62 (248) are
  // discarded to keep every character equally likely.
  const acceptBelow = 256 - (256 % base62Chars.length);

  let suffix = "";
  while (suffix.length < randomLen) {
    for (const byte of randomBytes(randomLen)) {
      if (byte >= acceptBelow) continue;
      suffix += base62Chars.charAt(byte % base62Chars.length);
      if (suffix.length === randomLen) break;
    }
  }

  return `${prefix}${timeHex}${suffix}`;
}
51
+
52
/**
 * Shared OpenCode session/request tracking.
 *
 * OpenCode endpoints require native-format identifiers (ses_ / msg_ prefix)
 * to receive the full daily rate limit. Without matching prefixes the server
 * falls back to a ~2 req/day limit, causing free models to freeze after a
 * couple of prompts.
 *
 * @returns An object whose `getSessionId` lazily creates one session id and
 *   keeps returning it, and whose `nextRequestId` creates a fresh request id
 *   on every call.
 */
export function createOpenCodeSessionTracker() {
  // Empty until the first getSessionId() call.
  let cachedSessionId = "";

  const getSessionId = (): string => {
    if (cachedSessionId.length === 0) {
      cachedSessionId = generateOpenCodeId("ses_");
    }
    return cachedSessionId;
  };

  const nextRequestId = (): string => generateOpenCodeId("msg_");

  return { getSessionId, nextRequestId };
}

/** Shape of the object produced by `createOpenCodeSessionTracker`. */
export type OpenCodeSessionTracker = ReturnType<
  typeof createOpenCodeSessionTracker
>;
83
+
84
/**
 * Builds the header set for one OpenCode request.
 *
 * Caller-supplied headers come first, then the static OpenCode headers, then
 * the session id and a newly generated request id; later entries win when
 * names collide.
 *
 * @param tracker - Source of the session id and per-request id.
 * @param existingHeaders - Optional headers to start from.
 * @returns A new header record; the inputs are not modified.
 */
export function createOpenCodeHeaders(
  tracker: OpenCodeSessionTracker,
  existingHeaders?: Record<string, string>,
): Record<string, string> {
  const sessionHeader = tracker.getSessionId();
  const requestHeader = tracker.nextRequestId();

  return {
    ...existingHeaders,
    ...OPENCODE_STATIC_HEADERS,
    "x-opencode-session": sessionHeader,
    "x-opencode-request": requestHeader,
  };
}
95
+
96
/**
 * Tells whether a provider id names one of the OpenCode providers.
 *
 * @param providerId - Provider identifier to check (exact, case-sensitive).
 * @returns `true` for "opencode" and "opencode-go", `false` otherwise.
 */
export function isOpenCodeProvider(providerId: string): boolean {
  switch (providerId) {
    case "opencode":
    case "opencode-go":
      return true;
    default:
      return false;
  }
}
99
+
100
/**
 * Removes every "/" at the end of a string.
 *
 * Implemented as a backwards scan rather than a regular expression.
 *
 * @param value - Text to trim, typically a base URL.
 * @returns `value` without its trailing slashes (possibly empty).
 */
function stripTrailingSlashes(value: string): string {
  let keep = value.length;
  // endsWith(x, n) looks at the first n characters only; it is false at n = 0.
  while (value.endsWith("/", keep)) {
    keep -= 1;
  }
  return value.slice(0, keep);
}
107
+
108
/**
 * Decides which wire protocol an OpenCode model uses, based on its base URL.
 *
 * @param model - Model whose `baseUrl` is inspected.
 * @returns `true` when the base URL, ignoring trailing slashes, does not end
 *   in "/v1"; such models are streamed through the Anthropic implementation.
 */
function isAnthropicOpenCodeEndpoint(model: Model<Api>): boolean {
  const normalizedBaseUrl = stripTrailingSlashes(model.baseUrl);
  const endsWithV1 = normalizedBaseUrl.endsWith("/v1");
  return !endsWithV1;
}
111
+
112
/** Signature shared by the pi-ai "simple" streaming functions used below. */
type StreamSimpleFn<TApi extends Api> = (
  model: Model<TApi>,
  context: Context,
  options?: SimpleStreamOptions,
) => AssistantMessageEventStream;

/** Part of the `@earendil-works/pi-ai/anthropic` module that this file uses. */
interface AnthropicStreamModule {
  streamSimpleAnthropic: StreamSimpleFn<"anthropic-messages">;
}

/** Part of the `@earendil-works/pi-ai/openai-completions` module that this file uses. */
interface OpenAICompletionsStreamModule {
  streamSimpleOpenAICompletions: StreamSimpleFn<"openai-completions">;
}
125
+
126
/**
 * In-flight and completed pi-ai subpath imports, keyed by full specifier.
 * Entries are never removed, so a rejected import stays cached as well.
 */
const piAiSubpathCache = new Map<string, Promise<unknown>>();

/**
 * Imports `@earendil-works/pi-ai/<subpath>` at most once per specifier.
 *
 * @param subpath - Export subpath of the pi-ai package, e.g. "anthropic".
 * @returns The cached promise for the module namespace, typed as `T`.
 */
async function importPiAiSubpath<T>(subpath: string): Promise<T> {
  const specifier = `@earendil-works/pi-ai/${subpath}`;

  let pending = piAiSubpathCache.get(specifier) as Promise<T> | undefined;
  if (!pending) {
    pending = importPiAiSubpathUncached<T>(specifier);
    piAiSubpathCache.set(specifier, pending);
  }
  return pending;
}
137
+
138
/**
 * Imports a pi-ai subpath, falling back to a file path resolved from the
 * package's own `exports` map when the bare specifier cannot be imported.
 *
 * @param specifier - Full module specifier to import.
 * @returns The imported module namespace, typed as `T`.
 * @throws The error of the direct import when no fallback path is found or
 *   when the fallback import fails too.
 */
async function importPiAiSubpathUncached<T>(specifier: string): Promise<T> {
  let directError: unknown;
  try {
    return (await import(specifier)) as T;
  } catch (error) {
    directError = error;
  }

  const fallbackPath = resolvePiAiSubpathFromPackage(specifier);
  if (fallbackPath) {
    try {
      return (await import(pathToFileURL(fallbackPath).href)) as T;
    } catch {
      // The direct import's error is the one reported below.
    }
  }
  throw directError;
}
151
+
152
/**
 * Package resolved in order to find the `node_modules` tree to search
 * (presumably a dependency that is installed next to pi-ai — confirm).
 */
const PI_AI_DEPENDENCY_CANARY = "openai";

/**
 * Finds the installed `@earendil-works/pi-ai` package directory.
 *
 * Resolves the canary package relative to `requireBase`, then walks upward
 * from the resolved file; in every directory named `node_modules` it looks
 * for `@earendil-works/pi-ai/package.json`.
 *
 * @param requireBase - Path or file URL used as the resolution base.
 * @returns The package directory, or `undefined` when resolution fails or no
 *   matching directory is found.
 */
function findPiAiPackageDir(requireBase: string): string | undefined {
  try {
    const canaryEntry = createRequire(requireBase).resolve(
      PI_AI_DEPENDENCY_CANARY,
    );

    // Stop once dirname() no longer changes the path (filesystem root).
    for (
      let current = dirname(canaryEntry), parent = dirname(current);
      current !== parent;
      current = parent, parent = dirname(current)
    ) {
      if (basename(current) !== "node_modules") continue;

      const candidateDir = join(current, "@earendil-works", "pi-ai");
      const manifestPath = join(candidateDir, "package.json");
      if (existsSync(manifestPath) && lstatSync(manifestPath).isFile()) {
        return candidateDir;
      }
    }
  } catch {
    // Resolution failed — try the next base.
  }
  return undefined;
}
174
+
175
/**
 * Extracts the file target from one entry of a package.json `exports` map.
 *
 * Accepts the string shorthand as well as condition objects, preferring the
 * "import" condition over "default" and descending into nested conditions.
 */
function pickPiAiExportTarget(entry: unknown): string | undefined {
  if (typeof entry === "string") return entry;
  if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
    return undefined;
  }
  const conditions = entry as Record<string, unknown>;
  return (
    pickPiAiExportTarget(conditions.import) ??
    pickPiAiExportTarget(conditions.default)
  );
}

/**
 * Resolves a pi-ai subpath specifier to a file on disk by reading the
 * package's own `exports` map.
 *
 * The package directory is searched relative to the running script
 * (`process.argv[1]`) and then relative to this module; the first base that
 * yields a usable export target wins.
 *
 * @param specifier - Full specifier such as "@earendil-works/pi-ai/anthropic".
 * @returns The path of the exported file, or `undefined` when the package or
 *   a matching export cannot be found.
 */
function resolvePiAiSubpathFromPackage(specifier: string): string | undefined {
  const subpath = specifier.replace("@earendil-works/pi-ai/", "");
  const candidates = [process.argv[1], import.meta.url].filter(
    (value): value is string => Boolean(value),
  );

  for (const candidate of candidates) {
    const pkgDir = findPiAiPackageDir(candidate);
    if (!pkgDir) continue;
    try {
      // The manifest is external data: treat it as unknown and narrow.
      const pkg: unknown = JSON.parse(
        readFileSync(join(pkgDir, "package.json"), "utf-8"),
      );
      const exportsField =
        typeof pkg === "object" && pkg !== null
          ? (pkg as { exports?: unknown }).exports
          : undefined;
      const exportEntry =
        typeof exportsField === "object" && exportsField !== null
          ? (exportsField as Record<string, unknown>)[`./${subpath}`]
          : undefined;

      const targetPath = pickPiAiExportTarget(exportEntry);
      if (targetPath !== undefined) {
        return join(pkgDir, targetPath);
      }
    } catch {
      // Unreadable or malformed manifest — try the next resolution base.
    }
  }

  return undefined;
}
200
+
201
/**
 * Minimal push-based event stream that can be handed to the caller before
 * the real upstream stream exists.
 *
 * Producers call `push` for each event and may call `end` to finish;
 * consumers iterate with `for await` and/or await `result()`.
 */
class DeferredAssistantMessageEventStream {
  /** Events pushed while no consumer was waiting. */
  private queue: AssistantMessageEvent[] = [];
  /** Resolvers of consumers currently blocked waiting for an event. */
  private waiting: Array<
    (result: IteratorResult<AssistantMessageEvent>) => void
  > = [];
  /** Set once a terminal event was pushed or `end` was called. */
  private done = false;
  private resolveResult!: (message: AssistantMessage) => void;
  private readonly finalResultPromise: Promise<AssistantMessage>;

  constructor() {
    this.finalResultPromise = new Promise((resolve) => {
      this.resolveResult = resolve;
    });
  }

  /**
   * Delivers one event. A "done" or "error" event is terminal: it resolves
   * `result()` and closes the stream. Events pushed after closing are dropped.
   */
  push(event: AssistantMessageEvent): void {
    if (this.done) return;

    if (event.type === "done" || event.type === "error") {
      this.done = true;
      this.resolveResult(event.type === "done" ? event.message : event.error);
    }

    const waiter = this.waiting.shift();
    if (waiter) {
      waiter({ value: event, done: false });
    } else {
      this.queue.push(event);
    }

    // A terminal event goes to one waiter only; any other blocked consumer
    // must be told the stream is over, or it would wait forever.
    if (this.done) this.releaseWaiters();
  }

  /**
   * Closes the stream without a terminal event. No-op when already closed.
   *
   * @param result - Final message used to resolve `result()`; when omitted,
   *   `result()` stays pending.
   */
  end(result?: AssistantMessage): void {
    if (this.done) return;
    this.done = true;
    if (result) this.resolveResult(result);
    this.releaseWaiters();
  }

  /** Yields queued events first, then waits for new ones until closed. */
  async *[Symbol.asyncIterator](): AsyncIterator<AssistantMessageEvent> {
    while (true) {
      if (this.queue.length > 0) {
        yield this.queue.shift()!;
      } else if (this.done) {
        return;
      } else {
        const result = await new Promise<IteratorResult<AssistantMessageEvent>>(
          (resolve) => this.waiting.push(resolve),
        );
        if (result.done) return;
        yield result.value;
      }
    }
  }

  /** Promise for the final assistant message. */
  result(): Promise<AssistantMessage> {
    return this.finalResultPromise;
  }

  /** Completes every blocked consumer with `done: true`. */
  private releaseWaiters(): void {
    for (const waiter of this.waiting.splice(0)) {
      waiter({ value: undefined, done: true });
    }
  }
}
261
+
262
/**
 * Builds an assistant message that reports a failure.
 *
 * @param model - Model whose api, provider and id are copied into the message.
 * @param error - Thrown value; an `Error` contributes its `message`, anything
 *   else is converted with `String()`.
 * @returns A message with empty content, all-zero usage and cost,
 *   `stopReason` "error" and the current timestamp.
 */
function createErrorMessage(
  model: Model<Api>,
  error: unknown,
): AssistantMessage {
  let errorMessage: string;
  if (error instanceof Error) {
    errorMessage = error.message;
  } else {
    errorMessage = String(error);
  }

  // Nothing was consumed or billed for a failed call.
  const zeroCost = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0,
  };
  const zeroUsage = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: 0,
    cost: zeroCost,
  };

  return {
    role: "assistant",
    content: [],
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: zeroUsage,
    stopReason: "error",
    errorMessage,
    timestamp: Date.now(),
  };
}
292
+
293
/**
 * Forwards every event of `upstream` into `stream` and then closes `stream`.
 *
 * @param stream - Deferred stream already handed to the caller.
 * @param upstream - Real event stream produced by the pi-ai implementation.
 * @throws Whatever the upstream iteration (or `upstream.result()`) throws,
 *   unless a terminal "done"/"error" event was already forwarded; in that
 *   case the stream is closed with that event's message instead.
 */
async function pipeStream(
  stream: DeferredAssistantMessageEventStream,
  upstream: AssistantMessageEventStream,
): Promise<void> {
  let terminalMessage: AssistantMessage | undefined;
  try {
    for await (const event of upstream) {
      stream.push(event);
      switch (event.type) {
        case "done":
          terminalMessage = event.message;
          break;
        case "error":
          terminalMessage = event.error;
          break;
        default:
          break;
      }
    }
    // No terminal event seen: fall back to the upstream's final result.
    stream.end(terminalMessage ?? (await upstream.result()));
  } catch (error) {
    if (!terminalMessage) {
      throw error;
    }
    stream.end(terminalMessage);
  }
}
313
+
314
/**
 * Pi's static model headers are evaluated at registration time. OpenCode treats
 * x-opencode-request like a per-request id, so reusing one value across turns can
 * leave later requests attached to an old/in-flight generation. Registering a
 * provider-specific stream keeps the normal Pi parsers but refreshes headers for
 * every LLM call.
 *
 * @param tracker - Supplies the session id and a fresh request id per call.
 * @returns A `streamSimple` implementation that returns a deferred stream
 *   immediately and fills it from the Anthropic or OpenAI-completions
 *   implementation of pi-ai, chosen by the model's base URL. Failures are
 *   reported on the stream as a "start" event followed by an "error" event.
 */
export function createOpenCodeStreamSimple(
  tracker: OpenCodeSessionTracker,
): NonNullable<ProviderConfig["streamSimple"]> {
  return (model, context, options) => {
    // Headers are built synchronously, once per LLM call.
    const headers = createOpenCodeHeaders(tracker, options?.headers);
    const stream = new DeferredAssistantMessageEventStream();

    const forwardUpstream = async (): Promise<void> => {
      try {
        let upstream: AssistantMessageEventStream;
        if (isAnthropicOpenCodeEndpoint(model)) {
          const { streamSimpleAnthropic } =
            await importPiAiSubpath<AnthropicStreamModule>("anthropic");
          upstream = streamSimpleAnthropic(
            {
              ...model,
              api: "anthropic-messages",
            } as Model<"anthropic-messages">,
            context,
            { ...options, headers },
          );
        } else {
          const { streamSimpleOpenAICompletions } =
            await importPiAiSubpath<OpenAICompletionsStreamModule>(
              "openai-completions",
            );
          upstream = streamSimpleOpenAICompletions(
            {
              ...model,
              api: "openai-completions",
            } as Model<"openai-completions">,
            context,
            { ...options, headers },
          );
        }
        await pipeStream(stream, upstream);
      } catch (error) {
        const failure = createErrorMessage(model, error);
        stream.push({ type: "start", partial: failure });
        stream.push({ type: "error", reason: "error", error: failure });
      }
    };

    // Fire and forget: every failure is reported through the stream itself.
    void forwardUpstream();

    return stream as unknown as AssistantMessageEventStream;
  };
}
@@ -31,7 +31,7 @@ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
31
31
  import { getSambanovaApiKey, getSambanovaShowPaid } from "../../config.ts";
32
32
  import { BASE_URL_SAMBANOVA, PROVIDER_SAMBANOVA } from "../../constants.ts";
33
33
  import { createLogger } from "../../lib/logger.ts";
34
- import { registerWithGlobalToggle } from "../../lib/registry.ts";
34
+ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
35
35
  import { fetchOpenAICompatibleModels } from "../../lib/util.ts";
36
36
  import { createReRegister, setupProvider } from "../../provider-helper.ts";
37
37
 
@@ -66,7 +66,13 @@ export default async function sambanovaProvider(pi: ExtensionAPI) {
66
66
 
67
67
  // All SambaNova models are free-tier (no payment method required).
68
68
  // Rate limits are lower on free tier but all models are accessible.
69
- const freeModels = allModels;
69
+ // Override _pricingKnown so isFreeModel trusts the zero costs.
70
+ for (const m of allModels) {
71
+ (m as unknown as { _pricingKnown?: boolean })._pricingKnown = true;
72
+ }
73
+ const freeModels = allModels.filter((m) =>
74
+ isFreeModel({ ...m, provider: PROVIDER_SAMBANOVA }, allModels),
75
+ );
70
76
  const stored = { free: freeModels, all: allModels };
71
77
 
72
78
  _logger.info(
@@ -45,7 +45,7 @@ import {
45
45
  getProxyModelCompat,
46
46
  isLikelyReasoningModel,
47
47
  } from "../../lib/provider-compat.ts";
48
- import { registerWithGlobalToggle } from "../../lib/registry.ts";
48
+ import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
49
49
  import { fetchWithRetry } from "../../lib/util.ts";
50
50
  import { createReRegister, setupProvider } from "../../provider-helper.ts";
51
51
 
@@ -123,7 +123,8 @@ async function fetchTogetherModels(
123
123
  contextWindow: m.context_length ?? 128_000,
124
124
  maxTokens: 16_384,
125
125
  compat: getProxyModelCompat({ id: m.id, name }),
126
- };
126
+ _pricingKnown: m.pricing !== undefined,
127
+ } as ProviderModelConfig & { _pricingKnown?: boolean };
127
128
  });
128
129
  }
129
130
 
@@ -150,13 +151,9 @@ export default async function togetherProvider(pi: ExtensionAPI) {
150
151
  }
151
152
 
152
153
  // Together AI is a pay-per-token provider with $1 trial credit.
153
- // Zero-cost models (if any) are marked free; all others are paid.
154
- const freeModels = allModels.filter(
155
- (m) =>
156
- m.cost.input === 0 &&
157
- m.cost.output === 0 &&
158
- m.cost.cacheRead === 0 &&
159
- m.cost.cacheWrite === 0,
154
+ // Use isFreeModel for consistent detection across all providers.
155
+ const freeModels = allModels.filter((m) =>
156
+ isFreeModel({ ...m, provider: PROVIDER_TOGETHER }, allModels),
160
157
  );
161
158
  const stored = { free: freeModels, all: allModels };
162
159