pi-free 2.0.9 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Thinking level mapping for Ollama Cloud models.
3
+ *
4
+ * Maps Pi's thinking levels to Ollama Cloud's OpenAI-compatible
5
+ * `reasoning_effort` values. The API accepts "none", "low", "medium",
6
+ * "high", and "max". On simple prompts, "max" can be a no-op over
7
+ * "high", but on harder prompts it can increase thinking substantially
8
+ * (e.g. deepseek-v4-pro: ~32k tokens on high vs ~55k on max).
9
+ *
10
+ * A `null` value means the level is hidden in Pi's UI.
11
+ *
12
+ * Model-specific behavior discovered through testing
13
+ * (see https://github.com/fgrehm/pi-ollama-cloud/blob/main/docs/think-experiment.md):
14
+ * - Most models: all levels work, "none" disables thinking
15
+ * - GPT-OSS: no off mode, only low/medium/high
16
+ * - Qwen 3.x (non-VL): binary-only (think/nothink) - off works
17
+ * - Qwen 3 VL: "none" doesn't disable thinking - off is hidden
18
+ * - Kimi K2 Thinking: "none" doesn't disable thinking - off is hidden
19
+ * - MiniMax M2.x: "none" doesn't disable thinking - off is hidden
20
+ *
21
+ * Reference: https://docs.ollama.com/api/openai-compatibility
22
+ */
23
+
24
+ import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
25
+
26
/**
 * Table type for one model's thinking levels: the `thinkingLevelMap`
 * property of `ProviderModelConfig` with `null`/`undefined` stripped.
 */
export type ThinkingLevelMap = NonNullable<ProviderModelConfig["thinkingLevelMap"]>;
29
+
30
/**
 * Baseline table, returned for thinking-capable models that have no
 * family-specific override. Only `minimal` is hidden (`null`).
 */
export const DEFAULT: ThinkingLevelMap = {
  off: "none", // thinking disabled
  minimal: null, // hidden level
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: "max", // highest effort value sent
};
39
+
40
/**
 * GPT-OSS table: thinking cannot be switched off, so only the
 * low/medium/high levels are exposed; `off`, `minimal` and `xhigh` are hidden.
 * https://ollama.com/library/gpt-oss
 */
export const GPT_OSS: ThinkingLevelMap = {
  off: null, // no off mode for this family
  minimal: null,
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: null,
};
52
+
53
/**
 * Qwen 3.x table: thinking is a binary switch (think/nothink) with no
 * gradation, so only `off` and a single "on" level (`medium`) are exposed.
 * https://docs.ollama.com/capabilities/thinking
 */
export const QWEN3: ThinkingLevelMap = {
  off: "none", // nothink
  minimal: null,
  low: null,
  medium: "medium", // the one visible "think" level
  high: null,
  xhigh: null,
};
65
+
66
/**
 * Table for models where sending "none" does not actually disable
 * thinking: `off` is hidden, every graded level from low to max stays.
 * Used by kimi-k2-thinking, minimax family, qwen3-vl.
 */
export const NO_OFF: ThinkingLevelMap = {
  off: null, // "none" has no effect on these models
  minimal: null,
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: "max",
};
78
+
79
/**
 * Resolve the thinking level map for a model.
 *
 * Models whose `capabilities` do not list "thinking" get `undefined`.
 * Otherwise the model ID is matched by case-sensitive prefix against the
 * known families, most specific prefix first; anything unmatched gets
 * `DEFAULT`.
 *
 * @param id - Model identifier to classify.
 * @param capabilities - Capability tags of the model; only "thinking" is consulted.
 * @returns The table for the matching family, or `undefined` when the model
 *   does not support thinking.
 */
export function resolveThinkingMap(
  id: string,
  capabilities: readonly string[],
): ThinkingLevelMap | undefined {
  if (!capabilities.includes("thinking")) return undefined;

  if (id.startsWith("gpt-oss")) return GPT_OSS;
  // Order matters: "qwen3-vl" must be tested before the broader "qwen3".
  if (id.startsWith("qwen3-vl")) return NO_OFF;
  if (id.startsWith("qwen3")) return QWEN3;
  // Prefix match like every other family, so an ID carrying a tag or suffix
  // does not fall through to DEFAULT and expose an "off" level that has no effect.
  if (id.startsWith("kimi-k2-thinking")) return NO_OFF;
  if (id.startsWith("minimax")) return NO_OFF;

  return DEFAULT;
}
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Together AI Provider Extension
3
+ *
4
+ * Together AI provides fast inference on 200+ open-source models through an
5
+ * OpenAI-compatible API. Known for Llama, DeepSeek, Qwen, Mixtral, and other
6
+ * popular models at competitive per-token pricing.
7
+ *
8
+ * Free tier:
9
+ * - $1 one-time credit on signup (no credit card)
10
+ * - 60 RPM, 600 RPD (varies by model)
11
+ * - Sign up at https://api.together.ai/
12
+ *
13
+ * Paid: pay-per-token after credits exhaust
14
+ *
15
+ * NOTE: Together AI's /v1/models returns a plain array (not { data: [...] }),
16
+ * uses per-million-token pricing (not per-token), and includes a "type" field
17
+ * we use to filter to chat models only.
18
+ *
19
+ * Endpoint:
20
+ * Chat: https://api.together.xyz/v1/chat/completions
21
+ *
22
+ * Setup:
23
+ * 1. Sign up at https://api.together.ai/
24
+ * 2. Get API key from https://api.together.ai/settings/api-keys
25
+ * 3. Set TOGETHER_AI_API_KEY env var (or add to ~/.pi/free.json)
26
+ *
27
+ * Usage:
28
+ * pi install git:github.com/apmantza/pi-free
29
+ * # Set TOGETHER_AI_API_KEY env var
30
+ * # Models appear in /model selector as "together/deepseek-ai/..."
31
+ */
32
+
33
+ import type {
34
+ ExtensionAPI,
35
+ ProviderModelConfig,
36
+ } from "@earendil-works/pi-coding-agent";
37
+ import { getTogetherApiKey, getTogetherShowPaid } from "../../config.ts";
38
+ import {
39
+ BASE_URL_TOGETHER,
40
+ DEFAULT_FETCH_TIMEOUT_MS,
41
+ PROVIDER_TOGETHER,
42
+ } from "../../constants.ts";
43
+ import { createLogger } from "../../lib/logger.ts";
44
+ import {
45
+ getProxyModelCompat,
46
+ isLikelyReasoningModel,
47
+ } from "../../lib/provider-compat.ts";
48
+ import { registerWithGlobalToggle } from "../../lib/registry.ts";
49
+ import { fetchWithRetry } from "../../lib/util.ts";
50
+ import { createReRegister, setupProvider } from "../../provider-helper.ts";
51
+
52
+ const _logger = createLogger("together");
53
+
54
+ // =============================================================================
55
+ // Types
56
+ // =============================================================================
57
+
58
/** Price fields of a Together AI model entry (per-million-token values). */
interface TogetherModelPricing {
  input?: number;
  output?: number;
  cached_input?: number;
}

/** One entry of the Together AI `/models` listing, limited to the fields read here. */
interface TogetherModel {
  /** Model identifier, e.g. an "org/model" path. */
  id: string;
  /** Human-readable label; optional. */
  display_name?: string;
  /** Model category; only "chat" entries are kept. */
  type?: string;
  /** Context window size in tokens, when reported. */
  context_length?: number;
  /** Pricing block, when reported. */
  pricing?: TogetherModelPricing;
}
69
+
70
+ // =============================================================================
71
+ // Fetch
72
+ // =============================================================================
73
+
74
/**
 * Pull the model list out of a decoded `/models` payload.
 * Accepts the plain-array form as well as a `{ data: [...] }` envelope.
 *
 * @throws Error when the payload has neither shape.
 */
function extractTogetherModelList(payload: unknown): TogetherModel[] {
  if (Array.isArray(payload)) return payload as TogetherModel[];
  if (payload !== null && typeof payload === "object") {
    const data = (payload as { data?: unknown }).data;
    if (Array.isArray(data)) return data as TogetherModel[];
  }
  throw new Error("Together AI API error: unexpected /models response shape");
}

/**
 * Fetch the Together AI model list and convert its chat models into Pi
 * provider model configs.
 *
 * Only entries with `type === "chat"`, a non-empty string `id`, and no
 * "embed" in the ID are kept. Malformed entries are skipped.
 *
 * @param apiKey - Together AI API key, sent as a Bearer token.
 * @returns One `ProviderModelConfig` per accepted chat model.
 * @throws Error when the HTTP response is not OK or the payload is not a
 *   model list.
 */
async function fetchTogetherModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  const response = await fetchWithRetry(
    `${BASE_URL_TOGETHER}/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `Together AI API error: ${response.status} ${response.statusText}`,
    );
  }

  // Together AI is expected to return a plain array (not { data: [...] }),
  // but the payload is untyped JSON, so validate the shape before using it.
  const payload: unknown = await response.json();
  const models = extractTogetherModelList(payload);

  _logger.info(`[together] Fetched ${models.length} models`);

  // Together AI pricing is per-MILLION tokens; Pi expects per-token cost.
  const perToken = (perMillion: number | undefined): number =>
    (perMillion ?? 0) / 1_000_000;

  return models
    .filter(
      (m) =>
        m != null &&
        m.type === "chat" &&
        typeof m.id === "string" &&
        m.id.length > 0 &&
        !m.id.includes("embed"),
    )
    .map((m): ProviderModelConfig => {
      // Fall back to the last path segment of the ID, then to the ID itself.
      const name = m.display_name || m.id.split("/").pop() || m.id;

      return {
        id: m.id,
        name,
        reasoning: isLikelyReasoningModel({ id: m.id, name }),
        input: ["text"],
        cost: {
          input: perToken(m.pricing?.input),
          output: perToken(m.pricing?.output),
          cacheRead: perToken(m.pricing?.cached_input),
          cacheWrite: 0,
        },
        contextWindow: m.context_length ?? 128_000,
        maxTokens: 16_384,
        compat: getProxyModelCompat({ id: m.id, name }),
      };
    });
}
129
+
130
+ // =============================================================================
131
+ // Extension Entry Point
132
+ // =============================================================================
133
+
134
/**
 * Extension entry point: registers Together AI as a Pi provider.
 *
 * Logs and returns without registering when no API key is configured, when
 * the model list cannot be fetched, or when no chat models are returned.
 *
 * @param pi - Extension API handle, forwarded to `createReRegister` and
 *   `setupProvider`.
 */
export default async function togetherProvider(
  pi: ExtensionAPI,
): Promise<void> {
  const apiKey = getTogetherApiKey();

  if (!apiKey) {
    _logger.info(
      "[together] Skipping — TOGETHER_AI_API_KEY not set. Sign up at https://api.together.ai/",
    );
    return;
  }

  // Fetch models. A failed fetch (network error, bad key, unexpected payload)
  // is handled like the other "cannot register" cases: log and skip, instead
  // of rejecting the entry point.
  let allModels: ProviderModelConfig[];
  try {
    allModels = await fetchTogetherModels(apiKey);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    _logger.warn(`[together] Skipping — failed to fetch models: ${reason}`);
    return;
  }

  if (allModels.length === 0) {
    _logger.warn("[together] No chat models available");
    return;
  }

  // Together AI is a pay-per-token provider with $1 trial credit.
  // Zero-cost models (if any) are marked free; all others are paid.
  const freeModels = allModels.filter(
    (m) =>
      m.cost.input === 0 &&
      m.cost.output === 0 &&
      m.cost.cacheRead === 0 &&
      m.cost.cacheWrite === 0,
  );
  const stored = { free: freeModels, all: allModels };

  _logger.info(
    `[together] ${allModels.length} chat models (${freeModels.length} free)`,
  );

  // Create re-register function
  const reRegister = createReRegister(pi, {
    providerId: PROVIDER_TOGETHER,
    baseUrl: BASE_URL_TOGETHER,
    apiKey,
  });

  // Register with global toggle
  registerWithGlobalToggle(PROVIDER_TOGETHER, stored, reRegister, true);

  // Setup provider with toggle command
  setupProvider(
    pi,
    {
      providerId: PROVIDER_TOGETHER,
      initialShowPaid: getTogetherShowPaid(),
      tosUrl: "https://api.together.ai/",
      reRegister: (models, _stored) => {
        // Keep the local snapshot in sync when a refreshed one is supplied.
        if (_stored) {
          stored.free = _stored.free;
          stored.all = _stored.all;
        }
        reRegister(models);
      },
    },
    stored,
  );

  // Initial registration — show all models (trial credit provider)
  reRegister(stored.all);
}
@@ -27,10 +27,7 @@ import {
27
27
  PROVIDER_ZENMUX,
28
28
  } from "../../constants.ts";
29
29
  import { createLogger } from "../../lib/logger.ts";
30
- import {
31
- getProxyModelCompat,
32
- isLikelyReasoningModel,
33
- } from "../../lib/provider-compat.ts";
30
+ import { getProxyModelCompat } from "../../lib/provider-compat.ts";
34
31
  import { isFreeModel, registerWithGlobalToggle } from "../../lib/registry.ts";
35
32
  import { fetchWithRetry } from "../../lib/util.ts";
36
33
  import { createReRegister, setupProvider } from "../../provider-helper.ts";
@@ -43,17 +40,33 @@ const _logger = createLogger("zenmux");
43
40
 
44
41
  interface ZenMuxModel {
45
42
  id: string;
46
- name?: string;
43
+ display_name?: string;
47
44
  context_length?: number;
48
- pricing?: {
49
- prompt?: number;
50
- completion?: number;
45
+ input_modalities?: string[];
46
+ output_modalities?: string[];
47
+ capabilities?: {
48
+ reasoning?: boolean;
49
+ };
50
+ pricings?: {
51
+ prompt?: Array<{ value: number }>;
52
+ completion?: Array<{ value: number }>;
53
+ input_cache_read?: Array<{ value: number }>;
51
54
  };
52
55
  }
53
56
 
54
- function isZenmuxReasoningModel(model: Pick<ZenMuxModel, "id" | "name">) {
55
- const haystack = `${model.id} ${model.name ?? ""}`.toLowerCase();
56
- return isLikelyReasoningModel(model) || haystack.includes("claude");
57
/**
 * Extract the first pricing value from a ZenMux pricings array.
 * ZenMux uses a structured format: pricings.prompt[0].value (per-million-tokens).
 * We divide by 1_000_000 to convert to per-token price (Pi's convention).
 *
 * @param pricings - The model's `pricings` object, if any.
 * @param key - Which price list to read.
 * @returns The per-token price, or 0 if pricing is missing, empty, or not a
 *   finite number.
 */
function extractZenmuxPrice(
  pricings: ZenMuxModel["pricings"],
  key: "prompt" | "completion" | "input_cache_read",
): number {
  // The data is untyped JSON at runtime: the list may be absent or empty, and
  // its first entry may be null or lack a usable value. Optional chaining
  // keeps one malformed model from throwing.
  const first: { value?: unknown } | null | undefined = pricings?.[key]?.[0];
  const perMillion = Number(first?.value ?? 0);
  return Number.isFinite(perMillion) ? perMillion / 1_000_000 : 0;
}
58
71
 
59
72
  async function fetchZenmuxModels(
@@ -87,13 +100,15 @@ async function fetchZenmuxModels(
87
100
  return models.map(
88
101
  (m): ProviderModelConfig => ({
89
102
  id: m.id,
90
- name: m.name || m.id,
91
- reasoning: isZenmuxReasoningModel(m),
92
- input: ["text"],
103
+ name: m.display_name || m.id,
104
+ reasoning: m.capabilities?.reasoning ?? false,
105
+ input: m.input_modalities?.includes("image")
106
+ ? ["text", "image"]
107
+ : ["text"],
93
108
  cost: {
94
- input: m.pricing?.prompt || 0,
95
- output: m.pricing?.completion || 0,
96
- cacheRead: 0,
109
+ input: extractZenmuxPrice(m.pricings, "prompt"),
110
+ output: extractZenmuxPrice(m.pricings, "completion"),
111
+ cacheRead: extractZenmuxPrice(m.pricings, "input_cache_read"),
97
112
  cacheWrite: 0,
98
113
  },
99
114
  contextWindow: m.context_length || 128000,