pi-free 2.0.8 → 2.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -1
- package/README.md +588 -572
- package/banner.jpg +0 -0
- package/banner.png +0 -0
- package/banner.svg +12 -10
- package/config.ts +349 -337
- package/constants.ts +106 -103
- package/index.ts +242 -239
- package/lib/built-in-toggle.ts +2 -2
- package/lib/model-detection.ts +1 -1
- package/lib/model-enhancer.ts +20 -20
- package/lib/provider-compat.ts +1 -1
- package/lib/registry.ts +1 -1
- package/lib/util.ts +524 -460
- package/package.json +70 -68
- package/provider-helper.ts +1 -1
- package/providers/cline/cline-auth.ts +1 -1
- package/providers/cline/cline.ts +2 -2
- package/providers/codestral/codestral.ts +1 -1
- package/providers/crofai/crofai.ts +190 -99
- package/providers/deepinfra/deepinfra.ts +206 -109
- package/providers/dynamic-built-in/index.ts +1 -1
- package/providers/kilo/kilo-auth.ts +1 -1
- package/providers/kilo/kilo.ts +2 -2
- package/providers/llm7/llm7.ts +1 -1
- package/providers/nvidia/nvidia.ts +1 -1
- package/providers/ollama/ollama.ts +610 -295
- package/providers/ollama/thinking-levels.ts +96 -0
- package/providers/qwen/qwen-auth.ts +1 -1
- package/providers/qwen/qwen-models.ts +101 -101
- package/providers/qwen/qwen.ts +2 -2
- package/providers/sambanova/sambanova.ts +1 -1
- package/providers/together/together.ts +197 -0
- package/providers/zenmux/zenmux.ts +194 -179
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thinking level mapping for Ollama Cloud models.
|
|
3
|
+
*
|
|
4
|
+
* Maps Pi's thinking levels to Ollama Cloud's OpenAI-compatible
|
|
5
|
+
* `reasoning_effort` values. The API accepts "none", "low", "medium",
|
|
6
|
+
* "high", and "max". On simple prompts, "max" can be a no-op over
|
|
7
|
+
* "high", but on harder prompts it can increase thinking substantially
|
|
8
|
+
* (e.g. deepseek-v4-pro: ~32k tokens on high vs ~55k on max).
|
|
9
|
+
*
|
|
10
|
+
* A `null` value means the level is hidden in Pi's UI.
|
|
11
|
+
*
|
|
12
|
+
* Model-specific behavior discovered through testing
|
|
13
|
+
* (see https://github.com/fgrehm/pi-ollama-cloud/blob/main/docs/think-experiment.md):
|
|
14
|
+
* - Most models: all levels work, "none" disables thinking
|
|
15
|
+
* - GPT-OSS: no off mode, only low/medium/high
|
|
16
|
+
* - Qwen 3.x (non-VL): binary-only (think/nothink) - off works
|
|
17
|
+
* - Qwen 3 VL: "none" doesn't disable thinking - off is hidden
|
|
18
|
+
* - Kimi K2 Thinking: "none" doesn't disable thinking - off is hidden
|
|
19
|
+
* - MiniMax M2.x: "none" doesn't disable thinking - off is hidden
|
|
20
|
+
*
|
|
21
|
+
* Reference: https://docs.ollama.com/api/openai-compatibility
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
25
|
+
|
|
26
|
+
/**
 * Non-nullable form of the `thinkingLevelMap` property of
 * `ProviderModelConfig`. Every map exported from this module is declared
 * with this type.
 */
export type ThinkingLevelMap = NonNullable<
  ProviderModelConfig["thinkingLevelMap"]
>;
|
|
29
|
+
|
|
30
|
+
/**
 * Default map: off/low/medium/high/xhigh are exposed, `minimal` is hidden.
 *
 * Keys are Pi thinking levels, values are the `reasoning_effort` string
 * used for that level; a `null` value hides the level.
 */
export const DEFAULT: ThinkingLevelMap = {
  off: "none",
  // No "minimal" counterpart is mapped, so the level is hidden.
  minimal: null,
  low: "low",
  medium: "medium",
  high: "high",
  // Pi's top level is mapped to the "max" effort value.
  xhigh: "max",
};
|
|
39
|
+
|
|
40
|
+
/**
 * GPT-OSS: can't disable thinking, only low/medium/high.
 * Every other level (off, minimal, xhigh) is hidden via `null`.
 * https://ollama.com/library/gpt-oss
 */
export const GPT_OSS: ThinkingLevelMap = {
  // Thinking cannot be turned off for this family, so "off" is hidden.
  off: null,
  minimal: null,
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: null,
};
|
|
52
|
+
|
|
53
|
+
/**
 * Qwen 3.x: binary-only (think/nothink), no gradation.
 * Only two levels are exposed: `off` (mapped to "none") and `medium`
 * (the single "thinking on" choice); all other levels are hidden.
 * https://docs.ollama.com/capabilities/thinking
 */
export const QWEN3: ThinkingLevelMap = {
  off: "none",
  minimal: null,
  low: null,
  // The one visible "thinking enabled" level.
  medium: "medium",
  high: null,
  xhigh: null,
};
|
|
65
|
+
|
|
66
|
+
/**
 * "none" doesn't disable thinking - off is hidden.
 * Used by kimi-k2-thinking, minimax family, qwen3-vl.
 * Apart from the hidden `off` level the entries match the default map.
 */
export const NO_OFF: ThinkingLevelMap = {
  // Hidden because "none" does not actually disable thinking on these models.
  off: null,
  minimal: null,
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: "max",
};
|
|
78
|
+
|
|
79
|
+
/**
 * Resolve the thinking level map for a model.
 *
 * Matching is case-sensitive and based on the start of the model ID. The
 * first matching rule wins, so more specific prefixes are checked before
 * broader ones. `kimi-k2-thinking` is matched as a whole model name, either
 * bare or followed by a `:tag` suffix, so other `kimi-k2*` models keep the
 * default map.
 *
 * @param id - Model ID as reported by the provider.
 * @param capabilities - Capability names reported for the model.
 * @returns The map for the model family, or `undefined` when the model does
 *   not list the "thinking" capability.
 */
export function resolveThinkingMap(
  id: string,
  capabilities: string[],
): ThinkingLevelMap | undefined {
  if (!capabilities.includes("thinking")) return undefined;

  if (id.startsWith("gpt-oss")) return GPT_OSS;
  // Must run before the generic "qwen3" rule: the VL variants cannot turn
  // thinking off and therefore need the NO_OFF map.
  if (id.startsWith("qwen3-vl")) return NO_OFF;
  if (id.startsWith("qwen3")) return QWEN3;
  // Accept the bare name and tagged forms (e.g. "kimi-k2-thinking:cloud");
  // a tagged ID previously fell through to DEFAULT and exposed a
  // non-functional "off" level.
  if (id === "kimi-k2-thinking" || id.startsWith("kimi-k2-thinking:")) {
    return NO_OFF;
  }
  if (id.startsWith("minimax")) return NO_OFF;

  return DEFAULT;
}
|
|
@@ -16,7 +16,7 @@ import crypto from "node:crypto";
|
|
|
16
16
|
import type {
|
|
17
17
|
OAuthCredentials,
|
|
18
18
|
OAuthLoginCallbacks,
|
|
19
|
-
} from "@
|
|
19
|
+
} from "@earendil-works/pi-ai";
|
|
20
20
|
import { createLogger } from "../../lib/logger.ts";
|
|
21
21
|
import { openBrowser } from "../../lib/open-browser.ts";
|
|
22
22
|
|
|
@@ -1,101 +1,101 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Qwen OAuth model definitions.
|
|
3
|
-
*
|
|
4
|
-
* @deprecated The 1,000 req/day free tier is no longer available. Auth is broken.
|
|
5
|
-
* This provider remains for backward compatibility but should not be used.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { ProviderModelConfig } from "@
|
|
9
|
-
import { createLogger } from "../../lib/logger.ts";
|
|
10
|
-
|
|
11
|
-
const _logger = createLogger("qwen-models");
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* portal.qwen.ai compatibility settings.
|
|
15
|
-
*
|
|
16
|
-
* portal.qwen.ai's OpenAI-compatible API does not support several parameters
|
|
17
|
-
* that the pi framework sends by default.
|
|
18
|
-
*/
|
|
19
|
-
export const PORTAL_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
|
|
20
|
-
supportsStore: false,
|
|
21
|
-
supportsDeveloperRole: false,
|
|
22
|
-
supportsReasoningEffort: false,
|
|
23
|
-
supportsUsageInStreaming: false,
|
|
24
|
-
supportsStrictMode: false,
|
|
25
|
-
maxTokensField: "max_tokens",
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
/**
|
|
29
|
-
* Fallback model used before OAuth completes or if model discovery fails.
|
|
30
|
-
* The real model ID is resolved dynamically via fetchQwenLiveModels() after auth.
|
|
31
|
-
*/
|
|
32
|
-
export const QWEN_FREE_MODELS: ProviderModelConfig[] = [
|
|
33
|
-
{
|
|
34
|
-
id: "coder-model",
|
|
35
|
-
name: "Qwen Coder — DEPRECATED (free tier discontinued)",
|
|
36
|
-
reasoning: false,
|
|
37
|
-
input: ["text"],
|
|
38
|
-
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
39
|
-
contextWindow: 131_072,
|
|
40
|
-
maxTokens: 16_384,
|
|
41
|
-
compat: PORTAL_COMPAT,
|
|
42
|
-
},
|
|
43
|
-
];
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* Fetch Qwen models. Returns static model list for backward compatibility.
|
|
47
|
-
* @deprecated Qwen free tier is discontinued.
|
|
48
|
-
*/
|
|
49
|
-
export async function fetchQwenModels(): Promise<ProviderModelConfig[]> {
|
|
50
|
-
_logger.info("Qwen provider is deprecated, returning placeholder models");
|
|
51
|
-
return QWEN_FREE_MODELS;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
/**
|
|
55
|
-
* Fetch live model list from the Qwen API using the OAuth access token.
|
|
56
|
-
* Returns updated models with real IDs from the server, or the original
|
|
57
|
-
* models unchanged if the request fails.
|
|
58
|
-
*/
|
|
59
|
-
export async function fetchQwenLiveModels(
|
|
60
|
-
baseUrl: string,
|
|
61
|
-
accessToken: string,
|
|
62
|
-
templateModels: ProviderModelConfig[],
|
|
63
|
-
): Promise<ProviderModelConfig[]> {
|
|
64
|
-
try {
|
|
65
|
-
const response = await fetch(`${baseUrl}/models`, {
|
|
66
|
-
headers: {
|
|
67
|
-
Authorization: `Bearer ${accessToken}`,
|
|
68
|
-
Accept: "application/json",
|
|
69
|
-
},
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
if (!response.ok) {
|
|
73
|
-
_logger.info("Qwen /v1/models fetch failed, keeping current model IDs", {
|
|
74
|
-
status: response.status,
|
|
75
|
-
});
|
|
76
|
-
return templateModels;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
interface ModelEntry {
|
|
80
|
-
id: string;
|
|
81
|
-
}
|
|
82
|
-
const data = (await response.json()) as { data?: ModelEntry[] };
|
|
83
|
-
const ids: string[] = (data.data ?? [])
|
|
84
|
-
.map((m: ModelEntry) => m.id)
|
|
85
|
-
.filter(Boolean);
|
|
86
|
-
|
|
87
|
-
_logger.info("Qwen live models discovered", { ids });
|
|
88
|
-
|
|
89
|
-
if (ids.length === 0) return templateModels;
|
|
90
|
-
|
|
91
|
-
// Prefer a coder model if available, otherwise use the first model
|
|
92
|
-
const preferred = ids.find((id) => /coder/i.test(id)) ?? ids[0];
|
|
93
|
-
|
|
94
|
-
return templateModels.map((m) => ({ ...m, id: preferred }));
|
|
95
|
-
} catch (err) {
|
|
96
|
-
_logger.info("Qwen live model fetch error, keeping current model IDs", {
|
|
97
|
-
error: String(err),
|
|
98
|
-
});
|
|
99
|
-
return templateModels;
|
|
100
|
-
}
|
|
101
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Qwen OAuth model definitions.
|
|
3
|
+
*
|
|
4
|
+
* @deprecated The 1,000 req/day free tier is no longer available. Auth is broken.
|
|
5
|
+
* This provider remains for backward compatibility but should not be used.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import { createLogger } from "../../lib/logger.ts";
|
|
10
|
+
|
|
11
|
+
const _logger = createLogger("qwen-models");
|
|
12
|
+
|
|
13
|
+
/**
 * portal.qwen.ai compatibility settings.
 *
 * portal.qwen.ai's OpenAI-compatible API does not support several parameters
 * that the pi framework sends by default, so each optional capability flag
 * below is switched off. The object is shared by the models declared in
 * this file through their `compat` property.
 */
export const PORTAL_COMPAT: NonNullable<ProviderModelConfig["compat"]> = {
  supportsStore: false,
  supportsDeveloperRole: false,
  supportsReasoningEffort: false,
  supportsUsageInStreaming: false,
  supportsStrictMode: false,
  // NOTE(review): presumably selects the request field that carries the
  // output token limit — confirm against the pi-coding-agent compat docs.
  maxTokensField: "max_tokens",
};
|
|
27
|
+
|
|
28
|
+
/**
 * Fallback model used before OAuth completes or if model discovery fails.
 * The real model ID is resolved dynamically via fetchQwenLiveModels() after auth.
 *
 * The single entry is a placeholder: its `id` is overwritten with a
 * server-reported ID when live discovery succeeds, while the remaining
 * fields are carried over unchanged.
 */
export const QWEN_FREE_MODELS: ProviderModelConfig[] = [
  {
    // Placeholder ID; replaced by fetchQwenLiveModels() on success.
    id: "coder-model",
    name: "Qwen Coder — DEPRECATED (free tier discontinued)",
    reasoning: false,
    input: ["text"],
    // All cost components are zero for this entry.
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 131_072,
    maxTokens: 16_384,
    compat: PORTAL_COMPAT,
  },
];
|
|
44
|
+
|
|
45
|
+
/**
 * Return the static placeholder catalog for the Qwen provider.
 *
 * No network request is made: the function logs a deprecation notice and
 * resolves with the shared `QWEN_FREE_MODELS` array (the same reference,
 * not a copy). Kept only for backward compatibility.
 *
 * @deprecated Qwen free tier is discontinued.
 * @returns The module-level placeholder model list.
 */
export async function fetchQwenModels(): Promise<ProviderModelConfig[]> {
  const placeholders = QWEN_FREE_MODELS;
  _logger.info("Qwen provider is deprecated, returning placeholder models");
  return placeholders;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Discover the live model ID from the Qwen API and apply it to the templates.
 *
 * Sends an authenticated GET to `${baseUrl}/models`, collects the non-empty
 * `id` values from the response's `data` array and picks one preferred ID:
 * the first ID containing "coder" (case-insensitive), otherwise the first ID.
 * Every template is then copied with its `id` replaced by that value.
 *
 * The function never rejects. On a non-OK status, an empty ID list, or any
 * thrown error it logs the reason and returns `templateModels` itself.
 *
 * @param baseUrl - API base URL; "/models" is appended to it.
 * @param accessToken - OAuth access token sent as a Bearer credential.
 * @param templateModels - Model configs whose IDs should be replaced.
 * @returns Copies of the templates carrying the preferred live ID, or the
 *   original `templateModels` array when discovery did not succeed.
 */
export async function fetchQwenLiveModels(
  baseUrl: string,
  accessToken: string,
  templateModels: ProviderModelConfig[],
): Promise<ProviderModelConfig[]> {
  try {
    const endpoint = `${baseUrl}/models`;
    const headers = {
      Authorization: `Bearer ${accessToken}`,
      Accept: "application/json",
    };
    const response = await fetch(endpoint, { headers });

    if (!response.ok) {
      _logger.info("Qwen /v1/models fetch failed, keeping current model IDs", {
        status: response.status,
      });
      return templateModels;
    }

    const payload = (await response.json()) as {
      data?: Array<{ id: string }>;
    };
    const entries = payload.data ?? [];
    // Drop entries whose id is missing or empty.
    const ids: string[] = entries.map((entry) => entry.id).filter(Boolean);

    _logger.info("Qwen live models discovered", { ids });

    if (ids.length === 0) {
      return templateModels;
    }

    // A coder model wins when one is listed; otherwise take the first ID.
    const coderId = ids.find((candidate) => /coder/i.test(candidate));
    const preferred = coderId ?? ids[0];

    return templateModels.map((template) => ({ ...template, id: preferred }));
  } catch (err) {
    _logger.info("Qwen live model fetch error, keeping current model IDs", {
      error: String(err),
    });
    return templateModels;
  }
}
|
package/providers/qwen/qwen.ts
CHANGED
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
* 1,000 free API calls/day — run /login qwen to authenticate.~~
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import type { Api, Model, OAuthCredentials } from "@
|
|
14
|
-
import type { ExtensionAPI } from "@
|
|
13
|
+
import type { Api, Model, OAuthCredentials } from "@earendil-works/pi-ai";
|
|
14
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
15
15
|
import { PROVIDER_QWEN, URL_QWEN_TOS } from "../../constants.ts";
|
|
16
16
|
import { createLogger } from "../../lib/logger.ts";
|
|
17
17
|
import { logWarning } from "../../lib/util.ts";
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
* # Models appear in /model selector as "sambanova/Meta-Llama-3.3-70B-Instruct"
|
|
28
28
|
*/
|
|
29
29
|
|
|
30
|
-
import type { ExtensionAPI } from "@
|
|
30
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
31
31
|
import { getSambanovaApiKey, getSambanovaShowPaid } from "../../config.ts";
|
|
32
32
|
import { BASE_URL_SAMBANOVA, PROVIDER_SAMBANOVA } from "../../constants.ts";
|
|
33
33
|
import { createLogger } from "../../lib/logger.ts";
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Together AI Provider Extension
|
|
3
|
+
*
|
|
4
|
+
* Together AI provides fast inference on 200+ open-source models through an
|
|
5
|
+
* OpenAI-compatible API. Known for Llama, DeepSeek, Qwen, Mixtral, and other
|
|
6
|
+
* popular models at competitive per-token pricing.
|
|
7
|
+
*
|
|
8
|
+
* Free tier:
|
|
9
|
+
* - $1 one-time credit on signup (no credit card)
|
|
10
|
+
* - 60 RPM, 600 RPD (varies by model)
|
|
11
|
+
* - Sign up at https://api.together.ai/
|
|
12
|
+
*
|
|
13
|
+
* Paid: pay-per-token after credits exhaust
|
|
14
|
+
*
|
|
15
|
+
* NOTE: Together AI's /v1/models returns a plain array (not { data: [...] }),
|
|
16
|
+
* uses per-million-token pricing (not per-token), and includes a "type" field
|
|
17
|
+
* we use to filter to chat models only.
|
|
18
|
+
*
|
|
19
|
+
* Endpoint:
|
|
20
|
+
* Chat: https://api.together.xyz/v1/chat/completions
|
|
21
|
+
*
|
|
22
|
+
* Setup:
|
|
23
|
+
* 1. Sign up at https://api.together.ai/
|
|
24
|
+
* 2. Get API key from https://api.together.ai/settings/api-keys
|
|
25
|
+
* 3. Set TOGETHER_AI_API_KEY env var (or add to ~/.pi/free.json)
|
|
26
|
+
*
|
|
27
|
+
* Usage:
|
|
28
|
+
* pi install git:github.com/apmantza/pi-free
|
|
29
|
+
* # Set TOGETHER_AI_API_KEY env var
|
|
30
|
+
* # Models appear in /model selector as "together/deepseek-ai/..."
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import type {
|
|
34
|
+
ExtensionAPI,
|
|
35
|
+
ProviderModelConfig,
|
|
36
|
+
} from "@earendil-works/pi-coding-agent";
|
|
37
|
+
import { getTogetherApiKey, getTogetherShowPaid } from "../../config.ts";
|
|
38
|
+
import {
|
|
39
|
+
BASE_URL_TOGETHER,
|
|
40
|
+
DEFAULT_FETCH_TIMEOUT_MS,
|
|
41
|
+
PROVIDER_TOGETHER,
|
|
42
|
+
} from "../../constants.ts";
|
|
43
|
+
import { createLogger } from "../../lib/logger.ts";
|
|
44
|
+
import {
|
|
45
|
+
getProxyModelCompat,
|
|
46
|
+
isLikelyReasoningModel,
|
|
47
|
+
} from "../../lib/provider-compat.ts";
|
|
48
|
+
import { registerWithGlobalToggle } from "../../lib/registry.ts";
|
|
49
|
+
import { fetchWithRetry } from "../../lib/util.ts";
|
|
50
|
+
import { createReRegister, setupProvider } from "../../provider-helper.ts";
|
|
51
|
+
|
|
52
|
+
const _logger = createLogger("together");
|
|
53
|
+
|
|
54
|
+
// =============================================================================
|
|
55
|
+
// Types
|
|
56
|
+
// =============================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * Subset of one entry of Together AI's `/models` response that this
 * provider reads. Everything except `id` is treated as optional.
 */
interface TogetherModel {
  /** Model identifier; reused unchanged as the Pi model ID. */
  id: string;
  /** Human-readable name; preferred over the ID when building the label. */
  display_name?: string;
  /** Model category; only entries with the value "chat" are kept. */
  type?: string;
  /** Context size; used as the model's context window when present. */
  context_length?: number;
  /** Prices as reported by the API (per million tokens per the file header). */
  pricing?: {
    input?: number;
    output?: number;
    cached_input?: number;
  };
}
|
|
69
|
+
|
|
70
|
+
// =============================================================================
|
|
71
|
+
// Fetch
|
|
72
|
+
// =============================================================================
|
|
73
|
+
|
|
74
|
+
/**
 * Fetch the Together AI model catalog and convert its chat models into Pi
 * provider model configs.
 *
 * The endpoint is expected to return a bare JSON array. An OpenAI-style
 * `{ data: [...] }` envelope is accepted as well, and any other payload is
 * rejected with a descriptive error instead of failing later with an opaque
 * TypeError. Entries that are not objects or lack a string ID are skipped.
 *
 * @param apiKey - Together AI API key, sent as a Bearer token.
 * @returns One config per entry whose `type` is "chat" and whose ID does not
 *   contain "embed".
 * @throws Error when the HTTP status is not OK or the payload holds no model
 *   list. Errors raised by `fetchWithRetry` propagate unchanged.
 */
async function fetchTogetherModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  // NOTE(review): the numeric arguments are presumably retry count and base
  // delay in ms — confirm against fetchWithRetry's signature.
  const response = await fetchWithRetry(
    `${BASE_URL_TOGETHER}/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `Together AI API error: ${response.status} ${response.statusText}`,
    );
  }

  // Validate the shape before use: the body is untrusted JSON.
  const payload: unknown = await response.json();
  let entries: unknown[];
  if (Array.isArray(payload)) {
    entries = payload;
  } else if (
    typeof payload === "object" &&
    payload !== null &&
    Array.isArray((payload as { data?: unknown }).data)
  ) {
    entries = (payload as { data: unknown[] }).data;
  } else {
    throw new Error(
      "Together AI API error: unexpected /models response shape",
    );
  }

  _logger.info(`[together] Fetched ${entries.length} models`);

  // Keep chat models only; embedding models are excluded by ID substring.
  const isChatModel = (entry: unknown): entry is TogetherModel => {
    if (typeof entry !== "object" || entry === null) return false;
    const { id, type } = entry as { id?: unknown; type?: unknown };
    return (
      type === "chat" &&
      typeof id === "string" &&
      id !== "" &&
      !id.includes("embed")
    );
  };

  return entries.filter(isChatModel).map((m): ProviderModelConfig => {
    const name = m.display_name || m.id.split("/").pop() || m.id;

    // Together AI pricing is per-MILLION tokens; divide to get per-token.
    // NOTE(review): this assumes ProviderModelConfig.cost is expressed per
    // token — confirm the expected unit against the pi-coding-agent types.
    const inputCost = (m.pricing?.input ?? 0) / 1_000_000;
    const outputCost = (m.pricing?.output ?? 0) / 1_000_000;
    const cacheReadCost = (m.pricing?.cached_input ?? 0) / 1_000_000;

    return {
      id: m.id,
      name,
      reasoning: isLikelyReasoningModel({ id: m.id, name }),
      input: ["text"],
      cost: {
        input: inputCost,
        output: outputCost,
        cacheRead: cacheReadCost,
        cacheWrite: 0,
      },
      // Fallbacks used when the API omits the context length.
      contextWindow: m.context_length ?? 128_000,
      maxTokens: 16_384,
      compat: getProxyModelCompat({ id: m.id, name }),
    };
  });
}
|
|
129
|
+
|
|
130
|
+
// =============================================================================
|
|
131
|
+
// Extension Entry Point
|
|
132
|
+
// =============================================================================
|
|
133
|
+
|
|
134
|
+
/**
 * Extension entry point for the Together AI provider.
 *
 * Flow:
 *  1. Read the API key; log and return when none is configured.
 *  2. Fetch the chat model catalog; log a warning and return when it is empty.
 *  3. Split off the models whose four cost components are all zero ("free").
 *  4. Build the re-register function, register the provider with the global
 *     toggle and wire up the provider toggle command.
 *  5. Perform the initial registration with the full catalog.
 *
 * Errors thrown by the catalog fetch are not caught here and reject the
 * returned promise.
 *
 * @param pi - Extension API handle passed on to the registration helpers.
 */
export default async function togetherProvider(pi: ExtensionAPI) {
  const apiKey = getTogetherApiKey();
  if (!apiKey) {
    _logger.info(
      "[together] Skipping — TOGETHER_AI_API_KEY not set. Sign up at https://api.together.ai/",
    );
    return;
  }

  const catalog = await fetchTogetherModels(apiKey);
  if (catalog.length === 0) {
    _logger.warn("[together] No chat models available");
    return;
  }

  // Together AI is pay-per-token with a trial credit: a model counts as
  // free only when every cost component is exactly zero.
  const zeroCost = catalog.filter(({ cost }) =>
    [cost.input, cost.output, cost.cacheRead, cost.cacheWrite].every(
      (price) => price === 0,
    ),
  );
  // Shared, mutable holder handed to both registration helpers below.
  const state = { free: zeroCost, all: catalog };

  _logger.info(
    `[together] ${catalog.length} chat models (${zeroCost.length} free)`,
  );

  const register = createReRegister(pi, {
    providerId: PROVIDER_TOGETHER,
    baseUrl: BASE_URL_TOGETHER,
    apiKey,
  });

  registerWithGlobalToggle(PROVIDER_TOGETHER, state, register, true);

  setupProvider(
    pi,
    {
      providerId: PROVIDER_TOGETHER,
      initialShowPaid: getTogetherShowPaid(),
      tosUrl: "https://api.together.ai/",
      reRegister: (models, refreshed) => {
        // Update the shared holder in place so its other users see the
        // refreshed lists too.
        if (refreshed) {
          Object.assign(state, { free: refreshed.free, all: refreshed.all });
        }
        register(models);
      },
    },
    state,
  );

  // Initial registration shows every model, paid ones included.
  register(state.all);
}
|