pi-free 2.0.9 → 2.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,10 @@
4
4
  * DeepInfra is an AI inference cloud with an OpenAI-compatible API for
5
5
  * 100+ open-source models (Llama, DeepSeek, Mistral, Qwen, Mixtral, etc.).
6
6
  *
7
+ * NOTE: DeepInfra's /v1/openai/models buries real model data in a "metadata"
8
+ * field (context_length, max_tokens, pricing, tags). We extract it here.
9
+ * Pricing is per-MILLION tokens.
10
+ *
7
11
  * Free tier:
8
12
  * - $5 one-time credit on signup (no credit card)
9
13
  * - ~5M tokens, expires after 90 days
@@ -30,14 +34,112 @@ import type {
30
34
  ProviderModelConfig,
31
35
  } from "@earendil-works/pi-coding-agent";
32
36
  import { getDeepinfraApiKey } from "../../config.ts";
33
- import { BASE_URL_DEEPINFRA, PROVIDER_DEEPINFRA } from "../../constants.ts";
37
+ import {
38
+ BASE_URL_DEEPINFRA,
39
+ DEFAULT_FETCH_TIMEOUT_MS,
40
+ PROVIDER_DEEPINFRA,
41
+ } from "../../constants.ts";
34
42
  import { createLogger } from "../../lib/logger.ts";
43
+ import {
44
+ getProxyModelCompat,
45
+ isLikelyReasoningModel,
46
+ } from "../../lib/provider-compat.ts";
35
47
  import { registerWithGlobalToggle } from "../../lib/registry.ts";
36
- import { fetchOpenAICompatibleModels } from "../../lib/util.ts";
48
+ import { fetchWithRetry } from "../../lib/util.ts";
37
49
  import { createReRegister, setupProvider } from "../../provider-helper.ts";
38
50
 
39
51
  const _logger = createLogger("deepinfra");
40
52
 
53
+ // =============================================================================
54
+ // Types
55
+ // =============================================================================
56
+
57
/**
 * Pricing block nested inside a model's metadata.
 * Values are quoted per MILLION tokens (per the provider note in this file).
 */
interface DeepInfraPricing {
  /** Price for input (prompt) tokens. */
  input_tokens?: number;
  /** Price for output (completion) tokens. */
  output_tokens?: number;
}

/**
 * The "metadata" object attached to an entry of the model list.
 * Every field is optional; consumers are expected to supply defaults.
 */
interface DeepInfraModelMetadata {
  /** Context window size, in tokens. */
  context_length?: number;
  /** Maximum number of tokens the model may generate. */
  max_tokens?: number;
  /** Free-form description; not read by the code visible in this file. */
  description?: string;
  pricing?: DeepInfraPricing;
  /** Capability tags; a "reasoning" tag is looked for when mapping models. */
  tags?: string[];
}

/** One entry of the `data` array returned by the DeepInfra model list. */
interface DeepInfraModel {
  /** Full model identifier; may contain "/" separated segments. */
  id: string;
  metadata?: DeepInfraModelMetadata;
}
70
+
71
+ // =============================================================================
72
+ // Fetch
73
+ // =============================================================================
74
+
75
/** Substrings of a lower-cased model id that mark a non-chat model. */
const NON_CHAT_ID_MARKERS = ["embed", "rerank", "whisper", "speech"] as const;

/**
 * Runtime guard for one entry of the model list. The response is external
 * JSON, so the compile-time shape cannot be trusted: only objects carrying a
 * string `id` are accepted.
 */
function isDeepInfraModel(entry: unknown): entry is DeepInfraModel {
  return (
    typeof entry === "object" &&
    entry !== null &&
    typeof (entry as { id?: unknown }).id === "string"
  );
}

/** True when the id contains none of the non-chat markers (case-insensitive). */
function isChatModelId(id: string): boolean {
  const lowered = id.toLowerCase();
  return !NON_CHAT_ID_MARKERS.some((marker) => lowered.includes(marker));
}

/** Returns `value` when it is a finite number, otherwise `fallback`. */
function finiteOr(value: unknown, fallback: number): number {
  return typeof value === "number" && Number.isFinite(value) ? value : fallback;
}

/**
 * Fetches the DeepInfra model list and maps the chat-capable entries to
 * provider model configs.
 *
 * Entries whose id contains "embed", "rerank", "whisper" or "speech" are
 * dropped. Missing or malformed metadata falls back to defaults: 0.3 / 0.9
 * per million tokens for input / output pricing, a 128 000 token context
 * window and 16 384 max output tokens.
 *
 * @param apiKey - DeepInfra API key, sent as a Bearer token.
 * @returns One config per chat model; empty when the response carries no
 *   usable `data` array.
 * @throws Error when the HTTP response status is not OK. Rejections from
 *   `fetchWithRetry` or from JSON parsing propagate unchanged.
 */
async function fetchDeepinfraModels(
  apiKey: string,
): Promise<ProviderModelConfig[]> {
  // The numeric arguments are presumably retry count, retry delay (ms) and
  // request timeout — confirm against fetchWithRetry's signature.
  const response = await fetchWithRetry(
    `${BASE_URL_DEEPINFRA}/models`,
    {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
    },
    3,
    1000,
    DEFAULT_FETCH_TIMEOUT_MS,
  );

  if (!response.ok) {
    throw new Error(
      `DeepInfra API error: ${response.status} ${response.statusText}`,
    );
  }

  // Validate the payload instead of trusting a cast: a non-array `data` or a
  // single entry without a string id must not abort the whole registration.
  const payload = (await response.json()) as { data?: unknown } | null;
  const entries: unknown[] = Array.isArray(payload?.data) ? payload.data : [];
  const models = entries.filter(isDeepInfraModel);

  if (models.length !== entries.length) {
    _logger.warn(
      `[deepinfra] Skipped ${entries.length - models.length} malformed model entries`,
    );
  }

  _logger.info(`[deepinfra] Fetched ${models.length} models`);

  return models
    .filter((m) => isChatModelId(m.id))
    .map((m): ProviderModelConfig => {
      const meta = m.metadata;
      // Display name is the last "/" segment of the id, or the id itself.
      const name = m.id.split("/").pop() || m.id;

      // Reasoning: when a tag list is present it is authoritative; the name
      // heuristic is consulted only when the model carries no tag list.
      const tags = meta?.tags;
      const reasoning = Array.isArray(tags)
        ? tags.includes("reasoning")
        : isLikelyReasoningModel({ id: m.id, name });

      // Pricing is per-MILLION tokens. Divide to get per-token (Pi convention).
      const inputCost = finiteOr(meta?.pricing?.input_tokens, 0.3) / 1_000_000;
      const outputCost =
        finiteOr(meta?.pricing?.output_tokens, 0.9) / 1_000_000;

      return {
        id: m.id,
        name,
        reasoning,
        input: ["text"],
        cost: {
          input: inputCost,
          output: outputCost,
          cacheRead: 0,
          cacheWrite: 0,
        },
        contextWindow: finiteOr(meta?.context_length, 128_000),
        maxTokens: finiteOr(meta?.max_tokens, 16_384),
        compat: getProxyModelCompat({ id: m.id, name }),
      };
    });
}
142
+
41
143
  // =============================================================================
42
144
  // Extension Entry Point
43
145
  // =============================================================================
@@ -52,16 +154,11 @@ export default async function deepinfraProvider(pi: ExtensionAPI) {
52
154
  return;
53
155
  }
54
156
 
55
- // Fetch models via shared OpenAI-compatible helper
56
- const allModels = await fetchOpenAICompatibleModels(
57
- "deepinfra",
58
- BASE_URL_DEEPINFRA,
59
- apiKey,
60
- { cost: { input: 0.3, output: 0.9 } },
61
- );
157
+ // Fetch models
158
+ const allModels = await fetchDeepinfraModels(apiKey);
62
159
 
63
160
  if (allModels.length === 0) {
64
- _logger.warn("[deepinfra] No models available");
161
+ _logger.warn("[deepinfra] No chat models available");
65
162
  return;
66
163
  }
67
164
 
@@ -72,7 +169,7 @@ export default async function deepinfraProvider(pi: ExtensionAPI) {
72
169
  const stored = { free: freeModels, all: allModels };
73
170
 
74
171
  _logger.info(
75
- `[deepinfra] Registered ${allModels.length} models (trial credit, 0 free)`,
172
+ `[deepinfra] Registered ${allModels.length} chat models (trial credit, 0 free)`,
76
173
  );
77
174
 
78
175
  // Create re-register function