@aliou/pi-neuralwatt 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
registerNeuralwattSettings,
|
|
9
9
|
} from "../../config";
|
|
10
10
|
import { getNeuralwattApiKey } from "../../lib/env";
|
|
11
|
+
import { fetchModels } from "../../lib/fetch-models";
|
|
11
12
|
import type { NeuralwattQuotas } from "../../types/quota-api";
|
|
12
13
|
import {
|
|
13
14
|
NEURALWATT_QUOTAS_REQUEST_EVENT,
|
|
@@ -16,9 +17,24 @@ import {
|
|
|
16
17
|
parseQuotaHeaders,
|
|
17
18
|
} from "../../types/quota-events";
|
|
18
19
|
import { fetchQuotas } from "../../utils/quotas";
|
|
19
|
-
import {
|
|
20
|
+
import type { NeuralwattModelConfig } from "./models";
|
|
21
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
/**
 * Build the `models` array handed to `pi.registerProvider`.
 *
 * Each config is copied without its `fast` flag, and its `compat` object is
 * layered on top of the provider-wide defaults so that per-model values win.
 *
 * @param models Model configs (hardcoded cache or live API results).
 * @returns One payload entry per input config, in the same order.
 */
function buildModelsPayload(models: NeuralwattModelConfig[]) {
  return models.map((config) => {
    // `fast` is stripped from the payload; only the remaining fields are forwarded.
    const { fast: _fast, ...rest } = config;
    const compat = {
      supportsDeveloperRole: false,
      maxTokensField: "max_tokens",
      ...rest.compat,
    };
    return { ...rest, compat };
  });
}
|
|
33
|
+
|
|
34
|
+
function registerNeuralwattProvider(
|
|
35
|
+
pi: ExtensionAPI,
|
|
36
|
+
models: NeuralwattModelConfig[],
|
|
37
|
+
): void {
|
|
22
38
|
pi.registerProvider("neuralwatt", {
|
|
23
39
|
baseUrl: "https://api.neuralwatt.com/v1",
|
|
24
40
|
apiKey: "NEURALWATT_API_KEY",
|
|
@@ -28,20 +44,15 @@ export function registerNeuralwattProvider(pi: ExtensionAPI): void {
|
|
|
28
44
|
Referer: "https://pi.dev",
|
|
29
45
|
"X-Title": "npm:@aliou/pi-neuralwatt",
|
|
30
46
|
},
|
|
31
|
-
models:
|
|
32
|
-
...model,
|
|
33
|
-
compat: {
|
|
34
|
-
supportsDeveloperRole: false,
|
|
35
|
-
maxTokensField: "max_tokens",
|
|
36
|
-
...model.compat,
|
|
37
|
-
},
|
|
38
|
-
})),
|
|
47
|
+
models: buildModelsPayload(models),
|
|
39
48
|
});
|
|
40
49
|
}
|
|
41
50
|
|
|
42
51
|
export default async function (pi: ExtensionAPI) {
|
|
43
52
|
await configLoader.load();
|
|
44
|
-
|
|
53
|
+
|
|
54
|
+
// Register with hardcoded cache immediately so models are available on startup
|
|
55
|
+
registerNeuralwattProvider(pi, NEURALWATT_MODELS_CACHE);
|
|
45
56
|
|
|
46
57
|
// Track which feature extensions loaded
|
|
47
58
|
const loadedFeatures = new Set<NeuralwattFeatureId>();
|
|
@@ -131,16 +142,35 @@ export default async function (pi: ExtensionAPI) {
|
|
|
131
142
|
loadedFeatures.add(feature);
|
|
132
143
|
});
|
|
133
144
|
|
|
134
|
-
// On session start: request extensions
|
|
145
|
+
// On session start: fetch live models, request extensions, emit config, fetch quotas
|
|
135
146
|
pi.on("session_start", async (_event, ctx) => {
|
|
136
147
|
loadedFeatures.clear();
|
|
137
148
|
pi.events.emit(NEURALWATT_EXTENSIONS_REQUEST_EVENT, undefined);
|
|
138
149
|
emitConfigUpdated(pi);
|
|
139
150
|
|
|
151
|
+
// Fetch live models from the API and re-register if successful
|
|
152
|
+
const result = await fetchModels();
|
|
153
|
+
if (result.success) {
|
|
154
|
+
const cacheIds = new Set(NEURALWATT_MODELS_CACHE.map((m) => m.id));
|
|
155
|
+
const liveIds = new Set(result.models.map((m) => m.id));
|
|
156
|
+
const added = result.models.filter((m) => !cacheIds.has(m.id));
|
|
157
|
+
const removed = NEURALWATT_MODELS_CACHE.filter((m) => !liveIds.has(m.id));
|
|
158
|
+
if (added.length > 0 || removed.length > 0) {
|
|
159
|
+
const parts: string[] = [];
|
|
160
|
+
if (added.length > 0) parts.push(`${added.length} new`);
|
|
161
|
+
if (removed.length > 0) parts.push(`${removed.length} removed`);
|
|
162
|
+
ctx.ui.notify(
|
|
163
|
+
`Neuralwatt models updated (${parts.join(", ")})`,
|
|
164
|
+
"info",
|
|
165
|
+
);
|
|
166
|
+
}
|
|
167
|
+
registerNeuralwattProvider(pi, result.models);
|
|
168
|
+
}
|
|
169
|
+
|
|
140
170
|
if (ctx.model?.provider !== "neuralwatt") return;
|
|
141
171
|
const apiKey = await getNeuralwattApiKey(ctx.modelRegistry.authStorage);
|
|
142
172
|
if (!apiKey) return;
|
|
143
|
-
const
|
|
144
|
-
if (
|
|
173
|
+
const quotaResult = await fetchQuotas(apiKey);
|
|
174
|
+
if (quotaResult.success) emitQuotas(quotaResult.data.quotas, "api");
|
|
145
175
|
});
|
|
146
176
|
}
|
|
@@ -1,18 +1,10 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
owned_by: string;
|
|
9
|
-
max_model_len: number;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
interface ApiResponse {
|
|
13
|
-
object: "list";
|
|
14
|
-
data: ApiModel[];
|
|
15
|
-
}
|
|
2
|
+
import type {
|
|
3
|
+
ApiModel as FullApiModel,
|
|
4
|
+
ApiResponse as FullApiResponse,
|
|
5
|
+
} from "../../lib/fetch-models";
|
|
6
|
+
import { mapApiModel } from "../../lib/fetch-models";
|
|
7
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
16
8
|
|
|
17
9
|
interface Discrepancy {
|
|
18
10
|
model: string;
|
|
@@ -21,7 +13,7 @@ interface Discrepancy {
|
|
|
21
13
|
api: unknown;
|
|
22
14
|
}
|
|
23
15
|
|
|
24
|
-
async function fetchApiModels(): Promise<
|
|
16
|
+
async function fetchApiModels(): Promise<FullApiModel[]> {
|
|
25
17
|
const apiKey = process.env.NEURALWATT_API_KEY;
|
|
26
18
|
const headers: Record<string, string> = {
|
|
27
19
|
"Content-Type": "application/json",
|
|
@@ -41,13 +33,13 @@ async function fetchApiModels(): Promise<ApiModel[]> {
|
|
|
41
33
|
);
|
|
42
34
|
}
|
|
43
35
|
|
|
44
|
-
const data:
|
|
36
|
+
const data: FullApiResponse = await response.json();
|
|
45
37
|
return data.data;
|
|
46
38
|
}
|
|
47
39
|
|
|
48
40
|
function compareModels(
|
|
49
|
-
apiModels:
|
|
50
|
-
hardcodedModels: typeof
|
|
41
|
+
apiModels: FullApiModel[],
|
|
42
|
+
hardcodedModels: typeof NEURALWATT_MODELS_CACHE,
|
|
51
43
|
): Discrepancy[] {
|
|
52
44
|
const discrepancies: Discrepancy[] = [];
|
|
53
45
|
|
|
@@ -73,10 +65,55 @@ function compareModels(
|
|
|
73
65
|
api: apiModel.max_model_len,
|
|
74
66
|
});
|
|
75
67
|
}
|
|
68
|
+
|
|
69
|
+
// Check metadata-driven fields if available
|
|
70
|
+
const meta = apiModel.metadata;
|
|
71
|
+
if (meta) {
|
|
72
|
+
// Check reasoning
|
|
73
|
+
if (meta.capabilities.reasoning !== hardcoded.reasoning) {
|
|
74
|
+
discrepancies.push({
|
|
75
|
+
model: hardcoded.id,
|
|
76
|
+
field: "reasoning",
|
|
77
|
+
hardcoded: hardcoded.reasoning,
|
|
78
|
+
api: meta.capabilities.reasoning,
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Check pricing
|
|
83
|
+
if (meta.pricing.input_per_million !== hardcoded.cost.input) {
|
|
84
|
+
discrepancies.push({
|
|
85
|
+
model: hardcoded.id,
|
|
86
|
+
field: "cost.input",
|
|
87
|
+
hardcoded: hardcoded.cost.input,
|
|
88
|
+
api: meta.pricing.input_per_million,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
if (meta.pricing.output_per_million !== hardcoded.cost.output) {
|
|
92
|
+
discrepancies.push({
|
|
93
|
+
model: hardcoded.id,
|
|
94
|
+
field: "cost.output",
|
|
95
|
+
hardcoded: hardcoded.cost.output,
|
|
96
|
+
api: meta.pricing.output_per_million,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Check vision
|
|
101
|
+
const hasVision = hardcoded.input.includes("image");
|
|
102
|
+
if (meta.capabilities.vision !== hasVision) {
|
|
103
|
+
discrepancies.push({
|
|
104
|
+
model: hardcoded.id,
|
|
105
|
+
field: "input (vision)",
|
|
106
|
+
hardcoded: hasVision,
|
|
107
|
+
api: meta.capabilities.vision,
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
}
|
|
76
111
|
}
|
|
77
112
|
|
|
78
113
|
// Check for API models not in hardcoded list
|
|
79
114
|
for (const apiModel of apiModels) {
|
|
115
|
+
if (apiModel.metadata?.deprecated || apiModel.metadata?.pricing.pricing_tbd)
|
|
116
|
+
continue;
|
|
80
117
|
const hardcoded = hardcodedModels.find((m) => m.id === apiModel.id);
|
|
81
118
|
if (!hardcoded) {
|
|
82
119
|
discrepancies.push({
|
|
@@ -94,7 +131,7 @@ function compareModels(
|
|
|
94
131
|
describe("Neuralwatt models", () => {
|
|
95
132
|
it("should match API model definitions", { timeout: 30000 }, async () => {
|
|
96
133
|
const apiModels = await fetchApiModels();
|
|
97
|
-
const discrepancies = compareModels(apiModels,
|
|
134
|
+
const discrepancies = compareModels(apiModels, NEURALWATT_MODELS_CACHE);
|
|
98
135
|
|
|
99
136
|
if (discrepancies.length > 0) {
|
|
100
137
|
console.error("\nModel discrepancies found:");
|
|
@@ -117,4 +154,186 @@ describe("Neuralwatt models", () => {
|
|
|
117
154
|
|
|
118
155
|
expect(discrepancies).toHaveLength(0);
|
|
119
156
|
});
|
|
157
|
+
|
|
158
|
+
it("should map API models with metadata correctly", () => {
|
|
159
|
+
// Simulate a reasoning model with reasoning_effort support (like gpt-oss-20b)
|
|
160
|
+
const apiModelWithEffort: FullApiModel = {
|
|
161
|
+
id: "openai/gpt-oss-20b",
|
|
162
|
+
object: "model",
|
|
163
|
+
created: 1777467968,
|
|
164
|
+
owned_by: "vllm",
|
|
165
|
+
root: "openai/gpt-oss-20b",
|
|
166
|
+
parent: null,
|
|
167
|
+
max_model_len: 16384,
|
|
168
|
+
metadata: {
|
|
169
|
+
display_name: "GPT-OSS 20B",
|
|
170
|
+
description: "OpenAI GPT-OSS 20B",
|
|
171
|
+
provider: "OpenAI",
|
|
172
|
+
huggingface_id: null,
|
|
173
|
+
pricing: {
|
|
174
|
+
input_per_million: 0.03,
|
|
175
|
+
output_per_million: 0.16,
|
|
176
|
+
cached_input_per_million: null,
|
|
177
|
+
cached_output_per_million: null,
|
|
178
|
+
currency: "USD",
|
|
179
|
+
pricing_tbd: false,
|
|
180
|
+
},
|
|
181
|
+
capabilities: {
|
|
182
|
+
tools: true,
|
|
183
|
+
json_mode: true,
|
|
184
|
+
vision: false,
|
|
185
|
+
reasoning: true,
|
|
186
|
+
reasoning_effort: true,
|
|
187
|
+
streaming: true,
|
|
188
|
+
system_role: true,
|
|
189
|
+
developer_role: false,
|
|
190
|
+
},
|
|
191
|
+
limits: {
|
|
192
|
+
max_context_length: 16384,
|
|
193
|
+
max_output_tokens: 4096,
|
|
194
|
+
max_images: null,
|
|
195
|
+
},
|
|
196
|
+
deprecated: false,
|
|
197
|
+
deprecated_message: null,
|
|
198
|
+
},
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
const result = mapApiModel(apiModelWithEffort);
|
|
202
|
+
expect(result.id).toBe("openai/gpt-oss-20b");
|
|
203
|
+
expect(result.name).toBe("GPT-OSS 20B");
|
|
204
|
+
expect(result.reasoning).toBe(true);
|
|
205
|
+
expect(result.contextWindow).toBe(16384);
|
|
206
|
+
expect(result.maxTokens).toBe(4096);
|
|
207
|
+
expect(result.input).toEqual(["text"]);
|
|
208
|
+
expect(result.cost.input).toBe(0.03);
|
|
209
|
+
expect(result.cost.output).toBe(0.16);
|
|
210
|
+
expect(
|
|
211
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
212
|
+
).toBe(true);
|
|
213
|
+
expect(result.fast).toBeUndefined();
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it("should map fast variants correctly", () => {
|
|
217
|
+
// Simulate a fast variant (owned by "neuralwatt")
|
|
218
|
+
const fastModel: FullApiModel = {
|
|
219
|
+
id: "qwen3.6-35b-fast",
|
|
220
|
+
object: "model",
|
|
221
|
+
created: 0,
|
|
222
|
+
owned_by: "neuralwatt",
|
|
223
|
+
max_model_len: 131072,
|
|
224
|
+
metadata: {
|
|
225
|
+
display_name: "Qwen3.6 35B Fast",
|
|
226
|
+
description: "Fast variant",
|
|
227
|
+
provider: "Qwen",
|
|
228
|
+
huggingface_id: null,
|
|
229
|
+
pricing: {
|
|
230
|
+
input_per_million: 0.05,
|
|
231
|
+
output_per_million: 0.1,
|
|
232
|
+
cached_input_per_million: null,
|
|
233
|
+
cached_output_per_million: null,
|
|
234
|
+
currency: "USD",
|
|
235
|
+
pricing_tbd: false,
|
|
236
|
+
},
|
|
237
|
+
capabilities: {
|
|
238
|
+
tools: true,
|
|
239
|
+
json_mode: true,
|
|
240
|
+
vision: false,
|
|
241
|
+
reasoning: false,
|
|
242
|
+
reasoning_effort: false,
|
|
243
|
+
streaming: true,
|
|
244
|
+
system_role: true,
|
|
245
|
+
developer_role: false,
|
|
246
|
+
},
|
|
247
|
+
limits: {
|
|
248
|
+
max_context_length: 131072,
|
|
249
|
+
max_output_tokens: null,
|
|
250
|
+
max_images: null,
|
|
251
|
+
},
|
|
252
|
+
deprecated: false,
|
|
253
|
+
deprecated_message: null,
|
|
254
|
+
},
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
const result = mapApiModel(fastModel);
|
|
258
|
+
expect(result.id).toBe("qwen3.6-35b-fast");
|
|
259
|
+
expect(result.fast).toBe(true);
|
|
260
|
+
expect(result.reasoning).toBe(false);
|
|
261
|
+
expect(
|
|
262
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
263
|
+
).toBeUndefined();
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
it("should map vision models correctly", () => {
|
|
267
|
+
const visionModel: FullApiModel = {
|
|
268
|
+
id: "moonshotai/Kimi-K2.6",
|
|
269
|
+
object: "model",
|
|
270
|
+
created: 1777467965,
|
|
271
|
+
owned_by: "vllm",
|
|
272
|
+
root: "moonshotai/Kimi-K2.6",
|
|
273
|
+
parent: null,
|
|
274
|
+
max_model_len: 262144,
|
|
275
|
+
metadata: {
|
|
276
|
+
display_name: "Kimi K2.6",
|
|
277
|
+
description: "Moonshot Kimi K2.6",
|
|
278
|
+
provider: "MoonshotAI",
|
|
279
|
+
huggingface_id: null,
|
|
280
|
+
pricing: {
|
|
281
|
+
input_per_million: 0.69,
|
|
282
|
+
output_per_million: 3.22,
|
|
283
|
+
cached_input_per_million: null,
|
|
284
|
+
cached_output_per_million: null,
|
|
285
|
+
currency: "USD",
|
|
286
|
+
pricing_tbd: false,
|
|
287
|
+
},
|
|
288
|
+
capabilities: {
|
|
289
|
+
tools: true,
|
|
290
|
+
json_mode: true,
|
|
291
|
+
vision: true,
|
|
292
|
+
reasoning: true,
|
|
293
|
+
reasoning_effort: false,
|
|
294
|
+
streaming: true,
|
|
295
|
+
system_role: true,
|
|
296
|
+
developer_role: false,
|
|
297
|
+
},
|
|
298
|
+
limits: {
|
|
299
|
+
max_context_length: 262144,
|
|
300
|
+
max_output_tokens: null,
|
|
301
|
+
max_images: 20,
|
|
302
|
+
},
|
|
303
|
+
deprecated: false,
|
|
304
|
+
deprecated_message: null,
|
|
305
|
+
},
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
const result = mapApiModel(visionModel);
|
|
309
|
+
expect(result.input).toEqual(["text", "image"]);
|
|
310
|
+
expect(result.reasoning).toBe(true);
|
|
311
|
+
expect(
|
|
312
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
313
|
+
).toBeUndefined();
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
it("should use defaults when metadata is missing", () => {
|
|
317
|
+
const bareModel: FullApiModel = {
|
|
318
|
+
id: "test/model",
|
|
319
|
+
object: "model",
|
|
320
|
+
created: 0,
|
|
321
|
+
owned_by: "vllm",
|
|
322
|
+
max_model_len: 8192,
|
|
323
|
+
};
|
|
324
|
+
|
|
325
|
+
const result = mapApiModel(bareModel);
|
|
326
|
+
expect(result.id).toBe("test/model");
|
|
327
|
+
expect(result.name).toBe("test/model");
|
|
328
|
+
expect(result.reasoning).toBe(false);
|
|
329
|
+
expect(result.contextWindow).toBe(8192);
|
|
330
|
+
expect(result.maxTokens).toBe(65536);
|
|
331
|
+
expect(result.input).toEqual(["text"]);
|
|
332
|
+
expect(result.cost.input).toBe(0);
|
|
333
|
+
expect(result.cost.output).toBe(0);
|
|
334
|
+
expect(result.fast).toBeUndefined();
|
|
335
|
+
expect(
|
|
336
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
337
|
+
).toBeUndefined();
|
|
338
|
+
});
|
|
120
339
|
});
|
|
@@ -18,7 +18,10 @@ const NEURALWATT_REASONING_EFFORT_MAP = {
|
|
|
18
18
|
xhigh: "high",
|
|
19
19
|
} as const;
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
/** Hardcoded model cache. Used as a fallback on startup before live models are fetched.
|
|
22
|
+
* Updated from https://api.neuralwatt.com/v1/models and https://portal.neuralwatt.com/pricing
|
|
23
|
+
*/
|
|
24
|
+
export const NEURALWATT_MODELS_CACHE: NeuralwattModelConfig[] = [
|
|
22
25
|
// Devstral Small 2 - Mistral
|
|
23
26
|
{
|
|
24
27
|
id: "mistralai/Devstral-Small-2-24B-Instruct-2512",
|
|
@@ -74,8 +77,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
74
77
|
maxTokens: 32768,
|
|
75
78
|
compat: {
|
|
76
79
|
supportsDeveloperRole: false,
|
|
77
|
-
supportsReasoningEffort: true,
|
|
78
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
79
80
|
maxTokensField: "max_tokens",
|
|
80
81
|
},
|
|
81
82
|
},
|
|
@@ -103,7 +104,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
103
104
|
{
|
|
104
105
|
id: "openai/gpt-oss-20b",
|
|
105
106
|
name: "GPT-OSS 20B",
|
|
106
|
-
reasoning:
|
|
107
|
+
reasoning: true,
|
|
107
108
|
input: ["text"],
|
|
108
109
|
cost: {
|
|
109
110
|
input: 0.03,
|
|
@@ -115,6 +116,8 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
115
116
|
maxTokens: 4096,
|
|
116
117
|
compat: {
|
|
117
118
|
supportsDeveloperRole: false,
|
|
119
|
+
supportsReasoningEffort: true,
|
|
120
|
+
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
118
121
|
maxTokensField: "max_tokens",
|
|
119
122
|
},
|
|
120
123
|
},
|
|
@@ -134,8 +137,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
134
137
|
maxTokens: 65536,
|
|
135
138
|
compat: {
|
|
136
139
|
supportsDeveloperRole: false,
|
|
137
|
-
supportsReasoningEffort: true,
|
|
138
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
139
140
|
maxTokensField: "max_tokens",
|
|
140
141
|
},
|
|
141
142
|
},
|
|
@@ -159,6 +160,45 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
159
160
|
maxTokensField: "max_tokens",
|
|
160
161
|
},
|
|
161
162
|
},
|
|
163
|
+
// Kimi K2.6 - MoonshotAI
|
|
164
|
+
{
|
|
165
|
+
id: "moonshotai/Kimi-K2.6",
|
|
166
|
+
name: "Kimi K2.6",
|
|
167
|
+
reasoning: true,
|
|
168
|
+
input: ["text", "image"],
|
|
169
|
+
cost: {
|
|
170
|
+
input: 0.69,
|
|
171
|
+
output: 3.22,
|
|
172
|
+
cacheRead: 0,
|
|
173
|
+
cacheWrite: 0,
|
|
174
|
+
},
|
|
175
|
+
contextWindow: 262144,
|
|
176
|
+
maxTokens: 65536,
|
|
177
|
+
compat: {
|
|
178
|
+
supportsDeveloperRole: false,
|
|
179
|
+
maxTokensField: "max_tokens",
|
|
180
|
+
},
|
|
181
|
+
},
|
|
182
|
+
// Kimi K2.6 Fast - MoonshotAI
|
|
183
|
+
{
|
|
184
|
+
id: "kimi-k2.6-fast",
|
|
185
|
+
name: "Kimi K2.6 Fast",
|
|
186
|
+
reasoning: false,
|
|
187
|
+
fast: true,
|
|
188
|
+
input: ["text", "image"],
|
|
189
|
+
cost: {
|
|
190
|
+
input: 0.69,
|
|
191
|
+
output: 3.22,
|
|
192
|
+
cacheRead: 0,
|
|
193
|
+
cacheWrite: 0,
|
|
194
|
+
},
|
|
195
|
+
contextWindow: 262144,
|
|
196
|
+
maxTokens: 65536,
|
|
197
|
+
compat: {
|
|
198
|
+
supportsDeveloperRole: false,
|
|
199
|
+
maxTokensField: "max_tokens",
|
|
200
|
+
},
|
|
201
|
+
},
|
|
162
202
|
// MiniMax M2.5 - MiniMax
|
|
163
203
|
{
|
|
164
204
|
id: "MiniMaxAI/MiniMax-M2.5",
|
|
@@ -175,8 +215,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
175
215
|
maxTokens: 65536,
|
|
176
216
|
compat: {
|
|
177
217
|
supportsDeveloperRole: false,
|
|
178
|
-
supportsReasoningEffort: true,
|
|
179
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
180
218
|
maxTokensField: "max_tokens",
|
|
181
219
|
},
|
|
182
220
|
},
|
|
@@ -196,8 +234,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
196
234
|
maxTokens: 65536,
|
|
197
235
|
compat: {
|
|
198
236
|
supportsDeveloperRole: false,
|
|
199
|
-
supportsReasoningEffort: true,
|
|
200
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
201
237
|
maxTokensField: "max_tokens",
|
|
202
238
|
},
|
|
203
239
|
},
|
|
@@ -235,28 +271,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
235
271
|
},
|
|
236
272
|
contextWindow: 131072,
|
|
237
273
|
maxTokens: 32768,
|
|
238
|
-
compat: {
|
|
239
|
-
supportsDeveloperRole: false,
|
|
240
|
-
supportsReasoningEffort: true,
|
|
241
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
242
|
-
maxTokensField: "max_tokens",
|
|
243
|
-
},
|
|
244
|
-
},
|
|
245
|
-
// Qwen3.6 35B Fast (qwen3.5-35b-fast) - Qwen
|
|
246
|
-
{
|
|
247
|
-
id: "qwen3.5-35b-fast",
|
|
248
|
-
name: "Qwen3.6 35B Fast",
|
|
249
|
-
reasoning: false,
|
|
250
|
-
fast: true,
|
|
251
|
-
input: ["text"],
|
|
252
|
-
cost: {
|
|
253
|
-
input: 0.05,
|
|
254
|
-
output: 0.1,
|
|
255
|
-
cacheRead: 0,
|
|
256
|
-
cacheWrite: 0,
|
|
257
|
-
},
|
|
258
|
-
contextWindow: 131072,
|
|
259
|
-
maxTokens: 32768,
|
|
260
274
|
compat: {
|
|
261
275
|
supportsDeveloperRole: false,
|
|
262
276
|
maxTokensField: "max_tokens",
|
|
@@ -265,7 +279,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
265
279
|
// Qwen3.6 35B Fast (qwen3.6-35b-fast) - Qwen
|
|
266
280
|
{
|
|
267
281
|
id: "qwen3.6-35b-fast",
|
|
268
|
-
name: "Qwen3.6 35B Fast
|
|
282
|
+
name: "Qwen3.6 35B Fast",
|
|
269
283
|
reasoning: false,
|
|
270
284
|
fast: true,
|
|
271
285
|
input: ["text"],
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import type { NeuralwattModelConfig } from "../extensions/provider/models";
|
|
2
|
+
|
|
3
|
+
/** Deadline, in milliseconds, that `fetchModels` passes to `AbortSignal.timeout`. */
const FETCH_TIMEOUT_MS = 15 * 1_000;
|
|
4
|
+
|
|
5
|
+
/** Pricing section of `ApiModelMetadata`. */
interface ApiModelPricing {
  /** Copied to `cost.input` by `mapApiModel`. */
  input_per_million: number;
  /** Copied to `cost.output` by `mapApiModel`. */
  output_per_million: number;
  /** Copied to `cost.cacheRead`; `null` becomes 0. */
  cached_input_per_million: number | null;
  /** Copied to `cost.cacheWrite`; `null` becomes 0. */
  cached_output_per_million: number | null;
  currency: string;
  /** Models with this flag set are dropped by `fetchModels`. */
  pricing_tbd: boolean;
}

/** Capability flags section of `ApiModelMetadata`. */
interface ApiModelCapabilities {
  tools: boolean;
  json_mode: boolean;
  /** When true, `mapApiModel` reports `["text", "image"]` as input kinds. */
  vision: boolean;
  /** Copied to the config's `reasoning` field. */
  reasoning: boolean;
  /** When true, `mapApiModel` enables `compat.supportsReasoningEffort`. */
  reasoning_effort: boolean;
  streaming: boolean;
  system_role: boolean;
  developer_role: boolean;
}

/** Limits section of `ApiModelMetadata`. */
interface ApiModelLimits {
  max_context_length: number;
  /** Overrides the default `maxTokens` in `mapApiModel` when set to a non-zero value. */
  max_output_tokens: number | null;
  max_images: number | null;
}

/**
 * Optional `metadata` object attached to an entry of the `/v1/models`
 * response (see `ApiModel.metadata`).
 */
export interface ApiModelMetadata {
  /** Used as the config `name` by `mapApiModel`. */
  display_name: string;
  description: string | null;
  provider: string;
  huggingface_id: string | null;
  pricing: ApiModelPricing;
  capabilities: ApiModelCapabilities;
  limits: ApiModelLimits;
  /** Models with this flag set are dropped by `fetchModels`. */
  deprecated: boolean;
  deprecated_message: string | null;
}
|
|
36
|
+
|
|
37
|
+
/** One entry of the `data` array in the `/v1/models` response. */
export interface ApiModel {
  /** Model identifier; an id ending in `-fast` marks a fast variant. */
  id: string;
  object: string;
  created: number;
  /** `"neuralwatt"` marks a fast variant (see `isFastModel`). */
  owned_by: string;
  /** Used as the config `contextWindow` by `mapApiModel`. */
  max_model_len: number;
  root?: string;
  parent?: string | null;
  /** When absent, `mapApiModel` falls back to defaults for every derived field. */
  metadata?: ApiModelMetadata;
}
|
|
47
|
+
|
|
48
|
+
/** JSON envelope that `fetchModels` reads from the `/v1/models` endpoint. */
export interface ApiResponse {
  /** Model entries; `fetchModels` filters and maps these. */
  data: ApiModel[];
  object: "list";
}
|
|
52
|
+
|
|
53
|
+
/**
 * Effort-level translation attached as `compat.reasoningEffortMap` to models
 * whose metadata advertises `reasoning_effort`. The two outer levels collapse
 * onto their nearest neighbour: `minimal` -> `low`, `xhigh` -> `high`.
 */
const NEURALWATT_REASONING_EFFORT_MAP = {
  // Collapsed downward.
  minimal: "low",
  // Passed through unchanged.
  low: "low",
  medium: "medium",
  high: "high",
  // Collapsed upward.
  xhigh: "high",
} as const;
|
|
60
|
+
|
|
61
|
+
/**
 * Decide whether an API model is a "fast" variant.
 *
 * A model qualifies when it is owned by `"neuralwatt"` or, failing that, when
 * its id ends with the `-fast` suffix.
 */
function isFastModel(model: ApiModel): boolean {
  const ownedByNeuralwatt = model.owned_by === "neuralwatt";
  return ownedByNeuralwatt || model.id.endsWith("-fast");
}
|
|
66
|
+
|
|
67
|
+
/**
 * Map one `/v1/models` entry to a `NeuralwattModelConfig`.
 *
 * Values come from `model.metadata` when present; anything missing falls back
 * to a default (name = id, no reasoning, text-only input, zero cost,
 * `maxTokens` 65536). The metadata is unvalidated JSON, so each section
 * (`pricing`, `capabilities`, `limits`) is read defensively: a partially
 * populated `metadata` object yields defaults for the missing section instead
 * of throwing and failing the whole model refresh.
 *
 * @param model Raw model entry as returned by the API.
 * @returns The provider model config derived from `model`.
 */
export function mapApiModel(model: ApiModel): NeuralwattModelConfig {
  const meta = model.metadata;
  // Hoisted so every read below tolerates an absent section as well as absent metadata.
  const pricing = meta?.pricing;
  const capabilities = meta?.capabilities;
  const limits = meta?.limits;

  const result: NeuralwattModelConfig = {
    id: model.id,
    name: meta?.display_name ?? model.id,
    reasoning: capabilities?.reasoning ?? false,
    contextWindow: model.max_model_len,
    maxTokens: 65536, // sensible default, overridden below when the API states a limit
    cost: {
      input: pricing?.input_per_million ?? 0,
      output: pricing?.output_per_million ?? 0,
      cacheRead: pricing?.cached_input_per_million ?? 0,
      cacheWrite: pricing?.cached_output_per_million ?? 0,
    },
    input: capabilities?.vision ? ["text", "image"] : ["text"],
    compat: {
      supportsDeveloperRole: false,
      maxTokensField: "max_tokens",
    },
  };

  // `fast` is only set for fast variants so it stays undefined otherwise.
  if (isFastModel(model)) {
    result.fast = true;
  }

  // Truthiness check on purpose: both `null` and 0 keep the default.
  if (limits?.max_output_tokens) {
    result.maxTokens = limits.max_output_tokens;
  }

  // Reasoning-effort support is opt-in per model.
  if (capabilities?.reasoning_effort) {
    result.compat = {
      ...result.compat,
      supportsReasoningEffort: true,
      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
    };
  }

  return result;
}
|
|
112
|
+
|
|
113
|
+
/** Failure categories reported by `fetchModels`. */
type FetchModelsErrorKind = "timeout" | "network" | "cancelled";

/** Successful outcome: the mapped model configs. */
interface FetchModelsSuccess {
  success: true;
  models: NeuralwattModelConfig[];
}

/** Failed outcome: a human-readable message plus its category. */
interface FetchModelsFailure {
  success: false;
  error: { message: string; kind: FetchModelsErrorKind };
}

/**
 * Result of `fetchModels`, discriminated on `success`. Failures are returned
 * as values so callers can branch on `error.kind`.
 */
export type FetchModelsResult = FetchModelsSuccess | FetchModelsFailure;
|
|
119
|
+
|
|
120
|
+
/**
 * Fetch live model definitions from the Neuralwatt /v1/models endpoint.
 *
 * When the API returns metadata (pricing, capabilities, limits), those values
 * are used directly. Fields not exposed by the API fall back to sensible
 * defaults. Entries flagged `deprecated` or `pricing_tbd` are dropped.
 *
 * Failures in the request/parse path are returned, not thrown:
 * - `timeout`   - the abort reason is a `TimeoutError` (the internal
 *                 `FETCH_TIMEOUT_MS` deadline fired),
 * - `cancelled` - the request was aborted for any other reason (the caller's
 *                 `signal`),
 * - `network`   - non-2xx status, unexpected body shape, or any other error.
 *
 * @param signal Optional caller-owned signal; aborting it cancels the request.
 * @returns The mapped models on success, otherwise a classified error.
 */
export async function fetchModels(
  signal?: AbortSignal,
): Promise<FetchModelsResult> {
  const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS);
  const combined = AbortSignal.any(
    signal ? [timeoutSignal, signal] : [timeoutSignal],
  );

  try {
    const response = await fetch("https://api.neuralwatt.com/v1/models", {
      headers: {
        Referer: "https://pi.dev",
        "X-Title": "npm:@aliou/pi-neuralwatt",
      },
      signal: combined,
    });

    if (!response.ok) {
      return {
        success: false,
        error: {
          message: `Failed to fetch models: ${response.status} ${response.statusText}`,
          kind: "network",
        },
      };
    }

    // The body is untrusted JSON: confirm the list envelope before iterating.
    const body = (await response.json()) as Partial<ApiResponse> | null;
    const entries = body?.data;
    if (!Array.isArray(entries)) {
      return {
        success: false,
        error: {
          message: "Failed to fetch models: unexpected response body",
          kind: "network",
        },
      };
    }

    // Drop deprecated models and models whose pricing is not settled yet.
    // `pricing` is read defensively in case metadata is only partially filled.
    const models = entries
      .filter(
        (m) => !m.metadata?.deprecated && !m.metadata?.pricing?.pricing_tbd,
      )
      .map((m) => mapApiModel(m));

    return { success: true, models };
  } catch (err: unknown) {
    // fetch() rejects with the signal's abort *reason*: a deadline surfaces as
    // a "TimeoutError" DOMException and a caller abort as whatever reason the
    // caller supplied. Checking only for an "AbortError" name would therefore
    // misreport timeouts as network errors, so classify from the signal.
    const abortLikeError =
      err instanceof DOMException &&
      (err.name === "AbortError" || err.name === "TimeoutError");

    if (combined.aborted || abortLikeError) {
      const reason: unknown = combined.aborted ? combined.reason : err;
      if (reason instanceof DOMException && reason.name === "TimeoutError") {
        return {
          success: false,
          error: { message: "Fetch models timed out", kind: "timeout" },
        };
      }
      return {
        success: false,
        error: { message: "Fetch models cancelled", kind: "cancelled" },
      };
    }

    const message = err instanceof Error ? err.message : "Unknown error";
    return { success: false, error: { message, kind: "network" } };
  }
}
|