@aliou/pi-neuralwatt 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
registerNeuralwattSettings,
|
|
9
9
|
} from "../../config";
|
|
10
10
|
import { getNeuralwattApiKey } from "../../lib/env";
|
|
11
|
+
import { fetchModels } from "../../lib/fetch-models";
|
|
11
12
|
import type { NeuralwattQuotas } from "../../types/quota-api";
|
|
12
13
|
import {
|
|
13
14
|
NEURALWATT_QUOTAS_REQUEST_EVENT,
|
|
@@ -16,9 +17,24 @@ import {
|
|
|
16
17
|
parseQuotaHeaders,
|
|
17
18
|
} from "../../types/quota-events";
|
|
18
19
|
import { fetchQuotas } from "../../utils/quotas";
|
|
19
|
-
import {
|
|
20
|
+
import type { NeuralwattModelConfig } from "./models";
|
|
21
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
20
22
|
|
|
21
|
-
|
|
23
|
+
function buildModelsPayload(models: NeuralwattModelConfig[]) {
|
|
24
|
+
return models.map(({ fast: _fast, ...model }) => ({
|
|
25
|
+
...model,
|
|
26
|
+
compat: {
|
|
27
|
+
supportsDeveloperRole: false,
|
|
28
|
+
maxTokensField: "max_tokens",
|
|
29
|
+
...model.compat,
|
|
30
|
+
},
|
|
31
|
+
}));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function registerNeuralwattProvider(
|
|
35
|
+
pi: ExtensionAPI,
|
|
36
|
+
models: NeuralwattModelConfig[],
|
|
37
|
+
): void {
|
|
22
38
|
pi.registerProvider("neuralwatt", {
|
|
23
39
|
baseUrl: "https://api.neuralwatt.com/v1",
|
|
24
40
|
apiKey: "NEURALWATT_API_KEY",
|
|
@@ -28,20 +44,15 @@ export function registerNeuralwattProvider(pi: ExtensionAPI): void {
|
|
|
28
44
|
Referer: "https://pi.dev",
|
|
29
45
|
"X-Title": "npm:@aliou/pi-neuralwatt",
|
|
30
46
|
},
|
|
31
|
-
models:
|
|
32
|
-
...model,
|
|
33
|
-
compat: {
|
|
34
|
-
supportsDeveloperRole: false,
|
|
35
|
-
maxTokensField: "max_tokens",
|
|
36
|
-
...model.compat,
|
|
37
|
-
},
|
|
38
|
-
})),
|
|
47
|
+
models: buildModelsPayload(models),
|
|
39
48
|
});
|
|
40
49
|
}
|
|
41
50
|
|
|
42
51
|
export default async function (pi: ExtensionAPI) {
|
|
43
52
|
await configLoader.load();
|
|
44
|
-
|
|
53
|
+
|
|
54
|
+
// Register with hardcoded cache immediately so models are available on startup
|
|
55
|
+
registerNeuralwattProvider(pi, NEURALWATT_MODELS_CACHE);
|
|
45
56
|
|
|
46
57
|
// Track which feature extensions loaded
|
|
47
58
|
const loadedFeatures = new Set<NeuralwattFeatureId>();
|
|
@@ -131,16 +142,35 @@ export default async function (pi: ExtensionAPI) {
|
|
|
131
142
|
loadedFeatures.add(feature);
|
|
132
143
|
});
|
|
133
144
|
|
|
134
|
-
// On session start: request extensions
|
|
145
|
+
// On session start: fetch live models, request extensions, emit config, fetch quotas
|
|
135
146
|
pi.on("session_start", async (_event, ctx) => {
|
|
136
147
|
loadedFeatures.clear();
|
|
137
148
|
pi.events.emit(NEURALWATT_EXTENSIONS_REQUEST_EVENT, undefined);
|
|
138
149
|
emitConfigUpdated(pi);
|
|
139
150
|
|
|
151
|
+
// Fetch live models from the API and re-register if successful
|
|
152
|
+
const result = await fetchModels();
|
|
153
|
+
if (result.success) {
|
|
154
|
+
const cacheIds = new Set(NEURALWATT_MODELS_CACHE.map((m) => m.id));
|
|
155
|
+
const liveIds = new Set(result.models.map((m) => m.id));
|
|
156
|
+
const added = result.models.filter((m) => !cacheIds.has(m.id));
|
|
157
|
+
const removed = NEURALWATT_MODELS_CACHE.filter((m) => !liveIds.has(m.id));
|
|
158
|
+
if (added.length > 0 || removed.length > 0) {
|
|
159
|
+
const parts: string[] = [];
|
|
160
|
+
if (added.length > 0) parts.push(`${added.length} new`);
|
|
161
|
+
if (removed.length > 0) parts.push(`${removed.length} removed`);
|
|
162
|
+
ctx.ui.notify(
|
|
163
|
+
`Neuralwatt models updated (${parts.join(", ")})`,
|
|
164
|
+
"info",
|
|
165
|
+
);
|
|
166
|
+
}
|
|
167
|
+
registerNeuralwattProvider(pi, result.models);
|
|
168
|
+
}
|
|
169
|
+
|
|
140
170
|
if (ctx.model?.provider !== "neuralwatt") return;
|
|
141
171
|
const apiKey = await getNeuralwattApiKey(ctx.modelRegistry.authStorage);
|
|
142
172
|
if (!apiKey) return;
|
|
143
|
-
const
|
|
144
|
-
if (
|
|
173
|
+
const quotaResult = await fetchQuotas(apiKey);
|
|
174
|
+
if (quotaResult.success) emitQuotas(quotaResult.data.quotas, "api");
|
|
145
175
|
});
|
|
146
176
|
}
|
|
@@ -1,18 +1,10 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
owned_by: string;
|
|
9
|
-
max_model_len: number;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
interface ApiResponse {
|
|
13
|
-
object: "list";
|
|
14
|
-
data: ApiModel[];
|
|
15
|
-
}
|
|
2
|
+
import type {
|
|
3
|
+
ApiModel as FullApiModel,
|
|
4
|
+
ApiResponse as FullApiResponse,
|
|
5
|
+
} from "../../lib/fetch-models";
|
|
6
|
+
import { mapApiModel } from "../../lib/fetch-models";
|
|
7
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
16
8
|
|
|
17
9
|
interface Discrepancy {
|
|
18
10
|
model: string;
|
|
@@ -21,7 +13,7 @@ interface Discrepancy {
|
|
|
21
13
|
api: unknown;
|
|
22
14
|
}
|
|
23
15
|
|
|
24
|
-
async function fetchApiModels(): Promise<
|
|
16
|
+
async function fetchApiModels(): Promise<FullApiModel[]> {
|
|
25
17
|
const apiKey = process.env.NEURALWATT_API_KEY;
|
|
26
18
|
const headers: Record<string, string> = {
|
|
27
19
|
"Content-Type": "application/json",
|
|
@@ -41,13 +33,13 @@ async function fetchApiModels(): Promise<ApiModel[]> {
|
|
|
41
33
|
);
|
|
42
34
|
}
|
|
43
35
|
|
|
44
|
-
const data:
|
|
36
|
+
const data: FullApiResponse = await response.json();
|
|
45
37
|
return data.data;
|
|
46
38
|
}
|
|
47
39
|
|
|
48
40
|
function compareModels(
|
|
49
|
-
apiModels:
|
|
50
|
-
hardcodedModels: typeof
|
|
41
|
+
apiModels: FullApiModel[],
|
|
42
|
+
hardcodedModels: typeof NEURALWATT_MODELS_CACHE,
|
|
51
43
|
): Discrepancy[] {
|
|
52
44
|
const discrepancies: Discrepancy[] = [];
|
|
53
45
|
|
|
@@ -73,10 +65,55 @@ function compareModels(
|
|
|
73
65
|
api: apiModel.max_model_len,
|
|
74
66
|
});
|
|
75
67
|
}
|
|
68
|
+
|
|
69
|
+
// Check metadata-driven fields if available
|
|
70
|
+
const meta = apiModel.metadata;
|
|
71
|
+
if (meta) {
|
|
72
|
+
// Check reasoning
|
|
73
|
+
if (meta.capabilities.reasoning !== hardcoded.reasoning) {
|
|
74
|
+
discrepancies.push({
|
|
75
|
+
model: hardcoded.id,
|
|
76
|
+
field: "reasoning",
|
|
77
|
+
hardcoded: hardcoded.reasoning,
|
|
78
|
+
api: meta.capabilities.reasoning,
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Check pricing
|
|
83
|
+
if (meta.pricing.input_per_million !== hardcoded.cost.input) {
|
|
84
|
+
discrepancies.push({
|
|
85
|
+
model: hardcoded.id,
|
|
86
|
+
field: "cost.input",
|
|
87
|
+
hardcoded: hardcoded.cost.input,
|
|
88
|
+
api: meta.pricing.input_per_million,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
if (meta.pricing.output_per_million !== hardcoded.cost.output) {
|
|
92
|
+
discrepancies.push({
|
|
93
|
+
model: hardcoded.id,
|
|
94
|
+
field: "cost.output",
|
|
95
|
+
hardcoded: hardcoded.cost.output,
|
|
96
|
+
api: meta.pricing.output_per_million,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Check vision
|
|
101
|
+
const hasVision = hardcoded.input.includes("image");
|
|
102
|
+
if (meta.capabilities.vision !== hasVision) {
|
|
103
|
+
discrepancies.push({
|
|
104
|
+
model: hardcoded.id,
|
|
105
|
+
field: "input (vision)",
|
|
106
|
+
hardcoded: hasVision,
|
|
107
|
+
api: meta.capabilities.vision,
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
}
|
|
76
111
|
}
|
|
77
112
|
|
|
78
113
|
// Check for API models not in hardcoded list
|
|
79
114
|
for (const apiModel of apiModels) {
|
|
115
|
+
if (apiModel.metadata?.deprecated || apiModel.metadata?.pricing.pricing_tbd)
|
|
116
|
+
continue;
|
|
80
117
|
const hardcoded = hardcodedModels.find((m) => m.id === apiModel.id);
|
|
81
118
|
if (!hardcoded) {
|
|
82
119
|
discrepancies.push({
|
|
@@ -94,7 +131,7 @@ function compareModels(
|
|
|
94
131
|
describe("Neuralwatt models", () => {
|
|
95
132
|
it("should match API model definitions", { timeout: 30000 }, async () => {
|
|
96
133
|
const apiModels = await fetchApiModels();
|
|
97
|
-
const discrepancies = compareModels(apiModels,
|
|
134
|
+
const discrepancies = compareModels(apiModels, NEURALWATT_MODELS_CACHE);
|
|
98
135
|
|
|
99
136
|
if (discrepancies.length > 0) {
|
|
100
137
|
console.error("\nModel discrepancies found:");
|
|
@@ -117,4 +154,186 @@ describe("Neuralwatt models", () => {
|
|
|
117
154
|
|
|
118
155
|
expect(discrepancies).toHaveLength(0);
|
|
119
156
|
});
|
|
157
|
+
|
|
158
|
+
it("should map API models with metadata correctly", () => {
|
|
159
|
+
// Simulate a reasoning model with reasoning_effort support (like gpt-oss-20b)
|
|
160
|
+
const apiModelWithEffort: FullApiModel = {
|
|
161
|
+
id: "openai/gpt-oss-20b",
|
|
162
|
+
object: "model",
|
|
163
|
+
created: 1777467968,
|
|
164
|
+
owned_by: "vllm",
|
|
165
|
+
root: "openai/gpt-oss-20b",
|
|
166
|
+
parent: null,
|
|
167
|
+
max_model_len: 16384,
|
|
168
|
+
metadata: {
|
|
169
|
+
display_name: "GPT-OSS 20B",
|
|
170
|
+
description: "OpenAI GPT-OSS 20B",
|
|
171
|
+
provider: "OpenAI",
|
|
172
|
+
huggingface_id: null,
|
|
173
|
+
pricing: {
|
|
174
|
+
input_per_million: 0.03,
|
|
175
|
+
output_per_million: 0.16,
|
|
176
|
+
cached_input_per_million: null,
|
|
177
|
+
cached_output_per_million: null,
|
|
178
|
+
currency: "USD",
|
|
179
|
+
pricing_tbd: false,
|
|
180
|
+
},
|
|
181
|
+
capabilities: {
|
|
182
|
+
tools: true,
|
|
183
|
+
json_mode: true,
|
|
184
|
+
vision: false,
|
|
185
|
+
reasoning: true,
|
|
186
|
+
reasoning_effort: true,
|
|
187
|
+
streaming: true,
|
|
188
|
+
system_role: true,
|
|
189
|
+
developer_role: false,
|
|
190
|
+
},
|
|
191
|
+
limits: {
|
|
192
|
+
max_context_length: 16384,
|
|
193
|
+
max_output_tokens: 4096,
|
|
194
|
+
max_images: null,
|
|
195
|
+
},
|
|
196
|
+
deprecated: false,
|
|
197
|
+
deprecated_message: null,
|
|
198
|
+
},
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
const result = mapApiModel(apiModelWithEffort);
|
|
202
|
+
expect(result.id).toBe("openai/gpt-oss-20b");
|
|
203
|
+
expect(result.name).toBe("GPT-OSS 20B");
|
|
204
|
+
expect(result.reasoning).toBe(true);
|
|
205
|
+
expect(result.contextWindow).toBe(16384);
|
|
206
|
+
expect(result.maxTokens).toBe(4096);
|
|
207
|
+
expect(result.input).toEqual(["text"]);
|
|
208
|
+
expect(result.cost.input).toBe(0.03);
|
|
209
|
+
expect(result.cost.output).toBe(0.16);
|
|
210
|
+
expect(
|
|
211
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
212
|
+
).toBe(true);
|
|
213
|
+
expect(result.fast).toBeUndefined();
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it("should map fast variants correctly", () => {
|
|
217
|
+
// Simulate a fast variant (owned by "neuralwatt")
|
|
218
|
+
const fastModel: FullApiModel = {
|
|
219
|
+
id: "qwen3.6-35b-fast",
|
|
220
|
+
object: "model",
|
|
221
|
+
created: 0,
|
|
222
|
+
owned_by: "neuralwatt",
|
|
223
|
+
max_model_len: 131072,
|
|
224
|
+
metadata: {
|
|
225
|
+
display_name: "Qwen3.6 35B Fast",
|
|
226
|
+
description: "Fast variant",
|
|
227
|
+
provider: "Qwen",
|
|
228
|
+
huggingface_id: null,
|
|
229
|
+
pricing: {
|
|
230
|
+
input_per_million: 0.05,
|
|
231
|
+
output_per_million: 0.1,
|
|
232
|
+
cached_input_per_million: null,
|
|
233
|
+
cached_output_per_million: null,
|
|
234
|
+
currency: "USD",
|
|
235
|
+
pricing_tbd: false,
|
|
236
|
+
},
|
|
237
|
+
capabilities: {
|
|
238
|
+
tools: true,
|
|
239
|
+
json_mode: true,
|
|
240
|
+
vision: false,
|
|
241
|
+
reasoning: false,
|
|
242
|
+
reasoning_effort: false,
|
|
243
|
+
streaming: true,
|
|
244
|
+
system_role: true,
|
|
245
|
+
developer_role: false,
|
|
246
|
+
},
|
|
247
|
+
limits: {
|
|
248
|
+
max_context_length: 131072,
|
|
249
|
+
max_output_tokens: null,
|
|
250
|
+
max_images: null,
|
|
251
|
+
},
|
|
252
|
+
deprecated: false,
|
|
253
|
+
deprecated_message: null,
|
|
254
|
+
},
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
const result = mapApiModel(fastModel);
|
|
258
|
+
expect(result.id).toBe("qwen3.6-35b-fast");
|
|
259
|
+
expect(result.fast).toBe(true);
|
|
260
|
+
expect(result.reasoning).toBe(false);
|
|
261
|
+
expect(
|
|
262
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
263
|
+
).toBeUndefined();
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
it("should map vision models correctly", () => {
|
|
267
|
+
const visionModel: FullApiModel = {
|
|
268
|
+
id: "moonshotai/Kimi-K2.6",
|
|
269
|
+
object: "model",
|
|
270
|
+
created: 1777467965,
|
|
271
|
+
owned_by: "vllm",
|
|
272
|
+
root: "moonshotai/Kimi-K2.6",
|
|
273
|
+
parent: null,
|
|
274
|
+
max_model_len: 262144,
|
|
275
|
+
metadata: {
|
|
276
|
+
display_name: "Kimi K2.6",
|
|
277
|
+
description: "Moonshot Kimi K2.6",
|
|
278
|
+
provider: "MoonshotAI",
|
|
279
|
+
huggingface_id: null,
|
|
280
|
+
pricing: {
|
|
281
|
+
input_per_million: 0.69,
|
|
282
|
+
output_per_million: 3.22,
|
|
283
|
+
cached_input_per_million: null,
|
|
284
|
+
cached_output_per_million: null,
|
|
285
|
+
currency: "USD",
|
|
286
|
+
pricing_tbd: false,
|
|
287
|
+
},
|
|
288
|
+
capabilities: {
|
|
289
|
+
tools: true,
|
|
290
|
+
json_mode: true,
|
|
291
|
+
vision: true,
|
|
292
|
+
reasoning: true,
|
|
293
|
+
reasoning_effort: false,
|
|
294
|
+
streaming: true,
|
|
295
|
+
system_role: true,
|
|
296
|
+
developer_role: false,
|
|
297
|
+
},
|
|
298
|
+
limits: {
|
|
299
|
+
max_context_length: 262144,
|
|
300
|
+
max_output_tokens: null,
|
|
301
|
+
max_images: 20,
|
|
302
|
+
},
|
|
303
|
+
deprecated: false,
|
|
304
|
+
deprecated_message: null,
|
|
305
|
+
},
|
|
306
|
+
};
|
|
307
|
+
|
|
308
|
+
const result = mapApiModel(visionModel);
|
|
309
|
+
expect(result.input).toEqual(["text", "image"]);
|
|
310
|
+
expect(result.reasoning).toBe(true);
|
|
311
|
+
expect(
|
|
312
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
313
|
+
).toBeUndefined();
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
it("should use defaults when metadata is missing", () => {
|
|
317
|
+
const bareModel: FullApiModel = {
|
|
318
|
+
id: "test/model",
|
|
319
|
+
object: "model",
|
|
320
|
+
created: 0,
|
|
321
|
+
owned_by: "vllm",
|
|
322
|
+
max_model_len: 8192,
|
|
323
|
+
};
|
|
324
|
+
|
|
325
|
+
const result = mapApiModel(bareModel);
|
|
326
|
+
expect(result.id).toBe("test/model");
|
|
327
|
+
expect(result.name).toBe("test/model");
|
|
328
|
+
expect(result.reasoning).toBe(false);
|
|
329
|
+
expect(result.contextWindow).toBe(8192);
|
|
330
|
+
expect(result.maxTokens).toBe(65536);
|
|
331
|
+
expect(result.input).toEqual(["text"]);
|
|
332
|
+
expect(result.cost.input).toBe(0);
|
|
333
|
+
expect(result.cost.output).toBe(0);
|
|
334
|
+
expect(result.fast).toBeUndefined();
|
|
335
|
+
expect(
|
|
336
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
337
|
+
).toBeUndefined();
|
|
338
|
+
});
|
|
120
339
|
});
|
|
@@ -18,7 +18,10 @@ const NEURALWATT_REASONING_EFFORT_MAP = {
|
|
|
18
18
|
xhigh: "high",
|
|
19
19
|
} as const;
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
/** Hardcoded model cache. Used as a fallback on startup before live models are fetched.
|
|
22
|
+
* Updated from https://api.neuralwatt.com/v1/models and https://portal.neuralwatt.com/pricing
|
|
23
|
+
*/
|
|
24
|
+
export const NEURALWATT_MODELS_CACHE: NeuralwattModelConfig[] = [
|
|
22
25
|
// Devstral Small 2 - Mistral
|
|
23
26
|
{
|
|
24
27
|
id: "mistralai/Devstral-Small-2-24B-Instruct-2512",
|
|
@@ -74,8 +77,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
74
77
|
maxTokens: 32768,
|
|
75
78
|
compat: {
|
|
76
79
|
supportsDeveloperRole: false,
|
|
77
|
-
supportsReasoningEffort: true,
|
|
78
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
79
80
|
maxTokensField: "max_tokens",
|
|
80
81
|
},
|
|
81
82
|
},
|
|
@@ -103,7 +104,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
103
104
|
{
|
|
104
105
|
id: "openai/gpt-oss-20b",
|
|
105
106
|
name: "GPT-OSS 20B",
|
|
106
|
-
reasoning:
|
|
107
|
+
reasoning: true,
|
|
107
108
|
input: ["text"],
|
|
108
109
|
cost: {
|
|
109
110
|
input: 0.03,
|
|
@@ -115,6 +116,8 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
115
116
|
maxTokens: 4096,
|
|
116
117
|
compat: {
|
|
117
118
|
supportsDeveloperRole: false,
|
|
119
|
+
supportsReasoningEffort: true,
|
|
120
|
+
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
118
121
|
maxTokensField: "max_tokens",
|
|
119
122
|
},
|
|
120
123
|
},
|
|
@@ -134,8 +137,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
134
137
|
maxTokens: 65536,
|
|
135
138
|
compat: {
|
|
136
139
|
supportsDeveloperRole: false,
|
|
137
|
-
supportsReasoningEffort: true,
|
|
138
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
139
140
|
maxTokensField: "max_tokens",
|
|
140
141
|
},
|
|
141
142
|
},
|
|
@@ -175,8 +176,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
175
176
|
maxTokens: 65536,
|
|
176
177
|
compat: {
|
|
177
178
|
supportsDeveloperRole: false,
|
|
178
|
-
supportsReasoningEffort: true,
|
|
179
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
180
179
|
maxTokensField: "max_tokens",
|
|
181
180
|
},
|
|
182
181
|
},
|
|
@@ -184,7 +183,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
184
183
|
{
|
|
185
184
|
id: "kimi-k2.6-fast",
|
|
186
185
|
name: "Kimi K2.6 Fast",
|
|
187
|
-
reasoning:
|
|
186
|
+
reasoning: false,
|
|
188
187
|
fast: true,
|
|
189
188
|
input: ["text", "image"],
|
|
190
189
|
cost: {
|
|
@@ -197,8 +196,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
197
196
|
maxTokens: 65536,
|
|
198
197
|
compat: {
|
|
199
198
|
supportsDeveloperRole: false,
|
|
200
|
-
supportsReasoningEffort: true,
|
|
201
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
202
199
|
maxTokensField: "max_tokens",
|
|
203
200
|
},
|
|
204
201
|
},
|
|
@@ -218,8 +215,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
218
215
|
maxTokens: 65536,
|
|
219
216
|
compat: {
|
|
220
217
|
supportsDeveloperRole: false,
|
|
221
|
-
supportsReasoningEffort: true,
|
|
222
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
223
218
|
maxTokensField: "max_tokens",
|
|
224
219
|
},
|
|
225
220
|
},
|
|
@@ -239,8 +234,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
239
234
|
maxTokens: 65536,
|
|
240
235
|
compat: {
|
|
241
236
|
supportsDeveloperRole: false,
|
|
242
|
-
supportsReasoningEffort: true,
|
|
243
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
244
237
|
maxTokensField: "max_tokens",
|
|
245
238
|
},
|
|
246
239
|
},
|
|
@@ -280,8 +273,6 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
280
273
|
maxTokens: 32768,
|
|
281
274
|
compat: {
|
|
282
275
|
supportsDeveloperRole: false,
|
|
283
|
-
supportsReasoningEffort: true,
|
|
284
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
285
276
|
maxTokensField: "max_tokens",
|
|
286
277
|
},
|
|
287
278
|
},
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import type { NeuralwattModelConfig } from "../extensions/provider/models";
|
|
2
|
+
|
|
3
|
+
const FETCH_TIMEOUT_MS = 15_000;
|
|
4
|
+
|
|
5
|
+
export interface ApiModelMetadata {
|
|
6
|
+
display_name: string;
|
|
7
|
+
description: string | null;
|
|
8
|
+
provider: string;
|
|
9
|
+
huggingface_id: string | null;
|
|
10
|
+
pricing: {
|
|
11
|
+
input_per_million: number;
|
|
12
|
+
output_per_million: number;
|
|
13
|
+
cached_input_per_million: number | null;
|
|
14
|
+
cached_output_per_million: number | null;
|
|
15
|
+
currency: string;
|
|
16
|
+
pricing_tbd: boolean;
|
|
17
|
+
};
|
|
18
|
+
capabilities: {
|
|
19
|
+
tools: boolean;
|
|
20
|
+
json_mode: boolean;
|
|
21
|
+
vision: boolean;
|
|
22
|
+
reasoning: boolean;
|
|
23
|
+
reasoning_effort: boolean;
|
|
24
|
+
streaming: boolean;
|
|
25
|
+
system_role: boolean;
|
|
26
|
+
developer_role: boolean;
|
|
27
|
+
};
|
|
28
|
+
limits: {
|
|
29
|
+
max_context_length: number;
|
|
30
|
+
max_output_tokens: number | null;
|
|
31
|
+
max_images: number | null;
|
|
32
|
+
};
|
|
33
|
+
deprecated: boolean;
|
|
34
|
+
deprecated_message: string | null;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export interface ApiModel {
|
|
38
|
+
id: string;
|
|
39
|
+
object: string;
|
|
40
|
+
created: number;
|
|
41
|
+
owned_by: string;
|
|
42
|
+
root?: string;
|
|
43
|
+
parent?: string | null;
|
|
44
|
+
max_model_len: number;
|
|
45
|
+
metadata?: ApiModelMetadata;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export interface ApiResponse {
|
|
49
|
+
object: "list";
|
|
50
|
+
data: ApiModel[];
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const NEURALWATT_REASONING_EFFORT_MAP = {
|
|
54
|
+
minimal: "low",
|
|
55
|
+
low: "low",
|
|
56
|
+
medium: "medium",
|
|
57
|
+
high: "high",
|
|
58
|
+
xhigh: "high",
|
|
59
|
+
} as const;
|
|
60
|
+
|
|
61
|
+
/** Identify fast variants by their owned_by field or naming convention. */
|
|
62
|
+
function isFastModel(model: ApiModel): boolean {
|
|
63
|
+
if (model.owned_by === "neuralwatt") return true;
|
|
64
|
+
return model.id.endsWith("-fast");
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/** Map API model data to NeuralwattModelConfig. */
|
|
68
|
+
export function mapApiModel(model: ApiModel): NeuralwattModelConfig {
|
|
69
|
+
const meta = model.metadata;
|
|
70
|
+
const fast = isFastModel(model);
|
|
71
|
+
|
|
72
|
+
// Base fields from top-level API data
|
|
73
|
+
const result: NeuralwattModelConfig = {
|
|
74
|
+
id: model.id,
|
|
75
|
+
name: meta?.display_name ?? model.id,
|
|
76
|
+
reasoning: meta?.capabilities.reasoning ?? false,
|
|
77
|
+
contextWindow: model.max_model_len,
|
|
78
|
+
maxTokens: 65536, // sensible default
|
|
79
|
+
cost: {
|
|
80
|
+
input: meta?.pricing.input_per_million ?? 0,
|
|
81
|
+
output: meta?.pricing.output_per_million ?? 0,
|
|
82
|
+
cacheRead: meta?.pricing.cached_input_per_million ?? 0,
|
|
83
|
+
cacheWrite: meta?.pricing.cached_output_per_million ?? 0,
|
|
84
|
+
},
|
|
85
|
+
input: meta?.capabilities.vision ? ["text", "image"] : ["text"],
|
|
86
|
+
compat: {
|
|
87
|
+
supportsDeveloperRole: false,
|
|
88
|
+
maxTokensField: "max_tokens",
|
|
89
|
+
},
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
if (fast) {
|
|
93
|
+
result.fast = true;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Override maxTokens from limits if available
|
|
97
|
+
if (meta?.limits.max_output_tokens) {
|
|
98
|
+
result.maxTokens = meta.limits.max_output_tokens;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Reasoning effort support
|
|
102
|
+
if (meta?.capabilities.reasoning_effort) {
|
|
103
|
+
result.compat = {
|
|
104
|
+
...result.compat,
|
|
105
|
+
supportsReasoningEffort: true,
|
|
106
|
+
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return result;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
export type FetchModelsResult =
|
|
114
|
+
| { success: true; models: NeuralwattModelConfig[] }
|
|
115
|
+
| {
|
|
116
|
+
success: false;
|
|
117
|
+
error: { message: string; kind: "timeout" | "network" | "cancelled" };
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Fetch live model definitions from the Neuralwatt /v1/models endpoint.
|
|
122
|
+
*
|
|
123
|
+
* When the API returns metadata (pricing, capabilities, limits), those values
|
|
124
|
+
* are used directly. Fields not exposed by the API fall back to sensible
|
|
125
|
+
* defaults.
|
|
126
|
+
*/
|
|
127
|
+
export async function fetchModels(
|
|
128
|
+
signal?: AbortSignal,
|
|
129
|
+
): Promise<FetchModelsResult> {
|
|
130
|
+
const signals: AbortSignal[] = [AbortSignal.timeout(FETCH_TIMEOUT_MS)];
|
|
131
|
+
if (signal) signals.push(signal);
|
|
132
|
+
const combined = AbortSignal.any(signals);
|
|
133
|
+
|
|
134
|
+
try {
|
|
135
|
+
const response = await fetch("https://api.neuralwatt.com/v1/models", {
|
|
136
|
+
headers: {
|
|
137
|
+
Referer: "https://pi.dev",
|
|
138
|
+
"X-Title": "npm:@aliou/pi-neuralwatt",
|
|
139
|
+
},
|
|
140
|
+
signal: combined,
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
if (!response.ok) {
|
|
144
|
+
return {
|
|
145
|
+
success: false,
|
|
146
|
+
error: {
|
|
147
|
+
message: `Failed to fetch models: ${response.status} ${response.statusText}`,
|
|
148
|
+
kind: "network",
|
|
149
|
+
},
|
|
150
|
+
};
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
const data: ApiResponse = await response.json();
|
|
154
|
+
|
|
155
|
+
// Filter out deprecated models
|
|
156
|
+
const active = data.data.filter(
|
|
157
|
+
(m) => !m.metadata?.deprecated && !m.metadata?.pricing.pricing_tbd,
|
|
158
|
+
);
|
|
159
|
+
|
|
160
|
+
const models = active.map(mapApiModel);
|
|
161
|
+
return { success: true, models };
|
|
162
|
+
} catch (err: unknown) {
|
|
163
|
+
if (err instanceof DOMException && err.name === "AbortError") {
|
|
164
|
+
if (
|
|
165
|
+
combined.reason instanceof DOMException &&
|
|
166
|
+
combined.reason.name === "TimeoutError"
|
|
167
|
+
) {
|
|
168
|
+
return {
|
|
169
|
+
success: false,
|
|
170
|
+
error: { message: "Fetch models timed out", kind: "timeout" },
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
return {
|
|
174
|
+
success: false,
|
|
175
|
+
error: { message: "Fetch models cancelled", kind: "cancelled" },
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
const message = err instanceof Error ? err.message : "Unknown error";
|
|
179
|
+
return { success: false, error: { message, kind: "network" } };
|
|
180
|
+
}
|
|
181
|
+
}
|