@aliou/pi-neuralwatt 0.1.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +10 -10
- package/src/config.ts +2 -2
- package/src/extensions/command-quotas/command.ts +4 -1
- package/src/extensions/command-quotas/components/quota-tabs.ts +2 -2
- package/src/extensions/command-quotas/components/quotas-display.ts +4 -4
- package/src/extensions/command-quotas/index.ts +1 -1
- package/src/extensions/provider/context-overflow.ts +31 -0
- package/src/extensions/provider/index.ts +61 -66
- package/src/extensions/provider/models.test.ts +246 -19
- package/src/extensions/provider/models.ts +41 -39
- package/src/extensions/provider/provider-payload.ts +12 -0
- package/src/extensions/provider/quota-store.ts +57 -0
- package/src/extensions/quota-warnings/index.ts +1 -1
- package/src/extensions/quota-warnings/notifier.ts +1 -1
- package/src/extensions/sub-bar-integration/index.ts +1 -1
- package/src/lib/env.ts +1 -1
- package/src/lib/fetch-models.ts +187 -0
- package/src/utils/is-offline.test.ts +60 -0
- package/src/utils/is-offline.ts +4 -0
- package/src/utils/quota-bar.ts +1 -1
package/README.md
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aliou/pi-neuralwatt",
|
|
3
|
-
"version": "0.1
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"private": false,
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"./src/extensions/quota-warnings/index.ts",
|
|
21
21
|
"./src/extensions/sub-bar-integration/index.ts"
|
|
22
22
|
],
|
|
23
|
-
"video": "https://assets.aliou.me/
|
|
23
|
+
"video": "https://assets.aliou.me/github/aliou/pi-neuralwatt/demo.mp4"
|
|
24
24
|
},
|
|
25
25
|
"publishConfig": {
|
|
26
26
|
"access": "public"
|
|
@@ -31,20 +31,20 @@
|
|
|
31
31
|
"README.md"
|
|
32
32
|
],
|
|
33
33
|
"dependencies": {
|
|
34
|
-
"@aliou/pi-utils-settings": "^0.
|
|
35
|
-
"@aliou/pi-utils-ui": "^0.
|
|
34
|
+
"@aliou/pi-utils-settings": "^0.15.0",
|
|
35
|
+
"@aliou/pi-utils-ui": "^0.4.0"
|
|
36
36
|
},
|
|
37
37
|
"peerDependencies": {
|
|
38
|
-
"@
|
|
39
|
-
"@
|
|
38
|
+
"@earendil-works/pi-coding-agent": "0.74.0",
|
|
39
|
+
"@earendil-works/pi-tui": "0.74.0",
|
|
40
40
|
"@sinclair/typebox": ">=0.34.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"@aliou/biome-plugins": "^0.8.1",
|
|
44
44
|
"@biomejs/biome": "^2.4.12",
|
|
45
45
|
"@changesets/cli": "^2.27.11",
|
|
46
|
-
"@
|
|
47
|
-
"@
|
|
46
|
+
"@earendil-works/pi-coding-agent": "0.74.0",
|
|
47
|
+
"@earendil-works/pi-tui": "0.74.0",
|
|
48
48
|
"@types/node": "^25.0.10",
|
|
49
49
|
"husky": "^9.1.7",
|
|
50
50
|
"ts-json-schema-generator": "^2.4.0",
|
|
@@ -52,10 +52,10 @@
|
|
|
52
52
|
"vitest": "^4.0.18"
|
|
53
53
|
},
|
|
54
54
|
"peerDependenciesMeta": {
|
|
55
|
-
"@
|
|
55
|
+
"@earendil-works/pi-coding-agent": {
|
|
56
56
|
"optional": true
|
|
57
57
|
},
|
|
58
|
-
"@
|
|
58
|
+
"@earendil-works/pi-tui": {
|
|
59
59
|
"optional": true
|
|
60
60
|
},
|
|
61
61
|
"@sinclair/typebox": {
|
package/src/config.ts
CHANGED
|
@@ -3,8 +3,8 @@ import {
|
|
|
3
3
|
registerSettingsCommand,
|
|
4
4
|
type SettingsSection,
|
|
5
5
|
} from "@aliou/pi-utils-settings";
|
|
6
|
-
import type { ExtensionAPI } from "@
|
|
7
|
-
import type { SettingItem } from "@
|
|
6
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import type { SettingItem } from "@earendil-works/pi-tui";
|
|
8
8
|
|
|
9
9
|
export type NeuralwattFeatureId =
|
|
10
10
|
| "quotaCommand"
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { join } from "node:path";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
type ExtensionAPI,
|
|
4
|
+
getAgentDir,
|
|
5
|
+
} from "@earendil-works/pi-coding-agent";
|
|
3
6
|
import { getNeuralwattApiKey } from "../../lib/env";
|
|
4
7
|
import { fetchQuotas } from "../../utils/quotas";
|
|
5
8
|
import { QuotasComponent } from "./components/quotas-display";
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { Theme } from "@
|
|
2
|
-
import { truncateToWidth } from "@
|
|
1
|
+
import type { Theme } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { truncateToWidth } from "@earendil-works/pi-tui";
|
|
3
3
|
import type { NeuralwattQuotas } from "../../../types/quota-api";
|
|
4
4
|
import {
|
|
5
5
|
percentCreditsRemaining,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import type { Theme } from "@
|
|
2
|
-
import { DynamicBorder } from "@
|
|
3
|
-
import type { Component, TUI } from "@
|
|
4
|
-
import { Loader, matchesKey, truncateToWidth } from "@
|
|
1
|
+
import type { Theme } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { DynamicBorder } from "@earendil-works/pi-coding-agent";
|
|
3
|
+
import type { Component, TUI } from "@earendil-works/pi-tui";
|
|
4
|
+
import { Loader, matchesKey, truncateToWidth } from "@earendil-works/pi-tui";
|
|
5
5
|
import type { NeuralwattQuotas } from "../../../types/quota-api";
|
|
6
6
|
import {
|
|
7
7
|
renderCreditsTab,
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
interface AssistantErrorLike {
|
|
2
|
+
role: string;
|
|
3
|
+
stopReason?: string;
|
|
4
|
+
provider?: string;
|
|
5
|
+
errorMessage?: string;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
const NEURALWATT_CONTEXT_OVERFLOW_PATTERN =
|
|
9
|
+
/request exceeds model'?s maximum context length/i;
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Normalize Neuralwatt context overflow errors so Pi's native overflow
|
|
13
|
+
* compaction path can detect them and perform compact-and-retry.
|
|
14
|
+
*/
|
|
15
|
+
export function normalizeNeuralwattContextOverflowError<
|
|
16
|
+
TMessage extends AssistantErrorLike,
|
|
17
|
+
>(message: TMessage, currentProvider?: string): TMessage | undefined {
|
|
18
|
+
if (message.role !== "assistant") return;
|
|
19
|
+
if (message.stopReason !== "error") return;
|
|
20
|
+
if (message.provider !== "neuralwatt" && currentProvider !== "neuralwatt")
|
|
21
|
+
return;
|
|
22
|
+
|
|
23
|
+
const errorMessage = message.errorMessage ?? "";
|
|
24
|
+
if (errorMessage.includes("context_length_exceeded")) return;
|
|
25
|
+
if (!NEURALWATT_CONTEXT_OVERFLOW_PATTERN.test(errorMessage)) return;
|
|
26
|
+
|
|
27
|
+
return {
|
|
28
|
+
...message,
|
|
29
|
+
errorMessage: `context_length_exceeded: ${errorMessage}`,
|
|
30
|
+
};
|
|
31
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import {
|
|
3
3
|
configLoader,
|
|
4
4
|
emitConfigUpdated,
|
|
@@ -8,17 +8,27 @@ import {
|
|
|
8
8
|
registerNeuralwattSettings,
|
|
9
9
|
} from "../../config";
|
|
10
10
|
import { getNeuralwattApiKey } from "../../lib/env";
|
|
11
|
+
import { fetchModels } from "../../lib/fetch-models";
|
|
11
12
|
import type { NeuralwattQuotas } from "../../types/quota-api";
|
|
12
13
|
import {
|
|
13
14
|
NEURALWATT_QUOTAS_REQUEST_EVENT,
|
|
14
15
|
NEURALWATT_QUOTAS_UPDATED_EVENT,
|
|
15
16
|
type NeuralwattQuotasUpdatedPayload,
|
|
16
|
-
parseQuotaHeaders,
|
|
17
17
|
} from "../../types/quota-events";
|
|
18
|
+
import { isOffline } from "../../utils/is-offline";
|
|
18
19
|
import { fetchQuotas } from "../../utils/quotas";
|
|
19
|
-
import {
|
|
20
|
+
import { normalizeNeuralwattContextOverflowError } from "./context-overflow";
|
|
21
|
+
import type { NeuralwattModelConfig } from "./models";
|
|
22
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
23
|
+
import { buildModelsPayload } from "./provider-payload";
|
|
24
|
+
import { buildQuotasFromHeaders, fetchRequestedQuotas } from "./quota-store";
|
|
20
25
|
|
|
21
|
-
|
|
26
|
+
const HEADER_EMIT_THROTTLE_MS = 5_000;
|
|
27
|
+
|
|
28
|
+
function registerNeuralwattProvider(
|
|
29
|
+
pi: ExtensionAPI,
|
|
30
|
+
models: NeuralwattModelConfig[],
|
|
31
|
+
): void {
|
|
22
32
|
pi.registerProvider("neuralwatt", {
|
|
23
33
|
baseUrl: "https://api.neuralwatt.com/v1",
|
|
24
34
|
apiKey: "NEURALWATT_API_KEY",
|
|
@@ -28,32 +38,25 @@ export function registerNeuralwattProvider(pi: ExtensionAPI): void {
|
|
|
28
38
|
Referer: "https://pi.dev",
|
|
29
39
|
"X-Title": "npm:@aliou/pi-neuralwatt",
|
|
30
40
|
},
|
|
31
|
-
models:
|
|
32
|
-
...model,
|
|
33
|
-
compat: {
|
|
34
|
-
supportsDeveloperRole: false,
|
|
35
|
-
maxTokensField: "max_tokens",
|
|
36
|
-
...model.compat,
|
|
37
|
-
},
|
|
38
|
-
})),
|
|
41
|
+
models: buildModelsPayload(models),
|
|
39
42
|
});
|
|
40
43
|
}
|
|
41
44
|
|
|
42
45
|
export default async function (pi: ExtensionAPI) {
|
|
43
46
|
await configLoader.load();
|
|
44
|
-
registerNeuralwattProvider(pi);
|
|
45
47
|
|
|
46
|
-
//
|
|
48
|
+
// Register with hardcoded cache immediately so models are available on startup
|
|
49
|
+
registerNeuralwattProvider(pi, NEURALWATT_MODELS_CACHE);
|
|
50
|
+
|
|
47
51
|
const loadedFeatures = new Set<NeuralwattFeatureId>();
|
|
48
52
|
|
|
49
|
-
// Register settings
|
|
53
|
+
// Register settings in the provider so it is always available.
|
|
50
54
|
registerNeuralwattSettings(pi, {
|
|
51
55
|
getLoadedFeatures: () => loadedFeatures,
|
|
52
56
|
});
|
|
53
57
|
|
|
54
|
-
// --- Quota store (event-based) ---
|
|
55
58
|
let lastHeaderEmitAt = 0;
|
|
56
|
-
|
|
59
|
+
let quotaRequestInFlight = false;
|
|
57
60
|
|
|
58
61
|
function emitQuotas(
|
|
59
62
|
quotas: NeuralwattQuotas,
|
|
@@ -66,81 +69,73 @@ export default async function (pi: ExtensionAPI) {
|
|
|
66
69
|
pi.events.emit(NEURALWATT_QUOTAS_UPDATED_EVENT, { quotas, source });
|
|
67
70
|
}
|
|
68
71
|
|
|
69
|
-
|
|
72
|
+
pi.on("message_end", (event, ctx) => {
|
|
73
|
+
const message = normalizeNeuralwattContextOverflowError(
|
|
74
|
+
event.message,
|
|
75
|
+
ctx.model?.provider,
|
|
76
|
+
);
|
|
77
|
+
if (!message) return;
|
|
78
|
+
return { message };
|
|
79
|
+
});
|
|
80
|
+
|
|
70
81
|
pi.on("after_provider_response", (event, ctx) => {
|
|
71
82
|
if (ctx.model?.provider !== "neuralwatt") return;
|
|
72
|
-
const
|
|
73
|
-
if (!
|
|
74
|
-
|
|
75
|
-
const quotas: NeuralwattQuotas = {
|
|
76
|
-
snapshot_at: new Date().toISOString(),
|
|
77
|
-
balance: {
|
|
78
|
-
credits_remaining_usd: headerQuotas.allowanceRemainingUsd,
|
|
79
|
-
total_credits_usd: 0,
|
|
80
|
-
credits_used_usd: 0,
|
|
81
|
-
accounting_method: "token",
|
|
82
|
-
},
|
|
83
|
-
usage: {
|
|
84
|
-
lifetime: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
|
|
85
|
-
current_month: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
|
|
86
|
-
},
|
|
87
|
-
limits: { overage_limit_usd: null, rate_limit_tier: "standard" },
|
|
88
|
-
subscription:
|
|
89
|
-
headerQuotas.subscriptionPlan !== "none" &&
|
|
90
|
-
headerQuotas.energyRemaining !== undefined
|
|
91
|
-
? {
|
|
92
|
-
plan: headerQuotas.subscriptionPlan,
|
|
93
|
-
status: "active",
|
|
94
|
-
billing_interval: "month",
|
|
95
|
-
current_period_start: "",
|
|
96
|
-
current_period_end: "",
|
|
97
|
-
auto_renew: false,
|
|
98
|
-
kwh_included: headerQuotas.energyIncluded ?? 0,
|
|
99
|
-
kwh_used: headerQuotas.energyUsed ?? 0,
|
|
100
|
-
kwh_remaining: headerQuotas.energyRemaining,
|
|
101
|
-
in_overage: false,
|
|
102
|
-
}
|
|
103
|
-
: null,
|
|
104
|
-
key: { name: "", allowance: null },
|
|
105
|
-
};
|
|
106
|
-
|
|
83
|
+
const quotas = buildQuotasFromHeaders(event.headers);
|
|
84
|
+
if (!quotas) return;
|
|
107
85
|
emitQuotas(quotas, "header");
|
|
108
86
|
});
|
|
109
87
|
|
|
110
|
-
// Respond to quota requests from other extensions
|
|
111
|
-
let quotaRequestInFlight = false;
|
|
112
88
|
pi.events.on(NEURALWATT_QUOTAS_REQUEST_EVENT, async (data: unknown) => {
|
|
113
89
|
if (quotaRequestInFlight) return;
|
|
114
90
|
quotaRequestInFlight = true;
|
|
115
91
|
try {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if (result.success) emitQuotas(result.data.quotas, "api");
|
|
92
|
+
const quotas = await fetchRequestedQuotas(data);
|
|
93
|
+
if (quotas)
|
|
94
|
+
pi.events.emit(NEURALWATT_QUOTAS_UPDATED_EVENT, {
|
|
95
|
+
quotas,
|
|
96
|
+
source: "api",
|
|
97
|
+
});
|
|
123
98
|
} finally {
|
|
124
99
|
quotaRequestInFlight = false;
|
|
125
100
|
}
|
|
126
101
|
});
|
|
127
102
|
|
|
128
|
-
// Collect which feature extensions are loaded
|
|
129
103
|
pi.events.on(NEURALWATT_EXTENSIONS_REGISTER_EVENT, (data: unknown) => {
|
|
130
104
|
const { feature } = data as { feature: NeuralwattFeatureId };
|
|
131
105
|
loadedFeatures.add(feature);
|
|
132
106
|
});
|
|
133
107
|
|
|
134
|
-
// On session start: request extensions to register, then emit config
|
|
135
108
|
pi.on("session_start", async (_event, ctx) => {
|
|
136
109
|
loadedFeatures.clear();
|
|
137
110
|
pi.events.emit(NEURALWATT_EXTENSIONS_REQUEST_EVENT, undefined);
|
|
138
111
|
emitConfigUpdated(pi);
|
|
139
112
|
|
|
113
|
+
if (!isOffline()) {
|
|
114
|
+
const result = await fetchModels();
|
|
115
|
+
if (result.success) {
|
|
116
|
+
const cacheIds = new Set(NEURALWATT_MODELS_CACHE.map((m) => m.id));
|
|
117
|
+
const liveIds = new Set(result.models.map((m) => m.id));
|
|
118
|
+
const added = result.models.filter((m) => !cacheIds.has(m.id));
|
|
119
|
+
const removed = NEURALWATT_MODELS_CACHE.filter(
|
|
120
|
+
(m) => !liveIds.has(m.id),
|
|
121
|
+
);
|
|
122
|
+
if (added.length > 0 || removed.length > 0) {
|
|
123
|
+
const parts: string[] = [];
|
|
124
|
+
if (added.length > 0) parts.push(`${added.length} new`);
|
|
125
|
+
if (removed.length > 0) parts.push(`${removed.length} removed`);
|
|
126
|
+
ctx.ui.notify(
|
|
127
|
+
`Neuralwatt models updated (${parts.join(", ")})`,
|
|
128
|
+
"info",
|
|
129
|
+
);
|
|
130
|
+
}
|
|
131
|
+
registerNeuralwattProvider(pi, result.models);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
140
135
|
if (ctx.model?.provider !== "neuralwatt") return;
|
|
141
136
|
const apiKey = await getNeuralwattApiKey(ctx.modelRegistry.authStorage);
|
|
142
137
|
if (!apiKey) return;
|
|
143
|
-
const
|
|
144
|
-
if (
|
|
138
|
+
const quotaResult = await fetchQuotas(apiKey);
|
|
139
|
+
if (quotaResult.success) emitQuotas(quotaResult.data.quotas, "api");
|
|
145
140
|
});
|
|
146
141
|
}
|
|
@@ -1,18 +1,10 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
owned_by: string;
|
|
9
|
-
max_model_len: number;
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
interface ApiResponse {
|
|
13
|
-
object: "list";
|
|
14
|
-
data: ApiModel[];
|
|
15
|
-
}
|
|
2
|
+
import type {
|
|
3
|
+
ApiModel as FullApiModel,
|
|
4
|
+
ApiResponse as FullApiResponse,
|
|
5
|
+
} from "../../lib/fetch-models";
|
|
6
|
+
import { mapApiModel } from "../../lib/fetch-models";
|
|
7
|
+
import { NEURALWATT_MODELS_CACHE } from "./models";
|
|
16
8
|
|
|
17
9
|
interface Discrepancy {
|
|
18
10
|
model: string;
|
|
@@ -21,7 +13,7 @@ interface Discrepancy {
|
|
|
21
13
|
api: unknown;
|
|
22
14
|
}
|
|
23
15
|
|
|
24
|
-
async function fetchApiModels(): Promise<
|
|
16
|
+
async function fetchApiModels(): Promise<FullApiModel[]> {
|
|
25
17
|
const apiKey = process.env.NEURALWATT_API_KEY;
|
|
26
18
|
const headers: Record<string, string> = {
|
|
27
19
|
"Content-Type": "application/json",
|
|
@@ -41,13 +33,13 @@ async function fetchApiModels(): Promise<ApiModel[]> {
|
|
|
41
33
|
);
|
|
42
34
|
}
|
|
43
35
|
|
|
44
|
-
const data:
|
|
36
|
+
const data: FullApiResponse = await response.json();
|
|
45
37
|
return data.data;
|
|
46
38
|
}
|
|
47
39
|
|
|
48
40
|
function compareModels(
|
|
49
|
-
apiModels:
|
|
50
|
-
hardcodedModels: typeof
|
|
41
|
+
apiModels: FullApiModel[],
|
|
42
|
+
hardcodedModels: typeof NEURALWATT_MODELS_CACHE,
|
|
51
43
|
): Discrepancy[] {
|
|
52
44
|
const discrepancies: Discrepancy[] = [];
|
|
53
45
|
|
|
@@ -73,10 +65,55 @@ function compareModels(
|
|
|
73
65
|
api: apiModel.max_model_len,
|
|
74
66
|
});
|
|
75
67
|
}
|
|
68
|
+
|
|
69
|
+
// Check metadata-driven fields if available
|
|
70
|
+
const meta = apiModel.metadata;
|
|
71
|
+
if (meta) {
|
|
72
|
+
// Check reasoning
|
|
73
|
+
if (meta.capabilities.reasoning !== hardcoded.reasoning) {
|
|
74
|
+
discrepancies.push({
|
|
75
|
+
model: hardcoded.id,
|
|
76
|
+
field: "reasoning",
|
|
77
|
+
hardcoded: hardcoded.reasoning,
|
|
78
|
+
api: meta.capabilities.reasoning,
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Check pricing
|
|
83
|
+
if (meta.pricing.input_per_million !== hardcoded.cost.input) {
|
|
84
|
+
discrepancies.push({
|
|
85
|
+
model: hardcoded.id,
|
|
86
|
+
field: "cost.input",
|
|
87
|
+
hardcoded: hardcoded.cost.input,
|
|
88
|
+
api: meta.pricing.input_per_million,
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
if (meta.pricing.output_per_million !== hardcoded.cost.output) {
|
|
92
|
+
discrepancies.push({
|
|
93
|
+
model: hardcoded.id,
|
|
94
|
+
field: "cost.output",
|
|
95
|
+
hardcoded: hardcoded.cost.output,
|
|
96
|
+
api: meta.pricing.output_per_million,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Check vision
|
|
101
|
+
const hasVision = hardcoded.input.includes("image");
|
|
102
|
+
if (meta.capabilities.vision !== hasVision) {
|
|
103
|
+
discrepancies.push({
|
|
104
|
+
model: hardcoded.id,
|
|
105
|
+
field: "input (vision)",
|
|
106
|
+
hardcoded: hasVision,
|
|
107
|
+
api: meta.capabilities.vision,
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
}
|
|
76
111
|
}
|
|
77
112
|
|
|
78
113
|
// Check for API models not in hardcoded list
|
|
79
114
|
for (const apiModel of apiModels) {
|
|
115
|
+
if (apiModel.metadata?.deprecated || apiModel.metadata?.pricing.pricing_tbd)
|
|
116
|
+
continue;
|
|
80
117
|
const hardcoded = hardcodedModels.find((m) => m.id === apiModel.id);
|
|
81
118
|
if (!hardcoded) {
|
|
82
119
|
discrepancies.push({
|
|
@@ -94,7 +131,7 @@ function compareModels(
|
|
|
94
131
|
describe("Neuralwatt models", () => {
|
|
95
132
|
it("should match API model definitions", { timeout: 30000 }, async () => {
|
|
96
133
|
const apiModels = await fetchApiModels();
|
|
97
|
-
const discrepancies = compareModels(apiModels,
|
|
134
|
+
const discrepancies = compareModels(apiModels, NEURALWATT_MODELS_CACHE);
|
|
98
135
|
|
|
99
136
|
if (discrepancies.length > 0) {
|
|
100
137
|
console.error("\nModel discrepancies found:");
|
|
@@ -117,4 +154,194 @@ describe("Neuralwatt models", () => {
|
|
|
117
154
|
|
|
118
155
|
expect(discrepancies).toHaveLength(0);
|
|
119
156
|
});
|
|
157
|
+
|
|
158
|
+
it("should map API models with metadata correctly", () => {
|
|
159
|
+
// Simulate a reasoning model with reasoning_effort support (like gpt-oss-20b)
|
|
160
|
+
const apiModelWithEffort: FullApiModel = {
|
|
161
|
+
id: "openai/gpt-oss-20b",
|
|
162
|
+
object: "model",
|
|
163
|
+
created: 1777467968,
|
|
164
|
+
owned_by: "vllm",
|
|
165
|
+
root: "openai/gpt-oss-20b",
|
|
166
|
+
parent: null,
|
|
167
|
+
max_model_len: 16384,
|
|
168
|
+
metadata: {
|
|
169
|
+
display_name: "GPT-OSS 20B",
|
|
170
|
+
description: "OpenAI GPT-OSS 20B",
|
|
171
|
+
provider: "OpenAI",
|
|
172
|
+
huggingface_id: null,
|
|
173
|
+
pricing: {
|
|
174
|
+
input_per_million: 0.03,
|
|
175
|
+
output_per_million: 0.16,
|
|
176
|
+
cached_input_per_million: null,
|
|
177
|
+
cached_output_per_million: null,
|
|
178
|
+
currency: "USD",
|
|
179
|
+
pricing_tbd: false,
|
|
180
|
+
},
|
|
181
|
+
capabilities: {
|
|
182
|
+
tools: true,
|
|
183
|
+
json_mode: true,
|
|
184
|
+
vision: false,
|
|
185
|
+
reasoning: true,
|
|
186
|
+
reasoning_effort: true,
|
|
187
|
+
streaming: true,
|
|
188
|
+
system_role: true,
|
|
189
|
+
developer_role: false,
|
|
190
|
+
},
|
|
191
|
+
limits: {
|
|
192
|
+
max_context_length: 16384,
|
|
193
|
+
max_output_tokens: 4096,
|
|
194
|
+
max_images: null,
|
|
195
|
+
},
|
|
196
|
+
deprecated: false,
|
|
197
|
+
deprecated_message: null,
|
|
198
|
+
},
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
const result = mapApiModel(apiModelWithEffort);
|
|
202
|
+
expect(result.id).toBe("openai/gpt-oss-20b");
|
|
203
|
+
expect(result.name).toBe("GPT-OSS 20B");
|
|
204
|
+
expect(result.reasoning).toBe(true);
|
|
205
|
+
expect(result.contextWindow).toBe(16384);
|
|
206
|
+
expect(result.maxTokens).toBe(4096);
|
|
207
|
+
expect(result.input).toEqual(["text"]);
|
|
208
|
+
expect(result.cost.input).toBe(0.03);
|
|
209
|
+
expect(result.cost.output).toBe(0.16);
|
|
210
|
+
expect(result.thinkingLevelMap).toEqual({
|
|
211
|
+
minimal: "low",
|
|
212
|
+
low: "low",
|
|
213
|
+
medium: "medium",
|
|
214
|
+
high: "high",
|
|
215
|
+
xhigh: null,
|
|
216
|
+
});
|
|
217
|
+
expect(result.fast).toBeUndefined();
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
it("should map fast variants correctly", () => {
|
|
221
|
+
// Simulate a fast variant (owned by "neuralwatt")
|
|
222
|
+
const fastModel: FullApiModel = {
|
|
223
|
+
id: "qwen3.6-35b-fast",
|
|
224
|
+
object: "model",
|
|
225
|
+
created: 0,
|
|
226
|
+
owned_by: "neuralwatt",
|
|
227
|
+
max_model_len: 131072,
|
|
228
|
+
metadata: {
|
|
229
|
+
display_name: "Qwen3.6 35B Fast",
|
|
230
|
+
description: "Fast variant",
|
|
231
|
+
provider: "Qwen",
|
|
232
|
+
huggingface_id: null,
|
|
233
|
+
pricing: {
|
|
234
|
+
input_per_million: 0.05,
|
|
235
|
+
output_per_million: 0.1,
|
|
236
|
+
cached_input_per_million: null,
|
|
237
|
+
cached_output_per_million: null,
|
|
238
|
+
currency: "USD",
|
|
239
|
+
pricing_tbd: false,
|
|
240
|
+
},
|
|
241
|
+
capabilities: {
|
|
242
|
+
tools: true,
|
|
243
|
+
json_mode: true,
|
|
244
|
+
vision: false,
|
|
245
|
+
reasoning: false,
|
|
246
|
+
reasoning_effort: false,
|
|
247
|
+
streaming: true,
|
|
248
|
+
system_role: true,
|
|
249
|
+
developer_role: false,
|
|
250
|
+
},
|
|
251
|
+
limits: {
|
|
252
|
+
max_context_length: 131072,
|
|
253
|
+
max_output_tokens: null,
|
|
254
|
+
max_images: null,
|
|
255
|
+
},
|
|
256
|
+
deprecated: false,
|
|
257
|
+
deprecated_message: null,
|
|
258
|
+
},
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
const result = mapApiModel(fastModel);
|
|
262
|
+
expect(result.id).toBe("qwen3.6-35b-fast");
|
|
263
|
+
expect(result.fast).toBe(true);
|
|
264
|
+
expect(result.reasoning).toBe(false);
|
|
265
|
+
expect(
|
|
266
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
267
|
+
).toBeUndefined();
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
it("should map vision models correctly", () => {
|
|
271
|
+
const visionModel: FullApiModel = {
|
|
272
|
+
id: "moonshotai/Kimi-K2.6",
|
|
273
|
+
object: "model",
|
|
274
|
+
created: 1777467965,
|
|
275
|
+
owned_by: "vllm",
|
|
276
|
+
root: "moonshotai/Kimi-K2.6",
|
|
277
|
+
parent: null,
|
|
278
|
+
max_model_len: 262144,
|
|
279
|
+
metadata: {
|
|
280
|
+
display_name: "Kimi K2.6",
|
|
281
|
+
description: "Moonshot Kimi K2.6",
|
|
282
|
+
provider: "MoonshotAI",
|
|
283
|
+
huggingface_id: null,
|
|
284
|
+
pricing: {
|
|
285
|
+
input_per_million: 0.69,
|
|
286
|
+
output_per_million: 3.22,
|
|
287
|
+
cached_input_per_million: null,
|
|
288
|
+
cached_output_per_million: null,
|
|
289
|
+
currency: "USD",
|
|
290
|
+
pricing_tbd: false,
|
|
291
|
+
},
|
|
292
|
+
capabilities: {
|
|
293
|
+
tools: true,
|
|
294
|
+
json_mode: true,
|
|
295
|
+
vision: true,
|
|
296
|
+
reasoning: true,
|
|
297
|
+
reasoning_effort: false,
|
|
298
|
+
streaming: true,
|
|
299
|
+
system_role: true,
|
|
300
|
+
developer_role: false,
|
|
301
|
+
},
|
|
302
|
+
limits: {
|
|
303
|
+
max_context_length: 262144,
|
|
304
|
+
max_output_tokens: null,
|
|
305
|
+
max_images: 20,
|
|
306
|
+
},
|
|
307
|
+
deprecated: false,
|
|
308
|
+
deprecated_message: null,
|
|
309
|
+
},
|
|
310
|
+
};
|
|
311
|
+
|
|
312
|
+
const result = mapApiModel(visionModel);
|
|
313
|
+
expect(result.input).toEqual(["text", "image"]);
|
|
314
|
+
expect(result.reasoning).toBe(true);
|
|
315
|
+
expect(result.thinkingLevelMap).toEqual({
|
|
316
|
+
minimal: null,
|
|
317
|
+
low: null,
|
|
318
|
+
medium: "medium",
|
|
319
|
+
high: null,
|
|
320
|
+
xhigh: null,
|
|
321
|
+
});
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
it("should use defaults when metadata is missing", () => {
|
|
325
|
+
const bareModel: FullApiModel = {
|
|
326
|
+
id: "test/model",
|
|
327
|
+
object: "model",
|
|
328
|
+
created: 0,
|
|
329
|
+
owned_by: "vllm",
|
|
330
|
+
max_model_len: 8192,
|
|
331
|
+
};
|
|
332
|
+
|
|
333
|
+
const result = mapApiModel(bareModel);
|
|
334
|
+
expect(result.id).toBe("test/model");
|
|
335
|
+
expect(result.name).toBe("test/model");
|
|
336
|
+
expect(result.reasoning).toBe(false);
|
|
337
|
+
expect(result.contextWindow).toBe(8192);
|
|
338
|
+
expect(result.maxTokens).toBe(65536);
|
|
339
|
+
expect(result.input).toEqual(["text"]);
|
|
340
|
+
expect(result.cost.input).toBe(0);
|
|
341
|
+
expect(result.cost.output).toBe(0);
|
|
342
|
+
expect(result.fast).toBeUndefined();
|
|
343
|
+
expect(
|
|
344
|
+
(result.compat as Record<string, unknown>)?.supportsReasoningEffort,
|
|
345
|
+
).toBeUndefined();
|
|
346
|
+
});
|
|
120
347
|
});
|
|
@@ -3,22 +3,25 @@
|
|
|
3
3
|
// Pricing: https://portal.neuralwatt.com/pricing
|
|
4
4
|
// max_model_len from /v1/models, pricing from /pricing page
|
|
5
5
|
|
|
6
|
-
import type { ProviderModelConfig } from "@
|
|
6
|
+
import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
7
7
|
|
|
8
8
|
export interface NeuralwattModelConfig extends ProviderModelConfig {
|
|
9
9
|
/** Fast variant of a parent model (e.g. "glm-5-fast" is the fast variant of "zai-org/GLM-5.1-FP8"). */
|
|
10
10
|
fast?: boolean;
|
|
11
11
|
}
|
|
12
12
|
|
|
13
|
-
const
|
|
14
|
-
minimal:
|
|
15
|
-
low:
|
|
13
|
+
const NEURALWATT_BINARY_THINKING_LEVEL_MAP = {
|
|
14
|
+
minimal: null,
|
|
15
|
+
low: null,
|
|
16
16
|
medium: "medium",
|
|
17
|
-
high:
|
|
18
|
-
xhigh:
|
|
17
|
+
high: null,
|
|
18
|
+
xhigh: null,
|
|
19
19
|
} as const;
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
/** Hardcoded model cache. Used as a fallback on startup before live models are fetched.
|
|
22
|
+
* Updated from https://api.neuralwatt.com/v1/models and https://portal.neuralwatt.com/pricing
|
|
23
|
+
*/
|
|
24
|
+
export const NEURALWATT_MODELS_CACHE: NeuralwattModelConfig[] = [
|
|
22
25
|
// Devstral Small 2 - Mistral
|
|
23
26
|
{
|
|
24
27
|
id: "mistralai/Devstral-Small-2-24B-Instruct-2512",
|
|
@@ -31,7 +34,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
31
34
|
cacheRead: 0,
|
|
32
35
|
cacheWrite: 0,
|
|
33
36
|
},
|
|
34
|
-
contextWindow:
|
|
37
|
+
contextWindow: 262128,
|
|
35
38
|
maxTokens: 32768,
|
|
36
39
|
compat: {
|
|
37
40
|
supportsDeveloperRole: false,
|
|
@@ -51,7 +54,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
51
54
|
cacheRead: 0,
|
|
52
55
|
cacheWrite: 0,
|
|
53
56
|
},
|
|
54
|
-
contextWindow:
|
|
57
|
+
contextWindow: 202736,
|
|
55
58
|
maxTokens: 32768,
|
|
56
59
|
compat: {
|
|
57
60
|
supportsDeveloperRole: false,
|
|
@@ -70,12 +73,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
70
73
|
cacheRead: 0,
|
|
71
74
|
cacheWrite: 0,
|
|
72
75
|
},
|
|
73
|
-
contextWindow:
|
|
76
|
+
contextWindow: 202736,
|
|
74
77
|
maxTokens: 32768,
|
|
78
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
75
79
|
compat: {
|
|
76
80
|
supportsDeveloperRole: false,
|
|
77
|
-
supportsReasoningEffort: true,
|
|
78
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
79
81
|
maxTokensField: "max_tokens",
|
|
80
82
|
},
|
|
81
83
|
},
|
|
@@ -92,7 +94,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
92
94
|
cacheRead: 0,
|
|
93
95
|
cacheWrite: 0,
|
|
94
96
|
},
|
|
95
|
-
contextWindow:
|
|
97
|
+
contextWindow: 202736,
|
|
96
98
|
maxTokens: 32768,
|
|
97
99
|
compat: {
|
|
98
100
|
supportsDeveloperRole: false,
|
|
@@ -103,7 +105,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
103
105
|
{
|
|
104
106
|
id: "openai/gpt-oss-20b",
|
|
105
107
|
name: "GPT-OSS 20B",
|
|
106
|
-
reasoning:
|
|
108
|
+
reasoning: true,
|
|
107
109
|
input: ["text"],
|
|
108
110
|
cost: {
|
|
109
111
|
input: 0.03,
|
|
@@ -111,8 +113,15 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
111
113
|
cacheRead: 0,
|
|
112
114
|
cacheWrite: 0,
|
|
113
115
|
},
|
|
114
|
-
contextWindow:
|
|
116
|
+
contextWindow: 16368,
|
|
115
117
|
maxTokens: 4096,
|
|
118
|
+
thinkingLevelMap: {
|
|
119
|
+
minimal: "low",
|
|
120
|
+
low: "low",
|
|
121
|
+
medium: "medium",
|
|
122
|
+
high: "high",
|
|
123
|
+
xhigh: null,
|
|
124
|
+
},
|
|
116
125
|
compat: {
|
|
117
126
|
supportsDeveloperRole: false,
|
|
118
127
|
maxTokensField: "max_tokens",
|
|
@@ -130,12 +139,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
130
139
|
cacheRead: 0,
|
|
131
140
|
cacheWrite: 0,
|
|
132
141
|
},
|
|
133
|
-
contextWindow:
|
|
142
|
+
contextWindow: 262128,
|
|
134
143
|
maxTokens: 65536,
|
|
144
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
135
145
|
compat: {
|
|
136
146
|
supportsDeveloperRole: false,
|
|
137
|
-
supportsReasoningEffort: true,
|
|
138
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
139
147
|
maxTokensField: "max_tokens",
|
|
140
148
|
},
|
|
141
149
|
},
|
|
@@ -152,7 +160,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
152
160
|
cacheRead: 0,
|
|
153
161
|
cacheWrite: 0,
|
|
154
162
|
},
|
|
155
|
-
contextWindow:
|
|
163
|
+
contextWindow: 262128,
|
|
156
164
|
maxTokens: 65536,
|
|
157
165
|
compat: {
|
|
158
166
|
supportsDeveloperRole: false,
|
|
@@ -171,12 +179,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
171
179
|
cacheRead: 0,
|
|
172
180
|
cacheWrite: 0,
|
|
173
181
|
},
|
|
174
|
-
contextWindow:
|
|
182
|
+
contextWindow: 262128,
|
|
175
183
|
maxTokens: 65536,
|
|
184
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
176
185
|
compat: {
|
|
177
186
|
supportsDeveloperRole: false,
|
|
178
|
-
supportsReasoningEffort: true,
|
|
179
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
180
187
|
maxTokensField: "max_tokens",
|
|
181
188
|
},
|
|
182
189
|
},
|
|
@@ -184,7 +191,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
184
191
|
{
|
|
185
192
|
id: "kimi-k2.6-fast",
|
|
186
193
|
name: "Kimi K2.6 Fast",
|
|
187
|
-
reasoning:
|
|
194
|
+
reasoning: false,
|
|
188
195
|
fast: true,
|
|
189
196
|
input: ["text", "image"],
|
|
190
197
|
cost: {
|
|
@@ -193,12 +200,10 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
193
200
|
cacheRead: 0,
|
|
194
201
|
cacheWrite: 0,
|
|
195
202
|
},
|
|
196
|
-
contextWindow:
|
|
203
|
+
contextWindow: 262128,
|
|
197
204
|
maxTokens: 65536,
|
|
198
205
|
compat: {
|
|
199
206
|
supportsDeveloperRole: false,
|
|
200
|
-
supportsReasoningEffort: true,
|
|
201
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
202
207
|
maxTokensField: "max_tokens",
|
|
203
208
|
},
|
|
204
209
|
},
|
|
@@ -214,12 +219,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
214
219
|
cacheRead: 0,
|
|
215
220
|
cacheWrite: 0,
|
|
216
221
|
},
|
|
217
|
-
contextWindow:
|
|
222
|
+
contextWindow: 196592,
|
|
218
223
|
maxTokens: 65536,
|
|
224
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
219
225
|
compat: {
|
|
220
226
|
supportsDeveloperRole: false,
|
|
221
|
-
supportsReasoningEffort: true,
|
|
222
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
223
227
|
maxTokensField: "max_tokens",
|
|
224
228
|
},
|
|
225
229
|
},
|
|
@@ -235,12 +239,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
235
239
|
cacheRead: 0,
|
|
236
240
|
cacheWrite: 0,
|
|
237
241
|
},
|
|
238
|
-
contextWindow:
|
|
242
|
+
contextWindow: 262128,
|
|
239
243
|
maxTokens: 65536,
|
|
244
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
240
245
|
compat: {
|
|
241
246
|
supportsDeveloperRole: false,
|
|
242
|
-
supportsReasoningEffort: true,
|
|
243
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
244
247
|
maxTokensField: "max_tokens",
|
|
245
248
|
},
|
|
246
249
|
},
|
|
@@ -257,7 +260,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
257
260
|
cacheRead: 0,
|
|
258
261
|
cacheWrite: 0,
|
|
259
262
|
},
|
|
260
|
-
contextWindow:
|
|
263
|
+
contextWindow: 262128,
|
|
261
264
|
maxTokens: 65536,
|
|
262
265
|
compat: {
|
|
263
266
|
supportsDeveloperRole: false,
|
|
@@ -269,19 +272,18 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
269
272
|
id: "Qwen/Qwen3.6-35B-A3B",
|
|
270
273
|
name: "Qwen3.6 35B",
|
|
271
274
|
reasoning: true,
|
|
272
|
-
input: ["text"],
|
|
275
|
+
input: ["text", "image"],
|
|
273
276
|
cost: {
|
|
274
277
|
input: 0.05,
|
|
275
278
|
output: 0.1,
|
|
276
279
|
cacheRead: 0,
|
|
277
280
|
cacheWrite: 0,
|
|
278
281
|
},
|
|
279
|
-
contextWindow:
|
|
282
|
+
contextWindow: 131056,
|
|
280
283
|
maxTokens: 32768,
|
|
284
|
+
thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
|
|
281
285
|
compat: {
|
|
282
286
|
supportsDeveloperRole: false,
|
|
283
|
-
supportsReasoningEffort: true,
|
|
284
|
-
reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
|
|
285
287
|
maxTokensField: "max_tokens",
|
|
286
288
|
},
|
|
287
289
|
},
|
|
@@ -291,14 +293,14 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
|
|
|
291
293
|
name: "Qwen3.6 35B Fast",
|
|
292
294
|
reasoning: false,
|
|
293
295
|
fast: true,
|
|
294
|
-
input: ["text"],
|
|
296
|
+
input: ["text", "image"],
|
|
295
297
|
cost: {
|
|
296
298
|
input: 0.05,
|
|
297
299
|
output: 0.1,
|
|
298
300
|
cacheRead: 0,
|
|
299
301
|
cacheWrite: 0,
|
|
300
302
|
},
|
|
301
|
-
contextWindow:
|
|
303
|
+
contextWindow: 131056,
|
|
302
304
|
maxTokens: 32768,
|
|
303
305
|
compat: {
|
|
304
306
|
supportsDeveloperRole: false,
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { NeuralwattModelConfig } from "./models";
|
|
2
|
+
|
|
3
|
+
/**
 * Convert model configs into the payload shape handed to the provider.
 *
 * The `fast` flag is dropped from every entry, and each entry's `compat`
 * object is rebuilt from two defaults (`supportsDeveloperRole: false`,
 * `maxTokensField: "max_tokens"`) with the model's own `compat` values
 * spread on top, so model-specific settings win over the defaults.
 *
 * @param models Model configs to convert; the input array and its items are
 *   not mutated.
 * @returns A new array with one payload object per input model.
 */
export function buildModelsPayload(models: NeuralwattModelConfig[]) {
  return models.map((config) => {
    // `fast` is stripped here; every other field is forwarded unchanged.
    const { fast: _fast, ...rest } = config;

    return {
      ...rest,
      compat: {
        supportsDeveloperRole: false,
        maxTokensField: "max_tokens" as const,
        ...rest.compat,
      },
    };
  });
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { AuthStorage } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { getNeuralwattApiKey } from "../../lib/env";
|
|
3
|
+
import type { NeuralwattQuotas } from "../../types/quota-api";
|
|
4
|
+
import { parseQuotaHeaders } from "../../types/quota-events";
|
|
5
|
+
import { fetchQuotas } from "../../utils/quotas";
|
|
6
|
+
|
|
7
|
+
/**
 * Build a `NeuralwattQuotas` snapshot from response headers.
 *
 * Only the values returned by `parseQuotaHeaders` are real; every other field
 * is filled with a fixed placeholder (zeros, empty strings, `"standard"`,
 * `null`). The `subscription` section is populated only when the parsed plan
 * is not `"none"` and an energy-remaining value is present; otherwise it is
 * `null`.
 *
 * @param headers Header name/value pairs passed straight to `parseQuotaHeaders`.
 * @returns The synthesized snapshot, or `undefined` when `parseQuotaHeaders`
 *   returns nothing.
 */
export function buildQuotasFromHeaders(
  headers: Record<string, string>,
): NeuralwattQuotas | undefined {
  const headerQuotas = parseQuotaHeaders(headers);
  if (!headerQuotas) return undefined;

  const snapshotAt = new Date().toISOString();

  // Annotated so the literal fields keep the contextual typing they had inline.
  const subscription: NeuralwattQuotas["subscription"] =
    headerQuotas.subscriptionPlan !== "none" &&
    headerQuotas.energyRemaining !== undefined
      ? {
          plan: headerQuotas.subscriptionPlan,
          status: "active",
          billing_interval: "month",
          current_period_start: "",
          current_period_end: "",
          auto_renew: false,
          kwh_included: headerQuotas.energyIncluded ?? 0,
          kwh_used: headerQuotas.energyUsed ?? 0,
          kwh_remaining: headerQuotas.energyRemaining,
          in_overage: false,
        }
      : null;

  return {
    snapshot_at: snapshotAt,
    balance: {
      credits_remaining_usd: headerQuotas.allowanceRemainingUsd,
      total_credits_usd: 0,
      credits_used_usd: 0,
      accounting_method: "token",
    },
    usage: {
      lifetime: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
      current_month: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
    },
    limits: { overage_limit_usd: null, rate_limit_tier: "standard" },
    subscription,
    key: { name: "", allowance: null },
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Fetch quotas on behalf of a requester that supplied an `authStorage`.
 *
 * Resolves to `undefined` at the first step that yields nothing: `data` is
 * not an object, it carries no `authStorage`, no API key can be resolved from
 * that storage, or `fetchQuotas` reports failure.
 *
 * @param data Untyped payload; expected to be an object with an optional
 *   `authStorage` property.
 * @returns The quotas from a successful `fetchQuotas` call, otherwise
 *   `undefined`.
 */
export async function fetchRequestedQuotas(
  data: unknown,
): Promise<NeuralwattQuotas | undefined> {
  if (typeof data !== "object" || data === null) return undefined;

  const authStorage = (data as { authStorage?: AuthStorage }).authStorage;
  if (!authStorage) return undefined;

  const apiKey = await getNeuralwattApiKey(authStorage);
  if (!apiKey) return undefined;

  const outcome = await fetchQuotas(apiKey);
  return outcome.success ? outcome.data.quotas : undefined;
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { ExtensionContext } from "@
|
|
1
|
+
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import type { NeuralwattQuotas } from "../../types/quota-api";
|
|
3
3
|
import { formatKwh, formatUsd } from "../../utils/quota-format";
|
|
4
4
|
|
package/src/lib/env.ts
CHANGED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import type { NeuralwattModelConfig } from "../extensions/provider/models";
|
|
2
|
+
|
|
3
|
+
/** Upper bound, in milliseconds, for a single /v1/models request (15 seconds). */
const FETCH_TIMEOUT_MS = 15_000;
|
|
4
|
+
|
|
5
|
+
/**
 * Thinking-level map assigned by `mapApiModel` to every reasoning model other
 * than `openai/gpt-oss-20b`.
 *
 * Only `medium` carries a value; every other level is `null`.
 * NOTE(review): `null` presumably marks a level as unavailable for the model —
 * confirm against the consumer of `thinkingLevelMap`.
 */
const NEURALWATT_BINARY_THINKING_LEVEL_MAP = {
  minimal: null,
  low: null,
  medium: "medium",
  high: null,
  xhigh: null,
} as const;
|
|
12
|
+
|
|
13
|
+
/**
 * Thinking-level map assigned by `mapApiModel` to `openai/gpt-oss-20b`.
 *
 * `minimal` and `low` both resolve to `"low"`, `medium` and `high` map to
 * themselves, and `xhigh` is `null`.
 * NOTE(review): `null` presumably marks a level as unavailable for the model —
 * confirm against the consumer of `thinkingLevelMap`.
 */
const GPT_OSS_THINKING_LEVEL_MAP = {
  minimal: "low",
  low: "low",
  medium: "medium",
  high: "high",
  xhigh: null,
} as const;
|
|
20
|
+
|
|
21
|
+
/** `pricing` section of a model's metadata. */
interface ApiModelPricing {
  input_per_million: number;
  output_per_million: number;
  cached_input_per_million: number | null;
  cached_output_per_million: number | null;
  currency: string;
  /** Models with this flag set are filtered out by `fetchModels`. */
  pricing_tbd: boolean;
}

/** `capabilities` section of a model's metadata: one boolean per feature. */
interface ApiModelCapabilities {
  tools: boolean;
  json_mode: boolean;
  vision: boolean;
  reasoning: boolean;
  reasoning_effort: boolean;
  streaming: boolean;
  system_role: boolean;
  developer_role: boolean;
}

/** `limits` section of a model's metadata. */
interface ApiModelLimits {
  max_context_length: number;
  max_output_tokens: number | null;
  max_images: number | null;
}

/**
 * Optional `metadata` object attached to an entry of the /v1/models response.
 *
 * Structurally identical to a single interface with the three sections
 * written inline; they are split out only for readability.
 */
export interface ApiModelMetadata {
  display_name: string;
  description: string | null;
  provider: string;
  huggingface_id: string | null;
  pricing: ApiModelPricing;
  capabilities: ApiModelCapabilities;
  limits: ApiModelLimits;
  /** Models with this flag set are filtered out by `fetchModels`. */
  deprecated: boolean;
  deprecated_message: string | null;
}
|
|
52
|
+
|
|
53
|
+
/**
 * One entry of the /v1/models response as consumed by this module.
 *
 * `mapApiModel` reads `id`, `owned_by`, `max_model_len` and `metadata`; the
 * remaining fields are declared but not read in this file.
 */
export interface ApiModel {
  id: string;
  object: string;
  created: number;
  owned_by: string;
  root?: string;
  parent?: string | null;
  /** Copied verbatim into the mapped config's `contextWindow`. */
  max_model_len: number;
  /** Absent for some models; consumers fall back to defaults when missing. */
  metadata?: ApiModelMetadata;
}
|
|
63
|
+
|
|
64
|
+
/** Envelope of the /v1/models response: a tagged list of model entries. */
export interface ApiResponse {
  object: "list";
  data: ApiModel[];
}
|
|
68
|
+
|
|
69
|
+
/**
 * Decide whether an API model is a "fast" variant.
 *
 * A model counts as fast when its `owned_by` field is exactly `"neuralwatt"`
 * or when its id ends with `-fast`.
 */
function isFastModel(model: ApiModel): boolean {
  return model.owned_by === "neuralwatt" || model.id.endsWith("-fast");
}
|
|
74
|
+
|
|
75
|
+
/**
 * Map one /v1/models entry to a `NeuralwattModelConfig`.
 *
 * Values come from `model.metadata` when present. Any value that is missing —
 * including a whole `pricing`, `capabilities` or `limits` section on a partial
 * metadata object — falls back to a default instead of throwing:
 * name → the model id, reasoning → `false`, costs → `0`, input → text only,
 * maxTokens → `65536`.
 *
 * @param model Raw model entry from the API.
 * @returns The mapped config. `fast` is set only for fast variants, and
 *   `thinkingLevelMap` only for reasoning models.
 */
export function mapApiModel(model: ApiModel): NeuralwattModelConfig {
  const meta = model.metadata;
  // The API payload is external data: the declared types say these sections
  // are always present, but a partial metadata object must not crash mapping.
  const pricing = meta?.pricing;
  const capabilities = meta?.capabilities;
  const fast = isFastModel(model);

  // Base fields from top-level API data
  const result: NeuralwattModelConfig = {
    id: model.id,
    name: meta?.display_name ?? model.id,
    reasoning: capabilities?.reasoning ?? false,
    contextWindow: model.max_model_len,
    maxTokens: 65536, // sensible default
    cost: {
      input: pricing?.input_per_million ?? 0,
      output: pricing?.output_per_million ?? 0,
      cacheRead: pricing?.cached_input_per_million ?? 0,
      cacheWrite: pricing?.cached_output_per_million ?? 0,
    },
    input: capabilities?.vision ? ["text", "image"] : ["text"],
    compat: {
      supportsDeveloperRole: false,
      maxTokensField: "max_tokens",
    },
  };

  if (fast) {
    result.fast = true;
  }

  // Override maxTokens from limits if available (null/0 keep the default)
  const maxOutputTokens = meta?.limits?.max_output_tokens;
  if (maxOutputTokens) {
    result.maxTokens = maxOutputTokens;
  }

  if (result.reasoning) {
    // gpt-oss-20b gets its own level map; every other reasoning model uses
    // the binary map.
    result.thinkingLevelMap =
      model.id === "openai/gpt-oss-20b"
        ? GPT_OSS_THINKING_LEVEL_MAP
        : NEURALWATT_BINARY_THINKING_LEVEL_MAP;
  }

  return result;
}
|
|
118
|
+
|
|
119
|
+
/** Failure categories reported by `fetchModels`. */
type FetchModelsErrorKind = "timeout" | "network" | "cancelled";

/**
 * Outcome of `fetchModels`, discriminated on `success`: either the mapped
 * models, or an error with a human-readable message and a category.
 */
export type FetchModelsResult =
  | { success: true; models: NeuralwattModelConfig[] }
  | { success: false; error: { message: string; kind: FetchModelsErrorKind } };
|
|
125
|
+
|
|
126
|
+
/**
 * Fetch live model definitions from the Neuralwatt /v1/models endpoint.
 *
 * When the API returns metadata (pricing, capabilities, limits), those values
 * are used directly. Fields not exposed by the API fall back to sensible
 * defaults. Models flagged as deprecated or with pricing still to be
 * determined are dropped.
 *
 * Request failures are reported through the `success: false` branch rather
 * than thrown:
 * - `timeout`: the internal `FETCH_TIMEOUT_MS` deadline (or any other
 *   timeout-style abort reason) fired;
 * - `cancelled`: the request was aborted for any other reason;
 * - `network`: non-2xx status, malformed body, or any other error.
 *
 * @param signal Optional caller-owned signal; aborting it cancels the request.
 * @returns The mapped models on success, otherwise a categorized error.
 */
export async function fetchModels(
  signal?: AbortSignal,
): Promise<FetchModelsResult> {
  const signals: AbortSignal[] = [AbortSignal.timeout(FETCH_TIMEOUT_MS)];
  if (signal) signals.push(signal);
  const combined = AbortSignal.any(signals);

  const isDomError = (value: unknown, name: string): boolean =>
    value instanceof DOMException && value.name === name;

  try {
    const response = await fetch("https://api.neuralwatt.com/v1/models", {
      headers: {
        Referer: "https://pi.dev",
        "X-Title": "npm:@aliou/pi-neuralwatt",
      },
      signal: combined,
    });

    if (!response.ok) {
      return {
        success: false,
        error: {
          message: `Failed to fetch models: ${response.status} ${response.statusText}`,
          kind: "network",
        },
      };
    }

    // The body is external data: check the envelope before iterating so a
    // malformed response yields a clear message instead of a TypeError.
    const payload: unknown = await response.json();
    const entries = (payload as Partial<ApiResponse> | null)?.data;
    if (!Array.isArray(entries)) {
      return {
        success: false,
        error: {
          message: "Failed to fetch models: unexpected response shape",
          kind: "network",
        },
      };
    }

    // Filter out deprecated models and models without final pricing
    const active = entries.filter(
      (m) => !m.metadata?.deprecated && !m.metadata?.pricing?.pricing_tbd,
    );

    const models = active.map(mapApiModel);
    return { success: true, models };
  } catch (err: unknown) {
    // fetch rejects with the signal's abort *reason*. For AbortSignal.timeout
    // that is a DOMException named "TimeoutError" (not "AbortError"), and a
    // caller may abort with an arbitrary custom reason, so all three shapes
    // have to be recognised as an abort.
    const abortTriggered =
      isDomError(err, "AbortError") ||
      isDomError(err, "TimeoutError") ||
      (combined.aborted && err === combined.reason);

    if (abortTriggered) {
      if (
        isDomError(err, "TimeoutError") ||
        isDomError(combined.reason, "TimeoutError")
      ) {
        return {
          success: false,
          error: { message: "Fetch models timed out", kind: "timeout" },
        };
      }
      return {
        success: false,
        error: { message: "Fetch models cancelled", kind: "cancelled" },
      };
    }

    const message = err instanceof Error ? err.message : "Unknown error";
    return { success: false, error: { message, kind: "network" } };
  }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { isOffline } from "./is-offline";
|
|
3
|
+
|
|
4
|
+
describe("isOffline", () => {
  /**
   * Run `check` with PI_OFFLINE set to `value` (removed when `value` is
   * undefined), then restore the variable exactly as it was.
   *
   * Restoring must `delete` the variable when it was originally unset:
   * assigning `undefined` to a `process.env` property stores the string
   * "undefined" and would leak into later tests. The `finally` keeps the
   * environment clean even when an expectation throws.
   */
  const withOfflineEnv = (
    value: string | undefined,
    check: () => void,
  ): void => {
    const original = process.env.PI_OFFLINE;
    if (value === undefined) {
      delete process.env.PI_OFFLINE;
    } else {
      process.env.PI_OFFLINE = value;
    }

    try {
      check();
    } finally {
      if (original === undefined) {
        delete process.env.PI_OFFLINE;
      } else {
        process.env.PI_OFFLINE = original;
      }
    }
  };

  it("returns true when PI_OFFLINE is 1", () => {
    withOfflineEnv("1", () => {
      expect(isOffline()).toBe(true);
    });
  });

  it("returns true when PI_OFFLINE is true", () => {
    withOfflineEnv("true", () => {
      expect(isOffline()).toBe(true);
    });
  });

  it("returns true when PI_OFFLINE is yes", () => {
    withOfflineEnv("yes", () => {
      expect(isOffline()).toBe(true);
    });
  });

  it("returns false when PI_OFFLINE is unset", () => {
    withOfflineEnv(undefined, () => {
      expect(isOffline()).toBe(false);
    });
  });

  it("returns false when PI_OFFLINE is 0", () => {
    withOfflineEnv("0", () => {
      expect(isOffline()).toBe(false);
    });
  });

  it("returns false when PI_OFFLINE is false", () => {
    withOfflineEnv("false", () => {
      expect(isOffline()).toBe(false);
    });
  });

  it("returns false when PI_OFFLINE is no", () => {
    withOfflineEnv("no", () => {
      expect(isOffline()).toBe(false);
    });
  });

  it("returns false for other values", () => {
    withOfflineEnv("maybe", () => {
      expect(isOffline()).toBe(false);
    });
  });
});
|
package/src/utils/quota-bar.ts
CHANGED