npm - @aliou/pi-neuralwatt - Versions diffs - 0.1.2 → 0.4.1 - Mend

@aliou/pi-neuralwatt 0.1.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +1 -1
package/package.json +10 -10
package/src/config.ts +2 -2
package/src/extensions/command-quotas/command.ts +4 -1
package/src/extensions/command-quotas/components/quota-tabs.ts +2 -2
package/src/extensions/command-quotas/components/quotas-display.ts +4 -4
package/src/extensions/command-quotas/index.ts +1 -1
package/src/extensions/provider/context-overflow.ts +31 -0
package/src/extensions/provider/index.ts +61 -66
package/src/extensions/provider/models.test.ts +246 -19
package/src/extensions/provider/models.ts +41 -39
package/src/extensions/provider/provider-payload.ts +12 -0
package/src/extensions/provider/quota-store.ts +57 -0
package/src/extensions/quota-warnings/index.ts +1 -1
package/src/extensions/quota-warnings/notifier.ts +1 -1
package/src/extensions/sub-bar-integration/index.ts +1 -1
package/src/lib/env.ts +1 -1
package/src/lib/fetch-models.ts +187 -0
package/src/utils/is-offline.test.ts +60 -0
package/src/utils/is-offline.ts +4 -0
package/src/utils/quota-bar.ts +1 -1

package/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-![banner](https://assets.aliou.me/pi-extensions/banners/pi-neuralwatt.png)
+![banner](https://assets.aliou.me/github/aliou/pi-neuralwatt/banner.png)
 # Pi Neuralwatt Extension

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aliou/pi-neuralwatt",
-  "version": "0.1.2",
+  "version": "0.4.1",
   "license": "MIT",
   "type": "module",
   "private": false,
@@ -20,7 +20,7 @@
       "./src/extensions/quota-warnings/index.ts",
       "./src/extensions/sub-bar-integration/index.ts"
     ],
-    "video": "https://assets.aliou.me/pi-extensions/demos/pi-neuralwatt.mp4"
+    "video": "https://assets.aliou.me/github/aliou/pi-neuralwatt/demo.mp4"
   },
   "publishConfig": {
     "access": "public"
@@ -31,20 +31,20 @@
     "README.md"
   ],
   "dependencies": {
-    "@aliou/pi-utils-settings": "^0.9.0",
-    "@aliou/pi-utils-ui": "^0.1.0"
+    "@aliou/pi-utils-settings": "^0.15.0",
+    "@aliou/pi-utils-ui": "^0.4.0"
   },
   "peerDependencies": {
-    "@mariozechner/pi-coding-agent": ">=0.67.68",
-    "@mariozechner/pi-tui": ">=0.67.68",
+    "@earendil-works/pi-coding-agent": "0.74.0",
+    "@earendil-works/pi-tui": "0.74.0",
     "@sinclair/typebox": ">=0.34.0"
   },
   "devDependencies": {
     "@aliou/biome-plugins": "^0.8.1",
     "@biomejs/biome": "^2.4.12",
     "@changesets/cli": "^2.27.11",
-    "@mariozechner/pi-coding-agent": "0.67.68",
-    "@mariozechner/pi-tui": "0.67.68",
+    "@earendil-works/pi-coding-agent": "0.74.0",
+    "@earendil-works/pi-tui": "0.74.0",
     "@types/node": "^25.0.10",
     "husky": "^9.1.7",
     "ts-json-schema-generator": "^2.4.0",
@@ -52,10 +52,10 @@
     "vitest": "^4.0.18"
   },
   "peerDependenciesMeta": {
-    "@mariozechner/pi-coding-agent": {
+    "@earendil-works/pi-coding-agent": {
       "optional": true
     },
-    "@mariozechner/pi-tui": {
+    "@earendil-works/pi-tui": {
       "optional": true
     },
     "@sinclair/typebox": {

package/src/config.ts CHANGED Viewed

@@ -3,8 +3,8 @@ import {
   registerSettingsCommand,
   type SettingsSection,
 } from "@aliou/pi-utils-settings";
-import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
-import type { SettingItem } from "@mariozechner/pi-tui";
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import type { SettingItem } from "@earendil-works/pi-tui";
 export type NeuralwattFeatureId =
   | "quotaCommand"

package/src/extensions/command-quotas/command.ts CHANGED Viewed

@@ -1,5 +1,8 @@
 import { join } from "node:path";
-import { type ExtensionAPI, getAgentDir } from "@mariozechner/pi-coding-agent";
+import {
+  type ExtensionAPI,
+  getAgentDir,
+} from "@earendil-works/pi-coding-agent";
 import { getNeuralwattApiKey } from "../../lib/env";
 import { fetchQuotas } from "../../utils/quotas";
 import { QuotasComponent } from "./components/quotas-display";

package/src/extensions/command-quotas/components/quota-tabs.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import type { Theme } from "@mariozechner/pi-coding-agent";
-import { truncateToWidth } from "@mariozechner/pi-tui";
+import type { Theme } from "@earendil-works/pi-coding-agent";
+import { truncateToWidth } from "@earendil-works/pi-tui";
 import type { NeuralwattQuotas } from "../../../types/quota-api";
 import {
   percentCreditsRemaining,

package/src/extensions/command-quotas/components/quotas-display.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import type { Theme } from "@mariozechner/pi-coding-agent";
-import { DynamicBorder } from "@mariozechner/pi-coding-agent";
-import type { Component, TUI } from "@mariozechner/pi-tui";
-import { Loader, matchesKey, truncateToWidth } from "@mariozechner/pi-tui";
+import type { Theme } from "@earendil-works/pi-coding-agent";
+import { DynamicBorder } from "@earendil-works/pi-coding-agent";
+import type { Component, TUI } from "@earendil-works/pi-tui";
+import { Loader, matchesKey, truncateToWidth } from "@earendil-works/pi-tui";
 import type { NeuralwattQuotas } from "../../../types/quota-api";
 import {
   renderCreditsTab,

package/src/extensions/command-quotas/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
 import {
   configLoader,
   NEURALWATT_EXTENSIONS_REGISTER_EVENT,

package/src/extensions/provider/context-overflow.ts ADDED Viewed

@@ -0,0 +1,31 @@
+interface AssistantErrorLike {
+  role: string;
+  stopReason?: string;
+  provider?: string;
+  errorMessage?: string;
+}
+const NEURALWATT_CONTEXT_OVERFLOW_PATTERN =
+  /request exceeds model'?s maximum context length/i;
+/**
+ * Normalize Neuralwatt context overflow errors so Pi's native overflow
+ * compaction path can detect them and perform compact-and-retry.
+ */
+export function normalizeNeuralwattContextOverflowError<
+  TMessage extends AssistantErrorLike,
+>(message: TMessage, currentProvider?: string): TMessage | undefined {
+  if (message.role !== "assistant") return;
+  if (message.stopReason !== "error") return;
+  if (message.provider !== "neuralwatt" && currentProvider !== "neuralwatt")
+    return;
+  const errorMessage = message.errorMessage ?? "";
+  if (errorMessage.includes("context_length_exceeded")) return;
+  if (!NEURALWATT_CONTEXT_OVERFLOW_PATTERN.test(errorMessage)) return;
+  return {
+    ...message,
+    errorMessage: `context_length_exceeded: ${errorMessage}`,
+  };
+}

package/src/extensions/provider/index.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { AuthStorage, ExtensionAPI } from "@mariozechner/pi-coding-agent";
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
 import {
   configLoader,
   emitConfigUpdated,
@@ -8,17 +8,27 @@ import {
   registerNeuralwattSettings,
 } from "../../config";
 import { getNeuralwattApiKey } from "../../lib/env";
+import { fetchModels } from "../../lib/fetch-models";
 import type { NeuralwattQuotas } from "../../types/quota-api";
 import {
   NEURALWATT_QUOTAS_REQUEST_EVENT,
   NEURALWATT_QUOTAS_UPDATED_EVENT,
   type NeuralwattQuotasUpdatedPayload,
-  parseQuotaHeaders,
 } from "../../types/quota-events";
+import { isOffline } from "../../utils/is-offline";
 import { fetchQuotas } from "../../utils/quotas";
-import { NEURALWATT_MODELS } from "./models";
+import { normalizeNeuralwattContextOverflowError } from "./context-overflow";
+import type { NeuralwattModelConfig } from "./models";
+import { NEURALWATT_MODELS_CACHE } from "./models";
+import { buildModelsPayload } from "./provider-payload";
+import { buildQuotasFromHeaders, fetchRequestedQuotas } from "./quota-store";
-export function registerNeuralwattProvider(pi: ExtensionAPI): void {
+const HEADER_EMIT_THROTTLE_MS = 5_000;
+function registerNeuralwattProvider(
+  pi: ExtensionAPI,
+  models: NeuralwattModelConfig[],
+): void {
   pi.registerProvider("neuralwatt", {
     baseUrl: "https://api.neuralwatt.com/v1",
     apiKey: "NEURALWATT_API_KEY",
@@ -28,32 +38,25 @@ export function registerNeuralwattProvider(pi: ExtensionAPI): void {
       Referer: "https://pi.dev",
       "X-Title": "npm:@aliou/pi-neuralwatt",
     },
-    models: NEURALWATT_MODELS.map(({ fast: _fast, ...model }) => ({
-      ...model,
-      compat: {
-        supportsDeveloperRole: false,
-        maxTokensField: "max_tokens",
-        ...model.compat,
-      },
-    })),
+    models: buildModelsPayload(models),
   });
 }
 export default async function (pi: ExtensionAPI) {
   await configLoader.load();
-  registerNeuralwattProvider(pi);
-  // Track which feature extensions loaded
+  // Register with hardcoded cache immediately so models are available on startup
+  registerNeuralwattProvider(pi, NEURALWATT_MODELS_CACHE);
   const loadedFeatures = new Set<NeuralwattFeatureId>();
-  // Register settings (in the provider, so it's always available)
+  // Register settings in the provider so it is always available.
   registerNeuralwattSettings(pi, {
     getLoadedFeatures: () => loadedFeatures,
   });
-  // --- Quota store (event-based) ---
   let lastHeaderEmitAt = 0;
-  const HEADER_EMIT_THROTTLE_MS = 5_000;
+  let quotaRequestInFlight = false;
   function emitQuotas(
     quotas: NeuralwattQuotas,
@@ -66,81 +69,73 @@ export default async function (pi: ExtensionAPI) {
     pi.events.emit(NEURALWATT_QUOTAS_UPDATED_EVENT, { quotas, source });
   }
-  // Ingest quotas from response headers
+  pi.on("message_end", (event, ctx) => {
+    const message = normalizeNeuralwattContextOverflowError(
+      event.message,
+      ctx.model?.provider,
+    );
+    if (!message) return;
+    return { message };
+  });
   pi.on("after_provider_response", (event, ctx) => {
     if (ctx.model?.provider !== "neuralwatt") return;
-    const headerQuotas = parseQuotaHeaders(event.headers);
-    if (!headerQuotas) return;
-    const quotas: NeuralwattQuotas = {
-      snapshot_at: new Date().toISOString(),
-      balance: {
-        credits_remaining_usd: headerQuotas.allowanceRemainingUsd,
-        total_credits_usd: 0,
-        credits_used_usd: 0,
-        accounting_method: "token",
-      },
-      usage: {
-        lifetime: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
-        current_month: { cost_usd: 0, requests: 0, tokens: 0, energy_kwh: 0 },
-      },
-      limits: { overage_limit_usd: null, rate_limit_tier: "standard" },
-      subscription:
-        headerQuotas.subscriptionPlan !== "none" &&
-        headerQuotas.energyRemaining !== undefined
-          ? {
-              plan: headerQuotas.subscriptionPlan,
-              status: "active",
-              billing_interval: "month",
-              current_period_start: "",
-              current_period_end: "",
-              auto_renew: false,
-              kwh_included: headerQuotas.energyIncluded ?? 0,
-              kwh_used: headerQuotas.energyUsed ?? 0,
-              kwh_remaining: headerQuotas.energyRemaining,
-              in_overage: false,
-            }
-          : null,
-      key: { name: "", allowance: null },
-    };
+    const quotas = buildQuotasFromHeaders(event.headers);
+    if (!quotas) return;
     emitQuotas(quotas, "header");
   });
-  // Respond to quota requests from other extensions
-  let quotaRequestInFlight = false;
   pi.events.on(NEURALWATT_QUOTAS_REQUEST_EVENT, async (data: unknown) => {
     if (quotaRequestInFlight) return;
     quotaRequestInFlight = true;
     try {
-      if (!data || typeof data !== "object") return;
-      const { authStorage } = data as { authStorage?: AuthStorage };
-      if (!authStorage) return;
-      const apiKey = await getNeuralwattApiKey(authStorage);
-      if (!apiKey) return;
-      const result = await fetchQuotas(apiKey);
-      if (result.success) emitQuotas(result.data.quotas, "api");
+      const quotas = await fetchRequestedQuotas(data);
+      if (quotas)
+        pi.events.emit(NEURALWATT_QUOTAS_UPDATED_EVENT, {
+          quotas,
+          source: "api",
+        });
     } finally {
       quotaRequestInFlight = false;
     }
   });
-  // Collect which feature extensions are loaded
   pi.events.on(NEURALWATT_EXTENSIONS_REGISTER_EVENT, (data: unknown) => {
     const { feature } = data as { feature: NeuralwattFeatureId };
     loadedFeatures.add(feature);
   });
-  // On session start: request extensions to register, then emit config
   pi.on("session_start", async (_event, ctx) => {
     loadedFeatures.clear();
     pi.events.emit(NEURALWATT_EXTENSIONS_REQUEST_EVENT, undefined);
     emitConfigUpdated(pi);
+    if (!isOffline()) {
+      const result = await fetchModels();
+      if (result.success) {
+        const cacheIds = new Set(NEURALWATT_MODELS_CACHE.map((m) => m.id));
+        const liveIds = new Set(result.models.map((m) => m.id));
+        const added = result.models.filter((m) => !cacheIds.has(m.id));
+        const removed = NEURALWATT_MODELS_CACHE.filter(
+          (m) => !liveIds.has(m.id),
+        );
+        if (added.length > 0 || removed.length > 0) {
+          const parts: string[] = [];
+          if (added.length > 0) parts.push(`${added.length} new`);
+          if (removed.length > 0) parts.push(`${removed.length} removed`);
+          ctx.ui.notify(
+            `Neuralwatt models updated (${parts.join(", ")})`,
+            "info",
+          );
+        }
+        registerNeuralwattProvider(pi, result.models);
+      }
+    }
     if (ctx.model?.provider !== "neuralwatt") return;
     const apiKey = await getNeuralwattApiKey(ctx.modelRegistry.authStorage);
     if (!apiKey) return;
-    const result = await fetchQuotas(apiKey);
-    if (result.success) emitQuotas(result.data.quotas, "api");
+    const quotaResult = await fetchQuotas(apiKey);
+    if (quotaResult.success) emitQuotas(quotaResult.data.quotas, "api");
   });
 }

package/src/extensions/provider/models.test.ts CHANGED Viewed

@@ -1,18 +1,10 @@
 import { describe, expect, it } from "vitest";
-import { NEURALWATT_MODELS } from "./models";
-interface ApiModel {
-  id: string;
-  object: string;
-  created: number;
-  owned_by: string;
-  max_model_len: number;
-}
-interface ApiResponse {
-  object: "list";
-  data: ApiModel[];
-}
+import type {
+  ApiModel as FullApiModel,
+  ApiResponse as FullApiResponse,
+} from "../../lib/fetch-models";
+import { mapApiModel } from "../../lib/fetch-models";
+import { NEURALWATT_MODELS_CACHE } from "./models";
 interface Discrepancy {
   model: string;
@@ -21,7 +13,7 @@ interface Discrepancy {
   api: unknown;
 }
-async function fetchApiModels(): Promise<ApiModel[]> {
+async function fetchApiModels(): Promise<FullApiModel[]> {
   const apiKey = process.env.NEURALWATT_API_KEY;
   const headers: Record<string, string> = {
     "Content-Type": "application/json",
@@ -41,13 +33,13 @@ async function fetchApiModels(): Promise<ApiModel[]> {
     );
   }
-  const data: ApiResponse = await response.json();
+  const data: FullApiResponse = await response.json();
   return data.data;
 }
 function compareModels(
-  apiModels: ApiModel[],
-  hardcodedModels: typeof NEURALWATT_MODELS,
+  apiModels: FullApiModel[],
+  hardcodedModels: typeof NEURALWATT_MODELS_CACHE,
 ): Discrepancy[] {
   const discrepancies: Discrepancy[] = [];
@@ -73,10 +65,55 @@ function compareModels(
         api: apiModel.max_model_len,
       });
     }
+    // Check metadata-driven fields if available
+    const meta = apiModel.metadata;
+    if (meta) {
+      // Check reasoning
+      if (meta.capabilities.reasoning !== hardcoded.reasoning) {
+        discrepancies.push({
+          model: hardcoded.id,
+          field: "reasoning",
+          hardcoded: hardcoded.reasoning,
+          api: meta.capabilities.reasoning,
+        });
+      }
+      // Check pricing
+      if (meta.pricing.input_per_million !== hardcoded.cost.input) {
+        discrepancies.push({
+          model: hardcoded.id,
+          field: "cost.input",
+          hardcoded: hardcoded.cost.input,
+          api: meta.pricing.input_per_million,
+        });
+      }
+      if (meta.pricing.output_per_million !== hardcoded.cost.output) {
+        discrepancies.push({
+          model: hardcoded.id,
+          field: "cost.output",
+          hardcoded: hardcoded.cost.output,
+          api: meta.pricing.output_per_million,
+        });
+      }
+      // Check vision
+      const hasVision = hardcoded.input.includes("image");
+      if (meta.capabilities.vision !== hasVision) {
+        discrepancies.push({
+          model: hardcoded.id,
+          field: "input (vision)",
+          hardcoded: hasVision,
+          api: meta.capabilities.vision,
+        });
+      }
+    }
   }
   // Check for API models not in hardcoded list
   for (const apiModel of apiModels) {
+    if (apiModel.metadata?.deprecated || apiModel.metadata?.pricing.pricing_tbd)
+      continue;
     const hardcoded = hardcodedModels.find((m) => m.id === apiModel.id);
     if (!hardcoded) {
       discrepancies.push({
@@ -94,7 +131,7 @@ function compareModels(
 describe("Neuralwatt models", () => {
   it("should match API model definitions", { timeout: 30000 }, async () => {
     const apiModels = await fetchApiModels();
-    const discrepancies = compareModels(apiModels, NEURALWATT_MODELS);
+    const discrepancies = compareModels(apiModels, NEURALWATT_MODELS_CACHE);
     if (discrepancies.length > 0) {
       console.error("\nModel discrepancies found:");
@@ -117,4 +154,194 @@ describe("Neuralwatt models", () => {
     expect(discrepancies).toHaveLength(0);
   });
+  it("should map API models with metadata correctly", () => {
+    // Simulate a reasoning model with reasoning_effort support (like gpt-oss-20b)
+    const apiModelWithEffort: FullApiModel = {
+      id: "openai/gpt-oss-20b",
+      object: "model",
+      created: 1777467968,
+      owned_by: "vllm",
+      root: "openai/gpt-oss-20b",
+      parent: null,
+      max_model_len: 16384,
+      metadata: {
+        display_name: "GPT-OSS 20B",
+        description: "OpenAI GPT-OSS 20B",
+        provider: "OpenAI",
+        huggingface_id: null,
+        pricing: {
+          input_per_million: 0.03,
+          output_per_million: 0.16,
+          cached_input_per_million: null,
+          cached_output_per_million: null,
+          currency: "USD",
+          pricing_tbd: false,
+        },
+        capabilities: {
+          tools: true,
+          json_mode: true,
+          vision: false,
+          reasoning: true,
+          reasoning_effort: true,
+          streaming: true,
+          system_role: true,
+          developer_role: false,
+        },
+        limits: {
+          max_context_length: 16384,
+          max_output_tokens: 4096,
+          max_images: null,
+        },
+        deprecated: false,
+        deprecated_message: null,
+      },
+    };
+    const result = mapApiModel(apiModelWithEffort);
+    expect(result.id).toBe("openai/gpt-oss-20b");
+    expect(result.name).toBe("GPT-OSS 20B");
+    expect(result.reasoning).toBe(true);
+    expect(result.contextWindow).toBe(16384);
+    expect(result.maxTokens).toBe(4096);
+    expect(result.input).toEqual(["text"]);
+    expect(result.cost.input).toBe(0.03);
+    expect(result.cost.output).toBe(0.16);
+    expect(result.thinkingLevelMap).toEqual({
+      minimal: "low",
+      low: "low",
+      medium: "medium",
+      high: "high",
+      xhigh: null,
+    });
+    expect(result.fast).toBeUndefined();
+  });
+  it("should map fast variants correctly", () => {
+    // Simulate a fast variant (owned by "neuralwatt")
+    const fastModel: FullApiModel = {
+      id: "qwen3.6-35b-fast",
+      object: "model",
+      created: 0,
+      owned_by: "neuralwatt",
+      max_model_len: 131072,
+      metadata: {
+        display_name: "Qwen3.6 35B Fast",
+        description: "Fast variant",
+        provider: "Qwen",
+        huggingface_id: null,
+        pricing: {
+          input_per_million: 0.05,
+          output_per_million: 0.1,
+          cached_input_per_million: null,
+          cached_output_per_million: null,
+          currency: "USD",
+          pricing_tbd: false,
+        },
+        capabilities: {
+          tools: true,
+          json_mode: true,
+          vision: false,
+          reasoning: false,
+          reasoning_effort: false,
+          streaming: true,
+          system_role: true,
+          developer_role: false,
+        },
+        limits: {
+          max_context_length: 131072,
+          max_output_tokens: null,
+          max_images: null,
+        },
+        deprecated: false,
+        deprecated_message: null,
+      },
+    };
+    const result = mapApiModel(fastModel);
+    expect(result.id).toBe("qwen3.6-35b-fast");
+    expect(result.fast).toBe(true);
+    expect(result.reasoning).toBe(false);
+    expect(
+      (result.compat as Record<string, unknown>)?.supportsReasoningEffort,
+    ).toBeUndefined();
+  });
+  it("should map vision models correctly", () => {
+    const visionModel: FullApiModel = {
+      id: "moonshotai/Kimi-K2.6",
+      object: "model",
+      created: 1777467965,
+      owned_by: "vllm",
+      root: "moonshotai/Kimi-K2.6",
+      parent: null,
+      max_model_len: 262144,
+      metadata: {
+        display_name: "Kimi K2.6",
+        description: "Moonshot Kimi K2.6",
+        provider: "MoonshotAI",
+        huggingface_id: null,
+        pricing: {
+          input_per_million: 0.69,
+          output_per_million: 3.22,
+          cached_input_per_million: null,
+          cached_output_per_million: null,
+          currency: "USD",
+          pricing_tbd: false,
+        },
+        capabilities: {
+          tools: true,
+          json_mode: true,
+          vision: true,
+          reasoning: true,
+          reasoning_effort: false,
+          streaming: true,
+          system_role: true,
+          developer_role: false,
+        },
+        limits: {
+          max_context_length: 262144,
+          max_output_tokens: null,
+          max_images: 20,
+        },
+        deprecated: false,
+        deprecated_message: null,
+      },
+    };
+    const result = mapApiModel(visionModel);
+    expect(result.input).toEqual(["text", "image"]);
+    expect(result.reasoning).toBe(true);
+    expect(result.thinkingLevelMap).toEqual({
+      minimal: null,
+      low: null,
+      medium: "medium",
+      high: null,
+      xhigh: null,
+    });
+  });
+  it("should use defaults when metadata is missing", () => {
+    const bareModel: FullApiModel = {
+      id: "test/model",
+      object: "model",
+      created: 0,
+      owned_by: "vllm",
+      max_model_len: 8192,
+    };
+    const result = mapApiModel(bareModel);
+    expect(result.id).toBe("test/model");
+    expect(result.name).toBe("test/model");
+    expect(result.reasoning).toBe(false);
+    expect(result.contextWindow).toBe(8192);
+    expect(result.maxTokens).toBe(65536);
+    expect(result.input).toEqual(["text"]);
+    expect(result.cost.input).toBe(0);
+    expect(result.cost.output).toBe(0);
+    expect(result.fast).toBeUndefined();
+    expect(
+      (result.compat as Record<string, unknown>)?.supportsReasoningEffort,
+    ).toBeUndefined();
+  });
 });

package/src/extensions/provider/models.ts CHANGED Viewed

@@ -3,22 +3,25 @@
 // Pricing: https://portal.neuralwatt.com/pricing
 // max_model_len from /v1/models, pricing from /pricing page
-import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
+import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
 export interface NeuralwattModelConfig extends ProviderModelConfig {
   /** Fast variant of a parent model (e.g. "glm-5-fast" is the fast variant of "zai-org/GLM-5.1-FP8"). */
   fast?: boolean;
 }
-const NEURALWATT_REASONING_EFFORT_MAP = {
-  minimal: "low",
-  low: "low",
+const NEURALWATT_BINARY_THINKING_LEVEL_MAP = {
+  minimal: null,
+  low: null,
   medium: "medium",
-  high: "high",
-  xhigh: "high",
+  high: null,
+  xhigh: null,
 } as const;
-export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
+/** Hardcoded model cache. Used as a fallback on startup before live models are fetched.
+ * Updated from https://api.neuralwatt.com/v1/models and https://portal.neuralwatt.com/pricing
+ */
+export const NEURALWATT_MODELS_CACHE: NeuralwattModelConfig[] = [
   // Devstral Small 2 - Mistral
   {
     id: "mistralai/Devstral-Small-2-24B-Instruct-2512",
@@ -31,7 +34,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 32768,
     compat: {
       supportsDeveloperRole: false,
@@ -51,7 +54,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 202752,
+    contextWindow: 202736,
     maxTokens: 32768,
     compat: {
       supportsDeveloperRole: false,
@@ -70,12 +73,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 202752,
+    contextWindow: 202736,
     maxTokens: 32768,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -92,7 +94,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 202752,
+    contextWindow: 202736,
     maxTokens: 32768,
     compat: {
       supportsDeveloperRole: false,
@@ -103,7 +105,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
   {
     id: "openai/gpt-oss-20b",
     name: "GPT-OSS 20B",
-    reasoning: false,
+    reasoning: true,
     input: ["text"],
     cost: {
       input: 0.03,
@@ -111,8 +113,15 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 16384,
+    contextWindow: 16368,
     maxTokens: 4096,
+    thinkingLevelMap: {
+      minimal: "low",
+      low: "low",
+      medium: "medium",
+      high: "high",
+      xhigh: null,
+    },
     compat: {
       supportsDeveloperRole: false,
       maxTokensField: "max_tokens",
@@ -130,12 +139,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -152,7 +160,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
     compat: {
       supportsDeveloperRole: false,
@@ -171,12 +179,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -184,7 +191,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
   {
     id: "kimi-k2.6-fast",
     name: "Kimi K2.6 Fast",
-    reasoning: true,
+    reasoning: false,
     fast: true,
     input: ["text", "image"],
     cost: {
@@ -193,12 +200,10 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -214,12 +219,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 196608,
+    contextWindow: 196592,
     maxTokens: 65536,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -235,12 +239,11 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -257,7 +260,7 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 262144,
+    contextWindow: 262128,
     maxTokens: 65536,
     compat: {
       supportsDeveloperRole: false,
@@ -269,19 +272,18 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
     id: "Qwen/Qwen3.6-35B-A3B",
     name: "Qwen3.6 35B",
     reasoning: true,
-    input: ["text"],
+    input: ["text", "image"],
     cost: {
       input: 0.05,
       output: 0.1,
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 131072,
+    contextWindow: 131056,
     maxTokens: 32768,
+    thinkingLevelMap: NEURALWATT_BINARY_THINKING_LEVEL_MAP,
     compat: {
       supportsDeveloperRole: false,
-      supportsReasoningEffort: true,
-      reasoningEffortMap: NEURALWATT_REASONING_EFFORT_MAP,
       maxTokensField: "max_tokens",
     },
   },
@@ -291,14 +293,14 @@ export const NEURALWATT_MODELS: NeuralwattModelConfig[] = [
     name: "Qwen3.6 35B Fast",
     reasoning: false,
     fast: true,
-    input: ["text"],
+    input: ["text", "image"],
     cost: {
       input: 0.05,
       output: 0.1,
       cacheRead: 0,
       cacheWrite: 0,
     },
-    contextWindow: 131072,
+    contextWindow: 131056,
     maxTokens: 32768,
     compat: {
       supportsDeveloperRole: false,

package/src/extensions/provider/provider-payload.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { NeuralwattModelConfig } from "./models";
+export function buildModelsPayload(models: NeuralwattModelConfig[]) {
+  return models.map(({ fast: _fast, ...model }) => ({
+    ...model,
+    compat: {
+      supportsDeveloperRole: false,
+      maxTokensField: "max_tokens" as const,
+      ...model.compat,
+    },
+  }));
+}

package/src/extensions/provider/quota-store.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import type { AuthStorage } from "@earendil-works/pi-coding-agent";
+import { getNeuralwattApiKey } from "../../lib/env";
+import type { NeuralwattQuotas } from "../../types/quota-api";
+import { parseQuotaHeaders } from "../../types/quota-events";
+import { fetchQuotas } from "../../utils/quotas";
+/**
+ * Build a quota snapshot from response headers.
+ *
+ * Only the remaining allowance and, when available, the subscription energy
+ * figures are taken from the parsed headers; every other field is filled with
+ * a zero, empty, or fixed placeholder value.
+ *
+ * @param headers - Header map passed to `parseQuotaHeaders`.
+ * @returns The snapshot, or `undefined` when `parseQuotaHeaders` returns
+ *   nothing usable.
+ */
+export function buildQuotasFromHeaders(
+  headers: Record<string, string>,
+): NeuralwattQuotas | undefined {
+  const parsed = parseQuotaHeaders(headers);
+  if (!parsed) return undefined;
+  const { subscriptionPlan, energyRemaining } = parsed;
+  // A subscription entry is only produced when a plan other than "none" is
+  // reported together with a remaining-energy figure.
+  const subscription: NeuralwattQuotas["subscription"] =
+    subscriptionPlan === "none" || energyRemaining === undefined
+      ? null
+      : {
+          plan: subscriptionPlan,
+          status: "active",
+          billing_interval: "month",
+          current_period_start: "",
+          current_period_end: "",
+          auto_renew: false,
+          kwh_included: parsed.energyIncluded ?? 0,
+          kwh_used: parsed.energyUsed ?? 0,
+          kwh_remaining: energyRemaining,
+          in_overage: false,
+        };
+  // Factory so `lifetime` and `current_month` stay separate objects.
+  const emptyUsage = () => ({
+    cost_usd: 0,
+    requests: 0,
+    tokens: 0,
+    energy_kwh: 0,
+  });
+  return {
+    snapshot_at: new Date().toISOString(),
+    balance: {
+      credits_remaining_usd: parsed.allowanceRemainingUsd,
+      total_credits_usd: 0,
+      credits_used_usd: 0,
+      accounting_method: "token",
+    },
+    usage: {
+      lifetime: emptyUsage(),
+      current_month: emptyUsage(),
+    },
+    limits: { overage_limit_usd: null, rate_limit_tier: "standard" },
+    subscription,
+    key: { name: "", allowance: null },
+  };
+}
+/**
+ * Fetch quotas on behalf of a request payload that carries an `authStorage`.
+ *
+ * @param data - Untyped payload; only its `authStorage` property is read.
+ * @returns The fetched quotas, or `undefined` when the payload is not an
+ *   object, has no `authStorage`, no API key can be resolved, or the quota
+ *   fetch reports failure.
+ */
+export async function fetchRequestedQuotas(
+  data: unknown,
+): Promise<NeuralwattQuotas | undefined> {
+  if (typeof data !== "object" || data === null) return undefined;
+  const storage = (data as { authStorage?: AuthStorage }).authStorage;
+  if (!storage) return undefined;
+  const apiKey = await getNeuralwattApiKey(storage);
+  if (!apiKey) return undefined;
+  const outcome = await fetchQuotas(apiKey);
+  return outcome.success ? outcome.data.quotas : undefined;
+}

package/src/extensions/quota-warnings/index.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import type {
   ExtensionAPI,
   ExtensionContext,
-} from "@mariozechner/pi-coding-agent";
+} from "@earendil-works/pi-coding-agent";
 import {
   configLoader,
   NEURALWATT_CONFIG_UPDATED_EVENT,

package/src/extensions/quota-warnings/notifier.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
+import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
 import type { NeuralwattQuotas } from "../../types/quota-api";
 import { formatKwh, formatUsd } from "../../utils/quota-format";

package/src/extensions/sub-bar-integration/index.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import type {
   ExtensionAPI,
   ExtensionContext,
   Theme,
-} from "@mariozechner/pi-coding-agent";
+} from "@earendil-works/pi-coding-agent";
 import {
   configLoader,
   NEURALWATT_CONFIG_UPDATED_EVENT,

package/src/lib/env.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { AuthStorage } from "@mariozechner/pi-coding-agent";
+import type { AuthStorage } from "@earendil-works/pi-coding-agent";
 const PROVIDER_ID = "neuralwatt";

package/src/lib/fetch-models.ts ADDED Viewed

@@ -0,0 +1,187 @@
+import type { NeuralwattModelConfig } from "../extensions/provider/models";
+const FETCH_TIMEOUT_MS = 15_000; // Passed to AbortSignal.timeout() in fetchModels (milliseconds).
+const NEURALWATT_BINARY_THINKING_LEVEL_MAP = { // Attached by mapApiModel to reasoning models other than "openai/gpt-oss-20b".
+  minimal: null, // NOTE(review): null presumably marks a level as unavailable — confirm against the consumer of thinkingLevelMap.
+  low: null,
+  medium: "medium", // The only level with a non-null entry.
+  high: null,
+  xhigh: null,
+} as const;
+const GPT_OSS_THINKING_LEVEL_MAP = { // Attached by mapApiModel to "openai/gpt-oss-20b" only.
+  minimal: "low", // minimal and low share the same "low" entry.
+  low: "low",
+  medium: "medium",
+  high: "high",
+  xhigh: null,
+} as const;
+export interface ApiModelMetadata { // Shape of the optional `metadata` object on an ApiModel.
+  display_name: string; // Used by mapApiModel as the model's `name`.
+  description: string | null;
+  provider: string;
+  huggingface_id: string | null;
+  pricing: {
+    input_per_million: number; // Copied to `cost.input`. NOTE(review): presumably a price per million tokens in `currency` — confirm.
+    output_per_million: number; // Copied to `cost.output`.
+    cached_input_per_million: number | null; // Copied to `cost.cacheRead`; null becomes 0.
+    cached_output_per_million: number | null; // Copied to `cost.cacheWrite`; null becomes 0.
+    currency: string;
+    pricing_tbd: boolean; // fetchModels drops models that have this flag set.
+  };
+  capabilities: {
+    tools: boolean;
+    json_mode: boolean;
+    vision: boolean; // When true, mapApiModel lists "image" alongside "text" in `input`.
+    reasoning: boolean; // Copied to `reasoning`; also decides whether a thinkingLevelMap is attached.
+    reasoning_effort: boolean;
+    streaming: boolean;
+    system_role: boolean;
+    developer_role: boolean;
+  };
+  limits: {
+    max_context_length: number;
+    max_output_tokens: number | null; // When truthy, replaces mapApiModel's default `maxTokens` of 65536.
+    max_images: number | null;
+  };
+  deprecated: boolean; // fetchModels drops models that have this flag set.
+  deprecated_message: string | null;
+}
+export interface ApiModel { // One entry of `ApiResponse.data`.
+  id: string; // Used as the config `id`; an id ending in "-fast" makes isFastModel return true.
+  object: string;
+  created: number;
+  owned_by: string; // The value "neuralwatt" makes isFastModel return true.
+  root?: string;
+  parent?: string | null;
+  max_model_len: number; // Used by mapApiModel as `contextWindow`.
+  metadata?: ApiModelMetadata; // Optional; mapApiModel falls back to defaults when it is absent.
+}
+export interface ApiResponse { // Shape fetchModels expects of the /v1/models JSON body.
+  object: "list";
+  data: ApiModel[]; // Entries that fetchModels filters and maps with mapApiModel.
+}
+/**
+ * Report whether a model counts as a fast variant: either its `owned_by`
+ * field is exactly "neuralwatt" or its id ends with "-fast".
+ */
+function isFastModel(model: ApiModel): boolean {
+  const { id, owned_by: owner } = model;
+  return owner === "neuralwatt" || id.endsWith("-fast");
+}
+/**
+ * Map one `/v1/models` entry to a NeuralwattModelConfig.
+ *
+ * Values come from `model.metadata` when present; anything missing falls back
+ * to a default (id as name, no reasoning, zero cost, text-only input, 65536
+ * output tokens).
+ *
+ * @param model - Raw model entry from the API.
+ * @returns The model config, with `fast` set for fast variants and a
+ *   `thinkingLevelMap` attached for reasoning models.
+ */
+export function mapApiModel(model: ApiModel): NeuralwattModelConfig {
+  const meta = model.metadata;
+  // The payload is unvalidated JSON, so each nested metadata section is read
+  // defensively: a partial `metadata` object falls back to defaults instead
+  // of throwing and failing the whole model list.
+  const pricing = meta?.pricing;
+  const capabilities = meta?.capabilities;
+  const limits = meta?.limits;
+  const result: NeuralwattModelConfig = {
+    id: model.id,
+    name: meta?.display_name ?? model.id,
+    reasoning: capabilities?.reasoning ?? false,
+    contextWindow: model.max_model_len,
+    // 65536 is the default; a truthy reported output limit replaces it.
+    maxTokens: limits?.max_output_tokens || 65536,
+    cost: {
+      input: pricing?.input_per_million ?? 0,
+      output: pricing?.output_per_million ?? 0,
+      cacheRead: pricing?.cached_input_per_million ?? 0,
+      cacheWrite: pricing?.cached_output_per_million ?? 0,
+    },
+    input: capabilities?.vision ? ["text", "image"] : ["text"],
+    compat: {
+      supportsDeveloperRole: false,
+      maxTokensField: "max_tokens",
+    },
+  };
+  // `fast` is only set on fast variants; other configs leave it undefined.
+  if (isFastModel(model)) {
+    result.fast = true;
+  }
+  if (result.reasoning) {
+    // gpt-oss-20b gets its own level map; every other reasoning model gets
+    // the binary one.
+    result.thinkingLevelMap =
+      model.id === "openai/gpt-oss-20b"
+        ? GPT_OSS_THINKING_LEVEL_MAP
+        : NEURALWATT_BINARY_THINKING_LEVEL_MAP;
+  }
+  return result;
+}
+export type FetchModelsResult = // Outcome of fetchModels, discriminated on `success`.
+  | { success: true; models: NeuralwattModelConfig[] }
+  | {
+      success: false;
+      error: { message: string; kind: "timeout" | "network" | "cancelled" }; // "network" also covers non-OK HTTP responses and any other thrown error.
+    };
+/**
+ * Fetch live model definitions from the Neuralwatt /v1/models endpoint.
+ *
+ * When the API returns metadata (pricing, capabilities, limits), those values
+ * are used directly. Fields not exposed by the API fall back to sensible
+ * defaults. Deprecated models and models whose pricing is still to be
+ * determined are left out of the result.
+ *
+ * @param signal - Optional caller signal; aborting it cancels the request.
+ * @returns The mapped models on success. Never rejects: failures are reported
+ *   as `kind: "timeout"` (the internal timeout fired), `"cancelled"` (the
+ *   caller's signal aborted), or `"network"` (non-OK status, unexpected
+ *   response body, or any other error).
+ */
+export async function fetchModels(
+  signal?: AbortSignal,
+): Promise<FetchModelsResult> {
+  const signals: AbortSignal[] = [AbortSignal.timeout(FETCH_TIMEOUT_MS)];
+  if (signal) signals.push(signal);
+  const combined = AbortSignal.any(signals);
+  try {
+    const response = await fetch("https://api.neuralwatt.com/v1/models", {
+      headers: {
+        Referer: "https://pi.dev",
+        "X-Title": "npm:@aliou/pi-neuralwatt",
+      },
+      signal: combined,
+    });
+    if (!response.ok) {
+      return {
+        success: false,
+        error: {
+          message: `Failed to fetch models: ${response.status} ${response.statusText}`,
+          kind: "network",
+        },
+      };
+    }
+    // The body is untrusted JSON: confirm it carries a model list before use.
+    const payload: unknown = await response.json();
+    const entries = (payload as Partial<ApiResponse> | null)?.data;
+    if (!Array.isArray(entries)) {
+      return {
+        success: false,
+        error: {
+          message: "Failed to fetch models: unexpected response body",
+          kind: "network",
+        },
+      };
+    }
+    // Skip deprecated models and models whose pricing is still TBD.
+    const active = entries.filter(
+      (m) => !m.metadata?.deprecated && !m.metadata?.pricing?.pricing_tbd,
+    );
+    const models = active.map(mapApiModel);
+    return { success: true, models };
+  } catch (err: unknown) {
+    // fetch rejects with the signal's abort reason: a "TimeoutError"
+    // DOMException for AbortSignal.timeout, and an "AbortError" (or a custom
+    // reason) for the caller's signal. Classify from the combined signal
+    // instead of assuming every abort surfaces as "AbortError".
+    if (combined.aborted) {
+      const reason: unknown = combined.reason;
+      if (reason instanceof DOMException && reason.name === "TimeoutError") {
+        return {
+          success: false,
+          error: { message: "Fetch models timed out", kind: "timeout" },
+        };
+      }
+      return {
+        success: false,
+        error: { message: "Fetch models cancelled", kind: "cancelled" },
+      };
+    }
+    const message = err instanceof Error ? err.message : "Unknown error";
+    return { success: false, error: { message, kind: "network" } };
+  }
+}

package/src/utils/is-offline.test.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import { isOffline } from "./is-offline";
+describe("isOffline", () => {
+  /**
+   * Set PI_OFFLINE to `value`, or unset it when `value` is `undefined`.
+   * Assigning `undefined` to a process.env key would store the string
+   * "undefined", so an absent variable has to be removed with `delete`.
+   */
+  function applyOfflineEnv(value: string | undefined): void {
+    if (value === undefined) {
+      delete process.env.PI_OFFLINE;
+    } else {
+      process.env.PI_OFFLINE = value;
+    }
+  }
+  /**
+   * Run `check` with PI_OFFLINE forced to `value`, then restore the previous
+   * state even when `check` throws, so one failing case cannot leak its
+   * environment into the next.
+   */
+  function withOfflineEnv(value: string | undefined, check: () => void): void {
+    const original = process.env.PI_OFFLINE;
+    applyOfflineEnv(value);
+    try {
+      check();
+    } finally {
+      applyOfflineEnv(original);
+    }
+  }
+  it("returns true when PI_OFFLINE is 1", () => {
+    withOfflineEnv("1", () => {
+      expect(isOffline()).toBe(true);
+    });
+  });
+  it("returns true when PI_OFFLINE is true", () => {
+    withOfflineEnv("true", () => {
+      expect(isOffline()).toBe(true);
+    });
+  });
+  it("returns true when PI_OFFLINE is yes", () => {
+    withOfflineEnv("yes", () => {
+      expect(isOffline()).toBe(true);
+    });
+  });
+  it("returns false when PI_OFFLINE is unset", () => {
+    withOfflineEnv(undefined, () => {
+      expect(isOffline()).toBe(false);
+    });
+  });
+  it("returns false when PI_OFFLINE is 0", () => {
+    withOfflineEnv("0", () => {
+      expect(isOffline()).toBe(false);
+    });
+  });
+  it("returns false when PI_OFFLINE is false", () => {
+    withOfflineEnv("false", () => {
+      expect(isOffline()).toBe(false);
+    });
+  });
+  it("returns false when PI_OFFLINE is no", () => {
+    withOfflineEnv("no", () => {
+      expect(isOffline()).toBe(false);
+    });
+  });
+  it("returns false for other values", () => {
+    withOfflineEnv("maybe", () => {
+      expect(isOffline()).toBe(false);
+    });
+  });
+});

package/src/utils/is-offline.ts ADDED Viewed

@@ -0,0 +1,4 @@
+/**
+ * Report whether offline mode is requested through the PI_OFFLINE
+ * environment variable.
+ *
+ * @returns `true` only when PI_OFFLINE is exactly "1", "true", or "yes";
+ *   `false` for any other value or when the variable is unset.
+ */
+export function isOffline(): boolean {
+  switch (process.env.PI_OFFLINE) {
+    case "1":
+    case "true":
+    case "yes":
+      return true;
+    default:
+      return false;
+  }
+}

package/src/utils/quota-bar.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { Theme } from "@mariozechner/pi-coding-agent";
+import type { Theme } from "@earendil-works/pi-coding-agent";
 export type Severity = "success" | "warning" | "error";