npm - pi-llama-cpp - Versions diffs - 0.3.3 → 0.4.0 - Mend

pi-llama-cpp 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +8 -5
package/package.json +1 -1
package/src/commands/models.ts +3 -1
package/src/events.ts +5 -2
package/src/index.ts +6 -20
package/src/interfaces/endpoints/models.ts +6 -0
package/src/manager.ts +87 -0
package/src/models/baseModel.ts +8 -27
package/src/models/routerModel.ts +11 -8
package/src/models/singleModel.ts +7 -12
package/src/tools/retriever.ts +1 -1
package/tests/commandManager.test.ts +122 -0
package/tests/handlers.test.ts +7 -2
package/tests/modelsCommand.test.ts +134 -0
package/tests/routerModel.test.ts +18 -82
package/tests/singleModel.test.ts +11 -35
package/src/tools/provider.ts +0 -28

package/README.md CHANGED Viewed

@@ -99,17 +99,20 @@ llama-server --model path/to/model.gguf ...
 ```
 The extension determines the context size as follows:
-- **Router mode** — when loaded, reads `meta.n_ctx` from the `/models` endpoint; when not loaded, reads `--ctx-size` and/or `--fit-ctx` from the model's status `args` array
+- **Router mode**
+  - When loaded, reads `meta.n_ctx` from the `/models` endpoint
+  - When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments, or `ctx-size` and/or `fit-ctx` keys from the **presets.ini** file.
 - **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
 - Falls back to `128000` if not available
 ### Commands
-| Command   | Description                                                                                |
-| --------- | ------------------------------------------------------------------------------------------ |
-| `/models` | Browse your models with live status. Select a model to load, switch, or unload it.         |
+| Command          | Description                                                                                |
+| ---------------- | ------------------------------------------------------------------------------------------ |
+| `/models`        | Browse your models with live status. Select a model to load, switch, or unload it.         |
+| `/models info`   | Show detailed information for all available models at once.                                |
-> **Note:** When the llama.cpp server is unreachable, `/models` is still available but shows the description `Llama.cpp models (offline)` and displays an error notification with the configured server URL.
+> **Note:** When the llama.cpp server is unreachable, `/models` displays an error notification with the configured server URL.
 ### Model Actions

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-llama-cpp",
-  "version": "0.3.3",
+  "version": "0.4.0",
   "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
   "keywords": [
     "pi",

package/src/commands/models.ts CHANGED Viewed

@@ -130,8 +130,10 @@ export const notFoundCommand = async (
 /**
  * Handles the /models command
  *
+ * @param args Arguments passed to the command
  * @param ctx The context used by Pi
  * @param pi The Pi extension
+ * @param models List of available models
  */
 export const modelsCommand = async (
   ctx: ExtensionCommandContext,
@@ -173,7 +175,7 @@ export const modelsCommand = async (
       }
       if ((await model.getStatus()) === Status.FAILED) {
-        throw new Error("Failed to load model");
+        throw new Error(`Failed to load model  ${model.name}`);
       }
       await pi.setModel(piModel);

package/src/events.ts CHANGED Viewed

@@ -18,6 +18,9 @@ export const onModelSelect = async (
   const model = models.find((m) => m.id === event.model.id);
   if (!model) return;
-  ctx.ui.notify(`>> Loading ${model.id}...`, "info");
-  await model.load();
+  ctx.ui.notify(`Loading ${model.name}...`, "info");
+  await model
+    .load()
+    .then(() => ctx.ui.notify(`Model ${model.name} ready`, "info"))
+    .catch(() => ctx.ui.notify(`Failed to load model ${model.name}`, "error"));
 };

package/src/index.ts CHANGED Viewed

@@ -2,33 +2,19 @@ import type {
   ExtensionAPI,
   ExtensionCommandContext,
 } from "@earendil-works/pi-coding-agent";
-import { modelsCommand, notFoundCommand } from "./commands/models";
 import { PROVIDER_NAME } from "./constants";
 import { onModelSelect } from "./events";
-import { registerLlamaCppProvider } from "./tools/provider";
-import { isServerReady } from "./tools/retriever";
+import { CommandManager } from "./manager";
 export default async function (pi: ExtensionAPI) {
-  // Server verification
-  if (!(await isServerReady())) {
-    pi.registerCommand("models", {
-      description: `${PROVIDER_NAME} models (offline)`,
-      handler: async (_: string, ctx: ExtensionCommandContext) => {
-        await notFoundCommand(ctx);
-      },
-    });
-    return;
-  }
-  // Provider registration
-  const serverModels = await registerLlamaCppProvider(pi);
+  const manager = new CommandManager(pi);
+  await manager.initialize();
   // Command: /models
   pi.registerCommand("models", {
-    description: `Browse ${PROVIDER_NAME} models (live status)`,
-    handler: async (_: string, ctx: ExtensionCommandContext) =>
-      await modelsCommand(ctx, pi, serverModels),
+    description: `Browse ${PROVIDER_NAME} models`,
+    handler: async (args: string, ctx: ExtensionCommandContext) =>
+      await manager.run(args, ctx),
   });
   // Events registration

package/src/interfaces/endpoints/models.ts CHANGED Viewed

@@ -39,6 +39,7 @@ export interface DataProperty {
   owned_by: string;
   created: number;
   status?: StatusProperty;
+  architecture?: ArchitectureProperty;
   meta?: MetaProperty;
 }
@@ -50,6 +51,11 @@ interface StatusProperty {
   failed?: boolean;
 }
+interface ArchitectureProperty {
+  input_modalities: ("text" | "image" | "audio")[];
+  output_modalities: ["text"];
+}
 interface MetaProperty {
   vocab_type: number;
   n_vocab: number;

package/src/manager.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import type {
+  ExtensionAPI,
+  ExtensionCommandContext,
+  ProviderModelConfig,
+} from "@earendil-works/pi-coding-agent";
+import { modelsCommand, notFoundCommand } from "./commands/models";
+import {
+  DEFAULT_LLAMA_SERVER_URL,
+  PROVIDER_ID,
+  PROVIDER_NAME,
+} from "./constants";
+import { BaseModel } from "./models/baseModel";
+import { resolveApiKey, resolveUrl } from "./tools/resolver";
+import { isServerReady, listModels } from "./tools/retriever";
+export class CommandManager {
+  private baseUrl: string = DEFAULT_LLAMA_SERVER_URL;
+  private serverModels: BaseModel[] = [];
+  constructor(private readonly pi: ExtensionAPI) {}
+  /**
+   * Sets up the initial state of the provider
+   */
+  async initialize() {
+    if (await isServerReady()) {
+      await this.update();
+    } else {
+      await this.register([]);
+    }
+  }
+  /**
+   * Ensures the models are up-to-date with the server
+   */
+  async update() {
+    this.baseUrl = `${await resolveUrl(process.cwd())}`;
+    this.serverModels = await listModels();
+    const modelConfigs = await Promise.all(
+      this.serverModels.map((m) => m.toProviderConfig()),
+    );
+    await this.register(modelConfigs);
+  }
+  /**
+   * Registers the provider in Pi with the given configurations
+   * Note: Registrations overload previous provider
+   *
+   * @param models Provider configurations for the models
+   */
+  async register(models: ProviderModelConfig[]) {
+    this.pi.registerProvider(PROVIDER_ID, {
+      name: PROVIDER_NAME,
+      baseUrl: this.baseUrl,
+      api: "openai-completions",
+      apiKey: await resolveApiKey(),
+      models,
+    });
+  }
+  /**
+   * Dispatches the /models command
+   *
+   * @param args Arguments passed to the command
+   * @param ctx The context used by Pi
+   * @param pi The Pi extension
+   * @returns A command handler
+   */
+  async run(args: string, ctx: ExtensionCommandContext) {
+    if (!(await isServerReady())) {
+      return await notFoundCommand(ctx);
+    }
+    // Command: `/models info`
+    if (args === "info") {
+      const info = await Promise.all(this.serverModels.map((m) => m.getInfo()));
+      const message = ctx.ui.theme.fg("accent", info.join("\n"));
+      ctx.ui.notify(message, "info");
+      return;
+    }
+    // Command: `/models`
+    return await modelsCommand(ctx, this.pi, this.serverModels);
+  }
+}

package/src/models/baseModel.ts CHANGED Viewed

@@ -1,10 +1,5 @@
 import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
-import {
-  DEFAULT_CTX,
-  MAX_TOKENS,
-  POLLING_INTERVAL,
-  POLLING_TIMEOUT,
-} from "../constants";
+import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
 import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
@@ -55,17 +50,7 @@ export abstract class BaseModel {
    *
    * @returns An array of capabilities, as expected by Pi
    */
-  async getCapabilities(): Promise<["text"] | ["image"]> {
-    try {
-      const { modalities } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}`,
-      );
-      return modalities.vision ? ["image"] : ["text"];
-    } catch {
-      return ["text"];
-    }
-  }
+  abstract getCapabilities(): Promise<("text" | "image")[]>;
   /**
    * Gets the load status of the model
@@ -75,7 +60,7 @@ export abstract class BaseModel {
   public async getStatus(): Promise<Status> {
     try {
       const { is_sleeping, error } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}`,
+        `/props?model=${this.id}&autoload=false`,
       );
       if (is_sleeping) return Status.SLEEPING;
@@ -96,15 +81,10 @@ export abstract class BaseModel {
    * @returns The detected context size
    */
   async getContextSize(): Promise<number> {
-    try {
-      const { data } = await rpc<ModelsEndpoint>(`/models`);
-      const model = data.find((d) => d.id === this.id);
+    const { data } = await rpc<ModelsEndpoint>("/models");
+    const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
-      const response = model?.meta?.n_ctx;
-      return response ?? DEFAULT_CTX;
-    } catch {
-      return DEFAULT_CTX;
-    }
+    return n_ctx;
   }
   /**
@@ -157,7 +137,8 @@ export abstract class BaseModel {
    * Loads the model in llama-server
    */
   async load(): Promise<void> {
-    if ((await this.getStatus()) === Status.LOADED) return;
+    const status = await this.getStatus();
+    if (status === Status.LOADED || status === Status.SLEEPING) return;
     await rpc("/models/load", { model: this.id });
     await this.pollStatus();

package/src/models/routerModel.ts CHANGED Viewed

@@ -50,7 +50,7 @@ export class RouterModel extends BaseModel {
     // Grab the glitch
     while (Date.now() - startTime <= limit) {
       try {
-        await rpc<PropsEndpoint>(`/props?model=${this.id}`);
+        await rpc<PropsEndpoint>(`/props?model=${this.id}&autoload=false`);
         break;
       } catch {
         elapsed += POLLING_INTERVAL;
@@ -62,14 +62,17 @@ export class RouterModel extends BaseModel {
     return await super.pollStatus(startTime, timeout);
   }
-  async getCapabilities(): Promise<["text"] | ["image"]> {
-    // We can get the real capabilities if the model is already loaded
-    if ((await this.getStatus()) === Status.LOADED) {
-      return super.getCapabilities();
-    }
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    const { data } = await rpc<ModelsEndpoint>(`/models`);
+    const model = data.find((d) => d.id === this.id);
+    if (!model) return ["text"];
-    const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
-    return hasImage ? ["image"] : ["text"];
+    const { input_modalities } = model.architecture!;
+    const response = input_modalities.filter(
+      (mod) => mod === "text" || mod === "image",
+    );
+    return response;
   }
   async getContextSize(): Promise<number> {

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,6 +1,5 @@
-import { DEFAULT_CTX } from "../constants";
 import { Mode } from "../enums/mode";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
+import { ModelsEndpoint } from "../interfaces/endpoints/models";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
@@ -9,15 +8,11 @@ export class SingleModel extends BaseModel {
     return Mode.SINGLE;
   }
-  async getContextSize(): Promise<number> {
-    try {
-      const { default_generation_settings } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}`,
-      );
-      const { n_ctx } = default_generation_settings;
-      return n_ctx;
-    } catch {
-      return DEFAULT_CTX;
-    }
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    const { models } = await rpc<ModelsEndpoint>(`/models`);
+    const [model] = models!;
+    const hasImage = model.capabilities.includes("multimodal");
+    return hasImage ? ["text", "image"] : ["text"];
   }
 }

package/src/tools/retriever.ts CHANGED Viewed

@@ -28,7 +28,7 @@ export const isServerReady = async (): Promise<boolean> => {
 export const rpc = async <T>(
   endpoint: string,
   body?: Record<string, unknown>,
-) => {
+): Promise<T> => {
   const base = await resolveUrl(process.cwd());
   const url = `${base}${endpoint}`;

package/tests/commandManager.test.ts ADDED Viewed

@@ -0,0 +1,122 @@
+import { describe, expect, it, vi, beforeEach } from "vitest";
+import { CommandManager } from "../src/manager";
+import { PROVIDER_ID, PROVIDER_NAME } from "../src/constants";
+// Mock modules at top level (vi.mock is hoisted)
+vi.mock("../src/tools/retriever", () => ({
+  isServerReady: vi.fn(),
+  listModels: vi.fn(),
+}));
+vi.mock("../src/tools/resolver", () => ({
+  resolveUrl: vi.fn(),
+  resolveApiKey: vi.fn(),
+}));
+// Import mocked functions after vi.mock
+import { isServerReady, listModels } from "../src/tools/retriever";
+import { resolveUrl, resolveApiKey } from "../src/tools/resolver";
+const mockPi = {
+  registerProvider: vi.fn(),
+};
+beforeEach(() => {
+  vi.clearAllMocks();
+  (resolveUrl as any).mockResolvedValue("http://127.0.0.1:8080");
+  (resolveApiKey as any).mockResolvedValue("test-key");
+});
+describe("CommandManager", () => {
+  it("should register empty models when server is not ready", async () => {
+    (isServerReady as any).mockResolvedValue(false);
+    const manager = new CommandManager(mockPi as any);
+    await manager.initialize();
+    expect(mockPi.registerProvider).toHaveBeenCalledWith(PROVIDER_ID, {
+      name: PROVIDER_NAME,
+      baseUrl: "http://127.0.0.1:8080",
+      api: "openai-completions",
+      apiKey: "test-key",
+      models: [],
+    });
+  });
+  it("should update and register models when server is ready", async () => {
+    const mockModel = {
+      name: "test-model",
+      id: "test-model",
+      toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model", maxTokens: 32000 }),
+    };
+    (isServerReady as any).mockResolvedValue(true);
+    (listModels as any).mockResolvedValue([mockModel]);
+    const manager = new CommandManager(mockPi as any);
+    await manager.initialize();
+    expect(resolveUrl).toHaveBeenCalledWith(expect.any(String));
+    expect(listModels).toHaveBeenCalled();
+    expect(mockPi.registerProvider).toHaveBeenCalledWith(PROVIDER_ID, {
+      name: PROVIDER_NAME,
+      baseUrl: "http://127.0.0.1:8080",
+      api: "openai-completions",
+      apiKey: "test-key",
+      models: [{ id: "test-model", maxTokens: 32000 }],
+    });
+  });
+  it("should call notFoundCommand when server is not ready in run()", async () => {
+    (isServerReady as any).mockResolvedValue(false);
+    const manager = new CommandManager(mockPi as any);
+    await manager.run("", { ui: { notify: vi.fn() } } as any);
+    expect(mockPi.registerProvider).not.toHaveBeenCalled();
+  });
+  it("should show info for all models when args is 'info'", async () => {
+    const mockModel = {
+      name: "test-model",
+      id: "test-model",
+      getInfo: vi.fn().mockResolvedValue("Model info for test-model"),
+      toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
+    };
+    (isServerReady as any).mockResolvedValue(true);
+    (listModels as any).mockResolvedValue([mockModel]);
+    const notifyFn = vi.fn();
+    const manager = new CommandManager(mockPi as any);
+    await manager.initialize();
+    await manager.run("info", {
+      ui: { notify: notifyFn, theme: { fg: (_c: string, t: string) => t } },
+    } as any);
+    expect(notifyFn).toHaveBeenCalledWith(
+      "Model info for test-model",
+      "info",
+    );
+    expect(listModels).toHaveBeenCalledOnce();
+  });
+  it("should dispatch modelsCommand when args is empty", async () => {
+    const mockModel = {
+      name: "test-model",
+      id: "test-model",
+      getLabel: vi.fn().mockResolvedValue("test-model"),
+      toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
+    };
+    (isServerReady as any).mockResolvedValue(true);
+    (listModels as any).mockResolvedValue([mockModel]);
+    const selectFn = vi.fn().mockReturnValue(null); // cancel immediately
+    const manager = new CommandManager(mockPi as any);
+    await manager.initialize();
+    await manager.run("", {
+      ui: { notify: vi.fn(), select: selectFn },
+    } as any);
+    // modelsCommand was called (select is invoked for model picking)
+    expect(selectFn).toHaveBeenCalled();
+  });
+});

package/tests/handlers.test.ts CHANGED Viewed

@@ -62,7 +62,12 @@ const getActionsForModel = async (model: TestModel): Promise<Array<Action>> => {
     [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
     [Status.LOADING]: [Action.INFO, Action.CANCEL],
     [Status.FAILED]: [Action.RETRY, Action.CANCEL],
-    [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
+    [Status.SLEEPING]: [
+      Action.SWITCH,
+      Action.UNLOAD,
+      Action.INFO,
+      Action.CANCEL,
+    ],
     [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
   };
@@ -106,7 +111,7 @@ describe("Action availability", () => {
     {
       mode: Mode.ROUTER,
       status: Status.SLEEPING,
-      expected: [Action.UNLOAD, Action.INFO, Action.CANCEL],
+      expected: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
     },
     {
       mode: Mode.ROUTER,

package/tests/modelsCommand.test.ts ADDED Viewed

@@ -0,0 +1,134 @@
+import { describe, expect, it, vi } from "vitest";
+import { modelsCommand } from "../src/commands/models";
+import { Action } from "../src/enums/action";
+import { Mode } from "../src/enums/mode";
+import { Status } from "../src/enums/status";
+import { BaseModel } from "../src/models/baseModel";
+// Mock the retriever module
+vi.mock("../src/tools/retriever", () => ({
+  rpc: vi.fn(),
+  isServerReady: vi.fn(),
+  listModels: vi.fn(),
+}));
+// Helper to create a mock BaseModel
+const createMockModel = (
+  name: string,
+  overrides: Partial<BaseModel> = {},
+): BaseModel =>
+  ({
+    name,
+    id: name,
+    mode: Mode.ROUTER,
+    capabilities: ["text"] as ["text"],
+    getStatus: vi.fn().mockResolvedValue(Status.LOADED),
+    getContextSize: vi.fn().mockResolvedValue(4096),
+    getInfo: vi.fn().mockResolvedValue(`Model: ${name}\nID: ${name}`),
+    load: vi.fn().mockResolvedValue(undefined),
+    unload: vi.fn().mockResolvedValue(undefined),
+    toProviderConfig: vi.fn().mockResolvedValue({}),
+    getLabel: vi.fn().mockResolvedValue(name),
+    ...overrides,
+  }) as unknown as BaseModel;
+const createMockCtx = (
+  selectFn: (prompt: string, options: string[]) => string | null,
+) => ({
+  cwd: "/tmp/test",
+  ui: {
+    select: vi.fn(selectFn),
+    notify: vi.fn(),
+    theme: {
+      fg: (color: string, text: string) => text,
+    },
+  },
+  modelRegistry: {
+    find: vi.fn().mockReturnValue({ id: "test-model-id" }),
+  },
+});
+const createMockPi = () => ({
+  setModel: vi.fn(),
+  registerProvider: vi.fn(),
+});
+describe("modelsCommand", () => {
+  it("should return early on cancel (null model selection)", async () => {
+    const models = [createMockModel("model-a")];
+    const ctx = createMockCtx(() => null);
+    const pi = createMockPi();
+    await modelsCommand(ctx as any, pi as any, models);
+    expect(ctx.ui.notify).not.toHaveBeenCalled();
+  });
+  it("should show info when INFO action is selected", async () => {
+    const model = createMockModel("model-a");
+    const models = [model];
+    const ctx = createMockCtx((prompt) => {
+      if (prompt.includes("models")) return "model-a";
+      return Action.INFO;
+    });
+    const pi = createMockPi();
+    await modelsCommand(ctx as any, pi as any, models);
+    expect(ctx.ui.notify).toHaveBeenCalledWith(
+      "Model: model-a\nID: model-a",
+      "info",
+    );
+  });
+  it("should unload model when UNLOAD action is selected", async () => {
+    const model = createMockModel("model-a");
+    const models = [model];
+    const ctx = createMockCtx((prompt) => {
+      if (prompt.includes("models")) return "model-a";
+      return Action.UNLOAD;
+    });
+    const pi = createMockPi();
+    await modelsCommand(ctx as any, pi as any, models);
+    expect(model.unload).toHaveBeenCalled();
+    expect(ctx.ui.notify).toHaveBeenCalledWith("Unloaded model-a", "info");
+  });
+  it("should load model when LOAD action is selected", async () => {
+    const loadFn = vi.fn().mockResolvedValue(undefined);
+    const model = createMockModel("model-a");
+    (model.load as any) = loadFn;
+    (model.getStatus as any).mockResolvedValue(Status.UNLOADED);
+    const models = [model];
+    const ctx = createMockCtx((prompt) => {
+      if (prompt.includes("models")) return "model-a";
+      return Action.LOAD;
+    });
+    const pi = createMockPi();
+    await modelsCommand(ctx as any, pi as any, models);
+    await vi.waitFor(() => expect(loadFn).toHaveBeenCalled());
+    await vi.waitFor(() => expect(pi.setModel).toHaveBeenCalled());
+  });
+  it("should loop back to model selection when action is cancelled", async () => {
+    const model = createMockModel("model-a");
+    const models = [model];
+    let selectCallCount = 0;
+    const ctx = createMockCtx(() => {
+      selectCallCount++;
+      // 1st: select model-a, 2nd: cancel action, 3rd: cancel model => exit
+      if (selectCallCount === 1) return "model-a";
+      return null;
+    });
+    const pi = createMockPi();
+    await modelsCommand(ctx as any, pi as any, models);
+    expect(ctx.ui.select).toHaveBeenCalledTimes(3);
+    expect(ctx.ui.notify).not.toHaveBeenCalled();
+  });
+});

package/tests/routerModel.test.ts CHANGED Viewed

@@ -154,7 +154,7 @@ describe("RouterModel context size extraction", () => {
     expect(ctxSize).toBe(4096);
   });
-  it("should return DEFAULT_CTX when no context size args are present and loaded", async () => {
+  it("should return n_ctx from meta when loaded without context size args", async () => {
     // First call: getStatus() -> /models
     mockRpc.mockResolvedValueOnce({
       data: [
@@ -168,17 +168,16 @@ describe("RouterModel context size extraction", () => {
         },
       ],
     });
-    // Second call: super.getContextSize() -> /models without meta.n_ctx
+    // Second call: super.getContextSize() -> /models with meta.n_ctx
     mockRpc.mockResolvedValueOnce({
       data: [
         {
           id: "test-model",
+          meta: { n_ctx: 4096 },
         },
       ],
     });
-    const { DEFAULT_CTX } = await import("../src/constants");
     const model = new RouterModel(
       createModel({
         status: {
@@ -190,13 +189,12 @@ describe("RouterModel context size extraction", () => {
     );
     const ctxSize = await model.getContextSize();
-    expect(ctxSize).toBe(DEFAULT_CTX);
+    expect(ctxSize).toBe(4096);
   });
 });
 describe("RouterModel capabilities detection", () => {
-  it("should detect image capability when modalities.vision is true", async () => {
-    // getStatus() calls /models first
+  it("should detect image capability from architecture.input_modalities", async () => {
     mockRpc.mockResolvedValueOnce({
       data: [
         {
@@ -207,21 +205,22 @@ describe("RouterModel capabilities detection", () => {
             preset: "default",
             failed: false,
           },
+          architecture: {
+            input_modalities: ["text", "image"],
+            output_modalities: ["text"],
+          },
         },
       ],
     });
-    // super.getCapabilities() calls /props?model=<id>
-    mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
     const model = new RouterModel(createModel());
     const capabilities = await model.getCapabilities();
-    expect(capabilities).toEqual(["image"]);
-    expect(mockRpc).toHaveBeenCalledWith("/props?model=test-model");
+    expect(capabilities).toEqual(["text", "image"]);
+    expect(mockRpc).toHaveBeenCalledWith("/models");
   });
-  it("should detect text-only capability when modalities.vision is false", async () => {
-    // getStatus() calls /models first
+  it("should detect text-only capability when only text in input_modalities", async () => {
     mockRpc.mockResolvedValueOnce({
       data: [
         {
@@ -232,11 +231,13 @@ describe("RouterModel capabilities detection", () => {
             preset: "default",
             failed: false,
           },
+          architecture: {
+            input_modalities: ["text"],
+            output_modalities: ["text"],
+          },
         },
       ],
     });
-    // super.getCapabilities() calls /props?model=<id>
-    mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
     const model = new RouterModel(createModel());
     const capabilities = await model.getCapabilities();
@@ -244,12 +245,11 @@ describe("RouterModel capabilities detection", () => {
     expect(capabilities).toEqual(["text"]);
   });
-  it("should default to text when /props call fails", async () => {
-    // getStatus() calls /models first
+  it("should return text when model not found in /models response", async () => {
     mockRpc.mockResolvedValueOnce({
       data: [
         {
-          id: "test-model",
+          id: "other-model",
           status: {
             value: "loaded",
             args: [],
@@ -259,76 +259,12 @@ describe("RouterModel capabilities detection", () => {
         },
       ],
     });
-    // super.getCapabilities() calls /props?model=<id> which fails
-    mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
     const model = new RouterModel(createModel());
     const capabilities = await model.getCapabilities();
     expect(capabilities).toEqual(["text"]);
   });
-  it("should use status.args to detect image capability when not loaded", async () => {
-    // getStatus() calls /models first, returns unloaded
-    mockRpc.mockResolvedValueOnce({
-      data: [
-        {
-          id: "test-model",
-          status: {
-            value: "unloaded",
-            args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
-            preset: "default",
-            failed: false,
-          },
-        },
-      ],
-    });
-    const model = new RouterModel(
-      createModel({
-        status: {
-          value: "unloaded",
-          args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
-          preset: "default",
-          failed: false,
-        },
-      }),
-    );
-    const capabilities = await model.getCapabilities();
-    expect(capabilities).toEqual(["image"]);
-  });
-  it("should return text when not loaded and no --mmproj in args", async () => {
-    // getStatus() calls /models first, returns unloaded
-    mockRpc.mockResolvedValueOnce({
-      data: [
-        {
-          id: "test-model",
-          status: {
-            value: "unloaded",
-            args: ["--model", "gguf"],
-            preset: "default",
-            failed: false,
-          },
-        },
-      ],
-    });
-    const model = new RouterModel(
-      createModel({
-        status: {
-          value: "unloaded",
-          args: ["--model", "gguf"],
-          preset: "default",
-          failed: false,
-        },
-      }),
-    );
-    const capabilities = await model.getCapabilities();
-    expect(capabilities).toEqual(["text"]);
-  });
 });
 describe("RouterModel mode", () => {

package/tests/singleModel.test.ts CHANGED Viewed

@@ -1,5 +1,4 @@
 import { beforeEach, describe, expect, it, vi } from "vitest";
-import { DEFAULT_CTX } from "../src/constants";
 import { Mode } from "../src/enums/mode";
 import { Status } from "../src/enums/status";
 import { ModelProperty } from "../src/interfaces/endpoints/models";
@@ -34,27 +33,22 @@ describe("SingleModel mode", () => {
 });
 describe("SingleModel capabilities", () => {
-  it("should detect image capability when modalities.vision is true", async () => {
-    mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
-    const model = createModel();
-    const capabilities = await model.getCapabilities();
-    expect(capabilities).toEqual(["image"]);
-    expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
-  });
-  it("should detect text-only capability when modalities.vision is false", async () => {
-    mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
+  it("should detect image capability when multimodal is in capabilities", async () => {
+    mockRpc.mockResolvedValueOnce({
+      models: [{ id: "test", capabilities: ["multimodal"] }],
+    });
     const model = createModel();
     const capabilities = await model.getCapabilities();
-    expect(capabilities).toEqual(["text"]);
+    expect(capabilities).toEqual(["text", "image"]);
+    expect(mockRpc).toHaveBeenCalledWith("/models");
   });
-  it("should return text when /props call fails", async () => {
-    mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
+  it("should detect text-only capability when multimodal is not in capabilities", async () => {
+    mockRpc.mockResolvedValueOnce({
+      models: [{ id: "test", capabilities: [] }],
+    });
     const model = createModel();
     const capabilities = await model.getCapabilities();
@@ -71,7 +65,7 @@ describe("SingleModel getStatus", () => {
     const status = await model.getStatus();
     expect(status).toBe(Status.LOADED);
-    expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
+    expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}&autoload=false`);
   });
   it("should return SLEEPING when is_sleeping is true", async () => {
@@ -96,22 +90,4 @@ describe("SingleModel getContextSize", () => {
     expect(ctxSize).toBe(8192);
     expect(mockRpc).toHaveBeenCalledWith("/models");
   });
-  it("should return DEFAULT_CTX when model not found in /models", async () => {
-    mockRpc.mockResolvedValueOnce({ data: [] });
-    const model = createModel();
-    const ctxSize = await model.getContextSize();
-    expect(ctxSize).toBe(DEFAULT_CTX);
-  });
-  it("should return DEFAULT_CTX when /models fails", async () => {
-    mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
-    const model = createModel();
-    const ctxSize = await model.getContextSize();
-    expect(ctxSize).toBe(DEFAULT_CTX);
-  });
 });

package/src/tools/provider.ts DELETED Viewed

@@ -1,28 +0,0 @@
-import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
-import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
-import type { BaseModel } from "../models/baseModel";
-import { resolveApiKey, resolveUrl } from "./resolver";
-import { listModels } from "./retriever";
-/**
- * Registers the Llama.cpp provider and returns the fetched models.
- *
- * @param pi The Pi extension API
- * @returns The list of models fetched from the server
- */
-export const registerLlamaCppProvider = async (
-  pi: ExtensionAPI,
-): Promise<BaseModel[]> => {
-  const baseUrl = `${await resolveUrl(process.cwd())}/v1`;
-  const models = await listModels();
-  pi.registerProvider(PROVIDER_ID, {
-    name: PROVIDER_NAME,
-    baseUrl,
-    api: "openai-completions",
-    apiKey: await resolveApiKey(),
-    models: await Promise.all(models.map((m) => m.toProviderConfig())),
-  });
-  return models;
-};