npm - pi-llama-cpp - Versions diffs - 0.5.0 → 0.6.0 - Mend

pi-llama-cpp 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +58 -27
package/package.json +5 -4
package/src/constants.ts +9 -4
package/src/enums/action.ts +3 -2
package/src/enums/mode.ts +1 -0
package/src/enums/status.ts +1 -0
package/src/index.ts +33 -28
package/src/interfaces/auth.ts +1 -5
package/src/interfaces/endpoints/props.ts +1 -0
package/src/managers/command.ts +290 -0
package/src/managers/events.ts +63 -0
package/src/managers/server.ts +71 -0
package/src/models/baseModel.ts +68 -20
package/src/models/legacyModel.ts +45 -0
package/src/models/routerModel.ts +7 -30
package/src/models/singleModel.ts +9 -6
package/src/resolver.ts +123 -0
package/src/server.ts +171 -0
package/tests/commandManager.test.ts +182 -132
package/tests/legacyModel.test.ts +112 -0
package/tests/mocks.ts +97 -0
package/tests/resolver.test.ts +163 -104
package/tests/routerModel.test.ts +46 -68
package/tests/server.test.ts +175 -0
package/tests/serverManager.test.ts +117 -0
package/tests/singleModel.test.ts +21 -29
package/src/commands/models.ts +0 -228
package/src/events.ts +0 -26
package/src/manager.ts +0 -93
package/src/tools/resolver.ts +0 -141
package/src/tools/retriever.ts +0 -71
package/tests/handlers.test.ts +0 -164
package/tests/modelsCommand.test.ts +0 -270

package/tests/server.test.ts ADDED Viewed

@@ -0,0 +1,175 @@
+import { beforeEach, describe, expect, it } from "vitest";
+import { Server } from "../src/server";
+import { createMockServer, mockRpc } from "./mocks";
+beforeEach(() => {
+  mockRpc.mockClear();
+});
+describe("Server providerId", () => {
+  it("should generate a unique provider ID from baseUrl", () => {
+    const server = new Server("http://127.0.0.1:8080");
+    expect(server.providerId).toBe("llama-server=http://127.0.0.1:8080");
+  });
+  it("should generate different IDs for different baseUrls", () => {
+    const server1 = new Server("http://127.0.0.1:8080");
+    const server2 = new Server("http://127.0.0.1:8081");
+    expect(server1.providerId).not.toBe(server2.providerId);
+  });
+});
+describe("Server providerName", () => {
+  it("should generate a human-readable provider name", () => {
+    const server = new Server("http://127.0.0.1:8080");
+    expect(server.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
+  });
+});
+describe("Server fetchModels", () => {
+  it("should call the /models endpoint", async () => {
+    mockRpc.mockResolvedValueOnce({
+      data: [{ id: "model1" }],
+      models: [{ id: "model1" }],
+      object: "list",
+    });
+    const server = createMockServer();
+    const result = await server.fetchModels();
+    expect(result).toEqual({
+      data: [{ id: "model1" }],
+      models: [{ id: "model1" }],
+      object: "list",
+    });
+    expect(mockRpc).toHaveBeenCalledWith("/v1/models");
+  });
+});
+describe("Server fetchModelProps", () => {
+  it("should call the /props endpoint with model id", async () => {
+    mockRpc.mockResolvedValueOnce({
+      is_sleeping: false,
+      default_generation_settings: {},
+      total_slots: 1,
+      model_alias: "test",
+      model_path: "/path/to/model.gguf",
+      modalities: { vision: false, audio: false },
+      media_marker: "",
+      endpoint_slots: false,
+      endpoint_props: false,
+      endpoint_metrics: false,
+      webui: false,
+      webui_settings: {},
+      chat_template: "",
+      chat_template_caps: {},
+      bos_token: "",
+      eos_token: "",
+      build_info: "",
+    });
+    const server = createMockServer();
+    const result = await server.fetchModelProps("test-model");
+    expect(result.is_sleeping).toBe(false);
+    expect(mockRpc).toHaveBeenCalledWith(
+      "/props?model=test-model&autoload=false",
+    );
+  });
+});
+describe("Server fetchServerHealth", () => {
+  it("should call the /health endpoint", async () => {
+    mockRpc.mockResolvedValueOnce({ status: "ok" });
+    const server = createMockServer();
+    const result = await server.fetchServerHealth();
+    expect(result).toEqual({ status: "ok" });
+    expect(mockRpc).toHaveBeenCalledWith("/health");
+  });
+});
+describe("Server fetchServerProps", () => {
+  it("should call the /props endpoint without model", async () => {
+    mockRpc.mockResolvedValueOnce({
+      role: "router",
+      default_generation_settings: {},
+      total_slots: 2,
+      model_alias: "",
+      model_path: "",
+      modalities: { vision: false, audio: false },
+      media_marker: "",
+      endpoint_slots: false,
+      endpoint_props: false,
+      endpoint_metrics: false,
+      webui: false,
+      webui_settings: {},
+      chat_template: "",
+      chat_template_caps: {},
+      bos_token: "",
+      eos_token: "",
+      build_info: "",
+      is_sleeping: false,
+    });
+    const server = createMockServer();
+    const result = await server.fetchServerProps();
+    expect(result.role).toBe("router");
+    expect(mockRpc).toHaveBeenCalledWith("/props?autoload=false");
+  });
+});
+describe("Server postRequest", () => {
+  it("should call /models/load with model in body", async () => {
+    mockRpc.mockResolvedValueOnce({});
+    const server = createMockServer();
+    await server.postRequest("load", "test-model");
+    expect(mockRpc).toHaveBeenCalledWith("/models/load", {
+      model: "test-model",
+    });
+  });
+  it("should call /models/unload with model in body", async () => {
+    mockRpc.mockResolvedValueOnce({});
+    const server = createMockServer();
+    await server.postRequest("unload", "test-model");
+    expect(mockRpc).toHaveBeenCalledWith("/models/unload", {
+      model: "test-model",
+    });
+  });
+});
+describe("Server isReady", () => {
+  it("should return true when health status is ok", async () => {
+    mockRpc.mockResolvedValueOnce({ status: "ok" });
+    const server = createMockServer();
+    const ready = await server.isReady();
+    expect(ready).toBe(true);
+  });
+  it("should return false when health check fails", async () => {
+    mockRpc.mockRejectedValueOnce(new Error("connection refused"));
+    const server = createMockServer();
+    const ready = await server.isReady();
+    expect(ready).toBe(false);
+  });
+  it("should return false when health status is not ok", async () => {
+    mockRpc.mockResolvedValueOnce({ status: "error" });
+    const server = createMockServer();
+    const ready = await server.isReady();
+    expect(ready).toBe(false);
+  });
+});

package/tests/serverManager.test.ts ADDED Viewed

@@ -0,0 +1,117 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { ServerManager } from "../src/managers/server";
+import { BaseModel } from "../src/models/baseModel";
+import { Server } from "../src/server";
+import { createMockServer, mockRpc } from "./mocks";
+const mockPi = {
+  registerProvider: vi.fn(),
+  registerCommand: vi.fn(),
+  setModel: vi.fn(),
+};
+beforeEach(() => {
+  vi.clearAllMocks();
+  mockRpc.mockResolvedValue({});
+});
+describe("Server", () => {
+  it("should generate provider IDs from URLs", () => {
+    const server1 = new Server("http://127.0.0.1:8080");
+    expect(server1.providerId).toBe("llama-server=http://127.0.0.1:8080");
+    const server2 = new Server("http://10.0.0.5:8080");
+    expect(server2.providerId).toBe("llama-server=http://10.0.0.5:8080");
+    const server3 = new Server("http://127.0.0.1");
+    expect(server3.providerId).toBe("llama-server=http://127.0.0.1");
+    const server4 = new Server("http://127.0.0.1:80");
+    expect(server4.providerId).toBe("llama-server=http://127.0.0.1:80");
+    const server5 = new Server("https://127.0.0.1:443");
+    expect(server5.providerId).toBe("llama-server=https://127.0.0.1:443");
+  });
+  it("should generate provider names from URLs", () => {
+    const server1 = new Server("http://127.0.0.1:8080");
+    expect(server1.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
+    const server2 = new Server("http://10.0.0.5:8080");
+    expect(server2.providerName).toBe("Llama.cpp (http://10.0.0.5:8080)");
+  });
+});
+describe("ServerManager", () => {
+  it("should register providers for all servers", async () => {
+    const mockModel = {
+      name: "test-model",
+      id: "test-model",
+      toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
+    } as unknown as BaseModel;
+    mockRpc.mockResolvedValue({
+      data: [mockModel],
+      object: "list",
+    });
+    const server1 = createMockServer({
+      baseUrl: "http://127.0.0.1:8080",
+      apiKey: "key-1",
+      providerId: "llama-server=http://127.0.0.1:8080",
+      providerName: "Llama.cpp (http://127.0.0.1:8080)",
+    });
+    const server2 = createMockServer({
+      baseUrl: "http://127.0.0.1:8081",
+      apiKey: "key-2",
+      providerId: "llama-server=http://127.0.0.1:8081",
+      providerName: "Llama.cpp (http://127.0.0.1:8081)",
+    });
+    const manager = new ServerManager([server1, server2] as any);
+    await manager.registerAllProviders(mockPi as any);
+    expect(mockPi.registerProvider).toHaveBeenCalledTimes(2);
+    expect(mockPi.registerProvider).toHaveBeenCalledWith(
+      "llama-server=http://127.0.0.1:8080",
+      {
+        name: "Llama.cpp (http://127.0.0.1:8080)",
+        baseUrl: "http://127.0.0.1:8080",
+        api: "openai-completions",
+        apiKey: "key-1",
+        models: [{ id: "test-model" }],
+      },
+    );
+    expect(mockPi.registerProvider).toHaveBeenCalledWith(
+      "llama-server=http://127.0.0.1:8081",
+      {
+        name: "Llama.cpp (http://127.0.0.1:8081)",
+        baseUrl: "http://127.0.0.1:8081",
+        api: "openai-completions",
+        apiKey: "key-2",
+        models: [{ id: "test-model" }],
+      },
+    );
+  });
+  it("should return all models from all servers", () => {
+    const mockModel1 = {
+      name: "model-1",
+      id: "model-1",
+    } as unknown as BaseModel;
+    const mockModel2 = {
+      name: "model-2",
+      id: "model-2",
+    } as unknown as BaseModel;
+    const server1 = createMockServer({
+      baseUrl: "http://127.0.0.1:8080",
+    });
+    const server2 = createMockServer({
+      baseUrl: "http://127.0.0.1:8081",
+    });
+    const manager = new ServerManager([
+      { ...server1, models: [mockModel1] } as any,
+      { ...server2, models: [mockModel2] } as any,
+    ] as any);
+    const allModels = manager.getAllModels();
+    expect(allModels).toHaveLength(2);
+    expect(allModels[0]).toBe(mockModel1);
+    expect(allModels[1]).toBe(mockModel2);
+  });
+});

package/tests/singleModel.test.ts CHANGED Viewed

@@ -1,29 +1,26 @@
-import { beforeEach, describe, expect, it, vi } from "vitest";
+import { beforeEach, describe, expect, it } from "vitest";
 import { Mode } from "../src/enums/mode";
 import { Status } from "../src/enums/status";
-import { ModelProperty } from "../src/interfaces/endpoints/models";
+import { DataProperty } from "../src/interfaces/endpoints/models";
 import { SingleModel } from "../src/models/singleModel";
-const mockRpc = vi.fn();
-vi.mock("../src/tools/retriever", () => ({
-  rpc: (...args: unknown[]) => mockRpc(...args),
-  isServerReady: vi.fn(),
-  listModels: vi.fn(),
-}));
+import { createMockServer, mockRpc } from "./mocks";
 beforeEach(() => {
-  mockRpc.mockClear();
+  mockRpc.mockReset();
 });
-const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
-  new SingleModel({
-    id: "test",
-    tags: [],
-    object: "model",
-    owned_by: "test",
-    created: Date.now(),
-  });
+const createModel = (extra: Partial<DataProperty> = {}): SingleModel =>
+  new SingleModel(
+    {
+      id: "test",
+      tags: [],
+      object: "model",
+      owned_by: "test",
+      created: Date.now(),
+      ...extra,
+    },
+    createMockServer(),
+  );
 describe("SingleModel mode", () => {
   it("should always return SINGLE mode", () => {
@@ -34,21 +31,16 @@ describe("SingleModel mode", () => {
 describe("SingleModel capabilities", () => {
   it("should detect image capability when multimodal is in capabilities", async () => {
-    mockRpc.mockResolvedValueOnce({
-      models: [{ id: "test", capabilities: ["multimodal"] }],
-    });
+    mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
     const model = createModel();
     const capabilities = await model.getCapabilities();
     expect(capabilities).toEqual(["text", "image"]);
-    expect(mockRpc).toHaveBeenCalledWith("/models");
   });
   it("should detect text-only capability when multimodal is not in capabilities", async () => {
-    mockRpc.mockResolvedValueOnce({
-      models: [{ id: "test", capabilities: [] }],
-    });
+    mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
     const model = createModel();
     const capabilities = await model.getCapabilities();
@@ -81,8 +73,8 @@ describe("SingleModel getStatus", () => {
 });
 describe("SingleModel getContextSize", () => {
-  it("should return n_ctx from /models endpoint meta", async () => {
-    mockRpc.mockResolvedValueOnce({
+  it("should return n_ctx from /v1/models endpoint meta", async () => {
+    mockRpc.mockResolvedValue({
       data: [{ id: "test", meta: { n_ctx: 8192 } }],
     });
@@ -90,6 +82,6 @@ describe("SingleModel getContextSize", () => {
     const ctxSize = await model.getContextSize();
     expect(ctxSize).toBe(8192);
-    expect(mockRpc).toHaveBeenCalledWith("/models");
+    expect(mockRpc).toHaveBeenCalledWith("/v1/models");
   });
 });

package/src/commands/models.ts DELETED Viewed

@@ -1,228 +0,0 @@
-import type {
-  ExtensionAPI,
-  ExtensionCommandContext,
-  ExtensionContext,
-  SessionBeforeSwitchEvent,
-} from "@earendil-works/pi-coding-agent";
-import { PROVIDER_ID, PROVIDER_NAME, READABLE_TIMEOUT } from "../constants";
-import { Action } from "../enums/action";
-import { Mode } from "../enums/mode";
-import { Status } from "../enums/status";
-import { BaseModel } from "../models/baseModel";
-import { resolveUrl } from "../tools/resolver";
-// In-flight model reference — handler gates on this.
-let inflightModel: BaseModel | null = null;
-export const resetInflightModel = () => (inflightModel = null);
-/**
- * Session-switch handler. Registered once at extension init.
- * Only notifies if a model load is actually in-flight.
- */
-export const onSessionBeforeSwitch = async (
-  _event: SessionBeforeSwitchEvent,
-  ctx: ExtensionContext,
-) => {
-  if (!inflightModel) return;
-  const messages = [
-    `Session change detected while model '${inflightModel.name}' was still loading.`,
-    "Model load will continue in the background, but UI might not update.",
-    "",
-    "Verify that your new model is loaded, or use /models to re-select it afterwards.",
-  ];
-  ctx.ui.notify(messages.join("\n"), "warning");
-  // Show the notification for a reasonable amount of time
-  await new Promise((r) => setTimeout(r, READABLE_TIMEOUT));
-};
-/**
- * Select a model from the list. Returns null if user cancels.
- *
- * @param ctx Pi context
- * @param models A list of models
- * @returns The selected model
- */
-const selectModel = async (
-  ctx: ExtensionCommandContext,
-  models: BaseModel[],
-): Promise<BaseModel | null> => {
-  const labels = await Promise.all(models.map((m) => m.getLabel()));
-  const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, labels);
-  if (!choice) return null;
-  const idx = labels.indexOf(choice);
-  return models[idx];
-};
-/**
- * Get available actions for a model based on its mode and status.
- *
- * @param model The selected model
- * @returns The array of available actions for the given model status
- */
-const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
-  const routerModeActions: Record<Status, Array<Action>> = {
-    [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
-    [Status.LOADING]: [Action.INFO, Action.CANCEL],
-    [Status.FAILED]: [Action.RETRY, Action.CANCEL],
-    [Status.SLEEPING]: [
-      Action.SWITCH,
-      Action.UNLOAD,
-      Action.INFO,
-      Action.CANCEL,
-    ],
-    [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
-  };
-  const singleModeActions: Record<Status, Array<Action>> = {
-    [Status.LOADED]: [Action.INFO, Action.CANCEL],
-    [Status.LOADING]: [Action.CANCEL],
-    [Status.FAILED]: [Action.CANCEL],
-    [Status.SLEEPING]: [Action.INFO, Action.CANCEL],
-    [Status.UNLOADED]: [Action.CANCEL],
-  };
-  const allActions =
-    model.mode === Mode.ROUTER ? routerModeActions : singleModeActions;
-  const status = await model.getStatus();
-  return allActions[status];
-};
-/**
- * Selects an action for a model.
- *
- * @param ctx Pi context
- * @param model The selected model
- * @param actions Possible actions to execute
- * @returns The action, or null if user cancels
- */
-const selectAction = async (
-  ctx: ExtensionCommandContext,
-  model: BaseModel,
-  actions: Array<Action>,
-): Promise<Action | null> => {
-  const labels = actions.map((a) => String(a));
-  const choice = await ctx.ui.select(`${model.name}`, labels);
-  if (!choice) return null;
-  const idx = labels.indexOf(choice);
-  return actions[idx];
-};
-/**
- * Handles the menu for model selection
- * Loops: select model → select action → handle action.
- *
- * Escape on actions menu goes back to model selection.
- * Escape on model selection exits.
- *
- * @param ctx Pi context
- * @returns The action and model, if detected
- */
-const modelSelectionHandler = async (
-  ctx: ExtensionCommandContext,
-  models: BaseModel[],
-): Promise<{ action: Action; model: BaseModel } | null> => {
-  while (true) {
-    // Select the model
-    const model = await selectModel(ctx, models);
-    if (!model) return null;
-    // Select the action
-    const actions = await getActionsForModel(model);
-    const action = await selectAction(ctx, model, actions);
-    if (action === null) {
-      // Escape key pressed => back to model selection
-      continue;
-    }
-    // Return the selected action and model
-    return { action, model };
-  }
-};
-/**
- * Handles the /models command when the server is unreachable.
- *
- * @param ctx The context used by Pi
- */
-export const notFoundCommand = async (
-  ctx: ExtensionCommandContext,
-): Promise<void> => {
-  const url = await resolveUrl(ctx.cwd);
-  ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
-};
-/**
- * Handles the /models command
- *
- * @param args Arguments passed to the command
- * @param ctx The context used by Pi
- * @param pi The Pi extension
- * @param models List of available models
- */
-export const modelsCommand = async (
-  ctx: ExtensionCommandContext,
-  pi: ExtensionAPI,
-  models: BaseModel[],
-): Promise<void> => {
-  const event = await modelSelectionHandler(ctx, models);
-  if (!event) return;
-  // Detect the model
-  const { action, model } = event;
-  // Action: Cancel
-  if (!action || action === Action.CANCEL) return;
-  // Action: Info
-  if (action === Action.INFO) {
-    const info = await model.getInfo();
-    ctx.ui.notify(`${info}`, "info");
-    return;
-  }
-  // Action: Unload
-  if (action === Action.UNLOAD) {
-    await model.unload();
-    ctx.ui.notify(`Unloaded ${model.name}`, "info");
-    return;
-  }
-  // Actions: Load/Switch/Retry
-  const loadActions = [Action.LOAD, Action.SWITCH, Action.RETRY];
-  if (loadActions.includes(action)) {
-    ctx.ui.notify(`Loading ${model.name}...`, "info");
-    inflightModel = model;
-    const onSuccess = async () => {
-      const piModel = ctx.modelRegistry.find(PROVIDER_ID, model.id);
-      if (!piModel) {
-        throw new Error(`Cannot find model ${model.name} in pi registry`);
-      }
-      if ((await model.getStatus()) === Status.FAILED) {
-        throw new Error(`Failed to load model ${model.name}`);
-      }
-      await pi.setModel(piModel);
-      ctx.ui.notify(`Model ${model.name} ready`, "info");
-    };
-    const onFailure = (err: any) => {
-      const message = err instanceof Error ? err.message : String(err);
-      try {
-        ctx.ui.notify(message, "error");
-      } catch {
-        // ctx went stale between error and notification
-      }
-    };
-    // Load the model without blocking the UI
-    model.load().then(onSuccess).catch(onFailure).finally(resetInflightModel);
-  }
-};

package/src/events.ts DELETED Viewed

@@ -1,26 +0,0 @@
-import { ExtensionContext } from "@earendil-works/pi-coding-agent";
-import { PROVIDER_ID } from "./constants";
-import { ModelSelectEvent } from "./interfaces/events";
-import { listModels } from "./tools/retriever";
-/**
- * Reacts to a new model event triggered by Pi
- * @param event Model selection event
- * @param ctx Pi context
- */
-export const onModelSelect = async (
-  event: ModelSelectEvent,
-  ctx: ExtensionContext,
-) => {
-  if (event.model.provider !== PROVIDER_ID) return;
-  const models = await listModels();
-  const model = models.find((m) => m.id === event.model.id);
-  if (!model) return;
-  ctx.ui.notify(`Loading ${model.name}...`, "info");
-  await model
-    .load()
-    .then(() => ctx.ui.notify(`Model ${model.name} ready`, "info"))
-    .catch(() => ctx.ui.notify(`Failed to load model ${model.name}`, "error"));
-};