npm - pi-llama-cpp - Versions diffs - 0.3.0 → 0.3.2 - Mend

pi-llama-cpp 0.3.0 → 0.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/src/commands/models.ts +7 -2
package/src/interfaces/endpoints/props.ts +7 -4
package/src/models/baseModel.ts +20 -1
package/src/models/routerModel.ts +42 -1
package/src/models/singleModel.ts +0 -11
package/src/tools/resolver.ts +5 -5
package/src/tools/retriever.ts +8 -11
package/tests/routerModel.test.ts +59 -9
package/tests/singleModel.test.ts +8 -31

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-llama-cpp",
-  "version": "0.3.0",
+  "version": "0.3.2",
   "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
   "keywords": [
     "pi",

package/src/commands/models.ts CHANGED Viewed

@@ -31,14 +31,19 @@ const selectModel = async (
  * Get available actions for a model based on its mode and status.
  *
  * @param model The selected model
- * @returns
+ * @returns The array of available actions for the given model status
  */
 const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
   const routerModeActions: Record<Status, Array<Action>> = {
     [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
     [Status.LOADING]: [Action.INFO, Action.CANCEL],
     [Status.FAILED]: [Action.RETRY, Action.CANCEL],
-    [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
+    [Status.SLEEPING]: [
+      Action.SWITCH,
+      Action.UNLOAD,
+      Action.INFO,
+      Action.CANCEL,
+    ],
     [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
   };

package/src/interfaces/endpoints/props.ts CHANGED Viewed

@@ -1,11 +1,8 @@
 /**
  * The structure of llama-server's /props endpoint
- *
- * In single mode, applies to /props
- * In router mode, applies to /props?model=<id>
  */
 export interface PropsEndpoint {
+  error?: PropsError;
   default_generation_settings: Record<string, any>;
   total_slots: number;
   model_alias: string;
@@ -27,3 +24,9 @@ export interface PropsEndpoint {
   build_info: string;
   is_sleeping: boolean;
 }
+export interface PropsError {
+  code: number;
+  message: string;
+  type: string;
+}

package/src/models/baseModel.ts CHANGED Viewed

@@ -11,6 +11,11 @@ import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
 import { PropsEndpoint } from "../interfaces/endpoints/props";
 import { rpc } from "../tools/retriever";
+/**
+ * Abstract base class for llama-server models.
+ * Provides common functionality for model identification, status checking,
+ * loading/unloading, and configuration conversion.
+ */
 export abstract class BaseModel {
   constructor(protected readonly model: DataProperty) {}
@@ -65,7 +70,21 @@ export abstract class BaseModel {
   /**
    * Gets the load status of the model
    */
-  abstract getStatus(): Promise<Status>;
+  public async getStatus(): Promise<Status> {
+    try {
+      const { is_sleeping, error } = await rpc<PropsEndpoint>(
+        `/props?model=${this.id}`,
+      );
+      if (is_sleeping) return Status.SLEEPING;
+      if (!error) return Status.LOADED;
+      if (error.code === 503) return Status.LOADING;
+      return Status.UNLOADED;
+    } catch (err) {
+      return Status.FAILED;
+    }
+  }
   /**
    * Gets the context size of a particular model

package/src/models/routerModel.ts CHANGED Viewed

@@ -2,9 +2,15 @@ import { DEFAULT_CTX } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
 import { ModelsEndpoint } from "../interfaces/endpoints/models";
+import { PropsEndpoint } from "../interfaces/endpoints/props";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
+/**
+ * Represents a model in llama-server router mode.
+ * Tracks per-model status from the /models endpoint and extracts
+ * context size from startup arguments when the model is not loaded.
+ */
 export class RouterModel extends BaseModel {
   get mode(): Mode {
     return Mode.ROUTER;
@@ -17,7 +23,20 @@ export class RouterModel extends BaseModel {
     const status = this.statusMapper[model.status!.value];
     if (status === Status.UNLOADED) {
-      if (this.model.status!.failed) return Status.FAILED;
+      if (this.model.status!.failed) {
+        /**
+         * Workaround for the currently-bugged /models status detection
+         * (I suspect it was introduced in PR #22683 of llama.cpp)
+         *
+         * This workaround will show an eternal "loading" status when the model's real status
+         * is "failed", which is acceptable, because models in "failed" or "loading" status
+         * shouldn't be used.
+         *
+         * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
+         */
+        // return Status.FAILED;  // <-- Original implementation
+        return await super.getStatus();
+      }
       return Status.UNLOADED;
     }
@@ -25,6 +44,28 @@ export class RouterModel extends BaseModel {
     return status;
   }
+  /**
+   * Workaround for the currently-bugged /models status detection
+   * (I suspect it was introduced in PR #22683 of llama.cpp)
+   *
+   * @returns The detected status
+   */
+  private async getStatusWorkaround(): Promise<Status> {
+    try {
+      const { is_sleeping, error } = await rpc<PropsEndpoint>(
+        `/props?model=${this.id}`,
+      );
+      if (is_sleeping) return Status.SLEEPING;
+      if (!error) return Status.LOADED;
+      if (error.code === 503) return Status.LOADING;
+      return Status.UNLOADED;
+    } catch (err) {
+      return Status.FAILED;
+    }
+  }
   async getCapabilities(): Promise<["text"] | ["image"]> {
     // We can get the real capabilities if the model is already loaded
     if ((await this.getStatus()) === Status.LOADED) {

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,19 +1,8 @@
 import { Mode } from "../enums/mode";
-import { Status } from "../enums/status";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
-import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 export class SingleModel extends BaseModel {
   get mode(): Mode {
     return Mode.SINGLE;
   }
-  async getStatus(): Promise<Status> {
-    // In single-mode, the extension will only work when the model is fully loaded
-    const { is_sleeping } = await rpc<PropsEndpoint>("/props");
-    if (is_sleeping) return Status.SLEEPING;
-    return Status.LOADED;
-  }
 }

package/src/tools/resolver.ts CHANGED Viewed

@@ -25,9 +25,9 @@ const fileExists = async (filePath: string): Promise<boolean> => {
 };
 /**
- * Reads the contents of a file as JSON
- * @param filePath The path
- * @returns The content as JSON
+ * Reads and parses the contents of a file as JSON
+ * @param filePath The path to the file
+ * @returns The parsed content, or null if parsing fails
  */
 const readContents = async <T>(filePath: string): Promise<T | null> => {
   const raw = await readFile(filePath, "utf-8");
@@ -41,10 +41,10 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
 };
 /**
- * Reads a string value from a JSON config file
+ * Reads a value from a JSON config file by key
  * @param filePath Path to the JSON config file
  * @param key Key to extract from the parsed JSON
- * @returns The string value, or null if file/key missing or invalid
+ * @returns The value at the given key, or null if file/key missing or invalid
  */
 const readConfigValue = async <T>(
   filePath: string,

package/src/tools/retriever.ts CHANGED Viewed

@@ -19,10 +19,11 @@ export const isServerReady = async (): Promise<boolean> => {
 };
 /**
- * Extracts the data of a fetch command
- * @param endpoint The endpoint to fetch from
- * @param body The body (optional)
- * @returns Data from the fetch command
+ * Makes an HTTP request to the llama-server and returns the parsed JSON response
+ *
+ * @param endpoint The endpoint path to fetch (e.g. "/health")
+ * @param body The optional request body for POST requests
+ * @returns The parsed JSON response from the server
  */
 export const rpc = async <T>(
   endpoint: string,
@@ -46,11 +47,8 @@ export const rpc = async <T>(
     },
   });
-  if (!res.ok) {
-    const text = await res.text();
-    throw new Error(`${res.status}: ${text}`);
-  }
-  return res.json() as T;
+  const response: T = await res.json();
+  return response;
 };
 /**
@@ -62,8 +60,7 @@ export const listModels = async (): Promise<BaseModel[]> => {
   const { models, data } = await rpc<ModelsEndpoint>("/models");
   if (models) {
-    const [extra] = models;
-    return data.map((m) => new SingleModel(m, extra));
+    return data.map((m) => new SingleModel(m));
   }
   const response = data

package/tests/routerModel.test.ts CHANGED Viewed

@@ -115,7 +115,18 @@ describe("RouterModel context size extraction", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
+          status: {
+            value: "loaded",
+            args: [
+              "--model",
+              "gguf",
+              "--ctx-size",
+              "4096",
+              "--fit-ctx",
+              "8192",
+            ],
+            preset: "default",
+          },
         },
       ],
     });
@@ -149,7 +160,11 @@ describe("RouterModel context size extraction", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
+          status: {
+            value: "loaded",
+            args: ["--model", "gguf"],
+            preset: "default",
+          },
         },
       ],
     });
@@ -186,7 +201,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -206,7 +226,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -225,7 +250,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -244,14 +274,24 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
+          status: {
+            value: "unloaded",
+            args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
     const model = new RouterModel(
       createModel({
-        status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
+        status: {
+          value: "unloaded",
+          args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
+          preset: "default",
+          failed: false,
+        },
       }),
     );
     const capabilities = await model.getCapabilities();
@@ -265,14 +305,24 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
+          status: {
+            value: "unloaded",
+            args: ["--model", "gguf"],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
     const model = new RouterModel(
       createModel({
-        status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
+        status: {
+          value: "unloaded",
+          args: ["--model", "gguf"],
+          preset: "default",
+          failed: false,
+        },
       }),
     );
     const capabilities = await model.getCapabilities();

package/tests/singleModel.test.ts CHANGED Viewed

@@ -18,36 +18,13 @@ beforeEach(() => {
 });
 const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
-  new SingleModel(
-    {
-      id: "test",
-      tags: [],
-      object: "model",
-      owned_by: "test",
-      created: Date.now(),
-    },
-    {
-      name: "test",
-      model: "test.gguf",
-      modified_at: new Date().toISOString(),
-      size: "1B",
-      digest: "abc123",
-      type: "model",
-      description: "test",
-      tags: [],
-      capabilities: [],
-      parameters: "",
-      details: {
-        parent_model: "",
-        format: "",
-        family: "",
-        families: [],
-        parameter_size: "",
-        quantization_level: "",
-      },
-      ...extra,
-    },
-  );
+  new SingleModel({
+    id: "test",
+    tags: [],
+    object: "model",
+    owned_by: "test",
+    created: Date.now(),
+  });
 describe("SingleModel mode", () => {
   it("should always return SINGLE mode", () => {
@@ -94,7 +71,7 @@ describe("SingleModel getStatus", () => {
     const status = await model.getStatus();
     expect(status).toBe(Status.LOADED);
-    expect(mockRpc).toHaveBeenCalledWith("/props");
+    expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
   });
   it("should return SLEEPING when is_sleeping is true", async () => {