npm - pi-llama-cpp - Versions diffs - 0.3.1 → 0.3.3 - Mend

pi-llama-cpp 0.3.1 → 0.3.3

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +1 -1
package/src/commands/models.ts +6 -1
package/src/interfaces/endpoints/props.ts +7 -4
package/src/models/baseModel.ts +32 -5
package/src/models/routerModel.ts +37 -5
package/src/models/singleModel.ts +11 -7
package/tests/routerModel.test.ts +59 -9
package/tests/singleModel.test.ts +8 -31

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-llama-cpp",
-  "version": "0.3.1",
+  "version": "0.3.3",
   "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
   "keywords": [
     "pi",

package/src/commands/models.ts CHANGED Viewed

@@ -38,7 +38,12 @@ const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
     [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
     [Status.LOADING]: [Action.INFO, Action.CANCEL],
     [Status.FAILED]: [Action.RETRY, Action.CANCEL],
-    [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
+    [Status.SLEEPING]: [
+      Action.SWITCH,
+      Action.UNLOAD,
+      Action.INFO,
+      Action.CANCEL,
+    ],
     [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
   };

package/src/interfaces/endpoints/props.ts CHANGED Viewed

@@ -1,11 +1,8 @@
 /**
  * The structure of llama-server's /props endpoint
- *
- * In single mode, applies to /props
- * In router mode, applies to /props?model=<id>
  */
 export interface PropsEndpoint {
+  error?: PropsError;
   default_generation_settings: Record<string, any>;
   total_slots: number;
   model_alias: string;
@@ -27,3 +24,9 @@ export interface PropsEndpoint {
   build_info: string;
   is_sleeping: boolean;
 }
+export interface PropsError {
+  code: number;
+  message: string;
+  type: string;
+}

package/src/models/baseModel.ts CHANGED Viewed

@@ -69,11 +69,31 @@ export abstract class BaseModel {
   /**
    * Gets the load status of the model
+   *
+   * @returns The current status
    */
-  abstract getStatus(): Promise<Status>;
+  public async getStatus(): Promise<Status> {
+    try {
+      const { is_sleeping, error } = await rpc<PropsEndpoint>(
+        `/props?model=${this.id}`,
+      );
+      if (is_sleeping) return Status.SLEEPING;
+      if (!error) return Status.LOADED;
+      if (error.code === 503) return Status.LOADING;
+      if (error.code === 400 && error.message === "model is not loaded")
+        return Status.UNLOADED;
+      return Status.FAILED;
+    } catch (err) {
+      return Status.FAILED;
+    }
+  }
   /**
    * Gets the context size of a particular model
+   *
+   * @returns The detected context size
    */
   async getContextSize(): Promise<number> {
     try {
@@ -116,6 +136,7 @@ export abstract class BaseModel {
   /**
    * Converts the llama-server model into a configuration object used by Pi
+   *
    * @returns A Pi configuration object
    */
   async toProviderConfig(): Promise<ProviderModelConfig> {
@@ -153,15 +174,21 @@ export abstract class BaseModel {
    * Polls llama-server to check when the model is loaded
    *
    * @param startTime The initial polling timestamp
+   * @param timeout The maximum amount of ms before timeout. Defaults to POLLING_TIMEOUT
+   * @param interval The polling interval. Defaults to POLLING_INTERVAL
    */
-  async pollStatus(startTime = Date.now()): Promise<void> {
+  async pollStatus(
+    startTime: number = Date.now(),
+    timeout: number = POLLING_TIMEOUT,
+    interval: number = POLLING_INTERVAL,
+  ): Promise<void> {
     while ((await this.getStatus()) === Status.LOADING) {
       // Force a timeout if we wasted too much time polling
-      if (Date.now() - startTime > POLLING_TIMEOUT) {
-        const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
+      if (Date.now() - startTime > timeout) {
+        const message = `Model loading timed out after ${timeout} ms: ${this.id}`;
         throw new Error(message);
       }
-      await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
+      await new Promise((r) => setTimeout(r, interval));
     }
   }
 }

package/src/models/routerModel.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import { DEFAULT_CTX } from "../constants";
+import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
 import { ModelsEndpoint } from "../interfaces/endpoints/models";
+import { PropsEndpoint } from "../interfaces/endpoints/props";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
@@ -21,15 +22,46 @@ export class RouterModel extends BaseModel {
     if (!model) return Status.FAILED;
     const status = this.statusMapper[model.status!.value];
-    if (status === Status.UNLOADED) {
-      if (this.model.status!.failed) return Status.FAILED;
-      return Status.UNLOADED;
+    if (status === Status.UNLOADED || status === Status.LOADING) {
+      return super.getStatus();
     }
     return status;
   }
+  /**
+   * Workaround for the currently-bugged /models status detection
+   * (I suspect it was introduced in PR #22683 of llama.cpp)
+   *
+   * When a model is loaded for the very first time,
+   * this workaround will try to poll to /props instead of /models
+   * for up to 5 seconds to try to detect if the model is really loading,
+   * or if it definitely failed.
+   *
+   * The tradeoff is that we'll have to wait for 5 seconds
+   * while the model is "loading", while not really loading.
+   *
+   * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
+   */
+  async pollStatus(startTime = Date.now()): Promise<void> {
+    let elapsed = 0;
+    const limit = 5000;
+    // Grab the glitch
+    while (Date.now() - startTime <= limit) {
+      try {
+        await rpc<PropsEndpoint>(`/props?model=${this.id}`);
+        break;
+      } catch {
+        elapsed += POLLING_INTERVAL;
+        await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
+      }
+    }
+    const timeout = POLLING_TIMEOUT - elapsed;
+    return await super.pollStatus(startTime, timeout);
+  }
   async getCapabilities(): Promise<["text"] | ["image"]> {
     // We can get the real capabilities if the model is already loaded
     if ((await this.getStatus()) === Status.LOADED) {

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,5 +1,5 @@
+import { DEFAULT_CTX } from "../constants";
 import { Mode } from "../enums/mode";
-import { Status } from "../enums/status";
 import { PropsEndpoint } from "../interfaces/endpoints/props";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
@@ -9,11 +9,15 @@ export class SingleModel extends BaseModel {
     return Mode.SINGLE;
   }
-  async getStatus(): Promise<Status> {
-    // In single-mode, the extension will only work when the model is fully loaded
-    const { is_sleeping } = await rpc<PropsEndpoint>("/props");
-    if (is_sleeping) return Status.SLEEPING;
-    return Status.LOADED;
+  async getContextSize(): Promise<number> {
+    try {
+      const { default_generation_settings } = await rpc<PropsEndpoint>(
+        `/props?model=${this.id}`,
+      );
+      const { n_ctx } = default_generation_settings;
+      return n_ctx;
+    } catch {
+      return DEFAULT_CTX;
+    }
   }
 }

package/tests/routerModel.test.ts CHANGED Viewed

@@ -115,7 +115,18 @@ describe("RouterModel context size extraction", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
+          status: {
+            value: "loaded",
+            args: [
+              "--model",
+              "gguf",
+              "--ctx-size",
+              "4096",
+              "--fit-ctx",
+              "8192",
+            ],
+            preset: "default",
+          },
         },
       ],
     });
@@ -149,7 +160,11 @@ describe("RouterModel context size extraction", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
+          status: {
+            value: "loaded",
+            args: ["--model", "gguf"],
+            preset: "default",
+          },
         },
       ],
     });
@@ -186,7 +201,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -206,7 +226,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -225,7 +250,12 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "loaded", args: [], preset: "default", failed: false },
+          status: {
+            value: "loaded",
+            args: [],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
@@ -244,14 +274,24 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
+          status: {
+            value: "unloaded",
+            args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
     const model = new RouterModel(
       createModel({
-        status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
+        status: {
+          value: "unloaded",
+          args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
+          preset: "default",
+          failed: false,
+        },
       }),
     );
     const capabilities = await model.getCapabilities();
@@ -265,14 +305,24 @@ describe("RouterModel capabilities detection", () => {
       data: [
         {
           id: "test-model",
-          status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
+          status: {
+            value: "unloaded",
+            args: ["--model", "gguf"],
+            preset: "default",
+            failed: false,
+          },
         },
       ],
     });
     const model = new RouterModel(
       createModel({
-        status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
+        status: {
+          value: "unloaded",
+          args: ["--model", "gguf"],
+          preset: "default",
+          failed: false,
+        },
       }),
     );
     const capabilities = await model.getCapabilities();

package/tests/singleModel.test.ts CHANGED Viewed

@@ -18,36 +18,13 @@ beforeEach(() => {
 });
 const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
-  new SingleModel(
-    {
-      id: "test",
-      tags: [],
-      object: "model",
-      owned_by: "test",
-      created: Date.now(),
-    },
-    {
-      name: "test",
-      model: "test.gguf",
-      modified_at: new Date().toISOString(),
-      size: "1B",
-      digest: "abc123",
-      type: "model",
-      description: "test",
-      tags: [],
-      capabilities: [],
-      parameters: "",
-      details: {
-        parent_model: "",
-        format: "",
-        family: "",
-        families: [],
-        parameter_size: "",
-        quantization_level: "",
-      },
-      ...extra,
-    },
-  );
+  new SingleModel({
+    id: "test",
+    tags: [],
+    object: "model",
+    owned_by: "test",
+    created: Date.now(),
+  });
 describe("SingleModel mode", () => {
   it("should always return SINGLE mode", () => {
@@ -94,7 +71,7 @@ describe("SingleModel getStatus", () => {
     const status = await model.getStatus();
     expect(status).toBe(Status.LOADED);
-    expect(mockRpc).toHaveBeenCalledWith("/props");
+    expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
   });
   it("should return SLEEPING when is_sleeping is true", async () => {