npm - pi-llama-cpp - Versions diffs - 0.3.2 → 0.3.4 - Mend

pi-llama-cpp 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/src/models/baseModel.ts +24 -11
package/src/models/routerModel.ts +28 -32
package/tests/routerModel.test.ts +2 -7
package/tests/singleModel.test.ts +4 -13

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-llama-cpp",
-  "version": "0.3.2",
+  "version": "0.3.4",
   "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
   "keywords": [
     "pi",

package/src/models/baseModel.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import {
 } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
+import { DataProperty } from "../interfaces/endpoints/models";
 import { PropsEndpoint } from "../interfaces/endpoints/props";
 import { rpc } from "../tools/retriever";
@@ -69,6 +69,8 @@ export abstract class BaseModel {
   /**
    * Gets the load status of the model
+   *
+   * @returns The current status
    */
   public async getStatus(): Promise<Status> {
     try {
@@ -79,8 +81,10 @@ export abstract class BaseModel {
       if (is_sleeping) return Status.SLEEPING;
       if (!error) return Status.LOADED;
       if (error.code === 503) return Status.LOADING;
+      if (error.code === 400 && error.message === "model is not loaded")
+        return Status.UNLOADED;
-      return Status.UNLOADED;
+      return Status.FAILED;
     } catch (err) {
       return Status.FAILED;
     }
@@ -88,14 +92,16 @@ export abstract class BaseModel {
   /**
    * Gets the context size of a particular model
+   *
+   * @returns The detected context size
    */
   async getContextSize(): Promise<number> {
     try {
-      const { data } = await rpc<ModelsEndpoint>(`/models`);
-      const model = data.find((d) => d.id === this.id);
-      const response = model?.meta?.n_ctx;
-      return response ?? DEFAULT_CTX;
+      const { default_generation_settings } = await rpc<PropsEndpoint>(
+        `/props?model=${this.id}`,
+      );
+      const { n_ctx } = default_generation_settings;
+      return n_ctx;
     } catch {
       return DEFAULT_CTX;
     }
@@ -130,6 +136,7 @@ export abstract class BaseModel {
   /**
    * Converts the llama-server model into a configuration object used by Pi
+   *
    * @returns A Pi configuration object
    */
   async toProviderConfig(): Promise<ProviderModelConfig> {
@@ -167,15 +174,21 @@ export abstract class BaseModel {
    * Polls llama-server to check when the model is loaded
    *
    * @param startTime The initial polling timestamp
+   * @param timeout The maximum amount of ms before timeout. Defaults to POLLING_TIMEOUT
+   * @param interval The polling interval. Defaults to POLLING_INTERVAL
    */
-  async pollStatus(startTime = Date.now()): Promise<void> {
+  async pollStatus(
+    startTime: number = Date.now(),
+    timeout: number = POLLING_TIMEOUT,
+    interval: number = POLLING_INTERVAL,
+  ): Promise<void> {
     while ((await this.getStatus()) === Status.LOADING) {
       // Force a timeout if we wasted too much time polling
-      if (Date.now() - startTime > POLLING_TIMEOUT) {
-        const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
+      if (Date.now() - startTime > timeout) {
+        const message = `Model loading timed out after ${timeout} ms: ${this.id}`;
         throw new Error(message);
       }
-      await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
+      await new Promise((r) => setTimeout(r, interval));
     }
   }
 }

package/src/models/routerModel.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { DEFAULT_CTX } from "../constants";
+import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
 import { ModelsEndpoint } from "../interfaces/endpoints/models";
@@ -22,23 +22,8 @@ export class RouterModel extends BaseModel {
     if (!model) return Status.FAILED;
     const status = this.statusMapper[model.status!.value];
-    if (status === Status.UNLOADED) {
-      if (this.model.status!.failed) {
-        /**
-         * Workaround for the currently-bugged /models status detection
-         * (I suspect it was introduced in PR #22683 of llama.cpp)
-         *
-         * This workaround will show an eternal "loading" status when the model's real status
-         * is "failed", which is acceptable, because models in "failed" or "loading" status
-         * shouldn't be used.
-         *
-         * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
-         */
-        // return Status.FAILED;  // <-- Original implementation
-        return await super.getStatus();
-      }
-      return Status.UNLOADED;
+    if (status === Status.UNLOADED || status === Status.LOADING) {
+      return super.getStatus();
     }
     return status;
@@ -48,22 +33,33 @@ export class RouterModel extends BaseModel {
    * Workaround for the currently-bugged /models status detection
    * (I suspect it was introduced in PR #22683 of llama.cpp)
    *
-   * @returns The detected status
+   * When a model is loaded for the very first time,
+   * this workaround will try to poll to /props instead of /models
+   * for up to 5 seconds to try to detect if the model is really loading,
+   * or if it definitely failed.
+   *
+   * The tradeoff is that we'll have to wait for 5 seconds
+   * while the model is "loading", while not really loading.
+   *
+   * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
    */
-  private async getStatusWorkaround(): Promise<Status> {
-    try {
-      const { is_sleeping, error } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}`,
-      );
-      if (is_sleeping) return Status.SLEEPING;
-      if (!error) return Status.LOADED;
-      if (error.code === 503) return Status.LOADING;
-      return Status.UNLOADED;
-    } catch (err) {
-      return Status.FAILED;
+  async pollStatus(startTime = Date.now()): Promise<void> {
+    let elapsed = 0;
+    const limit = 5000;
+    // Grab the glitch
+    while (Date.now() - startTime <= limit) {
+      try {
+        await rpc<PropsEndpoint>(`/props?model=${this.id}`);
+        break;
+      } catch {
+        elapsed += POLLING_INTERVAL;
+        await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
+      }
     }
+    const timeout = POLLING_TIMEOUT - elapsed;
+    return await super.pollStatus(startTime, timeout);
   }
   async getCapabilities(): Promise<["text"] | ["image"]> {

package/tests/routerModel.test.ts CHANGED Viewed

@@ -130,14 +130,9 @@ describe("RouterModel context size extraction", () => {
         },
       ],
     });
-    // Second call: super.getContextSize() -> /models with meta.n_ctx
+    // Second call: super.getContextSize() -> /props?model=test-model with default_generation_settings.n_ctx
     mockRpc.mockResolvedValueOnce({
-      data: [
-        {
-          id: "test-model",
-          meta: { n_ctx: 4096 },
-        },
-      ],
+      default_generation_settings: { n_ctx: 4096 },
     });
     const model = new RouterModel(

package/tests/singleModel.test.ts CHANGED Viewed

@@ -85,28 +85,19 @@ describe("SingleModel getStatus", () => {
 });
 describe("SingleModel getContextSize", () => {
-  it("should return n_ctx from /models endpoint meta", async () => {
+  it("should return n_ctx from /props endpoint default_generation_settings", async () => {
     mockRpc.mockResolvedValueOnce({
-      data: [{ id: "test", meta: { n_ctx: 8192 } }],
+      default_generation_settings: { n_ctx: 8192 },
     });
     const model = createModel();
     const ctxSize = await model.getContextSize();
     expect(ctxSize).toBe(8192);
-    expect(mockRpc).toHaveBeenCalledWith("/models");
-  });
-  it("should return DEFAULT_CTX when model not found in /models", async () => {
-    mockRpc.mockResolvedValueOnce({ data: [] });
-    const model = createModel();
-    const ctxSize = await model.getContextSize();
-    expect(ctxSize).toBe(DEFAULT_CTX);
+    expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
   });
-  it("should return DEFAULT_CTX when /models fails", async () => {
+  it("should return DEFAULT_CTX when /props fails", async () => {
     mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
     const model = createModel();