npm - pi-llama-cpp - Versions diffs - 0.5.1 → 0.6.0 - Mend

pi-llama-cpp 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +58 -27
package/package.json +5 -4
package/src/constants.ts +9 -4
package/src/enums/action.ts +3 -2
package/src/enums/mode.ts +1 -0
package/src/enums/status.ts +1 -0
package/src/index.ts +33 -28
package/src/interfaces/auth.ts +1 -5
package/src/interfaces/endpoints/props.ts +1 -0
package/src/managers/command.ts +290 -0
package/src/managers/events.ts +63 -0
package/src/managers/server.ts +71 -0
package/src/models/baseModel.ts +68 -20
package/src/models/legacyModel.ts +45 -0
package/src/models/routerModel.ts +7 -30
package/src/models/singleModel.ts +9 -6
package/src/resolver.ts +123 -0
package/src/server.ts +171 -0
package/tests/commandManager.test.ts +182 -133
package/tests/legacyModel.test.ts +112 -0
package/tests/mocks.ts +97 -0
package/tests/resolver.test.ts +163 -104
package/tests/routerModel.test.ts +46 -68
package/tests/server.test.ts +175 -0
package/tests/serverManager.test.ts +117 -0
package/tests/singleModel.test.ts +21 -29
package/src/commands/models.ts +0 -228
package/src/events.ts +0 -26
package/src/manager.ts +0 -96
package/src/tools/resolver.ts +0 -136
package/src/tools/retriever.ts +0 -71
package/tests/handlers.test.ts +0 -164
package/tests/modelsCommand.test.ts +0 -270

package/src/managers/events.ts ADDED Viewed

@@ -0,0 +1,63 @@
+import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
+import { READABLE_TIMEOUT } from "../constants";
+import { ModelSelectEvent } from "../interfaces/events";
+import { BaseModel } from "../models/baseModel";
+import { Server } from "../server";
+export class EventManager {
+  /** Model whose load is in flight, or null; this class only reads and clears it */
+  static inflightModel: BaseModel | null = null;
+  constructor(private readonly servers: Server[]) {}
+  /**
+   * Loads the model Pi has just selected, when one of the known servers owns it,
+   * and reports progress and the outcome through UI notifications.
+   *
+   * @param event Model selection event
+   * @param ctx Pi context
+   */
+  async onModelSelect(event: ModelSelectEvent, ctx: ExtensionContext) {
+    for (const { providerId, models: candidates } of this.servers) {
+      if (providerId !== event.model.provider) continue;
+      const selected = candidates.find(({ id }) => id === event.model.id);
+      if (!selected) continue;
+      ctx.ui.notify(`Loading ${selected.name}...`, "info");
+      try {
+        await selected.load();
+        ctx.ui.notify(`Model ${selected.name} ready`, "info");
+      } catch {
+        ctx.ui.notify(`Failed to load model ${selected.name}`, "error");
+      }
+      return;
+    }
+  }
+  /**
+   * Session-switch handler: warns the user only if a model load is still in flight.
+   *
+   * @param ctx Pi context
+   */
+  async onSessionBeforeSwitch(ctx: ExtensionContext) {
+    const pending = EventManager.inflightModel;
+    if (!pending) return;
+    const warning = [
+      `Session change detected while model '${pending.name}' was still loading.`,
+      "Model load will continue in the background, but UI might not update.",
+      "",
+      "Verify that your new model is loaded, or use /models to re-select it afterwards.",
+    ].join("\n");
+    ctx.ui.notify(warning, "warning");
+    // Resolve only after a delay so the warning stays up long enough to be read
+    await new Promise((resolve) => setTimeout(resolve, READABLE_TIMEOUT));
+  }
+  /** Clears the in-flight model reference. */
+  static resetInflightModel() {
+    EventManager.inflightModel = null;
+  }
+}

package/src/managers/server.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { API_TYPE } from "../constants";
+import { BaseModel } from "../models/baseModel";
+import { Server } from "../server";
+export class ServerManager {
+  /** Base URLs of the servers that failed to initialize in the latest pass */
+  readonly failedUrls: string[] = [];
+  constructor(private readonly servers: Server[]) {}
+  /**
+   * Initializes each server and registers a Pi provider, with its model
+   * configurations, for every one that initializes successfully.
+   * Servers are awaited one by one on purpose, so they are processed in order.
+   *
+   * @param pi The Pi extension
+   */
+  async registerAllProviders(pi: ExtensionAPI) {
+    // Start from a clean failure list (emptied in place; the field is readonly)
+    this.failedUrls.length = 0;
+    for (const server of this.servers) {
+      await this.registerProvider(server, pi);
+    }
+  }
+  /**
+   * Initializes one server and registers it as a Pi provider. If initialization
+   * throws, the server's URL is recorded in `failedUrls` and nothing is registered.
+   *
+   * @param server The server
+   * @param pi The Pi extension
+   */
+  private async registerProvider(server: Server, pi: ExtensionAPI) {
+    try {
+      await server.initialize();
+    } catch {
+      this.failedUrls.push(server.baseUrl);
+      return;
+    }
+    // Collect everything the registration needs before handing it to Pi
+    const { baseUrl, models: serverModels, providerId, providerName } = server;
+    const apiKey = await server.getApiKey();
+    const models = await Promise.all(
+      serverModels.map((entry) => entry.toProviderConfig()),
+    );
+    pi.registerProvider(providerId, {
+      name: providerName,
+      baseUrl,
+      api: API_TYPE,
+      apiKey,
+      models,
+    });
+  }
+  /**
+   * Returns all models from all servers.
+   *
+   * @returns Flat array of every model, in server order
+   */
+  getAllModels(): BaseModel[] {
+    const all: BaseModel[] = [];
+    for (const { models } of this.servers) all.push(...models);
+    return all;
+  }
+}

package/src/models/baseModel.ts CHANGED Viewed

@@ -1,10 +1,9 @@
 import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
-import { POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
+import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
-import { rpc } from "../tools/retriever";
+import { DataProperty } from "../interfaces/endpoints/models";
+import { Server } from "../server";
 /**
  * Abstract base class for llama-server models.
@@ -12,7 +11,10 @@ import { rpc } from "../tools/retriever";
  * loading/unloading, and configuration conversion.
  */
 export abstract class BaseModel {
-  constructor(protected readonly model: DataProperty) {}
+  constructor(
+    protected readonly model: DataProperty,
+    protected readonly server: Server,
+  ) {}
   protected readonly statusMapper: Record<string, Status> = {
     loaded: Status.LOADED,
@@ -28,20 +30,44 @@ export abstract class BaseModel {
     [Status.FAILED]: "🔴",
     [Status.SLEEPING]: "🔵",
     [Status.UNLOADED]: "⚪",
+    [Status.UNAUTHORIZED]: "⛔",
   };
   abstract get mode(): Mode;
+  /**
+   * Returns the server URL associated with this model
+   */
+  get serverUrl(): string {
+    return this.server.baseUrl;
+  }
+  /**
+   * Returns the provider id associated with this model
+   */
+  get serverId(): string {
+    return this.server.providerId;
+  }
+  /**
+   * Returns the model's unique identifier
+   */
   get id(): string {
     return this.model.id;
   }
+  /**
+   * Returns the model's display name (first alias, or id as fallback)
+   */
   get name(): string {
     return this.model.aliases?.[0] || this.model.id;
   }
+  /**
+   * Whether the model is a reasoning model.
+   * Currently always returns true since there's no way to detect this from llama-server.
+   */
   get reasoning(): boolean {
-    // We don't have a way to detect this, so we'll fallback to true
     return true;
   }
@@ -50,21 +76,38 @@ export abstract class BaseModel {
    *
    * @returns An array of capabilities, as expected by Pi
    */
-  abstract getCapabilities(): Promise<("text" | "image")[]>;
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    try {
+      // When loaded, this works alright
+      const { modalities } = await this.server.fetchModelProps(this.id);
+      return modalities.vision ? ["text", "image"] : ["text"];
+    } catch {
+      // Otherwise, we have to search for it ourselves
+      const { data } = await this.server.fetchModels();
+      const model = data.find((d) => d.id === this.id);
+      if (!model) return ["text"];
+      const { input_modalities } = model.architecture!;
+      const response = input_modalities.filter(
+        (mod) => mod === "text" || mod === "image",
+      );
+      return response;
+    }
+  }
   /**
    * Gets the load status of the model
    *
-   * @returns The current status
+   * @returns The current {@link Status}
    */
   public async getStatus(): Promise<Status> {
     try {
-      const { is_sleeping, error } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}&autoload=false`,
-      );
+      const { is_sleeping, error } = await this.server.fetchModelProps(this.id);
       if (is_sleeping) return Status.SLEEPING;
       if (!error) return Status.LOADED;
+      if (error.code === 401) return Status.UNAUTHORIZED;
       if (error.code === 503) return Status.LOADING;
       if (error.code === 400 && error.message === "model is not loaded")
         return Status.UNLOADED;
@@ -76,19 +119,23 @@ export abstract class BaseModel {
   }
   /**
-   * Gets the context size of a particular model
+   * Gets the context size of a particular model.
    *
-   * @returns The detected context size
+   * @returns The context size in tokens
    */
   async getContextSize(): Promise<number> {
-    const { data } = await rpc<ModelsEndpoint>("/models");
-    const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
+    try {
+      const { data } = await this.server.fetchModels();
+      const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
-    return n_ctx;
+      return n_ctx ?? DEFAULT_CTX;
+    } catch {
+      return DEFAULT_CTX;
+    }
   }
   /**
-   * Sets up a label for the model selection screen
+   * Returns a label for the model selection screen
    * @returns A label structured as "<icon> <name>"
    */
   async getLabel(): Promise<string> {
@@ -97,11 +144,12 @@ export abstract class BaseModel {
   }
   /**
-   * Returns a human-readable information about the model
+   * Returns human-readable information about the model
    * @returns A string with the model information
    */
   async getInfo(): Promise<string> {
     const messages = [
+      `Server       : ${this.serverUrl}`,
       `ID           : ${this.id}`,
       `Model        : ${this.name}`,
       `Reasoning    : ${this.reasoning}`,
@@ -140,7 +188,7 @@ export abstract class BaseModel {
     const status = await this.getStatus();
     if (status === Status.LOADED || status === Status.SLEEPING) return;
-    await rpc("/models/load", { model: this.id });
+    await this.server.postRequest("load", this.id);
     await this.pollStatus();
   }
@@ -148,7 +196,7 @@ export abstract class BaseModel {
    * Unloads the model from llama-server
    */
   async unload(): Promise<void> {
-    await rpc("/models/unload", { model: this.id });
+    await this.server.postRequest("unload", this.id);
   }
   /**

package/src/models/legacyModel.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import { DEFAULT_CTX } from "../constants";
+import { Mode } from "../enums/mode";
+import { SingleModel } from "./singleModel";
+export class LegacyModel extends SingleModel {
+  get mode(): Mode {
+    return Mode.LEGACY;
+  }
+  /**
+   * Retrieves the context size on a legacy server such as ik_llama.cpp.
+   * Never throws: any failure while querying the server yields DEFAULT_CTX.
+   * @returns `max_model_len` when reported and non-zero, else `n_ctx`, else DEFAULT_CTX
+   */
+  async getContextSize(): Promise<number> {
+    try {
+      const props = await this.server.fetchModelProps(this.id);
+      const models = await this.server.fetchModels();
+      const { n_ctx } = props as unknown as { n_ctx?: number };
+      const { data } = models as unknown as {
+        data?: { max_model_len?: number }[];
+      };
+      // A missing or zero max_model_len means "not reported": use n_ctx instead
+      return (data?.[0]?.max_model_len || n_ctx) ?? DEFAULT_CTX;
+    } catch {
+      return DEFAULT_CTX;
+    }
+  }
+  /**
+   * Detects the model's capabilities on a legacy server such as ik_llama.cpp
+   * @returns An array of capabilities, as expected by Pi; text-only if detection throws
+   */
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    try {
+      return await super.getCapabilities();
+    } catch {
+      // When auth is wrong in a legacy model, we simply can't detect the real capabilities
+      return ["text"];
+    }
+  }
+}

package/src/models/routerModel.ts CHANGED Viewed

@@ -1,9 +1,6 @@
 import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { ModelsEndpoint } from "../interfaces/endpoints/models";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
-import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 /**
@@ -16,19 +13,6 @@ export class RouterModel extends BaseModel {
     return Mode.ROUTER;
   }
-  async getStatus(): Promise<Status> {
-    const { data } = await rpc<ModelsEndpoint>("/models");
-    const model = data.find((m) => m.id === this.id);
-    if (!model) return Status.FAILED;
-    const status = this.statusMapper[model.status!.value];
-    if (status === Status.UNLOADED || status === Status.LOADING) {
-      return super.getStatus();
-    }
-    return status;
-  }
   /**
    * Workaround for the currently-bugged /models status detection
    * (I suspect it was introduced in PR #22683 of llama.cpp)
@@ -50,7 +34,7 @@ export class RouterModel extends BaseModel {
     // Grab the glitch
     while (Date.now() - startTime <= limit) {
       try {
-        await rpc<PropsEndpoint>(`/props?model=${this.id}&autoload=false`);
+        await this.server.fetchModelProps(this.id);
         break;
       } catch {
         elapsed += POLLING_INTERVAL;
@@ -62,19 +46,12 @@ export class RouterModel extends BaseModel {
     return await super.pollStatus(startTime, timeout);
   }
-  async getCapabilities(): Promise<("text" | "image")[]> {
-    const { data } = await rpc<ModelsEndpoint>(`/models`);
-    const model = data.find((d) => d.id === this.id);
-    if (!model) return ["text"];
-    const { input_modalities } = model.architecture!;
-    const response = input_modalities.filter(
-      (mod) => mod === "text" || mod === "image",
-    );
-    return response;
-  }
+  /**
+   * Gets the context size of a particular model.
+   * In router mode, falls back to parsing CLI args when the model is unloaded.
+   *
+   * @returns The context size in tokens
+   */
   async getContextSize(): Promise<number> {
     // We can get a more accurate context size if the model is already loaded
     if ((await this.getStatus()) === Status.LOADED) {

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,6 +1,4 @@
 import { Mode } from "../enums/mode";
-import { ModelsEndpoint } from "../interfaces/endpoints/models";
-import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 export class SingleModel extends BaseModel {
@@ -9,10 +7,15 @@ export class SingleModel extends BaseModel {
   }
   async getCapabilities(): Promise<("text" | "image")[]> {
-    const { models } = await rpc<ModelsEndpoint>(`/models`);
-    const [model] = models!;
+    try {
+      return await super.getCapabilities();
+    } catch {
+      // This is required when auth is wrong
+      const { models } = await this.server.fetchModels();
+      const [{ capabilities }] = models!;
-    const hasImage = model.capabilities.includes("multimodal");
-    return hasImage ? ["text", "image"] : ["text"];
+      const hasImage = capabilities.includes("multimodal");
+      return hasImage ? ["text", "image"] : ["text"];
+    }
   }
 }

package/src/resolver.ts ADDED Viewed

@@ -0,0 +1,123 @@
+import { getAgentDir } from "@earendil-works/pi-coding-agent";
+import { access, constants, readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { API_KEY_PLACEHOLDER, DEFAULT_LLAMA_SERVER_URL } from "./constants";
+import { AuthFile } from "./interfaces/auth";
+export class ConfigResolver {
+  /** URLs produced by the first non-empty resolution; empty until then */
+  private cachedUrls: string[] = [];
+  /**
+   * Tells whether a path exists (existence only; readability is not checked)
+   */
+  private async fileExists(filePath: string): Promise<boolean> {
+    try {
+      await access(filePath, constants.F_OK);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+  /**
+   * Reads a file and parses it as JSON; null when it cannot be read or parsed
+   */
+  private async readJson<T>(filePath: string): Promise<T | null> {
+    try {
+      // Read inside the try: the file can vanish or be unreadable after fileExists()
+      const raw = await readFile(filePath, "utf-8");
+      return JSON.parse(raw) as T;
+    } catch {
+      return null;
+    }
+  }
+  /**
+   * Reads one key of a JSON config file; null when the file or the key is unusable
+   */
+  private async readConfigValue<T>(
+    filePath: string,
+    key: keyof T,
+  ): Promise<T[keyof T] | null> {
+    const cfg = await this.readJson<T>(filePath);
+    return cfg?.[key] ?? null;
+  }
+  /**
+   * Reads `llamaServerUrl` from settings.json in Pi's agent directory, if present
+   */
+  private async resolveGlobalUrl(): Promise<string | null> {
+    const globalPath = join(getAgentDir(), "settings.json");
+    if (!(await this.fileExists(globalPath))) return null;
+    return this.readConfigValue<Record<string, string>>(
+      globalPath,
+      "llamaServerUrl",
+    );
+  }
+  /**
+   * Reads `url` from the project's .pi/llama-server.json, if present
+   */
+  private async resolveProjectUrl(cwd: string): Promise<string | null> {
+    const projectPath = join(cwd, ".pi", "llama-server.json");
+    if (!(await this.fileExists(projectPath))) return null;
+    return this.readConfigValue<Record<string, string>>(projectPath, "url");
+  }
+  /**
+   * Reads the LLAMA_SERVER_URL environment variable, if set
+   */
+  private async resolveEnvUrl(): Promise<string | null> {
+    return process.env.LLAMA_SERVER_URL ?? null;
+  }
+  /**
+   * Returns the raw, possibly ";"-joined URL setting from the first source that has one
+   */
+  private async extractJoinedUrls(cwd: string): Promise<string> {
+    // Precedence: project config, environment, global settings, built-in default
+    return (
+      (await this.resolveProjectUrl(cwd)) ||
+      (await this.resolveEnvUrl()) ||
+      (await this.resolveGlobalUrl()) ||
+      DEFAULT_LLAMA_SERVER_URL
+    );
+  }
+  /**
+   * Resolves the URLs where llama-servers are running. The first non-empty result
+   * is cached and reused for every later call, whatever `cwd` is passed.
+   * @param cwd Project directory searched for .pi/llama-server.json
+   * @returns The ";"-separated entries, trimmed and without trailing slashes
+   */
+  async resolveUrls(cwd: string): Promise<string[]> {
+    if (this.cachedUrls.length > 0) return this.cachedUrls;
+    const raw = await this.extractJoinedUrls(cwd);
+    this.cachedUrls = raw
+      .split(";")
+      .map((u) => u.trim())
+      .filter((u) => u.length > 0)
+      .map((u) => u.replace(/\/+$/, ""));
+    return this.cachedUrls;
+  }
+  /**
+   * Looks up the provider's API key in Pi's auth.json. The file is re-read on every
+   * call (deliberately not cached) so that changes to it are picked up.
+   * @returns The stored key, or API_KEY_PLACEHOLDER when none is available
+   */
+  async resolveApiKey(providerId: string): Promise<string> {
+    const authPath = join(getAgentDir(), "auth.json");
+    if (!(await this.fileExists(authPath))) return API_KEY_PLACEHOLDER;
+    const auth = await this.readJson<AuthFile>(authPath);
+    return auth?.[providerId]?.key ?? API_KEY_PLACEHOLDER;
+  }
+}