npm - pi-llama-cpp - Versions diffs - 0.5.1 → 0.7.0 - Mend

pi-llama-cpp 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +96 -30
package/package.json +6 -5
package/src/constants.ts +27 -5
package/src/enums/action.ts +3 -2
package/src/enums/mode.ts +1 -0
package/src/enums/serverStatus.ts +6 -0
package/src/enums/status.ts +1 -0
package/src/index.ts +53 -31
package/src/interfaces/auth.ts +1 -5
package/src/interfaces/endpoints/props.ts +1 -0
package/src/interfaces/levels.ts +7 -0
package/src/managers/command.ts +290 -0
package/src/managers/events.ts +101 -0
package/src/managers/server.ts +136 -0
package/src/models/baseModel.ts +75 -20
package/src/models/legacyModel.ts +45 -0
package/src/models/routerModel.ts +7 -30
package/src/models/singleModel.ts +9 -6
package/src/resolver.ts +152 -0
package/src/server.ts +187 -0
package/tests/commandManager.test.ts +182 -133
package/tests/events.test.ts +256 -0
package/tests/legacyModel.test.ts +112 -0
package/tests/mocks.ts +100 -0
package/tests/resolver.test.ts +143 -106
package/tests/routerModel.test.ts +46 -68
package/tests/server.test.ts +176 -0
package/tests/serverManager.test.ts +130 -0
package/tests/singleModel.test.ts +21 -29
package/src/commands/models.ts +0 -228
package/src/events.ts +0 -26
package/src/manager.ts +0 -96
package/src/tools/resolver.ts +0 -136
package/src/tools/retriever.ts +0 -71
package/tests/handlers.test.ts +0 -164
package/tests/modelsCommand.test.ts +0 -270

package/src/models/baseModel.ts CHANGED Viewed

@@ -1,10 +1,9 @@
 import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
-import { POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
+import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
-import { rpc } from "../tools/retriever";
+import { DataProperty } from "../interfaces/endpoints/models";
+import { Server } from "../server";
 /**
  * Abstract base class for llama-server models.
@@ -12,7 +11,10 @@ import { rpc } from "../tools/retriever";
  * loading/unloading, and configuration conversion.
  */
 export abstract class BaseModel {
-  constructor(protected readonly model: DataProperty) {}
+  constructor(
+    protected readonly model: DataProperty,
+    protected readonly server: Server,
+  ) {}
   protected readonly statusMapper: Record<string, Status> = {
     loaded: Status.LOADED,
@@ -28,20 +30,44 @@ export abstract class BaseModel {
     [Status.FAILED]: "🔴",
     [Status.SLEEPING]: "🔵",
     [Status.UNLOADED]: "⚪",
+    [Status.UNAUTHORIZED]: "⛔",
   };
   abstract get mode(): Mode;
+  /**
+   * Returns the server URL associated with this model
+   */
+  get serverUrl(): string {
+    return this.server.baseUrl;
+  }
+  /**
+   * Returns the provider id associated with this model
+   */
+  get serverId(): string {
+    return this.server.providerId;
+  }
+  /**
+   * Returns the model's unique identifier
+   */
   get id(): string {
     return this.model.id;
   }
+  /**
+   * Returns the model's display name (first alias, or id as fallback)
+   */
   get name(): string {
     return this.model.aliases?.[0] || this.model.id;
   }
+  /**
+   * Whether the model is a reasoning model.
+   * Currently always returns true since there's no way to detect this from llama-server.
+   */
   get reasoning(): boolean {
-    // We don't have a way to detect this, so we'll fallback to true
     return true;
   }
@@ -50,21 +76,38 @@ export abstract class BaseModel {
    *
    * @returns An array of capabilities, as expected by Pi
    */
-  abstract getCapabilities(): Promise<("text" | "image")[]>;
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    try {
+      // When loaded, this works alright
+      const { modalities } = await this.server.fetchModelProps(this.id);
+      return modalities.vision ? ["text", "image"] : ["text"];
+    } catch {
+      // Otherwise, we have to search for it ourselves
+      const { data } = await this.server.fetchModels();
+      const model = data.find((d) => d.id === this.id);
+      if (!model) return ["text"];
+      const { input_modalities } = model.architecture!;
+      const response = input_modalities.filter(
+        (mod) => mod === "text" || mod === "image",
+      );
+      return response;
+    }
+  }
   /**
    * Gets the load status of the model
    *
-   * @returns The current status
+   * @returns The current {@link Status}
    */
   public async getStatus(): Promise<Status> {
     try {
-      const { is_sleeping, error } = await rpc<PropsEndpoint>(
-        `/props?model=${this.id}&autoload=false`,
-      );
+      const { is_sleeping, error } = await this.server.fetchModelProps(this.id);
       if (is_sleeping) return Status.SLEEPING;
       if (!error) return Status.LOADED;
+      if (error.code === 401) return Status.UNAUTHORIZED;
       if (error.code === 503) return Status.LOADING;
       if (error.code === 400 && error.message === "model is not loaded")
         return Status.UNLOADED;
@@ -76,19 +119,23 @@ export abstract class BaseModel {
   }
   /**
-   * Gets the context size of a particular model
+   * Gets the context size of a particular model.
    *
-   * @returns The detected context size
+   * @returns The context size in tokens
    */
   async getContextSize(): Promise<number> {
-    const { data } = await rpc<ModelsEndpoint>("/models");
-    const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
+    try {
+      const { data } = await this.server.fetchModels();
+      const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
-    return n_ctx;
+      return n_ctx ?? DEFAULT_CTX;
+    } catch {
+      return DEFAULT_CTX;
+    }
   }
   /**
-   * Sets up a label for the model selection screen
+   * Returns a label for the model selection screen
    * @returns A label structured as "<icon> <name>"
    */
   async getLabel(): Promise<string> {
@@ -97,11 +144,12 @@ export abstract class BaseModel {
   }
   /**
-   * Returns a human-readable information about the model
+   * Returns human-readable information about the model
    * @returns A string with the model information
    */
   async getInfo(): Promise<string> {
     const messages = [
+      `Server       : ${this.serverUrl}`,
       `ID           : ${this.id}`,
       `Model        : ${this.name}`,
       `Reasoning    : ${this.reasoning}`,
@@ -124,6 +172,13 @@ export abstract class BaseModel {
       id: this.id,
       name: this.name,
       reasoning: this.reasoning,
+      thinkingLevelMap: {
+        minimal: "minimal",
+        low: "low",
+        medium: "medium",
+        high: "high",
+        xhigh: "xhigh",
+      },
       input: await this.getCapabilities(),
       contextWindow: await this.getContextSize(),
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -140,7 +195,7 @@ export abstract class BaseModel {
     const status = await this.getStatus();
     if (status === Status.LOADED || status === Status.SLEEPING) return;
-    await rpc("/models/load", { model: this.id });
+    await this.server.postRequest("load", this.id);
     await this.pollStatus();
   }
@@ -148,7 +203,7 @@ export abstract class BaseModel {
    * Unloads the model from llama-server
    */
   async unload(): Promise<void> {
-    await rpc("/models/unload", { model: this.id });
+    await this.server.postRequest("unload", this.id);
   }
   /**

package/src/models/legacyModel.ts ADDED Viewed

@@ -0,0 +1,45 @@
+import { DEFAULT_CTX } from "../constants";
+import { Mode } from "../enums/mode";
+import { SingleModel } from "./singleModel";
+export class LegacyModel extends SingleModel {
+  get mode(): Mode {
+    return Mode.LEGACY;
+  }
+  /**
+   * Retrieves the context size when the user is running
+   * a server that uses legacy models, such as ik_llama.cpp
+   *
+   * @returns The context size
+   */
+  async getContextSize(): Promise<number> {
+    const props = await this.server.fetchModelProps(this.id);
+    const models = await this.server.fetchModels();
+    const { n_ctx } = props as unknown as { n_ctx: number };
+    const { data } = models as unknown as {
+      data: { max_model_len: number }[];
+    };
+    const [{ max_model_len }] = data;
+    const contextSize = max_model_len === 0 ? n_ctx : max_model_len;
+    return contextSize ?? DEFAULT_CTX;
+  }
+  /**
+   * Detects the capabilities of the model when the user is running
+   * a server that uses legacy models, such as ik_llama.cpp
+   *
+   * @returns An array of capabilities, as expected by Pi
+   */
+  async getCapabilities(): Promise<("text" | "image")[]> {
+    try {
+      return await super.getCapabilities();
+    } catch {
+      // When auth is wrong in a legacy model, we simply can't detect the real capabilities
+      return ["text"];
+    }
+  }
+}

package/src/models/routerModel.ts CHANGED Viewed

@@ -1,9 +1,6 @@
 import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { ModelsEndpoint } from "../interfaces/endpoints/models";
-import { PropsEndpoint } from "../interfaces/endpoints/props";
-import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 /**
@@ -16,19 +13,6 @@ export class RouterModel extends BaseModel {
     return Mode.ROUTER;
   }
-  async getStatus(): Promise<Status> {
-    const { data } = await rpc<ModelsEndpoint>("/models");
-    const model = data.find((m) => m.id === this.id);
-    if (!model) return Status.FAILED;
-    const status = this.statusMapper[model.status!.value];
-    if (status === Status.UNLOADED || status === Status.LOADING) {
-      return super.getStatus();
-    }
-    return status;
-  }
   /**
    * Workaround for the currently-bugged /models status detection
    * (I suspect it was introduced in PR #22683 of llama.cpp)
@@ -50,7 +34,7 @@ export class RouterModel extends BaseModel {
     // Grab the glitch
     while (Date.now() - startTime <= limit) {
       try {
-        await rpc<PropsEndpoint>(`/props?model=${this.id}&autoload=false`);
+        await this.server.fetchModelProps(this.id);
         break;
       } catch {
         elapsed += POLLING_INTERVAL;
@@ -62,19 +46,12 @@ export class RouterModel extends BaseModel {
     return await super.pollStatus(startTime, timeout);
   }
-  async getCapabilities(): Promise<("text" | "image")[]> {
-    const { data } = await rpc<ModelsEndpoint>(`/models`);
-    const model = data.find((d) => d.id === this.id);
-    if (!model) return ["text"];
-    const { input_modalities } = model.architecture!;
-    const response = input_modalities.filter(
-      (mod) => mod === "text" || mod === "image",
-    );
-    return response;
-  }
+  /**
+   * Gets the context size of a particular model.
+   * In router mode, falls back to parsing CLI args when the model is unloaded.
+   *
+   * @returns The context size in tokens
+   */
   async getContextSize(): Promise<number> {
     // We can get a more accurate context size if the model is already loaded
     if ((await this.getStatus()) === Status.LOADED) {

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,6 +1,4 @@
 import { Mode } from "../enums/mode";
-import { ModelsEndpoint } from "../interfaces/endpoints/models";
-import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 export class SingleModel extends BaseModel {
@@ -9,10 +7,15 @@ export class SingleModel extends BaseModel {
   }
   async getCapabilities(): Promise<("text" | "image")[]> {
-    const { models } = await rpc<ModelsEndpoint>(`/models`);
-    const [model] = models!;
+    try {
+      return await super.getCapabilities();
+    } catch {
+      // This is required when auth is wrong
+      const { models } = await this.server.fetchModels();
+      const [{ capabilities }] = models!;
-    const hasImage = model.capabilities.includes("multimodal");
-    return hasImage ? ["text", "image"] : ["text"];
+      const hasImage = capabilities.includes("multimodal");
+      return hasImage ? ["text", "image"] : ["text"];
+    }
   }
 }

package/src/resolver.ts ADDED Viewed

@@ -0,0 +1,152 @@
+import {
+  AuthStorage,
+  getAgentDir,
+  SettingsManager,
+} from "@earendil-works/pi-coding-agent";
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import {
+  API_KEY_PLACEHOLDER,
+  DEFAULT_LLAMA_SERVER_URL,
+  DEFAULT_THINKING_BUDGETS,
+} from "./constants";
+import { ThinkingLevel } from "./interfaces/levels";
+export class ConfigResolver {
+  private warnings: string[] = [];
+  private cachedUrls: string[] = [];
+  private authStorage = AuthStorage.create(join(getAgentDir(), "auth.json"));
+  private settingsManager = SettingsManager.create(
+    process.cwd(),
+    getAgentDir(),
+  );
+  /**
+   * Resolves the llama-server URL by searching in the global settings.json
+   */
+  private async resolveGlobalUrl(): Promise<string | null> {
+    const settings = this.settingsManager.getGlobalSettings();
+    const { llamaServerUrl = null } = settings as Record<string, string>;
+    return llamaServerUrl;
+  }
+  /**
+   * Resolves the project-level llama-server URL: the deprecated .pi/llama-server.json is tried first, then the project's .pi/settings.json
+   */
+  private async resolveProjectUrl(): Promise<string | null> {
+    // If the deprecated file still exists, queue a deprecation warning and use its url
+    try {
+      const filePath = join(process.cwd(), ".pi", "llama-server.json");
+      const { url = null } = JSON.parse(await readFile(filePath, "utf-8"));
+      const messages = [
+        "[pi-llama-cpp]",
+        "The project-level `.pi/llama-server.json` file has been deprecated.",
+        "It will work for now, but you must follow these instructions as soon as possible:",
+        '- Move your url to the project-level `.pi/settings.json` file as {"llamaServerUrl": "<url>"}.',
+        "- Remove the old `.pi/llama-server.json` file.",
+      ];
+      this.warnings.push(messages.join("\n"));
+      return url;
+    } catch {
+      // No old file available, continue as normal
+    }
+    const settings = this.settingsManager.getProjectSettings();
+    const { llamaServerUrl = null } = settings as Record<string, string>;
+    return llamaServerUrl;
+  }
+  /**
+   * Resolves the llama-server URL from the environment
+   */
+  private async resolveEnvUrl(): Promise<string | null> {
+    return process.env.LLAMA_SERVER_URL ?? null;
+  }
+  /**
+   * Tries all possible ways to retrieve the llama-server URL(s)
+   */
+  private async extractJoinedUrls(): Promise<string> {
+    // 1. per-project config
+    let response = await this.resolveProjectUrl();
+    if (response) return response;
+    // 2. env
+    response = await this.resolveEnvUrl();
+    if (response) return response;
+    // 3. global settings
+    response = await this.resolveGlobalUrl();
+    if (response) return response;
+    // 4. default
+    return DEFAULT_LLAMA_SERVER_URL;
+  }
+  /**
+   * Resolves URLs where llama-servers are running (cached)
+   */
+  async resolveUrls(): Promise<string[]> {
+    if (this.cachedUrls.length > 0) return this.cachedUrls;
+    const raw = await this.extractJoinedUrls();
+    const urls = raw
+      .split(";")
+      .map((u) => u.trim())
+      .filter((u) => u.length > 0)
+      .map((u) => u.replace(/\/+$/, ""));
+    this.cachedUrls = urls;
+    return this.cachedUrls;
+  }
+  /**
+   * Resolves API key for the provider ID using Pi's AuthStorage
+   */
+  async resolveApiKey(providerId: string): Promise<string> {
+    this.authStorage.reload();
+    const apiKey = await this.authStorage.getApiKey(providerId);
+    return apiKey ?? API_KEY_PLACEHOLDER;
+  }
+  /**
+   * Returns the warnings collected during URL resolution and clears the internal list.
+   */
+  getWarnings(): string[] {
+    const warnings = [...this.warnings];
+    this.warnings.length = 0;
+    return warnings;
+  }
+  /**
+   * Resolves the current thinking level from Pi.
+   *
+   * @returns Selected level
+   */
+  resolveThinkingLevel(): ThinkingLevel | undefined {
+    return this.settingsManager.getDefaultThinkingLevel();
+  }
+  /**
+   * Resolves the effective thinking budgets from settings
+   *
+   * @returns Thinking budgets
+   */
+  resolveThinkingBudgets(): Record<ThinkingLevel, number> {
+    const settingsBudgets = this.settingsManager.getThinkingBudgets() ?? {};
+    const availableBudgets = {
+      ...DEFAULT_THINKING_BUDGETS,
+      ...settingsBudgets,
+    };
+    return availableBudgets;
+  }
+}

package/src/server.ts ADDED Viewed

@@ -0,0 +1,187 @@
+import { PROVIDER_NAME, PROVIDER_PREFIX } from "./constants";
+import { Mode } from "./enums/mode";
+import { ServerStatus } from "./enums/serverStatus";
+import { HealthEndpoint } from "./interfaces/endpoints/health";
+import { ModelsEndpoint } from "./interfaces/endpoints/models";
+import { PropsEndpoint } from "./interfaces/endpoints/props";
+import { BaseModel } from "./models/baseModel";
+import { LegacyModel } from "./models/legacyModel";
+import { RouterModel } from "./models/routerModel";
+import { SingleModel } from "./models/singleModel";
+import { ConfigResolver } from "./resolver";
+export class Server {
+  public readonly models: BaseModel[] = [];
+  private configResolver = new ConfigResolver();
+  constructor(readonly baseUrl: string) {}
+  /**
+   * Generates a unique provider ID from a server URL.
+   */
+  get providerId(): string {
+    return `${PROVIDER_PREFIX}=${this.baseUrl}`;
+  }
+  /**
+   * Generates a human-readable provider name from a server URL.
+   */
+  get providerName(): string {
+    return `${PROVIDER_NAME} (${this.baseUrl})`;
+  }
+  /**
+   * Retrieves the API key from the resolver
+   * @returns The API key
+   */
+  async getApiKey(): Promise<string> {
+    return await this.configResolver.resolveApiKey(this.providerId);
+  }
+  /**
+   * Fetches models from the server and populates {@link models}
+   */
+  async initialize() {
+    const { data } = await this.fetchModels();
+    const mode = await this.detectServerMode();
+    // Setup models
+    const modelCtor = {
+      [Mode.ROUTER]: RouterModel,
+      [Mode.LEGACY]: LegacyModel,
+      [Mode.SINGLE]: SingleModel,
+    }[mode];
+    const models: BaseModel[] = data
+      .map((m) => new modelCtor(m, this))
+      .sort((a, b) => (a.id > b.id ? 1 : a.id === b.id ? 0 : -1));
+    this.models.length = 0;
+    this.models.push(...models);
+  }
+  /**
+   * Detects the mode of the server
+   *
+   * @returns The detected mode
+   */
+  private async detectServerMode(): Promise<Mode> {
+    const { role } = await this.fetchServerProps();
+    const { data } = await this.fetchModels();
+    if (role === "router") return Mode.ROUTER;
+    if ("max_model_len" in data[0]) return Mode.LEGACY;
+    return Mode.SINGLE;
+  }
+  /**
+   * Checks if the server is ready, with a timeout.
+   *
+   * @param timeout Maximum time to wait for the health check
+   * @returns The server status
+   */
+  async isReady(timeout: number): Promise<ServerStatus> {
+    try {
+      const timeoutPromise = new Promise<never>((_, reject) =>
+        setTimeout(() => reject(new Error("timeout")), timeout),
+      );
+      const health = await Promise.race([
+        this.fetchServerHealth(),
+        timeoutPromise,
+      ]);
+      if (health.status === "ok") {
+        return ServerStatus.READY;
+      }
+      return ServerStatus.UNREACHABLE;
+    } catch (error) {
+      if (error instanceof Error && error.message === "timeout") {
+        return ServerStatus.TIMEOUT;
+      }
+      return ServerStatus.UNREACHABLE;
+    }
+  }
+  /**
+   * Retrieves the health status of the server
+   *
+   * @returns The health status
+   */
+  async fetchServerHealth(): Promise<HealthEndpoint> {
+    return await this.rpc<HealthEndpoint>("/health");
+  }
+  /**
+   * Fetches models from the server
+   *
+   * @returns The models from the server
+   */
+  async fetchModels(): Promise<ModelsEndpoint> {
+    return await this.rpc<ModelsEndpoint>("/v1/models");
+  }
+  /**
+   * Fetches general properties of the server
+   *
+   * @returns The properties of the server
+   */
+  async fetchServerProps(): Promise<PropsEndpoint> {
+    return await this.rpc<PropsEndpoint>("/props?autoload=false");
+  }
+  /**
+   * Fetches properties of a specific model from the server
+   *
+   * @param modelId The ID of the model
+   * @returns The properties of the specified model
+   */
+  async fetchModelProps(modelId: string): Promise<PropsEndpoint> {
+    return await this.rpc<PropsEndpoint>(
+      `/props?model=${modelId}&autoload=false`,
+    );
+  }
+  /**
+   * Sends a load or unload request for a specific model to the server
+   *
+   * @param resource The specified resource ("load" | "unload")
+   * @param model The targeted model
+   */
+  async postRequest(
+    resource: "load" | "unload",
+    model: string,
+  ): Promise<ModelsEndpoint> {
+    return await this.rpc<ModelsEndpoint>(`/models/${resource}`, { model });
+  }
+  /**
+   * Makes an HTTP request to the llama-server and returns the parsed JSON response
+   *
+   * @param endpoint The endpoint path to fetch (e.g. "/health")
+   * @param body The optional request body for POST requests
+   * @returns The parsed JSON response from the server
+   */
+  private async rpc<T>(
+    endpoint: string,
+    body?: Record<string, unknown>,
+  ): Promise<T> {
+    const url = `${this.baseUrl}${endpoint}`;
+    const apiKey = await this.getApiKey();
+    const data = {
+      method: body ? "POST" : "GET",
+      headers: body ? { "Content-Type": "application/json" } : undefined,
+      body: body ? JSON.stringify(body) : undefined,
+    };
+    const res = await fetch(url, {
+      ...data,
+      headers: {
+        ...data.headers,
+        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
+      },
+    });
+    const response: T = await res.json();
+    return response;
+  }
+}