npm - pi-llama-cpp - Versions diffs - 0.5.1 → 0.7.0 - Mend

pi-llama-cpp 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/README.md +96 -30
package/package.json +6 -5
package/src/constants.ts +27 -5
package/src/enums/action.ts +3 -2
package/src/enums/mode.ts +1 -0
package/src/enums/serverStatus.ts +6 -0
package/src/enums/status.ts +1 -0
package/src/index.ts +53 -31
package/src/interfaces/auth.ts +1 -5
package/src/interfaces/endpoints/props.ts +1 -0
package/src/interfaces/levels.ts +7 -0
package/src/managers/command.ts +290 -0
package/src/managers/events.ts +101 -0
package/src/managers/server.ts +136 -0
package/src/models/baseModel.ts +75 -20
package/src/models/legacyModel.ts +45 -0
package/src/models/routerModel.ts +7 -30
package/src/models/singleModel.ts +9 -6
package/src/resolver.ts +152 -0
package/src/server.ts +187 -0
package/tests/commandManager.test.ts +182 -133
package/tests/events.test.ts +256 -0
package/tests/legacyModel.test.ts +112 -0
package/tests/mocks.ts +100 -0
package/tests/resolver.test.ts +143 -106
package/tests/routerModel.test.ts +46 -68
package/tests/server.test.ts +176 -0
package/tests/serverManager.test.ts +130 -0
package/tests/singleModel.test.ts +21 -29
package/src/commands/models.ts +0 -228
package/src/events.ts +0 -26
package/src/manager.ts +0 -96
package/src/tools/resolver.ts +0 -136
package/src/tools/retriever.ts +0 -71
package/tests/handlers.test.ts +0 -164
package/tests/modelsCommand.test.ts +0 -270

package/src/managers/command.ts ADDED Viewed

@@ -0,0 +1,290 @@
+import type {
+  ExtensionAPI,
+  ExtensionCommandContext,
+} from "@earendil-works/pi-coding-agent";
+import { AutocompleteItem } from "@earendil-works/pi-tui";
+import { PROVIDER_NAME } from "../constants";
+import { Action } from "../enums/action";
+import { Mode } from "../enums/mode";
+import { Status } from "../enums/status";
+import { BaseModel } from "../models/baseModel";
+import { EventManager } from "./events";
+import { ServerManager } from "./server";
+/**
+ * Implements the `/models` command: argument completion, the `info` and
+ * `unload` shortcuts, and the interactive model/action menu.
+ */
+export class CommandManager {
+  constructor(private readonly serverManager: ServerManager) {}
+  /**
+   * Sets up the argument completions for the `/models` command
+   *
+   * @param prefix Prefix written by the user
+   * @returns Completions whose value starts with that prefix, or null when none match
+   */
+  getArgumentCompletions(prefix: string): AutocompleteItem[] | null {
+    const available = [
+      {
+        value: "info",
+        label: "info",
+        description: "Show information of all models",
+      },
+      {
+        value: "unload",
+        label: "unload",
+        description: "Unload all models",
+      },
+    ];
+    const filtered = available.filter((a) => a.value.startsWith(prefix));
+    return filtered.length > 0 ? filtered : null;
+  }
+  /**
+   * Executes the action for the `/models` command.
+   *
+   * Providers are always refreshed first and unreachable servers are reported.
+   * `unload` and `info` act on every known model; any other argument opens
+   * the interactive menu.
+   *
+   * @param args Arguments of the command (surrounding whitespace is ignored)
+   * @param ctx The context used by Pi
+   * @param pi The Pi extension
+   */
+  async handleCommand(
+    args: string,
+    ctx: ExtensionCommandContext,
+    pi: ExtensionAPI,
+  ) {
+    // Re-register providers so Pi sees updated model states
+    await this.serverManager.update(pi);
+    // Notify about unreachable servers
+    for (const url of this.serverManager.failedUrls) {
+      this.notifyNotFound(ctx, url);
+    }
+    // Tolerate stray whitespace such as a trailing space after the argument
+    const command = args.trim();
+    if (command === "unload") {
+      await Promise.all(
+        this.serverManager.getAllModels().map((model) => model.unload()),
+      );
+      ctx.ui.notify(`Unloaded all ${PROVIDER_NAME} models`, "info");
+      return;
+    }
+    if (command === "info") {
+      const infos = await Promise.all(
+        this.serverManager.getAllModels().map((model) => model.getInfo()),
+      );
+      ctx.ui.notify(ctx.ui.theme.fg("accent", infos.join("\n")), "info");
+      return;
+    }
+    // Interactive menu: show <name> (<server_url>)
+    await this.runModelsMenu(ctx, pi);
+  }
+  /**
+   * Notifies the user that a server is unreachable.
+   *
+   * @param ctx The context used by Pi
+   * @param url Base URL of the unreachable server
+   */
+  private notifyNotFound(ctx: ExtensionCommandContext, url: string): void {
+    ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
+  }
+  /**
+   * Looks up the Pi registry entry that corresponds to one of our models.
+   *
+   * @param ctx The context used by Pi
+   * @param model The model to look up
+   * @returns The matching entry of Pi's model registry
+   * @throws Error when the registry has no entry for the model
+   */
+  private findPiModel(ctx: ExtensionCommandContext, model: BaseModel) {
+    const piModel = ctx.modelRegistry.find(model.serverId, model.id);
+    if (!piModel)
+      throw new Error(`Cannot find model ${model.name} in pi registry`);
+    return piModel;
+  }
+  /**
+   * Runs the interactive model selection menu and executes the chosen action.
+   *
+   * Info, Unload and Switch are awaited. Load, Load & Switch and Retry start
+   * the load in the background and report the outcome through notifications.
+   *
+   * @param ctx The context used by Pi
+   * @param pi The Pi extension
+   * @throws Error when switching to a model that Pi's registry does not know
+   */
+  private async runModelsMenu(
+    ctx: ExtensionCommandContext,
+    pi: ExtensionAPI,
+  ): Promise<void> {
+    const event = await this.modelSelectionHandler(
+      ctx,
+      this.serverManager.getAllModels(),
+    );
+    if (!event) return;
+    const { action, model } = event;
+    // Action: Cancel
+    if (!action || action === Action.CANCEL) return;
+    // Action: Info
+    if (action === Action.INFO) {
+      const info = await model.getInfo();
+      ctx.ui.notify(`${info}`, "info");
+      return;
+    }
+    // Action: Unload
+    if (action === Action.UNLOAD) {
+      await model.unload();
+      ctx.ui.notify(`Unloaded ${model.name}`, "info");
+      return;
+    }
+    // Action: Switch
+    if (action === Action.SWITCH) {
+      await pi.setModel(this.findPiModel(ctx, model));
+      ctx.ui.notify(`Model ${model.name} ready`, "info");
+      return;
+    }
+    // Actions: Load / Load & Switch / Retry
+    const loadActions = [Action.LOAD, Action.LOAD_AND_SWITCH, Action.RETRY];
+    if (!loadActions.includes(action)) return;
+    ctx.ui.notify(`Loading ${model.name}...`, "info");
+    EventManager.inflightModel = model;
+    const onSuccess = async () => {
+      const piModel = this.findPiModel(ctx, model);
+      // Read the status once so both checks judge the same snapshot
+      const status = await model.getStatus();
+      // Verify auth
+      if (status === Status.UNAUTHORIZED)
+        throw new Error(
+          `Unauthorized for ${model.name}. Use /login and add your API key.`,
+        );
+      // Verify failure
+      if (status === Status.FAILED)
+        throw new Error(`Failed to load model ${model.name}`);
+      // Select the model if asked
+      if (action === Action.LOAD_AND_SWITCH) await pi.setModel(piModel);
+      ctx.ui.notify(`Model ${model.name} ready`, "info");
+    };
+    const onFailure = (err: unknown) => {
+      const message = err instanceof Error ? err.message : String(err);
+      try {
+        ctx.ui.notify(message, "error");
+      } catch {
+        // ctx went stale between error and notification
+      }
+    };
+    // Load the model without blocking the UI
+    void model
+      .load()
+      .then(onSuccess)
+      .catch(onFailure)
+      .finally(() => {
+        // A newer load may have replaced the marker meanwhile; leave it alone
+        if (EventManager.inflightModel === model)
+          EventManager.resetInflightModel();
+      });
+  }
+  /**
+   * Handles the menu for model selection.
+   * Loops: select model → select action → handle action.
+   *
+   * Escape on actions menu goes back to model selection.
+   * Escape on model selection exits.
+   *
+   * @param ctx The context used by Pi
+   * @param models Models offered in the menu
+   * @returns The selected action and model, or null when the user exits
+   */
+  private async modelSelectionHandler(
+    ctx: ExtensionCommandContext,
+    models: BaseModel[],
+  ): Promise<{ action: Action; model: BaseModel } | null> {
+    while (true) {
+      // Select the model
+      const model = await this.selectModel(ctx, models);
+      if (!model) return null;
+      // Select the action
+      const actions = await this.getActionsForModel(model);
+      const action = await this.selectAction(ctx, model, actions);
+      if (action === null) {
+        // Escape key pressed => back to model selection
+        continue;
+      }
+      // Return the selected action and model
+      return { action, model };
+    }
+  }
+  /**
+   * Select a model from the list. Returns null if user cancels.
+   *
+   * @param ctx The context used by Pi
+   * @param models Models offered in the menu
+   * @returns The model selected by the user, or null
+   */
+  private async selectModel(
+    ctx: ExtensionCommandContext,
+    models: BaseModel[],
+  ): Promise<BaseModel | null> {
+    const labels = await Promise.all(
+      models.map(async (model) => ({
+        label: (await model.getLabel()).trim(),
+        serverUrl: model.serverUrl,
+      })),
+    );
+    // Count grapheme clusters (not UTF-16 code units) so emoji padding aligns visually.
+    // One segmenter is shared by every measurement of this menu.
+    const segmenter = new Intl.Segmenter();
+    const graphemeLength = (str: string) =>
+      [...segmenter.segment(str)].length;
+    // Decorate the label so the spacing makes it seem more like a table
+    const maxLength = Math.max(
+      ...labels.map(({ label }) => graphemeLength(label)),
+    );
+    const choices = labels.map(({ label, serverUrl }) => {
+      const extraPadding = 2;
+      const padLen = maxLength - graphemeLength(label) + extraPadding;
+      return `${label}${" ".repeat(padLen)} [Server: ${serverUrl}]`;
+    });
+    const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, choices);
+    if (!choice) return null;
+    const idx = choices.indexOf(choice);
+    // An unknown choice yields null rather than undefined
+    return models[idx] ?? null;
+  }
+  /**
+   * Get available actions for a model based on its mode and status.
+   *
+   * @param model The model whose current status is queried
+   * @returns The actions offered for the model's current status
+   */
+  private async getActionsForModel(model: BaseModel): Promise<Array<Action>> {
+    const allActions: Record<Status, Array<Action>> = {
+      // Unload is only offered for loaded models in router mode
+      [Status.LOADED]:
+        model.mode === Mode.ROUTER
+          ? [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL]
+          : [Action.SWITCH, Action.INFO, Action.CANCEL],
+      [Status.LOADING]: [Action.INFO, Action.CANCEL],
+      [Status.FAILED]: [Action.RETRY, Action.CANCEL],
+      [Status.SLEEPING]: [
+        Action.SWITCH,
+        Action.UNLOAD,
+        Action.INFO,
+        Action.CANCEL,
+      ],
+      [Status.UNLOADED]: [Action.LOAD_AND_SWITCH, Action.LOAD, Action.CANCEL],
+      [Status.UNAUTHORIZED]: [Action.INFO, Action.CANCEL],
+    };
+    const status = await model.getStatus();
+    return allActions[status];
+  }
+  /**
+   * Selects an action for a model.
+   *
+   * @param ctx The context used by Pi
+   * @param model The model the action applies to (its name is the menu title)
+   * @param actions Actions offered to the user
+   * @returns The selected action, or null when the user cancels
+   */
+  private async selectAction(
+    ctx: ExtensionCommandContext,
+    model: BaseModel,
+    actions: Array<Action>,
+  ): Promise<Action | null> {
+    const labels = actions.map((a) => String(a));
+    const choice = await ctx.ui.select(`${model.name}`, labels);
+    if (!choice) return null;
+    const idx = labels.indexOf(choice);
+    // An unknown choice yields null rather than undefined
+    return actions[idx] ?? null;
+  }
+}

package/src/managers/events.ts ADDED Viewed

@@ -0,0 +1,101 @@
+import {
+  type BeforeProviderRequestEvent,
+  type ExtensionContext,
+} from "@earendil-works/pi-coding-agent";
+import { READABLE_TIMEOUT } from "../constants";
+import { ModelSelectEvent } from "../interfaces/events";
+import { BaseModel } from "../models/baseModel";
+import { ConfigResolver } from "../resolver";
+import { Server } from "../server";
+/**
+ * Handlers for the Pi events this extension reacts to: model selection,
+ * session switches and outgoing provider requests.
+ */
+export class EventManager {
+  /** Model whose load is currently in progress, or null when there is none. */
+  static inflightModel: BaseModel | null = null;
+  private readonly resolver = new ConfigResolver();
+  constructor(private readonly servers: Server[]) {}
+  /**
+   * Clears the in-flight model marker.
+   */
+  static resetInflightModel() {
+    EventManager.inflightModel = null;
+  }
+  /**
+   * Loads the model Pi has just selected, if it belongs to one of our servers,
+   * and notifies the user about the outcome.
+   *
+   * @param event Model selection event
+   * @param ctx Pi context
+   */
+  async onModelSelect(event: ModelSelectEvent, ctx: ExtensionContext) {
+    const { provider, id } = event.model;
+    for (const server of this.servers) {
+      if (server.providerId !== provider) continue;
+      const selected = server.models.find((candidate) => candidate.id === id);
+      if (!selected) continue;
+      const announceReady = () =>
+        ctx.ui.notify(`Model ${selected.name} ready`, "info");
+      const announceFailure = () =>
+        ctx.ui.notify(`Failed to load model ${selected.name}`, "error");
+      ctx.ui.notify(`Loading ${selected.name}...`, "info");
+      await selected.load().then(announceReady).catch(announceFailure);
+      // Only the first matching model is handled
+      return;
+    }
+  }
+  /**
+   * Session-switch handler. Does nothing unless a model load is in-flight;
+   * otherwise warns the user and waits READABLE_TIMEOUT before returning.
+   *
+   * @param ctx Pi context
+   */
+  async onSessionBeforeSwitch(ctx: ExtensionContext) {
+    const pending = EventManager.inflightModel;
+    if (!pending) return;
+    const warning = [
+      `Session change detected while model '${pending.name}' was still loading.`,
+      "Model load will continue in the background, but UI might not update.",
+      "",
+      "Verify that your new model is loaded, or use /models to re-select it afterwards.",
+    ].join("\n");
+    ctx.ui.notify(warning, "warning");
+    // Give the user time to read the warning before returning
+    await new Promise<void>((resolve) => {
+      setTimeout(resolve, READABLE_TIMEOUT);
+    });
+  }
+  /**
+   * Intercepts the outgoing request and adds thinking settings for models
+   * served by one of our servers. Other payloads are returned unchanged.
+   *
+   * - level "off": adds `chat_template_kwargs.enable_thinking = false`
+   * - level "xhigh": payload is left untouched
+   * - any other level: adds `thinking_budget_tokens` from the resolved budgets
+   *
+   * @param event Request event
+   * @returns Updated payload
+   */
+  async onBeforeProviderRequest(event: BeforeProviderRequestEvent) {
+    const payload = event.payload as { model?: string };
+    const requestedId = payload.model;
+    if (!requestedId) return payload;
+    // Only touch requests for models served by one of our servers
+    const servesRequestedModel = (server: Server) =>
+      server.models.some(({ id }) => id === requestedId);
+    if (!this.servers.some(servesRequestedModel)) return payload;
+    // Pi's current thinking level decides which budget applies
+    const level = this.resolver.resolveThinkingLevel() ?? "medium";
+    const budgets = this.resolver.resolveThinkingBudgets();
+    const thinking_budget_tokens = budgets[level];
+    switch (level) {
+      case "off":
+        return { ...payload, chat_template_kwargs: { enable_thinking: false } };
+      case "xhigh":
+        return payload;
+      default:
+        return { ...payload, thinking_budget_tokens };
+    }
+  }
+}

package/src/managers/server.ts ADDED Viewed

@@ -0,0 +1,136 @@
+import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
+import { API_TYPE, PROVIDER_NAME, SERVER_TIMEOUT } from "../constants";
+import { ServerStatus } from "../enums/serverStatus";
+import { BaseModel } from "../models/baseModel";
+import { Server } from "../server";
+/**
+ * Registers one Pi provider per configured server and keeps track of the
+ * servers that could not be reached.
+ */
+export class ServerManager {
+  /** Base URLs of the servers that failed during the latest `update` call. */
+  readonly failedUrls: string[] = [];
+  /** Warnings accumulated by health checks until `getWarnings` drains them. */
+  private readonly warnings: string[] = [];
+  constructor(private readonly servers: Server[]) {}
+  /**
+   * Verifies reachability of servers and registers the providers
+   *
+   * @param pi The Pi extension API
+   */
+  async initialize(pi: ExtensionAPI) {
+    // Register the providers with a timeout first
+    await this.update(pi, SERVER_TIMEOUT);
+  }
+  /**
+   * Registers one provider per server in Pi with their model configurations.
+   * The manual awaiting per-server is deliberate (we want them in order)
+   *
+   * `failedUrls` is reset on every call. When a (non-zero) timeout is given,
+   * servers are health-checked first and only the ready ones are registered;
+   * without it every server is attempted.
+   *
+   * @param pi The Pi extension API
+   * @param timeout (Optional) Timeout before assuming server has failed
+   */
+  async update(pi: ExtensionAPI, timeout?: number) {
+    this.failedUrls.length = 0;
+    const registrableServers = timeout
+      ? await this.findRegistrableServers(timeout)
+      : this.servers;
+    // Initialization and registration
+    for (const server of registrableServers) {
+      try {
+        await server.initialize();
+        await this.registerProvider(server, pi);
+      } catch {
+        // A failing server is recorded and must not block the remaining ones
+        this.failedUrls.push(server.baseUrl);
+      }
+    }
+  }
+  /**
+   * Runs concurrent health checks and returns only healthy servers.
+   * Every other server gets a warning and is added to `failedUrls`.
+   *
+   * @param timeout Maximum time to wait for each server
+   * @returns Array of servers that passed the health check
+   */
+  private async findRegistrableServers(timeout: number): Promise<Server[]> {
+    const healthResults = await Promise.all(
+      this.servers.map(async (server) => {
+        const status = await server.isReady(timeout);
+        return { server, status };
+      }),
+    );
+    const response: Server[] = [];
+    for (const { server, status } of healthResults) {
+      if (status === ServerStatus.READY) {
+        response.push(server);
+        continue;
+      }
+      // Report the timeout that was actually applied, not the default constant
+      const details =
+        status === ServerStatus.TIMEOUT
+          ? [
+              `${PROVIDER_NAME} server initialization for '${server.baseUrl}' took more than ${timeout} ms, so it has been skipped.`,
+              "Run `/models` to retry without timeout and see all models.",
+            ]
+          : [
+              `${PROVIDER_NAME} server at '${server.baseUrl}' is unreachable.`,
+              "Check the URL and try again. Run `/models` to retry.",
+            ];
+      this.warnings.push(["[pi-llama-cpp]", ...details].join("\n"));
+      this.failedUrls.push(server.baseUrl);
+    }
+    return response;
+  }
+  /**
+   * Creates a Pi provider for the given server
+   *
+   * @param server The server
+   * @param pi The Pi extension API
+   */
+  private async registerProvider(server: Server, pi: ExtensionAPI) {
+    const { baseUrl, models, providerId, providerName } = server;
+    const apiKey = await server.getApiKey();
+    const modelConfigs = await Promise.all(
+      models.map((m) => m.toProviderConfig()),
+    );
+    pi.registerProvider(providerId, {
+      name: providerName,
+      baseUrl: baseUrl,
+      api: API_TYPE,
+      apiKey: apiKey,
+      models: modelConfigs,
+    });
+  }
+  /**
+   * Returns the warnings collected so far and clears the internal buffer,
+   * so each warning is handed out only once.
+   *
+   * @returns A copy of the pending warnings
+   */
+  getWarnings(): string[] {
+    const warnings = [...this.warnings];
+    this.warnings.length = 0;
+    return warnings;
+  }
+  /**
+   * Returns all models from all servers.
+   *
+   * @returns Flat array of all models across all servers
+   */
+  getAllModels(): BaseModel[] {
+    return this.servers.flatMap(({ models }) => models);
+  }
+}