npm - pi-llama-cpp - Versions diffs - 0.2.1 → 0.2.2 - Mend

pi-llama-cpp 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/README.md +23 -7
package/package.json +1 -1
package/src/constants.ts +1 -1
package/src/events.ts +3 -3
package/src/handlers.ts +2 -2
package/src/interfaces/auth.ts +10 -0
package/src/interfaces/endpoints/health.ts +6 -0
package/src/interfaces/endpoints/models.ts +60 -0
package/src/interfaces/endpoints/props.ts +29 -0
package/src/interfaces/endpoints/slots.ts +15 -0
package/src/models/baseModel.ts +10 -2
package/src/models/routerModel.ts +8 -16
package/src/models/singleModel.ts +24 -15
package/src/tools/resolver.ts +2 -2
package/src/tools/retriever.ts +6 -8
package/src/interfaces/IAuthFile.ts +0 -10
package/src/interfaces/IRouterModel.ts +0 -17
package/src/interfaces/ISingleModel.ts +0 -20
/package/src/interfaces/{IModelSelectEvent.ts → events.ts} +0 -0

package/README.md CHANGED Viewed

@@ -18,9 +18,13 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [l
 | 🟢 | Loaded | Model is active and ready to use |
 | 🟡 | Loading | Model is currently being loaded |
 | 🔴 | Failed | Model failed to load |
-| 🔵 | Sleeping | Model is loaded but inactive (router mode) |
+| 🔵 | Sleeping | Model is available, but inactive |
 | ⚪ | Unloaded | Model is not loaded on the server |
+> **Note**: The `Sleeping` status only shows when you start your server with `llama-server --sleep-idle-seconds <n> ...`.
+This is a **llama.cpp server flag** that tells the server to put idle models to sleep after `n` seconds.
+The model awakens automatically when you send a message.
 ## Installation
 This package is a Pi extension. Install it with
@@ -65,6 +69,8 @@ If your llama.cpp server requires authentication, use `/login` in Pi, select the
 Alternatively, configure the API key in `~/.pi/agent/auth.json` using the provider ID `llama-server`:
+> **Note**: The provider is displayed as **Llama.cpp** in the Pi UI, but its internal identifier is `llama-server` — use this ID when configuring `auth.json` or other programmatic access.
 ```json
 {
   "llama-server": {
@@ -86,21 +92,24 @@ Make sure your llama.cpp server is running with the appropriate flags.
 llama-server --models-preset path/to/presets.ini ...
 ```
-The extension reads the context size from the preset file using the `ctx-size` and/or `fit-ctx` keys.
 - For single-model mode, start the server with:
 ```bash
-llama-server --model path/to/model.gguf --ctx-size 128000 ...
+llama-server --model path/to/model.gguf ...
 ```
+The extension determines the context size as follows:
+- **Router mode** — reads from the preset file's `ctx-size` and/or `fit-ctx` keys
+- **Single mode** — reads from the `/slots` endpoint (stores it in cache afterwards)
+- Falls back to `128000` if not available
 ### Commands
 | Command   | Description                                                                                |
 | --------- | ------------------------------------------------------------------------------------------ |
 | `/models` | Browse your models with live status. Select a model to load, switch, or unload it.         |
-> **Note:** When the llama.cpp server is unreachable, `/models` is still available but displays an error notification with the configured server URL.
+> **Note:** When the llama.cpp server is unreachable, `/models` is still available but shows the description `Llama.cpp models (offline)` and displays an error notification with the configured server URL.
 ### Model Actions
@@ -117,13 +126,20 @@ When browsing models via the `/models` command, you can:
 ### Model Selection Event
-When Pi switches models (via `model_select`), the extension automatically loads the selected model on the llama.cpp server. This keeps the server in sync with the active model in Pi.
+When you switch models via Pi's model picker (instead of using the `/models` command), the extension listens for the `model_select` event and loads the requested model before the conversation begins.
+This keeps the server in sync with the active model in Pi, regardless of how the switch was initiated — you don't need to manually load models before using them.
+### Loading Models
+When you trigger a load, switch, or retry action, the extension polls the server to track progress. If a model takes longer than **60 seconds** to load, the polling times out with an error.
+> **Note:** The timeout is only for the polling. The model might still be loading.
 ### Model Configuration
 Each model exposed to Pi includes the following defaults:
-- **`maxTokens`** — `16384` (maximum tokens per response)
+- **`maxTokens`** — `32000` (maximum possible tokens per response according to Pi's source code)
 - **`reasoning`** — `true` (assumed, as llama.cpp's `/models` endpoint does not expose it)
 - **`cost`** — all zero (local model)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-llama-cpp",
-  "version": "0.2.1",
+  "version": "0.2.2",
   "description": "Pi extension for llama.cpp integration. Supports both router and single modes",
   "keywords": [
     "pi",

package/src/constants.ts CHANGED Viewed

@@ -21,7 +21,7 @@ export const DEFAULT_CTX = 128000;
 /**
  * Maximum number of tokens a model can generate in a single response
  */
-export const MAX_TOKENS = 16384;
+export const MAX_TOKENS = 32000;
 /**
  * Polling interval (ms) for checking model load status

package/src/events.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { ExtensionContext } from "@mariozechner/pi-coding-agent";
-import { PROVIDER_NAME } from "./constants";
-import { ModelSelectEvent } from "./interfaces/IModelSelectEvent";
+import { PROVIDER_ID } from "./constants";
+import { ModelSelectEvent } from "./interfaces/events";
 import { listModels } from "./tools/retriever";
 /**
@@ -12,7 +12,7 @@ export const onModelSelect = async (
   event: ModelSelectEvent,
   ctx: ExtensionContext,
 ) => {
-  if (event.model.provider !== PROVIDER_NAME) return;
+  if (event.model.provider !== PROVIDER_ID) return;
   const models = await listModels();
   const model = models.find((m) => m.id === event.model.id);

package/src/handlers.ts CHANGED Viewed

@@ -35,7 +35,7 @@ const selectModel = async (
 const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
   const routerModeActions: Record<Status, Array<Action>> = {
     [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
-    [Status.LOADING]: [Action.CANCEL],
+    [Status.LOADING]: [Action.INFO, Action.CANCEL],
     [Status.FAILED]: [Action.RETRY, Action.CANCEL],
     [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
     [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
@@ -45,7 +45,7 @@ const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
     [Status.LOADED]: [Action.INFO, Action.CANCEL],
     [Status.LOADING]: [Action.CANCEL],
     [Status.FAILED]: [Action.CANCEL],
-    [Status.SLEEPING]: [Action.CANCEL],
+    [Status.SLEEPING]: [Action.INFO, Action.CANCEL],
     [Status.UNLOADED]: [Action.CANCEL],
   };

package/src/interfaces/auth.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import { PROVIDER_ID } from "../constants";
+export interface Auth {
+  type: string;
+  key: string;
+}
+export interface AuthFile {
+  [PROVIDER_ID]: Auth;
+}

package/src/interfaces/endpoints/health.ts ADDED Viewed

@@ -0,0 +1,6 @@
+/**
+ * The structure of llama-server's /health endpoint
+ */
+export interface HealthEndpoint {
+  status: "ok";
+}

package/src/interfaces/endpoints/models.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * The structure of llama-server's /models endpoint
+ *
+ * In single mode, the `models` property is returned alongside `data`
+ * In router mode, the `models` property is not returned
+ */
+export interface ModelsEndpoint {
+  models?: ModelProperty[];
+  object: string;
+  data: DataProperty[];
+}
+export interface ModelProperty {
+  name: string;
+  model: string;
+  modified_at: string;
+  size: string;
+  digest: string;
+  type: string;
+  description: string;
+  tags: string[];
+  capabilities: string[];
+  parameters: string;
+  details: {
+    parent_model: string;
+    format: string;
+    family: string;
+    families: string[];
+    parameter_size: string;
+    quantization_level: string;
+  };
+}
+export interface DataProperty {
+  id: string;
+  aliases?: string[];
+  tags: string[];
+  object: string;
+  owned_by: string;
+  created: number;
+  status?: StatusProperty;
+  meta?: MetaProperty;
+}
+interface StatusProperty {
+  value: string;
+  args: string[];
+  preset: string;
+  exit_code?: number;
+  failed?: boolean;
+}
+interface MetaProperty {
+  vocab_type: number;
+  n_vocab: number;
+  n_ctx_train: number;
+  n_embd: number;
+  n_params: number;
+  size: number;
+}

package/src/interfaces/endpoints/props.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * The structure of llama-server's /props endpoint
+ *
+ * In single mode, applies to /props
+ * In router mode, applies to /props?model=<id>
+ */
+export interface PropsEndpoint {
+  default_generation_settings: Record<string, any>;
+  total_slots: number;
+  model_alias: string;
+  model_path: string;
+  modalities: {
+    vision: boolean;
+    audio: boolean;
+  };
+  media_marker: string;
+  endpoint_slots: boolean;
+  endpoint_props: boolean;
+  endpoint_metrics: boolean;
+  webui: boolean;
+  webui_settings: Record<string, any>;
+  chat_template: string;
+  chat_template_caps: Record<string, boolean>;
+  bos_token: string;
+  eos_token: string;
+  build_info: string;
+  is_sleeping: boolean;
+}

package/src/interfaces/endpoints/slots.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * The structure of llama-server's /slots endpoint
+ *
+ * In single mode, applies to /slots
+ * In router mode, applies to /slots?model=<id>
+ */
+export interface SlotsEndpoint {
+  id: number;
+  n_ctx: number;
+  speculative: boolean;
+  is_processing: boolean;
+  id_task?: number;
+  params?: Array<Record<string, any>>;
+  next_token?: Array<Record<string, any>>;
+}

package/src/models/baseModel.ts CHANGED Viewed

@@ -2,9 +2,12 @@ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
 import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
+import { DataProperty } from "../interfaces/endpoints/models";
 import { rpc } from "../tools/retriever";
 export abstract class BaseModel {
+  constructor(protected readonly model: DataProperty) {}
   protected readonly statusMapper: Record<string, Status> = {
     loaded: Status.LOADED,
     loading: Status.LOADING,
@@ -23,9 +26,13 @@ export abstract class BaseModel {
   abstract get mode(): Mode;
-  abstract get id(): string;
+  get id(): string {
+    return this.model.id;
+  }
-  abstract get name(): string;
+  get name(): string {
+    return this.model.aliases?.[0] || this.model.id;
+  }
   get reasoning(): boolean {
     // We don't have a way to detect this, so we'll fall back to true
@@ -67,6 +74,7 @@ export abstract class BaseModel {
       `Reasoning    : ${this.reasoning}`,
       `Capabilities : ${this.capabilities.join(", ")}`,
       `Context size : ${await this.getContextSize()}`,
+      `Status       : ${await this.getStatus()}`,
     ];
     const response = `${messages.join("\n")}\n`;

package/src/models/routerModel.ts CHANGED Viewed

@@ -1,40 +1,32 @@
 import { DEFAULT_CTX } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { IRouterModel } from "../interfaces/IRouterModel";
+import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 export class RouterModel extends BaseModel {
-  constructor(private readonly model: IRouterModel) {
-    super();
+  constructor(protected readonly model: DataProperty) {
+    super(model);
   }
   get mode(): Mode {
     return Mode.ROUTER;
   }
-  get id(): string {
-    return this.model.id;
-  }
-  get name(): string {
-    return this.model.aliases?.[0] || this.model.id;
-  }
   get capabilities(): ["text"] | ["image"] {
-    const hasImage = this.model.status.args?.includes("--mmproj") ?? false;
+    const hasImage = this.model.status!.args?.includes("--mmproj") ?? false;
     return hasImage ? ["image"] : ["text"];
   }
   async getStatus(): Promise<Status> {
-    const { data } = await rpc<{ data: IRouterModel[] }>("/models");
+    const { data } = await rpc<ModelsEndpoint>("/models");
     const model = data.find((m) => m.id === this.id);
     if (!model) return Status.FAILED;
-    const status = this.statusMapper[model.status.value];
+    const status = this.statusMapper[model.status!.value];
     if (status === Status.UNLOADED) {
-      if (this.model.status.failed) return Status.FAILED;
+      if (this.model.status!.failed) return Status.FAILED;
       return Status.UNLOADED;
     }
@@ -58,7 +50,7 @@ export class RouterModel extends BaseModel {
    * @returns The value
    */
   private extractFrom(arg: string): number | null {
-    const args = this.model.status.args;
+    const args = this.model.status!.args;
     if (!args) return null;
     const ctxIdx = args.indexOf(arg);

package/src/models/singleModel.ts CHANGED Viewed

@@ -1,41 +1,50 @@
 import { DEFAULT_CTX } from "../constants";
 import { Mode } from "../enums/mode";
 import { Status } from "../enums/status";
-import { ISingleModel } from "../interfaces/ISingleModel";
+import { DataProperty, ModelProperty } from "../interfaces/endpoints/models";
+import { PropsEndpoint } from "../interfaces/endpoints/props";
+import { SlotsEndpoint } from "../interfaces/endpoints/slots";
 import { rpc } from "../tools/retriever";
 import { BaseModel } from "./baseModel";
 export class SingleModel extends BaseModel {
-  constructor(private readonly model: ISingleModel) {
-    super();
+  private contextSize?: number;
+  constructor(
+    protected readonly model: DataProperty,
+    private readonly extra: ModelProperty,
+  ) {
+    super(model);
   }
   get mode(): Mode {
     return Mode.SINGLE;
   }
-  get id(): string {
-    return this.model.name;
-  }
-  get name(): string {
-    return this.model.name;
-  }
   get capabilities(): ["text"] | ["image"] {
-    const hasImage = this.model.capabilities.includes("multimodal");
+    const hasImage = this.extra.capabilities.includes("multimodal");
     return hasImage ? ["image"] : ["text"];
   }
   async getStatus(): Promise<Status> {
     // In single-mode, the extension will only work when the model is fully loaded
+    const { is_sleeping } = await rpc<PropsEndpoint>("/props");
+    if (is_sleeping) return Status.SLEEPING;
     return Status.LOADED;
   }
   async getContextSize(): Promise<number> {
-    const slots = await rpc<{ n_ctx: number }[]>("/slots");
-    const [{ n_ctx }] = slots;
+    // Avoid calling the endpoint if we already have the value
+    if (this.contextSize) return this.contextSize;
+    try {
+      const [{ n_ctx }] = await rpc<SlotsEndpoint[]>("/slots");
+      this.contextSize = n_ctx;
-    return n_ctx ?? DEFAULT_CTX;
+      return this.contextSize;
+    } catch {
+      return DEFAULT_CTX;
+    }
   }
 }

package/src/tools/resolver.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { access, constants, readFile } from "node:fs/promises";
 import { join } from "node:path";
 import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_ID } from "../constants";
-import { IAuth, IAuthFile } from "../interfaces/IAuthFile";
+import { Auth, AuthFile } from "../interfaces/auth";
 // The URL is detected once, to reuse forever
 let resolvedUrl: string | undefined;
@@ -60,7 +60,7 @@ export const resolveApiKey = async (): Promise<string> => {
   const authPath = join(process.env.HOME || ".", ".pi", "agent", "auth.json");
   if (!(await fileExists(authPath))) return placeholder;
-  const cfg = await readConfigValue<IAuthFile, IAuth | null>(
+  const cfg = await readConfigValue<AuthFile, Auth | null>(
     authPath,
     PROVIDER_ID,
   );

package/src/tools/retriever.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { IRouterModel } from "../interfaces/IRouterModel";
-import { ISingleModel } from "../interfaces/ISingleModel";
+import { HealthEndpoint } from "../interfaces/endpoints/health";
+import { ModelsEndpoint } from "../interfaces/endpoints/models";
 import { BaseModel } from "../models/baseModel";
 import { RouterModel } from "../models/routerModel";
 import { SingleModel } from "../models/singleModel";
@@ -11,7 +11,7 @@ import { resolveApiKey, resolveUrl } from "./resolver";
  */
 export const isServerReady = async (): Promise<boolean> => {
   try {
-    const { status } = await rpc<{ status: string }>("/health");
+    const { status } = await rpc<HealthEndpoint>("/health");
     return status === "ok";
   } catch {
     return false;
@@ -59,13 +59,11 @@ export const rpc = async <T>(
  * @returns The list of models
  */
 export const listModels = async (): Promise<BaseModel[]> => {
-  const { models, data } = await rpc<{
-    models?: ISingleModel[];
-    data: IRouterModel[];
-  }>("/models");
+  const { models, data } = await rpc<ModelsEndpoint>("/models");
   if (models) {
-    return models.map((m) => new SingleModel(m));
+    const [extra] = models;
+    return data.map((m) => new SingleModel(m, extra));
   }
   const response = data

package/src/interfaces/IAuthFile.ts DELETED Viewed

@@ -1,10 +0,0 @@
-import { PROVIDER_NAME } from "../constants";
-export interface IAuth {
-  type: string;
-  key: string;
-}
-export interface IAuthFile {
-  [PROVIDER_NAME]: IAuth;
-}

package/src/interfaces/IRouterModel.ts DELETED Viewed

@@ -1,17 +0,0 @@
-interface IRouterModelStatus {
-  value: string;
-  args: string[];
-  preset: string;
-  exit_code?: number;
-  failed?: boolean;
-}
-export interface IRouterModel {
-  id: string;
-  aliases?: string[];
-  tags: string[];
-  object: string;
-  owned_by: string;
-  created: number;
-  status: IRouterModelStatus;
-}

package/src/interfaces/ISingleModel.ts DELETED Viewed

@@ -1,20 +0,0 @@
-export interface ISingleModel {
-  name: string;
-  model: string;
-  modified_at: string;
-  size: string;
-  digest: string;
-  type: string;
-  description: string;
-  tags: string[];
-  capabilities: string[];
-  parameters: string;
-  details: {
-    parent_model: string;
-    format: string;
-    family: string;
-    families: string[];
-    parameter_size: string;
-    quantization_level: string;
-  };
-}

/package/src/interfaces/{IModelSelectEvent.ts → events.ts} RENAMED Viewed

File without changes