pi-llama-cpp 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,16 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [l
5
5
  ## Features
6
6
 
7
7
  - **Auto-detect models** — discovers all models available on your running llama.cpp server
8
- - **Live status indicators** — see which models are loaded, loading, failed, or unloaded with color-coded icons
8
+ - **Live status indicators** — see which models are loaded, loading, failed, sleeping, or unloaded with color-coded icons
9
+
10
+ | Icon | Status | Description |
11
+ |------|--------|-------------|
12
+ | 🟢 | Loaded | Model is active and ready to use |
13
+ | 🟡 | Loading | Model is currently being loaded |
14
+ | 🔴 | Failed | Model failed to load |
15
+ | 🔵 | Sleeping | Model is loaded but inactive (router mode) |
16
+ | ⚪ | Unloaded | Model is not loaded on the server |
17
+
9
18
  - **Load / unload / switch** — manage models directly from the Pi command palette
10
19
  - **Multi-model router support** — works with both single-model and multi-model llama.cpp server configurations
11
20
  - **Image model support** — detects multimodal models automatically
@@ -51,15 +60,15 @@ The extension resolves the llama.cpp server URL using the following priority ord
51
60
 
52
61
  ### API Key
53
62
 
54
- If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `llama-server` provider.
63
+ If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `Llama.cpp` provider.
55
64
 
56
65
  Alternatively, configure the API key in `~/.pi/agent/auth.json`:
57
66
 
58
67
  ```json
59
68
  {
60
69
  "llama-server": {
61
- "type": "bearer",
62
- "key": "your-api-key-here"
70
+ "type": "api_key",
71
+ "key": "<your-api-key-here>"
63
72
  }
64
73
  }
65
74
  ```
@@ -86,7 +95,7 @@ llama-server --model path/to/model.gguf --ctx-size 128000 ...
86
95
 
87
96
  | Command | Description |
88
97
  | --------- | ------------------------------------------------------------------------------------------ |
89
- | `/models` | Browse llama-server models with live status. Select a model to load, switch, or unload it. |
98
+ | `/models` | Browse your models with live status. Select a model to load, switch, or unload it. |
90
99
 
91
100
  ### Model Actions
92
101
 
@@ -95,10 +104,13 @@ When browsing models via the `/models` command, you can:
95
104
  - **Load & switch** — Load an unloaded model and switch to it
96
105
  - **Switch model** — Switch to a model that is already loaded
97
106
  - **Unload** — Unload a loaded model to free memory
107
+ - **Retry** — Retry loading a failed model
108
+ - **Info** — View model details (ID, capabilities, context size)
109
+ - **Cancel** — Cancel the current operation
98
110
 
99
111
  ### Model Selection Event
100
112
 
101
- When Pi switches models (e.g., via `model_select`), the extension automatically loads the selected model on the llama.cpp server. This keeps the server in sync with the active model in Pi.
113
+ When Pi switches models (via `model_select`), the extension automatically loads the selected model on the llama.cpp server. This keeps the server in sync with the active model in Pi.
102
114
 
103
115
  ### Model Configuration
104
116
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.1.2",
3
+ "version": "0.2.0",
4
4
  "description": "Pi extension for llama.cpp integration. Supports both router and single modes",
5
5
  "keywords": [
6
6
  "pi",
@@ -20,7 +20,19 @@
20
20
  },
21
21
  "pi": {
22
22
  "extensions": [
23
- "./index.ts"
23
+ "./src/index.ts"
24
24
  ]
25
+ },
26
+ "prettier": {
27
+ "plugins": [
28
+ "prettier-plugin-organize-imports"
29
+ ]
30
+ },
31
+ "peerDependencies": {
32
+ "@mariozechner/pi-coding-agent": "*"
33
+ },
34
+ "devDependencies": {
35
+ "@types/node": "^25.6.0",
36
+ "prettier-plugin-organize-imports": "^4.3.0"
25
37
  }
26
38
  }
package/src/constants.ts CHANGED
@@ -1,7 +1,12 @@
1
+ /**
2
+ * This provider's id
3
+ */
4
+ export const PROVIDER_ID = "llama-server";
5
+
1
6
  /**
2
7
  * This provider's name
3
8
  */
4
- export const PROVIDER_NAME = "llama-server";
9
+ export const PROVIDER_NAME = "Llama.cpp";
5
10
 
6
11
  /**
7
12
  * The default URL if the resolver couldn't find it
@@ -1,7 +1,9 @@
1
1
  /** The possible actions for the /models command */
2
- export enum Actions {
2
+ export enum Action {
3
3
  SWITCH = "Switch model",
4
+ RETRY = "Retry",
4
5
  LOAD = "Load & switch",
5
6
  UNLOAD = "Unload",
7
+ INFO = "Info",
6
8
  CANCEL = "Cancel",
7
9
  }
@@ -0,0 +1,5 @@
1
+ /** The possible modes of llama-server models */
2
+ export enum Mode {
3
+ ROUTER = "router",
4
+ SINGLE = "single",
5
+ }
@@ -3,5 +3,6 @@ export enum Status {
3
3
  LOADED = "loaded",
4
4
  LOADING = "loading",
5
5
  FAILED = "failed",
6
+ SLEEPING = "sleeping",
6
7
  UNLOADED = "unloaded",
7
8
  }
package/src/events.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { ExtensionContext } from "@mariozechner/pi-coding-agent";
2
2
  import { PROVIDER_NAME } from "./constants";
3
- import { listModels } from "./tools/retriever";
4
3
  import { ModelSelectEvent } from "./interfaces/IModelSelectEvent";
4
+ import { listModels } from "./tools/retriever";
5
5
 
6
6
  /**
7
7
  * Reacts to a new model event triggered by Pi
package/src/handlers.ts CHANGED
@@ -2,10 +2,11 @@ import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
4
  } from "@mariozechner/pi-coding-agent";
5
+ import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
6
+ import { Action } from "./enums/action";
7
+ import { Mode } from "./enums/mode";
5
8
  import { Status } from "./enums/status";
6
9
  import { BaseModel } from "./models/baseModel";
7
- import { Actions } from "./enums/actions";
8
- import { PROVIDER_NAME } from "./constants";
9
10
 
10
11
  /**
11
12
  * Defines a handler when llama-server is running
@@ -15,7 +16,7 @@ import { PROVIDER_NAME } from "./constants";
15
16
  const modelSelectionHandler = async (
16
17
  ctx: ExtensionCommandContext,
17
18
  models: BaseModel[],
18
- ): Promise<{ action: Actions; model: BaseModel } | null> => {
19
+ ): Promise<{ action: Action; model: BaseModel } | null> => {
19
20
  // Setup the labels
20
21
  const labels = await Promise.all(models.map((m) => m.getLabel()));
21
22
 
@@ -26,19 +27,32 @@ const modelSelectionHandler = async (
26
27
  const idx = labels.indexOf(choice);
27
28
  const model = models[idx];
28
29
 
29
- // Define the actions that the user can do
30
- const allActions = {
31
- [Status.LOADED]: [Actions.UNLOAD, Actions.CANCEL],
32
- [Status.LOADING]: [Actions.CANCEL],
33
- [Status.FAILED]: [Actions.SWITCH, Actions.CANCEL],
34
- [Status.UNLOADED]: [Actions.SWITCH, Actions.CANCEL],
30
+ // Router mode actions
31
+ const routerModeActions: Record<Status, Array<Action>> = {
32
+ [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
33
+ [Status.LOADING]: [Action.CANCEL],
34
+ [Status.FAILED]: [Action.RETRY, Action.CANCEL],
35
+ [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
36
+ [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
37
+ };
38
+
39
+ // Single mode actions (more limited)
40
+ const singleModeActions: Record<Status, Array<Action>> = {
41
+ [Status.LOADED]: [Action.INFO, Action.CANCEL],
42
+ [Status.LOADING]: [Action.CANCEL],
43
+ [Status.FAILED]: [Action.CANCEL],
44
+ [Status.SLEEPING]: [Action.CANCEL],
45
+ [Status.UNLOADED]: [Action.CANCEL],
35
46
  };
36
47
 
48
+ // Define the actions that the user can do
49
+ const allActions =
50
+ model.mode === Mode.ROUTER ? routerModeActions : singleModeActions;
51
+
37
52
  const status = await model.getStatus();
38
53
  const actions = allActions[status];
39
54
 
40
- const action = (await ctx.ui.select(`${model.id}`, actions)) as Actions;
41
- if (!action || action === Actions.CANCEL) return null;
55
+ const action = (await ctx.ui.select(`${model.name}`, actions)) as Action;
42
56
 
43
57
  // Send the selected action with the corresponding model
44
58
  return { action, model };
@@ -60,25 +74,40 @@ export const modelsCommandHandler = async (
60
74
  // Detect the model
61
75
  const { action, model } = event;
62
76
 
63
- // Execute the selected action
64
- if (action === Actions.UNLOAD) {
77
+ // Action: Cancel
78
+ if (!action || action === Action.CANCEL) return;
79
+
80
+ // Action: Info
81
+ if (action === Action.INFO) {
82
+ const info = await model.getInfo();
83
+ ctx.ui.notify(`${info}`, "info");
84
+ return;
85
+ }
86
+
87
+ // Action: Unload
88
+ if (action === Action.UNLOAD) {
65
89
  await model.unload();
66
- ctx.ui.notify(`Unloaded ${model.id}`, "info");
67
- } else {
68
- const status = await model.getStatus();
69
- if (status === Status.LOADED) return;
90
+ ctx.ui.notify(`Unloaded ${model.name}`, "info");
91
+ return;
92
+ }
70
93
 
71
- ctx.ui.notify(`Loading ${model.id}...`, "info");
94
+ // Actions: Load/Switch/Retry
95
+ const loadActions = [Action.LOAD, Action.SWITCH, Action.RETRY];
96
+ if (loadActions.includes(action)) {
97
+ ctx.ui.notify(`Loading ${model.name}...`, "info");
72
98
 
73
- // Load the model without blocking the UI
74
99
  const onSuccess = async () => {
75
- const piModel = ctx.modelRegistry.find(PROVIDER_NAME, model.id);
100
+ const piModel = ctx.modelRegistry.find(PROVIDER_ID, model.id);
76
101
  if (!piModel) {
77
- throw new Error(`Cannot find model ${model.id} in pi registry`);
102
+ throw new Error(`Cannot find model ${model.name} in pi registry`);
103
+ }
104
+
105
+ if ((await model.getStatus()) === Status.FAILED) {
106
+ throw new Error("Failed to load model");
78
107
  }
79
108
 
80
109
  await pi.setModel(piModel);
81
- ctx.ui.notify(`Model ${model.id} ready`, "info");
110
+ ctx.ui.notify(`Model ${model.name} ready`, "info");
82
111
  };
83
112
 
84
113
  const onFailure = (err: any) => {
@@ -86,6 +115,7 @@ export const modelsCommandHandler = async (
86
115
  ctx.ui.notify(message, "error");
87
116
  };
88
117
 
118
+ // Load the model without blocking the UI
89
119
  model.load().then(onSuccess).catch(onFailure);
90
120
  }
91
121
  };
@@ -2,11 +2,11 @@ import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
4
  } from "@mariozechner/pi-coding-agent";
5
- import { modelsCommandHandler } from "./src/handlers";
6
- import { isServerReady, listModels } from "./src/tools/retriever";
7
- import { resolveApiKey, resolveUrl } from "./src/tools/resolver";
8
- import { PROVIDER_NAME } from "./src/constants";
9
- import { onModelSelect } from "./src/events";
5
+ import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
6
+ import { onModelSelect } from "./events";
7
+ import { modelsCommandHandler } from "./handlers";
8
+ import { resolveApiKey, resolveUrl } from "./tools/resolver";
9
+ import { isServerReady, listModels } from "./tools/retriever";
10
10
 
11
11
  export default async function (pi: ExtensionAPI) {
12
12
  // Command registration
@@ -36,7 +36,8 @@ export default async function (pi: ExtensionAPI) {
36
36
  });
37
37
 
38
38
  // Provider registration
39
- pi.registerProvider(PROVIDER_NAME, {
39
+ pi.registerProvider(PROVIDER_ID, {
40
+ name: PROVIDER_NAME,
40
41
  baseUrl: `${url}/v1`,
41
42
  api: "openai-completions",
42
43
  apiKey: await resolveApiKey(),
@@ -1,8 +1,10 @@
1
1
  import { PROVIDER_NAME } from "../constants";
2
2
 
3
+ export interface IAuth {
4
+ type: string;
5
+ key: string;
6
+ }
7
+
3
8
  export interface IAuthFile {
4
- [PROVIDER_NAME]: {
5
- type: string;
6
- key: string;
7
- };
9
+ [PROVIDER_NAME]: IAuth;
8
10
  }
@@ -1,3 +1,11 @@
1
+ interface IRouterModelStatus {
2
+ value: string;
3
+ args: string[];
4
+ preset: string;
5
+ exit_code?: number;
6
+ failed?: boolean;
7
+ }
8
+
1
9
  export interface IRouterModel {
2
10
  id: string;
3
11
  aliases?: string[];
@@ -5,5 +13,5 @@ export interface IRouterModel {
5
13
  object: string;
6
14
  owned_by: string;
7
15
  created: number;
8
- status: { value: string; args: string[] };
16
+ status: IRouterModelStatus;
9
17
  }
@@ -1,5 +1,6 @@
1
1
  import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
2
2
  import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
3
+ import { Mode } from "../enums/mode";
3
4
  import { Status } from "../enums/status";
4
5
  import { rpc } from "../tools/retriever";
5
6
 
@@ -8,6 +9,7 @@ export abstract class BaseModel {
8
9
  loaded: Status.LOADED,
9
10
  loading: Status.LOADING,
10
11
  failed: Status.FAILED,
12
+ sleeping: Status.SLEEPING,
11
13
  unloaded: Status.UNLOADED,
12
14
  };
13
15
 
@@ -15,9 +17,12 @@ export abstract class BaseModel {
15
17
  [Status.LOADED]: "🟢",
16
18
  [Status.LOADING]: "🟡",
17
19
  [Status.FAILED]: "🔴",
20
+ [Status.SLEEPING]: "🔵",
18
21
  [Status.UNLOADED]: "⚪",
19
22
  };
20
23
 
24
+ abstract get mode(): Mode;
25
+
21
26
  abstract get id(): string;
22
27
 
23
28
  abstract get name(): string;
@@ -43,13 +48,31 @@ export abstract class BaseModel {
43
48
  abstract getContextSize(): Promise<number>;
44
49
 
45
50
  /**
46
- * Returns the corresponding label of our load status
51
+ * Sets up a label for the model selection screen
52
+ * @returns A label structured as "<icon> <name>"
47
53
  */
48
54
  async getLabel(): Promise<string> {
49
55
  const status = await this.getStatus();
50
56
  return `${this.labelIcons[status]} ${this.name}`;
51
57
  }
52
58
 
59
+ /**
60
+ * Returns human-readable information about the model
61
+ * @returns A string with the model information
62
+ */
63
+ async getInfo(): Promise<string> {
64
+ const messages = [
65
+ `ID : ${this.id}`,
66
+ `Model : ${this.name}`,
67
+ `Reasoning : ${this.reasoning}`,
68
+ `Capabilities : ${this.capabilities.join(", ")}`,
69
+ `Context size : ${await this.getContextSize()}`,
70
+ ];
71
+
72
+ const response = `${messages.join("\n")}\n`;
73
+ return response;
74
+ }
75
+
53
76
  /**
54
77
  * Converts the llama-server model into a configuration object used by Pi
55
78
  * @returns A Pi configuration object
@@ -1,7 +1,8 @@
1
- import { IRouterModel } from "../interfaces/IRouterModel";
2
1
  import { DEFAULT_CTX } from "../constants";
3
- import { rpc } from "../tools/retriever";
2
+ import { Mode } from "../enums/mode";
4
3
  import { Status } from "../enums/status";
4
+ import { IRouterModel } from "../interfaces/IRouterModel";
5
+ import { rpc } from "../tools/retriever";
5
6
  import { BaseModel } from "./baseModel";
6
7
 
7
8
  export class RouterModel extends BaseModel {
@@ -9,6 +10,10 @@ export class RouterModel extends BaseModel {
9
10
  super();
10
11
  }
11
12
 
13
+ get mode(): Mode {
14
+ return Mode.ROUTER;
15
+ }
16
+
12
17
  get id(): string {
13
18
  return this.model.id;
14
19
  }
@@ -25,12 +30,16 @@ export class RouterModel extends BaseModel {
25
30
  async getStatus(): Promise<Status> {
26
31
  const { data } = await rpc<{ data: IRouterModel[] }>("/models");
27
32
  const model = data.find((m) => m.id === this.id);
28
- if (!model) return Status.UNLOADED;
33
+ if (!model) return Status.FAILED;
34
+
35
+ const status = this.statusMapper[model.status.value];
36
+ if (status === Status.UNLOADED) {
37
+ if (this.model.status.failed) return Status.FAILED;
29
38
 
30
- const response = this.statusMapper[model.status.value];
31
- if (!response) return Status.UNLOADED;
39
+ return Status.UNLOADED;
40
+ }
32
41
 
33
- return response;
42
+ return status;
34
43
  }
35
44
 
36
45
  async getContextSize(): Promise<number> {
@@ -1,7 +1,8 @@
1
- import { ISingleModel } from "../interfaces/ISingleModel";
2
1
  import { DEFAULT_CTX } from "../constants";
3
- import { rpc } from "../tools/retriever";
2
+ import { Mode } from "../enums/mode";
4
3
  import { Status } from "../enums/status";
4
+ import { ISingleModel } from "../interfaces/ISingleModel";
5
+ import { rpc } from "../tools/retriever";
5
6
  import { BaseModel } from "./baseModel";
6
7
 
7
8
  export class SingleModel extends BaseModel {
@@ -9,6 +10,10 @@ export class SingleModel extends BaseModel {
9
10
  super();
10
11
  }
11
12
 
13
+ get mode(): Mode {
14
+ return Mode.SINGLE;
15
+ }
16
+
12
17
  get id(): string {
13
18
  return this.model.name;
14
19
  }
@@ -1,7 +1,7 @@
1
- import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_NAME } from "../constants";
2
- import { access, readFile, constants } from "node:fs/promises";
1
+ import { access, constants, readFile } from "node:fs/promises";
3
2
  import { join } from "node:path";
4
- import { IAuthFile } from "../interfaces/IAuthFile";
3
+ import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_ID } from "../constants";
4
+ import { IAuth, IAuthFile } from "../interfaces/IAuthFile";
5
5
 
6
6
  // The URL is detected once, to reuse forever
7
7
  let resolvedUrl: string | undefined;
@@ -42,10 +42,10 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
42
42
  * @param key Key to extract from the parsed JSON
43
43
  * @returns The value stored under the key, or null if the file/key is missing or invalid
44
44
  */
45
- const readConfigValue = async <T>(
45
+ const readConfigValue = async <T, U>(
46
46
  filePath: string,
47
47
  key: string,
48
- ): Promise<string | null> => {
48
+ ): Promise<U> => {
49
49
  const cfg = await readContents<T>(filePath);
50
50
  return (cfg as Record<string, any>)?.[key] || null;
51
51
  };
@@ -60,8 +60,11 @@ export const resolveApiKey = async (): Promise<string> => {
60
60
  const authPath = join(process.env.HOME || ".", ".pi", "agent", "auth.json");
61
61
  if (!(await fileExists(authPath))) return placeholder;
62
62
 
63
- const response = await readConfigValue<IAuthFile>(authPath, PROVIDER_NAME);
64
- return response ?? placeholder;
63
+ const cfg = await readConfigValue<IAuthFile, IAuth | null>(
64
+ authPath,
65
+ PROVIDER_ID,
66
+ );
67
+ return cfg?.key ?? placeholder;
65
68
  };
66
69
 
67
70
  /**
@@ -77,7 +80,11 @@ const resolveGlobalUrl = async (): Promise<string | null> => {
77
80
  );
78
81
 
79
82
  if (!(await fileExists(globalPath))) return null;
80
- return readConfigValue<Record<string, string>>(globalPath, "llamaServerUrl");
83
+
84
+ return readConfigValue<Record<string, string>, string>(
85
+ globalPath,
86
+ "llamaServerUrl",
87
+ );
81
88
  };
82
89
 
83
90
  /**
@@ -89,7 +96,7 @@ const resolveProjectUrl = async (cwd: string): Promise<string | null> => {
89
96
  const projectPath = join(cwd, ".pi", "llama-server.json");
90
97
 
91
98
  if (!(await fileExists(projectPath))) return null;
92
- return readConfigValue<Record<string, string>>(projectPath, "url");
99
+ return readConfigValue<Record<string, string>, string>(projectPath, "url");
93
100
  };
94
101
 
95
102
  /**
@@ -1,8 +1,8 @@
1
- import { ISingleModel } from "../interfaces/ISingleModel";
2
1
  import { IRouterModel } from "../interfaces/IRouterModel";
3
- import { SingleModel } from "../models/singleModel";
4
- import { RouterModel } from "../models/routerModel";
2
+ import { ISingleModel } from "../interfaces/ISingleModel";
5
3
  import { BaseModel } from "../models/baseModel";
4
+ import { RouterModel } from "../models/routerModel";
5
+ import { SingleModel } from "../models/singleModel";
6
6
  import { resolveApiKey, resolveUrl } from "./resolver";
7
7
 
8
8
  /**
package/tsconfig.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "NodeNext",
5
+ "moduleResolution": "NodeNext",
6
+ "strict": true,
7
+ "esModuleInterop": true,
8
+ "skipLibCheck": true,
9
+ "forceConsistentCasingInFileNames": true,
10
+ "types": ["node"]
11
+ },
12
+ "include": ["src/**/*.ts"]
13
+ }