pi-llama-cpp 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -99,17 +99,20 @@ llama-server --model path/to/model.gguf ...
99
99
  ```
100
100
 
101
101
  The extension determines the context size as follows:
102
- - **Router mode** — when loaded, reads `meta.n_ctx` from the `/models` endpoint; when not loaded, reads `--ctx-size` and/or `--fit-ctx` from the model's status `args` array
102
+ - **Router mode**
103
+ - When loaded, reads `meta.n_ctx` from the `/models` endpoint
104
+ - When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments, or the `ctx-size` and/or `fit-ctx` keys from the **presets.ini** file
103
105
  - **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
104
106
  - Falls back to `128000` if not available
105
107
 
106
108
  ### Commands
107
109
 
108
- | Command | Description |
109
- | --------- | ------------------------------------------------------------------------------------------ |
110
- | `/models` | Browse your models with live status. Select a model to load, switch, or unload it. |
110
+ | Command | Description |
111
+ | ---------------- | ------------------------------------------------------------------------------------------ |
112
+ | `/models` | Browse your models with live status. Select a model to load, switch, or unload it. |
113
+ | `/models info` | Show detailed information for all available models at once. |
111
114
 
112
- > **Note:** When the llama.cpp server is unreachable, `/models` is still available but shows the description `Llama.cpp models (offline)` and displays an error notification with the configured server URL.
115
+ > **Note:** When the llama.cpp server is unreachable, `/models` displays an error notification with the configured server URL.
113
116
 
114
117
  ### Model Actions
115
118
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.3.3",
3
+ "version": "0.4.0",
4
4
  "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
5
5
  "keywords": [
6
6
  "pi",
@@ -130,8 +130,10 @@ export const notFoundCommand = async (
130
130
  /**
131
131
  * Handles the /models command
132
132
  *
133
+ * @param args Arguments passed to the command
133
134
  * @param ctx The context used by Pi
134
135
  * @param pi The Pi extension
136
+ * @param models List of available models
135
137
  */
136
138
  export const modelsCommand = async (
137
139
  ctx: ExtensionCommandContext,
@@ -173,7 +175,7 @@ export const modelsCommand = async (
173
175
  }
174
176
 
175
177
  if ((await model.getStatus()) === Status.FAILED) {
176
- throw new Error("Failed to load model");
178
+ throw new Error(`Failed to load model ${model.name}`);
177
179
  }
178
180
 
179
181
  await pi.setModel(piModel);
package/src/events.ts CHANGED
@@ -18,6 +18,9 @@ export const onModelSelect = async (
18
18
  const model = models.find((m) => m.id === event.model.id);
19
19
  if (!model) return;
20
20
 
21
- ctx.ui.notify(`>> Loading ${model.id}...`, "info");
22
- await model.load();
21
+ ctx.ui.notify(`Loading ${model.name}...`, "info");
22
+ await model
23
+ .load()
24
+ .then(() => ctx.ui.notify(`Model ${model.name} ready`, "info"))
25
+ .catch(() => ctx.ui.notify(`Failed to load model ${model.name}`, "error"));
23
26
  };
package/src/index.ts CHANGED
@@ -2,33 +2,19 @@ import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
4
  } from "@earendil-works/pi-coding-agent";
5
- import { modelsCommand, notFoundCommand } from "./commands/models";
6
5
  import { PROVIDER_NAME } from "./constants";
7
6
  import { onModelSelect } from "./events";
8
- import { registerLlamaCppProvider } from "./tools/provider";
9
- import { isServerReady } from "./tools/retriever";
7
+ import { CommandManager } from "./manager";
10
8
 
11
9
  export default async function (pi: ExtensionAPI) {
12
- // Server verification
13
- if (!(await isServerReady())) {
14
- pi.registerCommand("models", {
15
- description: `${PROVIDER_NAME} models (offline)`,
16
- handler: async (_: string, ctx: ExtensionCommandContext) => {
17
- await notFoundCommand(ctx);
18
- },
19
- });
20
-
21
- return;
22
- }
23
-
24
- // Provider registration
25
- const serverModels = await registerLlamaCppProvider(pi);
10
+ const manager = new CommandManager(pi);
11
+ await manager.initialize();
26
12
 
27
13
  // Command: /models
28
14
  pi.registerCommand("models", {
29
- description: `Browse ${PROVIDER_NAME} models (live status)`,
30
- handler: async (_: string, ctx: ExtensionCommandContext) =>
31
- await modelsCommand(ctx, pi, serverModels),
15
+ description: `Browse ${PROVIDER_NAME} models`,
16
+ handler: async (args: string, ctx: ExtensionCommandContext) =>
17
+ await manager.run(args, ctx),
32
18
  });
33
19
 
34
20
  // Events registration
@@ -39,6 +39,7 @@ export interface DataProperty {
39
39
  owned_by: string;
40
40
  created: number;
41
41
  status?: StatusProperty;
42
+ architecture?: ArchitectureProperty;
42
43
  meta?: MetaProperty;
43
44
  }
44
45
 
@@ -50,6 +51,11 @@ interface StatusProperty {
50
51
  failed?: boolean;
51
52
  }
52
53
 
54
+ interface ArchitectureProperty {
55
+ input_modalities: ("text" | "image" | "audio")[];
56
+ output_modalities: ["text"];
57
+ }
58
+
53
59
  interface MetaProperty {
54
60
  vocab_type: number;
55
61
  n_vocab: number;
package/src/manager.ts ADDED
@@ -0,0 +1,87 @@
1
+ import type {
2
+ ExtensionAPI,
3
+ ExtensionCommandContext,
4
+ ProviderModelConfig,
5
+ } from "@earendil-works/pi-coding-agent";
6
+ import { modelsCommand, notFoundCommand } from "./commands/models";
7
+ import {
8
+ DEFAULT_LLAMA_SERVER_URL,
9
+ PROVIDER_ID,
10
+ PROVIDER_NAME,
11
+ } from "./constants";
12
+ import { BaseModel } from "./models/baseModel";
13
+ import { resolveApiKey, resolveUrl } from "./tools/resolver";
14
+ import { isServerReady, listModels } from "./tools/retriever";
15
+
16
+ export class CommandManager {
17
+ private baseUrl: string = DEFAULT_LLAMA_SERVER_URL;
18
+ private serverModels: BaseModel[] = [];
19
+
20
+ constructor(private readonly pi: ExtensionAPI) {}
21
+
22
+ /**
23
+ * Sets up the initial state of the provider
24
+ */
25
+ async initialize() {
26
+ if (await isServerReady()) {
27
+ await this.update();
28
+ } else {
29
+ await this.register([]);
30
+ }
31
+ }
32
+
33
+ /**
34
+ * Ensures the models are up-to-date with the server
35
+ */
36
+ async update() {
37
+ this.baseUrl = `${await resolveUrl(process.cwd())}`;
38
+
39
+ this.serverModels = await listModels();
40
+ const modelConfigs = await Promise.all(
41
+ this.serverModels.map((m) => m.toProviderConfig()),
42
+ );
43
+
44
+ await this.register(modelConfigs);
45
+ }
46
+
47
+ /**
48
+ * Registers the provider in Pi with the given configurations
49
+ * Note: Registrations overload previous provider
50
+ *
51
+ * @param models Provider configurations for the models
52
+ */
53
+ async register(models: ProviderModelConfig[]) {
54
+ this.pi.registerProvider(PROVIDER_ID, {
55
+ name: PROVIDER_NAME,
56
+ baseUrl: this.baseUrl,
57
+ api: "openai-completions",
58
+ apiKey: await resolveApiKey(),
59
+ models,
60
+ });
61
+ }
62
+
63
+ /**
64
+ * Dispatches the /models command
65
+ *
66
+ * @param args Arguments passed to the command
67
+ * @param ctx The context used by Pi
68
+ * @param pi The Pi extension
69
+ * @returns A promise that settles once the dispatched command has finished
70
+ */
71
+ async run(args: string, ctx: ExtensionCommandContext) {
72
+ if (!(await isServerReady())) {
73
+ return await notFoundCommand(ctx);
74
+ }
75
+
76
+ // Command: `/models info`
77
+ if (args === "info") {
78
+ const info = await Promise.all(this.serverModels.map((m) => m.getInfo()));
79
+ const message = ctx.ui.theme.fg("accent", info.join("\n"));
80
+ ctx.ui.notify(message, "info");
81
+ return;
82
+ }
83
+
84
+ // Command: `/models`
85
+ return await modelsCommand(ctx, this.pi, this.serverModels);
86
+ }
87
+ }
@@ -1,10 +1,5 @@
1
1
  import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
2
- import {
3
- DEFAULT_CTX,
4
- MAX_TOKENS,
5
- POLLING_INTERVAL,
6
- POLLING_TIMEOUT,
7
- } from "../constants";
2
+ import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
8
3
  import { Mode } from "../enums/mode";
9
4
  import { Status } from "../enums/status";
10
5
  import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
@@ -55,17 +50,7 @@ export abstract class BaseModel {
55
50
  *
56
51
  * @returns An array of capabilities, as expected by Pi
57
52
  */
58
- async getCapabilities(): Promise<["text"] | ["image"]> {
59
- try {
60
- const { modalities } = await rpc<PropsEndpoint>(
61
- `/props?model=${this.id}`,
62
- );
63
-
64
- return modalities.vision ? ["image"] : ["text"];
65
- } catch {
66
- return ["text"];
67
- }
68
- }
53
+ abstract getCapabilities(): Promise<("text" | "image")[]>;
69
54
 
70
55
  /**
71
56
  * Gets the load status of the model
@@ -75,7 +60,7 @@ export abstract class BaseModel {
75
60
  public async getStatus(): Promise<Status> {
76
61
  try {
77
62
  const { is_sleeping, error } = await rpc<PropsEndpoint>(
78
- `/props?model=${this.id}`,
63
+ `/props?model=${this.id}&autoload=false`,
79
64
  );
80
65
 
81
66
  if (is_sleeping) return Status.SLEEPING;
@@ -96,15 +81,10 @@ export abstract class BaseModel {
96
81
  * @returns The detected context size
97
82
  */
98
83
  async getContextSize(): Promise<number> {
99
- try {
100
- const { data } = await rpc<ModelsEndpoint>(`/models`);
101
- const model = data.find((d) => d.id === this.id);
84
+ const { data } = await rpc<ModelsEndpoint>("/models");
85
+ const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
102
86
 
103
- const response = model?.meta?.n_ctx;
104
- return response ?? DEFAULT_CTX;
105
- } catch {
106
- return DEFAULT_CTX;
107
- }
87
+ return n_ctx;
108
88
  }
109
89
 
110
90
  /**
@@ -157,7 +137,8 @@ export abstract class BaseModel {
157
137
  * Loads the model in llama-server
158
138
  */
159
139
  async load(): Promise<void> {
160
- if ((await this.getStatus()) === Status.LOADED) return;
140
+ const status = await this.getStatus();
141
+ if (status === Status.LOADED || status === Status.SLEEPING) return;
161
142
 
162
143
  await rpc("/models/load", { model: this.id });
163
144
  await this.pollStatus();
@@ -50,7 +50,7 @@ export class RouterModel extends BaseModel {
50
50
  // Grab the glitch
51
51
  while (Date.now() - startTime <= limit) {
52
52
  try {
53
- await rpc<PropsEndpoint>(`/props?model=${this.id}`);
53
+ await rpc<PropsEndpoint>(`/props?model=${this.id}&autoload=false`);
54
54
  break;
55
55
  } catch {
56
56
  elapsed += POLLING_INTERVAL;
@@ -62,14 +62,17 @@ export class RouterModel extends BaseModel {
62
62
  return await super.pollStatus(startTime, timeout);
63
63
  }
64
64
 
65
- async getCapabilities(): Promise<["text"] | ["image"]> {
66
- // We can get the real capabilities if the model is already loaded
67
- if ((await this.getStatus()) === Status.LOADED) {
68
- return super.getCapabilities();
69
- }
65
+ async getCapabilities(): Promise<("text" | "image")[]> {
66
+ const { data } = await rpc<ModelsEndpoint>(`/models`);
67
+ const model = data.find((d) => d.id === this.id);
68
+ if (!model) return ["text"];
70
69
 
71
- const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
72
- return hasImage ? ["image"] : ["text"];
70
+ const { input_modalities } = model.architecture!;
71
+ const response = input_modalities.filter(
72
+ (mod) => mod === "text" || mod === "image",
73
+ );
74
+
75
+ return response;
73
76
  }
74
77
 
75
78
  async getContextSize(): Promise<number> {
@@ -1,6 +1,5 @@
1
- import { DEFAULT_CTX } from "../constants";
2
1
  import { Mode } from "../enums/mode";
3
- import { PropsEndpoint } from "../interfaces/endpoints/props";
2
+ import { ModelsEndpoint } from "../interfaces/endpoints/models";
4
3
  import { rpc } from "../tools/retriever";
5
4
  import { BaseModel } from "./baseModel";
6
5
 
@@ -9,15 +8,11 @@ export class SingleModel extends BaseModel {
9
8
  return Mode.SINGLE;
10
9
  }
11
10
 
12
- async getContextSize(): Promise<number> {
13
- try {
14
- const { default_generation_settings } = await rpc<PropsEndpoint>(
15
- `/props?model=${this.id}`,
16
- );
17
- const { n_ctx } = default_generation_settings;
18
- return n_ctx;
19
- } catch {
20
- return DEFAULT_CTX;
21
- }
11
+ async getCapabilities(): Promise<("text" | "image")[]> {
12
+ const { models } = await rpc<ModelsEndpoint>(`/models`);
13
+ const [model] = models!;
14
+
15
+ const hasImage = model.capabilities.includes("multimodal");
16
+ return hasImage ? ["text", "image"] : ["text"];
22
17
  }
23
18
  }
@@ -28,7 +28,7 @@ export const isServerReady = async (): Promise<boolean> => {
28
28
  export const rpc = async <T>(
29
29
  endpoint: string,
30
30
  body?: Record<string, unknown>,
31
- ) => {
31
+ ): Promise<T> => {
32
32
  const base = await resolveUrl(process.cwd());
33
33
  const url = `${base}${endpoint}`;
34
34
 
@@ -0,0 +1,122 @@
1
+ import { describe, expect, it, vi, beforeEach } from "vitest";
2
+ import { CommandManager } from "../src/manager";
3
+ import { PROVIDER_ID, PROVIDER_NAME } from "../src/constants";
4
+
5
+ // Mock modules at top level (vi.mock is hoisted)
6
+ vi.mock("../src/tools/retriever", () => ({
7
+ isServerReady: vi.fn(),
8
+ listModels: vi.fn(),
9
+ }));
10
+
11
+ vi.mock("../src/tools/resolver", () => ({
12
+ resolveUrl: vi.fn(),
13
+ resolveApiKey: vi.fn(),
14
+ }));
15
+
16
+ // Import mocked functions after vi.mock
17
+ import { isServerReady, listModels } from "../src/tools/retriever";
18
+ import { resolveUrl, resolveApiKey } from "../src/tools/resolver";
19
+
20
+ const mockPi = {
21
+ registerProvider: vi.fn(),
22
+ };
23
+
24
+ beforeEach(() => {
25
+ vi.clearAllMocks();
26
+ (resolveUrl as any).mockResolvedValue("http://127.0.0.1:8080");
27
+ (resolveApiKey as any).mockResolvedValue("test-key");
28
+ });
29
+
30
+ describe("CommandManager", () => {
31
+ it("should register empty models when server is not ready", async () => {
32
+ (isServerReady as any).mockResolvedValue(false);
33
+
34
+ const manager = new CommandManager(mockPi as any);
35
+ await manager.initialize();
36
+
37
+ expect(mockPi.registerProvider).toHaveBeenCalledWith(PROVIDER_ID, {
38
+ name: PROVIDER_NAME,
39
+ baseUrl: "http://127.0.0.1:8080",
40
+ api: "openai-completions",
41
+ apiKey: "test-key",
42
+ models: [],
43
+ });
44
+ });
45
+
46
+ it("should update and register models when server is ready", async () => {
47
+ const mockModel = {
48
+ name: "test-model",
49
+ id: "test-model",
50
+ toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model", maxTokens: 32000 }),
51
+ };
52
+ (isServerReady as any).mockResolvedValue(true);
53
+ (listModels as any).mockResolvedValue([mockModel]);
54
+
55
+ const manager = new CommandManager(mockPi as any);
56
+ await manager.initialize();
57
+
58
+ expect(resolveUrl).toHaveBeenCalledWith(expect.any(String));
59
+ expect(listModels).toHaveBeenCalled();
60
+ expect(mockPi.registerProvider).toHaveBeenCalledWith(PROVIDER_ID, {
61
+ name: PROVIDER_NAME,
62
+ baseUrl: "http://127.0.0.1:8080",
63
+ api: "openai-completions",
64
+ apiKey: "test-key",
65
+ models: [{ id: "test-model", maxTokens: 32000 }],
66
+ });
67
+ });
68
+
69
+ it("should call notFoundCommand when server is not ready in run()", async () => {
70
+ (isServerReady as any).mockResolvedValue(false);
71
+
72
+ const manager = new CommandManager(mockPi as any);
73
+ await manager.run("", { ui: { notify: vi.fn() } } as any);
74
+
75
+ expect(mockPi.registerProvider).not.toHaveBeenCalled();
76
+ });
77
+
78
+ it("should show info for all models when args is 'info'", async () => {
79
+ const mockModel = {
80
+ name: "test-model",
81
+ id: "test-model",
82
+ getInfo: vi.fn().mockResolvedValue("Model info for test-model"),
83
+ toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
84
+ };
85
+ (isServerReady as any).mockResolvedValue(true);
86
+ (listModels as any).mockResolvedValue([mockModel]);
87
+
88
+ const notifyFn = vi.fn();
89
+ const manager = new CommandManager(mockPi as any);
90
+ await manager.initialize();
91
+ await manager.run("info", {
92
+ ui: { notify: notifyFn, theme: { fg: (_c: string, t: string) => t } },
93
+ } as any);
94
+
95
+ expect(notifyFn).toHaveBeenCalledWith(
96
+ "Model info for test-model",
97
+ "info",
98
+ );
99
+ expect(listModels).toHaveBeenCalledOnce();
100
+ });
101
+
102
+ it("should dispatch modelsCommand when args is empty", async () => {
103
+ const mockModel = {
104
+ name: "test-model",
105
+ id: "test-model",
106
+ getLabel: vi.fn().mockResolvedValue("test-model"),
107
+ toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
108
+ };
109
+ (isServerReady as any).mockResolvedValue(true);
110
+ (listModels as any).mockResolvedValue([mockModel]);
111
+
112
+ const selectFn = vi.fn().mockReturnValue(null); // cancel immediately
113
+ const manager = new CommandManager(mockPi as any);
114
+ await manager.initialize();
115
+ await manager.run("", {
116
+ ui: { notify: vi.fn(), select: selectFn },
117
+ } as any);
118
+
119
+ // modelsCommand was called (select is invoked for model picking)
120
+ expect(selectFn).toHaveBeenCalled();
121
+ });
122
+ });
@@ -62,7 +62,12 @@ const getActionsForModel = async (model: TestModel): Promise<Array<Action>> => {
62
62
  [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
63
63
  [Status.LOADING]: [Action.INFO, Action.CANCEL],
64
64
  [Status.FAILED]: [Action.RETRY, Action.CANCEL],
65
- [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
65
+ [Status.SLEEPING]: [
66
+ Action.SWITCH,
67
+ Action.UNLOAD,
68
+ Action.INFO,
69
+ Action.CANCEL,
70
+ ],
66
71
  [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
67
72
  };
68
73
 
@@ -106,7 +111,7 @@ describe("Action availability", () => {
106
111
  {
107
112
  mode: Mode.ROUTER,
108
113
  status: Status.SLEEPING,
109
- expected: [Action.UNLOAD, Action.INFO, Action.CANCEL],
114
+ expected: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
110
115
  },
111
116
  {
112
117
  mode: Mode.ROUTER,
@@ -0,0 +1,134 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { modelsCommand } from "../src/commands/models";
3
+ import { Action } from "../src/enums/action";
4
+ import { Mode } from "../src/enums/mode";
5
+ import { Status } from "../src/enums/status";
6
+ import { BaseModel } from "../src/models/baseModel";
7
+
8
+ // Mock the retriever module
9
+ vi.mock("../src/tools/retriever", () => ({
10
+ rpc: vi.fn(),
11
+ isServerReady: vi.fn(),
12
+ listModels: vi.fn(),
13
+ }));
14
+
15
+ // Helper to create a mock BaseModel
16
+ const createMockModel = (
17
+ name: string,
18
+ overrides: Partial<BaseModel> = {},
19
+ ): BaseModel =>
20
+ ({
21
+ name,
22
+ id: name,
23
+ mode: Mode.ROUTER,
24
+ capabilities: ["text"] as ["text"],
25
+ getStatus: vi.fn().mockResolvedValue(Status.LOADED),
26
+ getContextSize: vi.fn().mockResolvedValue(4096),
27
+ getInfo: vi.fn().mockResolvedValue(`Model: ${name}\nID: ${name}`),
28
+ load: vi.fn().mockResolvedValue(undefined),
29
+ unload: vi.fn().mockResolvedValue(undefined),
30
+ toProviderConfig: vi.fn().mockResolvedValue({}),
31
+ getLabel: vi.fn().mockResolvedValue(name),
32
+ ...overrides,
33
+ }) as unknown as BaseModel;
34
+
35
+ const createMockCtx = (
36
+ selectFn: (prompt: string, options: string[]) => string | null,
37
+ ) => ({
38
+ cwd: "/tmp/test",
39
+ ui: {
40
+ select: vi.fn(selectFn),
41
+ notify: vi.fn(),
42
+ theme: {
43
+ fg: (color: string, text: string) => text,
44
+ },
45
+ },
46
+ modelRegistry: {
47
+ find: vi.fn().mockReturnValue({ id: "test-model-id" }),
48
+ },
49
+ });
50
+
51
+ const createMockPi = () => ({
52
+ setModel: vi.fn(),
53
+ registerProvider: vi.fn(),
54
+ });
55
+
56
+ describe("modelsCommand", () => {
57
+ it("should return early on cancel (null model selection)", async () => {
58
+ const models = [createMockModel("model-a")];
59
+ const ctx = createMockCtx(() => null);
60
+ const pi = createMockPi();
61
+
62
+ await modelsCommand(ctx as any, pi as any, models);
63
+
64
+ expect(ctx.ui.notify).not.toHaveBeenCalled();
65
+ });
66
+
67
+ it("should show info when INFO action is selected", async () => {
68
+ const model = createMockModel("model-a");
69
+ const models = [model];
70
+ const ctx = createMockCtx((prompt) => {
71
+ if (prompt.includes("models")) return "model-a";
72
+ return Action.INFO;
73
+ });
74
+ const pi = createMockPi();
75
+
76
+ await modelsCommand(ctx as any, pi as any, models);
77
+
78
+ expect(ctx.ui.notify).toHaveBeenCalledWith(
79
+ "Model: model-a\nID: model-a",
80
+ "info",
81
+ );
82
+ });
83
+
84
+ it("should unload model when UNLOAD action is selected", async () => {
85
+ const model = createMockModel("model-a");
86
+ const models = [model];
87
+ const ctx = createMockCtx((prompt) => {
88
+ if (prompt.includes("models")) return "model-a";
89
+ return Action.UNLOAD;
90
+ });
91
+ const pi = createMockPi();
92
+
93
+ await modelsCommand(ctx as any, pi as any, models);
94
+
95
+ expect(model.unload).toHaveBeenCalled();
96
+ expect(ctx.ui.notify).toHaveBeenCalledWith("Unloaded model-a", "info");
97
+ });
98
+
99
+ it("should load model when LOAD action is selected", async () => {
100
+ const loadFn = vi.fn().mockResolvedValue(undefined);
101
+ const model = createMockModel("model-a");
102
+ (model.load as any) = loadFn;
103
+ (model.getStatus as any).mockResolvedValue(Status.UNLOADED);
104
+ const models = [model];
105
+ const ctx = createMockCtx((prompt) => {
106
+ if (prompt.includes("models")) return "model-a";
107
+ return Action.LOAD;
108
+ });
109
+ const pi = createMockPi();
110
+
111
+ await modelsCommand(ctx as any, pi as any, models);
112
+ await vi.waitFor(() => expect(loadFn).toHaveBeenCalled());
113
+ await vi.waitFor(() => expect(pi.setModel).toHaveBeenCalled());
114
+ });
115
+
116
+ it("should loop back to model selection when action is cancelled", async () => {
117
+ const model = createMockModel("model-a");
118
+ const models = [model];
119
+
120
+ let selectCallCount = 0;
121
+ const ctx = createMockCtx(() => {
122
+ selectCallCount++;
123
+ // 1st: select model-a, 2nd: cancel action, 3rd: cancel model => exit
124
+ if (selectCallCount === 1) return "model-a";
125
+ return null;
126
+ });
127
+ const pi = createMockPi();
128
+
129
+ await modelsCommand(ctx as any, pi as any, models);
130
+
131
+ expect(ctx.ui.select).toHaveBeenCalledTimes(3);
132
+ expect(ctx.ui.notify).not.toHaveBeenCalled();
133
+ });
134
+ });
@@ -154,7 +154,7 @@ describe("RouterModel context size extraction", () => {
154
154
  expect(ctxSize).toBe(4096);
155
155
  });
156
156
 
157
- it("should return DEFAULT_CTX when no context size args are present and loaded", async () => {
157
+ it("should return n_ctx from meta when loaded without context size args", async () => {
158
158
  // First call: getStatus() -> /models
159
159
  mockRpc.mockResolvedValueOnce({
160
160
  data: [
@@ -168,17 +168,16 @@ describe("RouterModel context size extraction", () => {
168
168
  },
169
169
  ],
170
170
  });
171
- // Second call: super.getContextSize() -> /models without meta.n_ctx
171
+ // Second call: super.getContextSize() -> /models with meta.n_ctx
172
172
  mockRpc.mockResolvedValueOnce({
173
173
  data: [
174
174
  {
175
175
  id: "test-model",
176
+ meta: { n_ctx: 4096 },
176
177
  },
177
178
  ],
178
179
  });
179
180
 
180
- const { DEFAULT_CTX } = await import("../src/constants");
181
-
182
181
  const model = new RouterModel(
183
182
  createModel({
184
183
  status: {
@@ -190,13 +189,12 @@ describe("RouterModel context size extraction", () => {
190
189
  );
191
190
 
192
191
  const ctxSize = await model.getContextSize();
193
- expect(ctxSize).toBe(DEFAULT_CTX);
192
+ expect(ctxSize).toBe(4096);
194
193
  });
195
194
  });
196
195
 
197
196
  describe("RouterModel capabilities detection", () => {
198
- it("should detect image capability when modalities.vision is true", async () => {
199
- // getStatus() calls /models first
197
+ it("should detect image capability from architecture.input_modalities", async () => {
200
198
  mockRpc.mockResolvedValueOnce({
201
199
  data: [
202
200
  {
@@ -207,21 +205,22 @@ describe("RouterModel capabilities detection", () => {
207
205
  preset: "default",
208
206
  failed: false,
209
207
  },
208
+ architecture: {
209
+ input_modalities: ["text", "image"],
210
+ output_modalities: ["text"],
211
+ },
210
212
  },
211
213
  ],
212
214
  });
213
- // super.getCapabilities() calls /props?model=<id>
214
- mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
215
215
 
216
216
  const model = new RouterModel(createModel());
217
217
  const capabilities = await model.getCapabilities();
218
218
 
219
- expect(capabilities).toEqual(["image"]);
220
- expect(mockRpc).toHaveBeenCalledWith("/props?model=test-model");
219
+ expect(capabilities).toEqual(["text", "image"]);
220
+ expect(mockRpc).toHaveBeenCalledWith("/models");
221
221
  });
222
222
 
223
- it("should detect text-only capability when modalities.vision is false", async () => {
224
- // getStatus() calls /models first
223
+ it("should detect text-only capability when only text in input_modalities", async () => {
225
224
  mockRpc.mockResolvedValueOnce({
226
225
  data: [
227
226
  {
@@ -232,11 +231,13 @@ describe("RouterModel capabilities detection", () => {
232
231
  preset: "default",
233
232
  failed: false,
234
233
  },
234
+ architecture: {
235
+ input_modalities: ["text"],
236
+ output_modalities: ["text"],
237
+ },
235
238
  },
236
239
  ],
237
240
  });
238
- // super.getCapabilities() calls /props?model=<id>
239
- mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
240
241
 
241
242
  const model = new RouterModel(createModel());
242
243
  const capabilities = await model.getCapabilities();
@@ -244,12 +245,11 @@ describe("RouterModel capabilities detection", () => {
244
245
  expect(capabilities).toEqual(["text"]);
245
246
  });
246
247
 
247
- it("should default to text when /props call fails", async () => {
248
- // getStatus() calls /models first
248
+ it("should return text when model not found in /models response", async () => {
249
249
  mockRpc.mockResolvedValueOnce({
250
250
  data: [
251
251
  {
252
- id: "test-model",
252
+ id: "other-model",
253
253
  status: {
254
254
  value: "loaded",
255
255
  args: [],
@@ -259,76 +259,12 @@ describe("RouterModel capabilities detection", () => {
259
259
  },
260
260
  ],
261
261
  });
262
- // super.getCapabilities() calls /props?model=<id> which fails
263
- mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
264
262
 
265
263
  const model = new RouterModel(createModel());
266
264
  const capabilities = await model.getCapabilities();
267
265
 
268
266
  expect(capabilities).toEqual(["text"]);
269
267
  });
270
-
271
- it("should use status.args to detect image capability when not loaded", async () => {
272
- // getStatus() calls /models first, returns unloaded
273
- mockRpc.mockResolvedValueOnce({
274
- data: [
275
- {
276
- id: "test-model",
277
- status: {
278
- value: "unloaded",
279
- args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
280
- preset: "default",
281
- failed: false,
282
- },
283
- },
284
- ],
285
- });
286
-
287
- const model = new RouterModel(
288
- createModel({
289
- status: {
290
- value: "unloaded",
291
- args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
292
- preset: "default",
293
- failed: false,
294
- },
295
- }),
296
- );
297
- const capabilities = await model.getCapabilities();
298
-
299
- expect(capabilities).toEqual(["image"]);
300
- });
301
-
302
- it("should return text when not loaded and no --mmproj in args", async () => {
303
- // getStatus() calls /models first, returns unloaded
304
- mockRpc.mockResolvedValueOnce({
305
- data: [
306
- {
307
- id: "test-model",
308
- status: {
309
- value: "unloaded",
310
- args: ["--model", "gguf"],
311
- preset: "default",
312
- failed: false,
313
- },
314
- },
315
- ],
316
- });
317
-
318
- const model = new RouterModel(
319
- createModel({
320
- status: {
321
- value: "unloaded",
322
- args: ["--model", "gguf"],
323
- preset: "default",
324
- failed: false,
325
- },
326
- }),
327
- );
328
- const capabilities = await model.getCapabilities();
329
-
330
- expect(capabilities).toEqual(["text"]);
331
- });
332
268
  });
333
269
 
334
270
  describe("RouterModel mode", () => {
@@ -1,5 +1,4 @@
1
1
  import { beforeEach, describe, expect, it, vi } from "vitest";
2
- import { DEFAULT_CTX } from "../src/constants";
3
2
  import { Mode } from "../src/enums/mode";
4
3
  import { Status } from "../src/enums/status";
5
4
  import { ModelProperty } from "../src/interfaces/endpoints/models";
@@ -34,27 +33,22 @@ describe("SingleModel mode", () => {
34
33
  });
35
34
 
36
35
  describe("SingleModel capabilities", () => {
37
- it("should detect image capability when modalities.vision is true", async () => {
38
- mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
39
-
40
- const model = createModel();
41
- const capabilities = await model.getCapabilities();
42
-
43
- expect(capabilities).toEqual(["image"]);
44
- expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
45
- });
46
-
47
- it("should detect text-only capability when modalities.vision is false", async () => {
48
- mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
36
+ it("should detect image capability when multimodal is in capabilities", async () => {
37
+ mockRpc.mockResolvedValueOnce({
38
+ models: [{ id: "test", capabilities: ["multimodal"] }],
39
+ });
49
40
 
50
41
  const model = createModel();
51
42
  const capabilities = await model.getCapabilities();
52
43
 
53
- expect(capabilities).toEqual(["text"]);
44
+ expect(capabilities).toEqual(["text", "image"]);
45
+ expect(mockRpc).toHaveBeenCalledWith("/models");
54
46
  });
55
47
 
56
- it("should return text when /props call fails", async () => {
57
- mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
48
+ it("should detect text-only capability when multimodal is not in capabilities", async () => {
49
+ mockRpc.mockResolvedValueOnce({
50
+ models: [{ id: "test", capabilities: [] }],
51
+ });
58
52
 
59
53
  const model = createModel();
60
54
  const capabilities = await model.getCapabilities();
@@ -71,7 +65,7 @@ describe("SingleModel getStatus", () => {
71
65
  const status = await model.getStatus();
72
66
 
73
67
  expect(status).toBe(Status.LOADED);
74
- expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
68
+ expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}&autoload=false`);
75
69
  });
76
70
 
77
71
  it("should return SLEEPING when is_sleeping is true", async () => {
@@ -96,22 +90,4 @@ describe("SingleModel getContextSize", () => {
96
90
  expect(ctxSize).toBe(8192);
97
91
  expect(mockRpc).toHaveBeenCalledWith("/models");
98
92
  });
99
-
100
- it("should return DEFAULT_CTX when model not found in /models", async () => {
101
- mockRpc.mockResolvedValueOnce({ data: [] });
102
-
103
- const model = createModel();
104
- const ctxSize = await model.getContextSize();
105
-
106
- expect(ctxSize).toBe(DEFAULT_CTX);
107
- });
108
-
109
- it("should return DEFAULT_CTX when /models fails", async () => {
110
- mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
111
-
112
- const model = createModel();
113
- const ctxSize = await model.getContextSize();
114
-
115
- expect(ctxSize).toBe(DEFAULT_CTX);
116
- });
117
93
  });
@@ -1,28 +0,0 @@
1
- import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
3
- import type { BaseModel } from "../models/baseModel";
4
- import { resolveApiKey, resolveUrl } from "./resolver";
5
- import { listModels } from "./retriever";
6
-
7
- /**
8
- * Registers the Llama.cpp provider and returns the fetched models.
9
- *
10
- * @param pi The Pi extension API
11
- * @returns The list of models fetched from the server
12
- */
13
- export const registerLlamaCppProvider = async (
14
- pi: ExtensionAPI,
15
- ): Promise<BaseModel[]> => {
16
- const baseUrl = `${await resolveUrl(process.cwd())}/v1`;
17
- const models = await listModels();
18
-
19
- pi.registerProvider(PROVIDER_ID, {
20
- name: PROVIDER_NAME,
21
- baseUrl,
22
- api: "openai-completions",
23
- apiKey: await resolveApiKey(),
24
- models: await Promise.all(models.map((m) => m.toProviderConfig())),
25
- });
26
-
27
- return models;
28
- };