pi-llama-cpp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md ADDED
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gabriel Sanhueza (https://github.com/gsanhueza)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,109 @@
1
+ # pi-llama-cpp
2
+
3
+ A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [llama.cpp server](https://github.com/ggml-org/llama.cpp) to provide live model browsing, loading, and switching directly from Pi.
4
+
5
+ ## Features
6
+
7
+ - **Auto-detect models** — discovers all models available on your running llama.cpp server
8
+ - **Live status indicators** — see which models are loaded, loading, failed, or unloaded with color-coded icons
9
+ - **Load / unload / switch** — manage models directly from the Pi command palette
10
+ - **Multi-model router support** — works with both single-model and multi-model llama.cpp server configurations
11
+ - **Image model support** — detects multimodal models automatically
12
+ - **Flexible URL resolution** — configures the server URL via project config, environment variable, or global settings
13
+
14
+ ## Installation
15
+
16
+ This package is a Pi extension. Install it in your project:
17
+
18
+ ```bash
19
+ pi install https://github.com/gsanhueza/pi-llama-cpp
20
+ ```
21
+
22
+ ## Configuration
23
+
24
+ The extension resolves the llama.cpp server URL using the following priority order:
25
+
26
+ 1. **Per-project config** — `.pi/llama-server.json` in your project root:
27
+
28
+ ```json
29
+ {
30
+ "url": "http://127.0.0.1:8080"
31
+ }
32
+ ```
33
+
34
+ 2. **Environment variable** — `LLAMA_SERVER_URL`
35
+
36
+ 3. **Global settings** — `~/.pi/agent/settings.json`:
37
+
38
+ ```json
39
+ {
40
+ "llamaServerUrl": "http://127.0.0.1:8080"
41
+ }
42
+ ```
43
+
44
+ 4. **Default** — `http://127.0.0.1:8080`
45
+
46
+ ### API Key
47
+
48
+ If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `llama-server` provider.
49
+
50
+ Alternatively, configure the API key in `~/.pi/agent/auth.json`:
51
+
52
+ ```json
53
+ {
54
+ "llama-server": {
55
+ "type": "bearer",
56
+ "key": "your-api-key-here"
57
+ }
58
+ }
59
+ ```
60
+
61
+ ## Usage
62
+
63
+ ### Prerequisites
64
+
65
+ Make sure your llama.cpp server is running with the appropriate flags. For multi-model support (model router), start the server with:
66
+
67
+ ```bash
68
+ llama-server --models-preset path/to/presets.ini
69
+ ```
70
+
71
+ (You can use either `--ctx-size` or `--fit-ctx` in the preset — the extension reads `--ctx-size` first and falls back to `--fit-ctx` to determine the context window.)
72
+
73
+ For single-model mode, a standard invocation works:
74
+
75
+ ```bash
76
+ llama-server --model path/to/model.gguf --ctx-size 128000 ...
77
+ ```
78
+
79
+ ### Commands
80
+
81
+ | Command | Description |
82
+ | --------- | ------------------------------------------------------------------------------------------ |
83
+ | `/models` | Browse llama-server models with live status. Select a model to load, switch, or unload it. |
84
+
85
+ ### Model Actions
86
+
87
+ When browsing models via the `/models` command, you can:
88
+
89
+ - **Load & switch** — Load an unloaded model and switch to it
90
+ - **Switch model** — Switch to a model that is already loaded
91
+ - **Unload** — Unload a loaded model to free memory
92
+
93
+ ### Model Selection Event
94
+
95
+ When Pi switches models (signalled by the `model_select` event), the extension automatically loads the newly selected model on the llama.cpp server, provided the model belongs to the `llama-server` provider. This keeps the server in sync with the active model in Pi.
96
+
97
+ ### Model Configuration
98
+
99
+ Each model exposed to Pi includes the following defaults:
100
+
101
+ - **`maxTokens`** — `16384` (maximum tokens per response)
102
+ - **`reasoning`** — `true` (assumed, as llama.cpp's `/models` endpoint does not expose it)
103
+ - **`cost`** — all zero (local model)
104
+
105
+ ## Dependencies
106
+
107
+ | Dependency | Purpose |
108
+ | ------------------------------- | ------------------------------------- |
109
+ | `@mariozechner/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
package/index.ts ADDED
@@ -0,0 +1,48 @@
1
+ import type {
2
+ ExtensionAPI,
3
+ ExtensionCommandContext,
4
+ } from "@mariozechner/pi-coding-agent";
5
+ import { modelsCommandHandler } from "./src/handlers";
6
+ import { isServerReady, listModels } from "./src/tools/retriever";
7
+ import { resolveApiKey, resolveUrl } from "./src/tools/resolver";
8
+ import { PROVIDER_NAME } from "./src/constants";
9
+ import { onModelSelect } from "./src/events";
10
+
11
/**
 * Extension entry point: wires the llama-server integration into Pi.
 *
 * When the server does not answer its health check, only a stub `/models`
 * command is registered, which reports the unreachable URL. Otherwise the
 * real `/models` command, the provider (with one entry per server model) and
 * the `model_select` listener are registered.
 *
 * @param pi The Pi extension API
 */
export default async function (pi: ExtensionAPI) {
  const serverIsUp = await isServerReady();

  if (!serverIsUp) {
    // Offline: keep the command available so the user learns why it is useless
    const reportUnreachable = async (
      _: string,
      ctx: ExtensionCommandContext,
    ): Promise<void> => {
      const target = await resolveUrl(ctx.cwd);
      ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${target}`, "error");
    };

    pi.registerCommand("models", {
      description: `${PROVIDER_NAME} models (offline)`,
      handler: reportUnreachable,
    });

    return;
  }

  const baseUrl = await resolveUrl(process.cwd());
  const serverModels = await listModels();

  // Command registration
  pi.registerCommand("models", {
    description: `Browse ${PROVIDER_NAME} models (live status)`,
    handler: async (_: string, ctx: ExtensionCommandContext) => {
      await modelsCommandHandler(ctx, pi, serverModels);
    },
  });

  // Provider registration
  const apiKey = await resolveApiKey();
  const models = await Promise.all(
    serverModels.map((serverModel) => serverModel.toProviderConfig()),
  );

  pi.registerProvider(PROVIDER_NAME, {
    baseUrl: `${baseUrl}/v1`,
    api: "openai-completions",
    apiKey,
    models,
  });

  // Events registration
  pi.on("model_select", onModelSelect);
}
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "pi-llama-cpp",
3
+ "version": "0.1.0",
4
+ "description": "Pi extension for llama.cpp integration. Supports both router and single modes",
5
+ "keywords": [
6
+ "pi-package",
7
+ "pi-extension",
8
+ "llama-cpp",
9
+ "llama.cpp"
10
+ ],
11
+ "peerDependencies": {
12
+ "@mariozechner/pi-coding-agent": "*"
13
+ },
14
+ "pi": {
15
+ "extensions": [
16
+ "./index"
17
+ ]
18
+ }
19
+ }
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Name under which this provider is registered with Pi; also the key looked up in Pi's auth.json
3
+ */
4
+ export const PROVIDER_NAME = "llama-server";
5
+
6
+ /**
7
+ * Fallback server URL used when neither project config, environment nor global settings provide one
8
+ */
9
+ export const DEFAULT_LLAMA_SERVER_URL = "http://127.0.0.1:8080";
10
+
11
+ /**
12
+ * Fallback context window size used when the server does not report one for a model
13
+ */
14
+ export const DEFAULT_CTX = 128000;
15
+
16
+ /**
17
+ * Value reported to Pi as `maxTokens` for every model (maximum tokens generated in a single response)
18
+ */
19
+ export const MAX_TOKENS = 16384;
20
+
21
+ /**
22
+ * Delay (ms) between two consecutive status checks while a model is loading
23
+ */
24
+ export const POLLING_INTERVAL = 500;
25
+
26
+ /**
27
+ * Maximum time (ms) to keep polling a loading model before raising a timeout error
28
+ */
29
+ export const POLLING_TIMEOUT = 60000;
@@ -0,0 +1,7 @@
1
+ /** Menu entries offered by the /models command; each value is the label passed to the selection prompt */
2
+ export enum Actions {
3
+ SWITCH = "Switch model",
4
+ LOAD = "Load & switch",
5
+ UNLOAD = "Unload",
6
+ CANCEL = "Cancel",
7
+ }
@@ -0,0 +1,7 @@
1
+ /** Load states of a llama-server model; the values are the same strings used as keys of BaseModel.statusMapper */
2
+ export enum Status {
3
+ LOADED = "loaded",
4
+ LOADING = "loading",
5
+ FAILED = "failed",
6
+ UNLOADED = "unloaded",
7
+ }
package/src/events.ts ADDED
@@ -0,0 +1,23 @@
1
+ import { ExtensionContext } from "@mariozechner/pi-coding-agent";
2
+ import { PROVIDER_NAME } from "./constants";
3
+ import { listModels } from "./tools/retriever";
4
+ import { ModelSelectEvent } from "./interfaces/IModelSelectEvent";
5
+
6
/**
 * Reacts to Pi's `model_select` event by loading the selected model on
 * llama-server.
 *
 * Events for other providers, and models the server does not list, are
 * ignored. Any failure while listing or loading is reported through the UI
 * instead of escaping the event handler as an unhandled rejection.
 *
 * @param event Model selection event
 * @param ctx Pi context
 */
export const onModelSelect = async (
  event: ModelSelectEvent,
  ctx: ExtensionContext,
) => {
  if (event.model.provider !== PROVIDER_NAME) return;

  try {
    const models = await listModels();
    const model = models.find((m) => m.id === event.model.id);
    if (!model) return;

    ctx.ui.notify(`>> Loading ${model.id}...`, "info");
    await model.load();
  } catch (err) {
    // Surface the problem to the user; the caller has no way to handle it
    const message = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(message, "error");
  }
};
@@ -0,0 +1,91 @@
1
+ import type {
2
+ ExtensionAPI,
3
+ ExtensionCommandContext,
4
+ } from "@mariozechner/pi-coding-agent";
5
+ import { Status } from "./enums/status";
6
+ import { BaseModel } from "./models/baseModel";
7
+ import { Actions } from "./enums/actions";
8
+ import { PROVIDER_NAME } from "./constants";
9
+
10
/**
 * Lets the user pick a model and then an action for it.
 *
 * The offered actions depend on the model's current status:
 * loaded models can be switched to or unloaded, unloaded / failed models can
 * be loaded (and switched to), and a model that is still loading can only be
 * left alone.
 *
 * @param ctx Pi context
 * @param models Models to choose from
 * @returns The chosen action and model, or null when the user cancelled
 */
const modelSelectionHandler = async (
  ctx: ExtensionCommandContext,
  models: BaseModel[],
): Promise<{ action: Actions; model: BaseModel } | null> => {
  // Labels are computed once and double as the lookup key for the selection
  const labels = await Promise.all(models.map((m) => m.getLabel()));

  const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, labels);
  if (!choice) return null;

  const model = models[labels.indexOf(choice)];
  if (!model) return null;

  const allActions: Record<Status, Actions[]> = {
    [Status.LOADED]: [Actions.SWITCH, Actions.UNLOAD, Actions.CANCEL],
    [Status.LOADING]: [Actions.CANCEL],
    [Status.FAILED]: [Actions.LOAD, Actions.CANCEL],
    [Status.UNLOADED]: [Actions.LOAD, Actions.CANCEL],
  };

  const status = await model.getStatus();
  const action = (await ctx.ui.select(`${model.id}`, allActions[status])) as
    | Actions
    | undefined;
  if (!action || action === Actions.CANCEL) return null;

  return { action, model };
};

/**
 * Handles the /models command: asks the user for a model and an action, then
 * unloads the model or loads it (if needed) and makes it Pi's active model.
 *
 * @param ctx The context used by Pi
 * @param pi The Pi extension
 * @param models Models known to the extension
 */
export const modelsCommandHandler = async (
  ctx: ExtensionCommandContext,
  pi: ExtensionAPI,
  models: BaseModel[],
): Promise<void> => {
  const event = await modelSelectionHandler(ctx, models);
  if (!event) return;

  const { action, model } = event;

  const onFailure = (err: unknown) => {
    const message = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(message, "error");
  };

  if (action === Actions.UNLOAD) {
    try {
      await model.unload();
      ctx.ui.notify(`Unloaded ${model.id}`, "info");
    } catch (err) {
      onFailure(err);
    }
    return;
  }

  // LOAD and SWITCH share one path: load() returns immediately for a model
  // that is already loaded, so only announce loading when it will happen.
  if ((await model.getStatus()) !== Status.LOADED) {
    ctx.ui.notify(`Loading ${model.id}...`, "info");
  }

  const onSuccess = async () => {
    const piModel = ctx.modelRegistry.find(PROVIDER_NAME, model.id);
    if (!piModel) {
      throw new Error(`Cannot find model ${model.id} in pi registry`);
    }

    await pi.setModel(piModel);
    ctx.ui.notify(`Model ${model.id} ready`, "info");
  };

  // Deliberately not awaited so the UI is not blocked while the model loads
  void model.load().then(onSuccess).catch(onFailure);
};
@@ -0,0 +1,8 @@
1
+ import { PROVIDER_NAME } from "../constants";
2
+
3
+ export interface IAuthFile { // Part of Pi's auth.json that concerns this provider
4
+ [PROVIDER_NAME]: { // entry stored under the provider name
5
+ type: string;
6
+ key: string; // the API key itself
7
+ };
8
+ }
@@ -0,0 +1,3 @@
1
+ export interface ModelSelectEvent { // Argument received by the "model_select" listener; only the fields this extension reads
2
+ model: { id: string; provider: string };
3
+ }
@@ -0,0 +1,9 @@
1
/**
 * One entry of the `data` array returned by llama-server's `/models` endpoint
 * in router (multi-model) mode.
 */
export interface IRouterModel {
  id: string;
  aliases?: string[];
  tags: string[];
  object: string;
  owned_by: string;
  created: number;
  status: {
    /** Load state string, e.g. "loaded", "loading" or "unloaded" */
    value: string;
    /**
     * Command-line arguments of the model instance. Optional because the
     * consumers already treat it as possibly missing.
     */
    args?: string[];
    /** NOTE(review): assumed to be set by the server when an instance crashed — confirm against the server docs */
    failed?: boolean;
    /** NOTE(review): assumed to accompany `failed` with the process exit code — confirm */
    exit_code?: number;
  };
}
@@ -0,0 +1,20 @@
1
+ export interface ISingleModel { // One entry of the `models` array that listModels reads from /models
2
+ name: string; // used by SingleModel as both the Pi model id and the display name
3
+ model: string;
4
+ modified_at: string;
5
+ size: string;
6
+ digest: string;
7
+ type: string;
8
+ description: string;
9
+ tags: string[];
10
+ capabilities: string[]; // SingleModel treats the model as image-capable when this contains "multimodal"
11
+ parameters: string;
12
+ details: {
13
+ parent_model: string;
14
+ format: string;
15
+ family: string;
16
+ families: string[];
17
+ parameter_size: string;
18
+ quantization_level: string;
19
+ };
20
+ }
@@ -0,0 +1,111 @@
1
+ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
2
+ import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
3
+ import { Status } from "../enums/status";
4
+ import { rpc } from "../tools/retriever";
5
+
6
/**
 * Common behaviour of every llama-server model exposed to Pi.
 *
 * Subclasses provide identity, capabilities, load status and context size;
 * this class builds the Pi provider configuration and drives load / unload
 * requests against the server.
 */
export abstract class BaseModel {
  /** Maps the status strings reported by the server to the Status enum */
  protected readonly statusMapper: Record<string, Status> = {
    loaded: Status.LOADED,
    loading: Status.LOADING,
    failed: Status.FAILED,
    unloaded: Status.UNLOADED,
  };

  /** Icon shown in front of the model name for each status */
  protected readonly labelIcons: Record<Status, string> = {
    [Status.LOADED]: "🟢",
    [Status.LOADING]: "🟡",
    [Status.FAILED]: "🔴",
    [Status.UNLOADED]: "⚪",
  };

  /** Identifier sent to the server and registered with Pi */
  abstract get id(): string;

  /** Human-readable name used in labels and in the Pi configuration */
  abstract get name(): string;

  /** Whether the model is reported to Pi as a reasoning model */
  get reasoning(): boolean {
    // We don't have a way to detect this, so we'll fallback to true
    return true;
  }

  /**
   * Detects if the model can load images
   */
  abstract get capabilities(): ["text"] | ["image"];

  /**
   * Gets the load status of the model
   */
  abstract getStatus(): Promise<Status>;

  /**
   * Gets the context size of a particular model
   */
  abstract getContextSize(): Promise<number>;

  /**
   * Returns the display label: status icon followed by the model name
   */
  async getLabel(): Promise<string> {
    const status = await this.getStatus();
    return `${this.labelIcons[status]} ${this.name}`;
  }

  /**
   * Converts the llama-server model into a configuration object used by Pi
   * @returns A Pi configuration object
   */
  async toProviderConfig(): Promise<ProviderModelConfig> {
    return {
      id: this.id,
      name: this.name,
      reasoning: this.reasoning,
      input: this.capabilities,
      contextWindow: await this.getContextSize(),
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      maxTokens: MAX_TOKENS,
    };
  }

  /**
   * Loads the model in llama-server and waits until it is ready.
   * Does nothing when the model is already loaded.
   * @throws Error when the load request fails, loading times out, or loading
   *   ends in any state other than loaded
   */
  async load(): Promise<void> {
    if ((await this.getStatus()) === Status.LOADED) return;

    await rpc("/models/load", { model: this.id });
    await this.pollStatus();
  }

  /**
   * Unloads the model from llama-server
   */
  async unload(): Promise<void> {
    await rpc("/models/unload", { model: this.id });
  }

  /**
   * Polls llama-server until the model leaves the loading state.
   * @throws Error when POLLING_TIMEOUT elapses while still loading, or when
   *   the model ends up in a state other than loaded
   */
  async pollStatus(): Promise<void> {
    const startTime = Date.now();

    let status = await this.getStatus();
    while (status === Status.LOADING) {
      // Force a timeout if we wasted too much time polling
      if (Date.now() - startTime > POLLING_TIMEOUT) {
        throw new Error(
          `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`,
        );
      }

      await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
      status = await this.getStatus();
    }

    // Leaving the loading state is not success by itself: the model may have
    // failed or been unloaded again, and callers must not treat that as ready.
    if (status !== Status.LOADED) {
      throw new Error(`Model failed to load (status: ${status}): ${this.id}`);
    }
  }
}
@@ -0,0 +1,65 @@
1
+ import { IRouterModel } from "../interfaces/IRouterModel";
2
+ import { DEFAULT_CTX } from "../constants";
3
+ import { rpc } from "../tools/retriever";
4
+ import { Status } from "../enums/status";
5
+ import { BaseModel } from "./baseModel";
6
+
7
/**
 * A model managed by llama-server in router (multi-model) mode.
 * Capabilities and context size are derived from the instance's
 * command-line arguments as reported by the server.
 */
export class RouterModel extends BaseModel {
  constructor(private readonly model: IRouterModel) {
    super();
  }

  get id(): string {
    return this.model.id;
  }

  /** First alias when present, otherwise the id */
  get name(): string {
    return this.model.aliases?.[0] || this.model.id;
  }

  /** Image-capable when the instance was started with `--mmproj` */
  get capabilities(): ["text"] | ["image"] {
    const hasImage = this.model.status.args?.includes("--mmproj") ?? false;
    return hasImage ? ["image"] : ["text"];
  }

  /**
   * Queries the server for the current status of this model.
   * A model missing from the listing, or carrying an unknown status string,
   * counts as unloaded.
   */
  async getStatus(): Promise<Status> {
    // NOTE(review): the router is assumed to report a crashed instance as
    // value "unloaded" plus `failed: true` — confirm against the server docs.
    type Reported = IRouterModel & { status: { failed?: boolean } };

    const { data } = await rpc<{ data: Reported[] }>("/models");
    const current = data.find((m) => m.id === this.id);
    if (!current) return Status.UNLOADED;

    if (current.status.failed) return Status.FAILED;

    return this.statusMapper[current.status.value] ?? Status.UNLOADED;
  }

  /**
   * Context size from `--ctx-size`, then `--fit-ctx`, then the default.
   * A value of 0 is treated as "not specified".
   */
  async getContextSize(): Promise<number> {
    return (
      this.extractFrom("--ctx-size") ||
      this.extractFrom("--fit-ctx") ||
      DEFAULT_CTX
    );
  }

  /**
   * Extracts the numeric value that follows a llama-server argument
   * @param arg The argument
   * @returns The value, or null when the argument is absent or not a number
   */
  private extractFrom(arg: string): number | null {
    const args = this.model.status.args;
    if (!args) return null;

    const position = args.indexOf(arg);
    if (position === -1 || position + 1 >= args.length) return null;

    const parsed = Number.parseInt(args[position + 1], 10);
    return Number.isNaN(parsed) ? null : parsed;
  }
}
@@ -0,0 +1,36 @@
1
+ import { ISingleModel } from "../interfaces/ISingleModel";
2
+ import { DEFAULT_CTX } from "../constants";
3
+ import { rpc } from "../tools/retriever";
4
+ import { Status } from "../enums/status";
5
+ import { BaseModel } from "./baseModel";
6
+
7
/**
 * The model served by llama-server in single-model mode.
 */
export class SingleModel extends BaseModel {
  constructor(private readonly model: ISingleModel) {
    super();
  }

  get id(): string {
    return this.model.name;
  }

  get name(): string {
    return this.model.name;
  }

  /** Image-capable when the server lists the "multimodal" capability */
  get capabilities(): ["text"] | ["image"] {
    const hasImage = this.model.capabilities?.includes("multimodal") ?? false;
    return hasImage ? ["image"] : ["text"];
  }

  async getStatus(): Promise<Status> {
    // In single-mode, the extension will only work when the model is fully loaded
    return Status.LOADED;
  }

  /**
   * Reads the context size from the first entry of `/slots`.
   * Falls back to DEFAULT_CTX when the endpoint fails, returns no slots, or
   * omits `n_ctx`, so that provider registration is not aborted by it.
   */
  async getContextSize(): Promise<number> {
    try {
      const slots = await rpc<{ n_ctx?: number }[]>("/slots");
      const reported = Array.isArray(slots) ? slots[0]?.n_ctx : undefined;

      return reported ?? DEFAULT_CTX;
    } catch {
      return DEFAULT_CTX;
    }
  }
}
@@ -0,0 +1,138 @@
1
+ import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_NAME } from "../constants";
2
+ import { access, readFile, constants } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { IAuthFile } from "../interfaces/IAuthFile";
5
+
6
+ // The URL is detected once, to reuse forever
7
+ let resolvedUrl: string | undefined;
8
+
9
/**
 * Checks whether a path can be accessed on disk
 * @param filePath The path to probe
 * @returns True when the path exists, false on any access error
 */
const fileExists = async (filePath: string): Promise<boolean> => {
  let present = true;

  try {
    await access(filePath, constants.F_OK);
  } catch {
    present = false;
  }

  return present;
};
22
+
23
/**
 * Loads a file and parses it as JSON
 * @param filePath The path
 * @returns The parsed value, or null when the text is not valid JSON.
 *   Errors while reading the file itself are not caught here.
 */
const readContents = async <T>(filePath: string): Promise<T | null> => {
  const text = await readFile(filePath, "utf-8");

  let parsed: T | null;
  try {
    parsed = JSON.parse(text);
  } catch {
    parsed = null;
  }

  return parsed;
};
38
+
39
/**
 * Reads a string value from a JSON config file
 * @param filePath Path to the JSON config file
 * @param key Key to extract from the parsed JSON
 * @returns The value when it is a non-empty string; null when the file is
 *   not valid JSON, is not an object, or the key is missing, empty or not a
 *   string
 */
const readConfigValue = async <T>(
  filePath: string,
  key: string,
): Promise<string | null> => {
  const cfg: unknown = await readContents<T>(filePath);
  if (typeof cfg !== "object" || cfg === null) return null;

  // Only hand back real strings: the return type promises one, and callers
  // interpolate the value into URLs and headers.
  const value = (cfg as Record<string, unknown>)[key];
  return typeof value === "string" && value !== "" ? value : null;
};
52
+
53
/**
 * Reads this provider's API key from Pi's auth file
 * (`$HOME/.pi/agent/auth.json`).
 *
 * The provider entry is expected to be an object with a `key` field; a plain
 * string entry is accepted as the key itself.
 *
 * @returns The API key, or a placeholder when the file, the entry or the key
 *   is missing
 */
export const resolveApiKey = async (): Promise<string> => {
  const placeholder = "sk-placeholder";

  const authPath = join(process.env.HOME || ".", ".pi", "agent", "auth.json");
  if (!(await fileExists(authPath))) return placeholder;

  const auth = await readContents<Partial<IAuthFile>>(authPath);
  const entry: unknown = auth?.[PROVIDER_NAME];

  if (typeof entry === "string" && entry !== "") return entry;

  // Return the `key` field, not the whole entry: the result is interpolated
  // into the Authorization header.
  if (typeof entry === "object" && entry !== null) {
    const { key } = entry as { key?: unknown };
    if (typeof key === "string" && key !== "") return key;
  }

  return placeholder;
};
66
+
67
/**
 * Looks up the llama-server URL in the global settings file
 * (`$HOME/.pi/agent/settings.json`, key `llamaServerUrl`)
 * @returns The URL, or null when the file or the key is missing
 */
const resolveGlobalUrl = async (): Promise<string | null> => {
  const home = process.env.HOME || ".";
  const settingsFile = join(home, ".pi", "agent", "settings.json");

  const present = await fileExists(settingsFile);
  if (present) {
    return readConfigValue<Record<string, string>>(
      settingsFile,
      "llamaServerUrl",
    );
  }

  return null;
};
82
+
83
/**
 * Looks up the llama-server URL in the project file `.pi/llama-server.json`
 * (key `url`)
 * @param cwd The current working directory
 * @returns The URL, or null when the file or the key is missing
 */
const resolveProjectUrl = async (cwd: string): Promise<string | null> => {
  const configFile = join(cwd, ".pi", "llama-server.json");

  const present = await fileExists(configFile);
  if (present) {
    return readConfigValue<Record<string, string>>(configFile, "url");
  }

  return null;
};
94
+
95
/**
 * Looks up the llama-server URL in the `LLAMA_SERVER_URL` environment variable
 * @returns The variable's value, or null when it is not set
 */
const resolveEnvUrl = async (): Promise<string | null> => {
  const { LLAMA_SERVER_URL: fromEnv } = process.env;
  return fromEnv == null ? null : fromEnv;
};
102
+
103
/**
 * Walks the URL sources in priority order and returns the first hit:
 * project config, environment, global settings, then the built-in default
 * @param cwd The current working directory
 * @returns The URL, or a default if not found
 */
const resolveUrlWithFallbacks = async (cwd: string): Promise<string> => {
  // Ordered by priority; each lookup runs only if the previous ones found nothing
  const lookups: Array<() => Promise<string | null>> = [
    () => resolveProjectUrl(cwd),
    () => resolveEnvUrl(),
    () => resolveGlobalUrl(),
  ];

  for (const lookup of lookups) {
    const found = await lookup();
    if (found) return found;
  }

  return DEFAULT_LLAMA_SERVER_URL;
};
124
+
125
/**
 * Returns the URL where llama-server is running.
 * The first successful resolution is remembered in `resolvedUrl` and reused
 * by later calls, whatever `cwd` they pass.
 * @param cwd The current working directory
 * @returns The URL without trailing slashes, or a default if not found
 */
export const resolveUrl = async (cwd: string): Promise<string> => {
  if (!resolvedUrl) {
    const candidate = await resolveUrlWithFallbacks(cwd);

    // Strip trailing slashes
    resolvedUrl = candidate.replace(/\/+$/, "");
  }

  return resolvedUrl;
};
@@ -0,0 +1,76 @@
1
+ import { ISingleModel } from "../interfaces/ISingleModel";
2
+ import { IRouterModel } from "../interfaces/IRouterModel";
3
+ import { SingleModel } from "../models/singleModel";
4
+ import { RouterModel } from "../models/routerModel";
5
+ import { BaseModel } from "../models/baseModel";
6
+ import { resolveApiKey, resolveUrl } from "./resolver";
7
+
8
/**
 * Probes the server's `/health` endpoint
 * @returns True only when the endpoint answers with status "ok"; any error
 *   counts as not ready
 */
export const isServerReady = async (): Promise<boolean> => {
  let healthy = false;

  try {
    const health = await rpc<{ status: string }>("/health");
    healthy = health.status === "ok";
  } catch {
    // Unreachable or erroring server: leave the flag at false
  }

  return healthy;
};
20
+
21
/**
 * Sends a request to llama-server and returns the parsed JSON response.
 * A request with a body is sent as a JSON POST, otherwise as a GET; the
 * resolved API key is attached as a bearer token.
 * @param endpoint The endpoint to fetch from, appended to the server URL
 * @param body The body (optional)
 * @returns The response body parsed as JSON (not validated against T)
 * @throws Error with "<status>: <response text>" when the response is not ok
 */
export const rpc = async <T>(
  endpoint: string,
  body?: Record<string, unknown>,
): Promise<T> => {
  const base = await resolveUrl(process.cwd());
  const apiKey = await resolveApiKey();

  const headers: Record<string, string> = {};
  if (body) headers["Content-Type"] = "application/json";
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

  const res = await fetch(`${base}${endpoint}`, {
    method: body ? "POST" : "GET",
    headers,
    body: body ? JSON.stringify(body) : undefined,
  });

  if (!res.ok) {
    const text = await res.text();
    throw new Error(`${res.status}: ${text}`);
  }

  // Await before asserting, so the assertion applies to the parsed value
  // rather than to the pending promise.
  return (await res.json()) as T;
};
55
+
56
/**
 * Retrieves the list of available models from llama-server's `/models`
 * endpoint.
 *
 * A response carrying a `models` array is wrapped as SingleModel entries;
 * otherwise the `data` array is wrapped as RouterModel entries sorted by id.
 * A response with neither yields an empty list.
 *
 * @returns The list of models
 */
export const listModels = async (): Promise<BaseModel[]> => {
  const { models, data } = await rpc<{
    models?: ISingleModel[];
    data?: IRouterModel[];
  }>("/models");

  if (models) {
    return models.map((m) => new SingleModel(m));
  }

  return (data ?? [])
    .map((m) => new RouterModel(m))
    .sort((a, b) => (a.id > b.id ? 1 : a.id === b.id ? 0 : -1));
};