pi-llama-cpp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +9 -0
- package/README.md +109 -0
- package/index.ts +48 -0
- package/package.json +19 -0
- package/src/constants.ts +29 -0
- package/src/enums/actions.ts +7 -0
- package/src/enums/status.ts +7 -0
- package/src/events.ts +23 -0
- package/src/handlers.ts +91 -0
- package/src/interfaces/IAuthFile.ts +8 -0
- package/src/interfaces/IModelSelectEvent.ts +3 -0
- package/src/interfaces/IRouterModel.ts +9 -0
- package/src/interfaces/ISingleModel.ts +20 -0
- package/src/models/baseModel.ts +111 -0
- package/src/models/routerModel.ts +65 -0
- package/src/models/singleModel.ts +36 -0
- package/src/tools/resolver.ts +138 -0
- package/src/tools/retriever.ts +76 -0
package/LICENSE.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Gabriel Sanhueza (https://github.com/gsanhueza)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# pi-llama-cpp
|
|
2
|
+
|
|
3
|
+
A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [llama.cpp server](https://github.com/ggml-org/llama.cpp) to provide live model browsing, loading, and switching directly from Pi.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Auto-detect models** — discovers all models available on your running llama.cpp server
|
|
8
|
+
- **Live status indicators** — see which models are loaded, loading, failed, or unloaded with color-coded icons
|
|
9
|
+
- **Load / unload / switch** — manage models directly from the Pi command palette
|
|
10
|
+
- **Multi-model router support** — works with both single-model and multi-model llama.cpp server configurations
|
|
11
|
+
- **Image model support** — detects multimodal models automatically
|
|
12
|
+
- **Flexible URL resolution** — resolves the server URL from a per-project config file, an environment variable, or the global settings, falling back to a built-in default
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
This package is a Pi extension. Install it in your project:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pi install https://github.com/gsanhueza/pi-llama-cpp
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Configuration
|
|
23
|
+
|
|
24
|
+
The extension resolves the llama.cpp server URL using the following priority order:
|
|
25
|
+
|
|
26
|
+
1. **Per-project config** — `.pi/llama-server.json` in your project root:
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"url": "http://127.0.0.1:8080"
|
|
31
|
+
}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
2. **Environment variable** — `LLAMA_SERVER_URL`
|
|
35
|
+
|
|
36
|
+
3. **Global settings** — `~/.pi/agent/settings.json`:
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"llamaServerUrl": "http://127.0.0.1:8080"
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
4. **Default** — `http://127.0.0.1:8080`
|
|
45
|
+
|
|
46
|
+
### API Key
|
|
47
|
+
|
|
48
|
+
If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `llama-server` provider.
|
|
49
|
+
|
|
50
|
+
Alternatively, configure the API key in `~/.pi/agent/auth.json`:
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"llama-server": {
|
|
55
|
+
"type": "bearer",
|
|
56
|
+
"key": "your-api-key-here"
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Usage
|
|
62
|
+
|
|
63
|
+
### Prerequisites
|
|
64
|
+
|
|
65
|
+
Make sure your llama.cpp server is running with the appropriate flags. For multi-model support (model router), start the server with:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
llama-server --models-preset path/to/presets.ini
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
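(You can set either `--ctx-size` or `--fit-ctx` in the preset — the extension reads `--ctx-size` first, then `--fit-ctx`, and falls back to a context size of `128000` when neither yields a usable value.)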
|
|
72
|
+
|
|
73
|
+
For single-model mode, a standard invocation works:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
llama-server --model path/to/model.gguf --ctx-size 128000 ...
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Commands
|
|
80
|
+
|
|
81
|
+
| Command | Description |
|
|
82
|
+
| --------- | ------------------------------------------------------------------------------------------ |
|
|
83
|
+
| `/models` | Browse llama-server models with live status. Select a model to load, switch, or unload it. |
|
|
84
|
+
|
|
85
|
+
### Model Actions
|
|
86
|
+
|
|
87
|
+
When browsing models via the `/models` command, you can:
|
|
88
|
+
|
|
89
|
+
- **Load & switch** — Load an unloaded model and switch to it
|
|
90
|
+
- **Switch model** — Switch to a model that is already loaded
|
|
91
|
+
- **Unload** — Unload a loaded model to free memory
|
|
92
|
+
|
|
93
|
+
### Model Selection Event
|
|
94
|
+
|
|
95
|
+
When Pi switches models (e.g., via `model_select`), the extension automatically loads the selected model on the llama.cpp server. This keeps the server in sync with the active model in Pi.
|
|
96
|
+
|
|
97
|
+
### Model Configuration
|
|
98
|
+
|
|
99
|
+
Each model exposed to Pi includes the following defaults:
|
|
100
|
+
|
|
101
|
+
- **`maxTokens`** — `16384` (maximum tokens per response)
|
|
102
|
+
- **`reasoning`** — `true` (assumed, as llama.cpp's `/models` endpoint does not expose it)
|
|
103
|
+
- **`cost`** — all zero (local model)
|
|
104
|
+
|
|
105
|
+
## Dependencies
|
|
106
|
+
|
|
107
|
+
| Dependency | Purpose |
|
|
108
|
+
| ------------------------------- | ------------------------------------- |
|
|
109
|
+
| `@mariozechner/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
|
package/index.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ExtensionAPI,
|
|
3
|
+
ExtensionCommandContext,
|
|
4
|
+
} from "@mariozechner/pi-coding-agent";
|
|
5
|
+
import { modelsCommandHandler } from "./src/handlers";
|
|
6
|
+
import { isServerReady, listModels } from "./src/tools/retriever";
|
|
7
|
+
import { resolveApiKey, resolveUrl } from "./src/tools/resolver";
|
|
8
|
+
import { PROVIDER_NAME } from "./src/constants";
|
|
9
|
+
import { onModelSelect } from "./src/events";
|
|
10
|
+
|
|
11
|
+
/**
 * Extension entry point, called by Pi with its extension API.
 *
 * When the llama-server health check fails, only a stub `/models` command is
 * registered that reports the server as unreachable. Otherwise the model list
 * is fetched once and used to register the interactive `/models` command, the
 * provider itself and the `model_select` listener.
 *
 * @param pi The Pi extension API
 */
export default async function (pi: ExtensionAPI) {
  const serverIsUp = await isServerReady();

  if (!serverIsUp) {
    // Offline: the command only tells the user where we tried to connect
    const reportUnreachable = async (
      _: string,
      ctx: ExtensionCommandContext,
    ): Promise<void> => {
      const target = await resolveUrl(ctx.cwd);
      ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${target}`, "error");
    };

    pi.registerCommand("models", {
      description: `${PROVIDER_NAME} models (offline)`,
      handler: reportUnreachable,
    });

    return;
  }

  const baseUrl = await resolveUrl(process.cwd());
  const serverModels = await listModels();

  // Command registration
  const browseModels = async (_: string, ctx: ExtensionCommandContext) =>
    await modelsCommandHandler(ctx, pi, serverModels);

  pi.registerCommand("models", {
    description: `Browse ${PROVIDER_NAME} models (live status)`,
    handler: browseModels,
  });

  // Provider registration
  const apiKey = await resolveApiKey();
  const models = await Promise.all(
    serverModels.map((m) => m.toProviderConfig()),
  );

  pi.registerProvider(PROVIDER_NAME, {
    baseUrl: `${baseUrl}/v1`,
    api: "openai-completions",
    apiKey,
    models,
  });

  // Events registration
  pi.on("model_select", onModelSelect);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-llama-cpp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Pi extension for llama.cpp integration. Supports both router and single modes",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"pi-package",
|
|
7
|
+
"pi-extension",
|
|
8
|
+
"llama-cpp",
|
|
9
|
+
"llama.cpp"
|
|
10
|
+
],
|
|
11
|
+
"peerDependencies": {
|
|
12
|
+
"@mariozechner/pi-coding-agent": "*"
|
|
13
|
+
},
|
|
14
|
+
"pi": {
|
|
15
|
+
"extensions": [
|
|
16
|
+
"./index"
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
}
|
package/src/constants.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** Name under which this provider is registered in Pi. */
export const PROVIDER_NAME = "llama-server";

/** Fallback llama-server URL, used when the resolver finds no configured one. */
export const DEFAULT_LLAMA_SERVER_URL = "http://127.0.0.1:8080";

/** Fallback context size, used when the server does not expose one. */
export const DEFAULT_CTX = 128000;

/** Upper bound on the tokens a model may generate in a single response. */
export const MAX_TOKENS = 16384;

/** Delay, in milliseconds, between two model load status checks. */
export const POLLING_INTERVAL = 500;

/** Longest time, in milliseconds, to keep polling a loading model before giving up. */
export const POLLING_TIMEOUT = 60000;
|
package/src/events.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { PROVIDER_NAME } from "./constants";
|
|
3
|
+
import { listModels } from "./tools/retriever";
|
|
4
|
+
import { ModelSelectEvent } from "./interfaces/IModelSelectEvent";
|
|
5
|
+
|
|
6
|
+
/**
 * Reacts to Pi's `model_select` event by loading the selected model on
 * llama-server.
 *
 * Events for other providers, and models that llama-server does not list, are
 * ignored. Failures while listing or loading are reported through the UI
 * instead of escaping the listener as an unhandled rejection.
 *
 * @param event Model selection event
 * @param ctx Pi context
 */
export const onModelSelect = async (
  event: ModelSelectEvent,
  ctx: ExtensionContext,
): Promise<void> => {
  if (event.model.provider !== PROVIDER_NAME) return;

  try {
    const models = await listModels();
    const model = models.find((m) => m.id === event.model.id);
    if (!model) return;

    ctx.ui.notify(`>> Loading ${model.id}...`, "info");
    await model.load();
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(message, "error");
  }
};
|
package/src/handlers.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ExtensionAPI,
|
|
3
|
+
ExtensionCommandContext,
|
|
4
|
+
} from "@mariozechner/pi-coding-agent";
|
|
5
|
+
import { Status } from "./enums/status";
|
|
6
|
+
import { BaseModel } from "./models/baseModel";
|
|
7
|
+
import { Actions } from "./enums/actions";
|
|
8
|
+
import { PROVIDER_NAME } from "./constants";
|
|
9
|
+
|
|
10
|
+
/**
 * Asks the user to pick a model and then an action to run on it.
 *
 * @param ctx Pi context
 * @param models Models offered in the picker
 * @returns The chosen action and model, or null when the user cancelled or the
 *          selection could not be matched to a model
 */
const modelSelectionHandler = async (
  ctx: ExtensionCommandContext,
  models: BaseModel[],
): Promise<{ action: Actions; model: BaseModel } | null> => {
  // Setup the labels
  const labels = await Promise.all(models.map((m) => m.getLabel()));

  // Detect the selected model
  const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, labels);
  if (!choice) return null;

  // The picker hands back the label text, so map it to its model by position.
  // A label that is not in the list leaves us without a model to act on.
  const model = models[labels.indexOf(choice)];
  if (!model) return null;

  // Define the actions that the user can do
  const allActions: Record<Status, Actions[]> = {
    [Status.LOADED]: [Actions.UNLOAD, Actions.CANCEL],
    [Status.LOADING]: [Actions.CANCEL],
    [Status.FAILED]: [Actions.SWITCH, Actions.CANCEL],
    [Status.UNLOADED]: [Actions.SWITCH, Actions.CANCEL],
  };

  // The status is re-read here, so it may differ from the one shown in the label
  const status = await model.getStatus();
  const actions = allActions[status];

  const action = (await ctx.ui.select(`${model.id}`, actions)) as Actions;
  if (!action || action === Actions.CANCEL) return null;

  // Send the selected action with the corresponding model
  return { action, model };
};
|
|
46
|
+
|
|
47
|
+
/**
 * Handles the /models command: lets the user pick a model and an action, then
 * either unloads the model or loads it and makes it Pi's active model.
 *
 * Every failure is reported through `ctx.ui.notify` as an error; the handler
 * itself does not reject because of a failing server call.
 *
 * @param ctx The context used by Pi
 * @param pi The Pi extension
 * @param models The models offered to the user
 */
export const modelsCommandHandler = async (
  ctx: ExtensionCommandContext,
  pi: ExtensionAPI,
  models: BaseModel[],
): Promise<void> => {
  const event = await modelSelectionHandler(ctx, models);
  if (!event) return;

  const { action, model } = event;

  const reportFailure = (err: unknown): void => {
    const message = err instanceof Error ? err.message : String(err);
    ctx.ui.notify(message, "error");
  };

  try {
    if (action === Actions.UNLOAD) {
      await model.unload();
      ctx.ui.notify(`Unloaded ${model.id}`, "info");
      return;
    }

    // Nothing to do when the model got loaded in the meantime
    if ((await model.getStatus()) === Status.LOADED) return;
  } catch (err: unknown) {
    reportFailure(err);
    return;
  }

  ctx.ui.notify(`Loading ${model.id}...`, "info");

  const loadAndSwitch = async (): Promise<void> => {
    await model.load();

    const piModel = ctx.modelRegistry.find(PROVIDER_NAME, model.id);
    if (!piModel) {
      throw new Error(`Cannot find model ${model.id} in pi registry`);
    }

    await pi.setModel(piModel);
    ctx.ui.notify(`Model ${model.id} ready`, "info");
  };

  // Deliberately not awaited: loading must not block the UI
  void loadAndSwitch().catch(reportFailure);
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/** Descriptive metadata nested under `details` of a model entry. */
interface ISingleModelDetails {
  parent_model: string;
  format: string;
  family: string;
  families: string[];
  parameter_size: string;
  quantization_level: string;
}

/**
 * Shape of one model entry as this extension types it for single-model mode.
 */
export interface ISingleModel {
  name: string;
  model: string;
  modified_at: string;
  size: string;
  digest: string;
  type: string;
  description: string;
  tags: string[];
  capabilities: string[];
  parameters: string;
  details: ISingleModelDetails;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
|
|
2
|
+
import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
3
|
+
import { Status } from "../enums/status";
|
|
4
|
+
import { rpc } from "../tools/retriever";
|
|
5
|
+
|
|
6
|
+
/**
 * Common behaviour of a llama-server model as seen by this extension:
 * labelling, conversion to a Pi provider model config, and load/unload with
 * status polling. Subclasses supply identity, capabilities, status and
 * context size.
 */
export abstract class BaseModel {
  /** Maps a raw status string to the corresponding `Status` value. */
  protected readonly statusMapper: Record<string, Status> = {
    loaded: Status.LOADED,
    loading: Status.LOADING,
    failed: Status.FAILED,
    unloaded: Status.UNLOADED,
  };

  /** Icon shown in front of the model name for each status. */
  protected readonly labelIcons: Record<Status, string> = {
    [Status.LOADED]: "🟢",
    [Status.LOADING]: "🟡",
    [Status.FAILED]: "🔴",
    [Status.UNLOADED]: "⚪",
  };

  /** Identifier sent to llama-server and registered in Pi. */
  abstract get id(): string;

  /** Human-readable name used in labels and in the Pi config. */
  abstract get name(): string;

  /** Whether the model is advertised to Pi as a reasoning model. */
  get reasoning(): boolean {
    // We don't have a way to detect this, so we'll fallback to true
    return true;
  }

  /**
   * Detects if the model can load images.
   * NOTE(review): an image-capable model is reported as `["image"]` only;
   * confirm whether Pi expects `["text", "image"]` for such models.
   */
  abstract get capabilities(): ["text"] | ["image"];

  /**
   * Gets the load status of the model
   */
  abstract getStatus(): Promise<Status>;

  /**
   * Gets the context size of a particular model
   */
  abstract getContextSize(): Promise<number>;

  /**
   * Builds the picker label: the icon of the current status followed by the name.
   */
  async getLabel(): Promise<string> {
    const status = await this.getStatus();
    return `${this.labelIcons[status]} ${this.name}`;
  }

  /**
   * Converts the llama-server model into a configuration object used by Pi
   * @returns A Pi configuration object
   */
  async toProviderConfig(): Promise<ProviderModelConfig> {
    return {
      id: this.id,
      name: this.name,
      reasoning: this.reasoning,
      input: this.capabilities,
      contextWindow: await this.getContextSize(),
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      maxTokens: MAX_TOKENS,
    };
  }

  /**
   * Loads the model in llama-server and waits until it is ready.
   * Does nothing when the model is already loaded.
   *
   * @throws Error when the load request fails, polling times out, or the
   *         model does not end up loaded
   */
  async load(): Promise<void> {
    if ((await this.getStatus()) === Status.LOADED) return;

    await rpc("/models/load", { model: this.id });
    await this.pollStatus();
  }

  /**
   * Unloads the model from llama-server
   */
  async unload(): Promise<void> {
    await rpc("/models/unload", { model: this.id });
  }

  /**
   * Polls llama-server until the model leaves the loading state.
   *
   * @throws Error when the model is still loading after `POLLING_TIMEOUT` ms,
   *         or when polling ends in any state other than loaded
   */
  async pollStatus(): Promise<void> {
    const startTime = Date.now();
    let status = await this.getStatus();

    while (status === Status.LOADING) {
      // Force a timeout if we wasted too much time polling
      if (Date.now() - startTime > POLLING_TIMEOUT) {
        throw new Error(
          `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`,
        );
      }

      await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
      status = await this.getStatus();
    }

    // Leaving the loading state is not success by itself: the load may have
    // failed, in which case callers must not treat the model as ready.
    if (status !== Status.LOADED) {
      throw new Error(`Model failed to load: ${this.id}`);
    }
  }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { IRouterModel } from "../interfaces/IRouterModel";
|
|
2
|
+
import { DEFAULT_CTX } from "../constants";
|
|
3
|
+
import { rpc } from "../tools/retriever";
|
|
4
|
+
import { Status } from "../enums/status";
|
|
5
|
+
import { BaseModel } from "./baseModel";
|
|
6
|
+
|
|
7
|
+
/**
 * A model entry of the `data` list returned by `/models`, whose status is
 * looked up on the server each time it is requested.
 */
export class RouterModel extends BaseModel {
  constructor(private readonly model: IRouterModel) {
    super();
  }

  get id(): string {
    return this.model.id;
  }

  /** First alias when one is set, otherwise the id. */
  get name(): string {
    const firstAlias = this.model.aliases?.[0];
    return firstAlias ? firstAlias : this.model.id;
  }

  /** Reports image input when the model's args contain `--mmproj`. */
  get capabilities(): ["text"] | ["image"] {
    const launchArgs = this.model.status.args;
    if (launchArgs?.includes("--mmproj")) return ["image"];

    return ["text"];
  }

  /**
   * Fetches `/models` and maps this model's reported status value.
   * A model that is no longer listed, or whose status value is not
   * recognised, counts as unloaded.
   */
  async getStatus(): Promise<Status> {
    const listing = await rpc<{ data: IRouterModel[] }>("/models");
    const current = listing.data.find((entry) => entry.id === this.id);

    const reported = current
      ? this.statusMapper[current.status.value]
      : undefined;

    return reported || Status.UNLOADED;
  }

  /**
   * Reads the context size from the model's args: `--ctx-size` first, then
   * `--fit-ctx`, then the default.
   */
  async getContextSize(): Promise<number> {
    for (const flag of ["--ctx-size", "--fit-ctx"]) {
      const size = this.extractFrom(flag);
      if (size) return size;
    }

    return DEFAULT_CTX;
  }

  /**
   * Extracts the integer that follows a llama-server argument
   * @param arg The argument
   * @returns The value, or null when the argument is absent, is the last
   *          entry, or is not followed by a number
   */
  private extractFrom(arg: string): number | null {
    const launchArgs = this.model.status.args;
    const position = launchArgs ? launchArgs.indexOf(arg) : -1;

    if (!launchArgs || position === -1 || position + 1 >= launchArgs.length) {
      return null;
    }

    const value = parseInt(launchArgs[position + 1], 10);
    return isNaN(value) ? null : value;
  }
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { ISingleModel } from "../interfaces/ISingleModel";
|
|
2
|
+
import { DEFAULT_CTX } from "../constants";
|
|
3
|
+
import { rpc } from "../tools/retriever";
|
|
4
|
+
import { Status } from "../enums/status";
|
|
5
|
+
import { BaseModel } from "./baseModel";
|
|
6
|
+
|
|
7
|
+
/**
 * The model served by a llama-server running in single-model mode.
 */
export class SingleModel extends BaseModel {
  constructor(private readonly model: ISingleModel) {
    super();
  }

  get id(): string {
    return this.model.name;
  }

  get name(): string {
    return this.model.name;
  }

  /** Reports image input when the model lists the "multimodal" capability. */
  get capabilities(): ["text"] | ["image"] {
    // Tolerate a server response without a capabilities list
    const hasImage = this.model.capabilities?.includes("multimodal") ?? false;
    return hasImage ? ["image"] : ["text"];
  }

  async getStatus(): Promise<Status> {
    // In single-mode, the extension will only work when the model is fully loaded
    return Status.LOADED;
  }

  /**
   * Reads the context size of the first slot reported by `/slots`.
   *
   * @returns The slot's `n_ctx`, or `DEFAULT_CTX` when the endpoint fails,
   *          returns no slots, or the slot carries no `n_ctx`
   */
  async getContextSize(): Promise<number> {
    try {
      const slots = await rpc<{ n_ctx?: number }[]>("/slots");
      return slots[0]?.n_ctx ?? DEFAULT_CTX;
    } catch {
      // A failing /slots request must not prevent the model from being
      // registered; the default context size is the documented fallback.
      return DEFAULT_CTX;
    }
  }
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_NAME } from "../constants";
|
|
2
|
+
import { access, readFile, constants } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { IAuthFile } from "../interfaces/IAuthFile";
|
|
5
|
+
|
|
6
|
+
// The URL is resolved once and then reused for every later call
|
|
7
|
+
let resolvedUrl: string | undefined;
|
|
8
|
+
|
|
9
|
+
/**
 * Tells whether a path can be accessed on disk
 * @param filePath The path to probe
 * @returns True when the existence check succeeds, false when it fails for any reason
 */
const fileExists = async (filePath: string): Promise<boolean> =>
  access(filePath, constants.F_OK).then(
    () => true,
    () => false,
  );
|
|
22
|
+
|
|
23
|
+
/**
 * Reads the contents of a file as JSON
 *
 * The parsed value is not validated against `T`; callers must check the shape
 * they rely on.
 *
 * @param filePath The path
 * @returns The parsed content, or null when the file cannot be read or does
 *          not contain valid JSON
 */
const readContents = async <T>(filePath: string): Promise<T | null> => {
  try {
    // Reading is inside the try so that an unreadable path (missing file,
    // directory, permission error) yields null like malformed JSON does.
    const raw = await readFile(filePath, "utf-8");
    return JSON.parse(raw) as T;
  } catch {
    return null;
  }
};
|
|
38
|
+
|
|
39
|
+
/**
 * Reads a string value from a JSON config file
 * @param filePath Path to the JSON config file
 * @param key Key to extract from the parsed JSON
 * @returns The value when it is a non-empty string; null when the file is
 *          invalid, the key is missing, or the value is not a string
 */
const readConfigValue = async <T>(
  filePath: string,
  key: string,
): Promise<string | null> => {
  const cfg: unknown = await readContents<T>(filePath);
  if (typeof cfg !== "object" || cfg === null) return null;

  // Only hand back real strings: a number or object stored under the key
  // would otherwise be returned to callers that treat it as a string.
  const value = (cfg as Record<string, unknown>)[key];
  return typeof value === "string" && value !== "" ? value : null;
};
|
|
52
|
+
|
|
53
|
+
/**
 * Reads the API key for this provider from Pi's auth file
 * (`.pi/agent/auth.json` under the user's home directory).
 *
 * The provider entry may be an object carrying the key in its `key` field or
 * a plain string key.
 *
 * @returns The API key, or the placeholder "sk-placeholder" when the file or
 *          a usable key is missing
 */
export const resolveApiKey = async (): Promise<string> => {
  const placeholder = "sk-placeholder";

  // HOME is usually unset on Windows, where USERPROFILE holds the home directory
  const home = process.env.HOME || process.env.USERPROFILE || ".";
  const authPath = join(home, ".pi", "agent", "auth.json");
  if (!(await fileExists(authPath))) return placeholder;

  let auth: unknown;
  try {
    auth = await readContents<IAuthFile>(authPath);
  } catch {
    return placeholder;
  }
  if (typeof auth !== "object" || auth === null) return placeholder;

  const entry = (auth as Record<string, unknown>)[PROVIDER_NAME];
  if (typeof entry === "string" && entry !== "") return entry;

  if (typeof entry === "object" && entry !== null) {
    // Use the `key` field: returning the whole entry would end up as
    // "Bearer [object Object]" in the Authorization header.
    const key = (entry as Record<string, unknown>).key;
    if (typeof key === "string" && key !== "") return key;
  }

  return placeholder;
};
|
|
66
|
+
|
|
67
|
+
/**
 * Resolves the llama-server url from the `llamaServerUrl` key of the global
 * settings file (`.pi/agent/settings.json` under the user's home directory)
 * @returns The URL, if found.
 */
const resolveGlobalUrl = async (): Promise<string | null> => {
  // HOME is usually unset on Windows, where USERPROFILE holds the home directory
  const home = process.env.HOME || process.env.USERPROFILE || ".";
  const globalPath = join(home, ".pi", "agent", "settings.json");

  if (!(await fileExists(globalPath))) return null;
  return readConfigValue<Record<string, string>>(globalPath, "llamaServerUrl");
};
|
|
82
|
+
|
|
83
|
+
/**
 * Looks up the llama-server url under the `url` key of the project's
 * .pi/llama-server.json file
 * @param cwd The current working directory
 * @returns The URL, if found.
 */
const resolveProjectUrl = async (cwd: string): Promise<string | null> => {
  const configFile = join(cwd, ".pi", "llama-server.json");
  const present = await fileExists(configFile);

  return present
    ? readConfigValue<Record<string, string>>(configFile, "url")
    : null;
};
|
|
94
|
+
|
|
95
|
+
/**
 * Looks up the llama-server url in the LLAMA_SERVER_URL environment variable
 * @returns The variable's value, or null when it is not set.
 */
const resolveEnvUrl = async (): Promise<string | null> => {
  const fromEnv = process.env.LLAMA_SERVER_URL;
  return fromEnv === undefined ? null : fromEnv;
};
|
|
102
|
+
|
|
103
|
+
/**
 * Walks through every source of the llama-server URL in priority order
 * @param cwd The current working directory
 * @returns The first non-empty URL found, or the default one
 */
const resolveUrlWithFallbacks = async (cwd: string): Promise<string> => {
  // Lookups are lazy: a source is only consulted when all earlier ones came up empty
  const sources: Array<() => Promise<string | null>> = [
    () => resolveProjectUrl(cwd), // 1. per-project config
    resolveEnvUrl, // 2. env
    resolveGlobalUrl, // 3. global settings: ~/.pi/agent/settings.json
  ];

  for (const lookup of sources) {
    const candidate = await lookup();
    if (candidate) return candidate;
  }

  // 4. default
  return DEFAULT_LLAMA_SERVER_URL;
};
|
|
124
|
+
|
|
125
|
+
/**
 * Gives the URL where llama-server is running, without trailing slashes.
 *
 * The first non-empty result is remembered, so `cwd` only matters until then.
 *
 * @param cwd The current working directory
 * @returns The URL, or a default if not found
 */
export const resolveUrl = async (cwd: string): Promise<string> => {
  if (!resolvedUrl) {
    const located = await resolveUrlWithFallbacks(cwd);

    // Strip trailing slashes
    resolvedUrl = located.replace(/\/+$/, "");
  }

  return resolvedUrl;
};
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { ISingleModel } from "../interfaces/ISingleModel";
|
|
2
|
+
import { IRouterModel } from "../interfaces/IRouterModel";
|
|
3
|
+
import { SingleModel } from "../models/singleModel";
|
|
4
|
+
import { RouterModel } from "../models/routerModel";
|
|
5
|
+
import { BaseModel } from "../models/baseModel";
|
|
6
|
+
import { resolveApiKey, resolveUrl } from "./resolver";
|
|
7
|
+
|
|
8
|
+
/**
 * Checks the server's `/health` endpoint
 * @returns True when the endpoint answers with status "ok"; false when it
 *          answers anything else or the request fails
 */
export const isServerReady = async (): Promise<boolean> => {
  try {
    const health = await rpc<{ status: string }>("/health");
    return health.status === "ok";
  } catch {
    return false;
  }
};
|
|
20
|
+
|
|
21
|
+
/**
 * Calls a llama-server endpoint and returns its JSON response.
 *
 * Sends a GET request, or a JSON POST when a body is given. The resolved API
 * key is sent as a bearer token. The response is not validated against `T`.
 *
 * @param endpoint The endpoint to fetch from, appended to the server URL
 * @param body The body (optional)
 * @returns The parsed JSON response
 * @throws Error with "<status>: <response text>" when the response is not ok
 */
export const rpc = async <T>(
  endpoint: string,
  body?: Record<string, unknown>,
): Promise<T> => {
  const base = await resolveUrl(process.cwd());
  const apiKey = await resolveApiKey();

  const headers: Record<string, string> = {};
  if (body) headers["Content-Type"] = "application/json";
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

  const res = await fetch(`${base}${endpoint}`, {
    method: body ? "POST" : "GET",
    headers,
    body: body ? JSON.stringify(body) : undefined,
  });

  if (!res.ok) {
    const text = await res.text();
    throw new Error(`${res.status}: ${text}`);
  }

  // Await before asserting: res.json() is a Promise, not a T
  return (await res.json()) as T;
};
|
|
55
|
+
|
|
56
|
+
/**
 * Retrieves the list of available models from llama-server's `/models`.
 *
 * A response carrying a `models` list is treated as single-model mode.
 * Otherwise the entries of `data` are used, sorted by id.
 *
 * @returns The list of models; empty when the response carries neither list
 */
export const listModels = async (): Promise<BaseModel[]> => {
  const { models, data } = await rpc<{
    models?: ISingleModel[];
    data?: IRouterModel[];
  }>("/models");

  if (models) {
    return models.map((m) => new SingleModel(m));
  }

  // `data` may be absent in an unexpected response; treat that as no models
  return (data ?? [])
    .map((m) => new RouterModel(m))
    .sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0));
};
|