pi-llama-cpp 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -20
- package/package.json +1 -1
- package/src/constants.ts +1 -1
- package/src/events.ts +3 -3
- package/src/handlers.ts +70 -18
- package/src/interfaces/auth.ts +10 -0
- package/src/interfaces/endpoints/health.ts +6 -0
- package/src/interfaces/endpoints/models.ts +60 -0
- package/src/interfaces/endpoints/props.ts +29 -0
- package/src/interfaces/endpoints/slots.ts +15 -0
- package/src/models/baseModel.ts +10 -2
- package/src/models/routerModel.ts +8 -16
- package/src/models/singleModel.ts +24 -15
- package/src/tools/resolver.ts +2 -2
- package/src/tools/retriever.ts +6 -8
- package/src/interfaces/IAuthFile.ts +0 -10
- package/src/interfaces/IRouterModel.ts +0 -17
- package/src/interfaces/ISingleModel.ts +0 -20
- /package/src/interfaces/{IModelSelectEvent.ts → events.ts} +0 -0
package/README.md
CHANGED
|
@@ -6,20 +6,25 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [l
|
|
|
6
6
|
|
|
7
7
|
- **Auto-detect models** — discovers all models available on your running llama.cpp server
|
|
8
8
|
- **Live status indicators** — see which models are loaded, loading, failed, sleeping, or unloaded with color-coded icons
|
|
9
|
-
|
|
10
|
-
| Icon | Status | Description |
|
|
11
|
-
|------|--------|-------------|
|
|
12
|
-
| 🟢 | Loaded | Model is active and ready to use |
|
|
13
|
-
| 🟡 | Loading | Model is currently being loaded |
|
|
14
|
-
| 🔴 | Failed | Model failed to load |
|
|
15
|
-
| 🔵 | Sleeping | Model is loaded but inactive (router mode) |
|
|
16
|
-
| ⚪ | Unloaded | Model is not loaded on the server |
|
|
17
|
-
|
|
18
9
|
- **Load / unload / switch** — manage models directly from the Pi command palette
|
|
19
10
|
- **Multi-model router support** — works with both single-model and multi-model llama.cpp server configurations
|
|
20
|
-
- **Image
|
|
11
|
+
- **Image capabilities detection** — detects multimodal models automatically
|
|
21
12
|
- **Flexible URL resolution** — configures the server URL via project config, environment variable, or global settings
|
|
22
13
|
|
|
14
|
+
### Status Indicators
|
|
15
|
+
|
|
16
|
+
| Icon | Status | Description |
|
|
17
|
+
|------|--------|-------------|
|
|
18
|
+
| 🟢 | Loaded | Model is active and ready to use |
|
|
19
|
+
| 🟡 | Loading | Model is currently being loaded |
|
|
20
|
+
| 🔴 | Failed | Model failed to load |
|
|
21
|
+
| 🔵 | Sleeping | Model is available, but inactive |
|
|
22
|
+
| ⚪ | Unloaded | Model is not loaded on the server |
|
|
23
|
+
|
|
24
|
+
> **Note**: The `Sleeping` status only shows when you start your server with `llama-server --sleep-idle-seconds <n> ...`.
|
|
25
|
+
This is a **llama.cpp server flag** that tells the server to put idle models to sleep after `n` seconds.
|
|
26
|
+
The model awakens automatically when you send a message.
|
|
27
|
+
|
|
23
28
|
## Installation
|
|
24
29
|
|
|
25
30
|
This package is a Pi extension. Install it with
|
|
@@ -60,9 +65,11 @@ The extension resolves the llama.cpp server URL using the following priority ord
|
|
|
60
65
|
|
|
61
66
|
### API Key
|
|
62
67
|
|
|
63
|
-
If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `Llama.cpp` provider.
|
|
68
|
+
If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the `Llama.cpp` provider from the list.
|
|
69
|
+
|
|
70
|
+
Alternatively, configure the API key in `~/.pi/agent/auth.json` using the provider ID `llama-server`:
|
|
64
71
|
|
|
65
|
-
|
|
72
|
+
> **Note**: The provider is displayed as **Llama.cpp** in the Pi UI, but its internal identifier is `llama-server` — use this ID when configuring `auth.json` or other programmatic access.
|
|
66
73
|
|
|
67
74
|
```json
|
|
68
75
|
{
|
|
@@ -77,26 +84,33 @@ Alternatively, configure the API key in `~/.pi/agent/auth.json`:
|
|
|
77
84
|
|
|
78
85
|
### Prerequisites
|
|
79
86
|
|
|
80
|
-
Make sure your llama.cpp server is running with the appropriate flags.
|
|
87
|
+
Make sure your llama.cpp server is running with the appropriate flags.
|
|
88
|
+
|
|
89
|
+
- For multi-model support (model router), start the server with:
|
|
81
90
|
|
|
82
91
|
```bash
|
|
83
|
-
llama-server --models-preset path/to/presets.ini
|
|
92
|
+
llama-server --models-preset path/to/presets.ini ...
|
|
84
93
|
```
|
|
85
94
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
For single-model mode, a standard invocation works:
|
|
95
|
+
- For single-model mode, start the server with:
|
|
89
96
|
|
|
90
97
|
```bash
|
|
91
|
-
llama-server --model path/to/model.gguf
|
|
98
|
+
llama-server --model path/to/model.gguf ...
|
|
92
99
|
```
|
|
93
100
|
|
|
101
|
+
The extension determines the context size as follows:
|
|
102
|
+
- **Router mode** — reads from the preset file's `ctx-size` and/or `fit-ctx` keys
|
|
103
|
+
- **Single mode** — reads from the `/slots` endpoint (stores it in cache afterwards)
|
|
104
|
+
- Falls back to `128000` if not available
|
|
105
|
+
|
|
94
106
|
### Commands
|
|
95
107
|
|
|
96
108
|
| Command | Description |
|
|
97
109
|
| --------- | ------------------------------------------------------------------------------------------ |
|
|
98
110
|
| `/models` | Browse your models with live status. Select a model to load, switch, or unload it. |
|
|
99
111
|
|
|
112
|
+
> **Note:** When the llama.cpp server is unreachable, `/models` is still available but shows the description `Llama.cpp models (offline)` and displays an error notification with the configured server URL.
|
|
113
|
+
|
|
100
114
|
### Model Actions
|
|
101
115
|
|
|
102
116
|
When browsing models via the `/models` command, you can:
|
|
@@ -108,15 +122,24 @@ When browsing models via the `/models` command, you can:
|
|
|
108
122
|
- **Info** — View model details (ID, capabilities, context size)
|
|
109
123
|
- **Cancel** — Cancel the current operation
|
|
110
124
|
|
|
125
|
+
> **Note:** In single-model mode, only **Info** and **Cancel** are available, since there is only one model loaded on the server.
|
|
126
|
+
|
|
111
127
|
### Model Selection Event
|
|
112
128
|
|
|
113
|
-
When
|
|
129
|
+
When you switch models via Pi's model picker (instead of using the `/models` command), the extension listens for the `model_select` event, which also loads the requested model before the conversation begins.
|
|
130
|
+
|
|
131
|
+
This keeps the server in sync with the active model in Pi, regardless of how the switch was initiated — you don't need to manually load models before using them.
|
|
132
|
+
|
|
133
|
+
### Loading Models
|
|
134
|
+
|
|
135
|
+
When you trigger a load, switch, or retry action, the extension polls the server to track progress. If a model takes longer than **60 seconds** to load, the polling times out with an error.
|
|
136
|
+
> **Note:** The timeout is only for the polling. The model might still be loading.
|
|
114
137
|
|
|
115
138
|
### Model Configuration
|
|
116
139
|
|
|
117
140
|
Each model exposed to Pi includes the following defaults:
|
|
118
141
|
|
|
119
|
-
- **`maxTokens`** — `
|
|
142
|
+
- **`maxTokens`** — `32000` (maximum possible tokens per response according to Pi's source code)
|
|
120
143
|
- **`reasoning`** — `true` (assumed, as llama.cpp's `/models` endpoint does not expose it)
|
|
121
144
|
- **`cost`** — all zero (local model)
|
|
122
145
|
|
package/package.json
CHANGED
package/src/constants.ts
CHANGED
package/src/events.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
2
|
-
import {
|
|
3
|
-
import { ModelSelectEvent } from "./interfaces/
|
|
2
|
+
import { PROVIDER_ID } from "./constants";
|
|
3
|
+
import { ModelSelectEvent } from "./interfaces/events";
|
|
4
4
|
import { listModels } from "./tools/retriever";
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -12,7 +12,7 @@ export const onModelSelect = async (
|
|
|
12
12
|
event: ModelSelectEvent,
|
|
13
13
|
ctx: ExtensionContext,
|
|
14
14
|
) => {
|
|
15
|
-
if (event.model.provider !==
|
|
15
|
+
if (event.model.provider !== PROVIDER_ID) return;
|
|
16
16
|
|
|
17
17
|
const models = await listModels();
|
|
18
18
|
const model = models.find((m) => m.id === event.model.id);
|
package/src/handlers.ts
CHANGED
|
@@ -9,57 +9,109 @@ import { Status } from "./enums/status";
|
|
|
9
9
|
import { BaseModel } from "./models/baseModel";
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
|
-
*
|
|
12
|
+
* Select a model from the list. Returns null if user cancels.
|
|
13
|
+
*
|
|
13
14
|
* @param ctx Pi context
|
|
14
|
-
* @
|
|
15
|
+
* @param models A list of models
|
|
16
|
+
* @returns The selected model
|
|
15
17
|
*/
|
|
16
|
-
const
|
|
18
|
+
const selectModel = async (
|
|
17
19
|
ctx: ExtensionCommandContext,
|
|
18
20
|
models: BaseModel[],
|
|
19
|
-
): Promise<
|
|
20
|
-
// Setup the labels
|
|
21
|
+
): Promise<BaseModel | null> => {
|
|
21
22
|
const labels = await Promise.all(models.map((m) => m.getLabel()));
|
|
22
|
-
|
|
23
|
-
// Detect the selected model
|
|
24
23
|
const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, labels);
|
|
25
24
|
if (!choice) return null;
|
|
26
|
-
|
|
27
25
|
const idx = labels.indexOf(choice);
|
|
28
|
-
|
|
26
|
+
return models[idx];
|
|
27
|
+
};
|
|
29
28
|
|
|
30
|
-
|
|
29
|
+
/**
|
|
30
|
+
* Get available actions for a model based on its mode and status.
|
|
31
|
+
*
|
|
32
|
+
* @param model The selected model
|
|
33
|
+
* @returns
|
|
34
|
+
*/
|
|
35
|
+
const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
|
|
31
36
|
const routerModeActions: Record<Status, Array<Action>> = {
|
|
32
37
|
[Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
|
|
33
|
-
[Status.LOADING]: [Action.CANCEL],
|
|
38
|
+
[Status.LOADING]: [Action.INFO, Action.CANCEL],
|
|
34
39
|
[Status.FAILED]: [Action.RETRY, Action.CANCEL],
|
|
35
40
|
[Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
|
|
36
41
|
[Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
|
|
37
42
|
};
|
|
38
43
|
|
|
39
|
-
// Single mode actions (more limited)
|
|
40
44
|
const singleModeActions: Record<Status, Array<Action>> = {
|
|
41
45
|
[Status.LOADED]: [Action.INFO, Action.CANCEL],
|
|
42
46
|
[Status.LOADING]: [Action.CANCEL],
|
|
43
47
|
[Status.FAILED]: [Action.CANCEL],
|
|
44
|
-
[Status.SLEEPING]: [Action.CANCEL],
|
|
48
|
+
[Status.SLEEPING]: [Action.INFO, Action.CANCEL],
|
|
45
49
|
[Status.UNLOADED]: [Action.CANCEL],
|
|
46
50
|
};
|
|
47
51
|
|
|
48
|
-
// Define the actions that the user can do
|
|
49
52
|
const allActions =
|
|
50
53
|
model.mode === Mode.ROUTER ? routerModeActions : singleModeActions;
|
|
51
54
|
|
|
52
55
|
const status = await model.getStatus();
|
|
53
|
-
|
|
56
|
+
return allActions[status];
|
|
57
|
+
};
|
|
54
58
|
|
|
55
|
-
|
|
59
|
+
/**
|
|
60
|
+
* Selects an action for a model.
|
|
61
|
+
*
|
|
62
|
+
* @param ctx Pi context
|
|
63
|
+
* @param model The selected model
|
|
64
|
+
* @param actions Possible actions to execute
|
|
65
|
+
* @returns The action, or null if user cancels
|
|
66
|
+
*/
|
|
67
|
+
const selectAction = async (
|
|
68
|
+
ctx: ExtensionCommandContext,
|
|
69
|
+
model: BaseModel,
|
|
70
|
+
actions: Array<Action>,
|
|
71
|
+
): Promise<Action | null> => {
|
|
72
|
+
const labels = actions.map((a) => String(a));
|
|
73
|
+
const choice = await ctx.ui.select(`${model.name}`, labels);
|
|
74
|
+
if (!choice) return null;
|
|
75
|
+
|
|
76
|
+
const idx = labels.indexOf(choice);
|
|
77
|
+
return actions[idx];
|
|
78
|
+
};
|
|
56
79
|
|
|
57
|
-
|
|
58
|
-
|
|
80
|
+
/**
|
|
81
|
+
* Handles the menu for model selection
|
|
82
|
+
* Loops: select model → select action → handle action.
|
|
83
|
+
*
|
|
84
|
+
* Escape on actions menu goes back to model selection.
|
|
85
|
+
* Escape on model selection exits.
|
|
86
|
+
*
|
|
87
|
+
* @param ctx Pi context
|
|
88
|
+
* @returns The action and model, if detected
|
|
89
|
+
*/
|
|
90
|
+
const modelSelectionHandler = async (
|
|
91
|
+
ctx: ExtensionCommandContext,
|
|
92
|
+
models: BaseModel[],
|
|
93
|
+
): Promise<{ action: Action; model: BaseModel } | null> => {
|
|
94
|
+
while (true) {
|
|
95
|
+
// Select the model
|
|
96
|
+
const model = await selectModel(ctx, models);
|
|
97
|
+
if (!model) return null;
|
|
98
|
+
|
|
99
|
+
// Select the action
|
|
100
|
+
const actions = await getActionsForModel(model);
|
|
101
|
+
const action = await selectAction(ctx, model, actions);
|
|
102
|
+
if (action === null) {
|
|
103
|
+
// Escape key pressed => back to model selection
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Return the selected action and model
|
|
108
|
+
return { action, model };
|
|
109
|
+
}
|
|
59
110
|
};
|
|
60
111
|
|
|
61
112
|
/**
|
|
62
113
|
* Handles the /models command
|
|
114
|
+
*
|
|
63
115
|
* @param ctx The context used by Pi
|
|
64
116
|
* @param pi The Pi extension
|
|
65
117
|
*/
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The structure of llama-server's /models endpoint
|
|
3
|
+
*
|
|
4
|
+
* In single mode, the `models` property is not returned
|
|
5
|
+
* In router mode, everything is used
|
|
6
|
+
*/
|
|
7
|
+
export interface ModelsEndpoint {
|
|
8
|
+
models?: ModelProperty[];
|
|
9
|
+
object: string;
|
|
10
|
+
data: DataProperty[];
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface ModelProperty {
|
|
14
|
+
name: string;
|
|
15
|
+
model: string;
|
|
16
|
+
modified_at: string;
|
|
17
|
+
size: string;
|
|
18
|
+
digest: string;
|
|
19
|
+
type: string;
|
|
20
|
+
description: string;
|
|
21
|
+
tags: string[];
|
|
22
|
+
capabilities: string[];
|
|
23
|
+
parameters: string;
|
|
24
|
+
details: {
|
|
25
|
+
parent_model: string;
|
|
26
|
+
format: string;
|
|
27
|
+
family: string;
|
|
28
|
+
families: string[];
|
|
29
|
+
parameter_size: string;
|
|
30
|
+
quantization_level: string;
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export interface DataProperty {
|
|
35
|
+
id: string;
|
|
36
|
+
aliases?: string[];
|
|
37
|
+
tags: string[];
|
|
38
|
+
object: string;
|
|
39
|
+
owned_by: string;
|
|
40
|
+
created: number;
|
|
41
|
+
status?: StatusProperty;
|
|
42
|
+
meta?: MetaProperty;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
interface StatusProperty {
|
|
46
|
+
value: string;
|
|
47
|
+
args: string[];
|
|
48
|
+
preset: string;
|
|
49
|
+
exit_code?: number;
|
|
50
|
+
failed?: boolean;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
interface MetaProperty {
|
|
54
|
+
vocab_type: number;
|
|
55
|
+
n_vocab: number;
|
|
56
|
+
n_ctx_train: number;
|
|
57
|
+
n_embd: number;
|
|
58
|
+
n_params: number;
|
|
59
|
+
size: number;
|
|
60
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
|
|
2
|
+
/**
|
|
3
|
+
* The structure of llama-server's /props endpoint
|
|
4
|
+
*
|
|
5
|
+
* In single mode, applies to /props
|
|
6
|
+
* In router mode, applies to /props?model=<id>
|
|
7
|
+
*/
|
|
8
|
+
export interface PropsEndpoint {
|
|
9
|
+
default_generation_settings: Record<string, any>;
|
|
10
|
+
total_slots: number;
|
|
11
|
+
model_alias: string;
|
|
12
|
+
model_path: string;
|
|
13
|
+
modalities: {
|
|
14
|
+
vision: boolean;
|
|
15
|
+
audio: boolean;
|
|
16
|
+
};
|
|
17
|
+
media_marker: string;
|
|
18
|
+
endpoint_slots: boolean;
|
|
19
|
+
endpoint_props: boolean;
|
|
20
|
+
endpoint_metrics: boolean;
|
|
21
|
+
webui: boolean;
|
|
22
|
+
webui_settings: Record<string, any>;
|
|
23
|
+
chat_template: string;
|
|
24
|
+
chat_template_caps: Record<string, boolean>;
|
|
25
|
+
bos_token: string;
|
|
26
|
+
eos_token: string;
|
|
27
|
+
build_info: string;
|
|
28
|
+
is_sleeping: boolean;
|
|
29
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The structure of llama-server's /slots endpoint
|
|
3
|
+
*
|
|
4
|
+
* In single mode, applies to /slots
|
|
5
|
+
* In router mode, applies to /slots?model=<id>
|
|
6
|
+
*/
|
|
7
|
+
export interface SlotsEndpoint {
|
|
8
|
+
id: number;
|
|
9
|
+
n_ctx: number;
|
|
10
|
+
speculative: boolean;
|
|
11
|
+
is_processing: boolean;
|
|
12
|
+
id_task?: number;
|
|
13
|
+
params?: Array<Record<string, any>>;
|
|
14
|
+
next_token?: Array<Record<string, any>>;
|
|
15
|
+
}
|
package/src/models/baseModel.ts
CHANGED
|
@@ -2,9 +2,12 @@ import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
|
|
|
2
2
|
import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
3
3
|
import { Mode } from "../enums/mode";
|
|
4
4
|
import { Status } from "../enums/status";
|
|
5
|
+
import { DataProperty } from "../interfaces/endpoints/models";
|
|
5
6
|
import { rpc } from "../tools/retriever";
|
|
6
7
|
|
|
7
8
|
export abstract class BaseModel {
|
|
9
|
+
constructor(protected readonly model: DataProperty) {}
|
|
10
|
+
|
|
8
11
|
protected readonly statusMapper: Record<string, Status> = {
|
|
9
12
|
loaded: Status.LOADED,
|
|
10
13
|
loading: Status.LOADING,
|
|
@@ -23,9 +26,13 @@ export abstract class BaseModel {
|
|
|
23
26
|
|
|
24
27
|
abstract get mode(): Mode;
|
|
25
28
|
|
|
26
|
-
|
|
29
|
+
get id(): string {
|
|
30
|
+
return this.model.id;
|
|
31
|
+
}
|
|
27
32
|
|
|
28
|
-
|
|
33
|
+
get name(): string {
|
|
34
|
+
return this.model.aliases?.[0] || this.model.id;
|
|
35
|
+
}
|
|
29
36
|
|
|
30
37
|
get reasoning(): boolean {
|
|
31
38
|
// We don't have a way to detect this, so we'll fallback to true
|
|
@@ -67,6 +74,7 @@ export abstract class BaseModel {
|
|
|
67
74
|
`Reasoning : ${this.reasoning}`,
|
|
68
75
|
`Capabilities : ${this.capabilities.join(", ")}`,
|
|
69
76
|
`Context size : ${await this.getContextSize()}`,
|
|
77
|
+
`Status : ${await this.getStatus()}`,
|
|
70
78
|
];
|
|
71
79
|
|
|
72
80
|
const response = `${messages.join("\n")}\n`;
|
|
@@ -1,40 +1,32 @@
|
|
|
1
1
|
import { DEFAULT_CTX } from "../constants";
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
|
-
import {
|
|
4
|
+
import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
5
5
|
import { rpc } from "../tools/retriever";
|
|
6
6
|
import { BaseModel } from "./baseModel";
|
|
7
7
|
|
|
8
8
|
export class RouterModel extends BaseModel {
|
|
9
|
-
constructor(
|
|
10
|
-
super();
|
|
9
|
+
constructor(protected readonly model: DataProperty) {
|
|
10
|
+
super(model);
|
|
11
11
|
}
|
|
12
12
|
|
|
13
13
|
get mode(): Mode {
|
|
14
14
|
return Mode.ROUTER;
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
get id(): string {
|
|
18
|
-
return this.model.id;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
get name(): string {
|
|
22
|
-
return this.model.aliases?.[0] || this.model.id;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
17
|
get capabilities(): ["text"] | ["image"] {
|
|
26
|
-
const hasImage = this.model.status
|
|
18
|
+
const hasImage = this.model.status!.args?.includes("--mmproj") ?? false;
|
|
27
19
|
return hasImage ? ["image"] : ["text"];
|
|
28
20
|
}
|
|
29
21
|
|
|
30
22
|
async getStatus(): Promise<Status> {
|
|
31
|
-
const { data } = await rpc<
|
|
23
|
+
const { data } = await rpc<ModelsEndpoint>("/models");
|
|
32
24
|
const model = data.find((m) => m.id === this.id);
|
|
33
25
|
if (!model) return Status.FAILED;
|
|
34
26
|
|
|
35
|
-
const status = this.statusMapper[model.status
|
|
27
|
+
const status = this.statusMapper[model.status!.value];
|
|
36
28
|
if (status === Status.UNLOADED) {
|
|
37
|
-
if (this.model.status
|
|
29
|
+
if (this.model.status!.failed) return Status.FAILED;
|
|
38
30
|
|
|
39
31
|
return Status.UNLOADED;
|
|
40
32
|
}
|
|
@@ -58,7 +50,7 @@ export class RouterModel extends BaseModel {
|
|
|
58
50
|
* @returns The value
|
|
59
51
|
*/
|
|
60
52
|
private extractFrom(arg: string): number | null {
|
|
61
|
-
const args = this.model.status
|
|
53
|
+
const args = this.model.status!.args;
|
|
62
54
|
if (!args) return null;
|
|
63
55
|
|
|
64
56
|
const ctxIdx = args.indexOf(arg);
|
|
@@ -1,41 +1,50 @@
|
|
|
1
1
|
import { DEFAULT_CTX } from "../constants";
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
|
-
import {
|
|
4
|
+
import { DataProperty, ModelProperty } from "../interfaces/endpoints/models";
|
|
5
|
+
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
6
|
+
import { SlotsEndpoint } from "../interfaces/endpoints/slots";
|
|
5
7
|
import { rpc } from "../tools/retriever";
|
|
6
8
|
import { BaseModel } from "./baseModel";
|
|
7
9
|
|
|
8
10
|
export class SingleModel extends BaseModel {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
+
private contextSize?: number;
|
|
12
|
+
|
|
13
|
+
constructor(
|
|
14
|
+
protected readonly model: DataProperty,
|
|
15
|
+
private readonly extra: ModelProperty,
|
|
16
|
+
) {
|
|
17
|
+
super(model);
|
|
11
18
|
}
|
|
12
19
|
|
|
13
20
|
get mode(): Mode {
|
|
14
21
|
return Mode.SINGLE;
|
|
15
22
|
}
|
|
16
23
|
|
|
17
|
-
get id(): string {
|
|
18
|
-
return this.model.name;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
get name(): string {
|
|
22
|
-
return this.model.name;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
24
|
get capabilities(): ["text"] | ["image"] {
|
|
26
|
-
const hasImage = this.
|
|
25
|
+
const hasImage = this.extra.capabilities.includes("multimodal");
|
|
27
26
|
return hasImage ? ["image"] : ["text"];
|
|
28
27
|
}
|
|
29
28
|
|
|
30
29
|
async getStatus(): Promise<Status> {
|
|
31
30
|
// In single-mode, the extension will only work when the model is fully loaded
|
|
31
|
+
const { is_sleeping } = await rpc<PropsEndpoint>("/props");
|
|
32
|
+
if (is_sleeping) return Status.SLEEPING;
|
|
33
|
+
|
|
32
34
|
return Status.LOADED;
|
|
33
35
|
}
|
|
34
36
|
|
|
35
37
|
async getContextSize(): Promise<number> {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
// Avoid calling the endpoint if we already have the value
|
|
39
|
+
if (this.contextSize) return this.contextSize;
|
|
40
|
+
|
|
41
|
+
try {
|
|
42
|
+
const [{ n_ctx }] = await rpc<SlotsEndpoint[]>("/slots");
|
|
43
|
+
this.contextSize = n_ctx;
|
|
38
44
|
|
|
39
|
-
|
|
45
|
+
return this.contextSize;
|
|
46
|
+
} catch {
|
|
47
|
+
return DEFAULT_CTX;
|
|
48
|
+
}
|
|
40
49
|
}
|
|
41
50
|
}
|
package/src/tools/resolver.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { access, constants, readFile } from "node:fs/promises";
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_ID } from "../constants";
|
|
4
|
-
import {
|
|
4
|
+
import { Auth, AuthFile } from "../interfaces/auth";
|
|
5
5
|
|
|
6
6
|
// The URL is detected once, to reuse forever
|
|
7
7
|
let resolvedUrl: string | undefined;
|
|
@@ -60,7 +60,7 @@ export const resolveApiKey = async (): Promise<string> => {
|
|
|
60
60
|
const authPath = join(process.env.HOME || ".", ".pi", "agent", "auth.json");
|
|
61
61
|
if (!(await fileExists(authPath))) return placeholder;
|
|
62
62
|
|
|
63
|
-
const cfg = await readConfigValue<
|
|
63
|
+
const cfg = await readConfigValue<AuthFile, Auth | null>(
|
|
64
64
|
authPath,
|
|
65
65
|
PROVIDER_ID,
|
|
66
66
|
);
|
package/src/tools/retriever.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { HealthEndpoint } from "../interfaces/endpoints/health";
|
|
2
|
+
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
3
3
|
import { BaseModel } from "../models/baseModel";
|
|
4
4
|
import { RouterModel } from "../models/routerModel";
|
|
5
5
|
import { SingleModel } from "../models/singleModel";
|
|
@@ -11,7 +11,7 @@ import { resolveApiKey, resolveUrl } from "./resolver";
|
|
|
11
11
|
*/
|
|
12
12
|
export const isServerReady = async (): Promise<boolean> => {
|
|
13
13
|
try {
|
|
14
|
-
const { status } = await rpc<
|
|
14
|
+
const { status } = await rpc<HealthEndpoint>("/health");
|
|
15
15
|
return status === "ok";
|
|
16
16
|
} catch {
|
|
17
17
|
return false;
|
|
@@ -59,13 +59,11 @@ export const rpc = async <T>(
|
|
|
59
59
|
* @returns The list of models
|
|
60
60
|
*/
|
|
61
61
|
export const listModels = async (): Promise<BaseModel[]> => {
|
|
62
|
-
const { models, data } = await rpc<
|
|
63
|
-
models?: ISingleModel[];
|
|
64
|
-
data: IRouterModel[];
|
|
65
|
-
}>("/models");
|
|
62
|
+
const { models, data } = await rpc<ModelsEndpoint>("/models");
|
|
66
63
|
|
|
67
64
|
if (models) {
|
|
68
|
-
|
|
65
|
+
const [extra] = models;
|
|
66
|
+
return data.map((m) => new SingleModel(m, extra));
|
|
69
67
|
}
|
|
70
68
|
|
|
71
69
|
const response = data
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
interface IRouterModelStatus {
|
|
2
|
-
value: string;
|
|
3
|
-
args: string[];
|
|
4
|
-
preset: string;
|
|
5
|
-
exit_code?: number;
|
|
6
|
-
failed?: boolean;
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
export interface IRouterModel {
|
|
10
|
-
id: string;
|
|
11
|
-
aliases?: string[];
|
|
12
|
-
tags: string[];
|
|
13
|
-
object: string;
|
|
14
|
-
owned_by: string;
|
|
15
|
-
created: number;
|
|
16
|
-
status: IRouterModelStatus;
|
|
17
|
-
}
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
export interface ISingleModel {
|
|
2
|
-
name: string;
|
|
3
|
-
model: string;
|
|
4
|
-
modified_at: string;
|
|
5
|
-
size: string;
|
|
6
|
-
digest: string;
|
|
7
|
-
type: string;
|
|
8
|
-
description: string;
|
|
9
|
-
tags: string[];
|
|
10
|
-
capabilities: string[];
|
|
11
|
-
parameters: string;
|
|
12
|
-
details: {
|
|
13
|
-
parent_model: string;
|
|
14
|
-
format: string;
|
|
15
|
-
family: string;
|
|
16
|
-
families: string[];
|
|
17
|
-
parameter_size: string;
|
|
18
|
-
quantization_level: string;
|
|
19
|
-
};
|
|
20
|
-
}
|
|
File without changes
|