pi-vision-handoff 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/package.json +56 -0
- package/src/index.ts +198 -0
- package/src/vision-model-selector.ts +333 -0
- package/vision-handoff.ts +479 -0
- package/vitest.config.ts +15 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tom X Nguyen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 👁️ pi-vision-handoff
|
|
4
|
+
|
|
5
|
+
**Give text-only [pi](https://github.com/earendil-works/pi-coding-agent) models vision**
|
|
6
|
+
|
|
7
|
+
_Describe images with a vision model you pick, then feed the text to models that can't see._
|
|
8
|
+
|
|
9
|
+
[](https://github.com/earendil-works/pi-coding-agent)
|
|
10
|
+
[](https://www.npmjs.com/package/pi-vision-handoff)
|
|
11
|
+
[](./LICENSE)
|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## The Problem
|
|
18
|
+
|
|
19
|
+
Some of the best coding models are blind. You paste a screenshot, a UI mock, a stack trace, or a diagram into pi — and a text-only model either silently ignores the image or rejects the request outright. Up to now your only options were to describe the image yourself, or switch to an (often weaker-for-coding) vision model just to read it.
|
|
20
|
+
|
|
21
|
+
The `pi-umans-provider` extension quietly solved this for GLM 5.1: a hardcoded "vision handoff" pipeline that had `umans-flash` describe each image at prompt time and swapped the text in for the image block before the request left. It worked great — but it was welded to one provider, one describer, and one set of models.
|
|
22
|
+
|
|
23
|
+
## The Solution
|
|
24
|
+
|
|
25
|
+
`pi-vision-handoff` extracts that pipeline and makes it **provider-agnostic**:
|
|
26
|
+
|
|
27
|
+
- **Pick any vision-capable model** from your registry via an interactive picker — OpenAI, Anthropic, Google, Ollama, or any custom provider pi knows about.
|
|
28
|
+
- Your choice **persists** to `~/.pi/agent/extensions/pi-vision-handoff.json`.
|
|
29
|
+
- For any model that doesn't declare image input (or any model you explicitly target), `pi-vision-handoff` describes the image with your chosen vision model **before** the request is sent, then **swaps the image block for the description** in the actual provider payload.
|
|
30
|
+
- Works across all three request formats pi uses — OpenAI Chat Completions, OpenAI Responses, and Anthropic Messages — detected by block shape.
|
|
31
|
+
- Descriptions are **cached per image hash** (LRU), so the swap is instant by the time the request fires.
|
|
32
|
+
|
|
33
|
+
No `settings.json` touched. No per-provider glue. Pick a describer once and every text-only model you own can suddenly see.
|
|
34
|
+
|
|
35
|
+
## Features
|
|
36
|
+
|
|
37
|
+
- **🎮 Interactive picker** — `/vision-handoff` opens a TUI listing every model, vision-capable ones first (👁), to choose your describer.
|
|
38
|
+
- **🔌 Provider-agnostic** — uses pi's own model execution machinery (`@earendil-works/pi-ai`'s `complete()`), so it works with any provider/configured model, including custom provider extensions.
|
|
39
|
+
- **🧠 Automatic targets** — by default, handoff applies to *every* model that lacks native vision. Opt out with `/vision-handoff auto off`.
|
|
40
|
+
- **🎛️ Explicit overrides** — force handoff for specific models (e.g. a weak vision model) with `/vision-handoff add`.
|
|
41
|
+
- **⚡ Cache-warmed** — `before_agent_start` describes attached images the moment you submit, so the request is rarely delayed.
|
|
42
|
+
- **🛡️ Graceful degradation** — no API key? Describer unreachable? Aborted? The image is replaced with a clean `[Image: description unavailable]` placeholder instead of crashing your turn.
|
|
43
|
+
- **🔧 Tunable** — cap description length and cache size in the config file.
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
### Interactive Commands
|
|
48
|
+
|
|
49
|
+
| Command | What it does |
|
|
50
|
+
|---------|-------------|
|
|
51
|
+
| `/vision-handoff` | Open the interactive picker to choose the vision model |
|
|
52
|
+
| `/vision-handoff select` | Same as `/vision-handoff` |
|
|
53
|
+
| `/vision-handoff model openai/gpt-4o` | Set the vision model directly |
|
|
54
|
+
| `/vision-handoff status` | Show current config + whether handoff is active for the current model |
|
|
55
|
+
| `/vision-handoff enable` / `disable` | Master switch (keeps your configured model) |
|
|
56
|
+
| `/vision-handoff auto on` / `off` | Toggle automatic handoff for all non-vision models |
|
|
57
|
+
| `/vision-handoff add ollama/llava:13b` | Force handoff for an extra model |
|
|
58
|
+
| `/vision-handoff remove ollama/llava:13b` | Stop forcing handoff for a model |
|
|
59
|
+
| `/vision-handoff clear` | Clear the configured vision model |
|
|
60
|
+
| `/vision-handoff help` | Show usage reference |
|
|
61
|
+
|
|
62
|
+
### Config File
|
|
63
|
+
|
|
64
|
+
Created automatically at `~/.pi/agent/extensions/pi-vision-handoff.json` on first change:
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
{
|
|
68
|
+
"enabled": true,
|
|
69
|
+
"visionModel": "openai/gpt-4o",
|
|
70
|
+
"autoHandoff": true,
|
|
71
|
+
"handoffModels": ["ollama/llava:13b"],
|
|
72
|
+
"maxTokens": 1024,
|
|
73
|
+
"cacheMax": 50,
|
|
74
|
+
"prompt": "Describe this image exhaustively…",
|
|
75
|
+
"userPromptPrefix": "The user's request about this image: "
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
| Field | Default | Effect |
|
|
80
|
+
|-------|---------|--------|
|
|
81
|
+
| `enabled` | `true` | Master switch. When `false`, no handoff occurs. |
|
|
82
|
+
| `visionModel` | `null` | The describer, as `provider/id`. `null` = not configured (handoff inactive). |
|
|
83
|
+
| `autoHandoff` | `true` | Apply handoff to every model whose `input` does not include `image`. |
|
|
84
|
+
| `handoffModels` | `[]` | Extra `provider/id` refs that should also receive handoff. |
|
|
85
|
+
| `maxTokens` | `1024` | Cap on a single description's output. |
|
|
86
|
+
| `cacheMax` | `50` | Max described images kept in the in-memory cache per session. |
|
|
87
|
+
| `prompt` | _(built-in)_ | Override the describer system prompt. |
|
|
88
|
+
| `userPromptPrefix` | _(built-in)_ | Override the prefix prepended to your original prompt. |
|
|
89
|
+
|
|
90
|
+
> The config path uses pi's `getAgentDir()` — set `PI_CODING_AGENT_DIR` to relocate it.
|
|
91
|
+
|
|
92
|
+
## Installation
|
|
93
|
+
|
|
94
|
+
**With `pi install`** (recommended):
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pi install npm:pi-vision-handoff
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Or install from GitHub:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
pi install https://github.com/monotykamary/pi-vision-handoff
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
**With npm**:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
npm install pi-vision-handoff
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Or in `~/.pi/agent/settings.json`:
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
{
|
|
116
|
+
"packages": [
|
|
117
|
+
"npm:pi-vision-handoff"
|
|
118
|
+
]
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Then `/reload` or restart pi.
|
|
123
|
+
|
|
124
|
+
For a quick one-off test:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
pi -e ./vision-handoff.ts
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## How It Works
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
You submit a prompt + image, on a text-only model
|
|
134
|
+
→ before_agent_start
|
|
135
|
+
• is this model a handoff target? (non-vision, or in handoffModels)
|
|
136
|
+
• for each attached image → describeImage() with your chosen vision model
|
|
137
|
+
- complete() via pi-ai (resolves API key/headers/baseUrl for you)
|
|
138
|
+
- cached by sha256(mime + base64)
|
|
139
|
+
• fire-and-forget — warms the cache
|
|
140
|
+
→ before_provider_request
|
|
141
|
+
• walk the provider payload's messages[]
|
|
142
|
+
• for every image block (detected by shape) → swap for a text block:
|
|
143
|
+
"[Image: <cached description>]"
|
|
144
|
+
• returns the modified payload to pi
|
|
145
|
+
|
|
146
|
+
Result: the text-only model receives a vivid text description in place of
|
|
147
|
+
the image, and your turn proceeds normally.
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Image-block formats handled
|
|
151
|
+
|
|
152
|
+
| API | Image block shape | Replacement |
|
|
153
|
+
|-----|-------------------|-------------|
|
|
154
|
+
| OpenAI Chat Completions | `{ type: "image_url", image_url: { url: "data:…" } }` | `{ type: "text", text }` |
|
|
155
|
+
| OpenAI Responses | `{ type: "input_image", image_url: "data:…" }` | `{ type: "input_text", text }` |
|
|
156
|
+
| Anthropic Messages | `{ type: "image", source: { type: "base64", media_type, data } }` | `{ type: "text", text }` |
|
|
157
|
+
|
|
158
|
+
The describer call itself goes through pi's normal model machinery (`complete()`), **not** the agent event loop — so it never re-triggers `before_provider_request` (no recursion).
|
|
159
|
+
|
|
160
|
+
## Comparison with Alternatives
|
|
161
|
+
|
|
162
|
+
| Approach | Pros | Cons |
|
|
163
|
+
|----------|------|------|
|
|
164
|
+
| **pi-vision-handoff** (this) | Provider-agnostic; pick any describer; automatic for text-only models; cached; survives across providers | Adds one extra model call per unique image |
|
|
165
|
+
| Native vision on every model | Zero overhead | Not all models support it; you may be forced off your preferred coding model |
|
|
166
|
+
| Manually describing images | No extension | Tedious; lossy; kills the "paste a screenshot" workflow |
|
|
167
|
+
| The original `pi-umans-provider` handoff | Battle-tested | Hardcoded to `umans-flash` + UMANS models only |
|
|
168
|
+
| Switching to a vision model to read an image, then back | Works | Context loss across model swaps; worse coding model for the actual work |
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
pnpm install
|
|
174
|
+
pnpm test # Vitest unit tests (31 passing)
|
|
175
|
+
pnpm typecheck # TypeScript validation
|
|
176
|
+
pnpm lint:dead # Dead code detection (knip)
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Structure
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
.
|
|
183
|
+
├── vision-handoff.ts             # Main extension: hooks, command, describer
|
|
184
|
+
├── src/
|
|
185
|
+
│   ├── index.ts                  # Config schema, read/write, image-block helpers
|
|
186
|
+
│   └── vision-model-selector.ts  # Interactive picker TUI component
|
|
187
|
+
├── __tests__/unit/
|
|
188
|
+
│   ├── config-dir.test.ts        # Ensures getAgentDir() usage
|
|
189
|
+
│   └── vision-handoff.test.ts    # Config, refs, image-block extraction, round-trip
|
|
190
|
+
├── package.json
|
|
191
|
+
├── tsconfig.json
|
|
192
|
+
├── knip.json
|
|
193
|
+
└── vitest.config.ts
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Acknowledgements
|
|
197
|
+
|
|
198
|
+
The vision handoff concept and the exhaustive describer prompt originate from
|
|
199
|
+
the [pi-umans-provider](https://github.com/monotykamary/pi-umans-provider) GLM 5.1
|
|
200
|
+
pipeline. The picker TUI builds on the patterns from
|
|
201
|
+
[pi-hide-providers](https://github.com/monotykamary/pi-hide-providers), which in
|
|
202
|
+
turn mirror pi core's built-in selectors.
|
|
203
|
+
|
|
204
|
+
## License
|
|
205
|
+
|
|
206
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-vision-handoff",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Give text-only pi models vision — describe images with a vision model you pick via an interactive picker, then hand off the text description to non-vision models",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"author": "Tom X Nguyen",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "git+https://github.com/monotykamary/pi-vision-handoff.git"
|
|
11
|
+
},
|
|
12
|
+
"homepage": "https://github.com/monotykamary/pi-vision-handoff#readme",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/monotykamary/pi-vision-handoff/issues"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"pi-package",
|
|
18
|
+
"pi",
|
|
19
|
+
"pi-coding-agent",
|
|
20
|
+
"extension",
|
|
21
|
+
"vision",
|
|
22
|
+
"image",
|
|
23
|
+
"handoff",
|
|
24
|
+
"multimodal",
|
|
25
|
+
"model-selector",
|
|
26
|
+
"accessibility"
|
|
27
|
+
],
|
|
28
|
+
"files": [
|
|
29
|
+
"*.ts",
|
|
30
|
+
"src/",
|
|
31
|
+
"README.md"
|
|
32
|
+
],
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@earendil-works/pi-ai": "0.79.10",
|
|
35
|
+
"@earendil-works/pi-coding-agent": "0.79.4",
|
|
36
|
+
"@earendil-works/pi-tui": "0.79.4",
|
|
37
|
+
"@types/node": "25.9.1",
|
|
38
|
+
"@vitest/coverage-v8": "4.1.7",
|
|
39
|
+
"knip": "6.14.1",
|
|
40
|
+
"typescript": "6.0.3",
|
|
41
|
+
"vitest": "4.1.7"
|
|
42
|
+
},
|
|
43
|
+
"pi": {
|
|
44
|
+
"extensions": [
|
|
45
|
+
"./vision-handoff.ts"
|
|
46
|
+
]
|
|
47
|
+
},
|
|
48
|
+
"peerDependencies": {},
|
|
49
|
+
"scripts": {
|
|
50
|
+
"test": "vitest run",
|
|
51
|
+
"test:watch": "vitest",
|
|
52
|
+
"test:coverage": "vitest run --coverage",
|
|
53
|
+
"typecheck": "tsc --noEmit",
|
|
54
|
+
"lint:dead": "knip --no-gitignore"
|
|
55
|
+
}
|
|
56
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared constants, types, and utilities for pi-vision-handoff.
|
|
3
|
+
*
|
|
4
|
+
* Config lives at ~/.pi/agent/extensions/pi-vision-handoff.json — the same
|
|
5
|
+
* convention pi-model-sort uses for picker-backed extensions.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getAgentDir } from "@earendil-works/pi-coding-agent";
|
|
9
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
|
|
12
|
+
/** Subdirectory under the pi agent dir where picker extensions store config. */
|
|
13
|
+
const CONFIG_SUBDIR = "extensions";
|
|
14
|
+
|
|
15
|
+
/** Config file name. */
|
|
16
|
+
export const CONFIG_FILENAME = "pi-vision-handoff.json";
|
|
17
|
+
|
|
18
|
+
/**
 * Build the path of the extension's config file by joining pi's agent
 * directory, the `extensions` subdirectory, and the config file name
 * (by default `~/.pi/agent/extensions/pi-vision-handoff.json`).
 *
 * @returns The joined config file path.
 */
export function getConfigPath(): string {
  const agentDir = getAgentDir();
  return join(agentDir, CONFIG_SUBDIR, CONFIG_FILENAME);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Description shown in the `/` commands list.
 *
 * The separator is an em dash (U+2014); the previous literal carried a
 * mis-encoded byte sequence in its place, which rendered as a stray Thai glyph.
 */
export const HANDOFF_COMMAND_DESCRIPTION =
  "Configure vision handoff \u2014 pick a vision model to describe images for text-only models";
|
|
26
|
+
|
|
27
|
+
/**
 * Default system prompt for the vision describer. Mirrors the pi-umans-provider pipeline.
 *
 * The final sentence uses an em dash (U+2014); the previous literal carried a
 * mis-encoded byte sequence there, which would have been sent to the model verbatim.
 */
export const DEFAULT_VISION_PROMPT =
  "You are a vision assistant for a coding agent. Describe this image exhaustively. Cover: all visible text (verbatim if possible), code snippets, UI layout and widgets, diagrams and flow arrows, error messages and stack traces, file trees, terminal output, color and style details, spatial relationships between elements, and anything else a developer would need to act on this image. Do not summarize \u2014 be exhaustive.";
|
|
30
|
+
|
|
31
|
+
/** Prefix prepended to the user's original prompt when describing an image. */
|
|
32
|
+
export const DEFAULT_USER_PROMPT_PREFIX = "The user's request about this image: ";
|
|
33
|
+
|
|
34
|
+
/** Placeholder text block injected in place of an image block. */
|
|
35
|
+
export const IMAGE_PLACEHOLDER_PREFIX = "[Image: ";
|
|
36
|
+
export const IMAGE_PLACEHOLDER_SUFFIX = "]";
|
|
37
|
+
|
|
38
|
+
/** Default max output tokens for a single image description. */
|
|
39
|
+
export const DEFAULT_MAX_TOKENS = 1024;
|
|
40
|
+
|
|
41
|
+
/** Default vision cache size (number of described images kept in memory per session). */
|
|
42
|
+
export const DEFAULT_CACHE_MAX = 50;
|
|
43
|
+
|
|
44
|
+
/** Per-description request timeout. */
|
|
45
|
+
export const DESCRIBE_TIMEOUT_MS = 30_000;
|
|
46
|
+
|
|
47
|
+
/** Shape of the persisted vision-handoff configuration. */
export interface VisionHandoffConfig {
  /** Global on/off switch; `false` suppresses handoff even when a vision model is set. */
  enabled: boolean;
  /** Describer model as a "provider/id" reference, or `null` when none has been chosen. */
  visionModel: string | null;
  /** `true` (the default) applies handoff to every model whose input does not include "image". */
  autoHandoff: boolean;
  /** Additional "provider/id" references that receive handoff as well (e.g. weak vision models). */
  handoffModels: string[];
  /** Output-token cap for one image description. */
  maxTokens: number;
  /** Upper bound on images held in the in-memory description cache. */
  cacheMax: number;
  /** Optional replacement for the describer system prompt (DEFAULT_VISION_PROMPT otherwise). */
  prompt?: string;
  /** Optional replacement for the user-prompt prefix (DEFAULT_USER_PROMPT_PREFIX otherwise). */
  userPromptPrefix?: string;
}

/** Configuration used when nothing has been saved yet: enabled, automatic, no describer chosen. */
export const DEFAULT_CONFIG: VisionHandoffConfig = {
  enabled: true,
  visionModel: null,
  autoHandoff: true,
  handoffModels: [],
  maxTokens: DEFAULT_MAX_TOKENS,
  cacheMax: DEFAULT_CACHE_MAX,
};
|
|
74
|
+
|
|
75
|
+
/**
 * Split a "provider/id" reference at its first slash.
 *
 * Surrounding whitespace is ignored. Everything after the first slash belongs
 * to the id, so ids may themselves contain slashes.
 *
 * @param ref - Reference string such as "openai/gpt-4o".
 * @returns The two parts, or `null` when the slash, the provider, or the id is missing.
 */
export function parseModelRef(ref: string): { provider: string; id: string } | null {
  const cleaned = ref.trim();
  const sep = cleaned.indexOf("/");
  // sep === -1: no slash (this also covers the empty string); sep === 0: empty provider.
  if (sep < 1) return null;

  const provider = cleaned.substring(0, sep);
  const id = cleaned.substring(sep + 1);
  return id.length > 0 ? { provider, id } : null;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Join a provider and a model id into the "provider/id" reference form.
 *
 * @param provider - Provider name.
 * @param id - Model id within that provider.
 * @returns `provider` and `id` separated by a single slash.
 */
export function formatModelRef(provider: string, id: string): string {
  return [provider, id].join("/");
}
|
|
91
|
+
|
|
92
|
+
/**
 * Report whether a model lists "image" among its declared inputs.
 *
 * @param model - Model descriptor; may be absent.
 * @returns `true` only when `model.input` is an array containing "image".
 */
export function isVisionModel(model: { input?: ("text" | "image")[] } | undefined | null): boolean {
  if (!model) return false;
  const modalities = model.input;
  if (!Array.isArray(modalities)) return false;
  return modalities.some((modality) => modality === "image");
}
|
|
96
|
+
|
|
97
|
+
/**
 * Merge a parsed config object onto the defaults, ignoring missing or invalid fields.
 *
 * Field rules:
 * - `enabled`, `autoHandoff`: taken only when boolean.
 * - `visionModel`: a non-blank string is trimmed and kept if it parses as
 *   "provider/id", otherwise it becomes `null`; an explicit `null` is kept.
 * - `handoffModels`: non-string entries and malformed refs are dropped, the
 *   rest are trimmed and de-duplicated (first occurrence wins).
 * - `maxTokens`, `cacheMax`: taken only when finite and > 0, then floored.
 * - `prompt`: taken when a non-blank string; `userPromptPrefix`: any string.
 *
 * @param raw - Untrusted value, typically the result of `JSON.parse`.
 * @returns A complete config that never shares arrays with `DEFAULT_CONFIG`.
 */
export function normalizeConfig(raw: unknown): VisionHandoffConfig {
  // Copy the array as well: a plain spread would leave `handoffModels` aliased to
  // DEFAULT_CONFIG.handoffModels, so a caller pushing onto the result would
  // silently corrupt the defaults for every later read.
  const base: VisionHandoffConfig = {
    ...DEFAULT_CONFIG,
    handoffModels: [...DEFAULT_CONFIG.handoffModels],
  };
  if (!raw || typeof raw !== "object") return base;
  const obj = raw as Record<string, unknown>;

  if (typeof obj.enabled === "boolean") base.enabled = obj.enabled;

  if (typeof obj.visionModel === "string" && obj.visionModel.trim()) {
    const ref = obj.visionModel.trim();
    base.visionModel = parseModelRef(ref) ? ref : null;
  } else if (obj.visionModel === null) {
    base.visionModel = null;
  }

  if (typeof obj.autoHandoff === "boolean") base.autoHandoff = obj.autoHandoff;

  if (Array.isArray(obj.handoffModels)) {
    const refs = obj.handoffModels
      .filter((m): m is string => typeof m === "string")
      .map((m) => m.trim())
      .filter((m) => parseModelRef(m) !== null);
    // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
    base.handoffModels = [...new Set(refs)];
  }

  if (typeof obj.maxTokens === "number" && Number.isFinite(obj.maxTokens) && obj.maxTokens > 0) {
    base.maxTokens = Math.floor(obj.maxTokens);
  }
  if (typeof obj.cacheMax === "number" && Number.isFinite(obj.cacheMax) && obj.cacheMax > 0) {
    base.cacheMax = Math.floor(obj.cacheMax);
  }

  if (typeof obj.prompt === "string" && obj.prompt.trim()) base.prompt = obj.prompt;
  if (typeof obj.userPromptPrefix === "string") base.userPromptPrefix = obj.userPromptPrefix;

  return base;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Read the config from disk.
 *
 * Falls back to the defaults when the file does not exist, cannot be read, or
 * does not contain valid JSON. This is deliberately best-effort: a broken
 * config file must not throw here.
 *
 * @returns The normalized config, or a fresh copy of the defaults.
 */
export function readConfig(): VisionHandoffConfig {
  // Hand out a fresh `handoffModels` array each time; spreading DEFAULT_CONFIG
  // alone would share the default array with every caller that mutates it.
  const freshDefaults = (): VisionHandoffConfig => ({
    ...DEFAULT_CONFIG,
    handoffModels: [...DEFAULT_CONFIG.handoffModels],
  });

  const path = getConfigPath();
  if (!existsSync(path)) return freshDefaults();
  try {
    return normalizeConfig(JSON.parse(readFileSync(path, "utf8")));
  } catch {
    return freshDefaults();
  }
}
|
|
140
|
+
|
|
141
|
+
/**
 * Persist the config as pretty-printed JSON (2-space indent, trailing newline).
 *
 * The `extensions` directory under the agent dir is created first when absent.
 *
 * @param config - Configuration to serialize.
 * @returns The path of the file that was written.
 */
export function writeConfig(config: VisionHandoffConfig): string {
  const targetFile = getConfigPath();
  const targetDir = join(getAgentDir(), CONFIG_SUBDIR);

  const dirMissing = !existsSync(targetDir);
  if (dirMissing) mkdirSync(targetDir, { recursive: true });

  const serialized = `${JSON.stringify(config, null, 2)}\n`;
  writeFileSync(targetFile, serialized, "utf8");
  return targetFile;
}
|
|
151
|
+
|
|
152
|
+
/** Image payload pulled out of a provider content block. */
export interface ExtractedImage {
  /** Payload text: the part of a data URL after the comma, or a block's `data` field. */
  data: string;
  /** Media type of the payload, e.g. "image/png". */
  mimeType: string;
}

/**
 * Parse a `data:<mime>[;param=value…][;base64],<data>` URL into payload + mime.
 *
 * Media-type parameters between the mime type and the comma (for example
 * `;charset=utf-8` ahead of `;base64`) are accepted and ignored; previously
 * such URLs were rejected outright. The payload is returned verbatim — it is
 * not decoded or validated as base64.
 *
 * @param url - Candidate data URL.
 * @returns The payload and mime type (defaulting to "image/png" when the URL
 *   names none), or `null` if `url` is not a data URL.
 */
export function parseDataUrl(url: string): ExtractedImage | null {
  // Group 1: mime type (may be empty). Then any number of `;param` segments.
  // Group 2: everything after the first comma; the `s` flag lets it span newlines.
  const match = /^data:([^;,]*)(?:;[^;,]*)*,(.*)$/s.exec(url);
  if (!match) return null;
  return { mimeType: match[1] || "image/png", data: match[2] ?? "" };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Recognize an image content block by its shape and pull out the image payload.
 *
 * Shapes handled, keyed on `block.type`:
 *   "image_url"   (openai-completions): { image_url: { url: "data:..." } }
 *   "input_image" (openai-responses):   { image_url: "data:..." | { url } }
 *   "image"       (anthropic-messages): { source: { type: "base64", media_type, data } }
 *
 * @param block - Any value taken from a request payload.
 * @returns The extracted image, or `null` when the value is not one of the
 *   shapes above or its URL is not a data URL.
 */
export function extractImageFromBlock(block: unknown): ExtractedImage | null {
  if (!block || typeof block !== "object") return null;
  const candidate = block as Record<string, any>;

  switch (candidate.type) {
    case "image_url": {
      const url = candidate.image_url?.url;
      return typeof url === "string" ? parseDataUrl(url) : null;
    }
    case "input_image": {
      // The URL is either the field itself or nested one level down under `url`.
      const ref = candidate.image_url;
      const url = typeof ref === "string" ? ref : ref?.url;
      return typeof url === "string" ? parseDataUrl(url) : null;
    }
    case "image": {
      const src = candidate.source;
      if (src?.type !== "base64" || typeof src.data !== "string") return null;
      return { data: src.data, mimeType: src.media_type || "image/png" };
    }
    default:
      return null;
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Create the text block that stands in for an image block.
 *
 * An "input_image" block is answered with an "input_text" block; every other
 * input, including `null`, `undefined`, and non-objects, yields a "text" block.
 *
 * @param block - The image block being replaced.
 * @param description - Text to place in the replacement block.
 * @returns A `{ type, text }` object.
 */
export function makeReplacementText(block: unknown, description: string): Record<string, unknown> {
  const sourceType = (block as { type?: unknown } | null | undefined)?.type;
  const type = sourceType === "input_image" ? "input_text" : "text";
  return { type, text: description };
}
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VisionModelSelectorComponent — an interactive TUI for choosing which model
|
|
3
|
+
* describes images during vision handoff.
|
|
4
|
+
*
|
|
5
|
+
* Uses the same patterns as pi's built-in selectors and pi-hide-providers:
|
|
6
|
+
* - Lists all models, vision-capable ones first (👁 badge)
|
|
7
|
+
* - A leading "None" row clears the configured vision model
|
|
8
|
+
* - Search/filter via Input component
|
|
9
|
+
* - Enter or Ctrl+S confirms the highlighted model and saves
|
|
10
|
+
* - Esc / Ctrl+C cancels
|
|
11
|
+
* - The currently configured vision model is marked ✓
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import {
|
|
15
|
+
Container,
|
|
16
|
+
type Component,
|
|
17
|
+
fuzzyFilter,
|
|
18
|
+
getKeybindings,
|
|
19
|
+
Input,
|
|
20
|
+
Key,
|
|
21
|
+
matchesKey,
|
|
22
|
+
Spacer,
|
|
23
|
+
Text,
|
|
24
|
+
} from "@earendil-works/pi-tui";
|
|
25
|
+
import type { Theme } from "@earendil-works/pi-coding-agent";
|
|
26
|
+
import { DynamicBorder, keyText } from "@earendil-works/pi-coding-agent";
|
|
27
|
+
import { formatModelRef, isVisionModel } from "./index.js";
|
|
28
|
+
|
|
29
|
+
/** One row of the picker list. */
interface DisplayItem {
  /** Model reference as "provider/id"; `null` marks the synthetic "None" row. */
  ref: string | null;
  provider: string;
  modelId: string;
  modelName: string;
  /** Whether the model declares image input. */
  vision: boolean;
  /** Set on the synthetic "None" row only. */
  none?: boolean;
}

/** Outcome reported by the picker when it closes. */
export interface VisionModelSelectorResult {
  /** Chosen "provider/id"; `null` when "None" was picked or the picker was cancelled. */
  ref: string | null;
  /** `true` when the user cancelled (esc) — the config should then stay untouched. */
  cancelled: boolean;
}
|
|
45
|
+
|
|
46
|
+
export class VisionModelSelectorComponent implements Component {
|
|
47
|
+
private theme: Theme;
|
|
48
|
+
private done: (result: VisionModelSelectorResult) => void;
|
|
49
|
+
|
|
50
|
+
private allItems: DisplayItem[];
|
|
51
|
+
private filteredItems: DisplayItem[];
|
|
52
|
+
private selectedIndex = 0;
|
|
53
|
+
private readonly maxVisible = 10;
|
|
54
|
+
private searchInput: Input;
|
|
55
|
+
private listContainer: Container;
|
|
56
|
+
private footerText: Text;
|
|
57
|
+
|
|
58
|
+
private currentRef: string | null;
|
|
59
|
+
|
|
60
|
+
private _focused = false;
|
|
61
|
+
/** Whether this component currently holds focus. */
get focused(): boolean {
  return this._focused;
}

/** Record the new focus state and forward it to the search input. */
set focused(next: boolean) {
  this.searchInput.focused = next;
  this._focused = next;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Set up the picker state and its child widgets.
 *
 * @param theme - Theme used to colour the rendered output.
 * @param allModels - Models to list; turned into rows by `buildItems`.
 * @param currentRef - Currently configured "provider/id", or `null`; the
 *   matching row (the "None" row for `null`) is highlighted initially.
 * @param done - Completion callback, stored on the instance for later use.
 */
constructor(
  theme: Theme,
  allModels: Array<{ provider: string; id: string; name: string; input?: ("text" | "image")[] }>,
  currentRef: string | null,
  done: (result: VisionModelSelectorResult) => void,
) {
  this.theme = theme;
  this.done = done;
  this.currentRef = currentRef;

  const rows = this.buildItems(allModels);
  this.allItems = rows;
  this.filteredItems = rows;

  // Put the cursor on the configured model; findIndex's -1 falls back to the first row.
  const configuredAt = rows.findIndex((row) => row.ref === currentRef);
  this.selectedIndex = Math.max(0, configuredAt);

  this.searchInput = new Input();
  this.listContainer = new Container();
  // getFooterText() reads allItems, filteredItems and searchInput, all assigned above.
  this.footerText = new Text(this.getFooterText(), 0, 0);

  // Submitting the search box confirms whichever row is highlighted, if any.
  this.searchInput.onSubmit = () => {
    const highlighted = this.filteredItems[this.selectedIndex];
    if (highlighted) this.confirm(highlighted);
  };

  this.updateList();
}
|
|
95
|
+
|
|
96
|
+
/**
 * Render the picker: border, title, hint, search box, model list, footer, border,
 * with blank lines separating the sections.
 *
 * @param width - Width passed through to every child renderer.
 * @returns The rendered lines, top to bottom.
 */
render(width: number): string[] {
  const border = (): string[] =>
    new DynamicBorder((s) => this.theme.fg("accent", s)).render(width);

  return [
    ...border(),
    "",
    this.theme.fg("accent", this.theme.bold("Vision Handoff")),
    this.theme.fg("muted", "Pick a vision-capable model to describe images for text-only models."),
    "",
    ...this.searchInput.render(width),
    "",
    ...this.listContainer.render(width),
    "",
    ...this.footerText.render(width),
    ...border(),
  ];
}
|
|
113
|
+
|
|
114
|
+
/**
 * Dispatch one chunk of keyboard input.
 *
 * Checked in this order: move up, move down (both wrap around), confirm
 * (the configured confirm key or Ctrl+S), Escape (calls `finish(true)`),
 * Ctrl+C (clears a non-empty search first, otherwise calls `finish(true)`).
 * Anything else is forwarded to the search box and the list is re-filtered.
 *
 * @param data - Raw input as delivered by the TUI.
 */
handleInput(data: string): void {
  const kb = getKeybindings();

  // -1 = up, +1 = down, 0 = not a navigation key.
  const step = kb.matches(data, "tui.select.up")
    ? -1
    : kb.matches(data, "tui.select.down")
      ? 1
      : 0;

  if (step !== 0) {
    const count = this.filteredItems.length;
    if (count === 0) return;
    const last = count - 1;
    if (step < 0) {
      this.selectedIndex = this.selectedIndex === 0 ? last : this.selectedIndex - 1;
    } else {
      this.selectedIndex = this.selectedIndex === last ? 0 : this.selectedIndex + 1;
    }
    this.updateList();
    return;
  }

  // The confirm keybinding and Ctrl+S do exactly the same thing.
  if (kb.matches(data, "tui.select.confirm") || matchesKey(data, Key.ctrl("s"))) {
    const highlighted = this.filteredItems[this.selectedIndex];
    if (highlighted) this.confirm(highlighted);
    return;
  }

  if (matchesKey(data, Key.escape)) {
    this.finish(true);
    return;
  }

  if (matchesKey(data, Key.ctrl("c"))) {
    // First Ctrl+C clears an active search; with an empty search it closes the picker.
    if (!this.searchInput.getValue()) {
      this.finish(true);
      return;
    }
    this.searchInput.setValue("");
    this.refresh();
    return;
  }

  this.searchInput.handleInput(data);
  this.refresh();
}
|
|
167
|
+
|
|
168
|
+
/** Forward `invalidate()` to the search input, the list container, and the footer, in that order. */
invalidate(): void {
  for (const child of [this.searchInput, this.listContainer, this.footerText]) {
    child.invalidate();
  }
}
|
|
173
|
+
|
|
174
|
+
// Internal helpers
|
|
175
|
+
|
|
176
|
+
/**
 * Build the picker rows: the synthetic "None" row first, then vision-capable
 * models, then the remaining models. Within each group the input order is kept.
 *
 * Each model is classified once in a single pass (the earlier version ran
 * `isVisionModel` three times per model across two filter passes). The "None"
 * label now uses a real em dash in place of a mis-encoded byte sequence.
 *
 * @param allModels - Models to list.
 * @returns Rows in display order.
 */
private buildItems(
  allModels: Array<{ provider: string; id: string; name: string; input?: ("text" | "image")[] }>,
): DisplayItem[] {
  const noneRow: DisplayItem = {
    ref: null,
    provider: "",
    modelId: "none",
    modelName: "None \u2014 disable vision handoff",
    vision: false,
    none: true,
  };

  const visionRows: DisplayItem[] = [];
  const textRows: DisplayItem[] = [];
  for (const model of allModels) {
    const vision = isVisionModel(model);
    (vision ? visionRows : textRows).push({
      ref: formatModelRef(model.provider, model.id),
      provider: model.provider,
      modelId: model.id,
      // Fall back to the id when the model has no display name.
      modelName: model.name || model.id,
      vision,
    });
  }

  return [noneRow, ...visionRows, ...textRows];
}
|
|
208
|
+
|
|
209
|
+
/**
 * Builds the dimmed footer line: key hints, a model count, and the currently
 * configured model ref.
 *
 * While a search query is active the count is the number of real models that
 * matched. The synthetic "None" row is excluded by inspecting the rows instead of
 * subtracting one: the filtered list only contains that row when it matches the
 * query, so `length - 1` under-counted by one and produced "-1 match" for an
 * empty result.
 */
private getFooterText(): string {
  const totalCount = this.allItems.filter((row) => !row.none).length;
  const visionCount = this.allItems.filter((row) => row.vision).length;

  const current = this.currentRef
    ? `current: ${this.currentRef}`
    : "current: none";

  const summary = this.searchInput.getValue()
    ? `${this.filteredItems.filter((row) => !row.none).length} match`
    : `${totalCount} models ยท ${visionCount} vision`;

  const parts: string[] = [
    `${keyText("tui.select.confirm")} select`,
    `ctrl+s done`,
    `esc cancel`,
    summary,
  ];

  return this.theme.fg("dim", ` ${parts.join(" ยท ")} ยท ${current} `);
}
|
|
226
|
+
|
|
227
|
+
/**
 * Recomputes `filteredItems` from the current search text and redraws the list.
 *
 * An empty query shows every row; otherwise rows are passed through `fuzzyFilter`
 * using "provider modelId ref modelName" as the searchable text (the None row uses
 * "none" for its ref). The selection index is then clamped to the new last row.
 */
private refresh(): void {
  const searchText = this.searchInput.getValue();

  if (searchText) {
    this.filteredItems = fuzzyFilter(
      this.allItems,
      searchText,
      (i) => `${i.provider} ${i.modelId} ${i.ref ?? "none"} ${i.modelName}`,
    );
  } else {
    this.filteredItems = this.allItems;
  }

  const lastIndex = Math.max(0, this.filteredItems.length - 1);
  this.selectedIndex = Math.min(this.selectedIndex, lastIndex);

  this.updateList();
}
|
|
242
|
+
|
|
243
|
+
/**
 * Rebuilds the list container from `filteredItems` and refreshes the footer text.
 *
 * Rendered in order: a window of at most `maxVisible` rows positioned around the
 * selected row, a "(n/total)" line when the window does not cover every row, and a
 * short detail section for the selected row. When there are no rows a single
 * "No matching models" line is rendered instead.
 */
private updateList(): void {
  const rows = this.filteredItems;
  this.listContainer.clear();

  if (rows.length === 0) {
    const empty = new Text(this.theme.fg("muted", " No matching models"), 0, 0);
    this.listContainer.addChild(empty);
    this.footerText.setText(this.getFooterText());
    return;
  }

  // Centre the window on the selection, then clamp it so it never runs past either end.
  const centred = this.selectedIndex - Math.floor(this.maxVisible / 2);
  const latestStart = rows.length - this.maxVisible;
  const windowStart = Math.max(0, Math.min(centred, latestStart));
  const windowEnd = Math.min(windowStart + this.maxVisible, rows.length);

  for (let idx = windowStart; idx < windowEnd; idx++) {
    const row = rows[idx];
    if (!row) continue;

    const highlighted = idx === this.selectedIndex;
    const cursor = highlighted ? this.theme.fg("accent", "โ ") : " ";

    let text: string;
    if (row.none) {
      text = this.theme.fg("warning", row.modelName);
    } else {
      const idText = highlighted ? this.theme.fg("accent", row.modelId) : row.modelId;
      const visionTag = row.vision ? this.theme.fg("success", " ๐") : this.theme.fg("muted", " ยท");
      const providerTag = this.theme.fg("muted", ` [${row.provider}]`);
      text = `${idText}${providerTag}${visionTag}`;
    }

    // Mark the row that matches the stored choice; the None row matches a null ref.
    const matchesRef = row.ref === this.currentRef && row.ref !== null;
    const matchesNone = row.none && this.currentRef === null;
    const marker = matchesRef || matchesNone ? this.theme.fg("success", " โ") : "";

    this.listContainer.addChild(new Text(`${cursor}${text}${marker}`, 0, 0));
  }

  const windowIsPartial = windowStart > 0 || windowEnd < rows.length;
  if (windowIsPartial) {
    const position = ` (${this.selectedIndex + 1}/${rows.length})`;
    this.listContainer.addChild(new Text(this.theme.fg("muted", position), 0, 0));
  }

  const focused = rows[this.selectedIndex];
  if (focused) {
    this.listContainer.addChild(new Spacer(1));
    if (focused.none) {
      this.listContainer.addChild(new Text(this.theme.fg("muted", ` ${focused.modelName}`), 0, 0));
    } else {
      const nameLine = this.theme.fg("muted", ` Model Name: ${focused.modelName}`);
      this.listContainer.addChild(new Text(nameLine, 0, 0));

      const capabilityLine = this.theme.fg(
        "dim",
        `${focused.vision ? "๐ vision-capable โ recommended describer" : "no native vision โ not a good describer"}`,
      );
      this.listContainer.addChild(new Text(capabilityLine, 0, 0));
    }
  }

  this.footerText.setText(this.getFooterText());
}
|
|
325
|
+
|
|
326
|
+
/** Completes the picker with the given row's ref and `cancelled: false`. */
private confirm(item: DisplayItem): void {
  const { ref } = item;
  this.done({ ref, cancelled: false });
}
|
|
329
|
+
|
|
330
|
+
/** Completes the picker without a selection: `ref` is always null and `cancelled` is passed through. */
private finish(cancelled: boolean): void {
  const ref = null;
  this.done({ ref, cancelled });
}
|
|
333
|
+
}
|
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pi-vision-handoff — give text-only models vision by proxying image input
|
|
3
|
+
* through a vision-capable model of your choice.
|
|
4
|
+
*
|
|
5
|
+
* Extracted from the GLM 5.1 vision-handoff pipeline in pi-umans-provider and
|
|
6
|
+
* generalized: instead of a hardcoded describer, the user picks any
|
|
7
|
+
* vision-capable model from the registry via an interactive picker, and the
|
|
8
|
+
* choice is persisted to ~/.pi/agent/extensions/pi-vision-handoff.json.
|
|
9
|
+
*
|
|
10
|
+
* Pipeline (provider-agnostic via @earendil-works/pi-ai's complete()):
|
|
11
|
+
* before_agent_start → warm the description cache for attached images
|
|
12
|
+
* before_provider_request → swap image blocks in the payload for text
|
|
13
|
+
*
|
|
14
|
+
* Image blocks are detected by shape across the three request formats pi uses:
|
|
15
|
+
* openai-completions: { type: "image_url", image_url: { url: "data:..." } }
|
|
16
|
+
* openai-responses: { type: "input_image", image_url: "data:..." }
|
|
17
|
+
* anthropic-messages: { type: "image", source: { type: "base64", media_type, data } }
|
|
18
|
+
*
|
|
19
|
+
* Descriptions are cached per image hash (LRU, size = config.cacheMax) so the
|
|
20
|
+
* swap is instant by the time before_provider_request fires.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import crypto from "node:crypto";
|
|
24
|
+
import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext, ModelRegistry } from "@earendil-works/pi-coding-agent";
|
|
25
|
+
import { complete } from "@earendil-works/pi-ai";
|
|
26
|
+
import type { Api, ImageContent, Message, Model, TextContent } from "@earendil-works/pi-ai";
|
|
27
|
+
import {
|
|
28
|
+
DEFAULT_USER_PROMPT_PREFIX,
|
|
29
|
+
DEFAULT_VISION_PROMPT,
|
|
30
|
+
DESCRIBE_TIMEOUT_MS,
|
|
31
|
+
HANDOFF_COMMAND_DESCRIPTION,
|
|
32
|
+
IMAGE_PLACEHOLDER_PREFIX,
|
|
33
|
+
IMAGE_PLACEHOLDER_SUFFIX,
|
|
34
|
+
extractImageFromBlock,
|
|
35
|
+
formatModelRef,
|
|
36
|
+
isVisionModel,
|
|
37
|
+
makeReplacementText,
|
|
38
|
+
parseModelRef,
|
|
39
|
+
readConfig,
|
|
40
|
+
writeConfig,
|
|
41
|
+
type VisionHandoffConfig,
|
|
42
|
+
} from "./src/index.js";
|
|
43
|
+
import { VisionModelSelectorComponent, type VisionModelSelectorResult } from "./src/vision-model-selector.js";
|
|
44
|
+
|
|
45
|
+
// Replacement text used whenever an image could not be described.
const UNAVAILABLE = `${IMAGE_PLACEHOLDER_PREFIX}description unavailable${IMAGE_PLACEHOLDER_SUFFIX}`;

// Active configuration, read once when the module is loaded and reassigned on reloads/updates.
let config: VisionHandoffConfig = readConfig();

// Image hash -> promise of the placeholder text for that image (pending or settled).
const visionCache = new Map<string, Promise<string>>();
// Last successfully resolved vision model, remembered together with the ref it came from.
let visionModelCache: { ref: string; model: Model<Api> } | null = null;
// Ref that was most recently reported as missing, so the same warning is not repeated.
let visionModelUnresolvedRef: string | null = null;
|
|
52
|
+
|
|
53
|
+
/** Handoff can only run when it is enabled and a vision model ref has been chosen. */
function isConfigured(cfg: VisionHandoffConfig): boolean {
  const hasVisionModel = !!cfg.visionModel;
  return cfg.enabled && hasVisionModel;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Decides whether requests for `model` should have their images replaced by text.
 *
 * A model qualifies when its "provider/id" ref is listed in `cfg.handoffModels`, or
 * when `cfg.autoHandoff` is on and `isVisionModel` reports that it is not a vision
 * model. A missing model, provider or id never qualifies.
 */
function isHandoffTarget(
  model: { provider?: string; id?: string; input?: ("text" | "image")[] } | undefined | null,
  cfg: VisionHandoffConfig,
): boolean {
  if (!model?.provider || !model.id) return false;

  const explicitlyListed = cfg.handoffModels.includes(formatModelRef(model.provider, model.id));
  if (explicitlyListed) return true;

  return cfg.autoHandoff && !isVisionModel(model) ? true : false;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Looks up the model behind a "provider/id" ref in the registry.
 *
 * The last successful lookup is remembered in `visionModelCache` and reused while the
 * same ref is requested. Returns null when the ref cannot be parsed or the registry
 * has no such model; failed lookups are not remembered.
 */
function resolveVisionModel(modelRegistry: ModelRegistry, ref: string): Model<Api> | null {
  if (visionModelCache?.ref === ref) return visionModelCache.model;

  const target = parseModelRef(ref);
  const found = target ? modelRegistry.find(target.provider, target.id) : undefined;
  if (!found) return null;

  visionModelCache = { ref, model: found };
  return found;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Cache key for an image: the first 32 hex characters of the SHA-256 digest of the
 * MIME type and the image data joined by a NUL character.
 */
function imageHash(mimeType: string, data: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(`${mimeType}\x00${data}`);
  const hex = hasher.digest("hex");
  return hex.substring(0, 32);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Returns the placeholder text for an image: a description produced by `visionModel`,
 * wrapped in the image placeholder prefix/suffix, or `UNAVAILABLE` on any failure.
 * The returned promise never rejects.
 *
 * Results are memoised in `visionCache` by image hash (MIME type + data). The prompt
 * is not part of the key, so the first caller for a given image decides which prompt
 * the description is generated with.
 *
 * Cache policy:
 *  - A hit moves the entry to the newest position, so eviction is least-recently-used.
 *  - When the cache already holds `cfg.cacheMax` entries, the oldest one is dropped
 *    before inserting.
 *  - An entry that settles to `UNAVAILABLE` is removed again so a later call retries
 *    instead of reusing a stale failure.
 *
 * @param data          Image payload as passed to the vision model (`ImageContent.data`).
 * @param mimeType      MIME type of the image.
 * @param userPrompt    The user's own prompt; when non-blank it is sent, prefixed, with the image.
 * @param visionModel   Model that produces the description.
 * @param modelRegistry Used to obtain the API key and headers for `visionModel`.
 * @param cfg           Supplies prompt overrides, `maxTokens` and `cacheMax`.
 */
async function describeImage(
  data: string,
  mimeType: string,
  userPrompt: string,
  visionModel: Model<Api>,
  modelRegistry: ModelRegistry,
  cfg: VisionHandoffConfig,
): Promise<string> {
  const key = imageHash(mimeType, data);
  const cached = visionCache.get(key);
  if (cached) {
    // Map iterates in insertion order; re-inserting marks this entry as most recently used.
    visionCache.delete(key);
    visionCache.set(key, cached);
    return cached;
  }

  const promise = (async (): Promise<string> => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      // Inside the try block so a failing credential lookup degrades to UNAVAILABLE
      // instead of caching a rejected promise.
      const auth = await modelRegistry.getApiKeyAndHeaders(visionModel);
      if (!auth.ok || !auth.apiKey) return UNAVAILABLE;

      const prefix = cfg.userPromptPrefix ?? DEFAULT_USER_PROMPT_PREFIX;
      const systemPrompt = cfg.prompt ?? DEFAULT_VISION_PROMPT;

      const content: (TextContent | ImageContent)[] = [];
      if (userPrompt && userPrompt.trim()) {
        content.push({ type: "text", text: prefix + userPrompt });
      } else {
        content.push({ type: "text", text: "Describe this image." } satisfies TextContent);
      }
      content.push({ type: "image", data, mimeType } satisfies ImageContent);

      const userMessage: Message = {
        role: "user",
        content,
        timestamp: Date.now(),
      };

      // Abort the describe call once DESCRIBE_TIMEOUT_MS has elapsed.
      const controller = new AbortController();
      timer = setTimeout(() => controller.abort(), DESCRIBE_TIMEOUT_MS);

      const response = await complete(
        visionModel,
        { systemPrompt, messages: [userMessage] },
        {
          apiKey: auth.apiKey,
          headers: auth.headers,
          signal: controller.signal,
          maxTokens: cfg.maxTokens,
        },
      );
      if (response.stopReason === "aborted" || response.stopReason === "error") {
        return UNAVAILABLE;
      }

      // Keep only the text parts of the reply.
      const description = response.content
        .filter((c): c is TextContent => c.type === "text")
        .map((c) => c.text)
        .join("\n")
        .trim();
      if (!description) return UNAVAILABLE;
      return `${IMAGE_PLACEHOLDER_PREFIX}${description}${IMAGE_PLACEHOLDER_SUFFIX}`;
    } catch {
      return UNAVAILABLE;
    } finally {
      if (timer !== undefined) clearTimeout(timer);
    }
  })();

  if (visionCache.size >= cfg.cacheMax) {
    const oldestKey = visionCache.keys().next().value;
    if (oldestKey !== undefined) visionCache.delete(oldestKey);
  }
  visionCache.set(key, promise);

  // Forget failures once they settle. The identity check avoids deleting a newer
  // entry that may have replaced this one under the same key in the meantime.
  void promise.then((result) => {
    if (result === UNAVAILABLE && visionCache.get(key) === promise) {
      visionCache.delete(key);
    }
  });

  return promise;
}
|
|
153
|
+
|
|
154
|
+
/**
 * Replaces every image block in a provider request payload with a text block that
 * carries the image's description. The content arrays are modified in place.
 *
 * Conversation items are read from `payload.messages` and from `payload.input`;
 * the latter is where OpenAI Responses-style requests carry the conversation, so
 * scanning only `messages` would leave their `input_image` blocks untouched. Items
 * whose `content` is not an array (for example plain strings) are skipped. Image
 * blocks are recognised by `extractImageFromBlock`.
 *
 * All images are described concurrently. `describeImage` memoises by image, so the
 * same image appearing in several places resolves to the same description.
 *
 * @param payload       Request body about to be sent to the provider.
 * @param userPrompt    Prompt forwarded to `describeImage` for images not yet cached.
 * @param visionModel   Model used to describe images.
 * @param modelRegistry Forwarded to `describeImage`.
 * @param cfg           Forwarded to `describeImage`.
 * @returns true when at least one image block was replaced.
 */
async function replaceImagesWithDescriptions(
  payload: Record<string, unknown>,
  userPrompt: string,
  visionModel: Model<Api>,
  modelRegistry: ModelRegistry,
  cfg: VisionHandoffConfig,
): Promise<boolean> {
  const replacements: Promise<void>[] = [];

  for (const field of ["messages", "input"] as const) {
    const items = payload[field];
    if (!Array.isArray(items)) continue;

    for (const item of items) {
      if (!item || typeof item !== "object") continue;
      const content = (item as Record<string, unknown>).content;
      if (!Array.isArray(content)) continue;

      content.forEach((block, index) => {
        const img = extractImageFromBlock(block);
        if (!img) return;
        replacements.push(
          (async (): Promise<void> => {
            const description = await describeImage(
              img.data,
              img.mimeType,
              userPrompt,
              visionModel,
              modelRegistry,
              cfg,
            );
            content[index] = makeReplacementText(block, description);
          })(),
        );
      });
    }
  }

  await Promise.all(replacements);
  return replacements.length > 0;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Warns the user that the configured vision model ref is not in the registry.
 *
 * The ref is recorded in `visionModelUnresolvedRef` and repeated calls with the same
 * ref return immediately. The ref is recorded even when there is no UI to show the
 * warning in.
 */
function notifyUnresolvedVisionModel(ctx: ExtensionContext, ref: string): void {
  const alreadyReported = visionModelUnresolvedRef === ref;
  if (alreadyReported) return;

  visionModelUnresolvedRef = ref;
  if (!ctx.hasUI) return;

  const warning = `pi-vision-handoff: configured vision model "${ref}" was not found in the registry โ run /vision-handoff to pick a model.`;
  ctx.ui.notify(warning, "warning");
}
|
|
191
|
+
|
|
192
|
+
/**
 * Extension entry point. Loads the config and registers:
 *  - `session_start`: re-read the config and drop the resolved-model cache;
 *  - `before_agent_start`: start describing attached images in the background;
 *  - `before_provider_request`: replace image blocks in the outgoing payload;
 *  - `model_select`: tell the user when handoff applies to the newly selected model;
 *  - the `/vision-handoff` command and its argument completions.
 */
export default function (pi: ExtensionAPI) {
  config = readConfig();

  pi.on("session_start", async () => {
    // The config file may have been edited on disk by another session.
    config = readConfig();
    visionModelCache = null;
  });

  pi.on("before_agent_start", async (event, ctx) => {
    const applies = isConfigured(config) && isHandoffTarget(ctx.model, config);
    if (!applies) return;

    const attached = event.images;
    if (!attached || attached.length === 0) return;

    const ref = config.visionModel!;
    const visionModel = resolveVisionModel(ctx.modelRegistry, ref);
    if (!visionModel) {
      notifyUnresolvedVisionModel(ctx, ref);
      return;
    }

    const userPrompt = event.prompt || "";
    for (const attachment of attached) {
      const usable = attachment && attachment.type === "image" && attachment.data;
      if (!usable) continue;
      // Fire and forget: the result is picked up from the cache by before_provider_request.
      describeImage(
        attachment.data,
        attachment.mimeType || "image/png",
        userPrompt,
        visionModel,
        ctx.modelRegistry,
        config,
      ).catch(() => {});
    }
  });

  pi.on("before_provider_request", async (event, ctx) => {
    const applies = isConfigured(config) && isHandoffTarget(ctx.model, config);
    if (!applies) return;

    const ref = config.visionModel!;
    const visionModel = resolveVisionModel(ctx.modelRegistry, ref);
    if (!visionModel) {
      notifyUnresolvedVisionModel(ctx, ref);
      return;
    }

    const payload = event.payload as Record<string, unknown>;
    await replaceImagesWithDescriptions(payload, "", visionModel, ctx.modelRegistry, config);
    return payload;
  });

  pi.on("model_select", (event, ctx) => {
    if (!ctx.hasUI || !isConfigured(config)) return;

    const selected = event.model;
    if (!selected) return;

    const textOnlyTarget = isHandoffTarget(selected, config) && !isVisionModel(selected);
    if (!textOnlyTarget) return;

    ctx.ui.notify(
      `pi-vision-handoff: active โ images will be described by ${config.visionModel}`,
      "info",
    );
  });

  pi.registerCommand("vision-handoff", {
    description: HANDOFF_COMMAND_DESCRIPTION,
    getArgumentCompletions(prefix: string) {
      const subcommands = ["select", "model", "status", "enable", "disable", "auto", "add", "remove", "clear", "help"];
      const candidates = subcommands.filter((name) => name.startsWith(prefix));
      if (candidates.length === 0) return null;
      return candidates.map((name) => ({ value: name, label: name }));
    },
    handler: async (args, ctx) => {
      await handleHandoffCommand(ctx, args.trim());
    },
  });
}
|
|
263
|
+
|
|
264
|
+
/**
 * Parses the `<provider/id>` argument shared by the `model`, `add` and `remove`
 * subcommands. When the argument is missing or malformed the user is shown the
 * usage hint or an error and null is returned.
 */
function parseRefArgument(ctx: ExtensionCommandContext, subcommand: "model" | "add" | "remove", rest: string) {
  if (!rest) {
    ctx.ui.notify(`Usage: /vision-handoff ${subcommand} <provider/id>`, "warning");
    return null;
  }
  const parsed = parseModelRef(rest);
  if (!parsed) {
    ctx.ui.notify(`Invalid model reference: "${rest}". Use "provider/id".`, "error");
    return null;
  }
  return { provider: parsed.provider, id: parsed.id, ref: formatModelRef(parsed.provider, parsed.id) };
}

/**
 * Dispatches a `/vision-handoff` invocation.
 *
 * `args` is the already-trimmed argument string. Its first whitespace-separated word
 * (case-insensitive) selects the subcommand and the remainder is that subcommand's
 * argument. No subcommand, or `select`, opens the interactive picker. Unknown
 * subcommands produce a warning that points at `help`.
 *
 * `remove` checks the handoff list before changing anything: when the ref is not
 * listed it only says so, rather than rewriting the config and announcing a removal
 * that did not happen.
 */
async function handleHandoffCommand(ctx: ExtensionCommandContext, args: string): Promise<void> {
  const parts = args.split(/\s+/);
  const subcommand = parts[0]?.toLowerCase() ?? "";
  const rest = parts.slice(1).join(" ");

  switch (subcommand) {
    case "":
    case "select":
      await showSelector(ctx);
      return;

    case "help":
      ctx.ui.notify(
        [
          "pi-vision-handoff commands:",
          " /vision-handoff Open interactive picker to choose the vision model",
          " /vision-handoff select Same as /vision-handoff",
          " /vision-handoff model <p/id> Set the vision model directly",
          " /vision-handoff status Show current config and active state",
          " /vision-handoff enable Enable vision handoff",
          " /vision-handoff disable Disable vision handoff (keeps configured model)",
          " /vision-handoff auto <on|off> Toggle automatic handoff for all non-vision models",
          " /vision-handoff add <p/id> Force handoff for an extra model",
          " /vision-handoff remove <p/id> Stop forcing handoff for a model",
          " /vision-handoff clear Clear the configured vision model",
          " /vision-handoff help This message",
          "",
          "Config: ~/.pi/agent/extensions/pi-vision-handoff.json",
          "Mechanism: before_agent_start warms a description cache; before_provider_request",
          " swaps image blocks in the payload for the cached text description.",
        ].join("\n"),
        "info",
      );
      return;

    case "status":
      showStatus(ctx);
      return;

    case "enable":
      updateConfig(ctx, (c) => ({ ...c, enabled: true }), "Vision handoff enabled.");
      return;

    case "disable":
      updateConfig(ctx, (c) => ({ ...c, enabled: false }), "Vision handoff disabled.");
      return;

    case "auto": {
      const value = rest.toLowerCase();
      if (value !== "on" && value !== "off") {
        ctx.ui.notify("Usage: /vision-handoff auto <on|off>", "warning");
        return;
      }
      const on = value === "on";
      updateConfig(
        ctx,
        (c) => ({ ...c, autoHandoff: on }),
        `Automatic handoff for non-vision models ${on ? "on" : "off"}.`,
      );
      return;
    }

    case "clear":
      updateConfig(
        ctx,
        (c) => ({ ...c, visionModel: null }),
        "Vision model cleared โ handoff inactive until you pick a model.",
      );
      return;

    case "model": {
      const target = parseRefArgument(ctx, "model", rest);
      if (!target) return;
      const model = ctx.modelRegistry.find(target.provider, target.id);
      if (!model) {
        ctx.ui.notify(`Model not found: ${rest}. Use /vision-handoff to pick from the list.`, "error");
        return;
      }
      const { ref } = target;
      updateConfig(ctx, (c) => ({ ...c, visionModel: ref }), `Vision model set to ${ref}.`);
      // The choice is still saved; the user is only warned that the model may be a poor describer.
      if (!isVisionModel(model)) {
        ctx.ui.notify(
          `Note: ${ref} does not declare image input โ it may not describe images well.`,
          "warning",
        );
      }
      return;
    }

    case "add": {
      const target = parseRefArgument(ctx, "add", rest);
      if (!target) return;
      const { ref } = target;
      updateConfig(
        ctx,
        // The Set keeps the list free of duplicates when the ref is already present.
        (c) => ({ ...c, handoffModels: Array.from(new Set([...c.handoffModels, ref])) }),
        `Added ${ref} to handoff targets.`,
      );
      return;
    }

    case "remove": {
      const target = parseRefArgument(ctx, "remove", rest);
      if (!target) return;
      const { ref } = target;
      if (!config.handoffModels.includes(ref)) {
        ctx.ui.notify(`Note: ${ref} was not in the handoff list.`, "info");
        return;
      }
      updateConfig(
        ctx,
        (c) => ({ ...c, handoffModels: c.handoffModels.filter((m) => m !== ref) }),
        `Removed ${ref} from handoff targets.`,
      );
      return;
    }

    default:
      ctx.ui.notify(`Unknown subcommand: "${subcommand}". Use /vision-handoff help for usage.`, "warning");
  }
}
|
|
409
|
+
|
|
410
|
+
/**
 * Applies `transform` to the active config, writes the result with `writeConfig`,
 * makes it the active config, and reports `message` together with the config path.
 *
 * The resolved-model cache and the "already warned" ref are both reset, so the next
 * request resolves the vision model again and can warn again if it is missing.
 */
function updateConfig(
  ctx: ExtensionCommandContext,
  transform: (c: VisionHandoffConfig) => VisionHandoffConfig,
  message: string,
): void {
  const updated = transform(config);
  const savedTo = writeConfig(updated);

  // The in-memory state is only replaced after writeConfig has returned.
  config = updated;
  visionModelCache = null;
  visionModelUnresolvedRef = null;

  ctx.ui.notify(`${message} (config: ${savedTo})`, "info");
}
|
|
422
|
+
|
|
423
|
+
/**
 * Opens the interactive vision-model picker and stores the user's choice.
 *
 * Requires a UI. Cancelling leaves the config untouched. Choosing the "None" row
 * stores a null vision model and adds a warning that handoff is inactive.
 */
async function showSelector(ctx: ExtensionCommandContext): Promise<void> {
  if (!ctx.hasUI) {
    ctx.ui.notify("/vision-handoff requires interactive mode.", "error");
    return;
  }

  // Hand the picker only the fields it needs from each registry entry.
  const registryModels = ctx.modelRegistry.getAll().map((m) => {
    return { provider: m.provider, id: m.id, name: m.name, input: m.input };
  });

  const outcome = await ctx.ui.custom<VisionModelSelectorResult>((tui, theme, _kb, done) => {
    const picker = new VisionModelSelectorComponent(theme, registryModels, config.visionModel, (r) => done(r));
    return {
      render(width: number) {
        return picker.render(width);
      },
      invalidate() {
        picker.invalidate();
      },
      handleInput(data: string) {
        picker.handleInput(data);
        // Ask for a repaint after every key press.
        tui.requestRender();
      },
    };
  });

  if (!outcome || outcome.cancelled) {
    ctx.ui.notify("Vision handoff picker cancelled.", "info");
    return;
  }

  const { ref } = outcome;
  const summary = ref ? `Vision model set to ${ref}` : "Vision model cleared";
  updateConfig(ctx, (c) => ({ ...c, visionModel: ref }), summary);

  if (!ref) {
    ctx.ui.notify("Handoff is inactive until you pick a vision model.", "warning");
  }
}
|
|
460
|
+
|
|
461
|
+
/**
 * Shows the current configuration as a multi-line info notification, ending with
 * whether handoff applies to the model that is currently selected.
 */
function showStatus(ctx: ExtensionCommandContext): void {
  const current = ctx.model;

  // Handoff only counts as active when it is configured and the current model is a target.
  const active = isConfigured(config) && current ? isHandoffTarget(current, config) : false;
  const currentLabel = current ? formatModelRef(current.provider, current.id) : "none";
  const explicitTargets = config.handoffModels.length ? config.handoffModels.join(", ") : "(none)";

  const report: string[] = [
    `Vision handoff: ${config.enabled ? "enabled" : "disabled"}`,
    `Vision model: ${config.visionModel ?? "(none โ pick one with /vision-handoff)"}`,
    `Auto handoff (non-vision models): ${config.autoHandoff ? "on" : "off"}`,
    `Handoff targets (explicit): ${explicitTargets}`,
    `maxTokens: ${config.maxTokens} ยท cacheMax: ${config.cacheMax}`,
    `Active for current model (${currentLabel}): ${active ? "yes" : "no"}`,
  ];

  ctx.ui.notify(report.join("\n"), "info");
}
|
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { defineConfig } from "vitest/config";
|
|
2
|
+
|
|
3
|
+
// Vitest configuration for this package's unit tests.
export default defineConfig({
  test: {
    globals: true,
    environment: "node",
    // Only files under __tests__/ that end in .test.ts are collected.
    include: ["__tests__/**/*.test.ts"],
    exclude: ["node_modules", "dist", ".idea", ".git", ".cache"],
    coverage: {
      provider: "v8",
      reporter: ["text", "json", "html"],
      // Dependencies, declaration files and the tests themselves are left out of coverage.
      exclude: ["node_modules/", "**/*.d.ts", "**/*.test.ts"],
    },
  },
});
|