@skelm/pi 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @skelm/pi
2
2
 
3
- > Pi coding-agent backend for [skelm](https://github.com/scottgl9/skelm) — integrates [`@mariozechner/pi-coding-agent`](https://www.npmjs.com/package/@mariozechner/pi-coding-agent) with full permission enforcement.
3
+ > Pi coding-agent backend for [skelm](https://github.com/scottgl9/skelm) — integrates [`@earendil-works/pi-coding-agent`](https://www.npmjs.com/package/@earendil-works/pi-coding-agent) with full permission enforcement.
4
4
 
5
5
  [![npm](https://img.shields.io/npm/v/@skelm/pi)](https://www.npmjs.com/package/@skelm/pi)
6
6
 
@@ -10,10 +10,10 @@ Two backends are available:
10
10
 
11
11
  | | `createPiBackend` (RPC) | `createPiSdkBackend` (SDK) |
12
12
  |---|---|---|
13
- | **How it works** | Spawns `pi --mode rpc` per call | Uses `@mariozechner/pi-coding-agent` SDK directly |
13
+ | **How it works** | Spawns `pi --mode rpc` per call | Uses `@earendil-works/pi-coding-agent` SDK directly |
14
14
  | **Tool enforcement** | Advisory (skelm intercepts after the fact) | Native (pi hard-enforces the allowlist) |
15
15
  | **System prompt** | Not controllable | Pi's default; `req.system` appended; optional full replace |
16
- | **Peer dependency** | `pi` CLI on `$PATH` | `@mariozechner/pi-coding-agent` installed |
16
+ | **Peer dependency** | `pi` CLI on `$PATH` | `@earendil-works/pi-coding-agent` installed |
17
17
 
18
18
  Use the **SDK backend** for new work — it gives you hard tool enforcement and real system prompt control. The RPC backend exists for environments where the SDK peer dependency can't be installed.
19
19
 
@@ -26,13 +26,13 @@ npm install @skelm/pi
26
26
  **RPC backend** additionally requires the `pi` CLI on `$PATH`:
27
27
 
28
28
  ```bash
29
- npm install -g @mariozechner/pi-coding-agent # installs the `pi` binary
29
+ npm install -g @earendil-works/pi-coding-agent # installs the `pi` binary
30
30
  ```
31
31
 
32
32
  **SDK backend** additionally requires the SDK as a peer dependency:
33
33
 
34
34
  ```bash
35
- npm install @mariozechner/pi-coding-agent
35
+ npm install @earendil-works/pi-coding-agent
36
36
  ```
37
37
 
38
38
  ## SDK backend (recommended)
@@ -63,7 +63,7 @@ export default defineConfig({
63
63
  A workflow that reviews a PR using a **skill** that encodes your team's style guide:
64
64
 
65
65
  ```ts
66
- // workflows/review-pr.workflow.ts
66
+ // workflows/review-pr.workflow.mts
67
67
  import { agent, pipeline } from 'skelm'
68
68
  import { z } from 'zod'
69
69
 
package/dist/backend.js CHANGED
@@ -5,8 +5,8 @@
5
5
  // concurrency semaphore to avoid spawning unlimited processes.
6
6
  //
7
7
  // Pi does NOT speak ACP; this backend uses the native pi RPC protocol
8
- // documented in @mariozechner/pi-coding-agent/docs/rpc.md.
9
- import { PermissionDeniedError, createConcurrencySemaphore, loadSkillBodies } from '@skelm/core';
8
+ // documented in @earendil-works/pi-coding-agent/docs/rpc.md.
9
+ import { PermissionDeniedError, createConcurrencySemaphore, extractPromptText, loadSkillBodies, } from '@skelm/core';
10
10
  import { PiRpcClient } from './rpc-client.js';
11
11
  /** Custom error types exposed from @skelm/pi */
12
12
  export class PiBackendError extends Error {
@@ -44,6 +44,10 @@ export function createPiBackend(options = {}) {
44
44
  mcp: false, // pi manages its own tools; no external MCP wiring
45
45
  skills: true,
46
46
  modelSelection: options.model !== undefined,
47
+ // RPC mode forwards prompts as text to a subprocess; image bytes
48
+ // cannot cross that boundary, so vision is explicitly off. Callers
49
+ // wanting multimodal must use the pi-sdk backend.
50
+ vision: false,
47
51
  // RPC mode runs pi in a subprocess; skelm cannot intercept tool_call events
48
52
  // mid-run, so it cannot enforce allowedTools, allowedExecutables,
49
53
  // fsRead/fsWrite, allowedMcpServers, or allowedSkills. The new gateway
@@ -112,7 +116,7 @@ export function createPiBackend(options = {}) {
112
116
  catch (err) {
113
117
  if (err instanceof Error) {
114
118
  if (err.message.includes('ENOENT') || err.message.includes('EACCES')) {
115
- throw new PiBackendAuthenticationError('pi binary not found or not executable. Install it: npm install -g @mariozechner/pi-coding-agent', err);
119
+ throw new PiBackendAuthenticationError('pi binary not found or not executable. Install it: npm install -g @earendil-works/pi-coding-agent', err);
116
120
  }
117
121
  if (err.message.includes('timed out')) {
118
122
  throw new PiBackendTimeoutError(err.message, err);
@@ -185,6 +189,6 @@ function buildPrompt(req, skillBodies = []) {
185
189
  systemParts.push(body);
186
190
  if (systemParts.length > 0)
187
191
  parts.push(`[System: ${systemParts.join('\n\n---\n\n')}]`);
188
- parts.push(req.prompt);
192
+ parts.push(extractPromptText(req.prompt));
189
193
  return parts.join('\n\n');
190
194
  }
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * @skelm/pi - Pi coding agent backend for skelm
3
3
  *
4
- * Integration with the Pi coding agent (@mariozechner/pi-coding-agent) via
4
+ * Integration with the Pi coding agent (@earendil-works/pi-coding-agent) via
5
5
  * RPC mode. Spawns `pi --mode rpc` per call, uses the documented JSONL
6
6
  * protocol to stream the response.
7
7
  */
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * @skelm/pi - Pi coding agent backend for skelm
3
3
  *
4
- * Integration with the Pi coding agent (@mariozechner/pi-coding-agent) via
4
+ * Integration with the Pi coding agent (@earendil-works/pi-coding-agent) via
5
5
  * RPC mode. Spawns `pi --mode rpc` per call, uses the documented JSONL
6
6
  * protocol to stream the response.
7
7
  */
package/dist/provider.js CHANGED
@@ -15,7 +15,7 @@ export class PiProvider extends ProviderPluginBase {
15
15
  id: 'pi',
16
16
  name: 'Pi Coding Agent',
17
17
  version: '1.0.0',
18
- description: 'Pi coding agent provider (@mariozechner/pi-coding-agent)',
18
+ description: 'Pi coding agent provider (@earendil-works/pi-coding-agent)',
19
19
  logLevel: options?.logLevel ?? 'info',
20
20
  });
21
21
  }
@@ -59,7 +59,7 @@ export class PiProvider extends ProviderPluginBase {
59
59
  execSync(`${this._cmd} --version`, { stdio: 'ignore' });
60
60
  }
61
61
  catch {
62
- throw new Error(`Pi binary not found: '${this._cmd}'. Install: npm install -g @mariozechner/pi-coding-agent`);
62
+ throw new Error(`Pi binary not found: '${this._cmd}'. Install: npm install -g @earendil-works/pi-coding-agent`);
63
63
  }
64
64
  }
65
65
  async healthCheck() {
@@ -27,13 +27,6 @@ export declare class PiSdkBackendAuthenticationError extends PiSdkBackendError {
27
27
  }
28
28
  export declare class PiSdkBackendTimeoutError extends PiSdkBackendError {
29
29
  }
30
- /**
31
- * Create a pi coding agent backend using the pi SDK.
32
- *
33
- * This backend builds an explicit tool allowlist from the skelm permission
34
- * policy so pi itself enforces which tools the agent may use. This provides
35
- * native enforcement rather than the advisory enforcement of the RPC backend.
36
- */
37
30
  export declare function createPiSdkBackend(options?: PiSdkBackendOptions): SkelmBackend;
38
31
  /**
39
32
  * Derive a pi tool allowlist from a skelm permission policy.
@@ -16,7 +16,41 @@
16
16
  * fsWrite.size > 0 → 'write', 'edit'
17
17
  * undefined policy → no override (pi defaults)
18
18
  */
19
- import { assertEgressEnforceable as assertEgressEnforceableCore, createConcurrencySemaphore, loadSkillBodies, } from '@skelm/core';
19
+ import { assertEgressEnforceable as assertEgressEnforceableCore, createConcurrencySemaphore, extractPromptText, loadSkillBodies, } from '@skelm/core';
20
+ /**
21
+ * Extract image parts from a prompt for forwarding to pi's `session.prompt`
22
+ * via its `images` option. Pi's ImageContent (`{type:'image', data, mimeType}`)
23
+ * matches skelm's image ContentPart shape one-for-one.
24
+ */
25
+ function extractPromptImages(prompt) {
26
+ if (typeof prompt === 'string' || prompt === undefined)
27
+ return [];
28
+ return prompt
29
+ .filter((p) => p.type === 'image')
30
+ .map((p) => ({ mimeType: p.mimeType, data: p.data }));
31
+ }
32
+ /**
33
+ * Collect image parts from all `role: 'user'` messages in an `InferRequest`.
34
+ *
35
+ * Intentionally single-turn: pi's `session.prompt(text, { images })` is
36
+ * turn-scoped — it sends the supplied images alongside `text` as one user
37
+ * message and starts the agent loop. Multi-turn conversations that resubmit
38
+ * prior-turn imagery would either re-attach the same bytes (wasteful) or
39
+ * silently drop history images here; the simpler behavior is to bundle every
40
+ * image into the single outgoing turn and let pi's session history persist
41
+ * what the model already saw. Assistant/tool messages don't carry images on
42
+ * the skelm side, so filtering on `role: 'user'` is sufficient.
43
+ */
44
+ function gatherImagesFromMessages(messages) {
45
+ const out = [];
46
+ for (const m of messages) {
47
+ if (m.role === 'user') {
48
+ for (const img of extractPromptImages(m.content))
49
+ out.push(img);
50
+ }
51
+ }
52
+ return out;
53
+ }
20
54
  import { PiSdkClient, PiSdkUpstreamError } from './sdk-client.js';
21
55
  const assertEgressEnforceable = (policy) => assertEgressEnforceableCore(policy, 'pi-sdk');
22
56
  export class PiSdkBackendError extends Error {
@@ -38,7 +72,81 @@ export class PiSdkBackendTimeoutError extends PiSdkBackendError {
38
72
  * policy so pi itself enforces which tools the agent may use. This provides
39
73
  * native enforcement rather than the advisory enforcement of the RPC backend.
40
74
  */
75
+ /**
76
+ * Resolve provider/model/baseUrl/apiKey from explicit options, falling back to
77
+ * OPENAI_* env vars when present. Returns `undefined` when there's nothing to
78
+ * override or no model id is known; the backend then refuses to dispatch
79
+ * (see assertProviderConfigured). Per finding-119.
80
+ *
81
+ * Called once at `createPiSdkBackend()` time — env vars are snapshotted at
82
+ * backend construction. Mutating `OPENAI_BASE_URL` (or its siblings) after
83
+ * the backend exists has no effect on subsequent calls; construct a fresh
84
+ * backend if you need to switch endpoints at runtime. This matches how every
85
+ * other skelm backend reads env vars at construction.
86
+ */
87
+ /**
88
+ * Placeholder forwarded to the underlying provider when the operator has
89
+ * pointed pi at a local OpenAI-compatible endpoint but supplied no real
90
+ * key (or the sentinel `'unused'`). `@earendil-works/pi-coding-agent`
91
+ * v0.75+ rejects `apiKey: undefined | '' | 'unused'` at provider-define
92
+ * time with `"apiKey" or "oauth" is required` — see finding-129. Local
93
+ * servers (llamacpp, sglang, vLLM, ollama) ignore the auth header, so
94
+ * we forward a non-empty placeholder when the caller's intent is "no
95
+ * authentication needed."
96
+ */
97
+ const NO_AUTH_PLACEHOLDER = 'sk-no-key-required';
98
+ function isMissingApiKey(apiKey) {
99
+ return apiKey === undefined || apiKey === '' || apiKey === 'unused';
100
+ }
101
+ function resolveProviderOverride(options) {
102
+ const provider = options.provider ?? process.env.OPENAI_PROVIDER;
103
+ const model = options.model ?? process.env.OPENAI_MODEL;
104
+ const baseUrl = options.baseUrl ?? process.env.OPENAI_BASE_URL;
105
+ const rawApiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
106
+ // Only override when the caller has actually said something — either an
107
+ // explicit option or a non-empty env var. A bare `provider`/`model` without
108
+ // any endpoint hint defaults to provider='openai' for parity with the rest
109
+ // of skelm's OpenAI-compatible backends.
110
+ if (provider === undefined &&
111
+ model === undefined &&
112
+ baseUrl === undefined &&
113
+ rawApiKey === undefined) {
114
+ return undefined;
115
+ }
116
+ if (model === undefined) {
117
+ // No model id at all → cannot register a model entry; let pi pick its
118
+ // built-in default. Pi's default is OpenAI cloud `gpt-5.4`.
119
+ return undefined;
120
+ }
121
+ // Promote missing/sentinel apiKey to a placeholder when we're routing to
122
+ // a non-default endpoint — finding-129. Without this the provider library
123
+ // throws before we ever reach the wire.
124
+ const apiKey = isMissingApiKey(rawApiKey) ? NO_AUTH_PLACEHOLDER : rawApiKey;
125
+ return {
126
+ provider: provider ?? 'openai',
127
+ model,
128
+ ...(baseUrl !== undefined && { baseUrl }),
129
+ apiKey,
130
+ contextWindow: options.contextWindow ?? 131_072,
131
+ maxTokens: options.maxTokens ?? 4096,
132
+ };
133
+ }
134
+ function assertProviderConfigured(providerOverride, action) {
135
+ if (providerOverride === undefined) {
136
+ // Finding-131: refuse to dispatch when no provider/model/baseUrl/apiKey
137
+ // is configured. Without this, pi-coding-agent falls back to its
138
+ // built-in default (OpenAI cloud) and any request silently traverses
139
+ // an undeclared upstream — a security concern (prompts /
140
+ // tool-arguments / file content sent off-host without consent). The
141
+ // word "AuthenticationError" appears in the message so external
142
+ // harnesses that pattern-match on auth failure shapes still recognise
143
+ // it. To intentionally rely on ~/.pi/agent/models.json, pass an
144
+ // explicit `provider` + `model` to `createPiSdkBackend`.
145
+ throw new PiSdkBackendAuthenticationError(`pi-sdk ${action} refused: no provider/model/baseUrl/apiKey configured. Set OPENAI_BASE_URL / OPENAI_API_KEY / OPENAI_MODEL, or pass {baseUrl, apiKey, model} to createPiSdkBackend(). (AuthenticationError)`);
146
+ }
147
+ }
41
148
  export function createPiSdkBackend(options = {}) {
149
+ const providerOverride = resolveProviderOverride(options);
42
150
  const capabilities = {
43
151
  prompt: true,
44
152
  streaming: true,
@@ -47,6 +155,15 @@ export function createPiSdkBackend(options = {}) {
47
155
  skills: true,
48
156
  modelSelection: false,
49
157
  toolPermissions: 'native',
158
+ // Pi natively supports multimodal user-message content via its
159
+ // `session.prompt(text, { images })` knob; image parts are forwarded as
160
+ // pi-ai's ImageContent (same shape as skelm's). Whether the configured
161
+ // pi model can actually process images depends on `~/.pi/agent/models.json`
162
+ // (the `input` field on the Model entry); non-vision models surface their
163
+ // own error which the backend propagates. Set `vision: false` to flip on
164
+ // the framework's vision gate for deployments pinned to a text-only pi
165
+ // model.
166
+ vision: options.vision ?? true,
50
167
  };
51
168
  const { acquire, release } = createConcurrencySemaphore(options.maxConcurrent ?? 4);
52
169
  return {
@@ -57,6 +174,7 @@ export function createPiSdkBackend(options = {}) {
57
174
  // Fail-closed before acquiring the concurrency slot — see comment on
58
175
  // assertEgressEnforceable.
59
176
  assertEgressEnforceable(context.permissions);
177
+ assertProviderConfigured(providerOverride, 'inference');
60
178
  await acquire();
61
179
  try {
62
180
  const cwd = options.cwd;
@@ -69,12 +187,14 @@ export function createPiSdkBackend(options = {}) {
69
187
  ...(options.noExtensions !== undefined && { noExtensions: options.noExtensions }),
70
188
  ...(options.noSkills !== undefined && { noSkills: options.noSkills }),
71
189
  ...(options.noContextFiles !== undefined && { noContextFiles: options.noContextFiles }),
190
+ ...(providerOverride !== undefined && { providerOverride }),
72
191
  ...(request.system !== undefined && {
73
192
  system: request.system,
74
193
  replaceSystemPrompt: false,
75
194
  }),
76
195
  });
77
- const result = await client.prompt(promptText, context.signal, options.timeout ?? 300_000, context.onPartial);
196
+ const inferImages = gatherImagesFromMessages(request.messages);
197
+ const result = await client.prompt(promptText, context.signal, options.timeout ?? 300_000, context.onPartial, inferImages.length > 0 ? inferImages : undefined);
78
198
  const response = {
79
199
  ...(result.usage !== undefined && {
80
200
  usage: {
@@ -103,6 +223,7 @@ export function createPiSdkBackend(options = {}) {
103
223
  // Fail-closed before acquiring the concurrency slot — see comment on
104
224
  // assertEgressEnforceable.
105
225
  assertEgressEnforceable(policy);
226
+ assertProviderConfigured(providerOverride, 'agent execution');
106
227
  await acquire();
107
228
  try {
108
229
  const toolAllowlist = derivePiToolAllowlist(policy);
@@ -116,13 +237,15 @@ export function createPiSdkBackend(options = {}) {
116
237
  ...(options.noExtensions !== undefined && { noExtensions: options.noExtensions }),
117
238
  ...(options.noSkills !== undefined && { noSkills: options.noSkills }),
118
239
  ...(options.noContextFiles !== undefined && { noContextFiles: options.noContextFiles }),
240
+ ...(providerOverride !== undefined && { providerOverride }),
119
241
  // System prompt: inject content and indicate whether to replace pi's base
120
242
  ...(systemContent !== undefined && {
121
243
  system: systemContent,
122
244
  replaceSystemPrompt: options.systemPrompt !== undefined,
123
245
  }),
124
246
  });
125
- const result = await client.prompt(request.prompt, context.signal, options.timeout ?? 300_000, context.onPartial);
247
+ const agentImages = extractPromptImages(request.prompt);
248
+ const result = await client.prompt(extractPromptText(request.prompt), context.signal, options.timeout ?? 300_000, context.onPartial, agentImages.length > 0 ? agentImages : undefined);
126
249
  return {
127
250
  text: result.text,
128
251
  stopReason: result.stopReason,
@@ -151,7 +274,7 @@ function classifyPiSdkError(err, action) {
151
274
  }
152
275
  if (err instanceof Error) {
153
276
  if (err.message.includes('ENOENT') || err.message.includes('not installed')) {
154
- return new PiSdkBackendAuthenticationError('pi SDK not available. Install it: npm install @mariozechner/pi-coding-agent', err);
277
+ return new PiSdkBackendAuthenticationError('pi SDK not available. Install it: npm install @earendil-works/pi-coding-agent', err);
155
278
  }
156
279
  if (err.message.includes('timed out')) {
157
280
  return new PiSdkBackendTimeoutError(err.message, err);
@@ -217,11 +340,20 @@ function buildSystemContent(systemBase, req, skillBodies) {
217
340
  * histories we serialize the conversation into a labeled transcript.
218
341
  */
219
342
  function buildInferPrompt(req) {
343
+ // The text prompt carries only text; collapse any multimodal messages to
344
+ // their text parts. Image parts are gathered separately (see
345
+ // gatherImagesFromMessages) and forwarded via pi's `images` prompt option.
346
+ const asText = (content) => typeof content === 'string'
347
+ ? content
348
+ : content
349
+ .filter((p) => p.type === 'text')
350
+ .map((p) => p.text)
351
+ .join('');
220
352
  if (req.messages.length === 1 && req.messages[0]?.role === 'user') {
221
- return req.messages[0].content;
353
+ return asText(req.messages[0].content);
222
354
  }
223
355
  return req.messages
224
- .map((m) => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${m.content}`)
356
+ .map((m) => `${m.role === 'user' ? 'User' : m.role === 'assistant' ? 'Assistant' : m.role}: ${asText(m.content)}`)
225
357
  .join('\n\n');
226
358
  }
227
359
  /**
@@ -49,6 +49,28 @@ export interface PiSdkClientOptions {
49
49
  * Default: false — project context files are useful and safe.
50
50
  */
51
51
  noContextFiles?: boolean;
52
+ /**
53
+ * Provider configuration applied to pi's `ModelRegistry` at session start.
54
+ * When set, the named provider is (re-)registered with the supplied
55
+ * `baseUrl`/`apiKey`/`model`, overriding whatever `~/.pi/agent/models.json`
56
+ * declares for the lifetime of this session. Allows pi-sdk to be pointed
57
+ * at a local OpenAI-compatible endpoint via `OPENAI_BASE_URL` /
58
+ * `OPENAI_API_KEY` / `OPENAI_MODEL` without touching the user's pi
59
+ * config (finding-119).
60
+ *
61
+ * Either all four fields are honored together, or none of them — the
62
+ * backend resolves env-var defaults and only forwards a populated object.
63
+ */
64
+ providerOverride?: {
65
+ provider: string;
66
+ model: string;
67
+ baseUrl?: string;
68
+ apiKey?: string;
69
+ /** Metadata declared on the registered model entry; see PiSdkBackendOptions. */
70
+ contextWindow?: number;
71
+ /** Metadata declared on the registered model entry; see PiSdkBackendOptions. */
72
+ maxTokens?: number;
73
+ };
52
74
  }
53
75
  export interface PiSdkResponse {
54
76
  text: string;
@@ -79,6 +101,9 @@ export declare class PiSdkUpstreamError extends Error {
79
101
  export declare class PiSdkClient {
80
102
  private readonly opts;
81
103
  constructor(opts?: PiSdkClientOptions);
82
- prompt(text: string, signal?: AbortSignal, timeoutMs?: number, onPartial?: (delta: string) => void): Promise<PiSdkResponse>;
104
+ prompt(text: string, signal?: AbortSignal, timeoutMs?: number, onPartial?: (delta: string) => void, images?: ReadonlyArray<{
105
+ mimeType: string;
106
+ data: string;
107
+ }>): Promise<PiSdkResponse>;
83
108
  private _run;
84
109
  }
@@ -30,17 +30,49 @@ export class PiSdkClient {
30
30
  constructor(opts = {}) {
31
31
  this.opts = opts;
32
32
  }
33
- async prompt(text, signal, timeoutMs, onPartial) {
34
- // Dynamic import keeps @mariozechner/pi-coding-agent optional at runtime
35
- const pi = await import('@mariozechner/pi-coding-agent').catch(() => {
36
- throw new Error('pi SDK not installed. Add @mariozechner/pi-coding-agent to your project: npm install @mariozechner/pi-coding-agent');
33
+ async prompt(text, signal, timeoutMs, onPartial, images) {
34
+ // Dynamic import keeps @earendil-works/pi-coding-agent optional at runtime
35
+ const pi = await import('@earendil-works/pi-coding-agent').catch(() => {
36
+ throw new Error('pi SDK not installed. Add @earendil-works/pi-coding-agent to your project: npm install @earendil-works/pi-coding-agent');
37
37
  });
38
38
  const { createAgentSessionServices, createAgentSessionFromServices, SessionManager } = pi;
39
39
  const cwd = this.opts.cwd ?? process.cwd();
40
- const systemPromptOverride = this.opts.system !== undefined
41
- ? (base) => this.opts.replaceSystemPrompt
42
- ? this.opts.system
43
- : [base, this.opts.system].filter(Boolean).join('\n\n')
40
+ // Issue #193: pi's built-in coding-agent system prompt (~13 KB, "You are
41
+ // an expert coding assistant operating inside pi …") strongly biases the
42
+ // model toward file/code workflows and away from visual reasoning. When
43
+ // the caller threads image content but supplies no system override of
44
+ // their own, qwen35-VL / GPT-4V / Claude-Vision reliably reply "I cannot
45
+ // view or analyze images" — even though pi-coding-agent itself correctly
46
+ // packs the image into the chat-completions `image_url` content part.
47
+ // The image bytes reach the wire; the model just disengages because the
48
+ // system prompt told it it's a coding tool.
49
+ //
50
+ // Fix: when this turn contains images, append a short vision-enable hint
51
+ // to whichever base+user system prompt would otherwise be sent. The hint
52
+ // does NOT override pi's coding-agent prompt — it augments it — so
53
+ // coding-on-screenshot pipelines still get coding-agent capabilities AND
54
+ // visual reasoning.
55
+ const hasImages = images !== undefined && images.length > 0;
56
+ const userSystem = this.opts.system;
57
+ const replace = this.opts.replaceSystemPrompt === true;
58
+ const visionHint = 'The user has attached one or more images to their message. ' +
59
+ 'You have vision capability — look at the image(s) and address what you see when answering.';
60
+ const systemPromptOverride = userSystem !== undefined || hasImages
61
+ ? (base) => {
62
+ // Caller fully replaces pi's prompt — they own the whole shape.
63
+ // Trust the caller covered image guidance themselves; don't
64
+ // double-inject the vision hint.
65
+ if (replace && userSystem !== undefined)
66
+ return userSystem;
67
+ const parts = [];
68
+ if (!replace && base !== undefined && base.length > 0)
69
+ parts.push(base);
70
+ if (userSystem !== undefined)
71
+ parts.push(userSystem);
72
+ if (hasImages)
73
+ parts.push(visionHint);
74
+ return parts.length > 0 ? parts.join('\n\n') : undefined;
75
+ }
44
76
  : undefined;
45
77
  const services = await createAgentSessionServices({
46
78
  cwd,
@@ -51,20 +83,49 @@ export class PiSdkClient {
51
83
  ...(systemPromptOverride !== undefined && { systemPromptOverride }),
52
84
  },
53
85
  });
86
+ // Apply provider/model override before the session is created so the
87
+ // registered model overrides whatever ~/.pi/agent/models.json declares.
88
+ // The override is registered with `openai-completions` as the API since
89
+ // every local OpenAI-compatible server (sglang, vLLM, llama.cpp, ollama)
90
+ // implements that surface but rarely the newer Responses API.
91
+ const override = this.opts.providerOverride;
92
+ // Pi-coding-agent doesn't re-export Model<Api> at its package root, so
93
+ // we let TypeScript infer the type from `services.modelRegistry.find()`.
94
+ let pickedModel;
95
+ if (override !== undefined) {
96
+ services.modelRegistry.registerProvider(override.provider, {
97
+ ...(override.baseUrl !== undefined && { baseUrl: override.baseUrl }),
98
+ ...(override.apiKey !== undefined && { apiKey: override.apiKey }),
99
+ models: [
100
+ {
101
+ id: override.model,
102
+ name: override.model,
103
+ api: 'openai-completions',
104
+ reasoning: false,
105
+ input: ['text', 'image'],
106
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
107
+ contextWindow: override.contextWindow ?? 131_072,
108
+ maxTokens: override.maxTokens ?? 4096,
109
+ },
110
+ ],
111
+ });
112
+ pickedModel = services.modelRegistry.find(override.provider, override.model);
113
+ }
54
114
  const { session } = await createAgentSessionFromServices({
55
115
  services,
56
116
  sessionManager: SessionManager.inMemory(),
57
117
  ...(this.opts.tools !== undefined && { tools: this.opts.tools }),
58
118
  ...(this.opts.noTools !== undefined && { noTools: this.opts.noTools }),
119
+ ...(pickedModel !== undefined && { model: pickedModel }),
59
120
  });
60
121
  try {
61
- return await this._run(session, text, signal, timeoutMs ?? 300_000, onPartial);
122
+ return await this._run(session, text, signal, timeoutMs ?? 300_000, onPartial, images);
62
123
  }
63
124
  finally {
64
125
  session.dispose();
65
126
  }
66
127
  }
67
- _run(session, text, signal, timeoutMs, onPartial) {
128
+ _run(session, text, signal, timeoutMs, onPartial, images) {
68
129
  return new Promise((resolve, reject) => {
69
130
  let settled = false;
70
131
  const settle = (fn) => {
@@ -160,7 +221,16 @@ export class PiSdkClient {
160
221
  });
161
222
  }
162
223
  });
163
- session.prompt(text).catch((err) => {
224
+ const promptOpts = images !== undefined && images.length > 0
225
+ ? {
226
+ images: images.map((img) => ({
227
+ type: 'image',
228
+ data: img.data,
229
+ mimeType: img.mimeType,
230
+ })),
231
+ }
232
+ : undefined;
233
+ session.prompt(text, promptOpts).catch((err) => {
164
234
  unsub();
165
235
  settle(() => reject(err));
166
236
  });
package/dist/types.d.ts CHANGED
@@ -38,6 +38,68 @@ export interface PiSdkBackendOptions {
38
38
  * Default: false — project context files are useful and safe.
39
39
  */
40
40
  noContextFiles?: boolean;
41
+ /**
42
+ * Advertise `capabilities.vision`. Defaults to `true`: image parts in the
43
+ * prompt are forwarded to pi via `session.prompt(text, { images })`. Whether
44
+ * the configured pi model actually accepts images depends on its
45
+ * `~/.pi/agent/models.json` entry (the model's `input` field). Set
46
+ * `vision: false` to flip on the framework's vision gate for deployments
47
+ * pinned to a text-only pi model.
48
+ */
49
+ vision?: boolean;
50
+ /**
51
+ * Provider name to register with pi's `ModelRegistry` at session start.
52
+ * Defaults to `process.env.OPENAI_PROVIDER ?? 'openai'`. Pass an explicit
53
+ * value to pin a different provider (e.g. `'anthropic'`).
54
+ *
55
+ * Together with `model` / `baseUrl` / `apiKey`, this lets pi-sdk be pointed
56
+ * at a local OpenAI-compatible server (sglang, vLLM, llama.cpp, ollama)
57
+ * without hand-editing `~/.pi/agent/models.json`. Per finding-119 the env
58
+ * vars are honored automatically so pi-sdk reaches the same endpoint as
59
+ * every other skelm backend in the same config.
60
+ *
61
+ * `OPENAI_PROVIDER` is a pi-sdk-specific addition (the cross-backend
62
+ * convention is just `OPENAI_BASE_URL` / `OPENAI_API_KEY` / `OPENAI_MODEL`).
63
+ * It exists because pi's ModelRegistry is keyed by provider name; if you
64
+ * want to register the override against a non-`openai` provider (e.g.
65
+ * `'anthropic'`) without an explicit option, this is the knob.
66
+ */
67
+ provider?: string;
68
+ /**
69
+ * Model id used when registering the provider above. Defaults to
70
+ * `process.env.OPENAI_MODEL` when set; with neither, the backend refuses
71
+ * to dispatch rather than use pi's default. Explicit value overrides env.
72
+ */
73
+ model?: string;
74
+ /**
75
+ * Base URL of the OpenAI-compatible endpoint. Defaults to
76
+ * `process.env.OPENAI_BASE_URL` when set. Trailing `/v1` is preserved
77
+ * verbatim — the pi SDK does not append it itself.
78
+ */
79
+ baseUrl?: string;
80
+ /**
81
+ * API key for the configured provider. Defaults to
82
+ * `process.env.OPENAI_API_KEY` when set. A missing, empty, or `"unused"`
83
+ * key is replaced with a non-empty placeholder, which servers that ignore
84
+ * auth accept; pass an explicit string to override.
85
+ */
86
+ apiKey?: string;
87
+ /**
88
+ * Optional `contextWindow` (in tokens) declared on the registered model
89
+ * entry. Defaults to 131_072 — a permissive ceiling that works for most
90
+ * modern local-LLM servers (sglang qwen3-coder, vLLM llama-3.1, …) and
91
+ * matches pi's built-in qwen/gpt defaults. Override when pinning pi-sdk
92
+ * at a small-context model (e.g. llama.cpp serving a 4K-context variant)
93
+ * so pi's own context-tracking math stays honest. The value is metadata —
94
+ * pi does not use it for hard truncation today, but downstream tooling may.
95
+ */
96
+ contextWindow?: number;
97
+ /**
98
+ * Optional `maxTokens` (in tokens) declared on the registered model
99
+ * entry. Defaults to 4096. Same metadata-only role as `contextWindow`;
100
+ * override when targeting a model with a tighter (or looser) output cap.
101
+ */
102
+ maxTokens?: number;
41
103
  }
42
104
  export interface PiBackendOptions {
43
105
  /** Backend id (default: 'pi') */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@skelm/pi",
3
- "version": "0.4.2",
3
+ "version": "0.4.3",
4
4
  "description": "Pi coding-agent backend for skelm with full permission enforcement",
5
5
  "license": "MIT",
6
6
  "author": "Scott Glover <scottgl@gmail.com>",
@@ -45,17 +45,17 @@
45
45
  "clean": "rm -rf dist tsconfig.tsbuildinfo"
46
46
  },
47
47
  "peerDependencies": {
48
- "@mariozechner/pi-coding-agent": ">=0.73.0",
49
- "@skelm/core": "^0.4.2"
48
+ "@earendil-works/pi-coding-agent": ">=0.75.0",
49
+ "@skelm/core": "^0.4.3"
50
50
  },
51
51
  "peerDependenciesMeta": {
52
- "@mariozechner/pi-coding-agent": {
52
+ "@earendil-works/pi-coding-agent": {
53
53
  "optional": true
54
54
  }
55
55
  },
56
56
  "devDependencies": {
57
- "@mariozechner/pi-coding-agent": "^0.73.0",
58
- "@skelm/core": "^0.4.2",
57
+ "@earendil-works/pi-coding-agent": "^0.75.4",
58
+ "@skelm/core": "^0.4.3",
59
59
  "@types/node": "^20.10.0",
60
60
  "typescript": "^5.3.0",
61
61
  "vitest": "^1.0.0"