pi-glm-image-summary 0.1.0 → 0.1.4

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (3)
  1. package/README.md +21 -7
  2. package/package.json +42 -30
  3. package/src/index.ts +137 -221
package/README.md CHANGED
@@ -1,10 +1,10 @@
  # pi-glm-image-summary
 
- A [pi](https://github.com/badlogic/pi-mono) extension that intercepts image reads when using glm-4.7 and sends them to glm-4.6v for detailed analysis.
+ A [pi](https://github.com/badlogic/pi-mono) extension that intercepts image reads when using non-vision GLM models and sends them to glm-4.6v for detailed analysis.
 
  ## Why?
 
- GLM-4.7 is a powerful text model but has limited vision capabilities. GLM-4.6v, on the other hand, has stronger vision support. This extension automatically detects when you're using glm-4.7 and intercepts image reads, sending them to glm-4.6v for comprehensive analysis.
+ GLM text models (glm-4.6, glm-4.7, glm-4.7-flash) have no vision capabilities. GLM-4.6v does. This extension automatically detects when you're using a non-vision GLM model and intercepts image reads, sending them to glm-4.6v for comprehensive analysis.
 
  ## Features
 
@@ -14,17 +14,31 @@ GLM-4.7 is a powerful text model but has limited vision capabilities. GLM-4.6v,
 
  ## Installation
 
- The extension is already installed in `~/.pi/agent/extensions/pi-glm-image-summary/`.
+ Install globally:
 
- ## Usage
+ ```bash
+ pi install npm:pi-glm-image-summary
+ ```
+
+ Or install for a specific project (writes to `.pi/settings.json`):
+
+ ```bash
+ pi install -l npm:pi-glm-image-summary
+ ```
 
- Load the extension when starting pi:
+ To try it without installing:
 
  ```bash
- pi -e ~/.pi/agent/extensions/pi-glm-image-summary --provider zai --model glm-4.7
+ pi -e npm:pi-glm-image-summary
  ```
 
- Or add it to your pi config for automatic loading.
+ ## Usage
+
+ Once installed, the extension loads automatically when you start pi:
+
+ ```bash
+ pi --provider zai --model glm-4.7
+ ```
 
  ### Automatic Mode
 
package/package.json CHANGED
@@ -1,32 +1,44 @@
  {
-   "name": "pi-glm-image-summary",
-   "version": "0.1.0",
-   "type": "module",
-   "description": "Pi extension that intercepts image reads when using glm-4.7 and sends them to glm-4.6v for detailed analysis",
-   "keywords": [
-     "pi-package",
-     "glm",
-     "image",
-     "vision",
-     "ai"
-   ],
-   "author": "kaofelix",
-   "license": "MIT",
-   "repository": {
-     "type": "git",
-     "url": "git+https://github.com/kaofelix/pi-glm-image-summary.git"
-   },
-   "files": [
-     "src"
-   ],
-   "scripts": {
-     "clean": "echo 'nothing to clean'",
-     "build": "echo 'nothing to build'",
-     "check": "echo 'nothing to check'"
-   },
-   "pi": {
-     "extensions": [
-       "./src/index.ts"
-     ]
-   }
+   "name": "pi-glm-image-summary",
+   "version": "0.1.4",
+   "type": "module",
+   "description": "Pi extension that intercepts image reads when using glm-4.7 and sends them to glm-4.6v for detailed analysis",
+   "keywords": [
+     "pi-package",
+     "glm",
+     "image",
+     "vision",
+     "ai"
+   ],
+   "author": "kaofelix",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/kaofelix/pi-glm-image-summary.git"
+   },
+   "files": [
+     "src"
+   ],
+   "scripts": {
+     "format": "biome format --write .",
+     "lint": "biome lint .",
+     "typecheck": "tsc --noEmit",
+     "check": "biome check . && tsc --noEmit"
+   },
+   "pi": {
+     "extensions": [
+       "./src/index.ts"
+     ]
+   },
+   "peerDependencies": {
+     "@mariozechner/pi-ai": "*",
+     "@mariozechner/pi-coding-agent": "*"
+   },
+   "devDependencies": {
+     "@biomejs/biome": "^2.3.13",
+     "@mariozechner/pi-ai": "^0.51.0",
+     "@mariozechner/pi-coding-agent": "^0.51.0",
+     "@types/node": "^25.2.0",
+     "typescript": "^5.9.3"
+   }
  }
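
Editor's note: two manifest details worth calling out. The `pi.extensions` field is what makes the package loadable as a pi extension, and the pi libraries are now declared as `peerDependencies` (with concrete versions only under `devDependencies`), so the extension compiles against the host pi installation instead of pinning its own copies. Below is a minimal sketch of the entry-point shape that `./src/index.ts` follows; the `registerCommand`/`handler`/`ctx.ui.notify` surface is taken from this diff, while the command name and the "info" notify level are illustrative assumptions.

```ts
// Minimal sketch of the extension entry point that "pi.extensions" above
// points at. The registerCommand/handler/ctx.ui.notify shapes appear in
// this diff's src/index.ts; the command name and "info" level are
// illustrative assumptions.
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";

export default function (pi: ExtensionAPI) {
  pi.registerCommand("hello", {
    description: "Example command from a minimal pi extension",
    handler: async (_args, ctx) => {
      ctx.ui.notify("Hello from a pi extension", "info");
    },
  });
}
```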
package/src/index.ts CHANGED
@@ -1,32 +1,31 @@
  /**
   * GLM Image Summary Extension
   *
-  * When using glm-4.7, this extension intercepts image reads and sends them
-  * to glm-4.6v for detailed analysis using a subprocess. This provides better
-  * image understanding since glm-4.6v has stronger vision capabilities.
+  * When using non-vision GLM models (glm-4.6, glm-4.7, glm-4.7-flash), this
+  * extension intercepts image reads and sends them to glm-4.6v for detailed
+  * analysis using a subprocess. This provides better image understanding since
+  * glm-4.6v has stronger vision capabilities.
   *
   * Usage:
-  *   pi -e ~/.pi/agent/extensions/pi-glm-image-summary --provider zai --model glm-4.7
+  *   pi -e npm:pi-glm-image-summary --provider zai --model glm-4.7
   *
   * The extension will:
-  * 1. Detect when glm-4.7 is the current model
+  * 1. Detect when a non-vision GLM model is being used
   * 2. Check if the file being read is an image
   * 3. Call pi subprocess with glm-4.6v to analyze the image
-  * 4. Return the summary text to glm-4.7
+  * 4. Return the summary text to the current model
   */
 
  import { spawn } from "node:child_process";
- import type { TextContent } from "@mariozechner/pi-ai";
+ import { resolve } from "node:path";
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
- import {
-   BorderedLoader,
-   createReadTool,
-   type ReadOperations,
-   type ReadToolDetails,
- } from "@mariozechner/pi-coding-agent";
- import { constants } from "fs";
- import { access as fsAccess, readFile as fsReadFile } from "fs/promises";
- import { resolve } from "path";
+ import { BorderedLoader, createReadTool } from "@mariozechner/pi-coding-agent";
+
+ // Configuration
+ const VISION_PROVIDER = "zai";
+ const VISION_MODEL = "glm-4.6v";
+ const NON_VISION_MODELS = ["glm-4.6", "glm-4.7", "glm-4.7-flash"];
+ const SUPPORTED_IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "webp"];
 
  const SUMMARY_PROMPT = `Please analyze this image comprehensively. Extract ALL information from the image including:
 
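
Editor's note: the new module-level constants replace hard-coded strings scattered through 0.1.0, and `NON_VISION_MODELS` drives a single membership check in the read-tool override (next hunk). Note that 0.1.0 also matched `glm-4.7-long`, which is absent from the new list. A small sketch of how the gate behaves, with the values copied from this diff:

```ts
// Sketch of the vision-proxy gate built from NON_VISION_MODELS (values
// copied from this diff); any other model id falls through to the
// standard read tool.
const NON_VISION_MODELS = ["glm-4.6", "glm-4.7", "glm-4.7-flash"];

function needsVisionProxy(modelId: string | undefined): boolean {
  return modelId !== undefined && NON_VISION_MODELS.includes(modelId);
}

console.log(needsVisionProxy("glm-4.7")); // true: image reads are proxied to glm-4.6v
console.log(needsVisionProxy("glm-4.6v")); // false: the model can read images itself
console.log(needsVisionProxy(undefined)); // false: no model id, no proxying
```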
@@ -40,162 +39,149 @@ const SUMMARY_PROMPT = `Please analyze this image comprehensively. Extract ALL i
 
  Format your response clearly with sections and bullet points. Be extremely thorough - the user needs to understand everything visible in this image to perform their task.`;
 
- export default function (pi: ExtensionAPI) {
-   const localCwd = process.cwd();
-   const localRead = createReadTool(localCwd);
-
-   // Custom read operations that detect images
-   const readOps: ReadOperations = {
-     readFile: (path) => fsReadFile(path),
-     access: (path) => fsAccess(path, constants.R_OK),
-     detectImageMimeType: async (absolutePath: string) => {
-       // Simple MIME type detection
-       const ext = absolutePath.split(".").pop()?.toLowerCase();
-       const supported = ["jpg", "jpeg", "png", "gif", "webp"];
-       if (ext && supported.includes(ext)) {
-         return `image/${ext === "jpg" ? "jpeg" : ext}`;
+ // Types for pi JSON output
+ interface PiMessage {
+   role: string;
+   content?: PiContentBlock[];
+ }
+
+ interface PiContentBlock {
+   type: string;
+   text?: string;
+ }
+
+ interface PiJsonOutput {
+   messages?: PiMessage[];
+ }
+
+ function isImageFile(path: string): boolean {
+   const ext = path.split(".").pop()?.toLowerCase();
+   return ext !== undefined && SUPPORTED_IMAGE_EXTENSIONS.includes(ext);
+ }
+
+ function extractTextFromPiOutput(output: string): string {
+   try {
+     const json: PiJsonOutput = JSON.parse(output);
+     if (json.messages && Array.isArray(json.messages)) {
+       const assistantMsg = json.messages.findLast((m: PiMessage) => m.role === "assistant");
+       if (assistantMsg?.content) {
+         return assistantMsg.content
+           .filter((c: PiContentBlock) => c.type === "text")
+           .map((c: PiContentBlock) => c.text ?? "")
+           .join("\n");
        }
-       return null;
-     },
-   };
+     }
+   } catch {
+     // Not JSON, return as-is
+   }
+   return output;
+ }
+
+ interface AnalyzeImageOptions {
+   absolutePath: string;
+   signal?: AbortSignal;
+ }
+
+ async function analyzeImage({ absolutePath, signal }: AnalyzeImageOptions): Promise<string> {
+   return new Promise((resolve, reject) => {
+     const args = [
+       `@${absolutePath}`,
+       "--provider",
+       VISION_PROVIDER,
+       "--model",
+       VISION_MODEL,
+       "-p",
+       SUMMARY_PROMPT,
+       "--json",
+     ];
+
+     const child = spawn("pi", args, {
+       stdio: ["ignore", "pipe", "pipe"],
+       env: process.env,
+     });
+
+     let stdout = "";
+     let stderr = "";
+
+     child.stdout.on("data", (data: Buffer) => {
+       stdout += data.toString();
+     });
+
+     child.stderr.on("data", (data: Buffer) => {
+       stderr += data.toString();
+     });
+
+     child.on("error", (err: Error) => {
+       reject(err);
+     });
+
+     child.on("close", (code: number | null) => {
+       if (code !== 0) {
+         reject(new Error(`pi subprocess failed (${code}): ${stderr}`));
+       } else {
+         resolve(extractTextFromPiOutput(stdout.trim()));
+       }
+     });
+
+     if (signal) {
+       const onAbort = () => {
+         child.kill();
+         reject(new Error("Operation aborted"));
+       };
+       signal.addEventListener("abort", onAbort, { once: true });
+       child.on("close", () => {
+         signal.removeEventListener("abort", onAbort);
+       });
+     }
+   });
+ }
+
+ export default function (pi: ExtensionAPI) {
+   const localRead = createReadTool(process.cwd());
 
-   // Override the read tool
+   // Override the read tool to intercept image reads for non-vision models
    pi.registerTool({
      ...localRead,
      async execute(toolCallId, params, signal, onUpdate, ctx) {
        const { path } = params;
        const absolutePath = resolve(ctx.cwd, path);
 
-       // Check if current model is glm-4.7
-       const currentModel = ctx.model;
-       const isGlm4_7 = currentModel?.id === "glm-4.7" || currentModel?.id === "glm-4.7-long";
-
-       // If not glm-4.7, use standard read
-       if (!isGlm4_7) {
+       // Check if we need to proxy through vision model
+       const needsVisionProxy = ctx.model?.id && NON_VISION_MODELS.includes(ctx.model.id);
+       if (!needsVisionProxy || !isImageFile(absolutePath)) {
          return localRead.execute(toolCallId, params, signal, onUpdate);
        }
 
-       // Check if file is an image
-       const mimeType = await readOps.detectImageMimeType?.(absolutePath);
-       if (!mimeType) {
-         // Not an image, use standard read
-         return localRead.execute(toolCallId, params, signal, onUpdate);
-       }
-
-       // Call pi subprocess with glm-4.6v to analyze the image
+       // Analyze image with vision model
        onUpdate?.({
-         content: [{ type: "text", text: `[Analyzing image with glm-4.6v...]` }],
+         content: [{ type: "text", text: `[Analyzing image with ${VISION_MODEL}...]` }],
+         details: {},
        });
 
        try {
-         const result = await new Promise<{ text: string }>((resolveResult, reject) => {
-           // Use @ prefix to indicate image attachment, and absolute path
-           const args = [
-             `@${absolutePath}`,
-             "--provider",
-             "zai",
-             "--model",
-             "glm-4.6v",
-             "-p",
-             SUMMARY_PROMPT,
-             "--json", // Get structured output
-           ];
-
-           const child = spawn("pi", args, {
-             stdio: ["ignore", "pipe", "pipe"],
-             env: process.env,
-           });
-
-           let stdout = "";
-           let stderr = "";
-
-           child.stdout.on("data", (data) => {
-             stdout += data.toString();
-           });
-
-           child.stderr.on("data", (data) => {
-             stderr += data.toString();
-           });
-
-           child.on("error", (err) => {
-             reject(err);
-           });
-
-           child.on("close", (code) => {
-             if (code !== 0) {
-               reject(new Error(`pi subprocess failed (${code}): ${stderr}`));
-             } else {
-               resolveResult({ text: stdout.trim() });
-             }
-           });
-
-           // Handle abort signal
-           if (signal) {
-             const onAbort = () => {
-               child.kill();
-               reject(new Error("Operation aborted"));
-             };
-             signal.addEventListener("abort", onAbort, { once: true });
-             child.on("close", () => {
-               signal.removeEventListener("abort", onAbort);
-             });
-           }
-         });
+         const summaryText = await analyzeImage({ absolutePath, signal });
 
          if (signal?.aborted) {
            throw new Error("Operation aborted");
          }
 
-         // Parse the result
-         let summaryText: string;
-         try {
-           // Try to parse as JSON first
-           const json = JSON.parse(result.text);
-           // Extract message content from the response
-           if (json.messages && Array.isArray(json.messages)) {
-             // Get the last assistant message
-             const assistantMsg = json.messages.findLast((m: any) => m.role === "assistant");
-             if (assistantMsg?.content) {
-               summaryText = assistantMsg.content
-                 .filter((c: any) => c.type === "text")
-                 .map((c: any) => c.text)
-                 .join("\n");
-             } else {
-               summaryText = result.text;
-             }
-           } else {
-             summaryText = result.text;
-           }
-         } catch {
-           // Not JSON, use as-is
-           summaryText = result.text;
-         }
-
-         const readResult = {
-           content: [
-             {
-               type: "text",
-               text: `[Image analyzed with glm-4.6v]\n\n${summaryText}`,
-             } as TextContent,
-           ],
-           details: { summaryModel: "glm-4.6v" } as ReadToolDetails,
+         const result = {
+           content: [{ type: "text" as const, text: `[Image analyzed with ${VISION_MODEL}]\n\n${summaryText}` }],
+           details: {},
          };
 
-         onUpdate?.(readResult);
-         return readResult;
-       } catch (error: any) {
-         // Throw an error so it shows as red in the UI
-         const errorMsg = `Image analysis failed with glm-4.6v: ${error.message}. The image may not be supported (e.g., animated GIFs) or there was a connection issue.`;
-         const err = new Error(errorMsg);
-         (err as any).isToolError = true; // Mark as a tool error for better handling
-         throw err;
+         onUpdate?.(result);
+         return result;
+       } catch (error) {
+         const message = error instanceof Error ? error.message : String(error);
+         throw new Error(`Image analysis failed: ${message}`);
        }
      },
    });
 
-   // Add a command to manually trigger image analysis
+   // Command for manual image analysis
    pi.registerCommand("analyze-image", {
-     description: "Analyze an image file using glm-4.6v",
+     description: `Analyze an image file using ${VISION_MODEL}`,
      handler: async (args, ctx) => {
        if (!ctx.hasUI) {
          ctx.ui.notify("analyze-image requires interactive mode", "error");
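
Editor's note: the bulk of this hunk is deduplication. The subprocess plumbing that 0.1.0 repeated inline in both the tool override and the command handler is extracted into `analyzeImage()`, and the JSON parsing into `extractTextFromPiOutput()` with typed interfaces instead of `any`. A sketch of the `--json` payload shape that parser expects; the message text is illustrative, and extra fields in real pi output are tolerated by the optional types:

```ts
// Illustrative payload in the shape extractTextFromPiOutput() parses;
// the structure mirrors the PiJsonOutput/PiMessage/PiContentBlock types.
const sample = JSON.stringify({
  messages: [
    { role: "user", content: [{ type: "text", text: "Describe this image" }] },
    {
      role: "assistant",
      content: [
        { type: "text", text: "## Overview" },
        { type: "text", text: "A bar chart with three series..." },
      ],
    },
  ],
});

// Joins the text blocks of the last assistant message with "\n":
// "## Overview\nA bar chart with three series..."
console.log(extractTextFromPiOutput(sample));

// Anything that fails JSON.parse is returned verbatim.
console.log(extractTextFromPiOutput("plain text output"));
```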
@@ -210,90 +196,21 @@ export default function (pi: ExtensionAPI) {
 
        const absolutePath = resolve(ctx.cwd, imagePath);
 
-       // Check if file is an image
-       const mimeType = await readOps.detectImageMimeType?.(absolutePath);
-       if (!mimeType) {
+       if (!isImageFile(absolutePath)) {
          ctx.ui.notify("Not a supported image file", "error");
          return;
        }
 
-       // Call pi subprocess with glm-4.6v to analyze the image
        const result = await ctx.ui.custom<string | null>((tui, theme, _kb, done) => {
          const loader = new BorderedLoader(tui, theme, `Analyzing ${imagePath}...`);
          loader.onAbort = () => done(null);
 
-         // Use @ prefix to indicate image attachment, and absolute path
-         const args = [
-           `@${absolutePath}`,
-           "--provider",
-           "zai",
-           "--model",
-           "glm-4.6v",
-           "-p",
-           SUMMARY_PROMPT,
-           "--json",
-         ];
-
-         const child = spawn("pi", args, {
-           stdio: ["ignore", "pipe", "pipe"],
-           env: process.env,
-         });
-
-         let stdout = "";
-         let stderr = "";
-
-         child.stdout.on("data", (data) => {
-           stdout += data.toString();
-         });
-
-         child.stderr.on("data", (data) => {
-           stderr += data.toString();
-         });
-
-         child.on("error", (err) => {
-           console.error("Image analysis failed:", err);
-           ctx.ui.notify(`Analysis failed: ${err.message}`, "error");
-           done(null);
-         });
-
-         child.on("close", (code) => {
-           if (code !== 0) {
-             console.error("Image analysis failed:", stderr);
-             ctx.ui.notify(`Analysis failed: ${stderr}`, "error");
+         analyzeImage({ absolutePath, signal: loader.signal })
+           .then((text) => done(text))
+           .catch((err) => {
+             ctx.ui.notify(`Analysis failed: ${err.message}`, "error");
              done(null);
-             return;
-           }
-
-           let summaryText: string;
-           try {
-             const json = JSON.parse(stdout);
-             if (json.messages && Array.isArray(json.messages)) {
-               const assistantMsg = json.messages.findLast((m: any) => m.role === "assistant");
-               if (assistantMsg?.content) {
-                 summaryText = assistantMsg.content
-                   .filter((c: any) => c.type === "text")
-                   .map((c: any) => c.text)
-                   .join("\n");
-               } else {
-                 summaryText = stdout;
-               }
-             } else {
-               summaryText = stdout;
-             }
-           } catch {
-             summaryText = stdout;
-           }
-
-           done(summaryText);
-         });
-
-         if (loader.signal.aborted) {
-           child.kill();
-         }
-
-         loader.signal.addEventListener("abort", () => {
-           child.kill();
-         });
+           });
 
          return loader;
        });
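
Editor's note: the command handler now reuses `analyzeImage()`, passing `loader.signal` so cancellation is handled in one place (0.1.0 wired `child.kill()` by hand at each call site). A sketch of that cancellation contract with a standalone `AbortController`; the path and timeout are illustrative:

```ts
// Sketch of analyzeImage()'s cancellation contract (path is illustrative).
// The helper registers its "abort" listener when called, so aborting later
// kills the pi subprocess and rejects the returned promise.
const controller = new AbortController();

const pending = analyzeImage({
  absolutePath: "/tmp/screenshot.png",
  signal: controller.signal,
});

setTimeout(() => controller.abort(), 5_000); // cancel if it takes too long

pending
  .then((summary) => console.log(summary))
  .catch((err) => console.error(err.message)); // "Operation aborted" after abort
```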
@@ -303,7 +220,6 @@ export default function (pi: ExtensionAPI) {
          return;
        }
 
-       // Show the analysis
        await ctx.ui.editor("Image Analysis", result);
      },
    });