@tyvm/knowhow 0.0.63 → 0.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/package.json +1 -1
  2. package/src/chat/modules/AgentModule.ts +7 -1
  3. package/src/clients/anthropic.ts +33 -1
  4. package/src/processors/Base64ImageDetector.ts +193 -40
  5. package/src/processors/index.ts +1 -1
  6. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +5 -1
  7. package/tests/plugins/language/languagePlugin.test.ts +5 -1
  8. package/tests/processors/Base64ImageDetector.test.ts +263 -70
  9. package/tests/services/Tools.test.ts +6 -4
  10. package/ts_build/package.json +1 -1
  11. package/ts_build/src/chat/modules/AgentModule.js +5 -1
  12. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  13. package/ts_build/src/clients/anthropic.js +30 -1
  14. package/ts_build/src/clients/anthropic.js.map +1 -1
  15. package/ts_build/src/processors/Base64ImageDetector.d.ts +7 -3
  16. package/ts_build/src/processors/Base64ImageDetector.js +147 -27
  17. package/ts_build/src/processors/Base64ImageDetector.js.map +1 -1
  18. package/ts_build/src/processors/index.d.ts +1 -1
  19. package/ts_build/src/processors/index.js +2 -2
  20. package/ts_build/src/processors/index.js.map +1 -1
  21. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +5 -1
  22. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  23. package/ts_build/tests/plugins/language/languagePlugin.test.js +5 -1
  24. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
  25. package/ts_build/tests/processors/Base64ImageDetector.test.js +221 -59
  26. package/ts_build/tests/processors/Base64ImageDetector.test.js.map +1 -1
  27. package/ts_build/tests/services/Tools.test.js +3 -3
  28. package/ts_build/tests/services/Tools.test.js.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.63",
3
+ "version": "0.0.64",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
package/src/chat/modules/AgentModule.ts CHANGED
@@ -21,6 +21,7 @@ import {
21
21
  CustomVariables,
22
22
  XmlToolCallProcessor,
23
23
  HarmonyToolProcessor,
24
+ Base64ImageProcessor,
24
25
  } from "../../processors/index";
25
26
  import { TaskInfo, ChatSession } from "../types";
26
27
  import { agents } from "../../agents";
@@ -589,7 +590,9 @@ Please continue from where you left off and complete the original request.
589
590
  Boolean(msg.role === "tool" && msg.tool_call_id)
590
591
  ),
591
592
  ];
593
+
592
594
  agent.messageProcessor.setProcessors("pre_call", [
595
+ new Base64ImageProcessor(agent.tools).createProcessor(),
593
596
  ...caching,
594
597
  new CustomVariables(agent.tools).createProcessor(),
595
598
  ]);
@@ -599,7 +602,10 @@ Please continue from where you left off and complete the original request.
599
602
  new HarmonyToolProcessor().createProcessor(),
600
603
  ]);
601
604
 
602
- agent.messageProcessor.setProcessors("post_tools", caching);
605
+ agent.messageProcessor.setProcessors("post_tools", [
606
+ new Base64ImageProcessor(agent.tools).createProcessor(),
607
+ ...caching,
608
+ ]);
603
609
 
604
610
  // Set up event listeners
605
611
  if (!agent.agentEvents.listenerCount(agent.eventTypes.toolCall)) {
package/src/clients/anthropic.ts CHANGED
@@ -184,12 +184,44 @@ export class GenericAnthropicClient implements GenericClient {
184
184
  });
185
185
  }
186
186
 
187
+ // Convert tool message content to appropriate format
188
+ let toolResultContent: string | (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[];
189
+
190
+ if (typeof msg.content === "string") {
191
+ toolResultContent = msg.content;
192
+ } else if (Array.isArray(msg.content)) {
193
+ // Transform image_url format to Anthropic's image format
194
+ toolResultContent = msg.content.map((item): Anthropic.TextBlockParam | Anthropic.ImageBlockParam => {
195
+ if (item.type === "image_url") {
196
+ const url = item.image_url.url;
197
+ const isDataUrl = url.startsWith("data:");
198
+ const base64Data = isDataUrl ? url.split(",")[1] : url;
199
+ const mediaType = isDataUrl ? url.match(/data:([^;]+);/)?.[1] || "image/jpeg" : "image/jpeg";
200
+
201
+ return {
202
+ type: "image" as const,
203
+ source: {
204
+ type: "base64" as const,
205
+ media_type: mediaType as any,
206
+ data: base64Data,
207
+ },
208
+ };
209
+ } else if (item.type === "text") {
210
+ return { type: "text" as const, text: item.text };
211
+ }
212
+ // Fallback for unknown types
213
+ return { type: "text" as const, text: String(item) };
214
+ }) as (Anthropic.TextBlockParam | Anthropic.ImageBlockParam)[];
215
+ } else {
216
+ toolResultContent = String(msg.content);
217
+ }
218
+
187
219
  toolMessages.push({
188
220
  role: "user",
189
221
  content: [
190
222
  {
191
223
  type: "tool_result",
192
- content: msg.content as string,
224
+ content: toolResultContent,
193
225
  tool_use_id: msg.tool_call_id,
194
226
  },
195
227
  ],
package/src/processors/Base64ImageDetector.ts CHANGED
@@ -1,5 +1,8 @@
1
+ import * as fs from "fs";
2
+ import * as path from "path";
1
3
  import { Message } from "../clients/types";
2
4
  import { MessageProcessorFunction } from "../services/MessageProcessor";
5
+ import { ToolsService } from "../services";
3
6
 
4
7
  interface ImageContent {
5
8
  type: "image_url";
@@ -14,23 +17,23 @@ interface TextContent {
14
17
  text: string;
15
18
  }
16
19
 
17
- export class Base64ImageDetector {
18
- private imageDetail: "auto" | "low" | "high";
19
- private supportedFormats: string[];
20
+ export class Base64ImageProcessor {
21
+ private imageDetail: "auto" | "low" | "high" = "auto";
22
+ private supportedFormats = ["png", "jpeg", "jpg", "gif", "webp"];
20
23
 
21
- constructor(
22
- imageDetail: "auto" | "low" | "high" = "auto",
23
- supportedFormats: string[] = ["png", "jpeg", "jpg", "gif", "webp"]
24
- ) {
25
- this.imageDetail = imageDetail;
26
- this.supportedFormats = supportedFormats;
24
+ constructor(toolsService?: ToolsService) {
25
+ this.registerTool(toolsService);
27
26
  }
28
27
 
29
- private isBase64Image(text: string): { isImage: boolean; mimeType?: string; data?: string } {
28
+ private isBase64Image(text: string): {
29
+ isImage: boolean;
30
+ mimeType?: string;
31
+ data?: string;
32
+ } {
30
33
  // Check for data URL format: data:image/type;base64,actualdata
31
34
  const dataUrlPattern = /^data:image\/([a-zA-Z]+);base64,(.+)$/;
32
35
  const match = text.match(dataUrlPattern);
33
-
36
+
34
37
  if (match) {
35
38
  const [, mimeType, data] = match;
36
39
  if (this.supportedFormats.includes(mimeType.toLowerCase())) {
@@ -47,14 +50,17 @@ export class Base64ImageDetector {
47
50
  try {
48
51
  const decoded = atob(header);
49
52
  // Check for common image file signatures
50
- if (decoded.startsWith('\x89PNG')) {
51
- return { isImage: true, mimeType: 'png', data: text };
52
- } else if (decoded.startsWith('\xFF\xD8\xFF')) {
53
- return { isImage: true, mimeType: 'jpeg', data: text };
54
- } else if (decoded.startsWith('GIF87a') || decoded.startsWith('GIF89a')) {
55
- return { isImage: true, mimeType: 'gif', data: text };
56
- } else if (decoded.startsWith('RIFF') && decoded.includes('WEBP')) {
57
- return { isImage: true, mimeType: 'webp', data: text };
53
+ if (decoded.startsWith("\x89PNG")) {
54
+ return { isImage: true, mimeType: "png", data: text };
55
+ } else if (decoded.startsWith("\xFF\xD8\xFF")) {
56
+ return { isImage: true, mimeType: "jpeg", data: text };
57
+ } else if (
58
+ decoded.startsWith("GIF87a") ||
59
+ decoded.startsWith("GIF89a")
60
+ ) {
61
+ return { isImage: true, mimeType: "gif", data: text };
62
+ } else if (decoded.startsWith("RIFF") && decoded.includes("WEBP")) {
63
+ return { isImage: true, mimeType: "webp", data: text };
58
64
  }
59
65
  } catch (e) {
60
66
  // Not valid base64 or not an image
@@ -66,26 +72,26 @@ export class Base64ImageDetector {
66
72
 
67
73
  private convertBase64ToImageContent(text: string): ImageContent | null {
68
74
  const detection = this.isBase64Image(text);
69
-
75
+
70
76
  if (!detection.isImage) {
71
77
  return null;
72
78
  }
73
79
 
74
- const dataUrl = detection.data!.startsWith('data:')
75
- ? detection.data
80
+ const dataUrl = detection.data!.startsWith("data:")
81
+ ? detection.data
76
82
  : `data:image/${detection.mimeType};base64,${detection.data}`;
77
83
 
78
84
  return {
79
85
  type: "image_url",
80
86
  image_url: {
81
87
  url: dataUrl,
82
- detail: this.imageDetail
83
- }
88
+ detail: this.imageDetail,
89
+ },
84
90
  };
85
91
  }
86
92
 
87
93
  private processMessageContent(message: Message): void {
88
- if (typeof message.content === 'string') {
94
+ if (typeof message.content === "string") {
89
95
  const imageContent = this.convertBase64ToImageContent(message.content);
90
96
  if (imageContent) {
91
97
  // Convert string content to multimodal array
@@ -94,9 +100,9 @@ export class Base64ImageDetector {
94
100
  } else if (Array.isArray(message.content)) {
95
101
  // Process each content item
96
102
  const newContent: (TextContent | ImageContent)[] = [];
97
-
103
+
98
104
  for (const item of message.content) {
99
- if (item.type === 'text' && item.text) {
105
+ if (item.type === "text" && item.text) {
100
106
  const imageContent = this.convertBase64ToImageContent(item.text);
101
107
  if (imageContent) {
102
108
  newContent.push(imageContent);
@@ -107,7 +113,7 @@ export class Base64ImageDetector {
107
113
  newContent.push(item as TextContent | ImageContent);
108
114
  }
109
115
  }
110
-
116
+
111
117
  message.content = newContent;
112
118
  }
113
119
  }
@@ -119,22 +125,22 @@ export class Base64ImageDetector {
119
125
  try {
120
126
  const args = JSON.parse(toolCall.function.arguments);
121
127
  let modified = false;
122
-
128
+
123
129
  // Recursively check all string values in arguments
124
130
  const processValue = (obj: any): any => {
125
- if (typeof obj === 'string') {
131
+ if (typeof obj === "string") {
126
132
  const detection = this.isBase64Image(obj);
127
133
  if (detection.isImage) {
128
134
  modified = true;
129
- const dataUrl = detection.data!.startsWith('data:')
130
- ? detection.data
135
+ const dataUrl = detection.data!.startsWith("data:")
136
+ ? detection.data
131
137
  : `data:image/${detection.mimeType};base64,${detection.data}`;
132
138
  return `[CONVERTED TO IMAGE: ${dataUrl.substring(0, 50)}...]`;
133
139
  }
134
140
  return obj;
135
141
  } else if (Array.isArray(obj)) {
136
142
  return obj.map(processValue);
137
- } else if (obj && typeof obj === 'object') {
143
+ } else if (obj && typeof obj === "object") {
138
144
  const result = {};
139
145
  for (const [key, value] of Object.entries(obj)) {
140
146
  result[key] = processValue(value);
@@ -143,7 +149,7 @@ export class Base64ImageDetector {
143
149
  }
144
150
  return obj;
145
151
  };
146
-
152
+
147
153
  const processedArgs = processValue(args);
148
154
  if (modified) {
149
155
  toolCall.function.arguments = JSON.stringify(processedArgs);
@@ -152,10 +158,13 @@ export class Base64ImageDetector {
152
158
  // Arguments are not valid JSON, treat as string
153
159
  const detection = this.isBase64Image(toolCall.function.arguments);
154
160
  if (detection.isImage) {
155
- const dataUrl = detection.data!.startsWith('data:')
156
- ? detection.data
161
+ const dataUrl = detection.data!.startsWith("data:")
162
+ ? detection.data
157
163
  : `data:image/${detection.mimeType};base64,${detection.data}`;
158
- toolCall.function.arguments = `[CONVERTED TO IMAGE: ${dataUrl.substring(0, 50)}...]`;
164
+ toolCall.function.arguments = `[CONVERTED TO IMAGE: ${dataUrl.substring(
165
+ 0,
166
+ 50
167
+ )}...]`;
159
168
  }
160
169
  }
161
170
  }
@@ -163,14 +172,45 @@ export class Base64ImageDetector {
163
172
  }
164
173
  }
165
174
 
175
+ private processToolMessageContent(message: Message): void {
176
+ // Tool messages have string content that might be a JSON string containing image data
177
+ if (typeof message.content === "string" && message.content.trim()) {
178
+ try {
179
+ // Try to parse as JSON
180
+ const parsed = JSON.parse(message.content);
181
+
182
+ // Check if it's an image_url object
183
+ if (parsed.type === "image_url" && parsed.image_url?.url) {
184
+ // Convert the tool message content from JSON string to an array with the image
185
+ message.content = [parsed];
186
+ }
187
+ } catch (e) {
188
+ // Not JSON, check if it's a plain base64 string (only if still a string)
189
+ if (typeof message.content === "string") {
190
+ const imageContent = this.convertBase64ToImageContent(message.content);
191
+ if (imageContent) {
192
+ message.content = [imageContent];
193
+ }
194
+ }
195
+ }
196
+ }
197
+ }
198
+
166
199
  createProcessor(): MessageProcessorFunction {
167
200
  return (originalMessages: Message[], modifiedMessages: Message[]) => {
168
201
  for (const message of modifiedMessages) {
169
- // Only process user messages (images typically come from users)
170
- if (message.role === 'user') {
202
+ // Process user messages (images from user input)
203
+ if (message.role === "user") {
171
204
  this.processMessageContent(message);
172
205
  }
173
206
 
207
+ // Process tool messages (images from loadImageAsBase64 tool)
208
+ // Tool responses come back as JSON strings that need to be parsed
209
+ // and converted to proper image content before the agent sees them
210
+ if (message.role === "tool") {
211
+ this.processToolMessageContent(message);
212
+ }
213
+
174
214
  // Process tool calls in any message
175
215
  this.processToolCallArguments(message);
176
216
  }
@@ -184,7 +224,120 @@ export class Base64ImageDetector {
184
224
  setSupportedFormats(formats: string[]): void {
185
225
  this.supportedFormats = formats;
186
226
  }
227
+
228
+ /**
229
+ * Registers the loadImageAsBase64 tool with the ToolsService
230
+ */
231
+ registerTool(toolsService?: ToolsService): void {
232
+ if (toolsService) {
233
+ const toolDefinition = {
234
+ type: "function" as const,
235
+ function: {
236
+ name: "loadImageAsBase64",
237
+ description:
238
+ "Load an image file from a file path and return it as a base64 data URL. This enables you to view and analyze images from the filesystem. Use this when the user provides a screenshot path or asks you to look at an image file.",
239
+ parameters: {
240
+ type: "object",
241
+ positional: true,
242
+ properties: {
243
+ filePath: {
244
+ type: "string",
245
+ description: "The absolute or relative path to the image file",
246
+ },
247
+ detail: {
248
+ type: "string",
249
+ description:
250
+ "The level of detail for image analysis. Options: 'auto' (default), 'low' (faster, less detail), 'high' (slower, more detail)",
251
+ },
252
+ },
253
+ required: ["filePath"],
254
+ },
255
+ },
256
+ };
257
+
258
+ toolsService.addTools([toolDefinition]);
259
+ toolsService.addFunctions({
260
+ loadImageAsBase64: async (
261
+ filePath: string,
262
+ detail?: "auto" | "low" | "high"
263
+ ) => {
264
+ return await this.loadImageAsBase64(filePath, detail);
265
+ },
266
+ });
267
+ }
268
+ }
269
+
270
+ /**
271
+ * Loads an image from a file path and returns it as a base64 data URL
272
+ */
273
+ private async loadImageAsBase64(
274
+ filePath: string,
275
+ detail: "auto" | "low" | "high" = "auto"
276
+ ): Promise<string> {
277
+ try {
278
+ // Check if file exists
279
+ if (!fs.existsSync(filePath)) {
280
+ throw new Error(`File not found: ${filePath}`);
281
+ }
282
+
283
+ // Get file stats to verify it's a file
284
+ const stats = fs.statSync(filePath);
285
+ if (!stats.isFile()) {
286
+ throw new Error(`Path is not a file: ${filePath}`);
287
+ }
288
+
289
+ // Detect MIME type from file extension
290
+ const ext = path.extname(filePath).toLowerCase().replace(".", "");
291
+ const mimeTypeMap: { [key: string]: string } = {
292
+ png: "image/png",
293
+ jpg: "image/jpeg",
294
+ jpeg: "image/jpeg",
295
+ gif: "image/gif",
296
+ webp: "image/webp",
297
+ bmp: "image/bmp",
298
+ svg: "image/svg+xml",
299
+ };
300
+
301
+ const mimeType = mimeTypeMap[ext];
302
+ if (!mimeType) {
303
+ throw new Error(
304
+ `Unsupported image format: ${ext}. Supported formats: ${Object.keys(
305
+ mimeTypeMap
306
+ ).join(", ")}`
307
+ );
308
+ }
309
+
310
+ // Check if format is supported
311
+ const simpleType = ext === "jpg" ? "jpeg" : ext;
312
+ if (!this.supportedFormats.includes(simpleType)) {
313
+ throw new Error(
314
+ `Image format ${ext} is not in supported formats: ${this.supportedFormats.join(
315
+ ", "
316
+ )}`
317
+ );
318
+ }
319
+
320
+ // Read the file as base64
321
+ const imageBuffer = fs.readFileSync(filePath);
322
+ const base64Data = imageBuffer.toString("base64");
323
+
324
+ // Create data URL
325
+ const dataUrl = `data:${mimeType};base64,${base64Data}`;
326
+
327
+ // Return in a format that indicates this is an image
328
+ // The Base64ImageDetector will convert this to proper image content
329
+ return JSON.stringify({
330
+ type: "image_url",
331
+ image_url: {
332
+ url: dataUrl,
333
+ detail: detail || this.imageDetail,
334
+ },
335
+ });
336
+ } catch (error) {
337
+ throw new Error(`Failed to load image: ${error.message}`);
338
+ }
339
+ }
187
340
  }
188
341
 
189
342
  // Global instance
190
- export const globalBase64ImageDetector = new Base64ImageDetector();
343
+ export const globalBase64ImageDetector = new Base64ImageProcessor();
package/src/processors/index.ts CHANGED
@@ -1,4 +1,4 @@
1
- export { Base64ImageDetector } from "./Base64ImageDetector";
1
+ export { Base64ImageProcessor } from "./Base64ImageDetector";
2
2
  export { CustomVariables } from "./CustomVariables";
3
3
  export { TokenCompressor } from "./TokenCompressor";
4
4
  export { JsonCompressor, JsonSchema, CompressionMetadata, JsonCompressorStorage } from "./JsonCompressor";
package/tests/plugins/language/languagePlugin-content-triggers.test.ts CHANGED
@@ -7,7 +7,11 @@ import { getConfig, getLanguageConfig } from "../../../src/config";
7
7
  jest.mock("../../../src/utils", () => ({
8
8
  readFile: jest.fn(),
9
9
  fileExists: jest.fn().mockReturnValue(true),
10
- fileStat: jest.fn(),
10
+ fileStat: jest.fn().mockResolvedValue({
11
+ isDirectory: jest.fn().mockReturnValue(false),
12
+ isFile: jest.fn().mockReturnValue(true),
13
+ size: 1024,
14
+ }),
11
15
  }));
12
16
 
13
17
  jest.mock("../../../src/services/EventService", () => ({
package/tests/plugins/language/languagePlugin.test.ts CHANGED
@@ -1,7 +1,11 @@
1
1
  jest.mock("../../../src/utils", () => ({
2
2
  readFile: jest.fn().mockReturnValue(Buffer.from("test")),
3
3
  fileExists: jest.fn().mockReturnValue(true),
4
- fileStat: jest.fn(),
4
+ fileStat: jest.fn().mockResolvedValue({
5
+ isDirectory: jest.fn().mockReturnValue(false),
6
+ isFile: jest.fn().mockReturnValue(true),
7
+ size: 1024,
8
+ }),
5
9
  }));
6
10
 
7
11
  jest.mock("../../../src/services/EventService", () => ({