hoomanjs 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hoomanjs",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Bun-powered local AI agent CLI with chat, exec, ACP, MCP, and skills support.",
5
5
  "author": {
6
6
  "name": "Vaibhav Pandey",
package/src/cli.ts CHANGED
@@ -134,12 +134,17 @@ program
134
134
  "MCP notification channel to subscribe to (repeatable).",
135
135
  (value: string, previous?: string[]) => [...(previous ?? []), value],
136
136
  )
137
+ .option(
138
+ "--debug",
139
+ "Log each MCP channel notification payload to the console.",
140
+ )
137
141
  .addOption(createToolkitOption())
138
142
  .action(
139
143
  async (options: {
140
144
  session?: string;
141
145
  toolkit?: Toolkit;
142
146
  channel?: string[];
147
+ debug?: boolean;
143
148
  }) => {
144
149
  const sessionId = options.session?.trim() || crypto.randomUUID();
145
150
  const channels = options.channel ?? [];
@@ -151,7 +156,12 @@ program
151
156
  true,
152
157
  );
153
158
  try {
154
- await daemon({ agent, manager, channels });
159
+ await daemon({
160
+ agent,
161
+ manager,
162
+ channels,
163
+ debug: Boolean(options.debug),
164
+ });
155
165
  } finally {
156
166
  try {
157
167
  await manager.disconnect();
@@ -2,9 +2,20 @@ import { createHash } from "node:crypto";
2
2
  import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import os from "node:os";
5
- import { tool } from "@strands-agents/sdk";
5
+ import {
6
+ DocumentBlock,
7
+ ImageBlock,
8
+ TextBlock,
9
+ VideoBlock,
10
+ tool,
11
+ type JSONValue,
12
+ } from "@strands-agents/sdk";
6
13
  import { getCwd } from "../utils/cwd-context.ts";
7
- import type { JSONValue } from "@strands-agents/sdk";
14
+ import {
15
+ detectDocumentFormat,
16
+ detectImageFormat,
17
+ detectVideoFormat,
18
+ } from "../utils/file-formats.ts";
8
19
  import { z } from "zod";
9
20
 
10
21
  const DEFAULT_READ_LIMIT = 250;
@@ -190,7 +201,7 @@ async function readTextFile(
190
201
  const buffer = await fs.readFile(filePath);
191
202
  if (isProbablyBinary(buffer)) {
192
203
  throw new Error(
193
- "File appears to be binary. Use get_file_info or read_file with as_base64 if you extend the tool for binary reads.",
204
+ "File appears to be binary. Call read_file again with `binary: true` images (png/jpeg/gif/webp), videos (mp4/mov/mkv/webm/etc.), and documents (pdf/docx/csv/etc.) are returned as multimodal content blocks the provider can forward to the model; unknown binary types come back as base64.",
194
205
  );
195
206
  }
196
207
 
@@ -211,14 +222,87 @@ async function readTextFile(
211
222
  };
212
223
  }
213
224
 
214
- async function readBinaryFile(filePath: string): Promise<{
215
- path: string;
216
- encoding: "base64";
217
- content: string;
218
- sizeBytes: number;
219
- }> {
225
+ type BinaryReadResult =
226
+ | Array<TextBlock | ImageBlock | VideoBlock | DocumentBlock>
227
+ | {
228
+ path: string;
229
+ encoding: "base64";
230
+ content: string;
231
+ sizeBytes: number;
232
+ };
233
+
234
+ async function readBinaryFile(
235
+ filePath: string,
236
+ options?: { maxBytes?: number },
237
+ ): Promise<BinaryReadResult> {
220
238
  await ensureFile(filePath);
239
+ const stat = await fs.stat(filePath);
240
+
241
+ if (stat.size > (options?.maxBytes ?? DEFAULT_MAX_READ_BYTES)) {
242
+ throw new Error(
243
+ `File too large to read safely (${stat.size} bytes). Use get_file_info for metadata or process the file with another tool.`,
244
+ );
245
+ }
246
+
221
247
  const buffer = await fs.readFile(filePath);
248
+ // The image, video, and document blocks below take the file bytes as a Uint8Array; construct a zero-copy view.
249
+ const bytes = new Uint8Array(
250
+ buffer.buffer,
251
+ buffer.byteOffset,
252
+ buffer.byteLength,
253
+ );
254
+
255
+ const imageFormat = detectImageFormat(filePath);
256
+ if (imageFormat) {
257
+ const metadata = new TextBlock(
258
+ JSON.stringify({
259
+ path: filePath,
260
+ kind: "image",
261
+ format: imageFormat,
262
+ size_bytes: stat.size,
263
+ }),
264
+ );
265
+ const image = new ImageBlock({
266
+ format: imageFormat,
267
+ source: { bytes },
268
+ });
269
+ return [metadata, image];
270
+ }
271
+
272
+ const videoFormat = detectVideoFormat(filePath);
273
+ if (videoFormat) {
274
+ const metadata = new TextBlock(
275
+ JSON.stringify({
276
+ path: filePath,
277
+ kind: "video",
278
+ format: videoFormat,
279
+ size_bytes: stat.size,
280
+ }),
281
+ );
282
+ const video = new VideoBlock({
283
+ format: videoFormat,
284
+ source: { bytes },
285
+ });
286
+ return [metadata, video];
287
+ }
288
+
289
+ const documentFormat = detectDocumentFormat(filePath);
290
+ if (documentFormat) {
291
+ const metadata = new TextBlock(
292
+ JSON.stringify({
293
+ path: filePath,
294
+ kind: "document",
295
+ format: documentFormat,
296
+ size_bytes: stat.size,
297
+ }),
298
+ );
299
+ const document = new DocumentBlock({
300
+ name: path.basename(filePath),
301
+ format: documentFormat,
302
+ source: { bytes },
303
+ });
304
+ return [metadata, document];
305
+ }
222
306
 
223
307
  return {
224
308
  path: filePath,
@@ -437,7 +521,9 @@ function createFilesystemSchema() {
437
521
  binary: z
438
522
  .boolean()
439
523
  .optional()
440
- .describe("Return file as base64 instead of UTF-8 text."),
524
+ .describe(
525
+ "Read as binary. Images, videos, and documents are returned as multimodal content blocks (forwarded to the active provider's native media format where supported); other binary files come back as base64.",
526
+ ),
441
527
  }),
442
528
  readMultipleFiles: z.object({
443
529
  paths: z.array(z.string()).min(1).describe("List of file paths to read."),
@@ -519,17 +605,23 @@ export function createFilesystemTools() {
519
605
  tool({
520
606
  name: "read_file",
521
607
  description:
522
- "Read a text file with optional line offset/limit. For binary files, enable the `binary` option to return base64.",
608
+ "Read a file. Defaults to UTF-8 text with optional line offset/limit. Pass `binary: true` for non-text files: images (jpeg/png/gif/webp), videos (mp4/mov/mkv/webm/etc.), and documents (pdf/docx/csv/etc.) are returned as multimodal content blocks — the active model provider forwards them natively where supported (Bedrock for all; Anthropic for images + docs; Google for images + docs; OpenAI for images; Ollama for images) and logs a warning for unsupported kinds. Any other binary file is returned as base64.",
523
609
  inputSchema: schema.readFile,
524
610
  callback: async (input) => {
525
611
  const filePath = normalizeUserPath(input.path);
526
- const result = input.binary
527
- ? await readBinaryFile(filePath)
528
- : await readTextFile(filePath, {
529
- offset: input.offset,
530
- limit: input.limit,
531
- });
532
612
 
613
+ if (input.binary) {
614
+ // Binary reads can return SDK media blocks (ImageBlock / VideoBlock / DocumentBlock)
615
+ // or a plain base64 JSON object. Both are accepted by FunctionTool's
616
+ // result wrapping, but the callback signature is JSONValue, so cast.
617
+ const result = await readBinaryFile(filePath);
618
+ return result as unknown as JSONValue;
619
+ }
620
+
621
+ const result = await readTextFile(filePath, {
622
+ offset: input.offset,
623
+ limit: input.limit,
624
+ });
533
625
  return toJsonValue(result);
534
626
  },
535
627
  }),
@@ -0,0 +1,60 @@
1
+ import path from "node:path";
2
+ import type {
3
+ DocumentFormat,
4
+ ImageFormat,
5
+ VideoFormat,
6
+ } from "@strands-agents/sdk";
7
+
8
+ // Extension → SDK media format. Values must match the unions the Strands SDK
9
+ // exposes so ImageBlock / VideoBlock / DocumentBlock construct cleanly. Each
10
+ // provider adapter (OpenAI, Anthropic, Bedrock, Google, Ollama) converts these
11
+ // into its native shape or gracefully drops unsupported ones with a warning —
12
+ // the paired TextBlock metadata still reaches the model either way.
13
+ const IMAGE_EXT_FORMATS: Record<string, ImageFormat> = {
14
+ ".png": "png",
15
+ ".jpg": "jpeg",
16
+ ".jpeg": "jpeg",
17
+ ".gif": "gif",
18
+ ".webp": "webp",
19
+ };
20
+
21
+ const VIDEO_EXT_FORMATS: Record<string, VideoFormat> = {
22
+ ".mp4": "mp4",
23
+ ".mov": "mov",
24
+ ".mkv": "mkv",
25
+ ".webm": "webm",
26
+ ".flv": "flv",
27
+ ".mpeg": "mpeg",
28
+ ".mpg": "mpg",
29
+ ".wmv": "wmv",
30
+ ".3gp": "3gp",
31
+ };
32
+
33
+ const DOCUMENT_EXT_FORMATS: Record<string, DocumentFormat> = {
34
+ ".pdf": "pdf",
35
+ ".csv": "csv",
36
+ ".doc": "doc",
37
+ ".docx": "docx",
38
+ ".xls": "xls",
39
+ ".xlsx": "xlsx",
40
+ ".html": "html",
41
+ ".htm": "html",
42
+ ".txt": "txt",
43
+ ".md": "md",
44
+ ".json": "json",
45
+ ".xml": "xml",
46
+ };
47
+
48
+ export function detectImageFormat(filePath: string): ImageFormat | undefined {
49
+ return IMAGE_EXT_FORMATS[path.extname(filePath).toLowerCase()];
50
+ }
51
+
52
+ export function detectVideoFormat(filePath: string): VideoFormat | undefined {
53
+ return VIDEO_EXT_FORMATS[path.extname(filePath).toLowerCase()];
54
+ }
55
+
56
+ export function detectDocumentFormat(
57
+ filePath: string,
58
+ ): DocumentFormat | undefined {
59
+ return DOCUMENT_EXT_FORMATS[path.extname(filePath).toLowerCase()];
60
+ }
@@ -10,6 +10,7 @@ type RunDaemonOptions = {
10
10
  agent: Agent;
11
11
  manager: McpManager;
12
12
  channels: string[];
13
+ debug?: boolean;
13
14
  };
14
15
 
15
16
  function debug(text: string): void {
@@ -39,13 +40,16 @@ export async function main(options: RunDaemonOptions): Promise<void> {
39
40
  );
40
41
 
41
42
  const [queue, stop] = await createQueue(async (message: ChannelMessage) => {
42
- debug(`notification from ${message.meta.server}:${message.meta.channel}`);
43
+ debug(`processing ${message.meta.server}:${message.meta.channel}`);
44
+ if (options.debug) {
45
+ debug(`raw → ${JSON.stringify(message.meta)}`);
46
+ }
43
47
  try {
44
48
  await options.agent.invoke(message.prompt);
45
49
  } catch (error) {
46
50
  const text = error instanceof Error ? error.message : String(error);
47
51
  debug(
48
- `turn failed for ${message.meta.server}:${message.meta.channel}: ${text}`,
52
+ `turn failed ${message.meta.server}:${message.meta.channel}: ${text}`,
49
53
  );
50
54
  }
51
55
  }, unsubscribe);