@tyvm/knowhow 0.0.109-dev.e88af1e → 0.0.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/autodoc/README.md +324 -0
  2. package/autodoc/chat-guide.md +268 -365
  3. package/autodoc/cli-reference.md +399 -473
  4. package/autodoc/config-reference.md +431 -330
  5. package/autodoc/embeddings-guide.md +223 -322
  6. package/autodoc/generate-guide.md +261 -301
  7. package/autodoc/language-plugin-guide.md +221 -247
  8. package/autodoc/modules-guide.md +242 -215
  9. package/autodoc/plugins-guide.md +470 -469
  10. package/autodoc/quickstart-guide.md +67 -70
  11. package/autodoc/skills-guide.md +455 -339
  12. package/autodoc/worker-guide.md +301 -308
  13. package/package.json +1 -1
  14. package/src/agents/tools/list.ts +2 -2
  15. package/src/ai.ts +81 -37
  16. package/src/chat/CliChatService.ts +1 -1
  17. package/src/chat/modules/SystemModule.ts +2 -2
  18. package/src/clients/anthropic.ts +1 -1
  19. package/src/clients/index.ts +25 -6
  20. package/src/clients/openai.ts +8 -5
  21. package/src/clients/types.ts +29 -6
  22. package/src/clients/withRetry.ts +89 -0
  23. package/src/commands/agent.ts +30 -0
  24. package/src/commands/modules.ts +365 -30
  25. package/src/config.ts +1 -1
  26. package/src/hashes.ts +8 -9
  27. package/src/index.ts +4 -2
  28. package/src/processors/Base64ImageDetector.ts +73 -0
  29. package/src/services/MediaProcessorService.ts +79 -10
  30. package/src/services/modules/index.ts +24 -19
  31. package/tests/processors/Base64ImageDetector.test.ts +160 -0
  32. package/tests/unit/clients/AIClient.test.ts +446 -0
  33. package/tests/unit/clients/withRetry.test.ts +319 -0
  34. package/tests/unit/commands/github-credentials.test.ts +1 -2
  35. package/ts_build/package.json +1 -1
  36. package/ts_build/src/agents/tools/list.js +2 -2
  37. package/ts_build/src/agents/tools/list.js.map +1 -1
  38. package/ts_build/src/ai.d.ts +3 -3
  39. package/ts_build/src/ai.js +51 -23
  40. package/ts_build/src/ai.js.map +1 -1
  41. package/ts_build/src/chat/CliChatService.js +1 -1
  42. package/ts_build/src/chat/CliChatService.js.map +1 -1
  43. package/ts_build/src/chat/modules/SystemModule.js +2 -2
  44. package/ts_build/src/chat/modules/SystemModule.js.map +1 -1
  45. package/ts_build/src/clients/anthropic.js +1 -1
  46. package/ts_build/src/clients/anthropic.js.map +1 -1
  47. package/ts_build/src/clients/index.js +7 -6
  48. package/ts_build/src/clients/index.js.map +1 -1
  49. package/ts_build/src/clients/openai.js +4 -4
  50. package/ts_build/src/clients/openai.js.map +1 -1
  51. package/ts_build/src/clients/types.d.ts +12 -6
  52. package/ts_build/src/clients/withRetry.d.ts +2 -0
  53. package/ts_build/src/clients/withRetry.js +60 -0
  54. package/ts_build/src/clients/withRetry.js.map +1 -0
  55. package/ts_build/src/commands/agent.js +25 -0
  56. package/ts_build/src/commands/agent.js.map +1 -1
  57. package/ts_build/src/commands/modules.js +297 -17
  58. package/ts_build/src/commands/modules.js.map +1 -1
  59. package/ts_build/src/config.js +1 -1
  60. package/ts_build/src/config.js.map +1 -1
  61. package/ts_build/src/hashes.js +5 -7
  62. package/ts_build/src/hashes.js.map +1 -1
  63. package/ts_build/src/index.js +1 -1
  64. package/ts_build/src/index.js.map +1 -1
  65. package/ts_build/src/processors/Base64ImageDetector.d.ts +3 -0
  66. package/ts_build/src/processors/Base64ImageDetector.js +42 -0
  67. package/ts_build/src/processors/Base64ImageDetector.js.map +1 -1
  68. package/ts_build/src/services/MediaProcessorService.d.ts +5 -4
  69. package/ts_build/src/services/MediaProcessorService.js +53 -8
  70. package/ts_build/src/services/MediaProcessorService.js.map +1 -1
  71. package/ts_build/src/services/modules/index.js +17 -13
  72. package/ts_build/src/services/modules/index.js.map +1 -1
  73. package/ts_build/tests/processors/Base64ImageDetector.test.js +111 -0
  74. package/ts_build/tests/processors/Base64ImageDetector.test.js.map +1 -1
  75. package/ts_build/tests/unit/clients/AIClient.test.d.ts +1 -0
  76. package/ts_build/tests/unit/clients/AIClient.test.js +339 -0
  77. package/ts_build/tests/unit/clients/AIClient.test.js.map +1 -0
  78. package/ts_build/tests/unit/clients/withRetry.test.d.ts +1 -0
  79. package/ts_build/tests/unit/clients/withRetry.test.js +225 -0
  80. package/ts_build/tests/unit/clients/withRetry.test.js.map +1 -0
  81. package/ts_build/tests/unit/commands/github-credentials.test.js +1 -2
  82. package/ts_build/tests/unit/commands/github-credentials.test.js.map +1 -1
@@ -1,9 +1,10 @@
1
1
  import * as fs from "fs";
2
2
  import * as path from "path";
3
- import { exec } from "child_process";
3
+ import { exec, spawn } from "child_process";
4
4
  import { promisify } from "util";
5
5
  import { fileExists, readFile, mkdir } from "../utils";
6
6
  import { AIClient } from "../clients";
7
+ import { Models } from "../types";
7
8
 
8
9
  const execPromise = promisify(exec);
9
10
 
@@ -36,7 +37,7 @@ export interface KeyframeInfo {
36
37
  * audio/video processing steps after downloading with ytdl.
37
38
  */
38
39
  export class MediaProcessorService {
39
- constructor(private clients: any) {}
40
+ constructor(private clients: AIClient) {}
40
41
 
41
42
  /**
42
43
  * Split an audio/video file into fixed-length mp3 chunks using ffmpeg.
@@ -45,7 +46,8 @@ export class MediaProcessorService {
45
46
  filePath: string,
46
47
  outputDir: string,
47
48
  CHUNK_LENGTH_SECONDS = 30,
48
- reuseExistingChunks = true
49
+ reuseExistingChunks = true,
50
+ onProgress?: (progressFraction: number) => void
49
51
  ): Promise<string[]> {
50
52
  const parsed = path.parse(filePath);
51
53
  const fileName = parsed.name;
@@ -72,8 +74,70 @@ export class MediaProcessorService {
72
74
  }
73
75
  }
74
76
 
75
- const command = `ffmpeg -i "${filePath}" -f segment -segment_time ${CHUNK_LENGTH_SECONDS} -map 0:a:0 -acodec mp3 -vn "${outputDirPath}/chunk%04d.mp3"`;
76
- await execAsync(command);
77
+ // Use faster encoding settings:
78
+ // - mono audio (-ac 1): halves encoding work, Whisper handles mono fine
79
+ // - low bitrate (-b:a 32k): sufficient for speech, much faster encode + smaller files
80
+ // - fast preset not available for mp3 encoder, but limiting bitrate helps
81
+ // - -threads 0: use all available CPU threads for faster processing
82
+ // If the input is already an mp3, copy the audio stream to avoid re-encoding
83
+ const inputExt = path.extname(filePath).toLowerCase().replace('.', '');
84
+ const isAlreadyMp3 = inputExt === 'mp3';
85
+ const audioCodecArgs = isAlreadyMp3
86
+ ? '-acodec copy'
87
+ : '-acodec libmp3lame -ac 1 -b:a 32k -threads 0';
88
+
89
+ // Use -progress pipe:1 to get real-time progress from ffmpeg
90
+ // We need the total duration first to calculate fraction
91
+ await new Promise<void>((resolve, reject) => {
92
+ // Get total duration via ffprobe first
93
+ let totalDurationSeconds = 0;
94
+ exec(
95
+ `ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "${filePath}"`,
96
+ (err, stdout) => {
97
+ if (!err && stdout.trim()) {
98
+ totalDurationSeconds = parseFloat(stdout.trim()) || 0;
99
+ }
100
+
101
+ // Now run ffmpeg with progress reporting
102
+ const args = [
103
+ '-i', filePath,
104
+ '-f', 'segment',
105
+ '-segment_time', String(CHUNK_LENGTH_SECONDS),
106
+ '-map', '0:a:0',
107
+ ...audioCodecArgs.split(' '),
108
+ '-vn',
109
+ ...(onProgress ? ['-progress', 'pipe:1'] : []),
110
+ `${outputDirPath}/chunk%04d.mp3`,
111
+ ];
112
+
113
+ const proc = spawn('ffmpeg', args);
114
+
115
+ let stdoutBuf = '';
116
+ proc.stdout?.on('data', (data: Buffer) => {
117
+ stdoutBuf += data.toString();
118
+ if (onProgress && totalDurationSeconds > 0) {
119
+ // ffmpeg -progress outputs key=value lines; look for out_time_ms
120
+ const match = stdoutBuf.match(/out_time_ms=(\d+)/g);
121
+ if (match) {
122
+ const last = match[match.length - 1];
123
+ const ms = parseInt(last.split('=')[1], 10);
124
+ const fraction = Math.min(ms / 1000 / totalDurationSeconds, 1);
125
+ onProgress(fraction);
126
+ // Keep only tail to avoid unbounded buffer growth
127
+ stdoutBuf = stdoutBuf.slice(-500);
128
+ }
129
+ }
130
+ });
131
+
132
+ proc.on('close', (code) => {
133
+ if (code === 0) resolve();
134
+ else reject(new Error(`ffmpeg exited with code ${code}`));
135
+ });
136
+ proc.on('error', reject);
137
+ }
138
+ );
139
+ });
140
+
77
141
  await fs.promises.writeFile(doneFilePath, "done");
78
142
 
79
143
  const folderFiles = await fs.promises.readdir(outputDirPath);
@@ -298,8 +362,9 @@ export class MediaProcessorService {
298
362
  });
299
363
  const image = `data:image/jpeg;base64,${base64}`;
300
364
  return this.clients.createCompletion("openai", {
301
- model: "gpt-4o",
365
+ model: Models.openai.GPT_4o,
302
366
  max_tokens: 2500,
367
+ timeout: 20000,
303
368
  messages: [
304
369
  {
305
370
  role: "user",
@@ -315,7 +380,8 @@ export class MediaProcessorService {
315
380
  async *streamProcessVideo(
316
381
  filePath: string,
317
382
  reusePreviousTranscript = true,
318
- chunkTime = 30
383
+ chunkTime = 30,
384
+ onChunkingProgress?: (fraction: number) => void
319
385
  ) {
320
386
  const parsed = path.parse(filePath);
321
387
  const videoJson = `${parsed.dir}/${parsed.name}/video.json`;
@@ -324,7 +390,8 @@ export class MediaProcessorService {
324
390
  const transcriptions = this.streamProcessAudio(
325
391
  filePath,
326
392
  reusePreviousTranscript,
327
- chunkTime
393
+ chunkTime,
394
+ onChunkingProgress
328
395
  );
329
396
 
330
397
  console.log("Extracting keyframes...");
@@ -352,7 +419,8 @@ export class MediaProcessorService {
352
419
  async *streamProcessAudio(
353
420
  filePath: string,
354
421
  reusePreviousTranscript = true,
355
- chunkTime = 30
422
+ chunkTime = 30,
423
+ onChunkingProgress?: (fraction: number) => void
356
424
  ): AsyncGenerator<TranscriptChunk> {
357
425
  const parsed = path.parse(filePath);
358
426
  const outputPath = `${parsed.dir}/${parsed.name}/transcript.json`;
@@ -382,7 +450,8 @@ export class MediaProcessorService {
382
450
  filePath,
383
451
  parsed.dir,
384
452
  chunkTime,
385
- reusePreviousTranscript
453
+ reusePreviousTranscript,
454
+ onChunkingProgress
386
455
  );
387
456
 
388
457
  for await (const chunk of this.streamTranscription(
@@ -31,14 +31,10 @@ export class ModulesService {
31
31
  // puts packages), then cwd node_modules, then global node_modules.
32
32
  // This allows modules installed via `knowhow modules install` to be found
33
33
  // even when knowhow itself is installed globally.
34
- const cwdPaths = (require as any).resolve
35
- ? require.resolve.paths?.("") || []
36
- : [];
37
34
  const resolvePaths = [
38
35
  path.join(process.cwd(), ".knowhow", "node_modules"),
39
36
  path.join(os.homedir(), ".knowhow", "node_modules"),
40
37
  path.join(process.cwd(), "node_modules"),
41
- ...cwdPaths,
42
38
  ];
43
39
 
44
40
  for (const modulePath of allModulePaths) {
@@ -57,22 +53,31 @@ export class ModulesService {
57
53
  resolvedPath = modulePath; // fall back to normal require resolution
58
54
  }
59
55
  }
60
- const rawModule = require(resolvedPath);
61
- const importedModule = (rawModule.default || rawModule) as KnowhowModule;
62
- context.Events?.log(
63
- "ModulesService",
64
- `🔌 Loading module: ${modulePath} (resolved: ${resolvedPath})`
65
- );
66
- await importedModule.init({
67
- config,
68
- cwd: process.cwd(),
69
- context: context as ModuleContext,
70
- });
71
- context.Events?.log(
72
- "ModulesService",
73
- `✅ Module initialized: ${modulePath} (tools: ${importedModule.tools.length}, agents: ${importedModule.agents.length}, plugins: ${importedModule.plugins.length}, clients: ${importedModule.clients.length})`
74
- );
75
56
 
57
+ let importedModule: KnowhowModule;
58
+ try {
59
+ const rawModule = require(resolvedPath);
60
+ importedModule = (rawModule.default || rawModule) as KnowhowModule;
61
+ context.Events?.log(
62
+ "ModulesService",
63
+ `🔌 Loading module: ${modulePath} (resolved: ${resolvedPath})`
64
+ );
65
+ await importedModule.init({
66
+ config,
67
+ cwd: process.cwd(),
68
+ context: context as ModuleContext,
69
+ });
70
+ context.Events?.log(
71
+ "ModulesService",
72
+ `✅ Module initialized: ${modulePath} (tools: ${importedModule.tools.length}, agents: ${importedModule.agents.length}, plugins: ${importedModule.plugins.length}, clients: ${importedModule.clients.length})`
73
+ );
74
+ } catch (err: any) {
75
+ process.stderr.write(
76
+ `\n⚠️ Failed to load module "${modulePath}": ${err.message}\n` +
77
+ ` Run "knowhow modules setup --global" or "knowhow modules install ${modulePath} --global" to fix this.\n\n`
78
+ );
79
+ continue;
80
+ }
76
81
  // Only register tools/agents/plugins/clients if the relevant services
77
82
  // are available in context (they may not be during early CLI command registration)
78
83
  if (context.Agents) {
@@ -594,3 +594,163 @@ describe("Base64ImageDetector", () => {
594
594
  });
595
595
  });
596
596
  });
597
+
598
+ describe("Base64ImageDetector - image path hint detection", () => {
599
+ let detector: Base64ImageProcessor;
600
+ let processor: ReturnType<Base64ImageProcessor["createProcessor"]>;
601
+
602
+ beforeEach(() => {
603
+ detector = new Base64ImageProcessor();
604
+ processor = detector.createProcessor();
605
+ });
606
+
607
+ /**
608
+ * Simulates the actual output from the Playwright MCP browser_take_screenshot tool.
609
+ * When an agent calls browser_take_screenshot, the tool message content looks like:
610
+ *
611
+ * ### Result
612
+ * - [Screenshot of viewport](./hackernews-screenshot.png)
613
+ * ### Ran Playwright code
614
+ * ...
615
+ *
616
+ * The Base64ImageDetector should detect the .png path in this text and add a hint
617
+ * telling the model it can call loadImageAsBase64 to actually view the image.
618
+ */
619
+ describe("browser screenshot tool response", () => {
620
+ it("should detect image path from a browser screenshot tool message and add hint", () => {
621
+ // This is the exact format returned by the browser MCP take_screenshot tool
622
+ const screenshotToolResponse =
623
+ "### Result\n- [Screenshot of viewport](./hackernews-screenshot.png)\n### Ran Playwright code\n```js\nawait page.screenshot({ path: './hackernews-screenshot.png', scale: 'css', type: 'png' });\n```";
624
+
625
+ const originalMessages: Message[] = [];
626
+ const modifiedMessages: Message[] = [
627
+ {
628
+ role: "tool",
629
+ content: screenshotToolResponse,
630
+ tool_call_id: "call_abc123",
631
+ },
632
+ ];
633
+
634
+ processor(originalMessages, modifiedMessages);
635
+
636
+ const content = modifiedMessages[0].content as string;
637
+ expect(typeof content).toBe("string");
638
+ // Should still contain the original text
639
+ expect(content).toContain("Screenshot of viewport");
640
+ expect(content).toContain("./hackernews-screenshot.png");
641
+ // Should contain the hint
642
+ expect(content).toContain("[TIP:");
643
+ expect(content).toContain("loadImageAsBase64");
644
+ expect(content).toContain("./hackernews-screenshot.png");
645
+ });
646
+
647
+ it("should include the exact file path in the hint", () => {
648
+ const screenshotPath = "./hackernews-screenshot.png";
649
+ const toolResponse = `### Result\n- [Screenshot of viewport](${screenshotPath})\n`;
650
+
651
+ const originalMessages: Message[] = [];
652
+ const modifiedMessages: Message[] = [
653
+ {
654
+ role: "tool",
655
+ content: toolResponse,
656
+ tool_call_id: "call_xyz789",
657
+ },
658
+ ];
659
+
660
+ processor(originalMessages, modifiedMessages);
661
+
662
+ const content = modifiedMessages[0].content as string;
663
+ // The hint should reference the exact path
664
+ expect(content).toContain(`loadImageAsBase64("${screenshotPath}")`);
665
+ });
666
+
667
+ it("should not add hint when message contains no image paths", () => {
668
+ const toolResponse = "The page has been loaded successfully.";
669
+
670
+ const originalMessages: Message[] = [];
671
+ const modifiedMessages: Message[] = [
672
+ {
673
+ role: "tool",
674
+ content: toolResponse,
675
+ tool_call_id: "call_noimages",
676
+ },
677
+ ];
678
+
679
+ processor(originalMessages, modifiedMessages);
680
+
681
+ const content = modifiedMessages[0].content as string;
682
+ expect(content).toBe(toolResponse);
683
+ expect(content).not.toContain("[TIP:");
684
+ });
685
+
686
+ it("should detect absolute paths like /tmp/page-123.png", () => {
687
+ const toolResponse =
688
+ "Screenshot saved to /tmp/page-2026-01-01T12-00-00.png for review.";
689
+
690
+ const originalMessages: Message[] = [];
691
+ const modifiedMessages: Message[] = [
692
+ {
693
+ role: "tool",
694
+ content: toolResponse,
695
+ tool_call_id: "call_abs",
696
+ },
697
+ ];
698
+
699
+ processor(originalMessages, modifiedMessages);
700
+
701
+ const content = modifiedMessages[0].content as string;
702
+ expect(content).toContain("[TIP:");
703
+ expect(content).toContain("loadImageAsBase64");
704
+ expect(content).toContain("/tmp/page-2026-01-01T12-00-00.png");
705
+ });
706
+
707
+ it("should detect multiple image paths and hint about all of them", () => {
708
+ const toolResponse =
709
+ "Before: ./before.png\nAfter: ./after.jpg\nDiff: ./diff.png";
710
+
711
+ const originalMessages: Message[] = [];
712
+ const modifiedMessages: Message[] = [
713
+ {
714
+ role: "tool",
715
+ content: toolResponse,
716
+ tool_call_id: "call_multi",
717
+ },
718
+ ];
719
+
720
+ processor(originalMessages, modifiedMessages);
721
+
722
+ const content = modifiedMessages[0].content as string;
723
+ expect(content).toContain("[TIP:");
724
+ expect(content).toContain("./before.png");
725
+ expect(content).toContain("./after.jpg");
726
+ expect(content).toContain("./diff.png");
727
+ });
728
+
729
+ it("should not add hint to messages that are actual base64 image data (already converted)", () => {
730
+ const validPngBase64 =
731
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==";
732
+
733
+ const originalMessages: Message[] = [];
734
+ const modifiedMessages: Message[] = [
735
+ {
736
+ role: "tool",
737
+ content: validPngBase64,
738
+ tool_call_id: "call_base64",
739
+ },
740
+ ];
741
+
742
+ processor(originalMessages, modifiedMessages);
743
+
744
+ // Should be converted to image array, not get a text hint
745
+ const content = modifiedMessages[0].content;
746
+ if (Array.isArray(content)) {
747
+ // Good - was converted to image content, no hint needed
748
+ expect(content[0]).toHaveProperty("type", "image_url");
749
+ } else {
750
+ // If kept as string, the hint should NOT be about a file path
751
+ // because base64 data URLs don't contain file paths
752
+ expect(content as string).not.toMatch(/loadImageAsBase64\("data:/);
753
+ }
754
+ });
755
+ });
756
+ });