@tyvm/knowhow 0.0.109 → 0.0.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/autodoc/README.md +324 -0
  2. package/autodoc/chat-guide.md +268 -365
  3. package/autodoc/cli-reference.md +399 -473
  4. package/autodoc/config-reference.md +431 -330
  5. package/autodoc/embeddings-guide.md +223 -322
  6. package/autodoc/generate-guide.md +261 -301
  7. package/autodoc/language-plugin-guide.md +221 -247
  8. package/autodoc/modules-guide.md +242 -215
  9. package/autodoc/plugins-guide.md +470 -469
  10. package/autodoc/quickstart-guide.md +67 -70
  11. package/autodoc/skills-guide.md +455 -339
  12. package/autodoc/worker-guide.md +301 -308
  13. package/package.json +1 -1
  14. package/scripts/build-for-node.sh +10 -24
  15. package/src/agents/tools/list.ts +2 -2
  16. package/src/ai.ts +81 -37
  17. package/src/chat/CliChatService.ts +1 -1
  18. package/src/chat/modules/AgentModule.ts +7 -2
  19. package/src/chat/modules/SessionsModule.ts +40 -1
  20. package/src/chat/modules/SystemModule.ts +2 -2
  21. package/src/clients/anthropic.ts +1 -1
  22. package/src/clients/index.ts +25 -6
  23. package/src/clients/openai.ts +8 -5
  24. package/src/clients/types.ts +29 -6
  25. package/src/clients/withRetry.ts +89 -0
  26. package/src/commands/agent.ts +30 -0
  27. package/src/commands/modules.ts +417 -47
  28. package/src/config.ts +1 -1
  29. package/src/fileSync.ts +20 -12
  30. package/src/hashes.ts +43 -22
  31. package/src/index.ts +4 -2
  32. package/src/processors/Base64ImageDetector.ts +73 -0
  33. package/src/services/MediaProcessorService.ts +79 -10
  34. package/src/services/modules/index.ts +47 -18
  35. package/tests/processors/Base64ImageDetector.test.ts +160 -0
  36. package/tests/unit/clients/AIClient.test.ts +446 -0
  37. package/tests/unit/clients/withRetry.test.ts +319 -0
  38. package/tests/unit/commands/github-credentials.test.ts +1 -2
  39. package/ts_build/package.json +1 -1
  40. package/ts_build/src/agents/tools/list.js +2 -2
  41. package/ts_build/src/agents/tools/list.js.map +1 -1
  42. package/ts_build/src/ai.d.ts +3 -3
  43. package/ts_build/src/ai.js +51 -23
  44. package/ts_build/src/ai.js.map +1 -1
  45. package/ts_build/src/chat/CliChatService.js +1 -1
  46. package/ts_build/src/chat/CliChatService.js.map +1 -1
  47. package/ts_build/src/chat/modules/AgentModule.js +5 -2
  48. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  49. package/ts_build/src/chat/modules/SessionsModule.js +30 -1
  50. package/ts_build/src/chat/modules/SessionsModule.js.map +1 -1
  51. package/ts_build/src/chat/modules/SystemModule.js +2 -2
  52. package/ts_build/src/chat/modules/SystemModule.js.map +1 -1
  53. package/ts_build/src/clients/anthropic.js +1 -1
  54. package/ts_build/src/clients/anthropic.js.map +1 -1
  55. package/ts_build/src/clients/index.js +7 -6
  56. package/ts_build/src/clients/index.js.map +1 -1
  57. package/ts_build/src/clients/openai.js +4 -4
  58. package/ts_build/src/clients/openai.js.map +1 -1
  59. package/ts_build/src/clients/types.d.ts +12 -6
  60. package/ts_build/src/clients/withRetry.d.ts +2 -0
  61. package/ts_build/src/clients/withRetry.js +60 -0
  62. package/ts_build/src/clients/withRetry.js.map +1 -0
  63. package/ts_build/src/commands/agent.js +25 -0
  64. package/ts_build/src/commands/agent.js.map +1 -1
  65. package/ts_build/src/commands/modules.js +359 -32
  66. package/ts_build/src/commands/modules.js.map +1 -1
  67. package/ts_build/src/config.js +1 -1
  68. package/ts_build/src/config.js.map +1 -1
  69. package/ts_build/src/fileSync.d.ts +2 -2
  70. package/ts_build/src/fileSync.js +13 -11
  71. package/ts_build/src/fileSync.js.map +1 -1
  72. package/ts_build/src/hashes.d.ts +2 -2
  73. package/ts_build/src/hashes.js +40 -16
  74. package/ts_build/src/hashes.js.map +1 -1
  75. package/ts_build/src/index.js +1 -1
  76. package/ts_build/src/index.js.map +1 -1
  77. package/ts_build/src/processors/Base64ImageDetector.d.ts +3 -0
  78. package/ts_build/src/processors/Base64ImageDetector.js +42 -0
  79. package/ts_build/src/processors/Base64ImageDetector.js.map +1 -1
  80. package/ts_build/src/services/MediaProcessorService.d.ts +5 -4
  81. package/ts_build/src/services/MediaProcessorService.js +53 -8
  82. package/ts_build/src/services/MediaProcessorService.js.map +1 -1
  83. package/ts_build/src/services/modules/index.js +35 -12
  84. package/ts_build/src/services/modules/index.js.map +1 -1
  85. package/ts_build/tests/processors/Base64ImageDetector.test.js +111 -0
  86. package/ts_build/tests/processors/Base64ImageDetector.test.js.map +1 -1
  87. package/ts_build/tests/unit/clients/AIClient.test.d.ts +1 -0
  88. package/ts_build/tests/unit/clients/AIClient.test.js +339 -0
  89. package/ts_build/tests/unit/clients/AIClient.test.js.map +1 -0
  90. package/ts_build/tests/unit/clients/withRetry.test.d.ts +1 -0
  91. package/ts_build/tests/unit/clients/withRetry.test.js +225 -0
  92. package/ts_build/tests/unit/clients/withRetry.test.js.map +1 -0
  93. package/ts_build/tests/unit/commands/github-credentials.test.js +1 -2
  94. package/ts_build/tests/unit/commands/github-credentials.test.js.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tyvm/knowhow",
3
- "version": "0.0.109",
3
+ "version": "0.0.110",
4
4
  "description": "ai cli with plugins and agents",
5
5
  "main": "ts_build/src/index.js",
6
6
  "bin": {
@@ -7,11 +7,10 @@
7
7
  # This script:
8
8
  # 1. Compiles TypeScript with Node 20 (required for workspace deps)
9
9
  # 2. Creates /tmp/knowhow-node-<major> with the compiled output
10
- # 3. Installs the correct isolated-vm version for the target node in that dir
11
- # 4. Symlinks the package globally for ALL installed nvm versions matching the target
10
+ # 3. Symlinks the package globally for ALL installed nvm versions matching the target
12
11
  #
13
- # This approach avoids polluting the workspace node_modules with a different
14
- # isolated-vm ABI, so Node 20 and Node 24 builds can coexist.
12
+ # Note: isolated-vm now lives in @tyvm/knowhow-module-script; install that package separately
13
+ # for the correct node version if you need script execution support.
15
14
 
16
15
  set -e
17
16
 
@@ -81,23 +80,11 @@ fi
81
80
 
82
81
  # Use the last (latest patch) for building
83
82
  TARGET_NODE_BIN="${TARGET_NODE_BINS[${#TARGET_NODE_BINS[@]}-1]}"
84
- TARGET_NODE_NPM="$(dirname "$TARGET_NODE_BIN")/npm"
85
- TARGET_NODE_DIR="$(dirname "$TARGET_NODE_BIN")"
86
83
  TARGET_NODE_ACTUAL_VERSION="$("$TARGET_NODE_BIN" --version)"
87
84
 
88
85
  echo "🎯 Found Node $TARGET_VERSION installs: ${TARGET_NODE_BINS[*]}"
89
86
  echo "🔨 Building with: $TARGET_NODE_BIN ($TARGET_NODE_ACTUAL_VERSION)"
90
87
 
91
- # --- Pick the right isolated-vm version for the target node ---
92
- # isolated-vm@5.x supports Node <22, isolated-vm@6.x requires Node >=22
93
- if [ "$TARGET_MAJOR" -ge 22 ]; then
94
- IVM_VERSION="^6.0.0"
95
- echo "📌 Using isolated-vm@6.x (Node >= 22)"
96
- else
97
- IVM_VERSION="^5.0.4"
98
- echo "📌 Using isolated-vm@5.x (Node < 22)"
99
- fi
100
-
101
88
  # --- Create staging directory ---
102
89
  STAGING_DIR="/tmp/knowhow-node-${TARGET_MAJOR}"
103
90
  rm -rf "$STAGING_DIR"
@@ -114,13 +101,11 @@ for item in README.md LICENSE .npmignore; do
114
101
  [ -e "$PACKAGE_DIR/$item" ] && cp "$PACKAGE_DIR/$item" "$STAGING_DIR/" || true
115
102
  done
116
103
 
117
- # --- Patch package.json for target isolated-vm version ---
118
- echo "📝 Patching package.json for isolated-vm $IVM_VERSION..."
104
+ # --- Patch package.json to remove workspace protocol deps ---
105
+ echo "📝 Patching package.json..."
119
106
  "$NODE20_BIN" -e "
120
107
  const fs = require('fs');
121
108
  const pkg = JSON.parse(fs.readFileSync('$STAGING_DIR/package.json', 'utf8'));
122
- pkg.dependencies['isolated-vm'] = '$IVM_VERSION';
123
- // Remove workspace protocol deps that won't resolve outside the monorepo
124
109
  if (pkg.dependencies) {
125
110
  for (const [k, v] of Object.entries(pkg.dependencies)) {
126
111
  if (String(v).startsWith('workspace:')) delete pkg.dependencies[k];
@@ -130,13 +115,14 @@ echo "📝 Patching package.json for isolated-vm $IVM_VERSION..."
130
115
  console.log('✅ package.json patched');
131
116
  "
132
117
 
133
- # --- Install deps in staging dir using target node ---
118
+ # --- Install dependencies in staging dir with target Node ---
119
+ TARGET_NODE_NPM="$(dirname "$TARGET_NODE_BIN")/npm"
134
120
  echo ""
135
121
  echo "📦 Installing dependencies in staging dir with Node $TARGET_MAJOR..."
136
122
  cd "$STAGING_DIR"
137
- # Prepend target node bin to PATH so npm/node-gyp uses the correct node version
138
- PATH="$TARGET_NODE_DIR:$PATH" "$TARGET_NODE_NPM" install --no-save 2>&1
139
- echo "✅ Dependencies installed (isolated-vm compiled for Node $TARGET_MAJOR)"
123
+ "$TARGET_NODE_NPM" install --omit=dev
124
+ echo " Dependencies installed"
125
+ cd "$PACKAGE_DIR"
140
126
 
141
127
  # --- Symlink globally for ALL matching Node version installs ---
142
128
  PKG_NAME="$("$NODE20_BIN" -e "console.log(require('$STAGING_DIR/package.json').name)")"
@@ -156,8 +156,8 @@ export const includedTools = [
156
156
  },
157
157
  model: {
158
158
  type: "string",
159
- description: "The model to use (default: 'gpt-4o')",
160
- default: "gpt-4o",
159
+ description: "The model to use (default: 'gpt-5.4-nano')",
160
+ default: "gpt-5.4-nano",
161
161
  },
162
162
  },
163
163
  required: ["imageUrl", "question"],
package/src/ai.ts CHANGED
@@ -79,59 +79,103 @@ function estimateTokens(text: string): number {
79
79
  return Math.ceil(text.length / 4);
80
80
  }
81
81
 
82
- export async function summarizeTexts(
/**
 * Returns true if the error looks like a context-window-exceeded error from any provider.
 *
 * Matching is done on the lower-cased error message, so it is case-insensitive.
 * An HTTP 400 whose message merely mentions "context" is also treated as a
 * context-length failure.
 *
 * @param err Anything caught from a provider call: an Error, a plain object
 *            with `message`/`status`, a thrown string, or null/undefined.
 * @returns true when the message (or 400 status + "context") indicates the
 *          input exceeded the model's context window; false otherwise.
 */
function isContextLengthError(err: unknown): boolean {
  const fields =
    typeof err === "object" && err !== null
      ? (err as { message?: unknown; status?: unknown })
      : undefined;

  // A thrown string is matched on its own text; for objects only a string
  // `message` is inspected. Non-string messages are ignored rather than
  // crashing on `.toLowerCase()`, which would hide the original error.
  const rawMessage = fields ? fields.message : err;
  const msg = typeof rawMessage === "string" ? rawMessage.toLowerCase() : "";

  // "too long" also covers provider messages such as "input too long".
  const needles = [
    "context window",
    "context length",
    "maximum context",
    "too long",
    "exceeds the context",
    "input exceeds",
  ];
  if (needles.some((needle) => msg.includes(needle))) {
    return true;
  }

  return fields?.status === 400 && msg.includes("context");
}
98
+
99
+ /**
100
+ * Recursively summarize an array of texts using a split-and-summarize approach.
101
+ * When the combined texts exceed the context window (either by estimate or actual API error),
102
+ * split the array in half, summarize each half recursively, then combine.
103
+ */
104
+ async function summarizeTextsRecursive(
83
105
  texts: string[],
84
106
  template: string,
85
- model = "",
86
- agent = ""
87
- ) {
88
- const effectiveModel = model || Models.openai.GPT_54_Nano;
107
+ model: string,
108
+ agent: string,
109
+ contextLimit: number,
110
+ depth = 0
111
+ ): Promise<string> {
112
+ const indent = " ".repeat(depth);
113
+
114
+ // Base case: single text — just run the prompt directly
115
+ if (texts.length === 1) {
116
+ const content = template.replaceAll("{text}", texts[0]);
117
+ console.log(`${indent}summarizeTexts[depth=${depth}]: single text, ~${estimateTokens(content)} tokens`);
118
+ return singlePrompt(content, model, agent);
119
+ }
89
120
 
90
- // Estimate total tokens if we were to combine all texts into one prompt
121
+ // Check if combined fits in context window by estimate
91
122
  const combinedText = texts.join("\n\n");
92
123
  const combinedContent = template.replaceAll("{text}", combinedText);
93
124
  const estimatedTokens = estimateTokens(combinedContent);
94
- const contextLimit = getModelContextLimit(effectiveModel);
95
-
96
- console.log(
97
- `summarizeTexts: ${texts.length} text(s), ~${estimatedTokens} estimated tokens, context limit: ${contextLimit}`
98
- );
99
125
 
100
- // If everything fits in one context window, do a single prompt
101
126
  if (estimatedTokens < contextLimit) {
102
- console.log("summarizeTexts: fits in context window, using single prompt");
103
- return singlePrompt(combinedContent, model, agent).catch((err) => {
104
- return `Texts of combined length ${combinedText.length} could not be summarized due to error: ${err.message}`;
105
- });
127
+ // Try single combined prompt — if context error, fall through to split
128
+ console.log(`${indent}summarizeTexts[depth=${depth}]: ${texts.length} texts, ~${estimatedTokens} tokens, trying combined`);
129
+ try {
130
+ return await singlePrompt(combinedContent, model, agent);
131
+ } catch (err: any) {
132
+ if (!isContextLengthError(err)) throw err;
133
+ console.log(`${indent}summarizeTexts[depth=${depth}]: API rejected (context too long), splitting in half`);
134
+ }
135
+ } else {
136
+ console.log(`${indent}summarizeTexts[depth=${depth}]: ${texts.length} texts, ~${estimatedTokens} tokens exceeds limit, splitting in half`);
106
137
  }
107
138
 
108
- // Otherwise summarize each text individually, then combine
109
- console.log(
110
- "summarizeTexts: exceeds context window, summarizing texts individually"
111
- );
112
- const summaries = [];
113
- for (const text of texts) {
114
- const content = template.replaceAll("{text}", text);
139
+ // Split texts in half and recurse
140
+ const mid = Math.ceil(texts.length / 2);
141
+ const left = texts.slice(0, mid);
142
+ const right = texts.slice(mid);
115
143
 
116
- console.log(content);
144
+ const [leftSummary, rightSummary] = await Promise.all([
145
+ summarizeTextsRecursive(left, template, model, agent, contextLimit, depth + 1),
146
+ summarizeTextsRecursive(right, template, model, agent, contextLimit, depth + 1),
147
+ ]);
117
148
 
118
- const summary = await singlePrompt(content, model, agent).catch((err) => {
119
- return `Text of length ${text.length} could not be summarized due to error: ${err.message}`;
120
- });
121
- summaries.push(summary);
122
- }
149
+ // Combine the two halves with a final summary prompt
150
+ const combinedSummaries = [leftSummary, rightSummary].join("\n\n");
151
+ const finalContent = template.replaceAll("{text}", combinedSummaries);
152
+ const finalEstimate = estimateTokens(finalContent);
153
+ console.log(`${indent}summarizeTexts[depth=${depth}]: combining halves, ~${finalEstimate} tokens`);
123
154
 
124
- if (summaries.length === 1) {
125
- return summaries[0];
155
+ if (finalEstimate < contextLimit) {
156
+ return singlePrompt(finalContent, model, agent);
126
157
  }
127
158
 
128
- // Otherwise form a final summary of the pieces
129
- const finalPrompt =
130
- `Generate a final output for this prompt ${template} with these incremental summaries: ` +
131
- summaries.join("\n\n");
159
+ // If even the combined summaries are too long, recurse one more level
160
+ return summarizeTextsRecursive([leftSummary, rightSummary], template, model, agent, contextLimit, depth + 1);
161
+ }
162
+
163
+ export async function summarizeTexts(
164
+ texts: string[],
165
+ template: string,
166
+ model = "",
167
+ agent = ""
168
+ ) {
169
+ const effectiveModel = model || Models.openai.GPT_54_Nano;
170
+ const contextLimit = getModelContextLimit(effectiveModel);
171
+
172
+ console.log(
173
+ `summarizeTexts: ${texts.length} text(s), context limit: ${contextLimit}, model: ${effectiveModel}`
174
+ );
132
175
 
133
- const finalSummary = await singlePrompt(finalPrompt, model, agent);
134
- return finalSummary;
176
+ return summarizeTextsRecursive(texts, template, model, agent, contextLimit).catch((err) => {
177
+ return `Texts of combined length ${texts.reduce((a, t) => a + t.length, 0)} could not be summarized due to error: ${err.message}`;
178
+ });
135
179
  }
136
180
 
137
181
  export async function chunkText(text: string, chunkSize?: number) {
@@ -38,7 +38,7 @@ export class CliChatService implements ChatService {
38
38
  searchMode: false,
39
39
  voiceMode: false,
40
40
  multilineMode: false,
41
- currentModel: "gpt-4o",
41
+ currentModel: "gpt-5.4-nano",
42
42
  currentProvider: "openai",
43
43
  chatHistory: this.chatHistory,
44
44
  plugins,
@@ -517,7 +517,12 @@ export class AgentModule extends BaseChatModule {
517
517
 
518
518
  // Restore the full message history from the last thread
519
519
  const threads = session.threads || [];
520
- const lastThread = threads.length > 0 ? threads[threads.length - 1] : [];
520
+ // Guard against sessions saved with a flat Message[] instead of Message[][]
521
+ // (a bug where threadUpdate emitted a single thread instead of all threads)
522
+ const normalizedThreads: Message[][] = threads.length > 0 && !Array.isArray(threads[0])
523
+ ? [threads as unknown as Message[]]
524
+ : threads as Message[][];
525
+ const lastThread = normalizedThreads.length > 0 ? normalizedThreads[normalizedThreads.length - 1] : [];
521
526
  const resumeMessages = [...lastThread];
522
527
 
523
528
  // Append the resume prompt to the last user message (or add a new one)
@@ -701,7 +706,7 @@ export class AgentModule extends BaseChatModule {
701
706
 
702
707
  // Set up session update listener
703
708
  const threadUpdateHandler = async (threadState: any) => {
704
- this.updateSession(taskId, threadState);
709
+ this.updateSession(taskId, agent.getThreads());
705
710
  taskInfo.totalCost = agent.getTotalCostUsd();
706
711
  };
707
712
  agent.agentEvents.on(agent.eventTypes.threadUpdate, threadUpdateHandler);
@@ -362,8 +362,47 @@ export class SessionsModule extends BaseChatModule {
362
362
  // Check filesystem agent (may have metadata with threads)
363
363
  const fsAgentPath = path.join(".knowhow", "processes", "agents", id);
364
364
  if (fs.existsSync(fsAgentPath)) {
365
+ // Try to load threads from metadata.json and resume
366
+ const metadataPath = path.join(fsAgentPath, "metadata.json");
367
+ if (fs.existsSync(metadataPath)) {
368
+ try {
369
+ const raw = fs.readFileSync(metadataPath, "utf-8");
370
+ const metadata = JSON.parse(raw);
371
+ const threads: any[] = metadata.threads || [];
372
+ const agentName = metadata.agentName || "Developer";
373
+
374
+ // Try to get initialInput from the saved session file (more complete)
375
+ // since metadata.json doesn't always store it
376
+ const savedSession = sessionManager.loadSession(id);
377
+ const initialInput = savedSession?.initialInput || metadata.initialInput || metadata.prompt || "";
378
+
379
+ console.log(`\n📋 Found task in filesystem: ${id}`);
380
+ console.log(` Agent : ${agentName}`);
381
+ console.log(` Task : ${initialInput}`);
382
+ console.log(` Status : ${metadata.status || "unknown"}`);
383
+
384
+ const additionalContext = await this.chatService?.getInput(
385
+ "Add any additional context for resuming this session (or press Enter to skip): "
386
+ );
387
+
388
+ // Normalize threads: if flat Message[] (old buggy format), wrap in array
389
+ const normalizedThreads = threads.length > 0 && !Array.isArray(threads[0])
390
+ ? [threads]
391
+ : threads;
392
+
393
+ await this.agentModule.resumeFromMessages({
394
+ agentName,
395
+ taskId: id,
396
+ threads: normalizedThreads,
397
+ input: additionalContext?.trim() || initialInput || "",
398
+ });
399
+ return;
400
+ } catch (e: any) {
401
+ console.error(`⚠️ Failed to load metadata for task ${id}: ${e.message}`);
402
+ }
403
+ }
365
404
  console.log(
366
- `⚠️ Task ${id} exists in the filesystem but has no saved session.\n` +
405
+ `⚠️ Task ${id} exists in the filesystem but has no saved session or metadata.\n` +
367
406
  ` Use /attach ${id} if it is still running.`
368
407
  );
369
408
  return;
@@ -45,7 +45,7 @@ export class SystemModule extends BaseChatModule {
45
45
  const agent = context?.selectedAgent;
46
46
  const Clients = agent.clientService;
47
47
  const currentProvider = context?.currentProvider || "openai";
48
- const currentModel = context?.currentModel || "gpt-4o";
48
+ const currentModel = context?.currentModel || "gpt-5.4-nano";
49
49
 
50
50
  const models = Clients.getRegisteredModels(currentProvider);
51
51
  console.log(models);
@@ -86,7 +86,7 @@ export class SystemModule extends BaseChatModule {
86
86
  const Clients = agent.clientService;
87
87
 
88
88
  const currentProvider = context?.currentProvider || "openai";
89
- const currentModel = context?.currentModel || "gpt-4o";
89
+ const currentModel = context?.currentModel || "gpt-5.4-nano";
90
90
 
91
91
  const providers = Object.keys(Clients.clients);
92
92
  console.log(providers);
@@ -376,7 +376,7 @@ export class GenericAnthropicClient implements GenericClient {
376
376
  tool_choice: { type: "auto" },
377
377
  tools,
378
378
  }),
379
- });
379
+ }, { signal: options.signal });
380
380
 
381
381
  if (!response.content || !response.content.length) {
382
382
  console.log("no content in Anthropic response", response);
@@ -33,6 +33,7 @@ import { ContextLimits } from "./contextLimits";
33
33
  import { OpenAiTextPricing } from "./pricing/openai";
34
34
  import { AnthropicTextPricing } from "./pricing/anthropic";
35
35
  import { GeminiPricing } from "./pricing/google";
36
+ import { withRetry } from "./withRetry";
36
37
  import {
37
38
  XaiTextPricing,
38
39
  XaiImagePricing,
@@ -665,7 +666,10 @@ export class AIClient {
665
666
  } model registered. Try using ${JSON.stringify(this.listAllModels())}`
666
667
  );
667
668
  }
668
- return client.createChatCompletion({ ...options, model });
669
+ return withRetry(
670
+ (signal) => client.createChatCompletion({ ...options, model, signal }),
671
+ options
672
+ );
669
673
  }
670
674
 
671
675
  async createEmbedding(
@@ -680,7 +684,10 @@ export class AIClient {
680
684
  } model registered. Try using ${JSON.stringify(this.listAllModels())}`
681
685
  );
682
686
  }
683
- return client.createEmbedding({ ...options, model });
687
+ return withRetry(
688
+ (signal) => client.createEmbedding({ ...options, model, signal }),
689
+ options
690
+ );
684
691
  }
685
692
 
686
693
  async createAudioTranscription(
@@ -693,7 +700,10 @@ export class AIClient {
693
700
  `Provider ${provider} does not support audio transcription.`
694
701
  );
695
702
  }
696
- return client.createAudioTranscription(options);
703
+ return withRetry(
704
+ (signal) => client.createAudioTranscription({ ...options, signal }),
705
+ options
706
+ );
697
707
  }
698
708
 
699
709
  async createAudioGeneration(
@@ -711,7 +721,10 @@ export class AIClient {
711
721
  `Model ${options.model} not registered for provider ${provider}.`
712
722
  );
713
723
  }
714
- return client.createAudioGeneration({ ...options, model });
724
+ return withRetry(
725
+ (signal) => client.createAudioGeneration({ ...options, model, signal }),
726
+ options
727
+ );
715
728
  }
716
729
 
717
730
  async createImageGeneration(
@@ -729,7 +742,10 @@ export class AIClient {
729
742
  `Model ${options.model} not registered for provider ${provider}.`
730
743
  );
731
744
  }
732
- return client.createImageGeneration({ ...options, model });
745
+ return withRetry(
746
+ (signal) => client.createImageGeneration({ ...options, model, signal }),
747
+ options
748
+ );
733
749
  }
734
750
 
735
751
  async createVideoGeneration(
@@ -747,7 +763,10 @@ export class AIClient {
747
763
  `Model ${options.model} not registered for provider ${provider}.`
748
764
  );
749
765
  }
750
- return client.createVideoGeneration({ ...options, model });
766
+ return withRetry(
767
+ (signal) => client.createVideoGeneration({ ...options, model, signal }),
768
+ options
769
+ );
751
770
  }
752
771
 
753
772
  async getVideoStatus(
@@ -63,6 +63,10 @@ export class GenericOpenAiClient implements GenericClient {
63
63
  });
64
64
  }
65
65
 
66
+ /**
67
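+ * NOTE(review): misplaced comment — the text below describes the shared withRetry helper, not reasoningEffort.
68
+ * withRetry executes a function with timeout, retries, and exponential backoff; retriable errors: 5xx, timeout, ECONNRESET, ETIMEDOUT, rate limits (429).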
69
+ */
66
70
  reasoningEffort(
67
71
  messages: CompletionOptions["messages"]
68
72
  ): "low" | "medium" | "high" {
@@ -155,12 +159,11 @@ export class GenericOpenAiClient implements GenericClient {
155
159
  max_completion_tokens: Math.max(options.max_tokens ?? 0, 16_000),
156
160
  reasoning_effort: this.resolveReasoningEffort(options),
157
161
  }),
158
-
159
162
  ...(options.tools && {
160
163
  tools: options.tools,
161
164
  tool_choice: "auto",
162
165
  }),
163
- });
166
+ }, { signal: options.signal });
164
167
 
165
168
  const usdCost = this.calculateCost(options.model, response.usage);
166
169
 
@@ -453,7 +456,7 @@ export class GenericOpenAiClient implements GenericClient {
453
456
  prompt: options.prompt,
454
457
  response_format: options.response_format || "verbose_json",
455
458
  temperature: options.temperature,
456
- });
459
+ }, { signal: options.signal });
457
460
 
458
461
  // Calculate cost: $0.006 per minute for Whisper
459
462
  const duration = typeof response === "object" && "duration" in response && typeof response.duration === "number"
@@ -489,7 +492,7 @@ export class GenericOpenAiClient implements GenericClient {
489
492
  voice: options.voice as any,
490
493
  response_format: options.response_format || "mp3",
491
494
  speed: options.speed,
492
- });
495
+ }, { signal: options.signal });
493
496
 
494
497
  const buffer = Buffer.from(await response.arrayBuffer());
495
498
 
@@ -518,7 +521,7 @@ export class GenericOpenAiClient implements GenericClient {
518
521
  style: options.style,
519
522
  response_format: options.response_format,
520
523
  user: options.user,
521
- });
524
+ }, { signal: options.signal });
522
525
 
523
526
  // Cost calculation varies by model and settings
524
527
  // DALL-E 3: $0.040-$0.120 per image depending on quality/size
@@ -57,7 +57,30 @@ export interface ToolCall {
57
57
  };
58
58
  }
59
59
 
60
- export interface CompletionOptions {
60
+ export interface RetryOptions {
61
+ /**
62
+ * Request timeout in milliseconds per attempt. If the request does not complete
63
+ * within this time it is aborted and retried according to maxRetries.
64
+ */
65
+ timeout?: number;
66
+ /**
67
+ * Maximum number of retry attempts for retriable errors (5xx, timeout, ECONNRESET, 429).
68
+ * Default: 2. Set to 0 to disable retries.
69
+ */
70
+ maxRetries?: number;
71
+ /**
72
+ * Base backoff delay in milliseconds for exponential retry backoff.
73
+ * Default: 1000ms. Each retry waits backoffMs * 2^attempt ms.
74
+ */
75
+ backoffMs?: number;
76
+ /**
77
+ * Optional external AbortSignal. When the signal is aborted the current
78
+ * attempt is cancelled immediately and no further retries are made.
79
+ */
80
+ signal?: AbortSignal;
81
+ }
82
+
83
+ export interface CompletionOptions extends RetryOptions {
61
84
  model: string;
62
85
  messages: Message[];
63
86
  tools?: Tool[];
@@ -113,7 +136,7 @@ export interface CompletionResponse {
113
136
  usd_cost?: number;
114
137
  }
115
138
 
116
- export interface EmbeddingOptions {
139
+ export interface EmbeddingOptions extends RetryOptions {
117
140
  input: string;
118
141
  model?: string;
119
142
  }
@@ -132,7 +155,7 @@ export interface EmbeddingResponse {
132
155
  usd_cost?: number;
133
156
  }
134
157
 
135
- export interface AudioTranscriptionOptions {
158
+ export interface AudioTranscriptionOptions extends RetryOptions {
136
159
  file: Blob | File | any; // Support for Node.js ReadStream or web File/Blob
137
160
  model?: string;
138
161
  language?: string;
@@ -162,7 +185,7 @@ export interface AudioTranscriptionResponse {
162
185
  usd_cost?: number;
163
186
  }
164
187
 
165
- export interface AudioGenerationOptions {
188
+ export interface AudioGenerationOptions extends RetryOptions {
166
189
  model: string;
167
190
  input: string;
168
191
  voice: string; // e.g. "alloy", "echo", "fable", "onyx", "nova", "shimmer" for OpenAI; "Kore", "Puck" etc. for Gemini
@@ -176,7 +199,7 @@ export interface AudioGenerationResponse {
176
199
  usd_cost?: number;
177
200
  }
178
201
 
179
- export interface ImageGenerationOptions {
202
+ export interface ImageGenerationOptions extends RetryOptions {
180
203
  model: string;
181
204
  prompt: string;
182
205
  n?: number;
@@ -197,7 +220,7 @@ export interface ImageGenerationResponse {
197
220
  usd_cost?: number;
198
221
  }
199
222
 
200
- export interface VideoGenerationOptions {
223
+ export interface VideoGenerationOptions extends RetryOptions {
201
224
  model: string;
202
225
  prompt: string;
203
226
  duration?: number; // seconds
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Shared retry/timeout helper for all AI clients.
3
+ *
4
+ * Executes `fn` with exponential backoff for retriable errors:
5
+ * - Rate limits (429)
6
+ * - Timeouts (AbortError, ETIMEDOUT, ECONNRESET)
7
+ * - Server errors (5xx)
8
+ *
9
+ * @param fn Function to execute. Receives a combined AbortSignal
10
+ * that fires on per-attempt timeout OR external signal abort.
11
+ * @param opts Any object with optional RetryOptions fields (timeout, maxRetries,
12
+ * backoffMs, signal). Extra fields are ignored — so you can pass the
13
+ * full options object from any AI method directly.
14
+ * - timeout: Per-attempt timeout in ms. No timeout if omitted.
15
+ * - maxRetries: Max retry attempts after first failure. Default: 2.
16
+ * - backoffMs: Base backoff delay in ms. Default: 1000.
17
+ * - signal: Optional external AbortSignal. When aborted, the current
18
+ * attempt is cancelled and no further retries are made.
19
+ */
20
+ import type { RetryOptions } from "./types";
21
+
/**
 * Runs `fn`, retrying retriable failures with exponential backoff and an
 * optional per-attempt timeout.
 *
 * An attempt is retried when any of the following holds (and retries remain):
 *  - this helper's own per-attempt timeout fired during the attempt;
 *  - the error text contains `timeout`, `TimeoutError`, `ECONNRESET`,
 *    `ETIMEDOUT` or `AbortError`;
 *  - the error carries a numeric `status` of 429 or 5xx;
 *  - no numeric `status` is present and the error text contains a standalone
 *    429 or 5xx number (digits embedded in a longer number do not count).
 *
 * @param fn   Function to execute. Receives a combined AbortSignal that fires on
 *             per-attempt timeout or external abort; it is `undefined` when
 *             neither `timeout` nor `signal` is supplied. The timeout only takes
 *             effect if `fn` honours the signal.
 * @param opts Retry settings; extra fields are ignored.
 *             - timeout: per-attempt timeout in ms (none if omitted or 0).
 *             - maxRetries: retries after the first failure, default 2;
 *               negative or NaN values are treated as 0.
 *             - backoffMs: base delay in ms, default 1000; the wait before retry
 *               k is `backoffMs * 2^k` (k starts at 0).
 *             - signal: external AbortSignal; once aborted no further attempts
 *               are made and a pending backoff wait is cut short.
 * @returns The value `fn` resolves with.
 * @throws The last error from `fn` when it is not retriable, retries are
 *         exhausted, or the external signal aborted during the attempt; the
 *         signal's abort reason when it aborts before an attempt or during backoff.
 */
export async function withRetry<T>(
  fn: (signal?: AbortSignal) => Promise<T>,
  opts: RetryOptions = {}
): Promise<T> {
  const requestedRetries = opts.maxRetries ?? 2;
  // Negative/NaN would otherwise skip the loop entirely and never call `fn`.
  const maxRetries = requestedRetries > 0 ? requestedRetries : 0;
  const backoffMs = opts.backoffMs ?? 1000;
  const timeout = opts.timeout;
  const externalSignal = opts.signal;

  const abortReason = (): unknown =>
    externalSignal?.reason ?? new DOMException("Aborted", "AbortError");

  const transientTokens = ["timeout", "TimeoutError", "ECONNRESET", "ETIMEDOUT", "AbortError"];

  const isRetriable = (err: unknown): boolean => {
    const text = String(err);
    if (transientTokens.some((token) => text.includes(token))) {
      return true;
    }
    const status =
      typeof err === "object" && err !== null
        ? (err as { status?: unknown }).status
        : undefined;
    if (typeof status === "number") {
      return status === 429 || (status >= 500 && status <= 599);
    }
    // Fall back to the message, but only accept a standalone status-like
    // number so that e.g. "15000 tokens" is not mistaken for a 5xx.
    return /(?<!\d)(?:429|5\d\d)(?!\d)/.test(text);
  };

  // Backoff wait that ends early (rejecting with the abort reason) when the
  // external signal aborts, and never leaves a listener behind.
  const sleep = (ms: number): Promise<void> =>
    new Promise<void>((resolve, reject) => {
      const onAbort = (): void => {
        clearTimeout(wake);
        reject(abortReason());
      };
      const wake = setTimeout(() => {
        externalSignal?.removeEventListener("abort", onAbort);
        resolve();
      }, ms);
      externalSignal?.addEventListener("abort", onAbort, { once: true });
    });

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    // If the external signal is already aborted, bail out immediately.
    if (externalSignal?.aborted) {
      throw abortReason();
    }

    // Combine per-attempt timeout with the external signal into one controller.
    const controller = (timeout || externalSignal) ? new AbortController() : undefined;
    let timer: ReturnType<typeof setTimeout> | undefined;
    let onExternalAbort: (() => void) | undefined;
    let timedOut = false;

    if (controller && timeout) {
      timer = setTimeout(() => {
        timedOut = true;
        controller.abort(new DOMException("Request timed out", "TimeoutError"));
      }, timeout);
    }
    if (controller && externalSignal) {
      onExternalAbort = () => controller.abort(abortReason());
      externalSignal.addEventListener("abort", onExternalAbort, { once: true });
    }

    let failure: unknown;
    try {
      return await fn(controller?.signal);
    } catch (err: unknown) {
      failure = err;
    } finally {
      // Always release per-attempt resources, on success as well as failure,
      // so a long-lived external signal does not accumulate listeners.
      clearTimeout(timer);
      if (onExternalAbort) {
        externalSignal?.removeEventListener("abort", onExternalAbort);
      }
    }

    // If the external signal was aborted, don't retry — propagate immediately.
    if (externalSignal?.aborted || attempt >= maxRetries) {
      throw failure;
    }
    // Our own timeout counts as retriable whatever error the client surfaced
    // for the abort.
    if (!timedOut && !isRetriable(failure)) {
      throw failure;
    }

    await sleep(backoffMs * 2 ** attempt);
  }

  // Unreachable: every loop iteration either returns or throws.
  throw new Error("withRetry: exhausted retries");
}