@tyvm/knowhow 0.0.69 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/docs/shell-commands.md +174 -0
  2. package/package.json +1 -1
  3. package/src/agents/base/base.ts +1 -3
  4. package/src/agents/developer/developer.ts +21 -13
  5. package/src/agents/tools/agentCall.ts +4 -2
  6. package/src/agents/tools/fileSearch.ts +5 -1
  7. package/src/agents/tools/startAgentTask.ts +131 -22
  8. package/src/chat/CliChatService.ts +57 -11
  9. package/src/chat/modules/AgentModule.ts +72 -12
  10. package/src/chat/modules/CustomCommandsModule.ts +79 -0
  11. package/src/chat/modules/InternalChatModule.ts +11 -1
  12. package/src/chat/modules/ShellCommandModule.ts +96 -0
  13. package/src/chat/modules/index.ts +1 -0
  14. package/src/chat/types.ts +14 -2
  15. package/src/chat.ts +16 -13
  16. package/src/cli.ts +16 -6
  17. package/src/clients/anthropic.ts +41 -90
  18. package/src/clients/gemini.ts +445 -87
  19. package/src/clients/index.ts +125 -0
  20. package/src/clients/knowhow.ts +81 -0
  21. package/src/clients/openai.ts +256 -145
  22. package/src/clients/pricing/anthropic.ts +90 -0
  23. package/src/clients/pricing/google.ts +65 -0
  24. package/src/clients/pricing/index.ts +4 -0
  25. package/src/clients/pricing/openai.ts +134 -0
  26. package/src/clients/pricing/xai.ts +62 -0
  27. package/src/clients/types.ts +170 -1
  28. package/src/clients/xai.ts +275 -46
  29. package/src/config.ts +61 -15
  30. package/src/embeddings.ts +9 -1
  31. package/src/microphone.ts +15 -16
  32. package/src/migrations.ts +151 -0
  33. package/src/plugins/AgentsMdPlugin.ts +118 -0
  34. package/src/plugins/PluginBase.ts +8 -0
  35. package/src/plugins/downloader/downloader.ts +5 -6
  36. package/src/plugins/embedding.ts +10 -8
  37. package/src/plugins/exec.ts +70 -0
  38. package/src/plugins/github.ts +120 -74
  39. package/src/plugins/language.ts +11 -13
  40. package/src/plugins/plugins.ts +25 -4
  41. package/src/plugins/tmux.ts +132 -0
  42. package/src/plugins/types.ts +1 -0
  43. package/src/plugins/vim.ts +14 -1
  44. package/src/services/AgentSyncFs.ts +417 -0
  45. package/src/services/{AgentSynchronization.ts → AgentSyncKnowhowWeb.ts} +2 -2
  46. package/src/services/EventService.ts +0 -1
  47. package/src/services/KnowhowClient.ts +106 -0
  48. package/src/services/index.ts +4 -2
  49. package/src/types.ts +57 -4
  50. package/src/worker.ts +11 -6
  51. package/tests/manual/modalities/README.md +157 -0
  52. package/tests/manual/modalities/google.modalities.test.ts +335 -0
  53. package/tests/manual/modalities/openai.modalities.test.ts +329 -0
  54. package/tests/manual/modalities/streaming.test.ts +260 -0
  55. package/tests/manual/modalities/xai.modalities.test.ts +307 -0
  56. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +5 -5
  57. package/tests/plugins/language/languagePlugin-integration.test.ts +1 -1
  58. package/tests/plugins/language/languagePlugin.test.ts +17 -8
  59. package/ts_build/package.json +1 -1
  60. package/ts_build/src/agents/base/base.js +1 -1
  61. package/ts_build/src/agents/base/base.js.map +1 -1
  62. package/ts_build/src/agents/developer/developer.js +21 -12
  63. package/ts_build/src/agents/developer/developer.js.map +1 -1
  64. package/ts_build/src/agents/tools/agentCall.js +4 -2
  65. package/ts_build/src/agents/tools/agentCall.js.map +1 -1
  66. package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
  67. package/ts_build/src/agents/tools/fileSearch.js +2 -1
  68. package/ts_build/src/agents/tools/fileSearch.js.map +1 -1
  69. package/ts_build/src/agents/tools/github/index.d.ts +1 -1
  70. package/ts_build/src/agents/tools/startAgentTask.d.ts +2 -1
  71. package/ts_build/src/agents/tools/startAgentTask.js +118 -17
  72. package/ts_build/src/agents/tools/startAgentTask.js.map +1 -1
  73. package/ts_build/src/chat/CliChatService.d.ts +4 -0
  74. package/ts_build/src/chat/CliChatService.js +39 -5
  75. package/ts_build/src/chat/CliChatService.js.map +1 -1
  76. package/ts_build/src/chat/modules/AgentModule.d.ts +4 -1
  77. package/ts_build/src/chat/modules/AgentModule.js +49 -11
  78. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  79. package/ts_build/src/chat/modules/CustomCommandsModule.d.ts +9 -0
  80. package/ts_build/src/chat/modules/CustomCommandsModule.js +58 -0
  81. package/ts_build/src/chat/modules/CustomCommandsModule.js.map +1 -0
  82. package/ts_build/src/chat/modules/InternalChatModule.d.ts +2 -0
  83. package/ts_build/src/chat/modules/InternalChatModule.js +10 -0
  84. package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -1
  85. package/ts_build/src/chat/modules/ShellCommandModule.d.ts +8 -0
  86. package/ts_build/src/chat/modules/ShellCommandModule.js +83 -0
  87. package/ts_build/src/chat/modules/ShellCommandModule.js.map +1 -0
  88. package/ts_build/src/chat/modules/index.d.ts +1 -0
  89. package/ts_build/src/chat/modules/index.js +3 -1
  90. package/ts_build/src/chat/modules/index.js.map +1 -1
  91. package/ts_build/src/chat/types.d.ts +11 -1
  92. package/ts_build/src/chat.js +16 -13
  93. package/ts_build/src/chat.js.map +1 -1
  94. package/ts_build/src/cli.js +10 -3
  95. package/ts_build/src/cli.js.map +1 -1
  96. package/ts_build/src/clients/anthropic.d.ts +5 -1
  97. package/ts_build/src/clients/anthropic.js +18 -91
  98. package/ts_build/src/clients/anthropic.js.map +1 -1
  99. package/ts_build/src/clients/gemini.d.ts +80 -2
  100. package/ts_build/src/clients/gemini.js +336 -74
  101. package/ts_build/src/clients/gemini.js.map +1 -1
  102. package/ts_build/src/clients/index.d.ts +9 -1
  103. package/ts_build/src/clients/index.js +65 -0
  104. package/ts_build/src/clients/index.js.map +1 -1
  105. package/ts_build/src/clients/knowhow.d.ts +9 -1
  106. package/ts_build/src/clients/knowhow.js +43 -0
  107. package/ts_build/src/clients/knowhow.js.map +1 -1
  108. package/ts_build/src/clients/openai.d.ts +9 -1
  109. package/ts_build/src/clients/openai.js +201 -133
  110. package/ts_build/src/clients/openai.js.map +1 -1
  111. package/ts_build/src/clients/pricing/anthropic.d.ts +17 -0
  112. package/ts_build/src/clients/pricing/anthropic.js +93 -0
  113. package/ts_build/src/clients/pricing/anthropic.js.map +1 -0
  114. package/ts_build/src/clients/pricing/google.d.ts +73 -0
  115. package/ts_build/src/clients/pricing/google.js +68 -0
  116. package/ts_build/src/clients/pricing/google.js.map +1 -0
  117. package/ts_build/src/clients/pricing/index.d.ts +4 -0
  118. package/ts_build/src/clients/pricing/index.js +14 -0
  119. package/ts_build/src/clients/pricing/index.js.map +1 -0
  120. package/ts_build/src/clients/pricing/openai.d.ts +7 -0
  121. package/ts_build/src/clients/pricing/openai.js +137 -0
  122. package/ts_build/src/clients/pricing/openai.js.map +1 -0
  123. package/ts_build/src/clients/pricing/xai.d.ts +26 -0
  124. package/ts_build/src/clients/pricing/xai.js +59 -0
  125. package/ts_build/src/clients/pricing/xai.js.map +1 -0
  126. package/ts_build/src/clients/types.d.ts +135 -0
  127. package/ts_build/src/clients/xai.d.ts +9 -1
  128. package/ts_build/src/clients/xai.js +178 -46
  129. package/ts_build/src/clients/xai.js.map +1 -1
  130. package/ts_build/src/config.d.ts +1 -0
  131. package/ts_build/src/config.js +45 -16
  132. package/ts_build/src/config.js.map +1 -1
  133. package/ts_build/src/embeddings.js +8 -1
  134. package/ts_build/src/embeddings.js.map +1 -1
  135. package/ts_build/src/microphone.js +7 -9
  136. package/ts_build/src/microphone.js.map +1 -1
  137. package/ts_build/src/migrations.d.ts +17 -0
  138. package/ts_build/src/migrations.js +86 -0
  139. package/ts_build/src/migrations.js.map +1 -0
  140. package/ts_build/src/plugins/AgentsMdPlugin.d.ts +13 -0
  141. package/ts_build/src/plugins/AgentsMdPlugin.js +118 -0
  142. package/ts_build/src/plugins/AgentsMdPlugin.js.map +1 -0
  143. package/ts_build/src/plugins/PluginBase.d.ts +1 -0
  144. package/ts_build/src/plugins/PluginBase.js +3 -0
  145. package/ts_build/src/plugins/PluginBase.js.map +1 -1
  146. package/ts_build/src/plugins/downloader/downloader.js +5 -5
  147. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  148. package/ts_build/src/plugins/embedding.js +9 -8
  149. package/ts_build/src/plugins/embedding.js.map +1 -1
  150. package/ts_build/src/plugins/exec.d.ts +10 -0
  151. package/ts_build/src/plugins/exec.js +56 -0
  152. package/ts_build/src/plugins/exec.js.map +1 -0
  153. package/ts_build/src/plugins/github.js +93 -51
  154. package/ts_build/src/plugins/github.js.map +1 -1
  155. package/ts_build/src/plugins/language.js +14 -11
  156. package/ts_build/src/plugins/language.js.map +1 -1
  157. package/ts_build/src/plugins/plugins.d.ts +1 -0
  158. package/ts_build/src/plugins/plugins.js +19 -1
  159. package/ts_build/src/plugins/plugins.js.map +1 -1
  160. package/ts_build/src/plugins/tmux.d.ts +14 -0
  161. package/ts_build/src/plugins/tmux.js +108 -0
  162. package/ts_build/src/plugins/tmux.js.map +1 -0
  163. package/ts_build/src/plugins/types.d.ts +1 -0
  164. package/ts_build/src/plugins/vim.js +11 -1
  165. package/ts_build/src/plugins/vim.js.map +1 -1
  166. package/ts_build/src/services/AgentSyncFs.d.ts +34 -0
  167. package/ts_build/src/services/AgentSyncFs.js +325 -0
  168. package/ts_build/src/services/AgentSyncFs.js.map +1 -0
  169. package/ts_build/src/services/AgentSyncKnowhowWeb.d.ts +29 -0
  170. package/ts_build/src/services/AgentSyncKnowhowWeb.js +178 -0
  171. package/ts_build/src/services/AgentSyncKnowhowWeb.js.map +1 -0
  172. package/ts_build/src/services/AgentSynchronization.d.ts +1 -1
  173. package/ts_build/src/services/AgentSynchronization.js +3 -3
  174. package/ts_build/src/services/AgentSynchronization.js.map +1 -1
  175. package/ts_build/src/services/EventService.js.map +1 -1
  176. package/ts_build/src/services/KnowhowClient.d.ts +9 -1
  177. package/ts_build/src/services/KnowhowClient.js +58 -0
  178. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  179. package/ts_build/src/services/index.d.ts +2 -1
  180. package/ts_build/src/services/index.js +2 -1
  181. package/ts_build/src/services/index.js.map +1 -1
  182. package/ts_build/src/types.d.ts +26 -1
  183. package/ts_build/src/types.js +45 -4
  184. package/ts_build/src/types.js.map +1 -1
  185. package/ts_build/src/utils/PersistentInputManager.d.ts +28 -0
  186. package/ts_build/src/utils/PersistentInputManager.js +293 -0
  187. package/ts_build/src/utils/PersistentInputManager.js.map +1 -0
  188. package/ts_build/src/worker.js +2 -2
  189. package/ts_build/src/worker.js.map +1 -1
  190. package/ts_build/tests/manual/modalities/google.modalities.test.d.ts +1 -0
  191. package/ts_build/tests/manual/modalities/google.modalities.test.js +252 -0
  192. package/ts_build/tests/manual/modalities/google.modalities.test.js.map +1 -0
  193. package/ts_build/tests/manual/modalities/openai.modalities.test.d.ts +1 -0
  194. package/ts_build/tests/manual/modalities/openai.modalities.test.js +252 -0
  195. package/ts_build/tests/manual/modalities/openai.modalities.test.js.map +1 -0
  196. package/ts_build/tests/manual/modalities/streaming.test.d.ts +1 -0
  197. package/ts_build/tests/manual/modalities/streaming.test.js +206 -0
  198. package/ts_build/tests/manual/modalities/streaming.test.js.map +1 -0
  199. package/ts_build/tests/manual/modalities/xai.modalities.test.d.ts +1 -0
  200. package/ts_build/tests/manual/modalities/xai.modalities.test.js +226 -0
  201. package/ts_build/tests/manual/modalities/xai.modalities.test.js.map +1 -0
  202. package/ts_build/tests/manual/persistent-input-test.d.ts +1 -0
  203. package/ts_build/tests/manual/persistent-input-test.js +35 -0
  204. package/ts_build/tests/manual/persistent-input-test.js.map +1 -0
  205. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +5 -5
  206. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  207. package/ts_build/tests/plugins/language/languagePlugin-integration.test.js +1 -1
  208. package/ts_build/tests/plugins/language/languagePlugin-integration.test.js.map +1 -1
  209. package/ts_build/tests/plugins/language/languagePlugin.test.js +17 -7
  210. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
@@ -4,6 +4,20 @@ import {
4
4
  EmbeddingOptions,
5
5
  EmbeddingResponse,
6
6
  GenericClient,
7
+ AudioTranscriptionOptions,
8
+ AudioTranscriptionResponse,
9
+ AudioGenerationOptions,
10
+ AudioGenerationResponse,
11
+ ImageGenerationOptions,
12
+ ImageGenerationResponse,
13
+ VideoGenerationOptions,
14
+ VideoGenerationResponse,
15
+ VideoStatusOptions,
16
+ VideoStatusResponse,
17
+ FileUploadOptions,
18
+ FileUploadResponse,
19
+ FileDownloadOptions,
20
+ FileDownloadResponse,
7
21
  } from "./types";
8
22
  import { GenericOpenAiClient } from "./openai";
9
23
  import { GenericAnthropicClient } from "./anthropic";
@@ -314,6 +328,117 @@ export class AIClient {
314
328
  return client.createEmbedding({ ...options, model });
315
329
  }
316
330
 
331
+ async createAudioTranscription(
332
+ provider: string,
333
+ options: AudioTranscriptionOptions
334
+ ): Promise<AudioTranscriptionResponse> {
335
+ const { client } = this.getClient(provider, options.model);
336
+ if (!client || !client.createAudioTranscription) {
337
+ throw new Error(
338
+ `Provider ${provider} does not support audio transcription.`
339
+ );
340
+ }
341
+ return client.createAudioTranscription(options);
342
+ }
343
+
344
+ async createAudioGeneration(
345
+ provider: string,
346
+ options: AudioGenerationOptions
347
+ ): Promise<AudioGenerationResponse> {
348
+ const { client, model } = this.getClient(provider, options.model);
349
+ if (!client || !client.createAudioGeneration) {
350
+ throw new Error(
351
+ `Provider ${provider} does not support audio generation.`
352
+ );
353
+ }
354
+ if (!model) {
355
+ throw new Error(
356
+ `Model ${options.model} not registered for provider ${provider}.`
357
+ );
358
+ }
359
+ return client.createAudioGeneration({ ...options, model });
360
+ }
361
+
362
+ async createImageGeneration(
363
+ provider: string,
364
+ options: ImageGenerationOptions
365
+ ): Promise<ImageGenerationResponse> {
366
+ const { client, model } = this.getClient(provider, options.model);
367
+ if (!client || !client.createImageGeneration) {
368
+ throw new Error(
369
+ `Provider ${provider} does not support image generation.`
370
+ );
371
+ }
372
+ if (!model) {
373
+ throw new Error(
374
+ `Model ${options.model} not registered for provider ${provider}.`
375
+ );
376
+ }
377
+ return client.createImageGeneration({ ...options, model });
378
+ }
379
+
380
+ async createVideoGeneration(
381
+ provider: string,
382
+ options: VideoGenerationOptions
383
+ ): Promise<VideoGenerationResponse> {
384
+ const { client, model } = this.getClient(provider, options.model);
385
+ if (!client || !client.createVideoGeneration) {
386
+ throw new Error(
387
+ `Provider ${provider} does not support video generation.`
388
+ );
389
+ }
390
+ if (!model) {
391
+ throw new Error(
392
+ `Model ${options.model} not registered for provider ${provider}.`
393
+ );
394
+ }
395
+ return client.createVideoGeneration({ ...options, model });
396
+ }
397
+
398
+ async getVideoStatus(
399
+ provider: string,
400
+ options: VideoStatusOptions
401
+ ): Promise<VideoStatusResponse> {
402
+ const { client } = this.getClient(provider, options.model);
403
+ if (!client || !client.getVideoStatus) {
404
+ throw new Error(`Provider ${provider} does not support getVideoStatus.`);
405
+ }
406
+ return client.getVideoStatus(options);
407
+ }
408
+
409
+ async downloadVideo(
410
+ provider: string,
411
+ options: FileDownloadOptions
412
+ ): Promise<FileDownloadResponse> {
413
+ const { client } = this.getClient(provider);
414
+ if (!client || !client.downloadVideo) {
415
+ throw new Error(`Provider ${provider} does not support downloadVideo.`);
416
+ }
417
+ return client.downloadVideo(options);
418
+ }
419
+
420
+ async uploadFile(
421
+ provider: string,
422
+ options: FileUploadOptions
423
+ ): Promise<FileUploadResponse> {
424
+ const { client } = this.getClient(provider);
425
+ if (!client || !client.uploadFile) {
426
+ throw new Error(`Provider ${provider} does not support uploadFile.`);
427
+ }
428
+ return client.uploadFile(options);
429
+ }
430
+
431
+ async downloadFile(
432
+ provider: string,
433
+ options: FileDownloadOptions
434
+ ): Promise<FileDownloadResponse> {
435
+ const { client } = this.getClient(provider);
436
+ if (!client || !client.downloadFile) {
437
+ throw new Error(`Provider ${provider} does not support downloadFile.`);
438
+ }
439
+ return client.downloadFile(options);
440
+ }
441
+
317
442
  getRegisteredModels(provider: string): string[] {
318
443
  return this.clientModels[provider] || [];
319
444
  }
@@ -4,6 +4,20 @@ import {
4
4
  CompletionResponse,
5
5
  EmbeddingOptions,
6
6
  EmbeddingResponse,
7
+ AudioTranscriptionOptions,
8
+ AudioTranscriptionResponse,
9
+ AudioGenerationOptions,
10
+ AudioGenerationResponse,
11
+ ImageGenerationOptions,
12
+ ImageGenerationResponse,
13
+ VideoGenerationOptions,
14
+ VideoGenerationResponse,
15
+ VideoStatusOptions,
16
+ VideoStatusResponse,
17
+ FileUploadOptions,
18
+ FileUploadResponse,
19
+ FileDownloadOptions,
20
+ FileDownloadResponse,
7
21
  } from "./types";
8
22
  import { KnowhowSimpleClient, KNOWHOW_API_URL } from "../services/KnowhowClient";
9
23
 
@@ -35,4 +49,71 @@ export class KnowhowGenericClient implements GenericClient {
35
49
  const response = await this.client.getModels();
36
50
  return response.data;
37
51
  }
52
+
53
+ async createAudioTranscription(
54
+ options: AudioTranscriptionOptions
55
+ ): Promise<AudioTranscriptionResponse> {
56
+ const response = await this.client.createAudioTranscription(options);
57
+ return response.data;
58
+ }
59
+
60
+ async createAudioGeneration(
61
+ options: AudioGenerationOptions
62
+ ): Promise<AudioGenerationResponse> {
63
+ const response = await this.client.createAudioGeneration(options);
64
+ // The backend returns audio as base64 or buffer - normalize to Buffer
65
+ const data = response.data as any;
66
+ return {
67
+ ...data,
68
+ audio: data.audio ? Buffer.from(data.audio, "base64") : data.audio,
69
+ };
70
+ }
71
+
72
+ async createImageGeneration(
73
+ options: ImageGenerationOptions
74
+ ): Promise<ImageGenerationResponse> {
75
+ const response = await this.client.createImageGeneration(options);
76
+ return response.data;
77
+ }
78
+
79
+ async createVideoGeneration(
80
+ options: VideoGenerationOptions
81
+ ): Promise<VideoGenerationResponse> {
82
+ const response = await this.client.createVideoGeneration(options);
83
+ return response.data;
84
+ }
85
+
86
+ async getVideoStatus(
87
+ options: VideoStatusOptions
88
+ ): Promise<VideoStatusResponse> {
89
+ const response = await this.client.getVideoStatus(options);
90
+ return response.data;
91
+ }
92
+
93
+ async downloadVideo(
94
+ options: FileDownloadOptions
95
+ ): Promise<FileDownloadResponse> {
96
+ const response = await this.client.downloadVideo(options);
97
+ return {
98
+ data: Buffer.from(response.data as ArrayBuffer),
99
+ mimeType: (response.headers?.["content-type"] as string) || "video/mp4",
100
+ };
101
+ }
102
+
103
+ async uploadFile(options: FileUploadOptions): Promise<FileUploadResponse> {
104
+ const response = await this.client.uploadFile(options);
105
+ return response.data;
106
+ }
107
+
108
+ async downloadFile(
109
+ options: FileDownloadOptions
110
+ ): Promise<FileDownloadResponse> {
111
+ const response = await this.client.downloadFile(options);
112
+ return {
113
+ data: Buffer.from(response.data as ArrayBuffer),
114
+ mimeType:
115
+ (response.headers?.["content-type"] as string) ||
116
+ "application/octet-stream",
117
+ };
118
+ }
38
119
  }
@@ -1,11 +1,26 @@
1
1
  import OpenAI from "openai";
2
2
  import { getConfigSync } from "../config";
3
+ import { OpenAiTextPricing } from "./pricing";
3
4
  import {
4
5
  GenericClient,
5
6
  CompletionOptions,
6
7
  CompletionResponse,
7
8
  EmbeddingOptions,
8
9
  EmbeddingResponse,
10
+ AudioTranscriptionOptions,
11
+ AudioTranscriptionResponse,
12
+ AudioGenerationOptions,
13
+ AudioGenerationResponse,
14
+ ImageGenerationOptions,
15
+ ImageGenerationResponse,
16
+ VideoGenerationOptions,
17
+ VideoGenerationResponse,
18
+ VideoStatusOptions,
19
+ VideoStatusResponse,
20
+ FileUploadOptions,
21
+ FileUploadResponse,
22
+ FileDownloadOptions,
23
+ FileDownloadResponse,
9
24
  } from "./types";
10
25
  import {
11
26
  ChatCompletionMessageParam,
@@ -119,150 +134,7 @@ export class GenericOpenAiClient implements GenericClient {
119
134
  }
120
135
 
121
136
  pricesPerMillion() {
122
- return {
123
- [Models.openai.GPT_4o]: {
124
- input: 2.5,
125
- cached_input: 1.25,
126
- output: 10.0,
127
- },
128
- [Models.openai.GPT_4o_Mini]: {
129
- input: 0.15,
130
- cached_input: 0.075,
131
- output: 0.6,
132
- },
133
- [Models.openai.o1]: {
134
- input: 15.0,
135
- cached_input: 7.5,
136
- output: 60.0,
137
- },
138
- [Models.openai.o1_Mini]: {
139
- input: 1.1,
140
- cached_input: 0.55,
141
- output: 4.4,
142
- },
143
- [Models.openai.o3_Mini]: {
144
- input: 1.1,
145
- cached_input: 0.55,
146
- output: 4.4,
147
- },
148
- [Models.openai.GPT_41]: {
149
- input: 2.0,
150
- cached_input: 0.5,
151
- output: 8.0,
152
- },
153
- [Models.openai.GPT_41_Mini]: {
154
- input: 0.4,
155
- cached_input: 0.1,
156
- output: 1.6,
157
- },
158
- [Models.openai.GPT_41_Nano]: {
159
- input: 0.1,
160
- cached_input: 0.025,
161
- output: 0.4,
162
- },
163
- [Models.openai.GPT_45]: {
164
- input: 75.0,
165
- cached_input: 37.5,
166
- output: 150.0,
167
- },
168
- [Models.openai.GPT_4o_Audio]: {
169
- input: 2.5,
170
- cached_input: 0,
171
- output: 10.0,
172
- },
173
- [Models.openai.GPT_4o_Realtime]: {
174
- input: 5.0,
175
- cached_input: 2.5,
176
- output: 20.0,
177
- },
178
- [Models.openai.GPT_4o_Mini_Audio]: {
179
- input: 0.15,
180
- cached_input: 0,
181
- output: 0.6,
182
- },
183
- [Models.openai.GPT_4o_Mini_Realtime]: {
184
- input: 0.6,
185
- cached_input: 0.3,
186
- output: 2.4,
187
- },
188
- [Models.openai.o1_Pro]: {
189
- input: 150.0,
190
- cached_input: 0,
191
- output: 600.0,
192
- },
193
- [Models.openai.o3]: {
194
- input: 2.0,
195
- cached_input: 0.5,
196
- output: 8.0,
197
- },
198
- [Models.openai.o4_Mini]: {
199
- input: 1.1,
200
- cached_input: 0.275,
201
- output: 4.4,
202
- },
203
- [Models.openai.GPT_4o_Mini_Search]: {
204
- input: 0.15,
205
- cached_input: 0,
206
- output: 0.6,
207
- },
208
- [Models.openai.GPT_4o_Search]: {
209
- input: 2.5,
210
- cached_input: 0,
211
- output: 10.0,
212
- },
213
- [Models.openai.GPT_5_2]: {
214
- input: 1.75,
215
- cached_input: 0.175,
216
- output: 14,
217
- },
218
- [Models.openai.GPT_5_1]: {
219
- input: 1.25,
220
- cached_input: 0.125,
221
- output: 10,
222
- },
223
- [Models.openai.GPT_5]: {
224
- input: 1.25,
225
- cached_input: 0.125,
226
- output: 10,
227
- },
228
- [Models.openai.GPT_5_Mini]: {
229
- input: 0.25,
230
- cached_input: 0.025,
231
- output: 2,
232
- },
233
- [Models.openai.GPT_5_Nano]: {
234
- input: 0.05,
235
- cached_input: 0.005,
236
- output: 0.4,
237
- },
238
- /*
239
- *[Models.openai.Computer_Use]: {
240
- * input: 3.0,
241
- * cached_input: 0,
242
- * output: 12.0,
243
- *},
244
- *[Models.openai.Codex_Mini]: {
245
- * input: 1.5,
246
- * cached_input: 0.375,
247
- * output: 6.0,
248
- *},
249
- */
250
- [EmbeddingModels.openai.EmbeddingAda2]: {
251
- input: 0.1,
252
- cached_input: 0,
253
- output: 0,
254
- },
255
- [EmbeddingModels.openai.EmbeddingLarge3]: {
256
- input: 0.13,
257
- cached_input: 0,
258
- output: 0,
259
- },
260
- [EmbeddingModels.openai.EmbeddingSmall3]: {
261
- input: 0.02,
262
- cached_input: 0,
263
- output: 0,
264
- },
265
- };
137
+ return OpenAiTextPricing;
266
138
  }
267
139
 
268
140
  calculateCost(
@@ -271,7 +143,7 @@ export class GenericOpenAiClient implements GenericClient {
271
143
  | OpenAI.ChatCompletion["usage"]
272
144
  | OpenAI.CreateEmbeddingResponse["usage"]
273
145
  ): number | undefined {
274
- const pricing = this.pricesPerMillion()[model];
146
+ const pricing = this.pricesPerMillion()[model] || OpenAiTextPricing[model];
275
147
 
276
148
  if (!pricing) {
277
149
  return undefined;
@@ -318,4 +190,243 @@ export class GenericOpenAiClient implements GenericClient {
318
190
  usd_cost: this.calculateCost(options.model, openAiEmbedding.usage),
319
191
  };
320
192
  }
193
+
194
+ async createAudioTranscription(
195
+ options: AudioTranscriptionOptions
196
+ ): Promise<AudioTranscriptionResponse> {
197
+ // Convert Buffer to File if needed
198
+ let file = options.file;
199
+ if (Buffer.isBuffer(options.file)) {
200
+ const fileName = options.fileName || "audio.mp3";
201
+ file = await OpenAI.toFile(options.file, fileName);
202
+ }
203
+
204
+ const response = await this.client.audio.transcriptions.create({
205
+ file: file,
206
+ model: options.model || "whisper-1",
207
+ language: options.language,
208
+ prompt: options.prompt,
209
+ response_format: options.response_format || "verbose_json",
210
+ temperature: options.temperature,
211
+ });
212
+
213
+ // Calculate cost: $0.006 per minute for Whisper
214
+ const duration = typeof response === "object" && "duration" in response && typeof response.duration === "number"
215
+ ? response.duration
216
+ : undefined;
217
+ const usdCost = duration ? (duration / 60) * 0.006 : undefined;
218
+
219
+ if (typeof response === "string") {
220
+ return {
221
+ text: response,
222
+ usd_cost: usdCost,
223
+ };
224
+ }
225
+
226
+ // Cast to any to access verbose response properties
227
+ const verboseResponse = response as any;
228
+
229
+ return {
230
+ text: response.text,
231
+ language: verboseResponse.language,
232
+ duration: verboseResponse.duration,
233
+ segments: verboseResponse.segments,
234
+ usd_cost: usdCost,
235
+ };
236
+ }
237
+
238
+ async createAudioGeneration(
239
+ options: AudioGenerationOptions
240
+ ): Promise<AudioGenerationResponse> {
241
+ const response = await this.client.audio.speech.create({
242
+ model: options.model,
243
+ input: options.input,
244
+ voice: options.voice as any,
245
+ response_format: options.response_format || "mp3",
246
+ speed: options.speed,
247
+ });
248
+
249
+ const buffer = Buffer.from(await response.arrayBuffer());
250
+
251
+ // Calculate cost based on model and character count
252
+ // TTS: $15.00 / 1M characters, TTS HD: $30.00 / 1M characters
253
+ const isHD = options.model.includes("hd");
254
+ const pricePerMillion = isHD ? 30.0 : 15.0;
255
+ const usdCost = (options.input.length * pricePerMillion) / 1e6;
256
+
257
+ return {
258
+ audio: buffer,
259
+ format: options.response_format || "mp3",
260
+ usd_cost: usdCost,
261
+ };
262
+ }
263
+
264
+ async createImageGeneration(
265
+ options: ImageGenerationOptions
266
+ ): Promise<ImageGenerationResponse> {
267
+ const response = await this.client.images.generate({
268
+ model: options.model,
269
+ prompt: options.prompt,
270
+ n: options.n,
271
+ size: options.size,
272
+ quality: options.quality,
273
+ style: options.style,
274
+ response_format: options.response_format,
275
+ user: options.user,
276
+ });
277
+
278
+ // Cost calculation varies by model and settings
279
+ // DALL-E 3: $0.040-$0.120 per image depending on quality/size
280
+ // DALL-E 2: $0.016-$0.020 per image
281
+ const estimatedCostPerImage = options.quality === "hd" ? 0.08 : 0.04;
282
+ const usdCost = (options.n || 1) * estimatedCostPerImage;
283
+
284
+ return { ...response, usd_cost: usdCost };
285
+ }
286
+
287
+ async createVideoGeneration(
288
+ options: VideoGenerationOptions
289
+ ): Promise<VideoGenerationResponse> {
290
+ const apiKey = this.apiKey || process.env.OPENAI_KEY;
291
+ if (!apiKey) {
292
+ throw new Error("OpenAI API key is required for video generation");
293
+ }
294
+
295
+ const model = options.model || "sora-2";
296
+
297
+ // Step 1: Create the video job
298
+ const createPayload: any = {
299
+ model,
300
+ prompt: options.prompt,
301
+ };
302
+
303
+ if (options.duration) {
304
+ // OpenAI API requires seconds as a string: '4', '8', or '12'
305
+ // Round to nearest valid value
306
+ const validSeconds = [4, 8, 12];
307
+ const duration = options.duration as number;
308
+ const nearest = validSeconds.reduce((prev, curr) =>
309
+ Math.abs(curr - duration) < Math.abs(prev - duration) ? curr : prev
310
+ );
311
+ createPayload.seconds = String(nearest);
312
+ }
313
+ if (options.resolution) {
314
+ createPayload.size = options.resolution;
315
+ }
316
+ if (options.n) {
317
+ createPayload.n = options.n;
318
+ }
319
+
320
+ const createResponse = await fetch("https://api.openai.com/v1/videos", {
321
+ method: "POST",
322
+ headers: {
323
+ "Content-Type": "application/json",
324
+ Authorization: `Bearer ${apiKey}`,
325
+ },
326
+ body: JSON.stringify(createPayload),
327
+ });
328
+
329
+ if (!createResponse.ok) {
330
+ const errorText = await createResponse.text();
331
+ throw new Error(
332
+ `OpenAI video generation failed: ${createResponse.status} ${errorText}`
333
+ );
334
+ }
335
+
336
+ const createData = await createResponse.json();
337
+ const videoId = createData.id;
338
+
339
+ if (!videoId) {
340
+ throw new Error("No video ID returned from OpenAI video generation");
341
+ }
342
+
343
+ // Return immediately with the jobId – do NOT poll here.
344
+ // Use getVideoStatus() to poll and downloadVideo() to fetch the result.
345
+ return {
346
+ created: createData.created_at || Math.floor(Date.now() / 1000),
347
+ data: [],
348
+ jobId: videoId,
349
+ usd_cost: undefined,
350
+ };
351
+ }
352
+
353
+ async getVideoStatus(options: VideoStatusOptions): Promise<VideoStatusResponse> {
354
+ const apiKey = this.apiKey || process.env.OPENAI_KEY;
355
+ if (!apiKey) throw new Error("OpenAI API key not set");
356
+ const response = await fetch(`https://api.openai.com/v1/videos/${options.jobId}`, {
357
+ method: "GET",
358
+ headers: { Authorization: `Bearer ${apiKey}` },
359
+ });
360
+ if (!response.ok) {
361
+ const errorText = await response.text();
362
+ throw new Error(`OpenAI getVideoStatus failed: ${response.status} ${errorText}`);
363
+ }
364
+ const data = await response.json();
365
+ let status: VideoStatusResponse["status"] = "in_progress";
366
+ if (data.status === "completed") status = "completed";
367
+ else if (data.status === "failed") status = "failed";
368
+ else if (data.status === "queued") status = "queued";
369
+ else if (data.status === "in_progress") status = "in_progress";
370
+ return {
371
+ jobId: options.jobId,
372
+ status,
373
+ data: data.result?.url ? [{ url: data.result.url }] : undefined,
374
+ error: data.error?.message,
375
+ };
376
+ }
377
+
378
+ async downloadVideo(options: FileDownloadOptions): Promise<FileDownloadResponse> {
379
+ const apiKey = this.apiKey || process.env.OPENAI_KEY;
380
+ if (!apiKey) throw new Error("OpenAI API key not set");
381
+ const fileId = options.fileId;
382
+ if (!fileId) throw new Error("downloadVideo requires fileId (the jobId)");
383
+ const response = await fetch(`https://api.openai.com/v1/videos/${fileId}/content`, {
384
+ method: "GET",
385
+ headers: { Authorization: `Bearer ${apiKey}` },
386
+ });
387
+ if (!response.ok) {
388
+ const errorText = await response.text();
389
+ throw new Error(`OpenAI downloadVideo failed: ${response.status} ${errorText}`);
390
+ }
391
+ const mimeType = response.headers.get("content-type") || "video/mp4";
392
+ return { data: Buffer.from(await response.arrayBuffer()), mimeType };
393
+ }
394
+
395
+ async uploadFile(options: FileUploadOptions): Promise<FileUploadResponse> {
396
+ const apiKey = this.apiKey || process.env.OPENAI_KEY;
397
+ if (!apiKey) throw new Error("OpenAI API key not set");
398
+ const formData = new FormData();
399
+ formData.append("purpose", "assistants");
400
+ const blob = new Blob([options.data], { type: options.mimeType || "application/octet-stream" });
401
+ formData.append("file", blob, options.fileName || "upload");
402
+ const response = await fetch("https://api.openai.com/v1/files", {
403
+ method: "POST",
404
+ headers: { Authorization: `Bearer ${apiKey}` },
405
+ body: formData,
406
+ });
407
+ if (!response.ok) {
408
+ const errorText = await response.text();
409
+ throw new Error(`OpenAI uploadFile failed: ${response.status} ${errorText}`);
410
+ }
411
+ const data = await response.json();
412
+ return { fileId: data.id, uri: data.uri };
413
+ }
414
+
415
+ async downloadFile(options: FileDownloadOptions): Promise<FileDownloadResponse> {
416
+ const apiKey = this.apiKey || process.env.OPENAI_KEY;
417
+ if (!apiKey) throw new Error("OpenAI API key not set");
418
+ const fileId = options.fileId;
419
+ if (!fileId) throw new Error("downloadFile requires fileId");
420
+ const response = await fetch(`https://api.openai.com/v1/files/${fileId}/content`, {
421
+ method: "GET",
422
+ headers: { Authorization: `Bearer ${apiKey}` },
423
+ });
424
+ if (!response.ok) {
425
+ const errorText = await response.text();
426
+ throw new Error(`OpenAI downloadFile failed: ${response.status} ${errorText}`);
427
+ }
428
+ const mimeType = response.headers.get("content-type") || undefined;
429
+ const data = Buffer.from(await response.arrayBuffer());
430
+ return { data, mimeType };
431
+ }
321
432
  }