@tyvm/knowhow 0.0.69 → 0.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/shell-commands.md +174 -0
- package/package.json +1 -1
- package/src/agents/base/base.ts +1 -3
- package/src/agents/developer/developer.ts +21 -13
- package/src/agents/tools/agentCall.ts +4 -2
- package/src/agents/tools/fileSearch.ts +5 -1
- package/src/agents/tools/startAgentTask.ts +131 -22
- package/src/chat/CliChatService.ts +57 -11
- package/src/chat/modules/AgentModule.ts +72 -12
- package/src/chat/modules/CustomCommandsModule.ts +79 -0
- package/src/chat/modules/InternalChatModule.ts +11 -1
- package/src/chat/modules/ShellCommandModule.ts +96 -0
- package/src/chat/modules/index.ts +1 -0
- package/src/chat/types.ts +14 -2
- package/src/chat.ts +16 -13
- package/src/cli.ts +16 -6
- package/src/clients/anthropic.ts +41 -90
- package/src/clients/gemini.ts +445 -87
- package/src/clients/index.ts +125 -0
- package/src/clients/knowhow.ts +81 -0
- package/src/clients/openai.ts +256 -145
- package/src/clients/pricing/anthropic.ts +90 -0
- package/src/clients/pricing/google.ts +65 -0
- package/src/clients/pricing/index.ts +4 -0
- package/src/clients/pricing/openai.ts +134 -0
- package/src/clients/pricing/xai.ts +62 -0
- package/src/clients/types.ts +170 -1
- package/src/clients/xai.ts +275 -46
- package/src/config.ts +61 -15
- package/src/embeddings.ts +9 -1
- package/src/microphone.ts +15 -16
- package/src/migrations.ts +151 -0
- package/src/plugins/AgentsMdPlugin.ts +118 -0
- package/src/plugins/PluginBase.ts +8 -0
- package/src/plugins/downloader/downloader.ts +5 -6
- package/src/plugins/embedding.ts +10 -8
- package/src/plugins/exec.ts +70 -0
- package/src/plugins/github.ts +120 -74
- package/src/plugins/language.ts +11 -13
- package/src/plugins/plugins.ts +25 -4
- package/src/plugins/tmux.ts +132 -0
- package/src/plugins/types.ts +1 -0
- package/src/plugins/vim.ts +14 -1
- package/src/services/AgentSyncFs.ts +417 -0
- package/src/services/{AgentSynchronization.ts → AgentSyncKnowhowWeb.ts} +2 -2
- package/src/services/EventService.ts +0 -1
- package/src/services/KnowhowClient.ts +106 -0
- package/src/services/index.ts +4 -2
- package/src/types.ts +57 -4
- package/src/worker.ts +11 -6
- package/tests/manual/modalities/README.md +157 -0
- package/tests/manual/modalities/google.modalities.test.ts +335 -0
- package/tests/manual/modalities/openai.modalities.test.ts +329 -0
- package/tests/manual/modalities/streaming.test.ts +260 -0
- package/tests/manual/modalities/xai.modalities.test.ts +307 -0
- package/tests/plugins/language/languagePlugin-content-triggers.test.ts +5 -5
- package/tests/plugins/language/languagePlugin-integration.test.ts +1 -1
- package/tests/plugins/language/languagePlugin.test.ts +17 -8
- package/ts_build/package.json +1 -1
- package/ts_build/src/agents/base/base.js +1 -1
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/developer/developer.js +21 -12
- package/ts_build/src/agents/developer/developer.js.map +1 -1
- package/ts_build/src/agents/tools/agentCall.js +4 -2
- package/ts_build/src/agents/tools/agentCall.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
- package/ts_build/src/agents/tools/fileSearch.js +2 -1
- package/ts_build/src/agents/tools/fileSearch.js.map +1 -1
- package/ts_build/src/agents/tools/github/index.d.ts +1 -1
- package/ts_build/src/agents/tools/startAgentTask.d.ts +2 -1
- package/ts_build/src/agents/tools/startAgentTask.js +118 -17
- package/ts_build/src/agents/tools/startAgentTask.js.map +1 -1
- package/ts_build/src/chat/CliChatService.d.ts +4 -0
- package/ts_build/src/chat/CliChatService.js +39 -5
- package/ts_build/src/chat/CliChatService.js.map +1 -1
- package/ts_build/src/chat/modules/AgentModule.d.ts +4 -1
- package/ts_build/src/chat/modules/AgentModule.js +49 -11
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/chat/modules/CustomCommandsModule.d.ts +9 -0
- package/ts_build/src/chat/modules/CustomCommandsModule.js +58 -0
- package/ts_build/src/chat/modules/CustomCommandsModule.js.map +1 -0
- package/ts_build/src/chat/modules/InternalChatModule.d.ts +2 -0
- package/ts_build/src/chat/modules/InternalChatModule.js +10 -0
- package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -1
- package/ts_build/src/chat/modules/ShellCommandModule.d.ts +8 -0
- package/ts_build/src/chat/modules/ShellCommandModule.js +83 -0
- package/ts_build/src/chat/modules/ShellCommandModule.js.map +1 -0
- package/ts_build/src/chat/modules/index.d.ts +1 -0
- package/ts_build/src/chat/modules/index.js +3 -1
- package/ts_build/src/chat/modules/index.js.map +1 -1
- package/ts_build/src/chat/types.d.ts +11 -1
- package/ts_build/src/chat.js +16 -13
- package/ts_build/src/chat.js.map +1 -1
- package/ts_build/src/cli.js +10 -3
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.d.ts +5 -1
- package/ts_build/src/clients/anthropic.js +18 -91
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/gemini.d.ts +80 -2
- package/ts_build/src/clients/gemini.js +336 -74
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +9 -1
- package/ts_build/src/clients/index.js +65 -0
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/knowhow.d.ts +9 -1
- package/ts_build/src/clients/knowhow.js +43 -0
- package/ts_build/src/clients/knowhow.js.map +1 -1
- package/ts_build/src/clients/openai.d.ts +9 -1
- package/ts_build/src/clients/openai.js +201 -133
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/pricing/anthropic.d.ts +17 -0
- package/ts_build/src/clients/pricing/anthropic.js +93 -0
- package/ts_build/src/clients/pricing/anthropic.js.map +1 -0
- package/ts_build/src/clients/pricing/google.d.ts +73 -0
- package/ts_build/src/clients/pricing/google.js +68 -0
- package/ts_build/src/clients/pricing/google.js.map +1 -0
- package/ts_build/src/clients/pricing/index.d.ts +4 -0
- package/ts_build/src/clients/pricing/index.js +14 -0
- package/ts_build/src/clients/pricing/index.js.map +1 -0
- package/ts_build/src/clients/pricing/openai.d.ts +7 -0
- package/ts_build/src/clients/pricing/openai.js +137 -0
- package/ts_build/src/clients/pricing/openai.js.map +1 -0
- package/ts_build/src/clients/pricing/xai.d.ts +26 -0
- package/ts_build/src/clients/pricing/xai.js +59 -0
- package/ts_build/src/clients/pricing/xai.js.map +1 -0
- package/ts_build/src/clients/types.d.ts +135 -0
- package/ts_build/src/clients/xai.d.ts +9 -1
- package/ts_build/src/clients/xai.js +178 -46
- package/ts_build/src/clients/xai.js.map +1 -1
- package/ts_build/src/config.d.ts +1 -0
- package/ts_build/src/config.js +45 -16
- package/ts_build/src/config.js.map +1 -1
- package/ts_build/src/embeddings.js +8 -1
- package/ts_build/src/embeddings.js.map +1 -1
- package/ts_build/src/microphone.js +7 -9
- package/ts_build/src/microphone.js.map +1 -1
- package/ts_build/src/migrations.d.ts +17 -0
- package/ts_build/src/migrations.js +86 -0
- package/ts_build/src/migrations.js.map +1 -0
- package/ts_build/src/plugins/AgentsMdPlugin.d.ts +13 -0
- package/ts_build/src/plugins/AgentsMdPlugin.js +118 -0
- package/ts_build/src/plugins/AgentsMdPlugin.js.map +1 -0
- package/ts_build/src/plugins/PluginBase.d.ts +1 -0
- package/ts_build/src/plugins/PluginBase.js +3 -0
- package/ts_build/src/plugins/PluginBase.js.map +1 -1
- package/ts_build/src/plugins/downloader/downloader.js +5 -5
- package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
- package/ts_build/src/plugins/embedding.js +9 -8
- package/ts_build/src/plugins/embedding.js.map +1 -1
- package/ts_build/src/plugins/exec.d.ts +10 -0
- package/ts_build/src/plugins/exec.js +56 -0
- package/ts_build/src/plugins/exec.js.map +1 -0
- package/ts_build/src/plugins/github.js +93 -51
- package/ts_build/src/plugins/github.js.map +1 -1
- package/ts_build/src/plugins/language.js +14 -11
- package/ts_build/src/plugins/language.js.map +1 -1
- package/ts_build/src/plugins/plugins.d.ts +1 -0
- package/ts_build/src/plugins/plugins.js +19 -1
- package/ts_build/src/plugins/plugins.js.map +1 -1
- package/ts_build/src/plugins/tmux.d.ts +14 -0
- package/ts_build/src/plugins/tmux.js +108 -0
- package/ts_build/src/plugins/tmux.js.map +1 -0
- package/ts_build/src/plugins/types.d.ts +1 -0
- package/ts_build/src/plugins/vim.js +11 -1
- package/ts_build/src/plugins/vim.js.map +1 -1
- package/ts_build/src/services/AgentSyncFs.d.ts +34 -0
- package/ts_build/src/services/AgentSyncFs.js +325 -0
- package/ts_build/src/services/AgentSyncFs.js.map +1 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.d.ts +29 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.js +178 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.js.map +1 -0
- package/ts_build/src/services/AgentSynchronization.d.ts +1 -1
- package/ts_build/src/services/AgentSynchronization.js +3 -3
- package/ts_build/src/services/AgentSynchronization.js.map +1 -1
- package/ts_build/src/services/EventService.js.map +1 -1
- package/ts_build/src/services/KnowhowClient.d.ts +9 -1
- package/ts_build/src/services/KnowhowClient.js +58 -0
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/index.d.ts +2 -1
- package/ts_build/src/services/index.js +2 -1
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/types.d.ts +26 -1
- package/ts_build/src/types.js +45 -4
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/PersistentInputManager.d.ts +28 -0
- package/ts_build/src/utils/PersistentInputManager.js +293 -0
- package/ts_build/src/utils/PersistentInputManager.js.map +1 -0
- package/ts_build/src/worker.js +2 -2
- package/ts_build/src/worker.js.map +1 -1
- package/ts_build/tests/manual/modalities/google.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/google.modalities.test.js +252 -0
- package/ts_build/tests/manual/modalities/google.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.js +252 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/streaming.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/streaming.test.js +206 -0
- package/ts_build/tests/manual/modalities/streaming.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.js +226 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/persistent-input-test.d.ts +1 -0
- package/ts_build/tests/manual/persistent-input-test.js +35 -0
- package/ts_build/tests/manual/persistent-input-test.js.map +1 -0
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +5 -5
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-integration.test.js +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-integration.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin.test.js +17 -7
- package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
package/src/clients/gemini.ts
CHANGED
|
@@ -9,8 +9,12 @@ import {
|
|
|
9
9
|
ToolConfig,
|
|
10
10
|
UsageMetadata,
|
|
11
11
|
} from "@google/genai";
|
|
12
|
+
import * as os from "os";
|
|
13
|
+
import * as fsSync from "fs";
|
|
14
|
+
import * as pathSync from "path";
|
|
12
15
|
import { wait } from "../utils";
|
|
13
16
|
import { EmbeddingModels, Models } from "../types";
|
|
17
|
+
import { GeminiTextPricing } from "./pricing";
|
|
14
18
|
|
|
15
19
|
import {
|
|
16
20
|
GenericClient,
|
|
@@ -23,6 +27,20 @@ import {
|
|
|
23
27
|
MessageContent,
|
|
24
28
|
ToolCall,
|
|
25
29
|
OutputMessage,
|
|
30
|
+
AudioTranscriptionOptions,
|
|
31
|
+
AudioTranscriptionResponse,
|
|
32
|
+
AudioGenerationOptions,
|
|
33
|
+
AudioGenerationResponse,
|
|
34
|
+
ImageGenerationOptions,
|
|
35
|
+
ImageGenerationResponse,
|
|
36
|
+
VideoGenerationOptions,
|
|
37
|
+
VideoGenerationResponse,
|
|
38
|
+
VideoStatusOptions,
|
|
39
|
+
VideoStatusResponse,
|
|
40
|
+
FileUploadOptions,
|
|
41
|
+
FileUploadResponse,
|
|
42
|
+
FileDownloadOptions,
|
|
43
|
+
FileDownloadResponse,
|
|
26
44
|
} from "./types";
|
|
27
45
|
|
|
28
46
|
function getMimeTypeFromUrl(url: string): string {
|
|
@@ -32,6 +50,51 @@ function getMimeTypeFromUrl(url: string): string {
|
|
|
32
50
|
return "image/jpeg";
|
|
33
51
|
}
|
|
34
52
|
|
|
53
|
+
/**
 * Guesses a video MIME type from the file extension found in a URL or path.
 *
 * The query string and fragment are ignored and the comparison is
 * case-insensitive, so `https://host/clip.MOV?token=abc` resolves to
 * `video/quicktime`.
 *
 * @param url - URL or path of the video.
 * @returns The MIME type for `.mp4`, `.webm`, `.mov` or `.avi`; `video/mp4`
 *   when the extension is missing or not recognised.
 */
function getVideoMimeTypeFromUrl(url: string): string {
  // Strip "?query" / "#fragment" so signed or parameterised URLs still match.
  const path = (url.split(/[?#]/, 1)[0] ?? "").toLowerCase();

  const knownTypes: ReadonlyArray<readonly [string, string]> = [
    [".mp4", "video/mp4"],
    [".webm", "video/webm"],
    [".mov", "video/quicktime"],
    [".avi", "video/x-msvideo"],
  ];

  for (const [extension, mimeType] of knownTypes) {
    if (path.endsWith(extension)) return mimeType;
  }

  // Same fallback as before: assume MP4 when nothing matches.
  return "video/mp4";
}
|
|
60
|
+
|
|
61
|
+
/**
 * Wraps raw PCM samples in a 44-byte RIFF/WAVE header so the result can be
 * played as a `.wav` file.
 *
 * Gemini TTS returns raw 16-bit PCM (audio/L16), which has no container of its
 * own; the defaults below match that output.
 *
 * @param pcmData - Raw PCM sample bytes; copied unchanged after the header.
 * @param sampleRate - Samples per second written to the header (default 24000).
 * @param numChannels - Channel count written to the header (default 1).
 * @param bitsPerSample - Bits per sample written to the header (default 16).
 * @returns A new buffer of `44 + pcmData.length` bytes: header, then samples.
 */
function pcmToWav(
  pcmData: Buffer,
  sampleRate: number = 24000,
  numChannels: number = 1,
  bitsPerSample: number = 16
): Buffer {
  const HEADER_LENGTH = 44;
  const payloadLength = pcmData.length;
  const header = Buffer.alloc(HEADER_LENGTH);

  // Four-character chunk tags and the byte offsets they occupy.
  const chunkTags: Array<[string, number]> = [
    ["RIFF", 0],
    ["WAVE", 8],
    ["fmt ", 12],
    ["data", 36],
  ];
  for (const [tag, offset] of chunkTags) {
    header.write(tag, offset);
  }

  // 32-bit little-endian fields.
  header.writeUInt32LE(36 + payloadLength, 4); // total file size minus 8
  header.writeUInt32LE(16, 16); // size of the fmt chunk body
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(sampleRate * numChannels * (bitsPerSample / 8), 28); // byte rate
  header.writeUInt32LE(payloadLength, 40); // size of the data chunk body

  // 16-bit little-endian fields.
  header.writeUInt16LE(1, 20); // audio format 1 = PCM
  header.writeUInt16LE(numChannels, 22);
  header.writeUInt16LE(numChannels * (bitsPerSample / 8), 32); // block align
  header.writeUInt16LE(bitsPerSample, 34);

  return Buffer.concat([header, pcmData]);
}
|
|
97
|
+
|
|
35
98
|
export class GenericGeminiClient implements GenericClient {
|
|
36
99
|
private client: GoogleGenAI;
|
|
37
100
|
private apiKey?: string;
|
|
@@ -64,16 +127,33 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
64
127
|
return { text: part.text };
|
|
65
128
|
}
|
|
66
129
|
if (part.type === "image_url") {
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
130
|
+
const url = part.image_url.url;
|
|
131
|
+
if (url.startsWith("data:")) {
|
|
132
|
+
const [header, base64Data] = url.split(",");
|
|
133
|
+
const mimeType = header.split(":")[1].split(";")[0];
|
|
134
|
+
return {
|
|
135
|
+
inlineData: {
|
|
136
|
+
data: base64Data,
|
|
137
|
+
mimeType,
|
|
138
|
+
},
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// If it's a File API URI
|
|
143
|
+
if (url.startsWith("https://generativelanguage.googleapis.com")) {
|
|
144
|
+
return {
|
|
145
|
+
fileData: {
|
|
146
|
+
fileUri: url,
|
|
147
|
+
mimeType: getMimeTypeFromUrl(url),
|
|
148
|
+
},
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
if (part.type === "video_url") {
|
|
153
|
+
const mimeType = getVideoMimeTypeFromUrl(part.video_url.url);
|
|
74
154
|
return {
|
|
75
155
|
fileData: {
|
|
76
|
-
|
|
156
|
+
fileUri: part.video_url.url,
|
|
77
157
|
mimeType,
|
|
78
158
|
},
|
|
79
159
|
};
|
|
@@ -81,9 +161,9 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
81
161
|
// Handle other potential generic message content types if necessary
|
|
82
162
|
// For now, only text, image_url and video_url are explicitly handled.
|
|
83
163
|
console.warn(
|
|
84
|
-
`Unsupported generic message content part type: ${
|
|
164
|
+
`Unsupported generic message content part type: ${part.type}`
|
|
85
165
|
);
|
|
86
|
-
return { text: `[Unsupported content type: ${
|
|
166
|
+
return { text: `[Unsupported content type: ${part.type}]` };
|
|
87
167
|
})
|
|
88
168
|
.filter((part) => !!part); // Filter out any null/undefined parts if transformation fails
|
|
89
169
|
}
|
|
@@ -119,7 +199,7 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
119
199
|
(systemInstruction ? systemInstruction + "\n" : "") +
|
|
120
200
|
this.transformContentParts(msg.content)
|
|
121
201
|
.filter((p) => "text" in p && typeof p.text === "string")
|
|
122
|
-
.map((p) =>
|
|
202
|
+
.map((p) => p.text)
|
|
123
203
|
.join("\n");
|
|
124
204
|
}
|
|
125
205
|
} else if (msg.role === "user" || msg.role === "assistant") {
|
|
@@ -215,13 +295,13 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
215
295
|
* @returns A cleaned schema object compatible with Gemini API
|
|
216
296
|
*/
|
|
217
297
|
private cleanSchemaForGemini(schema: any): any {
|
|
218
|
-
if (!schema || typeof schema !==
|
|
298
|
+
if (!schema || typeof schema !== "object") {
|
|
219
299
|
return schema;
|
|
220
300
|
}
|
|
221
301
|
|
|
222
302
|
// Handle arrays
|
|
223
303
|
if (Array.isArray(schema)) {
|
|
224
|
-
return schema.map(item => this.cleanSchemaForGemini(item));
|
|
304
|
+
return schema.map((item) => this.cleanSchemaForGemini(item));
|
|
225
305
|
}
|
|
226
306
|
|
|
227
307
|
const cleaned: any = {};
|
|
@@ -236,24 +316,29 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
236
316
|
// - $ref: JSON Schema references not supported
|
|
237
317
|
// - $defs: JSON Schema definitions not supported
|
|
238
318
|
// - positional: internal knowhow property, not part of JSON Schema
|
|
239
|
-
if (
|
|
319
|
+
if (
|
|
320
|
+
key === "additionalProperties" ||
|
|
321
|
+
key === "$ref" ||
|
|
322
|
+
key === "$defs" ||
|
|
323
|
+
key === "positional"
|
|
324
|
+
) {
|
|
240
325
|
continue;
|
|
241
326
|
}
|
|
242
327
|
|
|
243
328
|
const value = schema[key];
|
|
244
329
|
|
|
245
330
|
// Convert type to uppercase if it's a string
|
|
246
|
-
if (key ===
|
|
331
|
+
if (key === "type" && typeof value === "string") {
|
|
247
332
|
cleaned[key] = value.toUpperCase();
|
|
248
333
|
}
|
|
249
334
|
// Handle type arrays (e.g., ["string", "null"])
|
|
250
|
-
else if (key ===
|
|
251
|
-
cleaned[key] = value.map((t: string) =>
|
|
252
|
-
typeof t ===
|
|
335
|
+
else if (key === "type" && Array.isArray(value)) {
|
|
336
|
+
cleaned[key] = value.map((t: string) =>
|
|
337
|
+
typeof t === "string" ? t.toUpperCase() : t
|
|
253
338
|
);
|
|
254
339
|
}
|
|
255
340
|
// Recursively clean nested objects
|
|
256
|
-
else if (typeof value ===
|
|
341
|
+
else if (typeof value === "object" && value !== null) {
|
|
257
342
|
cleaned[key] = this.cleanSchemaForGemini(value);
|
|
258
343
|
}
|
|
259
344
|
// Copy primitive values as-is
|
|
@@ -277,12 +362,14 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
277
362
|
|
|
278
363
|
const functionDeclarations: FunctionDeclaration[] = tools.map((tool) => {
|
|
279
364
|
// Clean the entire parameters schema to remove unsupported fields
|
|
280
|
-
const cleanedParameters = this.cleanSchemaForGemini(
|
|
281
|
-
|
|
365
|
+
const cleanedParameters = this.cleanSchemaForGemini(
|
|
366
|
+
tool.function.parameters
|
|
367
|
+
);
|
|
368
|
+
|
|
282
369
|
return {
|
|
283
370
|
name: tool.function.name,
|
|
284
371
|
description: tool.function.description || "",
|
|
285
|
-
parameters: cleanedParameters
|
|
372
|
+
parameters: cleanedParameters,
|
|
286
373
|
};
|
|
287
374
|
});
|
|
288
375
|
|
|
@@ -391,75 +478,16 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
391
478
|
usd_cost: usdCost,
|
|
392
479
|
};
|
|
393
480
|
} catch (error) {
|
|
394
|
-
console.error(
|
|
481
|
+
console.error(
|
|
482
|
+
"Error calling Google GenAI generateContent:",
|
|
483
|
+
error.message
|
|
484
|
+
);
|
|
395
485
|
throw error;
|
|
396
486
|
}
|
|
397
487
|
}
|
|
398
488
|
|
|
399
|
-
pricesPerMillion()
|
|
400
|
-
return
|
|
401
|
-
[Models.google.Gemini_3_Preview]: {
|
|
402
|
-
input: 2,
|
|
403
|
-
input_gt_200k: 4,
|
|
404
|
-
output: 12,
|
|
405
|
-
output_gt_200k: 18,
|
|
406
|
-
context_caching: 0.2,
|
|
407
|
-
context_caching_gt_200k: 0.4,
|
|
408
|
-
},
|
|
409
|
-
[Models.google.Gemini_25_Flash_Preview]: {
|
|
410
|
-
input: 0.3,
|
|
411
|
-
output: 2.5,
|
|
412
|
-
thinking_output: 3.5,
|
|
413
|
-
context_caching: 0.0375,
|
|
414
|
-
},
|
|
415
|
-
[Models.google.Gemini_25_Pro_Preview]: {
|
|
416
|
-
input: 1.25,
|
|
417
|
-
input_gt_200k: 2.5,
|
|
418
|
-
output: 10.0,
|
|
419
|
-
output_gt_200k: 15.0,
|
|
420
|
-
context_caching: 0.125,
|
|
421
|
-
context_caching_gt_200k: 0.25,
|
|
422
|
-
},
|
|
423
|
-
[Models.google.Gemini_20_Flash]: {
|
|
424
|
-
input: 0.1,
|
|
425
|
-
output: 0.4,
|
|
426
|
-
context_caching: 0.025,
|
|
427
|
-
},
|
|
428
|
-
[Models.google.Gemini_20_Flash_Preview_Image_Generation]: {
|
|
429
|
-
input: 0.1,
|
|
430
|
-
output: 0.4,
|
|
431
|
-
image_generation: 0.039,
|
|
432
|
-
},
|
|
433
|
-
[Models.google.Gemini_20_Flash_Lite]: {
|
|
434
|
-
input: 0.075,
|
|
435
|
-
output: 0.3,
|
|
436
|
-
},
|
|
437
|
-
[Models.google.Gemini_15_Flash]: {
|
|
438
|
-
input: 0.075,
|
|
439
|
-
output: 0.3,
|
|
440
|
-
context_caching: 0.01875,
|
|
441
|
-
},
|
|
442
|
-
[Models.google.Gemini_15_Flash_8B]: {
|
|
443
|
-
input: 0.0375,
|
|
444
|
-
output: 0.15,
|
|
445
|
-
context_caching: 0.01,
|
|
446
|
-
},
|
|
447
|
-
[Models.google.Gemini_15_Pro]: {
|
|
448
|
-
input: 1.25,
|
|
449
|
-
output: 5.0,
|
|
450
|
-
context_caching: 0.3125,
|
|
451
|
-
},
|
|
452
|
-
[Models.google.Imagen_3]: {
|
|
453
|
-
image_generation: 0.03,
|
|
454
|
-
},
|
|
455
|
-
[Models.google.Veo_2]: {
|
|
456
|
-
video_generation: 0.35,
|
|
457
|
-
},
|
|
458
|
-
[EmbeddingModels.google.Gemini_Embedding]: {
|
|
459
|
-
input: 0, // Free of charge
|
|
460
|
-
output: 0, // Free of charge
|
|
461
|
-
},
|
|
462
|
-
};
|
|
489
|
+
/**
 * Returns the pricing table used by this client.
 *
 * The table itself is the `GeminiTextPricing` object imported from
 * `./pricing`; it is returned by reference, not copied.
 */
pricesPerMillion() {
  const pricingTable = GeminiTextPricing;
  return pricingTable;
}
|
|
464
492
|
|
|
465
493
|
calculateCost(model: string, usage: UsageMetadata): number | undefined {
|
|
@@ -558,4 +586,334 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
558
586
|
throw error;
|
|
559
587
|
}
|
|
560
588
|
}
|
|
589
|
+
|
|
590
|
+
/**
 * Placeholder for speech-to-text: this client does not implement it.
 *
 * @param options - Ignored.
 * @throws Error always; the returned promise rejects with a message pointing
 *   callers at the OpenAI client instead.
 */
async createAudioTranscription(
  options: AudioTranscriptionOptions
): Promise<AudioTranscriptionResponse> {
  const unsupported = new Error(
    "Audio transcription is not yet supported by the Gemini client. Use OpenAI client with Whisper model instead."
  );
  throw unsupported;
}
|
|
597
|
+
|
|
598
|
+
/**
 * Generates speech for `options.input` using `generateContent` with the
 * `AUDIO` response modality.
 *
 * The text is sent as a single user turn. The voice is `options.voice`, or
 * "Puck" when none is given. The first response part whose inline data has an
 * `audio/*` MIME type is used.
 *
 * Raw PCM payloads (MIME type mentioning `L16` or `pcm`) are wrapped in a WAV
 * header and reported as `audio/wav`. Any other audio payload is returned
 * untouched and reported under its own MIME type, so `format` always describes
 * the bytes actually returned.
 *
 * @param options - Model, input text and optional voice name.
 * @returns The audio bytes and the MIME type describing them.
 * @throws Error when the response contains no audio data; SDK errors are
 *   logged and rethrown.
 */
async createAudioGeneration(
  options: AudioGenerationOptions
): Promise<AudioGenerationResponse> {
  try {
    const response = await this.client.models.generateContent({
      model: options.model,
      contents: [
        {
          role: "user",
          parts: [{ text: options.input }],
        },
      ],
      config: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: options.voice || "Puck",
            },
          },
        },
      },
    });

    // Gemini returns the audio inline, as base64, in one of the response parts.
    const audioPart = response.candidates?.[0]?.content?.parts?.find(
      (part: any) => part.inlineData?.mimeType?.startsWith("audio/")
    );

    // Also reject a part that has a MIME type but no payload; otherwise
    // Buffer.from(undefined) would fail with an unrelated TypeError.
    if (!audioPart?.inlineData?.data) {
      throw new Error("No audio data returned from Gemini TTS");
    }

    const rawBuffer = Buffer.from(audioPart.inlineData.data, "base64");
    const mimeType: string = audioPart.inlineData.mimeType || "audio/wav";

    // Only raw PCM needs a container; anything else is already a playable format.
    const isRawPcm = /l16|pcm/i.test(mimeType);
    if (!isRawPcm) {
      return {
        audio: rawBuffer,
        format: mimeType,
      };
    }

    // Parse the sample rate from the MIME type, e.g. "audio/L16;codec=pcm;rate=24000".
    const rateMatch = mimeType.match(/rate=(\d+)/i);
    const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000;

    return {
      audio: pcmToWav(rawBuffer, sampleRate),
      format: "audio/wav",
    };
  } catch (error) {
    console.error("Error calling Gemini TTS:", error);
    throw error;
  }
}
|
|
654
|
+
|
|
655
|
+
/**
 * Generates images from `options.prompt`.
 *
 * Two back ends are used depending on the model name:
 * - names containing "imagen" call `generateImages`, requesting
 *   `options.n || 1` images, and are costed at a flat 0.03 USD per returned
 *   image;
 * - every other model calls `generateContent` with the `IMAGE` and `TEXT`
 *   response modalities and is costed from the response's usage metadata
 *   (`options.n` is not forwarded on this path).
 *
 * Entries without image bytes are dropped on both paths, so the result never
 * contains empty `b64_json` values and the Imagen cost reflects only the
 * images actually returned.
 *
 * @param options - Model, prompt and optional image count.
 * @returns Creation timestamp (seconds), base64 images and estimated cost.
 * @throws Error when no image data comes back; SDK errors are logged and
 *   rethrown.
 */
async createImageGeneration(
  options: ImageGenerationOptions
): Promise<ImageGenerationResponse> {
  try {
    const isImagen = options.model?.includes("imagen");

    if (isImagen) {
      const response = await this.client.models.generateImages({
        model: options.model,
        prompt: options.prompt,
        config: {
          numberOfImages: options.n || 1,
        },
      });

      // imageBytes is already base64 text from the API, so it is used as-is.
      // Entries with no bytes are skipped rather than emitted as "".
      const images = (response.generatedImages || [])
        .map((img) => img.image?.imageBytes)
        .filter(
          (bytes): bytes is string =>
            typeof bytes === "string" && bytes.length > 0
        )
        .map((bytes) => ({
          b64_json: bytes,
          revised_prompt: options.prompt,
        }));

      // Mirror the Gemini branch: an empty result is an error, not a success.
      if (images.length === 0) {
        throw new Error("No image data returned from Imagen");
      }

      return {
        created: Math.floor(Date.now() / 1000),
        data: images,
        usd_cost: 0.03 * images.length,
      };
    }

    // Inline image generation through a Gemini model
    // (e.g. gemini-2.0-flash-preview-image-generation).
    const response = await this.client.models.generateContent({
      model: options.model,
      contents: [
        {
          role: "user",
          parts: [{ text: options.prompt }],
        },
      ],
      config: {
        responseModalities: ["IMAGE", "TEXT"],
      },
    });

    const imageParts =
      response.candidates?.[0]?.content?.parts?.filter(
        (part: any) =>
          part.inlineData?.mimeType?.startsWith("image/") &&
          !!part.inlineData.data
      ) || [];

    if (imageParts.length === 0) {
      throw new Error("No image data returned from Gemini");
    }

    const images = imageParts.map((part: any) => ({
      b64_json: part.inlineData.data,
      revised_prompt: options.prompt,
    }));

    const usageMetadata = response.usageMetadata;
    const usdCost = usageMetadata
      ? this.calculateCost(options.model, usageMetadata)
      : undefined;

    return {
      created: Math.floor(Date.now() / 1000),
      data: images,
      usd_cost: usdCost,
    };
  } catch (error) {
    console.error("Error calling Gemini image generation:", error);
    throw error;
  }
}
|
|
734
|
+
|
|
735
|
+
/**
 * Submits a video generation job and returns immediately with its job id.
 *
 * This method does not poll. Use `getVideoStatus()` to check progress and
 * `downloadFile()` / `downloadVideo()` to fetch the result.
 *
 * When `options.duration` is given it is raised to at least 6 seconds before
 * being sent. The cost estimate uses that same effective duration, so it
 * matches what was actually requested. Without a duration nothing is sent and
 * the estimate assumes 5 seconds.
 *
 * @param options - Model, prompt and optional count, duration, resolution and
 *   aspect ratio.
 * @returns Creation timestamp (seconds), an empty `data` array, the operation
 *   name as `jobId`, and an estimated cost of 0.35 USD per second per video.
 * @throws Rethrows any SDK error after logging it.
 */
async createVideoGeneration(
  options: VideoGenerationOptions
): Promise<VideoGenerationResponse> {
  try {
    const videoCount = options.n || 1;

    // Compute the clamped duration once so the request and the cost agree.
    const requestedSeconds = options.duration
      ? Math.max(6, options.duration)
      : undefined;

    const operation = await this.client.models.generateVideos({
      model: options.model,
      prompt: options.prompt,
      config: {
        numberOfVideos: videoCount,
        ...(requestedSeconds !== undefined
          ? { durationSeconds: requestedSeconds }
          : {}),
        ...(options.resolution && { resolution: options.resolution }),
        ...(options.aspect_ratio && { aspectRatio: options.aspect_ratio }),
      },
    });

    // Estimated cost: $0.35 per second of video. Falls back to 5 seconds when
    // the caller did not specify a duration.
    const billedSeconds = requestedSeconds ?? 5;
    const usdCost = videoCount * billedSeconds * 0.35;

    // The operation name doubles as the job id for getVideoStatus / downloadVideo.
    return {
      created: Math.floor(Date.now() / 1000),
      data: [],
      jobId: operation.name,
      usd_cost: usdCost,
    };
  } catch (error) {
    console.error("Error calling Gemini video generation:", error);
    throw error;
  }
}
|
|
770
|
+
|
|
771
|
+
/**
 * Looks up a video generation job and reports its state.
 *
 * The result is one of:
 * - `failed`, with the operation's error serialised as JSON;
 * - `in_progress`, while the operation is not marked done;
 * - `completed`, with one entry per generated video carrying its inline
 *   bytes (`b64_json`) and/or its URI (as both `url` and `fileUri`).
 *
 * @param options - Carries the `jobId` (operation name) to query.
 * @returns The job id, its status and, when completed, the video entries.
 * @throws Rethrows any SDK error after logging it.
 */
async getVideoStatus(options: VideoStatusOptions): Promise<VideoStatusResponse> {
  try {
    const jobId = options.jobId;
    const op = await this.client.operations.getVideosOperation({
      operation: { name: jobId },
    });

    // An error on the operation wins over everything else.
    if (op.error) {
      return { jobId, status: "failed", error: JSON.stringify(op.error) };
    }

    if (!op.done) {
      return { jobId, status: "in_progress" };
    }

    // Finished: collect whatever the API attached to each generated video.
    const finished = op.response?.generatedVideos || [];
    const data = finished.map((entry) => {
      const inlineBytes: string | undefined = entry.video?.videoBytes;
      const location: string | undefined = entry.video?.uri;
      return {
        b64_json: inlineBytes || undefined,
        url: location || undefined,
        fileUri: location || undefined,
      };
    });

    return { jobId, status: "completed", data };
  } catch (error) {
    console.error("Error checking Gemini video status:", error);
    throw error;
  }
}
|
|
814
|
+
|
|
815
|
+
/**
 * Downloads a generated video.
 *
 * This is an alias for `downloadFile()`: the options are forwarded unchanged
 * and its result is returned as-is.
 *
 * @param options - Passed straight through to `downloadFile()`.
 * @returns Whatever `downloadFile()` resolves with.
 */
async downloadVideo(options: FileDownloadOptions): Promise<FileDownloadResponse> {
  const downloaded = await this.downloadFile(options);
  return downloaded;
}
|
|
822
|
+
|
|
823
|
+
/**
 * Uploads in-memory data to the Google GenAI Files API.
 *
 * The data is wrapped in a `Blob` tagged with `options.mimeType` and sent
 * with the given MIME type, display name and `fileName` (passed to the SDK as
 * the upload's `name`).
 *
 * @param options - Data to upload plus its MIME type and naming fields.
 * @returns The uploaded file's name (as `fileId`), URI, a URL (the download
 *   URI when present, otherwise the URI), MIME type and size in bytes when
 *   the API reports one.
 * @throws Rethrows any SDK error after logging it.
 */
async uploadFile(options: FileUploadOptions): Promise<FileUploadResponse> {
  try {
    const payload = new Blob([options.data], { type: options.mimeType });

    const uploaded = await this.client.files.upload({
      file: payload,
      config: {
        mimeType: options.mimeType,
        displayName: options.displayName,
        name: options.fileName,
      },
    });

    // sizeBytes is converted with Number(); absent or empty stays undefined.
    let sizeBytes: number | undefined;
    if (uploaded.sizeBytes) {
      sizeBytes = Number(uploaded.sizeBytes);
    }

    return {
      fileId: uploaded.name,
      uri: uploaded.uri,
      url: uploaded.downloadUri || uploaded.uri,
      mimeType: uploaded.mimeType,
      sizeBytes,
    };
  } catch (error) {
    console.error("Error uploading file to Google GenAI Files API:", error);
    throw error;
  }
}
|
|
850
|
+
|
|
851
|
+
/**
 * Downloads a file from the Google GenAI Files API and returns its bytes.
 *
 * The file is fetched directly over HTTPS instead of through the SDK's
 * `files.download()`. Pass either:
 * - `uri`: a full `https://` URL (e.g. the `Video.uri` of a generated video), or
 * - `fileId`: a Files API resource name, with or without the `files/` prefix
 *   (a full `https://` URL is also accepted here).
 *
 * The client's API key is added as a `key` query parameter only when the
 * target host is a `googleapis.com` host and the URL does not already carry
 * one. It is never sent to other hosts and never appears in error messages.
 *
 * When `options.filePath` is set, the bytes are also written there; parent
 * directories are created as needed.
 *
 * The MIME type is taken from, in order: the extension of the uri/fileId, the
 * response's `Content-Type` header (unless it is the generic
 * `application/octet-stream`), the extension of `filePath`, and finally
 * `video/mp4`.
 *
 * @param options - Source (`uri` or `fileId`) and optional destination path.
 * @returns The downloaded bytes and the inferred MIME type.
 * @throws Error when neither `uri` nor `fileId` is given or the HTTP request
 *   fails; errors are logged and rethrown.
 */
async downloadFile(options: FileDownloadOptions): Promise<FileDownloadResponse> {
  const mimeMap: Record<string, string> = {
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".mov": "video/quicktime",
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".wav": "audio/wav",
    ".mp3": "audio/mpeg",
  };
  const filesApiHost = "generativelanguage.googleapis.com";

  // Lower-cased extension of a path or URL, ignoring any query string.
  const extensionOf = (source: string): string =>
    pathSync.extname(source.split("?")[0]).toLowerCase();

  try {
    const rawUri = options.uri || options.fileId || "";
    if (!rawUri) {
      throw new Error("downloadFile requires either `uri` or `fileId`");
    }

    // A full https URL is used as given; otherwise the Files API download URL
    // is built from the resource name.
    let target: URL;
    if (rawUri.startsWith("https://")) {
      target = new URL(rawUri);
    } else {
      const fileId = rawUri.replace(/^files\//, "");
      target = new URL(
        `https://${filesApiHost}/v1beta/files/${encodeURIComponent(
          fileId
        )}:download?alt=media`
      );
    }

    // Only hand the API key to Google hosts, and only if none is present.
    const isGoogleHost =
      target.hostname === filesApiHost ||
      target.hostname.endsWith(".googleapis.com");
    if (this.apiKey && isGoogleHost && !target.searchParams.has("key")) {
      target.searchParams.set("key", this.apiKey);
    }

    // Key-free copy of the URL for error messages and logs.
    const redacted = new URL(target.toString());
    if (redacted.searchParams.has("key")) {
      redacted.searchParams.set("key", "REDACTED");
    }

    const response = await fetch(target.toString());
    if (!response.ok) {
      throw new Error(
        `HTTP ${response.status} ${response.statusText} downloading ${redacted.toString()}`
      );
    }

    const data = Buffer.from(await response.arrayBuffer());

    // If the caller supplied a filePath, write to it (creating dirs as needed).
    if (options.filePath) {
      await fsSync.promises.mkdir(pathSync.dirname(options.filePath), {
        recursive: true,
      });
      await fsSync.promises.writeFile(options.filePath, data);
    }

    const headerType = response.headers
      .get("content-type")
      ?.split(";")[0]
      .trim()
      .toLowerCase();
    const usableHeaderType =
      headerType && headerType !== "application/octet-stream"
        ? headerType
        : undefined;

    const mimeType =
      mimeMap[extensionOf(rawUri)] ||
      usableHeaderType ||
      (options.filePath ? mimeMap[extensionOf(options.filePath)] : undefined) ||
      "video/mp4";

    return { data, mimeType };
  } catch (error) {
    console.error("Error downloading file from Google GenAI Files API:", error);
    throw error;
  }
}
|
|
561
919
|
}
|