@tyvm/knowhow 0.0.68 → 0.0.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/shell-commands.md +174 -0
- package/package.json +2 -2
- package/src/agents/base/base.ts +1 -3
- package/src/agents/developer/developer.ts +21 -16
- package/src/agents/tools/agentCall.ts +4 -2
- package/src/agents/tools/fileSearch.ts +5 -1
- package/src/agents/tools/list.ts +41 -37
- package/src/agents/tools/startAgentTask.ts +131 -22
- package/src/chat/CliChatService.ts +57 -11
- package/src/chat/modules/AgentModule.ts +72 -12
- package/src/chat/modules/CustomCommandsModule.ts +79 -0
- package/src/chat/modules/InternalChatModule.ts +11 -1
- package/src/chat/modules/ShellCommandModule.ts +96 -0
- package/src/chat/modules/index.ts +1 -0
- package/src/chat/types.ts +14 -2
- package/src/chat.ts +16 -13
- package/src/cli.ts +16 -6
- package/src/clients/anthropic.ts +88 -91
- package/src/clients/gemini.ts +495 -94
- package/src/clients/index.ts +125 -0
- package/src/clients/knowhow.ts +81 -0
- package/src/clients/openai.ts +256 -145
- package/src/clients/pricing/anthropic.ts +90 -0
- package/src/clients/pricing/google.ts +65 -0
- package/src/clients/pricing/index.ts +4 -0
- package/src/clients/pricing/openai.ts +134 -0
- package/src/clients/pricing/xai.ts +62 -0
- package/src/clients/types.ts +170 -1
- package/src/clients/xai.ts +275 -46
- package/src/config.ts +61 -15
- package/src/embeddings.ts +9 -1
- package/src/microphone.ts +15 -16
- package/src/migrations.ts +151 -0
- package/src/plugins/AgentsMdPlugin.ts +118 -0
- package/src/plugins/PluginBase.ts +8 -0
- package/src/plugins/downloader/downloader.ts +5 -6
- package/src/plugins/embedding.ts +10 -8
- package/src/plugins/exec.ts +70 -0
- package/src/plugins/github.ts +120 -74
- package/src/plugins/language.ts +11 -13
- package/src/plugins/plugins.ts +25 -4
- package/src/plugins/tmux.ts +132 -0
- package/src/plugins/types.ts +1 -0
- package/src/plugins/vim.ts +14 -1
- package/src/server/index.ts +2 -0
- package/src/services/AgentSyncFs.ts +417 -0
- package/src/services/{AgentSynchronization.ts → AgentSyncKnowhowWeb.ts} +2 -2
- package/src/services/EventService.ts +0 -1
- package/src/services/KnowhowClient.ts +106 -0
- package/src/services/index.ts +4 -2
- package/src/types.ts +57 -4
- package/src/worker.ts +25 -2
- package/tests/manual/modalities/README.md +157 -0
- package/tests/manual/modalities/google.modalities.test.ts +335 -0
- package/tests/manual/modalities/openai.modalities.test.ts +329 -0
- package/tests/manual/modalities/streaming.test.ts +260 -0
- package/tests/manual/modalities/xai.modalities.test.ts +307 -0
- package/tests/plugins/language/languagePlugin-content-triggers.test.ts +5 -5
- package/tests/plugins/language/languagePlugin-integration.test.ts +1 -1
- package/tests/plugins/language/languagePlugin.test.ts +17 -8
- package/ts_build/package.json +2 -2
- package/ts_build/src/agents/base/base.js +1 -1
- package/ts_build/src/agents/base/base.js.map +1 -1
- package/ts_build/src/agents/developer/developer.js +21 -15
- package/ts_build/src/agents/developer/developer.js.map +1 -1
- package/ts_build/src/agents/tools/agentCall.js +4 -2
- package/ts_build/src/agents/tools/agentCall.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/index.d.ts +1 -1
- package/ts_build/src/agents/tools/fileSearch.js +2 -1
- package/ts_build/src/agents/tools/fileSearch.js.map +1 -1
- package/ts_build/src/agents/tools/github/index.d.ts +1 -1
- package/ts_build/src/agents/tools/list.js +41 -37
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/startAgentTask.d.ts +2 -1
- package/ts_build/src/agents/tools/startAgentTask.js +118 -17
- package/ts_build/src/agents/tools/startAgentTask.js.map +1 -1
- package/ts_build/src/chat/CliChatService.d.ts +4 -0
- package/ts_build/src/chat/CliChatService.js +39 -5
- package/ts_build/src/chat/CliChatService.js.map +1 -1
- package/ts_build/src/chat/modules/AgentModule.d.ts +4 -1
- package/ts_build/src/chat/modules/AgentModule.js +49 -11
- package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
- package/ts_build/src/chat/modules/CustomCommandsModule.d.ts +9 -0
- package/ts_build/src/chat/modules/CustomCommandsModule.js +58 -0
- package/ts_build/src/chat/modules/CustomCommandsModule.js.map +1 -0
- package/ts_build/src/chat/modules/InternalChatModule.d.ts +2 -0
- package/ts_build/src/chat/modules/InternalChatModule.js +10 -0
- package/ts_build/src/chat/modules/InternalChatModule.js.map +1 -1
- package/ts_build/src/chat/modules/ShellCommandModule.d.ts +8 -0
- package/ts_build/src/chat/modules/ShellCommandModule.js +83 -0
- package/ts_build/src/chat/modules/ShellCommandModule.js.map +1 -0
- package/ts_build/src/chat/modules/index.d.ts +1 -0
- package/ts_build/src/chat/modules/index.js +3 -1
- package/ts_build/src/chat/modules/index.js.map +1 -1
- package/ts_build/src/chat/types.d.ts +11 -1
- package/ts_build/src/chat.js +16 -13
- package/ts_build/src/chat.js.map +1 -1
- package/ts_build/src/cli.js +10 -3
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/anthropic.d.ts +6 -1
- package/ts_build/src/clients/anthropic.js +47 -92
- package/ts_build/src/clients/anthropic.js.map +1 -1
- package/ts_build/src/clients/gemini.d.ts +81 -2
- package/ts_build/src/clients/gemini.js +362 -79
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/index.d.ts +9 -1
- package/ts_build/src/clients/index.js +65 -0
- package/ts_build/src/clients/index.js.map +1 -1
- package/ts_build/src/clients/knowhow.d.ts +9 -1
- package/ts_build/src/clients/knowhow.js +43 -0
- package/ts_build/src/clients/knowhow.js.map +1 -1
- package/ts_build/src/clients/openai.d.ts +9 -1
- package/ts_build/src/clients/openai.js +201 -133
- package/ts_build/src/clients/openai.js.map +1 -1
- package/ts_build/src/clients/pricing/anthropic.d.ts +17 -0
- package/ts_build/src/clients/pricing/anthropic.js +93 -0
- package/ts_build/src/clients/pricing/anthropic.js.map +1 -0
- package/ts_build/src/clients/pricing/google.d.ts +73 -0
- package/ts_build/src/clients/pricing/google.js +68 -0
- package/ts_build/src/clients/pricing/google.js.map +1 -0
- package/ts_build/src/clients/pricing/index.d.ts +4 -0
- package/ts_build/src/clients/pricing/index.js +14 -0
- package/ts_build/src/clients/pricing/index.js.map +1 -0
- package/ts_build/src/clients/pricing/openai.d.ts +7 -0
- package/ts_build/src/clients/pricing/openai.js +137 -0
- package/ts_build/src/clients/pricing/openai.js.map +1 -0
- package/ts_build/src/clients/pricing/xai.d.ts +26 -0
- package/ts_build/src/clients/pricing/xai.js +59 -0
- package/ts_build/src/clients/pricing/xai.js.map +1 -0
- package/ts_build/src/clients/types.d.ts +135 -0
- package/ts_build/src/clients/xai.d.ts +9 -1
- package/ts_build/src/clients/xai.js +178 -46
- package/ts_build/src/clients/xai.js.map +1 -1
- package/ts_build/src/config.d.ts +1 -0
- package/ts_build/src/config.js +45 -16
- package/ts_build/src/config.js.map +1 -1
- package/ts_build/src/embeddings.js +8 -1
- package/ts_build/src/embeddings.js.map +1 -1
- package/ts_build/src/microphone.js +7 -9
- package/ts_build/src/microphone.js.map +1 -1
- package/ts_build/src/migrations.d.ts +17 -0
- package/ts_build/src/migrations.js +86 -0
- package/ts_build/src/migrations.js.map +1 -0
- package/ts_build/src/plugins/AgentsMdPlugin.d.ts +13 -0
- package/ts_build/src/plugins/AgentsMdPlugin.js +118 -0
- package/ts_build/src/plugins/AgentsMdPlugin.js.map +1 -0
- package/ts_build/src/plugins/PluginBase.d.ts +1 -0
- package/ts_build/src/plugins/PluginBase.js +3 -0
- package/ts_build/src/plugins/PluginBase.js.map +1 -1
- package/ts_build/src/plugins/downloader/downloader.js +5 -5
- package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
- package/ts_build/src/plugins/embedding.js +9 -8
- package/ts_build/src/plugins/embedding.js.map +1 -1
- package/ts_build/src/plugins/exec.d.ts +10 -0
- package/ts_build/src/plugins/exec.js +56 -0
- package/ts_build/src/plugins/exec.js.map +1 -0
- package/ts_build/src/plugins/github.js +93 -51
- package/ts_build/src/plugins/github.js.map +1 -1
- package/ts_build/src/plugins/language.js +14 -11
- package/ts_build/src/plugins/language.js.map +1 -1
- package/ts_build/src/plugins/plugins.d.ts +1 -0
- package/ts_build/src/plugins/plugins.js +19 -1
- package/ts_build/src/plugins/plugins.js.map +1 -1
- package/ts_build/src/plugins/tmux.d.ts +14 -0
- package/ts_build/src/plugins/tmux.js +108 -0
- package/ts_build/src/plugins/tmux.js.map +1 -0
- package/ts_build/src/plugins/types.d.ts +1 -0
- package/ts_build/src/plugins/vim.js +11 -1
- package/ts_build/src/plugins/vim.js.map +1 -1
- package/ts_build/src/server/index.js.map +1 -1
- package/ts_build/src/services/AgentSyncFs.d.ts +34 -0
- package/ts_build/src/services/AgentSyncFs.js +325 -0
- package/ts_build/src/services/AgentSyncFs.js.map +1 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.d.ts +29 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.js +178 -0
- package/ts_build/src/services/AgentSyncKnowhowWeb.js.map +1 -0
- package/ts_build/src/services/AgentSynchronization.d.ts +1 -1
- package/ts_build/src/services/AgentSynchronization.js +3 -3
- package/ts_build/src/services/AgentSynchronization.js.map +1 -1
- package/ts_build/src/services/EventService.js.map +1 -1
- package/ts_build/src/services/KnowhowClient.d.ts +9 -1
- package/ts_build/src/services/KnowhowClient.js +58 -0
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/index.d.ts +2 -1
- package/ts_build/src/services/index.js +2 -1
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/types.d.ts +26 -1
- package/ts_build/src/types.js +45 -4
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/PersistentInputManager.d.ts +28 -0
- package/ts_build/src/utils/PersistentInputManager.js +293 -0
- package/ts_build/src/utils/PersistentInputManager.js.map +1 -0
- package/ts_build/src/worker.js +11 -2
- package/ts_build/src/worker.js.map +1 -1
- package/ts_build/tests/manual/modalities/google.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/google.modalities.test.js +252 -0
- package/ts_build/tests/manual/modalities/google.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.js +252 -0
- package/ts_build/tests/manual/modalities/openai.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/streaming.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/streaming.test.js +206 -0
- package/ts_build/tests/manual/modalities/streaming.test.js.map +1 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.d.ts +1 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.js +226 -0
- package/ts_build/tests/manual/modalities/xai.modalities.test.js.map +1 -0
- package/ts_build/tests/manual/persistent-input-test.d.ts +1 -0
- package/ts_build/tests/manual/persistent-input-test.js +35 -0
- package/ts_build/tests/manual/persistent-input-test.js.map +1 -0
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +5 -5
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-integration.test.js +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-integration.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin.test.js +17 -7
- package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
package/src/clients/gemini.ts
CHANGED
|
@@ -9,8 +9,12 @@ import {
|
|
|
9
9
|
ToolConfig,
|
|
10
10
|
UsageMetadata,
|
|
11
11
|
} from "@google/genai";
|
|
12
|
+
import * as os from "os";
|
|
13
|
+
import * as fsSync from "fs";
|
|
14
|
+
import * as pathSync from "path";
|
|
12
15
|
import { wait } from "../utils";
|
|
13
16
|
import { EmbeddingModels, Models } from "../types";
|
|
17
|
+
import { GeminiTextPricing } from "./pricing";
|
|
14
18
|
|
|
15
19
|
import {
|
|
16
20
|
GenericClient,
|
|
@@ -23,6 +27,20 @@ import {
|
|
|
23
27
|
MessageContent,
|
|
24
28
|
ToolCall,
|
|
25
29
|
OutputMessage,
|
|
30
|
+
AudioTranscriptionOptions,
|
|
31
|
+
AudioTranscriptionResponse,
|
|
32
|
+
AudioGenerationOptions,
|
|
33
|
+
AudioGenerationResponse,
|
|
34
|
+
ImageGenerationOptions,
|
|
35
|
+
ImageGenerationResponse,
|
|
36
|
+
VideoGenerationOptions,
|
|
37
|
+
VideoGenerationResponse,
|
|
38
|
+
VideoStatusOptions,
|
|
39
|
+
VideoStatusResponse,
|
|
40
|
+
FileUploadOptions,
|
|
41
|
+
FileUploadResponse,
|
|
42
|
+
FileDownloadOptions,
|
|
43
|
+
FileDownloadResponse,
|
|
26
44
|
} from "./types";
|
|
27
45
|
|
|
28
46
|
function getMimeTypeFromUrl(url: string): string {
|
|
@@ -32,6 +50,51 @@ function getMimeTypeFromUrl(url: string): string {
|
|
|
32
50
|
return "image/jpeg";
|
|
33
51
|
}
|
|
34
52
|
|
|
53
|
+
/**
 * Guesses a video MIME type from the file extension of a URL.
 *
 * Only the path portion is inspected: any query string (`?…`) or fragment
 * (`#…`) is ignored and the comparison is case-insensitive, so signed URLs
 * such as `clip.webm?token=abc` or upper-case extensions such as `CLIP.MOV`
 * are still recognised.
 *
 * @param url URL (or file name) of the video.
 * @returns The matching MIME type, or `"video/mp4"` when the extension is
 *          missing or not one of .mp4 / .webm / .mov / .avi.
 */
function getVideoMimeTypeFromUrl(url: string): string {
  // Cut at the first '?' or '#', whichever comes first.
  const cut = url.search(/[?#]/);
  const path = (cut === -1 ? url : url.slice(0, cut)).toLowerCase();

  if (path.endsWith(".mp4")) return "video/mp4";
  if (path.endsWith(".webm")) return "video/webm";
  if (path.endsWith(".mov")) return "video/quicktime";
  if (path.endsWith(".avi")) return "video/x-msvideo";
  return "video/mp4";
}
|
|
60
|
+
|
|
61
|
+
/**
 * Wraps raw PCM samples in a WAV container by writing a 44-byte header
 * (RIFF descriptor, "fmt " chunk, "data" chunk) in front of them.
 * Gemini TTS returns raw 16-bit PCM (audio/L16), which is not playable
 * until it carries such a header.
 *
 * @param pcmData       Raw PCM sample bytes.
 * @param sampleRate    Samples per second (default 24000).
 * @param numChannels   Channel count (default 1).
 * @param bitsPerSample Bit depth of one sample (default 16).
 * @returns A new buffer holding the header followed by the PCM bytes.
 */
function pcmToWav(
  pcmData: Buffer,
  sampleRate: number = 24000,
  numChannels: number = 1,
  bitsPerSample: number = 16
): Buffer {
  const HEADER_BYTES = 44;
  const payloadBytes = pcmData.length;
  const bytesPerSample = bitsPerSample / 8;

  const wav = Buffer.alloc(HEADER_BYTES + payloadBytes);

  // Four-character chunk tags and their fixed offsets in the header.
  const tags: Array<[string, number]> = [
    ["RIFF", 0],
    ["WAVE", 8],
    ["fmt ", 12],
    ["data", 36],
  ];
  for (const [tag, offset] of tags) {
    wav.write(tag, offset);
  }

  // RIFF descriptor: total file size minus the 8-byte "RIFF"+size prefix.
  wav.writeUInt32LE(36 + payloadBytes, 4);

  // "fmt " chunk body.
  wav.writeUInt32LE(16, 16); // chunk size
  wav.writeUInt16LE(1, 20); // PCM format
  wav.writeUInt16LE(numChannels, 22);
  wav.writeUInt32LE(sampleRate, 24);
  wav.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // byte rate
  wav.writeUInt16LE(numChannels * bytesPerSample, 32); // block align
  wav.writeUInt16LE(bitsPerSample, 34);

  // "data" chunk: payload length, then the samples themselves.
  wav.writeUInt32LE(payloadBytes, 40);
  wav.set(pcmData, HEADER_BYTES);

  return wav;
}
|
|
97
|
+
|
|
35
98
|
export class GenericGeminiClient implements GenericClient {
|
|
36
99
|
private client: GoogleGenAI;
|
|
37
100
|
private apiKey?: string;
|
|
@@ -64,16 +127,33 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
64
127
|
return { text: part.text };
|
|
65
128
|
}
|
|
66
129
|
if (part.type === "image_url") {
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
130
|
+
const url = part.image_url.url;
|
|
131
|
+
if (url.startsWith("data:")) {
|
|
132
|
+
const [header, base64Data] = url.split(",");
|
|
133
|
+
const mimeType = header.split(":")[1].split(";")[0];
|
|
134
|
+
return {
|
|
135
|
+
inlineData: {
|
|
136
|
+
data: base64Data,
|
|
137
|
+
mimeType,
|
|
138
|
+
},
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// If it's a File API URI
|
|
143
|
+
if (url.startsWith("https://generativelanguage.googleapis.com")) {
|
|
144
|
+
return {
|
|
145
|
+
fileData: {
|
|
146
|
+
fileUri: url,
|
|
147
|
+
mimeType: getMimeTypeFromUrl(url),
|
|
148
|
+
},
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
if (part.type === "video_url") {
|
|
153
|
+
const mimeType = getVideoMimeTypeFromUrl(part.video_url.url);
|
|
74
154
|
return {
|
|
75
155
|
fileData: {
|
|
76
|
-
|
|
156
|
+
fileUri: part.video_url.url,
|
|
77
157
|
mimeType,
|
|
78
158
|
},
|
|
79
159
|
};
|
|
@@ -81,9 +161,9 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
81
161
|
// Handle other potential generic message content types if necessary
|
|
82
162
|
// For now, only text and image_url are explicitly handled.
|
|
83
163
|
console.warn(
|
|
84
|
-
`Unsupported generic message content part type: ${
|
|
164
|
+
`Unsupported generic message content part type: ${part.type}`
|
|
85
165
|
);
|
|
86
|
-
return { text: `[Unsupported content type: ${
|
|
166
|
+
return { text: `[Unsupported content type: ${part.type}]` };
|
|
87
167
|
})
|
|
88
168
|
.filter((part) => !!part); // Filter out any null/undefined parts if transformation fails
|
|
89
169
|
}
|
|
@@ -119,7 +199,7 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
119
199
|
(systemInstruction ? systemInstruction + "\n" : "") +
|
|
120
200
|
this.transformContentParts(msg.content)
|
|
121
201
|
.filter((p) => "text" in p && typeof p.text === "string")
|
|
122
|
-
.map((p) =>
|
|
202
|
+
.map((p) => p.text)
|
|
123
203
|
.join("\n");
|
|
124
204
|
}
|
|
125
205
|
} else if (msg.role === "user" || msg.role === "assistant") {
|
|
@@ -207,6 +287,69 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
207
287
|
return { systemInstruction, contents: googleContents };
|
|
208
288
|
}
|
|
209
289
|
|
|
290
|
+
/**
 * Recursively copies a JSON schema while stripping keywords this client does
 * not send to Gemini and upper-casing `type` values.
 *
 * Dropped schema keywords: `additionalProperties`, `$ref`, `$defs`, and
 * `positional` (an internal knowhow flag, not part of JSON Schema).
 *
 * The keyword filter applies only at schema level. The keys of a
 * `properties` object are user-defined parameter names, so they are always
 * kept (a parameter may legitimately be called "positional" or "type");
 * only the sub-schema under each name is cleaned.
 *
 * @param schema The schema (or schema fragment) to clean. Non-objects are
 *               returned unchanged; arrays are cleaned element by element.
 * @returns A new cleaned structure; the input is not mutated.
 */
private cleanSchemaForGemini(schema: any): any {
  if (!schema || typeof schema !== "object") {
    return schema;
  }

  if (Array.isArray(schema)) {
    return schema.map((item) => this.cleanSchemaForGemini(item));
  }

  const unsupportedKeywords = new Set([
    "additionalProperties",
    "$ref",
    "$defs",
    "positional",
  ]);
  const cleaned: any = {};

  for (const [key, value] of Object.entries(schema)) {
    if (unsupportedKeywords.has(key)) {
      continue;
    }

    if (
      key === "properties" &&
      value !== null &&
      typeof value === "object" &&
      !Array.isArray(value)
    ) {
      // Map of parameter name -> schema: keep every name, clean each schema.
      const properties: any = {};
      for (const [propName, propSchema] of Object.entries(value)) {
        properties[propName] = this.cleanSchemaForGemini(propSchema);
      }
      cleaned[key] = properties;
    } else if (key === "type" && typeof value === "string") {
      cleaned[key] = value.toUpperCase();
    } else if (key === "type" && Array.isArray(value)) {
      // Type arrays (e.g. ["string", "null"]) are upper-cased element-wise.
      // NOTE(review): Gemini's Schema.type appears to be single-valued;
      // unions may need `nullable` / `anyOf` instead - confirm against the API.
      cleaned[key] = value.map((t: string) =>
        typeof t === "string" ? t.toUpperCase() : t
      );
    } else if (typeof value === "object" && value !== null) {
      cleaned[key] = this.cleanSchemaForGemini(value);
    } else {
      // Primitive values are copied as-is.
      cleaned[key] = value;
    }
  }

  return cleaned;
}
|
|
352
|
+
|
|
210
353
|
/**
|
|
211
354
|
* Transforms generic Tool array into Google GenAI tools format.
|
|
212
355
|
* @param tools The generic tool array.
|
|
@@ -218,28 +361,15 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
218
361
|
}
|
|
219
362
|
|
|
220
363
|
const functionDeclarations: FunctionDeclaration[] = tools.map((tool) => {
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
key
|
|
226
|
-
)
|
|
227
|
-
) {
|
|
228
|
-
continue;
|
|
229
|
-
}
|
|
364
|
+
// Clean the entire parameters schema to remove unsupported fields
|
|
365
|
+
const cleanedParameters = this.cleanSchemaForGemini(
|
|
366
|
+
tool.function.parameters
|
|
367
|
+
);
|
|
230
368
|
|
|
231
|
-
tool.function.parameters.properties[key].type =
|
|
232
|
-
tool.function.parameters.properties[key].type.toUpperCase();
|
|
233
|
-
}
|
|
234
369
|
return {
|
|
235
370
|
name: tool.function.name,
|
|
236
371
|
description: tool.function.description || "",
|
|
237
|
-
|
|
238
|
-
parameters: {
|
|
239
|
-
type: "OBJECT",
|
|
240
|
-
properties: tool.function.parameters.properties, // Assume direct compatibility for properties structure
|
|
241
|
-
required: tool.function.parameters.required || [],
|
|
242
|
-
} as any,
|
|
372
|
+
parameters: cleanedParameters,
|
|
243
373
|
};
|
|
244
374
|
});
|
|
245
375
|
|
|
@@ -348,75 +478,16 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
348
478
|
usd_cost: usdCost,
|
|
349
479
|
};
|
|
350
480
|
} catch (error) {
|
|
351
|
-
console.error(
|
|
481
|
+
console.error(
|
|
482
|
+
"Error calling Google GenAI generateContent:",
|
|
483
|
+
error.message
|
|
484
|
+
);
|
|
352
485
|
throw error;
|
|
353
486
|
}
|
|
354
487
|
}
|
|
355
488
|
|
|
356
|
-
pricesPerMillion()
|
|
357
|
-
return
|
|
358
|
-
[Models.google.Gemini_3_Preview]: {
|
|
359
|
-
input: 2,
|
|
360
|
-
input_gt_200k: 4,
|
|
361
|
-
output: 12,
|
|
362
|
-
output_gt_200k: 18,
|
|
363
|
-
context_caching: 0.2,
|
|
364
|
-
context_caching_gt_200k: 0.4,
|
|
365
|
-
},
|
|
366
|
-
[Models.google.Gemini_25_Flash_Preview]: {
|
|
367
|
-
input: 0.3,
|
|
368
|
-
output: 2.5,
|
|
369
|
-
thinking_output: 3.5,
|
|
370
|
-
context_caching: 0.0375,
|
|
371
|
-
},
|
|
372
|
-
[Models.google.Gemini_25_Pro_Preview]: {
|
|
373
|
-
input: 1.25,
|
|
374
|
-
input_gt_200k: 2.5,
|
|
375
|
-
output: 10.0,
|
|
376
|
-
output_gt_200k: 15.0,
|
|
377
|
-
context_caching: 0.125,
|
|
378
|
-
context_caching_gt_200k: 0.25,
|
|
379
|
-
},
|
|
380
|
-
[Models.google.Gemini_20_Flash]: {
|
|
381
|
-
input: 0.1,
|
|
382
|
-
output: 0.4,
|
|
383
|
-
context_caching: 0.025,
|
|
384
|
-
},
|
|
385
|
-
[Models.google.Gemini_20_Flash_Preview_Image_Generation]: {
|
|
386
|
-
input: 0.1,
|
|
387
|
-
output: 0.4,
|
|
388
|
-
image_generation: 0.039,
|
|
389
|
-
},
|
|
390
|
-
[Models.google.Gemini_20_Flash_Lite]: {
|
|
391
|
-
input: 0.075,
|
|
392
|
-
output: 0.3,
|
|
393
|
-
},
|
|
394
|
-
[Models.google.Gemini_15_Flash]: {
|
|
395
|
-
input: 0.075,
|
|
396
|
-
output: 0.3,
|
|
397
|
-
context_caching: 0.01875,
|
|
398
|
-
},
|
|
399
|
-
[Models.google.Gemini_15_Flash_8B]: {
|
|
400
|
-
input: 0.0375,
|
|
401
|
-
output: 0.15,
|
|
402
|
-
context_caching: 0.01,
|
|
403
|
-
},
|
|
404
|
-
[Models.google.Gemini_15_Pro]: {
|
|
405
|
-
input: 1.25,
|
|
406
|
-
output: 5.0,
|
|
407
|
-
context_caching: 0.3125,
|
|
408
|
-
},
|
|
409
|
-
[Models.google.Imagen_3]: {
|
|
410
|
-
image_generation: 0.03,
|
|
411
|
-
},
|
|
412
|
-
[Models.google.Veo_2]: {
|
|
413
|
-
video_generation: 0.35,
|
|
414
|
-
},
|
|
415
|
-
[EmbeddingModels.google.Gemini_Embedding]: {
|
|
416
|
-
input: 0, // Free of charge
|
|
417
|
-
output: 0, // Free of charge
|
|
418
|
-
},
|
|
419
|
-
};
|
|
489
|
+
/**
 * Exposes the Gemini pricing table.
 *
 * @returns The shared `GeminiTextPricing` object imported from `./pricing`.
 */
pricesPerMillion() {
  const pricingTable = GeminiTextPricing;
  return pricingTable;
}
|
|
421
492
|
|
|
422
493
|
calculateCost(model: string, usage: UsageMetadata): number | undefined {
|
|
@@ -515,4 +586,334 @@ export class GenericGeminiClient implements GenericClient {
|
|
|
515
586
|
throw error;
|
|
516
587
|
}
|
|
517
588
|
}
|
|
589
|
+
|
|
590
|
+
/**
 * Audio transcription is not implemented for the Gemini client; every call
 * rejects with an explanatory error.
 *
 * @param options Transcription request (ignored).
 * @throws Error always.
 */
async createAudioTranscription(
  options: AudioTranscriptionOptions
): Promise<AudioTranscriptionResponse> {
  const reason =
    "Audio transcription is not yet supported by the Gemini client. Use OpenAI client with Whisper model instead.";
  throw new Error(reason);
}
|
|
597
|
+
|
|
598
|
+
/**
 * Generates speech from text via `generateContent` with the AUDIO response
 * modality.
 *
 * When the returned audio is raw PCM (MIME type mentioning L16 or pcm) it is
 * wrapped in a WAV header and reported as `"audio/wav"`. Any other audio
 * MIME type is returned untouched and reported under its own MIME type, so
 * the `format` field always describes the bytes actually returned.
 *
 * @param options Model, input text and optional voice name (defaults to "Puck").
 * @returns The audio bytes and the MIME type describing them.
 * @throws Error when the response carries no inline audio data; errors from
 *         the API call are logged and rethrown.
 */
async createAudioGeneration(
  options: AudioGenerationOptions
): Promise<AudioGenerationResponse> {
  try {
    const response = await this.client.models.generateContent({
      model: options.model,
      contents: [
        {
          role: "user",
          parts: [{ text: options.input }],
        },
      ],
      config: {
        responseModalities: ["AUDIO"],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: options.voice || "Puck",
            },
          },
        },
      },
    });

    // Gemini returns the audio inline in one of the response parts.
    const audioPart = response.candidates?.[0]?.content?.parts?.find(
      (part: any) => part.inlineData?.mimeType?.startsWith("audio/")
    );
    const inlineData = audioPart?.inlineData;

    // Guard on the payload itself: Buffer.from(undefined) would otherwise
    // surface as an unrelated TypeError.
    if (!inlineData?.data) {
      throw new Error("No audio data returned from Gemini TTS");
    }

    const rawBuffer = Buffer.from(inlineData.data, "base64");
    const mimeType = inlineData.mimeType || "audio/wav";

    // Already a container format: hand it back under its real MIME type.
    const isRawPcm = /l16|pcm/i.test(mimeType);
    if (!isRawPcm) {
      return {
        audio: rawBuffer,
        format: mimeType,
      };
    }

    // Sample rate is advertised in the MIME type,
    // e.g. "audio/L16;codec=pcm;rate=24000".
    const rateMatch = mimeType.match(/rate=(\d+)/i);
    const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000;

    return {
      audio: pcmToWav(rawBuffer, sampleRate),
      format: "audio/wav",
    };
  } catch (error) {
    console.error("Error calling Gemini TTS:", error);
    throw error;
  }
}
|
|
654
|
+
|
|
655
|
+
/**
 * Generates images with either of two back ends, chosen by model name:
 *
 * - model names containing "imagen" go through `generateImages`;
 * - everything else goes through `generateContent` with IMAGE+TEXT
 *   response modalities, and the inline image parts are collected.
 *
 * @param options Model, prompt and (Imagen path only) image count `n`.
 * @returns Creation timestamp in seconds, base64 images, and a cost figure
 *          (flat 0.03 per image on the Imagen path; `calculateCost` on the
 *          usage metadata, if any, on the other path).
 * @throws Error when the `generateContent` path returns no image parts;
 *         API errors are logged and rethrown.
 */
async createImageGeneration(
  options: ImageGenerationOptions
): Promise<ImageGenerationResponse> {
  try {
    const usesImagenEndpoint = options.model?.includes("imagen");

    if (!usesImagenEndpoint) {
      // Gemini inline image generation
      // (e.g., gemini-2.0-flash-preview-image-generation).
      const contentResponse = await this.client.models.generateContent({
        model: options.model,
        contents: [
          {
            role: "user",
            parts: [{ text: options.prompt }],
          },
        ],
        config: {
          responseModalities: ["IMAGE", "TEXT"],
        },
      });

      const parts = contentResponse.candidates?.[0]?.content?.parts;
      const inlineImages =
        parts?.filter((part: any) =>
          part.inlineData?.mimeType?.startsWith("image/")
        ) || [];

      if (inlineImages.length === 0) {
        throw new Error("No image data returned from Gemini");
      }

      const usage = contentResponse.usageMetadata;

      return {
        created: Math.floor(Date.now() / 1000),
        data: inlineImages.map((part: any) => ({
          b64_json: part.inlineData.data,
          revised_prompt: options.prompt,
        })),
        usd_cost: usage ? this.calculateCost(options.model, usage) : undefined,
      };
    }

    // Imagen models use the dedicated generateImages endpoint.
    const imagenResponse = await this.client.models.generateImages({
      model: options.model,
      prompt: options.prompt,
      config: {
        numberOfImages: options.n || 1,
      },
    });

    // imageBytes is already base64 text from the API, so it is passed
    // through without re-encoding; a missing payload becomes "".
    const results = (imagenResponse.generatedImages || []).map((img) => ({
      b64_json: img.image?.imageBytes || "",
      revised_prompt: options.prompt,
    }));

    return {
      created: Math.floor(Date.now() / 1000),
      data: results,
      usd_cost: 0.03 * results.length,
    };
  } catch (error) {
    console.error("Error calling Gemini image generation:", error);
    throw error;
  }
}
|
|
734
|
+
|
|
735
|
+
/**
 * Submits a video generation job and returns immediately; it does not poll.
 * Use `getVideoStatus()` with the returned `jobId` to poll, and
 * `downloadVideo()` / `downloadFile()` to fetch the result.
 *
 * A caller-supplied duration is raised to at least 6 seconds before it is
 * sent, and the cost estimate uses that same clamped value so the estimate
 * matches the job actually requested.
 *
 * @param options Model, prompt, and optional `n`, `duration`, `resolution`
 *                and `aspect_ratio`.
 * @returns Creation timestamp in seconds, an empty `data` array, the
 *          operation name as `jobId`, and an estimated cost of 0.35 per
 *          second per video.
 * @throws Rethrows API errors after logging them.
 */
async createVideoGeneration(
  options: VideoGenerationOptions
): Promise<VideoGenerationResponse> {
  try {
    const videoCount = options.n || 1;

    // Duration actually sent to the API (undefined = let the API decide).
    const requestedSeconds = options.duration
      ? Math.max(6, options.duration)
      : undefined;

    const operation = await this.client.models.generateVideos({
      model: options.model,
      prompt: options.prompt,
      config: {
        numberOfVideos: videoCount,
        ...(requestedSeconds !== undefined && {
          durationSeconds: requestedSeconds,
        }),
        ...(options.resolution && { resolution: options.resolution }),
        ...(options.aspect_ratio && { aspectRatio: options.aspect_ratio }),
      },
    });

    // Estimated cost: $0.35 per second of video, based on the clamped
    // duration when one was requested.
    // NOTE(review): the 5-second fallback is kept from the original code and
    // may not match the API's real default duration - confirm.
    const billedSeconds = requestedSeconds ?? 5;
    const usdCost = videoCount * billedSeconds * 0.35;

    return {
      created: Math.floor(Date.now() / 1000),
      data: [],
      jobId: operation.name,
      usd_cost: usdCost,
    };
  } catch (error) {
    console.error("Error calling Gemini video generation:", error);
    throw error;
  }
}
|
|
770
|
+
|
|
771
|
+
/**
 * Looks up a video generation operation by name and reports its state.
 *
 * Outcomes, checked in this order:
 * 1. the operation carries an error -> `"failed"` with the error as JSON;
 * 2. the operation is not done      -> `"in_progress"`;
 * 3. otherwise                      -> `"completed"` with one entry per
 *    generated video (inline bytes and/or URI, when present).
 *
 * @param options Holds the `jobId` (operation name) to query.
 * @returns The job id, its status, and error or data as applicable.
 * @throws Rethrows API errors after logging them.
 */
async getVideoStatus(options: VideoStatusOptions): Promise<VideoStatusResponse> {
  try {
    const jobId = options.jobId;
    const op = await this.client.operations.getVideosOperation({
      operation: { name: jobId },
    });

    if (op.error) {
      return { jobId, status: "failed", error: JSON.stringify(op.error) };
    }

    if (!op.done) {
      return { jobId, status: "in_progress" };
    }

    // Finished: surface whatever payload each generated video carries.
    const finished = op.response?.generatedVideos || [];
    const data = finished.map((entry) => {
      const inlineBytes: string | undefined = entry.video?.videoBytes;
      const location: string | undefined = entry.video?.uri;
      return {
        b64_json: inlineBytes || undefined,
        url: location || undefined,
        fileUri: location || undefined,
      };
    });

    return { jobId, status: "completed", data };
  } catch (error) {
    console.error("Error checking Gemini video status:", error);
    throw error;
  }
}
|
|
814
|
+
|
|
815
|
+
/**
 * Download a generated video.
 *
 * This is a straight pass-through to `downloadFile`: the options are
 * forwarded untouched and its result is returned as-is. Identify the video
 * with either `fileId` (the files/* name) or `uri` (the full URI).
 */
async downloadVideo(options: FileDownloadOptions): Promise<FileDownloadResponse> {
  const downloaded = await this.downloadFile(options);
  return downloaded;
}
|
|
822
|
+
|
|
823
|
+
/**
 * Send a file to the Google GenAI Files API and describe the stored result.
 *
 * @param options - `data` and `mimeType` form the uploaded Blob; `mimeType`,
 *   `displayName` and `fileName` (sent as `name`) are passed as upload config.
 * @returns The stored file's name (as `fileId`), `uri`, a `url` that prefers
 *   `downloadUri` over `uri`, its `mimeType`, and `sizeBytes` converted to a
 *   number (or `undefined` when the API reports no size).
 * @throws Re-throws whatever the upload raised, after logging it.
 */
async uploadFile(options: FileUploadOptions): Promise<FileUploadResponse> {
  try {
    const { data, mimeType, displayName, fileName } = options;

    const stored = await this.client.files.upload({
      file: new Blob([data], { type: mimeType }),
      config: { mimeType, displayName, name: fileName },
    });

    // The API reports the size in a non-numeric form; normalize it to a number.
    const sizeBytes = stored.sizeBytes ? Number(stored.sizeBytes) : undefined;

    return {
      fileId: stored.name,
      uri: stored.uri,
      url: stored.downloadUri || stored.uri,
      mimeType: stored.mimeType,
      sizeBytes,
    };
  } catch (error) {
    console.error("Error uploading file to Google GenAI Files API:", error);
    throw error;
  }
}
|
|
850
|
+
|
|
851
|
+
/**
 * Download a file from the Google GenAI Files API.
 *
 * The bytes are fetched directly over HTTP and returned as a Buffer; the
 * SDK's `files.download()` is deliberately not used (see the note in the
 * body). Identify the file with either:
 * - `uri`: a full `https://` URI (e.g. the Video.uri of a GeneratedVideo),
 *   which is requested as-is, or
 * - `fileId`: a files/* resource name (e.g. "files/abc-123"), with or
 *   without the leading "files/". A full `https://` URI is accepted here too.
 *
 * `uri` takes precedence when both are supplied. The API key is added as the
 * `key` query parameter unless the URL already carries one. When `filePath`
 * is set, the bytes are also written there (parent directories are created).
 *
 * @param options - `uri` / `fileId` select the file; `filePath` is an
 *   optional destination on disk.
 * @returns The file bytes plus a MIME type inferred from the first of `uri`,
 *   `fileId`, `filePath` that has a recognized extension ("video/mp4" when
 *   none does).
 * @throws Error when neither `uri` nor `fileId` is given, or when the HTTP
 *   response is not OK. The API key is redacted from the error message.
 */
async downloadFile(options: FileDownloadOptions): Promise<FileDownloadResponse> {
  const mimeMap: Record<string, string> = {
    ".mp4": "video/mp4",
    ".webm": "video/webm",
    ".mov": "video/quicktime",
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".wav": "audio/wav",
    ".mp3": "audio/mpeg",
  };

  try {
    // The Google GenAI SDK's files.download() uses an async pipe that is NOT
    // properly awaited, so we fetch the file directly via HTTP instead.
    const rawUri = options.uri || options.fileId || "";
    if (!rawUri) {
      // Fail fast instead of requesting a malformed "files/:download" URL.
      throw new Error("downloadFile requires either `uri` or `fileId`");
    }

    let downloadUrl: string;
    if (rawUri.startsWith("https://")) {
      // Full URL: use it directly, adding the API key only if it is missing.
      if (/[?&]key=/.test(rawUri)) {
        downloadUrl = rawUri;
      } else {
        const sep = rawUri.includes("?") ? "&" : "?";
        downloadUrl = `${rawUri}${sep}key=${this.apiKey}`;
      }
    } else {
      // Resource name: strip a leading "files/" to get just the file ID and
      // build the Files API download URL from it.
      const fileId = rawUri.replace(/^files\//, "");
      downloadUrl = `https://generativelanguage.googleapis.com/v1beta/files/${fileId}:download?alt=media&key=${this.apiKey}`;
    }

    const response = await fetch(downloadUrl);
    if (!response.ok) {
      // Never put the API key into an error message or log line.
      const safeUrl = downloadUrl.replace(/([?&]key=)[^&#]*/, "$1REDACTED");
      throw new Error(`HTTP ${response.status} ${response.statusText} downloading ${safeUrl}`);
    }

    const data = Buffer.from(await response.arrayBuffer());

    // If caller supplied a filePath, write to it (creating dirs as needed)
    if (options.filePath) {
      fsSync.mkdirSync(pathSync.dirname(options.filePath), { recursive: true });
      fsSync.writeFileSync(options.filePath, data);
    }

    // Infer the MIME type from the URI/fileId first, then from the path.
    // Each candidate is tried in turn so an extension-less resource name
    // (e.g. "files/abc-123") still falls through to filePath.
    let mimeType = "video/mp4";
    for (const candidate of [options.uri, options.fileId, options.filePath]) {
      if (!candidate) {
        continue;
      }
      const ext = pathSync.extname(candidate.split("?")[0]).toLowerCase();
      const known = mimeMap[ext];
      if (known) {
        mimeType = known;
        break;
      }
    }

    return { data, mimeType };
  } catch (error) {
    console.error("Error downloading file from Google GenAI Files API:", error);
    throw error;
  }
}
|
|
518
919
|
}
|