@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,117 @@
1
+ /**
2
+ * GeminiLyriaRealtimeSession — WebSocket-based real-time streaming music generation.
3
+ *
4
+ * Uses Lyria RealTime (`lyria-realtime-exp`) via the Gemini Live API WebSocket.
5
+ * Provides both a full interactive session API and a one-shot convenience method.
6
+ *
7
+ * Protocol:
8
+ * - WebSocket URL: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent
9
+ * - Auth: API key as query parameter
10
+ * - Client → Server: setup, musicInput (weightedPrompts, musicGenerationConfig, playbackControl)
11
+ * - Server → Client: serverContent.audioChunks (base64 PCM s16le, 48kHz, stereo)
12
+ *
13
+ * Docs: https://ai.google.dev/gemini-api/docs/realtime-music-generation
14
+ */
15
+ export interface WeightedPrompt {
16
+ text: string;
17
+ weight: number;
18
+ }
19
+ export type MusicScale = "C_MAJOR_A_MINOR" | "D_FLAT_MAJOR_B_FLAT_MINOR" | "D_MAJOR_B_MINOR" | "E_FLAT_MAJOR_C_MINOR" | "E_MAJOR_D_FLAT_MINOR" | "F_MAJOR_D_MINOR" | "G_FLAT_MAJOR_E_FLAT_MINOR" | "G_MAJOR_E_MINOR" | "A_FLAT_MAJOR_F_MINOR" | "A_MAJOR_G_FLAT_MINOR" | "B_FLAT_MAJOR_G_MINOR" | "B_MAJOR_A_FLAT_MINOR" | "SCALE_UNSPECIFIED";
20
+ export type MusicGenerationMode = "QUALITY" | "DIVERSITY" | "VOCALIZATION";
21
+ export interface MusicGenerationConfig {
22
+ bpm?: number;
23
+ density?: number;
24
+ brightness?: number;
25
+ guidance?: number;
26
+ scale?: MusicScale;
27
+ temperature?: number;
28
+ topK?: number;
29
+ seed?: number;
30
+ muteBass?: boolean;
31
+ muteDrums?: boolean;
32
+ onlyBassAndDrums?: boolean;
33
+ musicGenerationMode?: MusicGenerationMode;
34
+ audioFormat?: string;
35
+ sampleRateHz?: number;
36
+ }
37
+ export interface LyriaRealtimeConfig {
38
+ /** Base URL (REST), e.g. "https://generativelanguage.googleapis.com/v1beta" */
39
+ baseUrl: string;
40
+ }
41
+ export interface LyriaRealtimeSessionOptions {
42
+ model?: string;
43
+ prompts: WeightedPrompt[];
44
+ config?: MusicGenerationConfig;
45
+ }
46
+ /** Audio chunk received from the server. */
47
+ export interface AudioChunk {
48
+ /** Raw PCM s16le data (48kHz, stereo) */
49
+ data: Buffer;
50
+ }
51
+ /**
52
+ * Interactive Lyria RealTime session over WebSocket.
53
+ *
54
+ * Usage:
55
+ * const session = new GeminiLyriaRealtimeSession({ baseUrl: "..." });
56
+ * await session.connect(apiKey, { prompts: [{ text: "jazz", weight: 1 }] });
57
+ * session.onAudioChunk = (chunk) => { ... };
58
+ * await session.play();
59
+ * // ... later
60
+ * await session.stop();
61
+ * session.close();
62
+ */
63
+ export declare class GeminiLyriaRealtimeSession {
64
+ private ws;
65
+ private wsUrl;
66
+ private model;
67
+ /** Called for each audio chunk received from the server. */
68
+ onAudioChunk: ((chunk: AudioChunk) => void) | null;
69
+ /** Called when the server reports a filtered prompt. */
70
+ onFilteredPrompt: ((reason: string) => void) | null;
71
+ /** Called on WebSocket error. */
72
+ onError: ((error: Error) => void) | null;
73
+ /** Called when the WebSocket connection closes. */
74
+ onClose: (() => void) | null;
75
+ constructor(config: LyriaRealtimeConfig);
76
+ /**
77
+ * Connect to the Lyria RealTime WebSocket and send setup + initial config.
78
+ */
79
+ connect(apiKey: string, options: LyriaRealtimeSessionOptions): Promise<void>;
80
+ /** Set or update weighted prompts (smooth transition). */
81
+ setWeightedPrompts(prompts: WeightedPrompt[]): Promise<void>;
82
+ /** Set or update music generation config. */
83
+ setMusicGenerationConfig(config: MusicGenerationConfig): Promise<void>;
84
+ /** Start streaming music. */
85
+ play(): Promise<void>;
86
+ /** Pause music streaming (can resume with play). */
87
+ pause(): Promise<void>;
88
+ /** Stop music streaming (terminates the current piece). */
89
+ stop(): Promise<void>;
90
+ /** Reset model context (for BPM/scale changes). */
91
+ resetContext(): Promise<void>;
92
+ /** Close the WebSocket connection. */
93
+ close(): void;
94
+ /** Whether the session is connected. */
95
+ get connected(): boolean;
96
+ private send;
97
+ private handleMessage;
98
+ }
99
+ /**
100
+ * Generate a fixed-duration music clip using Lyria RealTime.
101
+ *
102
+ * Connects, plays for the specified duration (default 30s), collects all
103
+ * audio chunks, assembles into a WAV file, and returns the file path.
104
+ *
105
+ * Output: 48kHz, stereo, 16-bit PCM wrapped in WAV.
106
+ */
107
+ export declare function generateRealtimeMusic(apiKey: string, config: LyriaRealtimeConfig, options: {
108
+ prompts: WeightedPrompt[];
109
+ durationSeconds?: number;
110
+ musicConfig?: MusicGenerationConfig;
111
+ model?: string;
112
+ signal?: AbortSignal;
113
+ onProgress?: (percent: number, status: string) => void;
114
+ }): Promise<{
115
+ filePath: string;
116
+ durationMs: number;
117
+ }>;
@@ -0,0 +1,295 @@
1
+ /**
2
+ * GeminiLyriaRealtimeSession — WebSocket-based real-time streaming music generation.
3
+ *
4
+ * Uses Lyria RealTime (`lyria-realtime-exp`) via the Gemini Live API WebSocket.
5
+ * Provides both a full interactive session API and a one-shot convenience method.
6
+ *
7
+ * Protocol:
8
+ * - WebSocket URL: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent
9
+ * - Auth: API key as query parameter
10
+ * - Client → Server: setup, musicInput (weightedPrompts, musicGenerationConfig, playbackControl)
11
+ * - Server → Client: serverContent.audioChunks (base64 PCM s16le, 48kHz, stereo)
12
+ *
13
+ * Docs: https://ai.google.dev/gemini-api/docs/realtime-music-generation
14
+ */
15
+ import { writeFileSync, mkdirSync } from "node:fs";
16
+ import { join } from "node:path";
17
+ import { randomUUID } from "node:crypto";
18
+ import { getUserCacheDir } from "../paths.js";
19
+ // ── Session ──────────────────────────────────────────────────────────────────
20
+ /**
21
+ * Interactive Lyria RealTime session over WebSocket.
22
+ *
23
+ * Usage:
24
+ * const session = new GeminiLyriaRealtimeSession({ baseUrl: "..." });
25
+ * await session.connect(apiKey, { prompts: [{ text: "jazz", weight: 1 }] });
26
+ * session.onAudioChunk = (chunk) => { ... };
27
+ * await session.play();
28
+ * // ... later
29
+ * await session.stop();
30
+ * session.close();
31
+ */
32
+ export class GeminiLyriaRealtimeSession {
33
+ ws = null;
34
+ wsUrl;
35
+ model = "models/lyria-realtime-exp";
36
+ /** Called for each audio chunk received from the server. */
37
+ onAudioChunk = null;
38
+ /** Called when the server reports a filtered prompt. */
39
+ onFilteredPrompt = null;
40
+ /** Called on WebSocket error. */
41
+ onError = null;
42
+ /** Called when the WebSocket connection closes. */
43
+ onClose = null;
44
+ constructor(config) {
45
+ // Derive WebSocket URL from REST base URL
46
+ // "https://generativelanguage.googleapis.com/v1beta"
47
+ // → "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
48
+ const restBase = config.baseUrl.replace(/\/+$/, "");
49
+ const origin = new URL(restBase).origin.replace(/^http/, "ws");
50
+ this.wsUrl = `${origin}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
51
+ }
52
+ /**
53
+ * Connect to the Lyria RealTime WebSocket and send setup + initial config.
54
+ */
55
+ async connect(apiKey, options) {
56
+ if (options.model) {
57
+ this.model = options.model.startsWith("models/")
58
+ ? options.model
59
+ : `models/${options.model}`;
60
+ }
61
+ const url = `${this.wsUrl}?key=${encodeURIComponent(apiKey)}`;
62
+ const ws = new WebSocket(url);
63
+ this.ws = ws;
64
+ await new Promise((resolve, reject) => {
65
+ const onOpen = () => {
66
+ ws.removeEventListener("error", onInitError);
67
+ resolve();
68
+ };
69
+ const onInitError = () => {
70
+ ws.removeEventListener("open", onOpen);
71
+ reject(new Error("Lyria RealTime WebSocket connection failed"));
72
+ };
73
+ ws.addEventListener("open", onOpen, { once: true });
74
+ ws.addEventListener("error", onInitError, { once: true });
75
+ });
76
+ // Wire message handling
77
+ ws.addEventListener("message", (ev) => this.handleMessage(ev));
78
+ ws.addEventListener("error", () => {
79
+ this.onError?.(new Error("Lyria RealTime WebSocket error"));
80
+ });
81
+ ws.addEventListener("close", () => {
82
+ this.ws = null;
83
+ this.onClose?.();
84
+ });
85
+ // Send setup message
86
+ this.send({
87
+ setup: { model: this.model },
88
+ });
89
+ // Wait a tick for setup acknowledgement
90
+ await new Promise(r => setTimeout(r, 100));
91
+ // Send initial prompts
92
+ await this.setWeightedPrompts(options.prompts);
93
+ // Send initial config
94
+ if (options.config) {
95
+ await this.setMusicGenerationConfig(options.config);
96
+ }
97
+ }
98
+ /** Set or update weighted prompts (smooth transition). */
99
+ async setWeightedPrompts(prompts) {
100
+ this.send({
101
+ musicInput: {
102
+ weightedPrompts: prompts.map(p => ({
103
+ text: p.text,
104
+ weight: p.weight,
105
+ })),
106
+ },
107
+ });
108
+ }
109
+ /** Set or update music generation config. */
110
+ async setMusicGenerationConfig(config) {
111
+ const mc = {};
112
+ if (config.bpm !== undefined)
113
+ mc.bpm = config.bpm;
114
+ if (config.density !== undefined)
115
+ mc.density = config.density;
116
+ if (config.brightness !== undefined)
117
+ mc.brightness = config.brightness;
118
+ if (config.guidance !== undefined)
119
+ mc.guidance = config.guidance;
120
+ if (config.scale !== undefined)
121
+ mc.scale = config.scale;
122
+ if (config.temperature !== undefined)
123
+ mc.temperature = config.temperature;
124
+ if (config.topK !== undefined)
125
+ mc.top_k = config.topK;
126
+ if (config.seed !== undefined)
127
+ mc.seed = config.seed;
128
+ if (config.muteBass !== undefined)
129
+ mc.mute_bass = config.muteBass;
130
+ if (config.muteDrums !== undefined)
131
+ mc.mute_drums = config.muteDrums;
132
+ if (config.onlyBassAndDrums !== undefined)
133
+ mc.only_bass_and_drums = config.onlyBassAndDrums;
134
+ if (config.musicGenerationMode !== undefined)
135
+ mc.music_generation_mode = config.musicGenerationMode;
136
+ if (config.audioFormat !== undefined)
137
+ mc.audio_format = config.audioFormat;
138
+ if (config.sampleRateHz !== undefined)
139
+ mc.sample_rate_hz = config.sampleRateHz;
140
+ this.send({
141
+ musicInput: { musicGenerationConfig: mc },
142
+ });
143
+ }
144
+ /** Start streaming music. */
145
+ async play() {
146
+ this.send({ musicInput: { playbackControl: "PLAY" } });
147
+ }
148
+ /** Pause music streaming (can resume with play). */
149
+ async pause() {
150
+ this.send({ musicInput: { playbackControl: "PAUSE" } });
151
+ }
152
+ /** Stop music streaming (terminates the current piece). */
153
+ async stop() {
154
+ this.send({ musicInput: { playbackControl: "STOP" } });
155
+ }
156
+ /** Reset model context (for BPM/scale changes). */
157
+ async resetContext() {
158
+ this.send({ musicInput: { playbackControl: "RESET_CONTEXT" } });
159
+ }
160
+ /** Close the WebSocket connection. */
161
+ close() {
162
+ if (this.ws && this.ws.readyState !== WebSocket.CLOSED) {
163
+ this.ws.close();
164
+ }
165
+ this.ws = null;
166
+ }
167
+ /** Whether the session is connected. */
168
+ get connected() {
169
+ return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
170
+ }
171
+ // ── Private ──────────────────────────────────────────────────────────────
172
+ send(msg) {
173
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
174
+ throw new Error("Lyria RealTime session not connected");
175
+ }
176
+ this.ws.send(JSON.stringify(msg));
177
+ }
178
+ handleMessage(ev) {
179
+ let data;
180
+ try {
181
+ data = JSON.parse(typeof ev.data === "string" ? ev.data : ev.data.toString());
182
+ }
183
+ catch {
184
+ return;
185
+ }
186
+ const serverContent = data.serverContent;
187
+ if (!serverContent)
188
+ return;
189
+ // Audio chunks
190
+ const audioChunks = serverContent.audioChunks;
191
+ if (audioChunks && this.onAudioChunk) {
192
+ for (const chunk of audioChunks) {
193
+ if (chunk.data) {
194
+ this.onAudioChunk({ data: Buffer.from(chunk.data, "base64") });
195
+ }
196
+ }
197
+ }
198
+ // Filtered prompt (safety)
199
+ const filtered = serverContent.filteredPrompt;
200
+ if (filtered) {
201
+ this.onFilteredPrompt?.(filtered);
202
+ }
203
+ }
204
+ }
205
+ // ── One-shot convenience: generate a fixed-duration music clip ───────────────
206
+ /**
207
+ * Generate a fixed-duration music clip using Lyria RealTime.
208
+ *
209
+ * Connects, plays for the specified duration (default 30s), collects all
210
+ * audio chunks, assembles into a WAV file, and returns the file path.
211
+ *
212
+ * Output: 48kHz, stereo, 16-bit PCM wrapped in WAV.
213
+ */
214
+ export async function generateRealtimeMusic(apiKey, config, options) {
215
+ const durationSec = options.durationSeconds ?? 30;
216
+ const start = Date.now();
217
+ const session = new GeminiLyriaRealtimeSession(config);
218
+ // Collect PCM chunks
219
+ const pcmChunks = [];
220
+ session.onAudioChunk = (chunk) => {
221
+ pcmChunks.push(chunk.data);
222
+ };
223
+ let sessionError = null;
224
+ session.onError = (err) => {
225
+ sessionError = err;
226
+ };
227
+ try {
228
+ await session.connect(apiKey, {
229
+ model: options.model ?? "lyria-realtime-exp",
230
+ prompts: options.prompts,
231
+ config: options.musicConfig,
232
+ });
233
+ options.onProgress?.(5, "connected");
234
+ // Start playback
235
+ await session.play();
236
+ options.onProgress?.(10, "streaming");
237
+ // Collect audio for the specified duration
238
+ const deadline = Date.now() + durationSec * 1000;
239
+ const pollInterval = 500;
240
+ while (Date.now() < deadline) {
241
+ options.signal?.throwIfAborted();
242
+ if (sessionError)
243
+ throw sessionError;
244
+ if (!session.connected)
245
+ break;
246
+ const elapsed = Date.now() - start;
247
+ const totalExpected = durationSec * 1000;
248
+ const pct = Math.min(95, Math.round(10 + (elapsed / totalExpected) * 85));
249
+ options.onProgress?.(pct, "streaming");
250
+ await new Promise(r => setTimeout(r, pollInterval));
251
+ }
252
+ // Stop playback
253
+ if (session.connected) {
254
+ await session.stop();
255
+ }
256
+ options.onProgress?.(98, "assembling");
257
+ // Assemble PCM → WAV
258
+ const pcm = Buffer.concat(pcmChunks);
259
+ const wav = wrapPcmAsWav(pcm, 48000, 2, 16);
260
+ const cacheDir = join(getUserCacheDir(), "gemini-music-realtime");
261
+ mkdirSync(cacheDir, { recursive: true });
262
+ const filename = `lyria-rt-${randomUUID()}.wav`;
263
+ const filePath = join(cacheDir, filename);
264
+ writeFileSync(filePath, wav);
265
+ options.onProgress?.(100, "completed");
266
+ return {
267
+ filePath: `file://${filePath}`,
268
+ durationMs: Date.now() - start,
269
+ };
270
+ }
271
+ finally {
272
+ session.close();
273
+ }
274
+ }
275
+ // ── WAV helper ───────────────────────────────────────────────────────────────
276
+ function wrapPcmAsWav(pcm, sampleRate, channels, bitsPerSample) {
277
+ const byteRate = sampleRate * channels * (bitsPerSample / 8);
278
+ const blockAlign = channels * (bitsPerSample / 8);
279
+ const dataSize = pcm.length;
280
+ const header = Buffer.alloc(44);
281
+ header.write("RIFF", 0);
282
+ header.writeUInt32LE(36 + dataSize, 4);
283
+ header.write("WAVE", 8);
284
+ header.write("fmt ", 12);
285
+ header.writeUInt32LE(16, 16);
286
+ header.writeUInt16LE(1, 20);
287
+ header.writeUInt16LE(channels, 22);
288
+ header.writeUInt32LE(sampleRate, 24);
289
+ header.writeUInt32LE(byteRate, 28);
290
+ header.writeUInt16LE(blockAlign, 30);
291
+ header.writeUInt16LE(bitsPerSample, 32);
292
+ header.write("data", 36);
293
+ header.writeUInt32LE(dataSize, 40);
294
+ return Buffer.concat([header, pcm]);
295
+ }
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Gemini Media Transport — unified media generation for all Gemini media APIs.
3
+ *
4
+ * Supported media types and endpoints:
5
+ * image — POST /models/{model}:generateContent (responseModalities: ["TEXT","IMAGE"])
6
+ * video — POST /models/{model}:predictLongRunning → poll operations → download URI
7
+ * music — POST /models/{model}:generateContent (Lyria 3 — inlineData audio)
8
+ * music_realtime — WebSocket session (Lyria RealTime — streaming PCM → WAV)
9
+ * tts — POST /models/{model}:generateContent (speechConfig — inlineData PCM)
10
+ * embedding — POST /models/{model}:embedContent (float vector)
11
+ *
12
+ * Auth: x-goog-api-key header for all endpoints.
13
+ */
14
+ import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
15
+ export interface GeminiMediaConfig {
16
+ /** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta" */
17
+ baseUrl: string;
18
+ timeoutMs?: number;
19
+ }
20
+ export declare class GeminiMediaTransport implements AsyncMediaTransport {
21
+ readonly supportedTypes: readonly MediaType[];
22
+ private apiBase;
23
+ private timeoutMs;
24
+ constructor(config: GeminiMediaConfig);
25
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
26
+ deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
27
+ listVideoTasks(_apiKey: string, _options?: {
28
+ after?: string;
29
+ limit?: number;
30
+ status?: string;
31
+ }, _signal?: AbortSignal): Promise<Record<string, unknown>>;
32
+ getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
33
+ status: string;
34
+ task: Record<string, unknown>;
35
+ }>;
36
+ private generateImage;
37
+ private generateVideo;
38
+ private generateMusic;
39
+ private generateMusicRealtime;
40
+ private generateTTS;
41
+ private generateEmbedding;
42
+ private postJson;
43
+ private pollOperation;
44
+ /**
45
+ * Resolve an image URL to inline data for the Veo API.
46
+ * Supports file:// paths and https:// URLs.
47
+ */
48
+ private resolveImageData;
49
+ /** Extract base64 image data from generateContent response → persist to cache files. */
50
+ private extractInlineImages;
51
+ /** Extract base64 audio data from generateContent response → persist to cache files. */
52
+ private extractInlineAudio;
53
+ }