@xiaozhiclaw/provider-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
- package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
- package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
- package/dist/adapters/gemini-file-upload-adapter.js +92 -0
- package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
- package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
- package/dist/adapters/index.d.ts +10 -0
- package/dist/adapters/index.js +10 -0
- package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
- package/dist/adapters/openai-file-upload-adapter.js +56 -0
- package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
- package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
- package/dist/builtin-providers.d.ts +8 -0
- package/dist/builtin-providers.js +2237 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.js +1 -0
- package/dist/credentials.d.ts +1 -0
- package/dist/credentials.js +8 -0
- package/dist/debug-transport.d.ts +12 -0
- package/dist/debug-transport.js +99 -0
- package/dist/errors.d.ts +11 -0
- package/dist/errors.js +12 -0
- package/dist/events.d.ts +48 -0
- package/dist/events.js +1 -0
- package/dist/file-upload-service.d.ts +68 -0
- package/dist/file-upload-service.js +110 -0
- package/dist/gemini-schema-utils.d.ts +17 -0
- package/dist/gemini-schema-utils.js +76 -0
- package/dist/index.d.ts +37 -0
- package/dist/index.js +33 -0
- package/dist/llm-client.d.ts +43 -0
- package/dist/llm-client.js +217 -0
- package/dist/media-client.d.ts +42 -0
- package/dist/media-client.js +174 -0
- package/dist/media-transport.d.ts +176 -0
- package/dist/media-transport.js +16 -0
- package/dist/media.d.ts +2 -0
- package/dist/media.js +1 -0
- package/dist/model-detection.d.ts +22 -0
- package/dist/model-detection.js +28 -0
- package/dist/paths.d.ts +2 -0
- package/dist/paths.js +11 -0
- package/dist/provider-def.d.ts +220 -0
- package/dist/provider-def.js +9 -0
- package/dist/provider-registry.d.ts +51 -0
- package/dist/provider-registry.js +130 -0
- package/dist/provider-tool-api.d.ts +44 -0
- package/dist/provider-tool-api.js +9 -0
- package/dist/provider-variant-resolver.d.ts +35 -0
- package/dist/provider-variant-resolver.js +174 -0
- package/dist/retry.d.ts +37 -0
- package/dist/retry.js +71 -0
- package/dist/transport.d.ts +281 -0
- package/dist/transport.js +27 -0
- package/dist/transports/anthropic-messages.d.ts +65 -0
- package/dist/transports/anthropic-messages.js +1004 -0
- package/dist/transports/gemini-cache-api.d.ts +86 -0
- package/dist/transports/gemini-cache-api.js +141 -0
- package/dist/transports/gemini-file-api.d.ts +90 -0
- package/dist/transports/gemini-file-api.js +164 -0
- package/dist/transports/gemini-generatecontent.d.ts +56 -0
- package/dist/transports/gemini-generatecontent.js +688 -0
- package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
- package/dist/transports/gemini-lyria-realtime.js +295 -0
- package/dist/transports/gemini-media.d.ts +53 -0
- package/dist/transports/gemini-media.js +383 -0
- package/dist/transports/media-resolve.d.ts +50 -0
- package/dist/transports/media-resolve.js +91 -0
- package/dist/transports/minimax-media.d.ts +56 -0
- package/dist/transports/minimax-media.js +433 -0
- package/dist/transports/openai-chat.d.ts +81 -0
- package/dist/transports/openai-chat.js +782 -0
- package/dist/transports/openai-media.d.ts +24 -0
- package/dist/transports/openai-media.js +118 -0
- package/dist/transports/openai-responses.d.ts +63 -0
- package/dist/transports/openai-responses.js +778 -0
- package/dist/transports/qwen-media.d.ts +59 -0
- package/dist/transports/qwen-media.js +411 -0
- package/dist/transports/realtime-transport.d.ts +183 -0
- package/dist/transports/realtime-transport.js +332 -0
- package/dist/transports/volcengine-grounding.d.ts +58 -0
- package/dist/transports/volcengine-grounding.js +69 -0
- package/dist/transports/volcengine-media.d.ts +94 -0
- package/dist/transports/volcengine-media.js +801 -0
- package/dist/transports/volcengine-responses.d.ts +64 -0
- package/dist/transports/volcengine-responses.js +797 -0
- package/dist/transports/zhipu-media.d.ts +82 -0
- package/dist/transports/zhipu-media.js +522 -0
- package/dist/transports/zhipu-tool-api.d.ts +35 -0
- package/dist/transports/zhipu-tool-api.js +126 -0
- package/dist/wire-types.d.ts +51 -0
- package/dist/wire-types.js +1 -0
- package/package.json +33 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiLyriaRealtimeSession — WebSocket-based real-time streaming music generation.
|
|
3
|
+
*
|
|
4
|
+
* Uses Lyria RealTime (`lyria-realtime-exp`) via the Gemini Live API WebSocket.
|
|
5
|
+
* Provides both a full interactive session API and a one-shot convenience method.
|
|
6
|
+
*
|
|
7
|
+
* Protocol:
|
|
8
|
+
* - WebSocket URL: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent
|
|
9
|
+
* - Auth: API key as query parameter
|
|
10
|
+
* - Client → Server: setup, musicInput (weightedPrompts, musicGenerationConfig, playbackControl)
|
|
11
|
+
* - Server → Client: serverContent.audioChunks (base64 PCM s16le, 48kHz, stereo)
|
|
12
|
+
*
|
|
13
|
+
* Docs: https://ai.google.dev/gemini-api/docs/realtime-music-generation
|
|
14
|
+
*/
|
|
15
|
+
/** One text prompt together with the weight it is submitted with. */
export interface WeightedPrompt {
    /** Prompt text. */
    text: string;
    /** Weight attached to this prompt. */
    weight: number;
}
|
|
19
|
+
/** String identifiers accepted for the musical scale setting. */
export type MusicScale =
    | "C_MAJOR_A_MINOR"
    | "D_FLAT_MAJOR_B_FLAT_MINOR"
    | "D_MAJOR_B_MINOR"
    | "E_FLAT_MAJOR_C_MINOR"
    | "E_MAJOR_D_FLAT_MINOR"
    | "F_MAJOR_D_MINOR"
    | "G_FLAT_MAJOR_E_FLAT_MINOR"
    | "G_MAJOR_E_MINOR"
    | "A_FLAT_MAJOR_F_MINOR"
    | "A_MAJOR_G_FLAT_MINOR"
    | "B_FLAT_MAJOR_G_MINOR"
    | "B_MAJOR_A_FLAT_MINOR"
    | "SCALE_UNSPECIFIED";
|
|
20
|
+
/** String identifiers accepted for the music generation mode setting. */
export type MusicGenerationMode =
    | "QUALITY"
    | "DIVERSITY"
    | "VOCALIZATION";
|
|
21
|
+
/**
 * Optional tuning parameters for music generation.
 *
 * Every field may be omitted.
 * NOTE(review): valid ranges are not visible in this file; confirm against the service documentation.
 */
export interface MusicGenerationConfig {
    // Musical character
    bpm?: number;
    density?: number;
    brightness?: number;
    guidance?: number;
    scale?: MusicScale;
    // Sampling
    temperature?: number;
    topK?: number;
    seed?: number;
    // Stem selection
    muteBass?: boolean;
    muteDrums?: boolean;
    onlyBassAndDrums?: boolean;
    // Mode and output format
    musicGenerationMode?: MusicGenerationMode;
    audioFormat?: string;
    sampleRateHz?: number;
}
|
|
37
|
+
/** Connection settings for a Lyria RealTime session. */
export interface LyriaRealtimeConfig {
    /** REST base URL, for example "https://generativelanguage.googleapis.com/v1beta". */
    baseUrl: string;
}
|
|
41
|
+
/** Options supplied when opening a Lyria RealTime session. */
export interface LyriaRealtimeSessionOptions {
    /** Model name; optional. */
    model?: string;
    /** Initial weighted prompts. */
    prompts: WeightedPrompt[];
    /** Initial music generation config; optional. */
    config?: MusicGenerationConfig;
}
|
|
46
|
+
/** A single piece of audio delivered by the server. */
export interface AudioChunk {
    /** Raw PCM bytes (s16le, 48kHz, stereo). */
    data: Buffer;
}
|
|
51
|
+
/**
 * WebSocket-backed interactive session for Lyria RealTime.
 *
 * Typical flow:
 *   const session = new GeminiLyriaRealtimeSession({ baseUrl: "..." });
 *   await session.connect(apiKey, { prompts: [{ text: "jazz", weight: 1 }] });
 *   session.onAudioChunk = (chunk) => { ... };
 *   await session.play();
 *   // ... later
 *   await session.stop();
 *   session.close();
 */
export declare class GeminiLyriaRealtimeSession {
    private ws;
    private wsUrl;
    private model;
    /** Handler invoked once per audio chunk delivered by the server. */
    onAudioChunk: ((chunk: AudioChunk) => void) | null;
    /** Handler invoked when the server reports that a prompt was filtered. */
    onFilteredPrompt: ((reason: string) => void) | null;
    /** Handler invoked when the WebSocket reports an error. */
    onError: ((error: Error) => void) | null;
    /** Handler invoked after the WebSocket connection has closed. */
    onClose: (() => void) | null;
    constructor(config: LyriaRealtimeConfig);
    /**
     * Open the Lyria RealTime WebSocket, then send the setup message and the initial config.
     */
    connect(apiKey: string, options: LyriaRealtimeSessionOptions): Promise<void>;
    /** Replace the weighted prompts (smooth transition). */
    setWeightedPrompts(prompts: WeightedPrompt[]): Promise<void>;
    /** Replace the music generation config. */
    setMusicGenerationConfig(config: MusicGenerationConfig): Promise<void>;
    /** Begin streaming music. */
    play(): Promise<void>;
    /** Pause streaming; `play` resumes it. */
    pause(): Promise<void>;
    /** Stop streaming and terminate the current piece. */
    stop(): Promise<void>;
    /** Reset the model context (for BPM/scale changes). */
    resetContext(): Promise<void>;
    /** Close the WebSocket connection. */
    close(): void;
    /** True while the session is connected. */
    get connected(): boolean;
    private send;
    private handleMessage;
}
|
|
99
|
+
/**
 * One-shot helper: produce a fixed-duration music clip with Lyria RealTime.
 *
 * Opens a session, plays for the requested duration (30s when omitted),
 * gathers every audio chunk, wraps the result as a WAV file and returns
 * the path of that file.
 *
 * Output: 48kHz, stereo, 16-bit PCM wrapped in WAV.
 */
export declare function generateRealtimeMusic(
    apiKey: string,
    config: LyriaRealtimeConfig,
    options: {
        prompts: WeightedPrompt[];
        durationSeconds?: number;
        musicConfig?: MusicGenerationConfig;
        model?: string;
        signal?: AbortSignal;
        onProgress?: (percent: number, status: string) => void;
    },
): Promise<{
    filePath: string;
    durationMs: number;
}>;
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiLyriaRealtimeSession — WebSocket-based real-time streaming music generation.
|
|
3
|
+
*
|
|
4
|
+
* Uses Lyria RealTime (`lyria-realtime-exp`) via the Gemini Live API WebSocket.
|
|
5
|
+
* Provides both a full interactive session API and a one-shot convenience method.
|
|
6
|
+
*
|
|
7
|
+
* Protocol:
|
|
8
|
+
* - WebSocket URL: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent
|
|
9
|
+
* - Auth: API key as query parameter
|
|
10
|
+
* - Client → Server: setup, musicInput (weightedPrompts, musicGenerationConfig, playbackControl)
|
|
11
|
+
* - Server → Client: serverContent.audioChunks (base64 PCM s16le, 48kHz, stereo)
|
|
12
|
+
*
|
|
13
|
+
* Docs: https://ai.google.dev/gemini-api/docs/realtime-music-generation
|
|
14
|
+
*/
|
|
15
|
+
import { writeFileSync, mkdirSync } from "node:fs";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
import { randomUUID } from "node:crypto";
|
|
18
|
+
import { getUserCacheDir } from "../paths.js";
|
|
19
|
+
// ── Session ──────────────────────────────────────────────────────────────────
|
|
20
|
+
/**
|
|
21
|
+
* Interactive Lyria RealTime session over WebSocket.
|
|
22
|
+
*
|
|
23
|
+
* Usage:
|
|
24
|
+
* const session = new GeminiLyriaRealtimeSession({ baseUrl: "..." });
|
|
25
|
+
* await session.connect(apiKey, { prompts: [{ text: "jazz", weight: 1 }] });
|
|
26
|
+
* session.onAudioChunk = (chunk) => { ... };
|
|
27
|
+
* await session.play();
|
|
28
|
+
* // ... later
|
|
29
|
+
* await session.stop();
|
|
30
|
+
* session.close();
|
|
31
|
+
*/
|
|
32
|
+
export class GeminiLyriaRealtimeSession {
|
|
33
|
+
ws = null;
|
|
34
|
+
wsUrl;
|
|
35
|
+
model = "models/lyria-realtime-exp";
|
|
36
|
+
/** Called for each audio chunk received from the server. */
|
|
37
|
+
onAudioChunk = null;
|
|
38
|
+
/** Called when the server reports a filtered prompt. */
|
|
39
|
+
onFilteredPrompt = null;
|
|
40
|
+
/** Called on WebSocket error. */
|
|
41
|
+
onError = null;
|
|
42
|
+
/** Called when the WebSocket connection closes. */
|
|
43
|
+
onClose = null;
|
|
44
|
+
constructor(config) {
|
|
45
|
+
// Derive WebSocket URL from REST base URL
|
|
46
|
+
// "https://generativelanguage.googleapis.com/v1beta"
|
|
47
|
+
// 鈫?"wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent"
|
|
48
|
+
const restBase = config.baseUrl.replace(/\/+$/, "");
|
|
49
|
+
const origin = new URL(restBase).origin.replace(/^http/, "ws");
|
|
50
|
+
this.wsUrl = `${origin}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Connect to the Lyria RealTime WebSocket and send setup + initial config.
|
|
54
|
+
*/
|
|
55
|
+
async connect(apiKey, options) {
|
|
56
|
+
if (options.model) {
|
|
57
|
+
this.model = options.model.startsWith("models/")
|
|
58
|
+
? options.model
|
|
59
|
+
: `models/${options.model}`;
|
|
60
|
+
}
|
|
61
|
+
const url = `${this.wsUrl}?key=${encodeURIComponent(apiKey)}`;
|
|
62
|
+
const ws = new WebSocket(url);
|
|
63
|
+
this.ws = ws;
|
|
64
|
+
await new Promise((resolve, reject) => {
|
|
65
|
+
const onOpen = () => {
|
|
66
|
+
ws.removeEventListener("error", onInitError);
|
|
67
|
+
resolve();
|
|
68
|
+
};
|
|
69
|
+
const onInitError = () => {
|
|
70
|
+
ws.removeEventListener("open", onOpen);
|
|
71
|
+
reject(new Error("Lyria RealTime WebSocket connection failed"));
|
|
72
|
+
};
|
|
73
|
+
ws.addEventListener("open", onOpen, { once: true });
|
|
74
|
+
ws.addEventListener("error", onInitError, { once: true });
|
|
75
|
+
});
|
|
76
|
+
// Wire message handling
|
|
77
|
+
ws.addEventListener("message", (ev) => this.handleMessage(ev));
|
|
78
|
+
ws.addEventListener("error", () => {
|
|
79
|
+
this.onError?.(new Error("Lyria RealTime WebSocket error"));
|
|
80
|
+
});
|
|
81
|
+
ws.addEventListener("close", () => {
|
|
82
|
+
this.ws = null;
|
|
83
|
+
this.onClose?.();
|
|
84
|
+
});
|
|
85
|
+
// Send setup message
|
|
86
|
+
this.send({
|
|
87
|
+
setup: { model: this.model },
|
|
88
|
+
});
|
|
89
|
+
// Wait a tick for setup acknowledgement
|
|
90
|
+
await new Promise(r => setTimeout(r, 100));
|
|
91
|
+
// Send initial prompts
|
|
92
|
+
await this.setWeightedPrompts(options.prompts);
|
|
93
|
+
// Send initial config
|
|
94
|
+
if (options.config) {
|
|
95
|
+
await this.setMusicGenerationConfig(options.config);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
/** Set or update weighted prompts (smooth transition). */
|
|
99
|
+
async setWeightedPrompts(prompts) {
|
|
100
|
+
this.send({
|
|
101
|
+
musicInput: {
|
|
102
|
+
weightedPrompts: prompts.map(p => ({
|
|
103
|
+
text: p.text,
|
|
104
|
+
weight: p.weight,
|
|
105
|
+
})),
|
|
106
|
+
},
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
/** Set or update music generation config. */
|
|
110
|
+
async setMusicGenerationConfig(config) {
|
|
111
|
+
const mc = {};
|
|
112
|
+
if (config.bpm !== undefined)
|
|
113
|
+
mc.bpm = config.bpm;
|
|
114
|
+
if (config.density !== undefined)
|
|
115
|
+
mc.density = config.density;
|
|
116
|
+
if (config.brightness !== undefined)
|
|
117
|
+
mc.brightness = config.brightness;
|
|
118
|
+
if (config.guidance !== undefined)
|
|
119
|
+
mc.guidance = config.guidance;
|
|
120
|
+
if (config.scale !== undefined)
|
|
121
|
+
mc.scale = config.scale;
|
|
122
|
+
if (config.temperature !== undefined)
|
|
123
|
+
mc.temperature = config.temperature;
|
|
124
|
+
if (config.topK !== undefined)
|
|
125
|
+
mc.top_k = config.topK;
|
|
126
|
+
if (config.seed !== undefined)
|
|
127
|
+
mc.seed = config.seed;
|
|
128
|
+
if (config.muteBass !== undefined)
|
|
129
|
+
mc.mute_bass = config.muteBass;
|
|
130
|
+
if (config.muteDrums !== undefined)
|
|
131
|
+
mc.mute_drums = config.muteDrums;
|
|
132
|
+
if (config.onlyBassAndDrums !== undefined)
|
|
133
|
+
mc.only_bass_and_drums = config.onlyBassAndDrums;
|
|
134
|
+
if (config.musicGenerationMode !== undefined)
|
|
135
|
+
mc.music_generation_mode = config.musicGenerationMode;
|
|
136
|
+
if (config.audioFormat !== undefined)
|
|
137
|
+
mc.audio_format = config.audioFormat;
|
|
138
|
+
if (config.sampleRateHz !== undefined)
|
|
139
|
+
mc.sample_rate_hz = config.sampleRateHz;
|
|
140
|
+
this.send({
|
|
141
|
+
musicInput: { musicGenerationConfig: mc },
|
|
142
|
+
});
|
|
143
|
+
}
|
|
144
|
+
/** Start streaming music. */
|
|
145
|
+
async play() {
|
|
146
|
+
this.send({ musicInput: { playbackControl: "PLAY" } });
|
|
147
|
+
}
|
|
148
|
+
/** Pause music streaming (can resume with play). */
|
|
149
|
+
async pause() {
|
|
150
|
+
this.send({ musicInput: { playbackControl: "PAUSE" } });
|
|
151
|
+
}
|
|
152
|
+
/** Stop music streaming (terminates the current piece). */
|
|
153
|
+
async stop() {
|
|
154
|
+
this.send({ musicInput: { playbackControl: "STOP" } });
|
|
155
|
+
}
|
|
156
|
+
/** Reset model context (for BPM/scale changes). */
|
|
157
|
+
async resetContext() {
|
|
158
|
+
this.send({ musicInput: { playbackControl: "RESET_CONTEXT" } });
|
|
159
|
+
}
|
|
160
|
+
/** Close the WebSocket connection. */
|
|
161
|
+
close() {
|
|
162
|
+
if (this.ws && this.ws.readyState !== WebSocket.CLOSED) {
|
|
163
|
+
this.ws.close();
|
|
164
|
+
}
|
|
165
|
+
this.ws = null;
|
|
166
|
+
}
|
|
167
|
+
/** Whether the session is connected. */
|
|
168
|
+
get connected() {
|
|
169
|
+
return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
|
|
170
|
+
}
|
|
171
|
+
// 鈹€鈹€ Private 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€
|
|
172
|
+
send(msg) {
|
|
173
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
174
|
+
throw new Error("Lyria RealTime session not connected");
|
|
175
|
+
}
|
|
176
|
+
this.ws.send(JSON.stringify(msg));
|
|
177
|
+
}
|
|
178
|
+
handleMessage(ev) {
|
|
179
|
+
let data;
|
|
180
|
+
try {
|
|
181
|
+
data = JSON.parse(typeof ev.data === "string" ? ev.data : ev.data.toString());
|
|
182
|
+
}
|
|
183
|
+
catch {
|
|
184
|
+
return;
|
|
185
|
+
}
|
|
186
|
+
const serverContent = data.serverContent;
|
|
187
|
+
if (!serverContent)
|
|
188
|
+
return;
|
|
189
|
+
// Audio chunks
|
|
190
|
+
const audioChunks = serverContent.audioChunks;
|
|
191
|
+
if (audioChunks && this.onAudioChunk) {
|
|
192
|
+
for (const chunk of audioChunks) {
|
|
193
|
+
if (chunk.data) {
|
|
194
|
+
this.onAudioChunk({ data: Buffer.from(chunk.data, "base64") });
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
// Filtered prompt (safety)
|
|
199
|
+
const filtered = serverContent.filteredPrompt;
|
|
200
|
+
if (filtered) {
|
|
201
|
+
this.onFilteredPrompt?.(filtered);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
// ── One-shot convenience: generate a fixed-duration music clip ───────────────
|
|
206
|
+
/**
 * Generate a fixed-duration music clip using Lyria RealTime.
 *
 * Connects, plays for the specified duration (default 30s), collects all
 * audio chunks, assembles into a WAV file, and returns the file path.
 *
 * Output: 48kHz, stereo, 16-bit PCM wrapped in WAV.
 *
 * @param apiKey API key passed to the session's `connect`.
 * @param config Session config (REST base URL).
 * @param options `prompts` (required) plus optional `durationSeconds`,
 *   `musicConfig`, `model`, `signal` and `onProgress(percent, status)`.
 * @returns `filePath` ("file://" followed by the path of the written WAV) and
 *   `durationMs` (wall-clock time spent in this call, not the audio length).
 * @throws The session error if the socket reports one, the abort reason if
 *   `signal` aborts, or an Error when no audio was received.
 */
export async function generateRealtimeMusic(apiKey, config, options) {
    const durationSec = options.durationSeconds ?? 30;
    const totalExpected = durationSec * 1000;
    const start = Date.now();
    const session = new GeminiLyriaRealtimeSession(config);
    // Collect PCM chunks
    const pcmChunks = [];
    session.onAudioChunk = (chunk) => {
        pcmChunks.push(chunk.data);
    };
    let sessionError = null;
    session.onError = (err) => {
        sessionError = err;
    };
    try {
        // Fail fast: do not open a connection for a request that is already aborted.
        options.signal?.throwIfAborted();
        await session.connect(apiKey, {
            model: options.model ?? "lyria-realtime-exp",
            prompts: options.prompts,
            config: options.musicConfig,
        });
        options.onProgress?.(5, "connected");
        // Start playback
        await session.play();
        options.onProgress?.(10, "streaming");
        // Collect audio for the specified duration
        const deadline = Date.now() + totalExpected;
        const pollInterval = 500;
        for (;;) {
            // These checks also run once after the final sleep, so an abort or
            // socket error during the last interval is not silently ignored.
            options.signal?.throwIfAborted();
            if (sessionError)
                throw sessionError;
            if (!session.connected)
                break;
            const remaining = deadline - Date.now();
            if (remaining <= 0)
                break;
            const elapsed = Date.now() - start;
            const pct = Math.min(95, Math.round(10 + (elapsed / totalExpected) * 85));
            options.onProgress?.(pct, "streaming");
            // Never sleep past the deadline.
            await new Promise(r => setTimeout(r, Math.min(pollInterval, remaining)));
        }
        // Stop playback
        if (session.connected) {
            await session.stop();
        }
        options.onProgress?.(98, "assembling");
        // Assemble PCM → WAV
        const pcm = Buffer.concat(pcmChunks);
        if (pcm.length === 0) {
            // A header-only WAV reported as "completed" would hide the failure.
            throw new Error("Lyria RealTime session produced no audio");
        }
        const wav = wrapPcmAsWav(pcm, 48000, 2, 16);
        const cacheDir = join(getUserCacheDir(), "gemini-music-realtime");
        mkdirSync(cacheDir, { recursive: true });
        const filename = `lyria-rt-${randomUUID()}.wav`;
        const filePath = join(cacheDir, filename);
        writeFileSync(filePath, wav);
        options.onProgress?.(100, "completed");
        return {
            // NOTE(review): plain concatenation does not form a valid file URL for
            // Windows paths; kept as-is because callers may rely on this exact form.
            filePath: `file://${filePath}`,
            durationMs: Date.now() - start,
        };
    }
    finally {
        // Always release the socket, on success and on every error path.
        session.close();
    }
}
|
|
275
|
+
// ── WAV helper ───────────────────────────────────────────────────────────────
|
|
276
|
+
/**
 * Wrap raw PCM bytes in a 44-byte canonical WAV (RIFF) header.
 *
 * Header layout (little-endian):
 *    0 "RIFF"   4 riff size     8 "WAVE"
 *   12 "fmt "  16 fmt size=16  20 format=1 (PCM)  22 channels
 *   24 sample rate  28 byte rate  32 block align  34 bits per sample
 *   36 "data"  40 data size
 *
 * @param pcm Buffer of interleaved PCM sample bytes.
 * @param sampleRate Samples per second per channel.
 * @param channels Number of channels.
 * @param bitsPerSample Bits per single-channel sample.
 * @returns A new Buffer containing the header followed by the PCM bytes, plus
 *   one zero pad byte when the PCM length is odd.
 */
function wrapPcmAsWav(pcm, sampleRate, channels, bitsPerSample) {
    const byteRate = sampleRate * channels * (bitsPerSample / 8);
    const blockAlign = channels * (bitsPerSample / 8);
    const dataSize = pcm.length;
    // RIFF chunks are word-aligned: an odd-sized payload is followed by one zero
    // byte that counts towards the RIFF size but not towards the data size.
    const padSize = dataSize % 2;
    const header = Buffer.alloc(44);
    header.write("RIFF", 0);
    header.writeUInt32LE(36 + dataSize + padSize, 4);
    header.write("WAVE", 8);
    header.write("fmt ", 12);
    header.writeUInt32LE(16, 16);
    header.writeUInt16LE(1, 20);
    header.writeUInt16LE(channels, 22);
    header.writeUInt32LE(sampleRate, 24);
    header.writeUInt32LE(byteRate, 28);
    // byteRate occupies bytes 28-31, so blockAlign starts at 32 and
    // bitsPerSample at 34; writing them earlier corrupts the header.
    header.writeUInt16LE(blockAlign, 32);
    header.writeUInt16LE(bitsPerSample, 34);
    header.write("data", 36);
    header.writeUInt32LE(dataSize, 40);
    return padSize === 0
        ? Buffer.concat([header, pcm])
        : Buffer.concat([header, pcm, Buffer.alloc(1)]);
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini Media Transport — unified media generation for all Gemini media APIs.
|
|
3
|
+
*
|
|
4
|
+
* Supported media types and endpoints:
|
|
5
|
+
* image — POST /models/{model}:generateContent (responseModalities: ["TEXT","IMAGE"])
|
|
6
|
+
* video — POST /models/{model}:predictLongRunning → poll operations → download URI
|
|
7
|
+
* music — POST /models/{model}:generateContent (Lyria 3 — inlineData audio)
|
|
8
|
+
* music_realtime — WebSocket session (Lyria RealTime — streaming PCM → WAV)
|
|
9
|
+
* tts — POST /models/{model}:generateContent (speechConfig — inlineData PCM)
|
|
10
|
+
* embedding — POST /models/{model}:embedContent (float vector)
|
|
11
|
+
*
|
|
12
|
+
* Auth: x-goog-api-key header for all endpoints.
|
|
13
|
+
*/
|
|
14
|
+
import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
|
|
15
|
+
/** Construction settings for the Gemini media transport. */
export interface GeminiMediaConfig {
    /** Base URL, for example "https://generativelanguage.googleapis.com/v1beta". */
    baseUrl: string;
    /** Timeout in milliseconds; optional. */
    timeoutMs?: number;
}
|
|
20
|
+
/** Media transport for the Gemini media APIs; implements `AsyncMediaTransport`. */
export declare class GeminiMediaTransport implements AsyncMediaTransport {
    /** Media types this transport declares support for. */
    readonly supportedTypes: readonly MediaType[];
    private apiBase;
    private timeoutMs;
    constructor(config: GeminiMediaConfig);
    /** Run one media generation request and resolve with its result. */
    generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
    deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
    listVideoTasks(
        _apiKey: string,
        _options?: {
            after?: string;
            limit?: number;
            status?: string;
        },
        _signal?: AbortSignal,
    ): Promise<Record<string, unknown>>;
    /** Resolve with the status string and raw task record for `taskId`. */
    getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
        status: string;
        task: Record<string, unknown>;
    }>;
    private generateImage;
    private generateVideo;
    private generateMusic;
    private generateMusicRealtime;
    private generateTTS;
    private generateEmbedding;
    private postJson;
    private pollOperation;
    /**
     * Turn an image URL into inline data for the Veo API.
     * Accepts file:// paths and https:// URLs.
     */
    private resolveImageData;
    /** Extract base64 image data from generateContent response → persist to cache files. */
    private extractInlineImages;
    /** Extract base64 audio data from generateContent response → persist to cache files. */
    private extractInlineAudio;
}
|