@firebase/ai 2.4.0-canary.c8263c471 → 2.5.0-20251028194003
This diff shows the published contents of these two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- package/dist/ai-public.d.ts +110 -9
- package/dist/ai.d.ts +110 -9
- package/dist/esm/index.esm.js +149 -46
- package/dist/esm/index.esm.js.map +1 -1
- package/dist/esm/src/methods/chrome-adapter.d.ts +1 -1
- package/dist/esm/src/methods/live-session.d.ts +64 -9
- package/dist/esm/src/types/live-responses.d.ts +21 -3
- package/dist/esm/src/types/requests.d.ts +23 -0
- package/dist/esm/src/types/responses.d.ts +21 -0
- package/dist/index.cjs.js +149 -46
- package/dist/index.cjs.js.map +1 -1
- package/dist/index.node.cjs.js +129 -39
- package/dist/index.node.cjs.js.map +1 -1
- package/dist/index.node.mjs +129 -39
- package/dist/index.node.mjs.map +1 -1
- package/dist/src/methods/chrome-adapter.d.ts +1 -1
- package/dist/src/methods/live-session.d.ts +64 -9
- package/dist/src/types/live-responses.d.ts +21 -3
- package/dist/src/types/requests.d.ts +23 -0
- package/dist/src/types/responses.d.ts +21 -0
- package/package.json +8 -8
package/dist/esm/src/methods/chrome-adapter.d.ts CHANGED
@@ -25,11 +25,11 @@ import { LanguageModel } from '../types/language-model';
 export declare class ChromeAdapterImpl implements ChromeAdapter {
     languageModelProvider: LanguageModel;
     mode: InferenceMode;
-    onDeviceParams: OnDeviceParams;
     static SUPPORTED_MIME_TYPES: string[];
     private isDownloading;
     private downloadPromise;
     private oldSession;
+    onDeviceParams: OnDeviceParams;
     constructor(languageModelProvider: LanguageModel, mode: InferenceMode, onDeviceParams?: OnDeviceParams);
     /**
      * Checks if a given request can be made on-device.
package/dist/esm/src/methods/live-session.d.ts CHANGED
@@ -53,32 +53,65 @@ export declare class LiveSession {
      */
     send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
     /**
-     * Sends
+     * Sends text to the server in realtime.
      *
-     * @
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendTextRealtime(text: string): Promise<void>;
     /**
-     * Sends
+     * Sends audio data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
     /**
-     * Sends
+     * Sends video data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-
+    sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
     /**
      * Yields messages received from the server.
      * This can only be used by one consumer at a time.
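The hunk above introduces three per-modality senders in place of a single media entry point. A minimal usage sketch, assuming a `LiveSession` has already been obtained elsewhere and that `LiveSession` and `GenerativeContentBlob` are exported from the package root as in prior releases:

```ts
import { LiveSession, GenerativeContentBlob } from '@firebase/ai';

// Sketch only: pcmBase64 and jpegBase64 are placeholders for real captured data.
async function streamInputs(
  session: LiveSession,
  pcmBase64: string,
  jpegBase64: string
): Promise<void> {
  // Text is sent as a plain string.
  await session.sendTextRealtime('Hello, how are you?');

  // Audio must be base64-encoded 16-bit PCM at 16kHz, little-endian.
  const audio: GenerativeContentBlob = { mimeType: 'audio/pcm', data: pcmBase64 };
  await session.sendAudioRealtime(audio);

  // Video is sent frame-by-frame (1 FPS recommended), ideally as JPEG.
  const frame: GenerativeContentBlob = { mimeType: 'image/jpeg', data: jpegBase64 };
  await session.sendVideoRealtime(frame);
}
```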
@@ -96,4 +129,26 @@ export declare class LiveSession {
      * @beta
      */
     close(): Promise<void>;
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
 }
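Both legacy senders survive but are deprecated. A hedged migration sketch, assuming chunks were previously tagged by `mimeType` the way the Live API expects:

```ts
import { LiveSession, GenerativeContentBlob } from '@firebase/ai';

// Migration sketch: route each chunk to the matching per-modality sender
// instead of the deprecated session.sendMediaChunks(chunks).
async function sendChunks(
  session: LiveSession,
  chunks: GenerativeContentBlob[]
): Promise<void> {
  for (const chunk of chunks) {
    if (chunk.mimeType.startsWith('audio/')) {
      await session.sendAudioRealtime(chunk);
    } else {
      // Video frames (e.g. image/jpeg) go through the video sender.
      await session.sendVideoRealtime(chunk);
    }
  }
}
```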
package/dist/esm/src/types/live-responses.d.ts CHANGED
@@ -15,7 +15,8 @@
  * limitations under the License.
  */
 import { Content, FunctionResponse, GenerativeContentBlob, Part } from './content';
-import { LiveGenerationConfig, Tool, ToolConfig } from './requests';
+import { AudioTranscriptionConfig, LiveGenerationConfig, Tool, ToolConfig } from './requests';
+import { Transcription } from './responses';
 /**
  * User input that is sent to the model.
  *
@@ -25,6 +26,8 @@ export interface _LiveClientContent {
     clientContent: {
         turns: [Content];
         turnComplete: boolean;
+        inputTranscription?: Transcription;
+        outputTranscription?: Transcription;
     };
 }
 /**
@@ -34,7 +37,13 @@ export interface _LiveClientContent {
  */
 export interface _LiveClientRealtimeInput {
     realtimeInput: {
-
+        text?: string;
+        audio?: GenerativeContentBlob;
+        video?: GenerativeContentBlob;
+        /**
+         * @deprecated Use `text`, `audio`, and `video` instead.
+         */
+        mediaChunks?: GenerativeContentBlob[];
     };
 }
 /**
@@ -53,9 +62,18 @@ export interface _LiveClientToolResponse {
 export interface _LiveClientSetup {
     setup: {
         model: string;
-        generationConfig?:
+        generationConfig?: _LiveGenerationConfig;
         tools?: Tool[];
         toolConfig?: ToolConfig;
         systemInstruction?: string | Part | Content;
+        inputAudioTranscription?: AudioTranscriptionConfig;
+        outputAudioTranscription?: AudioTranscriptionConfig;
     };
 }
+/**
+ * The Live Generation Config.
+ *
+ * The public API ({@link LiveGenerationConfig}) has `inputAudioTranscription` and `outputAudioTranscription`,
+ * but the server expects these fields to be in the top-level `setup` message. This was a conscious API decision.
+ */
+export type _LiveGenerationConfig = Omit<LiveGenerationConfig, 'inputAudioTranscription' | 'outputAudioTranscription'>;
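These internal types pin down the wire format: each realtime message now carries exactly one modality. Illustrative shapes matching `_LiveClientRealtimeInput` (values are placeholders):

```ts
// One modality per realtime message.
const textMsg = { realtimeInput: { text: 'Hello' } };
const audioMsg = { realtimeInput: { audio: { mimeType: 'audio/pcm', data: '<base64>' } } };
const videoMsg = { realtimeInput: { video: { mimeType: 'image/jpeg', data: '<base64>' } } };
// Deprecated path, still accepted by the type:
const legacyMsg = { realtimeInput: { mediaChunks: [{ mimeType: 'audio/pcm', data: '<base64>' }] } };
```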
package/dist/esm/src/types/requests.d.ts CHANGED
@@ -167,6 +167,24 @@ export interface LiveGenerationConfig {
      * The modalities of the response.
      */
     responseModalities?: ResponseModality[];
+    /**
+     * Enables transcription of audio input.
+     *
+     * When enabled, the model will respond with transcriptions of your audio input in the `inputTranscription` property
+     * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+     * messages, so you may only receive small amounts of text per message. For example, if you ask the model
+     * "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?".
+     */
+    inputAudioTranscription?: AudioTranscriptionConfig;
+    /**
+     * Enables transcription of audio output.
+     *
+     * When enabled, the model will respond with transcriptions of its audio output in the `outputTranscription` property
+     * in {@link LiveServerContent} messages. Note that the transcriptions are broken up across
+     * messages, so you may only receive small amounts of text per message. For example, if the model says
+     * "How are you today?", the model may transcribe that output across three messages, broken up as "How a", "re yo", "u today?".
+     */
+    outputAudioTranscription?: AudioTranscriptionConfig;
 }
 /**
  * Params for {@link GenerativeModel.startChat}.
@@ -439,3 +457,8 @@ export interface SpeechConfig {
      */
     voiceConfig?: VoiceConfig;
 }
+/**
+ * The audio transcription configuration.
+ */
+export interface AudioTranscriptionConfig {
+}
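Both options take an empty `AudioTranscriptionConfig` marker object to opt in. A configuration sketch, assuming `getLiveGenerativeModel` accepts a `generationConfig` the way the rest of the package does, with a placeholder model name:

```ts
import { initializeApp } from '@firebase/app';
import { getAI, getLiveGenerativeModel } from '@firebase/ai';

const app = initializeApp({ /* your Firebase config */ });

const liveModel = getLiveGenerativeModel(getAI(app), {
  model: 'gemini-live-model-name', // placeholder model name
  generationConfig: {
    inputAudioTranscription: {},  // transcribe what the user says
    outputAudioTranscription: {}  // transcribe what the model says
  }
});
```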
package/dist/esm/src/types/responses.d.ts CHANGED
@@ -516,6 +516,27 @@ export interface LiveServerContent {
      * model was not interrupted.
      */
     interrupted?: boolean;
+    /**
+     * Transcription of the audio that was input to the model.
+     */
+    inputTranscription?: Transcription;
+    /**
+     * Transcription of the audio output from the model.
+     */
+    outputTranscription?: Transcription;
+}
+/**
+ * Transcription of audio. This can be returned from a {@link LiveGenerativeModel} if transcription
+ * is enabled with the `inputAudioTranscription` or `outputAudioTranscription` properties on
+ * the {@link LiveGenerationConfig}.
+ *
+ * @beta
+ */
+export interface Transcription {
+    /**
+     * The text transcription of the audio.
+     */
+    text?: string;
 }
 /**
  * A request from the model for the client to execute one or more functions.
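On the receiving side, transcription fragments ride along on content messages. A consumption sketch, assuming `LiveSession.receive()` yields the server-message union that includes `LiveServerContent`:

```ts
import { LiveSession } from '@firebase/ai';

async function logTranscripts(session: LiveSession): Promise<void> {
  for await (const message of session.receive()) {
    // Fragments arrive across messages ("How a", "re yo", "u today?"),
    // so accumulate them rather than treating each as a full sentence.
    if ('inputTranscription' in message && message.inputTranscription?.text) {
      console.log('user said:', message.inputTranscription.text);
    }
    if ('outputTranscription' in message && message.outputTranscription?.text) {
      console.log('model said:', message.outputTranscription.text);
    }
  }
}
```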
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
 var logger$1 = require('@firebase/logger');
 
 var name = "@firebase/ai";
-var version = "2.4.0-canary.c8263c471";
+var version = "2.5.0-20251028194003";
 
 /**
  * @license
@@ -901,22 +901,35 @@ var Availability;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+// Defaults to support image inputs for convenience.
+const defaultExpectedInputs = [{ type: 'image' }];
 /**
  * Defines an inference "backend" that uses Chrome's on-device model,
  * and encapsulates logic for detecting when on-device inference is
  * possible.
  */
 class ChromeAdapterImpl {
-    constructor(languageModelProvider, mode, onDeviceParams = {
-        createOptions: {
-            // Defaults to support image inputs for convenience.
-            expectedInputs: [{ type: 'image' }]
-        }
-    }) {
+    constructor(languageModelProvider, mode, onDeviceParams) {
         this.languageModelProvider = languageModelProvider;
         this.mode = mode;
-        this.onDeviceParams = onDeviceParams;
         this.isDownloading = false;
+        this.onDeviceParams = {
+            createOptions: {
+                expectedInputs: defaultExpectedInputs
+            }
+        };
+        if (onDeviceParams) {
+            this.onDeviceParams = onDeviceParams;
+            if (!this.onDeviceParams.createOptions) {
+                this.onDeviceParams.createOptions = {
+                    expectedInputs: defaultExpectedInputs
+                };
+            }
+            else if (!this.onDeviceParams.createOptions.expectedInputs) {
+                this.onDeviceParams.createOptions.expectedInputs =
+                    defaultExpectedInputs;
+            }
+        }
     }
     /**
      * Checks if a given request can be made on-device.
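The image-input default used to live in a default-parameter object, so passing any `onDeviceParams` at all replaced it wholesale; the constructor now backfills whatever pieces the caller omitted. An illustration of the new behavior (this class is internal and not exported from the package root, so the declarations below are stand-ins for exposition only):

```ts
// Illustration only: ChromeAdapterImpl and the provider are internal surfaces.
declare const ChromeAdapterImpl: any;
declare const languageModelProvider: unknown;

const adapter = new ChromeAdapterImpl(languageModelProvider, 'prefer_on_device', {
  createOptions: {} // caller omits expectedInputs
});
// Before this change, the onDeviceParams argument above silently dropped image
// support. Now the constructor fills the default back in:
// adapter.onDeviceParams.createOptions.expectedInputs -> [{ type: 'image' }]
```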
@@ -2861,75 +2874,104 @@ class LiveSession {
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends text to the server in realtime.
      *
-     * @
+     * @example
+     * ```javascript
+     * liveSession.sendTextRealtime("Hello, how are you?");
+     * ```
+     *
+     * @param text - The text data to send.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendTextRealtime(text) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-
-
-
-
-
-
-        this.webSocketHandler.send(JSON.stringify(message));
-        });
+        const message = {
+            realtimeInput: {
+                text
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends audio data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the audio data is base64-encoded 16-bit PCM at 16kHz
+     * little-endian.
+     *
+     * @example
+     * ```javascript
+     * // const pcmData = ... base64-encoded 16-bit PCM at 16kHz little-endian.
+     * const blob = { mimeType: "audio/pcm", data: pcmData };
+     * liveSession.sendAudioRealtime(blob);
+     * ```
+     *
+     * @param blob - The base64-encoded PCM data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendAudioRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
         const message = {
-
-
+            realtimeInput: {
+                audio: blob
             }
         };
         this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
-     * Sends
+     * Sends video data to the server in realtime.
      *
-     * @
+     * @remarks The server requires that the video is sent as individual video frames at 1 FPS. It
+     * is recommended to set `mimeType` to `image/jpeg`.
+     *
+     * @example
+     * ```javascript
+     * // const videoFrame = ... base64-encoded JPEG data
+     * const blob = { mimeType: "image/jpeg", data: videoFrame };
+     * liveSession.sendVideoRealtime(blob);
+     * ```
+     * @param blob - The base64-encoded video data to send to the server in realtime.
      * @throws If this session has been closed.
      *
      * @beta
      */
-    async
+    async sendVideoRealtime(blob) {
         if (this.isClosed) {
             throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
-        const
-
-
-            const { done, value } = await reader.read();
-            if (done) {
-                break;
-            }
-            else if (!value) {
-                throw new Error('Missing chunk in reader, but reader is not done.');
-            }
-            await this.sendMediaChunks([value]);
-        }
-        catch (e) {
-            // Re-throw any errors that occur during stream consumption or sending.
-            const message = e instanceof Error ? e.message : 'Error processing media stream.';
-            throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+        const message = {
+            realtimeInput: {
+                video: blob
             }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
         }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
     }
     /**
      * Yields messages received from the server.
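As the implementation above shows, `sendFunctionResponses()` wraps the array in a `toolResponse` message. A reply sketch, assuming the `FunctionResponse` shape exported from the package (the function name and payload here are made up):

```ts
import { LiveSession, FunctionResponse } from '@firebase/ai';

async function replyToToolCall(session: LiveSession): Promise<void> {
  const responses: FunctionResponse[] = [
    {
      name: 'getWeather',                   // hypothetical function the model called
      response: { temperatureCelsius: 21 }  // your handler's result
    }
  ];
  // Serialized as { toolResponse: { functionResponses: [...] } } on the wire.
  await session.sendFunctionResponses(responses);
}
```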
@@ -2987,6 +3029,62 @@ class LiveSession {
             await this.webSocketHandler.close(1000, 'Client closed session.');
         }
     }
+    /**
+     * Sends realtime input to the server.
+     *
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * @param mediaChunks - The media chunks to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaChunks(mediaChunks) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        // The backend does not support sending more than one mediaChunk in one message.
+        // Work around this limitation by sending mediaChunks in separate messages.
+        mediaChunks.forEach(mediaChunk => {
+            const message = {
+                realtimeInput: { mediaChunks: [mediaChunk] }
+            };
+            this.webSocketHandler.send(JSON.stringify(message));
+        });
+    }
+    /**
+     * @deprecated Use `sendTextRealtime()`, `sendAudioRealtime()`, and `sendVideoRealtime()` instead.
+     *
+     * Sends a stream of {@link GenerativeContentBlob}.
+     *
+     * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendMediaStream(mediaChunkStream) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const reader = mediaChunkStream.getReader();
+        while (true) {
+            try {
+                const { done, value } = await reader.read();
+                if (done) {
+                    break;
+                }
+                else if (!value) {
+                    throw new Error('Missing chunk in reader, but reader is not done.');
+                }
+                await this.sendMediaChunks([value]);
+            }
+            catch (e) {
+                // Re-throw any errors that occur during stream consumption or sending.
+                const message = e instanceof Error ? e.message : 'Error processing media stream.';
+                throw new AIError(AIErrorCode.REQUEST_ERROR, message);
+            }
+        }
+    }
 }
 
 /**
@@ -3047,13 +3145,18 @@ class LiveGenerativeModel extends AIModel {
         else {
             fullModelPath = `projects/${this._apiSettings.project}/locations/${this._apiSettings.location}/${this.model}`;
         }
+        // inputAudioTranscription and outputAudioTranscription are on the generation config in the public API,
+        // but the backend expects them to be in the `setup` message.
+        const { inputAudioTranscription, outputAudioTranscription, ...generationConfig } = this.generationConfig;
         const setupMessage = {
             setup: {
                 model: fullModelPath,
-                generationConfig
+                generationConfig,
                 tools: this.tools,
                 toolConfig: this.toolConfig,
-                systemInstruction: this.systemInstruction
+                systemInstruction: this.systemInstruction,
+                inputAudioTranscription,
+                outputAudioTranscription
             }
         };
         try {
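The rest-spread destructuring strips the two transcription fields out of the public config and hoists them to the top level of `setup`, matching the `_LiveClientSetup` type above. The resulting payload looks roughly like this (field values illustrative):

```ts
// Approximate setup message after the hoist; the transcription options sit
// beside generationConfig rather than inside it.
const setupMessage = {
  setup: {
    model: 'projects/<project>/locations/<location>/<model>',
    generationConfig: { responseModalities: ['AUDIO'] }, // transcription fields removed
    inputAudioTranscription: {},
    outputAudioTranscription: {}
  }
};
```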
@@ -3759,7 +3862,7 @@ class AudioConversationRunner {
             mimeType: 'audio/pcm',
             data: base64
         };
-        void this.liveSession.
+        void this.liveSession.sendAudioRealtime(chunk);
     };
 }
 /**
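This last hunk switches the built-in audio conversation helper over to `sendAudioRealtime()` for outgoing microphone chunks, so callers of the helper should not need changes. A usage sketch, assuming `startAudioConversation` remains the public entry point for it as in recent releases:

```ts
import { LiveSession, startAudioConversation } from '@firebase/ai';

// Mic capture and playback are handled by the runner; outgoing audio now
// flows through sendAudioRealtime() under the hood.
async function talk(session: LiveSession): Promise<void> {
  const controller = await startAudioConversation(session);
  // ... converse, then tear down:
  await controller.stop();
}
```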