@firebase/ai 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/ai-public.d.ts +580 -9
  2. package/dist/ai.d.ts +699 -10
  3. package/dist/esm/index.esm.js +1255 -368
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +18 -3
  6. package/dist/esm/src/constants.d.ts +1 -1
  7. package/dist/esm/src/index.d.ts +5 -1
  8. package/dist/esm/src/methods/chrome-adapter.d.ts +30 -24
  9. package/dist/esm/src/methods/live-session-helpers.d.ts +154 -0
  10. package/dist/esm/src/methods/live-session.d.ts +90 -0
  11. package/dist/esm/src/models/ai-model.d.ts +1 -1
  12. package/dist/esm/src/models/index.d.ts +1 -0
  13. package/dist/esm/src/models/live-generative-model.d.ts +55 -0
  14. package/dist/esm/src/public-types.d.ts +10 -1
  15. package/dist/esm/src/requests/request.d.ts +6 -0
  16. package/dist/esm/src/requests/response-helpers.d.ts +9 -5
  17. package/dist/esm/src/service.d.ts +7 -2
  18. package/dist/esm/src/types/chrome-adapter.d.ts +6 -4
  19. package/dist/esm/src/types/content.d.ts +42 -0
  20. package/dist/esm/src/types/enums.d.ts +5 -0
  21. package/dist/esm/src/types/error.d.ts +2 -0
  22. package/dist/esm/src/types/imagen/internal.d.ts +10 -0
  23. package/dist/esm/src/types/live-responses.d.ts +53 -0
  24. package/dist/esm/src/types/requests.d.ts +109 -1
  25. package/dist/esm/src/types/responses.d.ts +87 -4
  26. package/dist/esm/src/websocket.d.ts +67 -0
  27. package/dist/index.cjs.js +1259 -366
  28. package/dist/index.cjs.js.map +1 -1
  29. package/dist/index.node.cjs.js +907 -311
  30. package/dist/index.node.cjs.js.map +1 -1
  31. package/dist/index.node.mjs +904 -313
  32. package/dist/index.node.mjs.map +1 -1
  33. package/dist/src/api.d.ts +18 -3
  34. package/dist/src/constants.d.ts +1 -1
  35. package/dist/src/index.d.ts +5 -1
  36. package/dist/src/methods/chrome-adapter.d.ts +30 -24
  37. package/dist/src/methods/live-session-helpers.d.ts +154 -0
  38. package/dist/src/methods/live-session.d.ts +90 -0
  39. package/dist/src/models/ai-model.d.ts +1 -1
  40. package/dist/src/models/index.d.ts +1 -0
  41. package/dist/src/models/live-generative-model.d.ts +55 -0
  42. package/dist/src/public-types.d.ts +10 -1
  43. package/dist/src/requests/request.d.ts +6 -0
  44. package/dist/src/requests/response-helpers.d.ts +9 -5
  45. package/dist/src/service.d.ts +7 -2
  46. package/dist/src/types/chrome-adapter.d.ts +6 -4
  47. package/dist/src/types/content.d.ts +42 -0
  48. package/dist/src/types/enums.d.ts +5 -0
  49. package/dist/src/types/error.d.ts +2 -0
  50. package/dist/src/types/imagen/internal.d.ts +10 -0
  51. package/dist/src/types/live-responses.d.ts +53 -0
  52. package/dist/src/types/requests.d.ts +109 -1
  53. package/dist/src/types/responses.d.ts +87 -4
  54. package/dist/src/websocket.d.ts +67 -0
  55. package/package.json +4 -2
package/dist/ai.d.ts CHANGED
@@ -4,10 +4,18 @@
4
4
  * @packageDocumentation
5
5
  */
6
6
 
7
+ import { AppCheckInternalComponentName } from '@firebase/app-check-interop-types';
7
8
  import { AppCheckTokenResult } from '@firebase/app-check-interop-types';
9
+ import { ComponentContainer } from '@firebase/component';
8
10
  import { FirebaseApp } from '@firebase/app';
11
+ import { FirebaseAppCheckInternal } from '@firebase/app-check-interop-types';
12
+ import { FirebaseAuthInternal } from '@firebase/auth-interop-types';
13
+ import { FirebaseAuthInternalName } from '@firebase/auth-interop-types';
9
14
  import { FirebaseAuthTokenData } from '@firebase/auth-interop-types';
10
15
  import { FirebaseError } from '@firebase/util';
16
+ import { _FirebaseService } from '@firebase/app';
17
+ import { InstanceFactoryOptions } from '@firebase/component';
18
+ import { Provider } from '@firebase/component';
11
19
 
12
20
  /**
13
21
  * An instance of the Firebase AI SDK.
@@ -27,6 +35,10 @@ export declare interface AI {
27
35
  * Vertex AI Gemini API (using {@link VertexAIBackend}).
28
36
  */
29
37
  backend: Backend;
38
+ /**
39
+ * Options applied to this {@link AI} instance.
40
+ */
41
+ options?: AIOptions;
30
42
  /**
31
43
  * @deprecated use `AI.backend.location` instead.
32
44
  *
@@ -67,6 +79,8 @@ export declare const AIErrorCode: {
67
79
  readonly RESPONSE_ERROR: "response-error";
68
80
  /** An error occurred while performing a fetch. */
69
81
  readonly FETCH_ERROR: "fetch-error";
82
+ /** An error occurred because an operation was attempted on a closed session. */
83
+ readonly SESSION_CLOSED: "session-closed";
70
84
  /** An error associated with a Content object. */
71
85
  readonly INVALID_CONTENT: "invalid-content";
72
86
  /** An error due to the Firebase API not being enabled in the Console. */
@@ -111,7 +125,7 @@ export declare abstract class AIModel {
111
125
  /**
112
126
  * @internal
113
127
  */
114
- protected _apiSettings: ApiSettings;
128
+ _apiSettings: ApiSettings;
115
129
  /**
116
130
  * Constructs a new instance of the {@link AIModel} class.
117
131
  *
@@ -159,8 +173,27 @@ export declare abstract class AIModel {
159
173
  export declare interface AIOptions {
160
174
  /**
161
175
  * The backend configuration to use for the AI service instance.
176
+ * Defaults to the Gemini Developer API backend ({@link GoogleAIBackend}).
177
+ */
178
+ backend?: Backend;
179
+ /**
180
+ * Whether to use App Check limited use tokens. Defaults to false.
162
181
  */
182
+ useLimitedUseAppCheckTokens?: boolean;
183
+ }
184
+
185
+ declare class AIService implements AI, _FirebaseService {
186
+ app: FirebaseApp;
163
187
  backend: Backend;
188
+ chromeAdapterFactory?: ((mode: InferenceMode, window?: Window, params?: OnDeviceParams) => ChromeAdapterImpl | undefined) | undefined;
189
+ auth: FirebaseAuthInternal | null;
190
+ appCheck: FirebaseAppCheckInternal | null;
191
+ _options?: Omit<AIOptions, 'backend'>;
192
+ location: string;
193
+ constructor(app: FirebaseApp, backend: Backend, authProvider?: Provider<FirebaseAuthInternalName>, appCheckProvider?: Provider<AppCheckInternalComponentName>, chromeAdapterFactory?: ((mode: InferenceMode, window?: Window, params?: OnDeviceParams) => ChromeAdapterImpl | undefined) | undefined);
194
+ _delete(): Promise<void>;
195
+ set options(optionsToSet: AIOptions);
196
+ get options(): AIOptions | undefined;
164
197
  }
165
198
 
166
199
  /**
@@ -208,6 +241,29 @@ export declare class ArraySchema extends Schema {
208
241
  toJSON(): SchemaRequest;
209
242
  }
210
243
 
244
+ /**
245
+ * A controller for managing an active audio conversation.
246
+ *
247
+ * @beta
248
+ */
249
+ export declare interface AudioConversationController {
250
+ /**
251
+ * Stops the audio conversation, closes the microphone connection, and
252
+ * cleans up resources. Returns a promise that resolves when cleanup is complete.
253
+ */
254
+ stop: () => Promise<void>;
255
+ }
256
+
257
+ /**
258
+ * @internal
259
+ */
260
+ declare enum Availability {
261
+ 'UNAVAILABLE' = "unavailable",
262
+ 'DOWNLOADABLE' = "downloadable",
263
+ 'DOWNLOADING' = "downloading",
264
+ 'AVAILABLE' = "available"
265
+ }
266
+
211
267
  /**
212
268
  * Abstract base class representing the configuration for an AI service backend.
213
269
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -360,16 +416,18 @@ export declare interface ChromeAdapter {
360
416
  /**
361
417
  * Generates content using on-device inference.
362
418
  *
363
- * <p>This is comparable to {@link GenerativeModel.generateContent} for generating
364
- * content using in-cloud inference.</p>
419
+ * @remarks
420
+ * This is comparable to {@link GenerativeModel.generateContent} for generating
421
+ * content using in-cloud inference.
365
422
  * @param request - a standard Firebase AI {@link GenerateContentRequest}
366
423
  */
367
424
  generateContent(request: GenerateContentRequest): Promise<Response>;
368
425
  /**
369
426
  * Generates a content stream using on-device inference.
370
427
  *
371
- * <p>This is comparable to {@link GenerativeModel.generateContentStream} for generating
372
- * a content stream using in-cloud inference.</p>
428
+ * @remarks
429
+ * This is comparable to {@link GenerativeModel.generateContentStream} for generating
430
+ * a content stream using in-cloud inference.
373
431
  * @param request - a standard Firebase AI {@link GenerateContentRequest}
374
432
  */
375
433
  generateContentStream(request: GenerateContentRequest): Promise<Response>;
@@ -379,6 +437,108 @@ export declare interface ChromeAdapter {
379
437
  countTokens(request: CountTokensRequest): Promise<Response>;
380
438
  }
381
439
 
440
+ /**
441
+ * Defines an inference "backend" that uses Chrome's on-device model,
442
+ * and encapsulates logic for detecting when on-device inference is
443
+ * possible.
444
+ */
445
+ declare class ChromeAdapterImpl implements ChromeAdapter {
446
+ languageModelProvider: LanguageModel;
447
+ mode: InferenceMode;
448
+ onDeviceParams: OnDeviceParams;
449
+ static SUPPORTED_MIME_TYPES: string[];
450
+ private isDownloading;
451
+ private downloadPromise;
452
+ private oldSession;
453
+ constructor(languageModelProvider: LanguageModel, mode: InferenceMode, onDeviceParams?: OnDeviceParams);
454
+ /**
455
+ * Checks if a given request can be made on-device.
456
+ *
457
+ * Encapsulates a few concerns:
458
+ * the mode
459
+ * API existence
460
+ * prompt formatting
461
+ * model availability, including triggering download if necessary
462
+ *
463
+ *
464
+ * Pros: callers needn't be concerned with details of on-device availability.
465
+ * Cons: this method spans a few concerns and splits request validation from usage.
466
+ * If instance variables weren't already part of the API, we could consider a better
467
+ * separation of concerns.
468
+ */
469
+ isAvailable(request: GenerateContentRequest): Promise<boolean>;
470
+ /**
471
+ * Generates content on device.
472
+ *
473
+ * @remarks
474
+ * This is comparable to {@link GenerativeModel.generateContent} for generating content in
475
+ * Cloud.
476
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
477
+ * @returns {@link Response}, so we can reuse common response formatting.
478
+ */
479
+ generateContent(request: GenerateContentRequest): Promise<Response>;
480
+ /**
481
+ * Generates content stream on device.
482
+ *
483
+ * @remarks
484
+ * This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
485
+ * Cloud.
486
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
487
+ * @returns {@link Response}, so we can reuse common response formatting.
488
+ */
489
+ generateContentStream(request: GenerateContentRequest): Promise<Response>;
490
+ countTokens(_request: CountTokensRequest): Promise<Response>;
491
+ /**
492
+ * Asserts inference for the given request can be performed by an on-device model.
493
+ */
494
+ private static isOnDeviceRequest;
495
+ /**
496
+ * Encapsulates logic to get availability and download a model if one is downloadable.
497
+ */
498
+ private downloadIfAvailable;
499
+ /**
500
+ * Triggers out-of-band download of an on-device model.
501
+ *
502
+ * Chrome only downloads models as needed. Chrome knows a model is needed when code calls
503
+ * LanguageModel.create.
504
+ *
505
+ * Since Chrome manages the download, the SDK can only avoid redundant download requests by
506
+ * tracking if a download has previously been requested.
507
+ */
508
+ private download;
509
+ /**
510
+ * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
511
+ */
512
+ private static toLanguageModelMessage;
513
+ /**
514
+ * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
515
+ */
516
+ private static toLanguageModelMessageContent;
517
+ /**
518
+ * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
519
+ */
520
+ private static toLanguageModelMessageRole;
521
+ /**
522
+ * Abstracts Chrome session creation.
523
+ *
524
+ * Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
525
+ * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
526
+ * inference.
527
+ *
528
+ * Chrome will remove a model from memory if it's no longer in use, so this method ensures a
529
+ * new session is created before an old session is destroyed.
530
+ */
531
+ private createSession;
532
+ /**
533
+ * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
534
+ */
535
+ private static toResponse;
536
+ /**
537
+ * Formats string stream returned by Chrome as SSE returned by Firebase AI.
538
+ */
539
+ private static toStreamResponse;
540
+ }
541
+
382
542
  /**
383
543
  * A single citation.
384
544
  * @public
@@ -500,15 +660,34 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
500
660
  */
501
661
  text: () => string;
502
662
  /**
503
- * Aggregates and returns all {@link InlineDataPart}s from the {@link GenerateContentResponse}'s
504
- * first candidate.
505
- *
506
- * @returns An array of {@link InlineDataPart}s containing data from the response, if available.
663
+ * Aggregates and returns every {@link InlineDataPart} from the first candidate of
664
+ * {@link GenerateContentResponse}.
507
665
  *
508
666
  * @throws If the prompt or candidate was blocked.
509
667
  */
510
668
  inlineDataParts: () => InlineDataPart[] | undefined;
669
+ /**
670
+ * Aggregates and returns every {@link FunctionCall} from the first candidate of
671
+ * {@link GenerateContentResponse}.
672
+ *
673
+ * @throws If the prompt or candidate was blocked.
674
+ */
511
675
  functionCalls: () => FunctionCall[] | undefined;
676
+ /**
677
+ * Aggregates and returns every {@link TextPart} with their `thought` property set
678
+ * to `true` from the first candidate of {@link GenerateContentResponse}.
679
+ *
680
+ * @throws If the prompt or candidate was blocked.
681
+ *
682
+ * @remarks
683
+ * Thought summaries provide a brief overview of the model's internal thinking process,
684
+ * offering insight into how it arrived at the final answer. This can be useful for
685
+ * debugging, understanding the model's reasoning, and verifying its accuracy.
686
+ *
687
+ * Thoughts will only be included if {@link ThinkingConfig.includeThoughts} is
688
+ * set to `true`.
689
+ */
690
+ thoughtSummary: () => string | undefined;
512
691
  }
513
692
 
514
693
  /**
@@ -528,6 +707,8 @@ export declare interface ErrorDetails {
528
707
  [key: string]: unknown;
529
708
  }
530
709
 
710
+ export declare function factory(container: ComponentContainer, { instanceIdentifier }: InstanceFactoryOptions): AIService;
711
+
531
712
  /**
532
713
  * Data pointing to a file uploaded on Google Cloud Storage.
533
714
  * @public
@@ -547,6 +728,11 @@ export declare interface FileDataPart {
547
728
  functionCall?: never;
548
729
  functionResponse?: never;
549
730
  fileData: FileData;
731
+ thought?: boolean;
732
+ /**
733
+ * @internal
734
+ */
735
+ thoughtSignature?: never;
550
736
  }
551
737
 
552
738
  /**
@@ -605,6 +791,15 @@ export declare type FinishReason = (typeof FinishReason)[keyof typeof FinishReas
605
791
  * @public
606
792
  */
607
793
  export declare interface FunctionCall {
794
+ /**
795
+ * The id of the function call. This must be sent back in the associated {@link FunctionResponse}.
796
+ *
797
+ *
798
+ * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
799
+ * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
800
+ * `undefined`.
801
+ */
802
+ id?: string;
608
803
  name: string;
609
804
  args: object;
610
805
  }
@@ -654,6 +849,11 @@ export declare interface FunctionCallPart {
654
849
  inlineData?: never;
655
850
  functionCall: FunctionCall;
656
851
  functionResponse?: never;
852
+ thought?: boolean;
853
+ /**
854
+ * @internal
855
+ */
856
+ thoughtSignature?: never;
657
857
  }
658
858
 
659
859
  /**
@@ -715,6 +915,14 @@ export declare interface FunctionDeclarationsTool {
715
915
  * @public
716
916
  */
717
917
  export declare interface FunctionResponse {
918
+ /**
919
+ * The id of the {@link FunctionCall}.
920
+ *
921
+ * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
922
+ * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
923
+ * `undefined`.
924
+ */
925
+ id?: string;
718
926
  name: string;
719
927
  response: object;
720
928
  }
@@ -728,6 +936,11 @@ export declare interface FunctionResponsePart {
728
936
  inlineData?: never;
729
937
  functionCall?: never;
730
938
  functionResponse: FunctionResponse;
939
+ thought?: boolean;
940
+ /**
941
+ * @internal
942
+ */
943
+ thoughtSignature?: never;
731
944
  }
732
945
 
733
946
  /**
@@ -937,6 +1150,20 @@ export declare function getGenerativeModel(ai: AI, modelParams: ModelParams | Hy
937
1150
  */
938
1151
  export declare function getImagenModel(ai: AI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel;
939
1152
 
1153
+ /**
1154
+ * Returns a {@link LiveGenerativeModel} class for real-time, bidirectional communication.
1155
+ *
1156
+ * The Live API is only supported in modern browser windows and Node >= 22.
1157
+ *
1158
+ * @param ai - An {@link AI} instance.
1159
+ * @param modelParams - Parameters to use when setting up a {@link LiveSession}.
1160
+ * @throws If the `apiKey` or `projectId` fields are missing in your
1161
+ * Firebase config.
1162
+ *
1163
+ * @beta
1164
+ */
1165
+ export declare function getLiveGenerativeModel(ai: AI, modelParams: LiveModelParams): LiveGenerativeModel;
1166
+
940
1167
  /**
941
1168
  * Configuration class for the Gemini Developer API.
942
1169
  *
@@ -1759,6 +1986,11 @@ export declare interface InlineDataPart {
1759
1986
  * Applicable if `inlineData` is a video.
1760
1987
  */
1761
1988
  videoMetadata?: VideoMetadata;
1989
+ thought?: boolean;
1990
+ /**
1991
+ * @internal
1992
+ */
1993
+ thoughtSignature?: never;
1762
1994
  }
1763
1995
 
1764
1996
  /**
@@ -1769,6 +2001,38 @@ export declare class IntegerSchema extends Schema {
1769
2001
  constructor(schemaParams?: SchemaParams);
1770
2002
  }
1771
2003
 
2004
+ /**
2005
+ * @license
2006
+ * Copyright 2025 Google LLC
2007
+ *
2008
+ * Licensed under the Apache License, Version 2.0 (the "License");
2009
+ * you may not use this file except in compliance with the License.
2010
+ * You may obtain a copy of the License at
2011
+ *
2012
+ * http://www.apache.org/licenses/LICENSE-2.0
2013
+ *
2014
+ * Unless required by applicable law or agreed to in writing, software
2015
+ * distributed under the License is distributed on an "AS IS" BASIS,
2016
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2017
+ * See the License for the specific language governing permissions and
2018
+ * limitations under the License.
2019
+ */
2020
+ /**
2021
+ * The subset of the Prompt API
2022
+ * (see {@link https://github.com/webmachinelearning/prompt-api#full-api-surface-in-web-idl })
2023
+ * required for hybrid functionality.
2024
+ *
2025
+ * @internal
2026
+ */
2027
+ declare interface LanguageModel extends EventTarget {
2028
+ create(options?: LanguageModelCreateOptions): Promise<LanguageModel>;
2029
+ availability(options?: LanguageModelCreateCoreOptions): Promise<Availability>;
2030
+ prompt(input: LanguageModelPrompt, options?: LanguageModelPromptOptions): Promise<string>;
2031
+ promptStreaming(input: LanguageModelPrompt, options?: LanguageModelPromptOptions): ReadableStream;
2032
+ measureInputUsage(input: LanguageModelPrompt, options?: LanguageModelPromptOptions): Promise<number>;
2033
+ destroy(): undefined;
2034
+ }
2035
+
1772
2036
  /**
1773
2037
  * <b>(EXPERIMENTAL)</b>
1774
2038
  * Configures the creation of an on-device language model session.
@@ -1840,6 +2104,13 @@ export declare type LanguageModelMessageRole = 'system' | 'user' | 'assistant';
1840
2104
  */
1841
2105
  export declare type LanguageModelMessageType = 'text' | 'image' | 'audio';
1842
2106
 
2107
+ /**
2108
+ * <b>(EXPERIMENTAL)</b>
2109
+ * An on-device language model prompt.
2110
+ * @public
2111
+ */
2112
+ declare type LanguageModelPrompt = LanguageModelMessage[];
2113
+
1843
2114
  /**
1844
2115
  * <b>(EXPERIMENTAL)</b>
1845
2116
  * Options for an on-device language model prompt.
@@ -1849,6 +2120,247 @@ export declare interface LanguageModelPromptOptions {
1849
2120
  responseConstraint?: object;
1850
2121
  }
1851
2122
 
2123
+ /**
2124
+ * Configuration parameters used by {@link LiveGenerativeModel} to control live content generation.
2125
+ *
2126
+ * @beta
2127
+ */
2128
+ export declare interface LiveGenerationConfig {
2129
+ /**
2130
+ * Configuration for speech synthesis.
2131
+ */
2132
+ speechConfig?: SpeechConfig;
2133
+ /**
2134
+ * Specifies the maximum number of tokens that can be generated in the response. The number of
2135
+ * tokens per word varies depending on the language outputted. Is unbounded by default.
2136
+ */
2137
+ maxOutputTokens?: number;
2138
+ /**
2139
+ * Controls the degree of randomness in token selection. A `temperature` value of 0 means that the highest
2140
+ * probability tokens are always selected. In this case, responses for a given prompt are mostly
2141
+ * deterministic, but a small amount of variation is still possible.
2142
+ */
2143
+ temperature?: number;
2144
+ /**
2145
+ * Changes how the model selects tokens for output. Tokens are
2146
+ * selected from the most to least probable until the sum of their probabilities equals the `topP`
2147
+ * value. For example, if tokens A, B, and C have probabilities of 0.3, 0.2, and 0.1 respectively
2148
+ * and the `topP` value is 0.5, then the model will select either A or B as the next token by using
2149
+ * the `temperature` and exclude C as a candidate. Defaults to 0.95 if unset.
2150
+ */
2151
+ topP?: number;
2152
+ /**
2153
+ * Changes how the model selects tokens for output. A `topK` value of 1 means the selected token is
2154
+ * the most probable among all tokens in the model's vocabulary, while a `topK` value of 3 means that
2155
+ * the next token is selected from among the 3 most probable using probabilities sampled. Tokens
2156
+ * are then further filtered with the highest selected `temperature` sampling. Defaults to 40
2157
+ * if unspecified.
2158
+ */
2159
+ topK?: number;
2160
+ /**
2161
+ * Positive penalties.
2162
+ */
2163
+ presencePenalty?: number;
2164
+ /**
2165
+ * Frequency penalties.
2166
+ */
2167
+ frequencyPenalty?: number;
2168
+ /**
2169
+ * The modalities of the response.
2170
+ */
2171
+ responseModalities?: ResponseModality[];
2172
+ }
2173
+
2174
+ /**
2175
+ * Class for Live generative model APIs. The Live API enables low-latency, two-way multimodal
2176
+ * interactions with Gemini.
2177
+ *
2178
+ * This class should only be instantiated with {@link getLiveGenerativeModel}.
2179
+ *
2180
+ * @beta
2181
+ */
2182
+ export declare class LiveGenerativeModel extends AIModel {
2183
+ /**
2184
+ * @internal
2185
+ */
2186
+ private _webSocketHandler;
2187
+ generationConfig: LiveGenerationConfig;
2188
+ tools?: Tool[];
2189
+ toolConfig?: ToolConfig;
2190
+ systemInstruction?: Content;
2191
+ /**
2192
+ * @internal
2193
+ */
2194
+ constructor(ai: AI, modelParams: LiveModelParams,
2195
+ /**
2196
+ * @internal
2197
+ */
2198
+ _webSocketHandler: WebSocketHandler);
2199
+ /**
2200
+ * Starts a {@link LiveSession}.
2201
+ *
2202
+ * @returns A {@link LiveSession}.
2203
+ * @throws If the connection failed to be established with the server.
2204
+ *
2205
+ * @beta
2206
+ */
2207
+ connect(): Promise<LiveSession>;
2208
+ }
2209
+
2210
+ /**
2211
+ * Params passed to {@link getLiveGenerativeModel}.
2212
+ * @beta
2213
+ */
2214
+ export declare interface LiveModelParams {
2215
+ model: string;
2216
+ generationConfig?: LiveGenerationConfig;
2217
+ tools?: Tool[];
2218
+ toolConfig?: ToolConfig;
2219
+ systemInstruction?: string | Part | Content;
2220
+ }
2221
+
2222
+ /**
2223
+ * The types of responses that can be returned by {@link LiveSession.receive}.
2224
+ *
2225
+ * @beta
2226
+ */
2227
+ export declare const LiveResponseType: {
2228
+ SERVER_CONTENT: string;
2229
+ TOOL_CALL: string;
2230
+ TOOL_CALL_CANCELLATION: string;
2231
+ };
2232
+
2233
+ /**
2234
+ * The types of responses that can be returned by {@link LiveSession.receive}.
2235
+ * This is a property on all messages that can be used for type narrowing. This property is not
2236
+ * returned by the server, it is assigned to a server message object once it's parsed.
2237
+ *
2238
+ * @beta
2239
+ */
2240
+ export declare type LiveResponseType = (typeof LiveResponseType)[keyof typeof LiveResponseType];
2241
+
2242
+ /**
2243
+ * An incremental content update from the model.
2244
+ *
2245
+ * @beta
2246
+ */
2247
+ export declare interface LiveServerContent {
2248
+ type: 'serverContent';
2249
+ /**
2250
+ * The content that the model has generated as part of the current conversation with the user.
2251
+ */
2252
+ modelTurn?: Content;
2253
+ /**
2254
+ * Indicates whether the turn is complete. This is `undefined` if the turn is not complete.
2255
+ */
2256
+ turnComplete?: boolean;
2257
+ /**
2258
+ * Indicates whether the model was interrupted by the client. An interruption occurs when
2259
+ * the client sends a message before the model finishes its turn. This is `undefined` if the
2260
+ * model was not interrupted.
2261
+ */
2262
+ interrupted?: boolean;
2263
+ }
2264
+
2265
+ /**
2266
+ * A request from the model for the client to execute one or more functions.
2267
+ *
2268
+ * @beta
2269
+ */
2270
+ export declare interface LiveServerToolCall {
2271
+ type: 'toolCall';
2272
+ /**
2273
+ * An array of function calls to run.
2274
+ */
2275
+ functionCalls: FunctionCall[];
2276
+ }
2277
+
2278
+ /**
2279
+ * Notification to cancel a previous function call triggered by {@link LiveServerToolCall}.
2280
+ *
2281
+ * @beta
2282
+ */
2283
+ export declare interface LiveServerToolCallCancellation {
2284
+ type: 'toolCallCancellation';
2285
+ /**
2286
+ * IDs of function calls that were cancelled. These refer to the `id` property of a {@link FunctionCall}.
2287
+ */
2288
+ functionIds: string[];
2289
+ }
2290
+
2291
+ /**
2292
+ * Represents an active, real-time, bidirectional conversation with the model.
2293
+ *
2294
+ * This class should only be instantiated by calling {@link LiveGenerativeModel.connect}.
2295
+ *
2296
+ * @beta
2297
+ */
2298
+ export declare class LiveSession {
2299
+ private webSocketHandler;
2300
+ private serverMessages;
2301
+ /**
2302
+ * Indicates whether this Live session is closed.
2303
+ *
2304
+ * @beta
2305
+ */
2306
+ isClosed: boolean;
2307
+ /**
2308
+ * Indicates whether this Live session is being controlled by an `AudioConversationController`.
2309
+ *
2310
+ * @beta
2311
+ */
2312
+ inConversation: boolean;
2313
+ /**
2314
+ * @internal
2315
+ */
2316
+ constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
2317
+ /**
2318
+ * Sends content to the server.
2319
+ *
2320
+ * @param request - The message to send to the model.
2321
+ * @param turnComplete - Indicates if the turn is complete. Defaults to false.
2322
+ * @throws If this session has been closed.
2323
+ *
2324
+ * @beta
2325
+ */
2326
+ send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
2327
+ /**
2328
+ * Sends realtime input to the server.
2329
+ *
2330
+ * @param mediaChunks - The media chunks to send.
2331
+ * @throws If this session has been closed.
2332
+ *
2333
+ * @beta
2334
+ */
2335
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
2336
+ /**
2337
+ * Sends a stream of {@link GenerativeContentBlob}.
2338
+ *
2339
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
2340
+ * @throws If this session has been closed.
2341
+ *
2342
+ * @beta
2343
+ */
2344
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
2345
+ /**
2346
+ * Yields messages received from the server.
2347
+ * This can only be used by one consumer at a time.
2348
+ *
2349
+ * @returns An `AsyncGenerator` that yields server messages as they arrive.
2350
+ * @throws If the session is already closed, or if we receive a response that we don't support.
2351
+ *
2352
+ * @beta
2353
+ */
2354
+ receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
2355
+ /**
2356
+ * Closes this session.
2357
+ * All methods on this session will throw an error once this resolves.
2358
+ *
2359
+ * @beta
2360
+ */
2361
+ close(): Promise<void>;
2362
+ }
2363
+
1852
2364
  /**
1853
2365
  * Content part modality.
1854
2366
  * @public
@@ -1977,6 +2489,20 @@ export declare type Part = TextPart | InlineDataPart | FunctionCallPart | Functi
1977
2489
  */
1978
2490
  export declare const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];
1979
2491
 
2492
+ /**
2493
+ * Configuration for a pre-built voice.
2494
+ *
2495
+ * @beta
2496
+ */
2497
+ export declare interface PrebuiltVoiceConfig {
2498
+ /**
2499
+ * The voice name to use for speech synthesis.
2500
+ *
2501
+ * For a full list of names and demos of what each voice sounds like, see {@link https://cloud.google.com/text-to-speech/docs/chirp3-hd | Chirp 3: HD Voices}.
2502
+ */
2503
+ voiceName?: string;
2504
+ }
2505
+
1980
2506
  /**
1981
2507
  * If the prompt was blocked, this will be populated with `blockReason` and
1982
2508
  * the relevant `safetyRatings`.
@@ -2003,7 +2529,10 @@ export declare interface RequestOptions {
2003
2529
  */
2004
2530
  timeout?: number;
2005
2531
  /**
2006
- * Base url for endpoint. Defaults to https://firebasevertexai.googleapis.com
2532
+ * Base url for endpoint. Defaults to
2533
+ * https://firebasevertexai.googleapis.com, which is the
2534
+ * {@link https://console.cloud.google.com/apis/library/firebasevertexai.googleapis.com?project=_ | Firebase AI Logic API}
2535
+ * (used regardless of your chosen Gemini API provider).
2007
2536
  */
2008
2537
  baseUrl?: string;
2009
2538
  }
@@ -2024,6 +2553,11 @@ export declare const ResponseModality: {
2024
2553
  * @beta
2025
2554
  */
2026
2555
  readonly IMAGE: "IMAGE";
2556
+ /**
2557
+ * Audio.
2558
+ * @beta
2559
+ */
2560
+ readonly AUDIO: "AUDIO";
2027
2561
  };
2028
2562
 
2029
2563
  /**
@@ -2347,6 +2881,80 @@ export declare interface Segment {
2347
2881
  text: string;
2348
2882
  }
2349
2883
 
2884
+ /**
2885
+ * Configures speech synthesis.
2886
+ *
2887
+ * @beta
2888
+ */
2889
+ export declare interface SpeechConfig {
2890
+ /**
2891
+ * Configures the voice to be used in speech synthesis.
2892
+ */
2893
+ voiceConfig?: VoiceConfig;
2894
+ }
2895
+
2896
+ /**
2897
+ * Starts a real-time, bidirectional audio conversation with the model. This helper function manages
2898
+ * the complexities of microphone access, audio recording, playback, and interruptions.
2899
+ *
2900
+ * @remarks Important: This function must be called in response to a user gesture
2901
+ * (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}.
2902
+ *
2903
+ * @example
2904
+ * ```javascript
2905
+ * const liveSession = await model.connect();
2906
+ * let conversationController;
2907
+ *
2908
+ * // This function must be called from within a click handler.
2909
+ * async function startConversation() {
2910
+ * try {
2911
+ * conversationController = await startAudioConversation(liveSession);
2912
+ * } catch (e) {
2913
+ * // Handle AI-specific errors
2914
+ * if (e instanceof AIError) {
2915
+ * console.error("AI Error:", e.message);
2916
+ * }
2917
+ * // Handle microphone permission and hardware errors
2918
+ * else if (e instanceof DOMException) {
2919
+ * console.error("Microphone Error:", e.message);
2920
+ * }
2921
+ * // Handle other unexpected errors
2922
+ * else {
2923
+ * console.error("An unexpected error occurred:", e);
2924
+ * }
2925
+ * }
2926
+ * }
2927
+ *
2928
+ * // Later, to stop the conversation:
2929
+ * // if (conversationController) {
2930
+ * // await conversationController.stop();
2931
+ * // }
2932
+ * ```
2933
+ *
2934
+ * @param liveSession - An active {@link LiveSession} instance.
2935
+ * @param options - Configuration options for the audio conversation.
2936
+ * @returns A `Promise` that resolves with an {@link AudioConversationController}.
2937
+ * @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), if the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).
2938
+ * @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions.
2939
+ *
2940
+ * @beta
2941
+ */
2942
+ export declare function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
2943
+
2944
+ /**
2945
+ * Options for {@link startAudioConversation}.
2946
+ *
2947
+ * @beta
2948
+ */
2949
+ export declare interface StartAudioConversationOptions {
2950
+ /**
2951
+ * An async handler that is called when the model requests a function to be executed.
2952
+ * The handler should perform the function call and return the result as a `Part`,
2953
+ * which will then be sent back to the model.
2954
+ */
2955
+ functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
2956
+ }
2957
+
2350
2958
  /**
2351
2959
  * Params for {@link GenerativeModel.startChat}.
2352
2960
  * @public
@@ -2381,6 +2989,11 @@ export declare interface TextPart {
2381
2989
  inlineData?: never;
2382
2990
  functionCall?: never;
2383
2991
  functionResponse?: never;
2992
+ thought?: boolean;
2993
+ /**
2994
+ * @internal
2995
+ */
2996
+ thoughtSignature?: string;
2384
2997
  }
2385
2998
 
2386
2999
  /**
@@ -2406,6 +3019,15 @@ export declare interface ThinkingConfig {
2406
3019
  * feature or if the specified budget is not within the model's supported range.
2407
3020
  */
2408
3021
  thinkingBudget?: number;
3022
+ /**
3023
+ * Whether to include "thought summaries" in the model's response.
3024
+ *
3025
+ * @remarks
3026
+ * Thought summaries provide a brief overview of the model's internal thinking process,
3027
+ * offering insight into how it arrived at the final answer. This can be useful for
3028
+ * debugging, understanding the model's reasoning, and verifying its accuracy.
3029
+ */
3030
+ includeThoughts?: boolean;
2409
3031
  }
2410
3032
 
2411
3033
  /**
@@ -2487,6 +3109,18 @@ export declare interface VideoMetadata {
2487
3109
  endOffset: string;
2488
3110
  }
2489
3111
 
3112
+ /**
3113
+ * Configuration for the voice to be used in speech synthesis.
3114
+ *
3115
+ * @beta
3116
+ */
3117
+ export declare interface VoiceConfig {
3118
+ /**
3119
+ * Configures the voice using a pre-built voice configuration.
3120
+ */
3121
+ prebuiltVoiceConfig?: PrebuiltVoiceConfig;
3122
+ }
3123
+
2490
3124
  /**
2491
3125
  * @public
2492
3126
  */
@@ -2522,4 +3156,59 @@ export declare interface WebGroundingChunk {
2522
3156
  domain?: string;
2523
3157
  }
2524
3158
 
3159
+ /**
3160
+ * @license
3161
+ * Copyright 2025 Google LLC
3162
+ *
3163
+ * Licensed under the Apache License, Version 2.0 (the "License");
3164
+ * you may not use this file except in compliance with the License.
3165
+ * You may obtain a copy of the License at
3166
+ *
3167
+ * http://www.apache.org/licenses/LICENSE-2.0
3168
+ *
3169
+ * Unless required by applicable law or agreed to in writing, software
3170
+ * distributed under the License is distributed on an "AS IS" BASIS,
3171
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
3172
+ * See the License for the specific language governing permissions and
3173
+ * limitations under the License.
3174
+ */
3175
+ /**
3176
+ * A standardized interface for interacting with a WebSocket connection.
3177
+ * This abstraction allows the SDK to use the appropriate WebSocket implementation
3178
+ * for the current JS environment (Browser vs. Node) without
3179
+ * changing the core logic of the `LiveSession`.
3180
+ * @internal
3181
+ */
3182
+ declare interface WebSocketHandler {
3183
+ /**
3184
+ * Establishes a connection to the given URL.
3185
+ *
3186
+ * @param url - The WebSocket URL (e.g., wss://...).
3187
+ * @returns A promise that resolves on successful connection or rejects on failure.
3188
+ */
3189
+ connect(url: string): Promise<void>;
3190
+ /**
3191
+ * Sends data over the WebSocket.
3192
+ *
3193
+ * @param data The string or binary data to send.
3194
+ */
3195
+ send(data: string | ArrayBuffer): void;
3196
+ /**
3197
+ * Returns an async generator that yields parsed JSON objects from the server.
3198
+ * The yielded type is `unknown` because the handler cannot guarantee the shape of the data.
3199
+ * The consumer is responsible for type validation.
3200
+ * The generator terminates when the connection is closed.
3201
+ *
3202
+ * @returns A generator that allows consumers to pull messages using a `for await...of` loop.
3203
+ */
3204
+ listen(): AsyncGenerator<unknown>;
3205
+ /**
3206
+ * Closes the WebSocket connection.
3207
+ *
3208
+ * @param code - A numeric status code explaining why the connection is closing.
3209
+ * @param reason - A human-readable string explaining why the connection is closing.
3210
+ */
3211
+ close(code?: number, reason?: string): Promise<void>;
3212
+ }
3213
+
2525
3214
  export { }