@firebase/ai 2.1.0 → 2.2.0-canary.f2ecae7df

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/ai-public.d.ts +580 -9
  2. package/dist/ai.d.ts +699 -10
  3. package/dist/esm/index.esm.js +1255 -368
  4. package/dist/esm/index.esm.js.map +1 -1
  5. package/dist/esm/src/api.d.ts +18 -3
  6. package/dist/esm/src/constants.d.ts +1 -1
  7. package/dist/esm/src/index.d.ts +5 -1
  8. package/dist/esm/src/methods/chrome-adapter.d.ts +30 -24
  9. package/dist/esm/src/methods/live-session-helpers.d.ts +154 -0
  10. package/dist/esm/src/methods/live-session.d.ts +90 -0
  11. package/dist/esm/src/models/ai-model.d.ts +1 -1
  12. package/dist/esm/src/models/index.d.ts +1 -0
  13. package/dist/esm/src/models/live-generative-model.d.ts +55 -0
  14. package/dist/esm/src/public-types.d.ts +10 -1
  15. package/dist/esm/src/requests/request.d.ts +6 -0
  16. package/dist/esm/src/requests/response-helpers.d.ts +9 -5
  17. package/dist/esm/src/service.d.ts +7 -2
  18. package/dist/esm/src/types/chrome-adapter.d.ts +6 -4
  19. package/dist/esm/src/types/content.d.ts +42 -0
  20. package/dist/esm/src/types/enums.d.ts +5 -0
  21. package/dist/esm/src/types/error.d.ts +2 -0
  22. package/dist/esm/src/types/imagen/internal.d.ts +10 -0
  23. package/dist/esm/src/types/live-responses.d.ts +53 -0
  24. package/dist/esm/src/types/requests.d.ts +109 -1
  25. package/dist/esm/src/types/responses.d.ts +87 -4
  26. package/dist/esm/src/websocket.d.ts +67 -0
  27. package/dist/index.cjs.js +1259 -366
  28. package/dist/index.cjs.js.map +1 -1
  29. package/dist/index.node.cjs.js +907 -311
  30. package/dist/index.node.cjs.js.map +1 -1
  31. package/dist/index.node.mjs +904 -313
  32. package/dist/index.node.mjs.map +1 -1
  33. package/dist/src/api.d.ts +18 -3
  34. package/dist/src/constants.d.ts +1 -1
  35. package/dist/src/index.d.ts +5 -1
  36. package/dist/src/methods/chrome-adapter.d.ts +30 -24
  37. package/dist/src/methods/live-session-helpers.d.ts +154 -0
  38. package/dist/src/methods/live-session.d.ts +90 -0
  39. package/dist/src/models/ai-model.d.ts +1 -1
  40. package/dist/src/models/index.d.ts +1 -0
  41. package/dist/src/models/live-generative-model.d.ts +55 -0
  42. package/dist/src/public-types.d.ts +10 -1
  43. package/dist/src/requests/request.d.ts +6 -0
  44. package/dist/src/requests/response-helpers.d.ts +9 -5
  45. package/dist/src/service.d.ts +7 -2
  46. package/dist/src/types/chrome-adapter.d.ts +6 -4
  47. package/dist/src/types/content.d.ts +42 -0
  48. package/dist/src/types/enums.d.ts +5 -0
  49. package/dist/src/types/error.d.ts +2 -0
  50. package/dist/src/types/imagen/internal.d.ts +10 -0
  51. package/dist/src/types/live-responses.d.ts +53 -0
  52. package/dist/src/types/requests.d.ts +109 -1
  53. package/dist/src/types/responses.d.ts +87 -4
  54. package/dist/src/websocket.d.ts +67 -0
  55. package/package.json +10 -8
@@ -4,10 +4,17 @@
  * @packageDocumentation
  */

+ import { AppCheckInternalComponentName } from '@firebase/app-check-interop-types';
  import { AppCheckTokenResult } from '@firebase/app-check-interop-types';
+ import { ComponentContainer } from '@firebase/component';
  import { FirebaseApp } from '@firebase/app';
+ import { FirebaseAppCheckInternal } from '@firebase/app-check-interop-types';
+ import { FirebaseAuthInternal } from '@firebase/auth-interop-types';
+ import { FirebaseAuthInternalName } from '@firebase/auth-interop-types';
  import { FirebaseAuthTokenData } from '@firebase/auth-interop-types';
  import { FirebaseError } from '@firebase/util';
+ import { InstanceFactoryOptions } from '@firebase/component';
+ import { Provider } from '@firebase/component';

  /**
  * An instance of the Firebase AI SDK.
@@ -27,6 +34,10 @@ export declare interface AI {
  * Vertex AI Gemini API (using {@link VertexAIBackend}).
  */
  backend: Backend;
+ /**
+ * Options applied to this {@link AI} instance.
+ */
+ options?: AIOptions;
  /**
  * @deprecated use `AI.backend.location` instead.
  *
@@ -67,6 +78,8 @@ export declare const AIErrorCode: {
  readonly RESPONSE_ERROR: "response-error";
  /** An error occurred while performing a fetch. */
  readonly FETCH_ERROR: "fetch-error";
+ /** An error occurred because an operation was attempted on a closed session. */
+ readonly SESSION_CLOSED: "session-closed";
  /** An error associated with a Content object. */
  readonly INVALID_CONTENT: "invalid-content";
  /** An error due to the Firebase API not being enabled in the Console. */
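The new `SESSION_CLOSED` code is raised when a method is invoked on a live session that has already been closed. A minimal handling sketch, assuming an active `session` obtained from `LiveGenerativeModel.connect()` (introduced later in this diff):

```ts
import { AIError, AIErrorCode } from 'firebase/ai';

try {
  await session.send('Hello');
} catch (e) {
  // send() rejects with SESSION_CLOSED once close() has resolved.
  if (e instanceof AIError && e.code === AIErrorCode.SESSION_CLOSED) {
    console.warn('Live session is closed; reconnect before sending.');
  } else {
    throw e;
  }
}
```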
@@ -125,8 +138,27 @@ export declare abstract class AIModel {
  export declare interface AIOptions {
  /**
  * The backend configuration to use for the AI service instance.
+ * Defaults to the Gemini Developer API backend ({@link GoogleAIBackend}).
  */
+ backend?: Backend;
+ /**
+ * Whether to use App Check limited use tokens. Defaults to false.
+ */
+ useLimitedUseAppCheckTokens?: boolean;
+ }
+
+ declare class AIService implements AI, _FirebaseService {
+ app: FirebaseApp;
  backend: Backend;
+ chromeAdapterFactory?: ((mode: InferenceMode, window?: Window, params?: OnDeviceParams) => ChromeAdapterImpl | undefined) | undefined;
+ auth: FirebaseAuthInternal | null;
+ appCheck: FirebaseAppCheckInternal | null;
+ _options?: Omit<AIOptions, 'backend'>;
+ location: string;
+ constructor(app: FirebaseApp, backend: Backend, authProvider?: Provider<FirebaseAuthInternalName>, appCheckProvider?: Provider<AppCheckInternalComponentName>, chromeAdapterFactory?: ((mode: InferenceMode, window?: Window, params?: OnDeviceParams) => ChromeAdapterImpl | undefined) | undefined);
+ _delete(): Promise<void>;
+ set options(optionsToSet: AIOptions);
+ get options(): AIOptions | undefined;
  }

  /**
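Both `AIOptions` fields are now optional, so an `AI` instance can be configured per call site. A minimal sketch of passing them to `getAI`, assuming an already-initialized Firebase `app`:

```ts
import { getAI, GoogleAIBackend } from 'firebase/ai';

// backend defaults to the Gemini Developer API backend when omitted.
const ai = getAI(app, {
  backend: new GoogleAIBackend(),
  useLimitedUseAppCheckTokens: true, // defaults to false
});
```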
@@ -168,6 +200,21 @@ export declare class ArraySchema extends Schema {
  /* Excluded from this release type: toJSON */
  }

+ /**
+ * A controller for managing an active audio conversation.
+ *
+ * @beta
+ */
+ export declare interface AudioConversationController {
+ /**
+ * Stops the audio conversation, closes the microphone connection, and
+ * cleans up resources. Returns a promise that resolves when cleanup is complete.
+ */
+ stop: () => Promise<void>;
+ }
+
+ /* Excluded from this release type: Availability */
+
  /**
  * Abstract base class representing the configuration for an AI service backend.
  * This class should not be instantiated directly. Use its subclasses; {@link GoogleAIBackend} for
@@ -320,22 +367,126 @@ export declare interface ChromeAdapter {
  /**
  * Generates content using on-device inference.
  *
- * <p>This is comparable to {@link GenerativeModel.generateContent} for generating
- * content using in-cloud inference.</p>
+ * @remarks
+ * This is comparable to {@link GenerativeModel.generateContent} for generating
+ * content using in-cloud inference.
  * @param request - a standard Firebase AI {@link GenerateContentRequest}
  */
  generateContent(request: GenerateContentRequest): Promise<Response>;
  /**
  * Generates a content stream using on-device inference.
  *
- * <p>This is comparable to {@link GenerativeModel.generateContentStream} for generating
- * a content stream using in-cloud inference.</p>
+ * @remarks
+ * This is comparable to {@link GenerativeModel.generateContentStream} for generating
+ * a content stream using in-cloud inference.
  * @param request - a standard Firebase AI {@link GenerateContentRequest}
  */
  generateContentStream(request: GenerateContentRequest): Promise<Response>;
  /* Excluded from this release type: countTokens */
  }

+ /**
+ * Defines an inference "backend" that uses Chrome's on-device model,
+ * and encapsulates logic for detecting when on-device inference is
+ * possible.
+ */
+ declare class ChromeAdapterImpl implements ChromeAdapter {
+ languageModelProvider: LanguageModel;
+ mode: InferenceMode;
+ onDeviceParams: OnDeviceParams;
+ static SUPPORTED_MIME_TYPES: string[];
+ private isDownloading;
+ private downloadPromise;
+ private oldSession;
+ constructor(languageModelProvider: LanguageModel, mode: InferenceMode, onDeviceParams?: OnDeviceParams);
+ /**
+ * Checks if a given request can be made on-device.
+ *
+ * Encapsulates a few concerns:
+ * the mode,
+ * API existence,
+ * prompt formatting, and
+ * model availability, including triggering download if necessary.
+ *
+ * Pros: callers needn't be concerned with details of on-device availability.
+ * Cons: this method spans a few concerns and splits request validation from usage.
+ * If instance variables weren't already part of the API, we could consider a better
+ * separation of concerns.
+ */
+ isAvailable(request: GenerateContentRequest): Promise<boolean>;
+ /**
+ * Generates content on device.
+ *
+ * @remarks
+ * This is comparable to {@link GenerativeModel.generateContent} for generating content in
+ * Cloud.
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
+ * @returns {@link Response}, so we can reuse common response formatting.
+ */
+ generateContent(request: GenerateContentRequest): Promise<Response>;
+ /**
+ * Generates content stream on device.
+ *
+ * @remarks
+ * This is comparable to {@link GenerativeModel.generateContentStream} for generating content in
+ * Cloud.
+ * @param request - a standard Firebase AI {@link GenerateContentRequest}
+ * @returns {@link Response}, so we can reuse common response formatting.
+ */
+ generateContentStream(request: GenerateContentRequest): Promise<Response>;
+ countTokens(_request: CountTokensRequest): Promise<Response>;
+ /**
+ * Asserts inference for the given request can be performed by an on-device model.
+ */
+ private static isOnDeviceRequest;
+ /**
+ * Encapsulates logic to get availability and download a model if one is downloadable.
+ */
+ private downloadIfAvailable;
+ /**
+ * Triggers out-of-band download of an on-device model.
+ *
+ * Chrome only downloads models as needed. Chrome knows a model is needed when code calls
+ * LanguageModel.create.
+ *
+ * Since Chrome manages the download, the SDK can only avoid redundant download requests by
+ * tracking if a download has previously been requested.
+ */
+ private download;
+ /**
+ * Converts Firebase AI {@link Content} object to a Chrome {@link LanguageModelMessage} object.
+ */
+ private static toLanguageModelMessage;
+ /**
+ * Converts a Firebase AI Part object to a Chrome LanguageModelMessageContent object.
+ */
+ private static toLanguageModelMessageContent;
+ /**
+ * Converts a Firebase AI {@link Role} string to a {@link LanguageModelMessageRole} string.
+ */
+ private static toLanguageModelMessageRole;
+ /**
+ * Abstracts Chrome session creation.
+ *
+ * Chrome uses a multi-turn session for all inference. Firebase AI uses single-turn for all
+ * inference. To map the Firebase AI API to Chrome's API, the SDK creates a new session for all
+ * inference.
+ *
+ * Chrome will remove a model from memory if it's no longer in use, so this method ensures a
+ * new session is created before an old session is destroyed.
+ */
+ private createSession;
+ /**
+ * Formats string returned by Chrome as a {@link Response} returned by Firebase AI.
+ */
+ private static toResponse;
+ /**
+ * Formats string stream returned by Chrome as SSE returned by Firebase AI.
+ */
+ private static toStreamResponse;
+ }
+
  /**
  * A single citation.
  * @public
@@ -457,15 +608,34 @@ export declare interface EnhancedGenerateContentResponse extends GenerateContent
  */
  text: () => string;
  /**
- * Aggregates and returns all {@link InlineDataPart}s from the {@link GenerateContentResponse}'s
- * first candidate.
- *
- * @returns An array of {@link InlineDataPart}s containing data from the response, if available.
+ * Aggregates and returns every {@link InlineDataPart} from the first candidate of
+ * {@link GenerateContentResponse}.
  *
  * @throws If the prompt or candidate was blocked.
  */
  inlineDataParts: () => InlineDataPart[] | undefined;
+ /**
+ * Aggregates and returns every {@link FunctionCall} from the first candidate of
+ * {@link GenerateContentResponse}.
+ *
+ * @throws If the prompt or candidate was blocked.
+ */
  functionCalls: () => FunctionCall[] | undefined;
+ /**
+ * Aggregates and returns every {@link TextPart} with their `thought` property set
+ * to `true` from the first candidate of {@link GenerateContentResponse}.
+ *
+ * @throws If the prompt or candidate was blocked.
+ *
+ * @remarks
+ * Thought summaries provide a brief overview of the model's internal thinking process,
+ * offering insight into how it arrived at the final answer. This can be useful for
+ * debugging, understanding the model's reasoning, and verifying its accuracy.
+ *
+ * Thoughts will only be included if {@link ThinkingConfig.includeThoughts} is
+ * set to `true`.
+ */
+ thoughtSummary: () => string | undefined;
  }

  /**
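The new `thoughtSummary()` accessor pairs with `ThinkingConfig.includeThoughts` (added later in this diff, with a config sketch there). A usage sketch, assuming an initialized `app`; the model name is a placeholder:

```ts
import { getAI, getGenerativeModel } from 'firebase/ai';

const model = getGenerativeModel(getAI(app), {
  model: 'gemini-2.5-flash', // placeholder model name
  generationConfig: { thinkingConfig: { includeThoughts: true } },
});

const result = await model.generateContent('Why is the sky blue?');
// Returns undefined when the response contains no thought parts.
console.log(result.response.thoughtSummary());
console.log(result.response.text());
```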
@@ -485,6 +655,8 @@ export declare interface ErrorDetails {
  [key: string]: unknown;
  }

+ export declare function factory(container: ComponentContainer, { instanceIdentifier }: InstanceFactoryOptions): AIService;
+
  /**
  * Data pointing to a file uploaded on Google Cloud Storage.
  * @public
@@ -504,6 +676,8 @@ export declare interface FileDataPart {
  functionCall?: never;
  functionResponse?: never;
  fileData: FileData;
+ thought?: boolean;
+ /* Excluded from this release type: thoughtSignature */
  }

  /**
@@ -555,6 +729,8 @@ export declare const FinishReason: {
  */
  export declare type FinishReason = (typeof FinishReason)[keyof typeof FinishReason];

+ /* Excluded from this release type: _FirebaseService */
+
  /**
  * A predicted {@link FunctionCall} returned from the model
  * that contains a string representing the {@link FunctionDeclaration.name}
@@ -562,6 +738,15 @@ export declare type FinishReason = (typeof FinishReas
  * @public
  */
  export declare interface FunctionCall {
+ /**
+ * The id of the function call. This must be sent back in the associated {@link FunctionResponse}.
+ *
+ * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
+ * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
+ * `undefined`.
+ */
+ id?: string;
  name: string;
  args: object;
  }
@@ -611,6 +796,8 @@ export declare interface FunctionCallPart {
  inlineData?: never;
  functionCall: FunctionCall;
  functionResponse?: never;
+ thought?: boolean;
+ /* Excluded from this release type: thoughtSignature */
  }

  /**
@@ -672,6 +859,14 @@ export declare interface FunctionDeclarationsTool {
  * @public
  */
  export declare interface FunctionResponse {
+ /**
+ * The id of the {@link FunctionCall}.
+ *
+ * @remarks This property is only supported in the Gemini Developer API ({@link GoogleAIBackend}).
+ * When using the Vertex AI Gemini API ({@link VertexAIBackend}), this property will be
+ * `undefined`.
+ */
+ id?: string;
  name: string;
  response: object;
  }
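With `id` now present on both `FunctionCall` and `FunctionResponse`, a Gemini Developer API function-calling round trip can correlate each response with its originating call. A sketch, reusing the `model` from the example above; `runTool` is a hypothetical executor:

```ts
const result = await model.generateContent('What is the weather in Paris?');
const calls = result.response.functionCalls() ?? [];

const responseParts = await Promise.all(
  calls.map(async call => ({
    functionResponse: {
      id: call.id, // echo the call id back; undefined on Vertex AI
      name: call.name,
      response: await runTool(call.name, call.args), // hypothetical executor
    },
  }))
);
// responseParts would then be sent back to the model in the next turn.
```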
@@ -685,6 +880,8 @@ export declare interface FunctionResponsePart {
  inlineData?: never;
  functionCall?: never;
  functionResponse: FunctionResponse;
+ thought?: boolean;
+ /* Excluded from this release type: thoughtSignature */
  }

  /**
@@ -894,6 +1091,20 @@ export declare function getGenerativeModel(ai: AI, modelParams: ModelParams | Hy
  */
  export declare function getImagenModel(ai: AI, modelParams: ImagenModelParams, requestOptions?: RequestOptions): ImagenModel;

+ /**
+ * Returns a {@link LiveGenerativeModel} class for real-time, bidirectional communication.
+ *
+ * The Live API is only supported in modern browser windows and Node >= 22.
+ *
+ * @param ai - An {@link AI} instance.
+ * @param modelParams - Parameters to use when setting up a {@link LiveSession}.
+ * @throws If the `apiKey` or `projectId` fields are missing in your
+ * Firebase config.
+ *
+ * @beta
+ */
+ export declare function getLiveGenerativeModel(ai: AI, modelParams: LiveModelParams): LiveGenerativeModel;
+
  /**
  * Configuration class for the Gemini Developer API.
  *
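A connection sketch for the new entry point; the model name is a placeholder, and `app` is assumed to be an initialized FirebaseApp:

```ts
import { getAI, getLiveGenerativeModel, ResponseModality } from 'firebase/ai';

const liveModel = getLiveGenerativeModel(getAI(app), {
  model: 'gemini-2.0-flash-live-preview-04-09', // placeholder model name
  generationConfig: { responseModalities: [ResponseModality.AUDIO] },
});

const session = await liveModel.connect(); // throws if the handshake fails
```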
@@ -1663,6 +1874,8 @@ export declare interface InlineDataPart {
  * Applicable if `inlineData` is a video.
  */
  videoMetadata?: VideoMetadata;
+ thought?: boolean;
+ /* Excluded from this release type: thoughtSignature */
  }

  /**
@@ -1673,6 +1886,8 @@ export declare class IntegerSchema extends Schema {
  constructor(schemaParams?: SchemaParams);
  }

+ /* Excluded from this release type: LanguageModel */
+
  /**
  * <b>(EXPERIMENTAL)</b>
  * Configures the creation of an on-device language model session.
@@ -1744,6 +1959,13 @@ export declare type LanguageModelMessageRole = 'system' | 'user' | 'assistant';
  */
  export declare type LanguageModelMessageType = 'text' | 'image' | 'audio';

+ /**
+ * <b>(EXPERIMENTAL)</b>
+ * An on-device language model prompt.
+ * @public
+ */
+ declare type LanguageModelPrompt = LanguageModelMessage[];
+
  /**
  * <b>(EXPERIMENTAL)</b>
  * Options for an on-device language model prompt.
@@ -1753,6 +1975,234 @@ export declare interface LanguageModelPromptOptions {
  responseConstraint?: object;
  }

+ /**
+ * Configuration parameters used by {@link LiveGenerativeModel} to control live content generation.
+ *
+ * @beta
+ */
+ export declare interface LiveGenerationConfig {
+ /**
+ * Configuration for speech synthesis.
+ */
+ speechConfig?: SpeechConfig;
+ /**
+ * Specifies the maximum number of tokens that can be generated in the response. The number of
+ * tokens per word varies depending on the language outputted. It is unbounded by default.
+ */
+ maxOutputTokens?: number;
+ /**
+ * Controls the degree of randomness in token selection. A `temperature` value of 0 means that the highest
+ * probability tokens are always selected. In this case, responses for a given prompt are mostly
+ * deterministic, but a small amount of variation is still possible.
+ */
+ temperature?: number;
+ /**
+ * Changes how the model selects tokens for output. Tokens are
+ * selected from the most to least probable until the sum of their probabilities equals the `topP`
+ * value. For example, if tokens A, B, and C have probabilities of 0.3, 0.2, and 0.1 respectively
+ * and the `topP` value is 0.5, then the model will select either A or B as the next token by using
+ * `temperature` and excludes C as a candidate. Defaults to 0.95 if unset.
+ */
+ topP?: number;
+ /**
+ * Changes how the model selects tokens for output. A `topK` value of 1 means the selected token is
+ * the most probable among all tokens in the model's vocabulary, while a `topK` value of 3 means that
+ * the next token is selected from among the 3 most probable using sampled probabilities. Tokens
+ * are then further filtered, with the final token selected using `temperature` sampling. Defaults to 40
+ * if unspecified.
+ */
+ topK?: number;
+ /**
+ * Positive penalties.
+ */
+ presencePenalty?: number;
+ /**
+ * Frequency penalties.
+ */
+ frequencyPenalty?: number;
+ /**
+ * The modalities of the response.
+ */
+ responseModalities?: ResponseModality[];
+ }
+
+ /**
+ * Class for Live generative model APIs. The Live API enables low-latency, two-way multimodal
+ * interactions with Gemini.
+ *
+ * This class should only be instantiated with {@link getLiveGenerativeModel}.
+ *
+ * @beta
+ */
+ export declare class LiveGenerativeModel extends AIModel {
+ /* Excluded from this release type: _webSocketHandler */
+ generationConfig: LiveGenerationConfig;
+ tools?: Tool[];
+ toolConfig?: ToolConfig;
+ systemInstruction?: Content;
+ /* Excluded from this release type: __constructor */
+ /**
+ * Starts a {@link LiveSession}.
+ *
+ * @returns A {@link LiveSession}.
+ * @throws If the connection failed to be established with the server.
+ *
+ * @beta
+ */
+ connect(): Promise<LiveSession>;
+ }
+
+ /**
+ * Params passed to {@link getLiveGenerativeModel}.
+ * @beta
+ */
+ export declare interface LiveModelParams {
+ model: string;
+ generationConfig?: LiveGenerationConfig;
+ tools?: Tool[];
+ toolConfig?: ToolConfig;
+ systemInstruction?: string | Part | Content;
+ }
+
+ /**
+ * The types of responses that can be returned by {@link LiveSession.receive}.
+ *
+ * @beta
+ */
+ export declare const LiveResponseType: {
+ SERVER_CONTENT: string;
+ TOOL_CALL: string;
+ TOOL_CALL_CANCELLATION: string;
+ };
+
+ /**
+ * The types of responses that can be returned by {@link LiveSession.receive}.
+ * This is a property on all messages that can be used for type narrowing. This property is not
+ * returned by the server, it is assigned to a server message object once it's parsed.
+ *
+ * @beta
+ */
+ export declare type LiveResponseType = (typeof LiveResponseType)[keyof typeof LiveResponseType];
+
+ /**
+ * An incremental content update from the model.
+ *
+ * @beta
+ */
+ export declare interface LiveServerContent {
+ type: 'serverContent';
+ /**
+ * The content that the model has generated as part of the current conversation with the user.
+ */
+ modelTurn?: Content;
+ /**
+ * Indicates whether the turn is complete. This is `undefined` if the turn is not complete.
+ */
+ turnComplete?: boolean;
+ /**
+ * Indicates whether the model was interrupted by the client. An interruption occurs when
+ * the client sends a message before the model finishes its turn. This is `undefined` if the
+ * model was not interrupted.
+ */
+ interrupted?: boolean;
+ }
+
+ /**
+ * A request from the model for the client to execute one or more functions.
+ *
+ * @beta
+ */
+ export declare interface LiveServerToolCall {
+ type: 'toolCall';
+ /**
+ * An array of function calls to run.
+ */
+ functionCalls: FunctionCall[];
+ }
+
+ /**
+ * Notification to cancel a previous function call triggered by {@link LiveServerToolCall}.
+ *
+ * @beta
+ */
+ export declare interface LiveServerToolCallCancellation {
+ type: 'toolCallCancellation';
+ /**
+ * IDs of function calls that were cancelled. These refer to the `id` property of a {@link FunctionCall}.
+ */
+ functionIds: string[];
+ }
+
+ /**
+ * Represents an active, real-time, bidirectional conversation with the model.
+ *
+ * This class should only be instantiated by calling {@link LiveGenerativeModel.connect}.
+ *
+ * @beta
+ */
+ export declare class LiveSession {
+ private webSocketHandler;
+ private serverMessages;
+ /**
+ * Indicates whether this Live session is closed.
+ *
+ * @beta
+ */
+ isClosed: boolean;
+ /**
+ * Indicates whether this Live session is being controlled by an `AudioConversationController`.
+ *
+ * @beta
+ */
+ inConversation: boolean;
+ /* Excluded from this release type: __constructor */
+ /**
+ * Sends content to the server.
+ *
+ * @param request - The message to send to the model.
+ * @param turnComplete - Indicates if the turn is complete. Defaults to false.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
+ /**
+ * Sends realtime input to the server.
+ *
+ * @param mediaChunks - The media chunks to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+ /**
+ * Sends a stream of {@link GenerativeContentBlob}.
+ *
+ * @param mediaChunkStream - The stream of {@link GenerativeContentBlob} to send.
+ * @throws If this session has been closed.
+ *
+ * @beta
+ */
+ sendMediaStream(mediaChunkStream: ReadableStream<GenerativeContentBlob>): Promise<void>;
+ /**
+ * Yields messages received from the server.
+ * This can only be used by one consumer at a time.
+ *
+ * @returns An `AsyncGenerator` that yields server messages as they arrive.
+ * @throws If the session is already closed, or if we receive a response that we don't support.
+ *
+ * @beta
+ */
+ receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation>;
+ /**
+ * Closes this session.
+ * All methods on this session will throw an error once this resolves.
+ *
+ * @beta
+ */
+ close(): Promise<void>;
+ }
+
  /**
  * Content part modality.
  * @public
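Because `receive()` stamps a `type` discriminant onto every parsed server message, plain TypeScript narrowing is enough to dispatch on it. A consumption sketch, assuming a `session` from `connect()`:

```ts
for await (const message of session.receive()) {
  switch (message.type) {
    case 'serverContent':
      if (message.interrupted) break; // the client spoke over the model
      console.log(message.modelTurn?.parts);
      break;
    case 'toolCall':
      console.log('requested:', message.functionCalls.map(fc => fc.name));
      break;
    case 'toolCallCancellation':
      console.log('cancelled ids:', message.functionIds);
      break;
  }
}
```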
@@ -1878,6 +2328,20 @@ export declare type Part = TextPart | InlineDataPart | FunctionCallPart | Functi
  */
  export declare const POSSIBLE_ROLES: readonly ["user", "model", "function", "system"];

+ /**
+ * Configuration for a pre-built voice.
+ *
+ * @beta
+ */
+ export declare interface PrebuiltVoiceConfig {
+ /**
+ * The voice name to use for speech synthesis.
+ *
+ * For a full list of names and demos of what each voice sounds like, see {@link https://cloud.google.com/text-to-speech/docs/chirp3-hd | Chirp 3: HD Voices}.
+ */
+ voiceName?: string;
+ }
+
  /**
  * If the prompt was blocked, this will be populated with `blockReason` and
  * the relevant `safetyRatings`.
@@ -1904,7 +2368,10 @@ export declare interface RequestOptions {
  */
  timeout?: number;
  /**
- * Base url for endpoint. Defaults to https://firebasevertexai.googleapis.com
+ * Base url for endpoint. Defaults to
+ * https://firebasevertexai.googleapis.com, which is the
+ * {@link https://console.cloud.google.com/apis/library/firebasevertexai.googleapis.com?project=_ | Firebase AI Logic API}
+ * (used regardless of your chosen Gemini API provider).
  */
  baseUrl?: string;
  }
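For reference, `RequestOptions` is threaded through the model factories; a sketch with illustrative values (the default `baseUrl` is shown explicitly):

```ts
const customModel = getGenerativeModel(
  getAI(app),
  { model: 'gemini-2.5-flash' }, // placeholder model name
  { timeout: 30_000, baseUrl: 'https://firebasevertexai.googleapis.com' }
);
```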
@@ -1925,6 +2392,11 @@ export declare const ResponseModality: {
  * @beta
  */
  readonly IMAGE: "IMAGE";
+ /**
+ * Audio.
+ * @beta
+ */
+ readonly AUDIO: "AUDIO";
  };

  /**
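Combining the new `AUDIO` modality with the speech types added elsewhere in this release (`SpeechConfig`, `VoiceConfig`, `PrebuiltVoiceConfig`), a configuration sketch; the voice name is a placeholder:

```ts
import { ResponseModality, type LiveGenerationConfig } from 'firebase/ai';

const liveConfig: LiveGenerationConfig = {
  responseModalities: [ResponseModality.AUDIO],
  speechConfig: {
    voiceConfig: {
      prebuiltVoiceConfig: { voiceName: 'Aoede' }, // placeholder voice name
    },
  },
};
```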
@@ -2243,6 +2715,80 @@ export declare interface Segment {
  text: string;
  }

+ /**
+ * Configures speech synthesis.
+ *
+ * @beta
+ */
+ export declare interface SpeechConfig {
+ /**
+ * Configures the voice to be used in speech synthesis.
+ */
+ voiceConfig?: VoiceConfig;
+ }
+
+ /**
+ * Starts a real-time, bidirectional audio conversation with the model. This helper function manages
+ * the complexities of microphone access, audio recording, playback, and interruptions.
+ *
+ * @remarks Important: This function must be called in response to a user gesture
+ * (for example, a button click) to comply with {@link https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API/Best_practices#autoplay_policy | browser autoplay policies}.
+ *
+ * @example
+ * ```javascript
+ * const liveSession = await model.connect();
+ * let conversationController;
+ *
+ * // This function must be called from within a click handler.
+ * async function startConversation() {
+ *   try {
+ *     conversationController = await startAudioConversation(liveSession);
+ *   } catch (e) {
+ *     // Handle AI-specific errors
+ *     if (e instanceof AIError) {
+ *       console.error("AI Error:", e.message);
+ *     }
+ *     // Handle microphone permission and hardware errors
+ *     else if (e instanceof DOMException) {
+ *       console.error("Microphone Error:", e.message);
+ *     }
+ *     // Handle other unexpected errors
+ *     else {
+ *       console.error("An unexpected error occurred:", e);
+ *     }
+ *   }
+ * }
+ *
+ * // Later, to stop the conversation:
+ * // if (conversationController) {
+ * //   await conversationController.stop();
+ * // }
+ * ```
+ *
+ * @param liveSession - An active {@link LiveSession} instance.
+ * @param options - Configuration options for the audio conversation.
+ * @returns A `Promise` that resolves with an {@link AudioConversationController}.
+ * @throws `AIError` if the environment does not support required Web APIs (`UNSUPPORTED`), if a conversation is already active (`REQUEST_ERROR`), the session is closed (`SESSION_CLOSED`), or if an unexpected initialization error occurs (`ERROR`).
+ * @throws `DOMException` Thrown by `navigator.mediaDevices.getUserMedia()` if issues occur with microphone access, such as permissions being denied (`NotAllowedError`) or no compatible hardware being found (`NotFoundError`). See the {@link https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia#exceptions | MDN documentation} for a full list of exceptions.
+ *
+ * @beta
+ */
+ export declare function startAudioConversation(liveSession: LiveSession, options?: StartAudioConversationOptions): Promise<AudioConversationController>;
+
+ /**
+ * Options for {@link startAudioConversation}.
+ *
+ * @beta
+ */
+ export declare interface StartAudioConversationOptions {
+ /**
+ * An async handler that is called when the model requests a function to be executed.
+ * The handler should perform the function call and return the result as a `Part`,
+ * which will then be sent back to the model.
+ */
+ functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
+ }
+
  /**
  * Params for {@link GenerativeModel.startChat}.
  * @public
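The JSDoc example above covers setup and error handling; `functionCallingHandler` is the hook that wires tool execution into an audio conversation. A sketch in which the tool result is hypothetical:

```ts
const controller = await startAudioConversation(liveSession, {
  // Invoked when the model requests tool execution mid-conversation.
  functionCallingHandler: async calls => {
    const call = calls[0];
    const data = { temperatureC: 21 }; // hypothetical result of running call.name
    // The returned Part is sent back to the model to continue the conversation.
    return { functionResponse: { id: call.id, name: call.name, response: data } };
  },
});
// Later: await controller.stop();
```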
@@ -2274,6 +2820,8 @@ export declare interface TextPart {
  inlineData?: never;
  functionCall?: never;
  functionResponse?: never;
+ thought?: boolean;
+ /* Excluded from this release type: thoughtSignature */
  }

  /**
@@ -2299,6 +2847,15 @@ export declare interface ThinkingConfig {
  * feature or if the specified budget is not within the model's supported range.
  */
  thinkingBudget?: number;
+ /**
+ * Whether to include "thought summaries" in the model's response.
+ *
+ * @remarks
+ * Thought summaries provide a brief overview of the model's internal thinking process,
+ * offering insight into how it arrived at the final answer. This can be useful for
+ * debugging, understanding the model's reasoning, and verifying its accuracy.
+ */
+ includeThoughts?: boolean;
  }

  /**
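A sketch of the resulting config shape, which is what enables the `thoughtSummary()` example earlier in this diff:

```ts
import type { GenerationConfig } from 'firebase/ai';

const generationConfig: GenerationConfig = {
  thinkingConfig: {
    thinkingBudget: 1024,  // illustrative token budget
    includeThoughts: true, // opt in to thought summaries
  },
};
```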
@@ -2380,6 +2937,18 @@ export declare interface VideoMetadata {
  endOffset: string;
  }

+ /**
+ * Configuration for the voice to be used in speech synthesis.
+ *
+ * @beta
+ */
+ export declare interface VoiceConfig {
+ /**
+ * Configures the voice using a pre-built voice configuration.
+ */
+ prebuiltVoiceConfig?: PrebuiltVoiceConfig;
+ }
+
  /**
  * @public
  */
@@ -2415,4 +2984,6 @@ export declare interface WebGroundingChunk {
  domain?: string;
  }

+ /* Excluded from this release type: WebSocketHandler */
+
  export { }