@firebase/ai 2.3.0-20250917161512 → 2.3.0-canary.0bb2fe636

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@
14
14
  * See the License for the specific language governing permissions and
15
15
  * limitations under the License.
16
16
  */
17
- import { LiveServerToolCall, Part } from '../types';
17
+ import { FunctionCall, FunctionResponse } from '../types';
18
18
  import { LiveSession } from './live-session';
19
19
  /**
20
20
  * A controller for managing an active audio conversation.
@@ -39,7 +39,7 @@ export interface StartAudioConversationOptions {
39
39
  * The handler should perform the function call and return the result as a `FunctionResponse`,
40
40
  * which will then be sent back to the model.
41
41
  */
42
- functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
42
+ functionCallingHandler?: (functionCalls: FunctionCall[]) => Promise<FunctionResponse>;
43
43
  }
44
44
  /**
45
45
  * Dependencies needed by the {@link AudioConversationRunner}.
@@ -14,7 +14,7 @@
14
14
  * See the License for the specific language governing permissions and
15
15
  * limitations under the License.
16
16
  */
17
- import { GenerativeContentBlob, LiveServerContent, LiveServerToolCall, LiveServerToolCallCancellation, Part } from '../public-types';
17
+ import { FunctionResponse, GenerativeContentBlob, LiveServerContent, LiveServerToolCall, LiveServerToolCallCancellation, Part } from '../public-types';
18
18
  import { WebSocketHandler } from '../websocket';
19
19
  /**
20
20
  * Represents an active, real-time, bidirectional conversation with the model.
@@ -61,6 +61,15 @@ export declare class LiveSession {
61
61
  * @beta
62
62
  */
63
63
  sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
64
+ /**
65
+ * Sends function responses to the server.
66
+ *
67
+ * @param functionResponses - The function responses to send.
68
+ * @throws If this session has been closed.
69
+ *
70
+ * @beta
71
+ */
72
+ sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
64
73
  /**
65
74
  * Sends a stream of {@link GenerativeContentBlob}.
66
75
  *
@@ -138,7 +138,7 @@ export interface FileDataPart {
138
138
  /**
139
139
  * Represents the code that is executed by the model.
140
140
  *
141
- * @public
141
+ * @beta
142
142
  */
143
143
  export interface ExecutableCodePart {
144
144
  text?: never;
@@ -157,7 +157,7 @@ export interface ExecutableCodePart {
157
157
  /**
158
158
  * Represents the code execution result from the model.
159
159
  *
160
- * @public
160
+ * @beta
161
161
  */
162
162
  export interface CodeExecutionResultPart {
163
163
  text?: never;
@@ -176,7 +176,7 @@ export interface CodeExecutionResultPart {
176
176
  /**
177
177
  * An interface for executable code returned by the model.
178
178
  *
179
- * @public
179
+ * @beta
180
180
  */
181
181
  export interface ExecutableCode {
182
182
  /**
@@ -191,7 +191,7 @@ export interface ExecutableCode {
191
191
  /**
192
192
  * The results of code execution run by the model.
193
193
  *
194
- * @public
194
+ * @beta
195
195
  */
196
196
  export interface CodeExecutionResult {
197
197
  /**
@@ -352,7 +352,7 @@ export type InferenceMode = (typeof InferenceMode)[keyof typeof InferenceMode];
352
352
  /**
353
353
  * Represents the result of the code execution.
354
354
  *
355
- * @public
355
+ * @beta
356
356
  */
357
357
  export declare const Outcome: {
358
358
  UNSPECIFIED: string;
@@ -363,13 +363,13 @@ export declare const Outcome: {
363
363
  /**
364
364
  * Represents the result of the code execution.
365
365
  *
366
- * @public
366
+ * @beta
367
367
  */
368
368
  export type Outcome = (typeof Outcome)[keyof typeof Outcome];
369
369
  /**
370
370
  * The programming language of the code.
371
371
  *
372
- * @public
372
+ * @beta
373
373
  */
374
374
  export declare const Language: {
375
375
  UNSPECIFIED: string;
@@ -378,6 +378,6 @@ export declare const Language: {
378
378
  /**
379
379
  * The programming language of the code.
380
380
  *
381
- * @public
381
+ * @beta
382
382
  */
383
383
  export type Language = (typeof Language)[keyof typeof Language];
@@ -14,7 +14,7 @@
14
14
  * See the License for the specific language governing permissions and
15
15
  * limitations under the License.
16
16
  */
17
- import { Tool, GenerationConfig, Citation, FinishReason, GroundingMetadata, PromptFeedback, SafetyRating, UsageMetadata } from '../public-types';
17
+ import { Tool, GenerationConfig, Citation, FinishReason, GroundingMetadata, PromptFeedback, SafetyRating, UsageMetadata, URLContextMetadata } from '../public-types';
18
18
  import { Content, Part } from './content';
19
19
  /**
20
20
  * @internal
@@ -47,6 +47,7 @@ export interface GoogleAIGenerateContentCandidate {
47
47
  safetyRatings?: SafetyRating[];
48
48
  citationMetadata?: GoogleAICitationMetadata;
49
49
  groundingMetadata?: GroundingMetadata;
50
+ urlContextMetadata?: URLContextMetadata;
50
51
  }
51
52
  /**
52
53
  * @internal
@@ -14,7 +14,7 @@
14
14
  * See the License for the specific language governing permissions and
15
15
  * limitations under the License.
16
16
  */
17
- import { Content, GenerativeContentBlob, Part } from './content';
17
+ import { Content, FunctionResponse, GenerativeContentBlob, Part } from './content';
18
18
  import { LiveGenerationConfig, Tool, ToolConfig } from './requests';
19
19
  /**
20
20
  * User input that is sent to the model.
@@ -37,6 +37,14 @@ export interface _LiveClientRealtimeInput {
37
37
  mediaChunks: GenerativeContentBlob[];
38
38
  };
39
39
  }
40
+ /**
41
+ * Function responses that are sent to the model in real time.
42
+ */
43
+ export interface _LiveClientToolResponse {
44
+ toolResponse: {
45
+ functionResponses: FunctionResponse[];
46
+ };
47
+ }
40
48
  /**
41
49
  * The first message in a Live session, used to configure generation options.
42
50
  *
@@ -218,7 +218,7 @@ export interface RequestOptions {
218
218
  * Defines a tool that the model can call to access external knowledge.
219
219
  * @public
220
220
  */
221
- export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool;
221
+ export type Tool = FunctionDeclarationsTool | GoogleSearchTool | CodeExecutionTool | URLContextTool;
222
222
  /**
223
223
  * Structured representation of a function declaration as defined by the
224
224
  * {@link https://spec.openapis.org/oas/v3.0.3 | OpenAPI 3.0 specification}.
@@ -273,7 +273,7 @@ export interface GoogleSearchTool {
273
273
  /**
274
274
  * A tool that enables the model to use code execution.
275
275
  *
276
- * @public
276
+ * @beta
277
277
  */
278
278
  export interface CodeExecutionTool {
279
279
  /**
@@ -291,6 +291,26 @@ export interface CodeExecutionTool {
291
291
  */
292
292
  export interface GoogleSearch {
293
293
  }
294
+ /**
295
+ * A tool that allows you to provide additional context to the models in the form of public web
296
+ * URLs. By including URLs in your request, the Gemini model will access the content from those
297
+ * pages to inform and enhance its response.
298
+ *
299
+ * @beta
300
+ */
301
+ export interface URLContextTool {
302
+ /**
303
+ * Specifies the URL Context configuration.
304
+ */
305
+ urlContext: URLContext;
306
+ }
307
+ /**
308
+ * Specifies the URL Context configuration.
309
+ *
310
+ * @beta
311
+ */
312
+ export interface URLContext {
313
+ }
294
314
  /**
295
315
  * A `FunctionDeclarationsTool` is a piece of code that enables the system to
296
316
  * interact with external systems to perform an action, or set of actions,
@@ -102,8 +102,16 @@ export interface UsageMetadata {
102
102
  */
103
103
  thoughtsTokenCount?: number;
104
104
  totalTokenCount: number;
105
+ /**
106
+ * The number of tokens used by tools.
107
+ */
108
+ toolUsePromptTokenCount?: number;
105
109
  promptTokensDetails?: ModalityTokenCount[];
106
110
  candidatesTokensDetails?: ModalityTokenCount[];
111
+ /**
112
+ * A list of tokens used by tools, broken down by modality.
113
+ */
114
+ toolUsePromptTokensDetails?: ModalityTokenCount[];
107
115
  }
108
116
  /**
109
117
  * Represents token counting info for a single modality.
@@ -143,6 +151,7 @@ export interface GenerateContentCandidate {
143
151
  safetyRatings?: SafetyRating[];
144
152
  citationMetadata?: CitationMetadata;
145
153
  groundingMetadata?: GroundingMetadata;
154
+ urlContextMetadata?: URLContextMetadata;
146
155
  }
147
156
  /**
148
157
  * Citation metadata that may be found on a {@link GenerateContentCandidate}.
@@ -323,6 +332,89 @@ export interface Segment {
323
332
  */
324
333
  text: string;
325
334
  }
335
+ /**
336
+ * Metadata related to {@link URLContextTool}.
337
+ *
338
+ * @beta
339
+ */
340
+ export interface URLContextMetadata {
341
+ /**
342
+ * List of URL metadata used to provide context to the Gemini model.
343
+ */
344
+ urlMetadata: URLMetadata[];
345
+ }
346
+ /**
347
+ * Metadata for a single URL retrieved by the {@link URLContextTool} tool.
348
+ *
349
+ * @beta
350
+ */
351
+ export interface URLMetadata {
352
+ /**
353
+ * The retrieved URL.
354
+ */
355
+ retrievedUrl?: string;
356
+ /**
357
+ * The status of the URL retrieval.
358
+ */
359
+ urlRetrievalStatus?: URLRetrievalStatus;
360
+ }
361
+ /**
362
+ * The status of a URL retrieval.
363
+ *
364
+ * @remarks
365
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
366
+ * <br/>
367
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
368
+ * <br/>
369
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
370
+ * <br/>
371
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
372
+ * <br/>
373
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
374
+ * <br/>
375
+ *
376
+ * @beta
377
+ */
378
+ export declare const URLRetrievalStatus: {
379
+ /**
380
+ * Unspecified retrieval status.
381
+ */
382
+ URL_RETRIEVAL_STATUS_UNSPECIFIED: string;
383
+ /**
384
+ * The URL retrieval was successful.
385
+ */
386
+ URL_RETRIEVAL_STATUS_SUCCESS: string;
387
+ /**
388
+ * The URL retrieval failed.
389
+ */
390
+ URL_RETRIEVAL_STATUS_ERROR: string;
391
+ /**
392
+ * The URL retrieval failed because the content is behind a paywall.
393
+ */
394
+ URL_RETRIEVAL_STATUS_PAYWALL: string;
395
+ /**
396
+ * The URL retrieval failed because the content is unsafe.
397
+ */
398
+ URL_RETRIEVAL_STATUS_UNSAFE: string;
399
+ };
400
+ /**
401
+ * The status of a URL retrieval.
402
+ *
403
+ * @remarks
404
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
405
+ * <br/>
406
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
407
+ * <br/>
408
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
409
+ * <br/>
410
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
411
+ * <br/>
412
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
413
+ * <br/>
414
+ *
415
+ * @beta
416
+ */
417
+ export type URLRetrievalStatus = (typeof URLRetrievalStatus)[keyof typeof URLRetrievalStatus];
326
418
  /**
327
419
  * @public
328
420
  */
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
8
8
  var logger$1 = require('@firebase/logger');
9
9
 
10
10
  var name = "@firebase/ai";
11
- var version = "2.3.0-20250917161512";
11
+ var version = "2.3.0-canary.0bb2fe636";
12
12
 
13
13
  /**
14
14
  * @license
@@ -386,7 +386,7 @@ const InferenceMode = {
386
386
  /**
387
387
  * Represents the result of the code execution.
388
388
  *
389
- * @public
389
+ * @beta
390
390
  */
391
391
  const Outcome = {
392
392
  UNSPECIFIED: 'OUTCOME_UNSPECIFIED',
@@ -397,7 +397,7 @@ const Outcome = {
397
397
  /**
398
398
  * The programming language of the code.
399
399
  *
400
- * @public
400
+ * @beta
401
401
  */
402
402
  const Language = {
403
403
  UNSPECIFIED: 'LANGUAGE_UNSPECIFIED',
@@ -420,6 +420,45 @@ const Language = {
420
420
  * See the License for the specific language governing permissions and
421
421
  * limitations under the License.
422
422
  */
423
+ /**
424
+ * The status of a URL retrieval.
425
+ *
426
+ * @remarks
427
+ * <b>URL_RETRIEVAL_STATUS_UNSPECIFIED:</b> Unspecified retrieval status.
428
+ * <br/>
429
+ * <b>URL_RETRIEVAL_STATUS_SUCCESS:</b> The URL retrieval was successful.
430
+ * <br/>
431
+ * <b>URL_RETRIEVAL_STATUS_ERROR:</b> The URL retrieval failed.
432
+ * <br/>
433
+ * <b>URL_RETRIEVAL_STATUS_PAYWALL:</b> The URL retrieval failed because the content is behind a paywall.
434
+ * <br/>
435
+ * <b>URL_RETRIEVAL_STATUS_UNSAFE:</b> The URL retrieval failed because the content is unsafe.
436
+ * <br/>
437
+ *
438
+ * @beta
439
+ */
440
+ const URLRetrievalStatus = {
441
+ /**
442
+ * Unspecified retrieval status.
443
+ */
444
+ URL_RETRIEVAL_STATUS_UNSPECIFIED: 'URL_RETRIEVAL_STATUS_UNSPECIFIED',
445
+ /**
446
+ * The URL retrieval was successful.
447
+ */
448
+ URL_RETRIEVAL_STATUS_SUCCESS: 'URL_RETRIEVAL_STATUS_SUCCESS',
449
+ /**
450
+ * The URL retrieval failed.
451
+ */
452
+ URL_RETRIEVAL_STATUS_ERROR: 'URL_RETRIEVAL_STATUS_ERROR',
453
+ /**
454
+ * The URL retrieval failed because the content is behind a paywall.
455
+ */
456
+ URL_RETRIEVAL_STATUS_PAYWALL: 'URL_RETRIEVAL_STATUS_PAYWALL',
457
+ /**
458
+ * The URL retrieval failed because the content is unsafe.
459
+ */
460
+ URL_RETRIEVAL_STATUS_UNSAFE: 'URL_RETRIEVAL_STATUS_UNSAFE'
461
+ };
423
462
  /**
424
463
  * The types of responses that can be returned by {@link LiveSession.receive}.
425
464
  *
@@ -1880,7 +1919,7 @@ function mapGenerateContentCandidates(candidates) {
1880
1919
  // videoMetadata is not supported.
1881
1920
  // Throw early since developers may send a long video as input and only expect to pay
1882
1921
  // for inference on a small portion of the video.
1883
- if (candidate.content?.parts.some(part => part?.videoMetadata)) {
1922
+ if (candidate.content?.parts?.some(part => part?.videoMetadata)) {
1884
1923
  throw new AIError(AIErrorCode.UNSUPPORTED, 'Part.videoMetadata is not supported in the Gemini Developer API. Please remove this property.');
1885
1924
  }
1886
1925
  const mappedCandidate = {
@@ -1890,7 +1929,8 @@ function mapGenerateContentCandidates(candidates) {
1890
1929
  finishMessage: candidate.finishMessage,
1891
1930
  safetyRatings: mappedSafetyRatings,
1892
1931
  citationMetadata,
1893
- groundingMetadata: candidate.groundingMetadata
1932
+ groundingMetadata: candidate.groundingMetadata,
1933
+ urlContextMetadata: candidate.urlContextMetadata
1894
1934
  };
1895
1935
  mappedCandidates.push(mappedCandidate);
1896
1936
  });
@@ -1981,6 +2021,14 @@ async function* generateResponseSequence(stream, apiSettings) {
1981
2021
  else {
1982
2022
  enhancedResponse = createEnhancedContentResponse(value);
1983
2023
  }
2024
+ const firstCandidate = enhancedResponse.candidates?.[0];
2025
+ // Don't yield a response with no useful data for the developer.
2026
+ if (!firstCandidate?.content?.parts &&
2027
+ !firstCandidate?.finishReason &&
2028
+ !firstCandidate?.citationMetadata &&
2029
+ !firstCandidate?.urlContextMetadata) {
2030
+ continue;
2031
+ }
1984
2032
  yield enhancedResponse;
1985
2033
  }
1986
2034
  }
@@ -2060,36 +2108,43 @@ function aggregateResponses(responses) {
2060
2108
  candidate.safetyRatings;
2061
2109
  aggregatedResponse.candidates[i].groundingMetadata =
2062
2110
  candidate.groundingMetadata;
2111
+ // The urlContextMetadata object is defined in the first chunk of the response stream.
2112
+ // In all subsequent chunks, the urlContextMetadata object will be undefined. We need to
2113
+ // make sure that we don't overwrite the first urlContextMetadata object with undefined.
2114
+ // FIXME: What happens if we receive a second, valid urlContextMetadata object?
2115
+ const urlContextMetadata = candidate.urlContextMetadata;
2116
+ if (typeof urlContextMetadata === 'object' &&
2117
+ urlContextMetadata !== null &&
2118
+ Object.keys(urlContextMetadata).length > 0) {
2119
+ aggregatedResponse.candidates[i].urlContextMetadata =
2120
+ urlContextMetadata;
2121
+ }
2063
2122
  /**
2064
2123
  * Candidates should always have content and parts, but this handles
2065
2124
  * possible malformed responses.
2066
2125
  */
2067
- if (candidate.content && candidate.content.parts) {
2126
+ if (candidate.content) {
2127
+ // Skip a candidate without parts.
2128
+ if (!candidate.content.parts) {
2129
+ continue;
2130
+ }
2068
2131
  if (!aggregatedResponse.candidates[i].content) {
2069
2132
  aggregatedResponse.candidates[i].content = {
2070
2133
  role: candidate.content.role || 'user',
2071
2134
  parts: []
2072
2135
  };
2073
2136
  }
2074
- const newPart = {};
2075
2137
  for (const part of candidate.content.parts) {
2076
- if (part.text !== undefined) {
2077
- // The backend can send empty text parts. If these are sent back
2078
- // (e.g. in chat history), the backend will respond with an error.
2079
- // To prevent this, ignore empty text parts.
2080
- if (part.text === '') {
2081
- continue;
2082
- }
2083
- newPart.text = part.text;
2084
- }
2085
- if (part.functionCall) {
2086
- newPart.functionCall = part.functionCall;
2138
+ const newPart = { ...part };
2139
+ // The backend can send empty text parts. If these are sent back
2140
+ // (e.g. in chat history), the backend will respond with an error.
2141
+ // To prevent this, ignore empty text parts.
2142
+ if (part.text === '') {
2143
+ continue;
2087
2144
  }
2088
- if (Object.keys(newPart).length === 0) {
2089
- throw new AIError(AIErrorCode.INVALID_CONTENT, 'Part should have at least one property, but there are none. This is likely caused ' +
2090
- 'by a malformed response from the backend.');
2145
+ if (Object.keys(newPart).length > 0) {
2146
+ aggregatedResponse.candidates[i].content.parts.push(newPart);
2091
2147
  }
2092
- aggregatedResponse.candidates[i].content.parts.push(newPart);
2093
2148
  }
2094
2149
  }
2095
2150
  }
@@ -2795,6 +2850,25 @@ class LiveSession {
2795
2850
  this.webSocketHandler.send(JSON.stringify(message));
2796
2851
  });
2797
2852
  }
2853
+ /**
2854
+ * Sends function responses to the server.
2855
+ *
2856
+ * @param functionResponses - The function responses to send.
2857
+ * @throws If this session has been closed.
2858
+ *
2859
+ * @beta
2860
+ */
2861
+ async sendFunctionResponses(functionResponses) {
2862
+ if (this.isClosed) {
2863
+ throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
2864
+ }
2865
+ const message = {
2866
+ toolResponse: {
2867
+ functionResponses
2868
+ }
2869
+ };
2870
+ this.webSocketHandler.send(JSON.stringify(message));
2871
+ }
2798
2872
  /**
2799
2873
  * Sends a stream of {@link GenerativeContentBlob}.
2800
2874
  *
@@ -3776,9 +3850,9 @@ class AudioConversationRunner {
3776
3850
  }
3777
3851
  else {
3778
3852
  try {
3779
- const resultPart = await this.options.functionCallingHandler(message.functionCalls);
3853
+ const functionResponse = await this.options.functionCallingHandler(message.functionCalls);
3780
3854
  if (!this.isStopped) {
3781
- void this.liveSession.send([resultPart]);
3855
+ void this.liveSession.sendFunctionResponses([functionResponse]);
3782
3856
  }
3783
3857
  }
3784
3858
  catch (e) {
@@ -4078,6 +4152,7 @@ exports.ResponseModality = ResponseModality;
4078
4152
  exports.Schema = Schema;
4079
4153
  exports.SchemaType = SchemaType;
4080
4154
  exports.StringSchema = StringSchema;
4155
+ exports.URLRetrievalStatus = URLRetrievalStatus;
4081
4156
  exports.VertexAIBackend = VertexAIBackend;
4082
4157
  exports.getAI = getAI;
4083
4158
  exports.getGenerativeModel = getGenerativeModel;