@firebase/ai 2.3.0-canary.7a7634f79 → 2.3.0-canary.ccbf7ba36

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import { LiveServerToolCall, Part } from '../types';
+import { FunctionCall, FunctionResponse } from '../types';
 import { LiveSession } from './live-session';
 /**
  * A controller for managing an active audio conversation.
@@ -39,7 +39,7 @@ export interface StartAudioConversationOptions {
      * The handler should perform the function call and return the result as a `Part`,
      * which will then be sent back to the model.
      */
-    functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise<Part>;
+    functionCallingHandler?: (functionCalls: FunctionCall[]) => Promise<FunctionResponse>;
 }
 /**
  * Dependencies needed by the {@link AudioConversationRunner}.
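The handler's contract changes here: instead of receiving `LiveServerToolCall['functionCalls']` and returning a generic `Part`, it now receives a typed `FunctionCall[]` and must return a `FunctionResponse`. A minimal sketch of a handler under the new signature — the `getWeather` tool and `fetchWeather` helper are hypothetical, and the `id`/`name`/`args`/`response` fields are assumed from the SDK's content types:

```ts
import {
  FunctionCall,
  FunctionResponse,
  StartAudioConversationOptions
} from '@firebase/ai';

// Hypothetical application helper, not part of the SDK.
declare function fetchWeather(args: object): Promise<object>;

const options: StartAudioConversationOptions = {
  functionCallingHandler: async (
    functionCalls: FunctionCall[]
  ): Promise<FunctionResponse> => {
    // The runner forwards the model's tool-call batch; this sketch
    // answers only the first call.
    const call = functionCalls[0];
    if (call.name === 'getWeather') {
      return {
        id: call.id, // echo the call id so the model can correlate
        name: call.name,
        response: await fetchWeather(call.args)
      };
    }
    return { id: call.id, name: call.name, response: { error: 'unknown function' } };
  }
};
```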
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import { GenerativeContentBlob, LiveServerContent, LiveServerToolCall, LiveServerToolCallCancellation, Part } from '../public-types';
+import { FunctionResponse, GenerativeContentBlob, LiveServerContent, LiveServerToolCall, LiveServerToolCallCancellation, Part } from '../public-types';
 import { WebSocketHandler } from '../websocket';
 /**
  * Represents an active, real-time, bidirectional conversation with the model.
@@ -61,6 +61,15 @@ export declare class LiveSession {
      * @beta
      */
     sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise<void>;
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
     /**
      * Sends a stream of {@link GenerativeContentBlob}.
      *
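For sessions driven by hand (without the audio conversation helper), the new method can answer a `toolCall` message directly. A sketch, assuming `session` came from `LiveGenerativeModel.connect()` and that each incoming call carries an `id` to echo back:

```ts
import { FunctionResponse, LiveServerToolCall, LiveSession } from '@firebase/ai';

async function replyToToolCall(
  session: LiveSession,
  toolCall: LiveServerToolCall
): Promise<void> {
  // Build one FunctionResponse per FunctionCall; the payloads here are
  // illustrative stand-ins for real tool results.
  const responses: FunctionResponse[] = toolCall.functionCalls.map(call => ({
    id: call.id,
    name: call.name,
    response: { ok: true }
  }));
  await session.sendFunctionResponses(responses);
}
```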
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import { Content, GenerativeContentBlob, Part } from './content';
+import { Content, FunctionResponse, GenerativeContentBlob, Part } from './content';
 import { LiveGenerationConfig, Tool, ToolConfig } from './requests';
 /**
  * User input that is sent to the model.
@@ -37,6 +37,14 @@ export interface _LiveClientRealtimeInput {
         mediaChunks: GenerativeContentBlob[];
     };
 }
+/**
+ * Function responses that are sent to the model in real time.
+ */
+export interface _LiveClientToolResponse {
+    toolResponse: {
+        functionResponses: FunctionResponse[];
+    };
+}
 /**
  * The first message in a Live session, used to configure generation options.
  *
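The internal `_LiveClientToolResponse` shape above mirrors what `sendFunctionResponses` serializes onto the websocket. For reference, the message a single response produces would look roughly like this (field values are made up for illustration):

```ts
// Shape per _LiveClientToolResponse above; values are illustrative.
const message = {
  toolResponse: {
    functionResponses: [
      { id: 'call-1', name: 'getWeather', response: { tempC: 21 } }
    ]
  }
};
// Internally the session sends: JSON.stringify(message) over the socket.
```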
package/dist/index.cjs.js CHANGED
@@ -8,7 +8,7 @@ var util = require('@firebase/util');
 var logger$1 = require('@firebase/logger');
 
 var name = "@firebase/ai";
-var version = "2.3.0-canary.7a7634f79";
+var version = "2.3.0-canary.ccbf7ba36";
 
 /**
  * @license
@@ -1919,7 +1919,7 @@ function mapGenerateContentCandidates(candidates) {
         // videoMetadata is not supported.
         // Throw early since developers may send a long video as input and only expect to pay
         // for inference on a small portion of the video.
-        if (candidate.content?.parts.some(part => part?.videoMetadata)) {
+        if (candidate.content?.parts?.some(part => part?.videoMetadata)) {
             throw new AIError(AIErrorCode.UNSUPPORTED, 'Part.videoMetadata is not supported in the Gemini Developer API. Please remove this property.');
         }
         const mappedCandidate = {
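The change above adds one `?.`: previously, a candidate whose `content` existed but had no `parts` array made `.some` throw a `TypeError` before the intended `UNSUPPORTED` check could run. A standalone illustration of the guard, using a simplified candidate shape:

```ts
interface CandidateLike {
  content?: { parts?: Array<{ videoMetadata?: unknown }> };
}

function hasVideoMetadata(candidate: CandidateLike): boolean {
  // Before the fix: candidate.content?.parts.some(...) dereferenced
  // `parts` unconditionally once `content` was present.
  return candidate.content?.parts?.some(part => part?.videoMetadata) ?? false;
}

// A partial candidate no longer crashes the mapper:
hasVideoMetadata({ content: {} }); // false, instead of a TypeError
```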
@@ -2021,6 +2021,14 @@ async function* generateResponseSequence(stream, apiSettings) {
         else {
             enhancedResponse = createEnhancedContentResponse(value);
         }
+        const firstCandidate = enhancedResponse.candidates?.[0];
+        // Don't yield a response with no useful data for the developer.
+        if (!firstCandidate?.content?.parts &&
+            !firstCandidate?.finishReason &&
+            !firstCandidate?.citationMetadata &&
+            !firstCandidate?.urlContextMetadata) {
+            continue;
+        }
         yield enhancedResponse;
     }
 }
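Restated as a standalone predicate, the new filter only lets a streamed chunk through when its first candidate carries something a developer can act on; the shapes below are approximated from the compiled code above:

```ts
interface CandidateChunk {
  candidates?: Array<{
    content?: { parts?: unknown[] };
    finishReason?: string;
    citationMetadata?: unknown;
    urlContextMetadata?: unknown;
  }>;
}

function hasUsefulData(response: CandidateChunk): boolean {
  const first = response.candidates?.[0];
  return Boolean(
    first?.content?.parts ||
      first?.finishReason ||
      first?.citationMetadata ||
      first?.urlContextMetadata
  );
}

// Chunks like { candidates: [{}] } are now skipped rather than yielded.
```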
@@ -2115,32 +2123,28 @@ function aggregateResponses(responses) {
              * Candidates should always have content and parts, but this handles
              * possible malformed responses.
              */
-            if (candidate.content && candidate.content.parts) {
+            if (candidate.content) {
+                // Skip a candidate without parts.
+                if (!candidate.content.parts) {
+                    continue;
+                }
                 if (!aggregatedResponse.candidates[i].content) {
                     aggregatedResponse.candidates[i].content = {
                         role: candidate.content.role || 'user',
                         parts: []
                     };
                 }
-                const newPart = {};
                 for (const part of candidate.content.parts) {
-                    if (part.text !== undefined) {
-                        // The backend can send empty text parts. If these are sent back
-                        // (e.g. in chat history), the backend will respond with an error.
-                        // To prevent this, ignore empty text parts.
-                        if (part.text === '') {
-                            continue;
-                        }
-                        newPart.text = part.text;
+                    const newPart = { ...part };
+                    // The backend can send empty text parts. If these are sent back
+                    // (e.g. in chat history), the backend will respond with an error.
+                    // To prevent this, ignore empty text parts.
+                    if (part.text === '') {
+                        continue;
                     }
-                    if (part.functionCall) {
-                        newPart.functionCall = part.functionCall;
+                    if (Object.keys(newPart).length > 0) {
+                        aggregatedResponse.candidates[i].content.parts.push(newPart);
                     }
-                    if (Object.keys(newPart).length === 0) {
-                        throw new AIError(AIErrorCode.INVALID_CONTENT, 'Part should have at least one property, but there are none. This is likely caused ' +
-                            'by a malformed response from the backend.');
-                    }
-                    aggregatedResponse.candidates[i].content.parts.push(newPart);
                 }
             }
         }
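The practical effect of this rewrite: the old aggregation copied only `text` and `functionCall` onto a fresh object and threw `INVALID_CONTENT` for anything else, so parts such as `inlineData` were lost or fatal when aggregating a stream. Spreading the part preserves every property, while empty text parts are still dropped. A small sketch of the difference:

```ts
// A part type the old code did not copy.
const part = { inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo=' } };

// Old behavior (simplified): only text/functionCall were copied, so this
// part produced an empty object and an INVALID_CONTENT error.
// New behavior: the spread keeps the part intact.
const newPart = { ...part };
console.log(Object.keys(newPart)); // ['inlineData']
```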
@@ -2846,6 +2850,25 @@ class LiveSession {
             this.webSocketHandler.send(JSON.stringify(message));
         });
     }
+    /**
+     * Sends function responses to the server.
+     *
+     * @param functionResponses - The function responses to send.
+     * @throws If this session has been closed.
+     *
+     * @beta
+     */
+    async sendFunctionResponses(functionResponses) {
+        if (this.isClosed) {
+            throw new AIError(AIErrorCode.REQUEST_ERROR, 'This LiveSession has been closed and cannot be used.');
+        }
+        const message = {
+            toolResponse: {
+                functionResponses
+            }
+        };
+        this.webSocketHandler.send(JSON.stringify(message));
+    }
     /**
      * Sends a stream of {@link GenerativeContentBlob}.
      *
@@ -3827,9 +3850,9 @@ class AudioConversationRunner {
             }
             else {
                 try {
-                    const resultPart = await this.options.functionCallingHandler(message.functionCalls);
+                    const functionResponse = await this.options.functionCallingHandler(message.functionCalls);
                     if (!this.isStopped) {
-                        void this.liveSession.send([resultPart]);
+                        void this.liveSession.sendFunctionResponses([functionResponse]);
                     }
                 }
                 catch (e) {
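End to end, the runner now routes the handler's result through `sendFunctionResponses` rather than sending it as a plain content `Part`. A sketch of the full flow under the new types — the model wiring is assumed, not prescribed by this diff:

```ts
import {
  FunctionCall,
  FunctionResponse,
  LiveGenerativeModel,
  startAudioConversation
} from '@firebase/ai';

async function run(model: LiveGenerativeModel): Promise<void> {
  const session = await model.connect();
  const controller = await startAudioConversation(session, {
    functionCallingHandler: async (
      calls: FunctionCall[]
    ): Promise<FunctionResponse> => {
      const call = calls[0];
      // The runner wraps this in sendFunctionResponses([response]) internally.
      return { id: call.id, name: call.name, response: { result: 'ok' } };
    }
  });
  // ...later, to end the conversation:
  await controller.stop();
}
```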