npm - hume - Versions diffs - 0.8.1-beta6 → 0.8.1-beta8 - Mend

hume 0.8.1-beta6 → 0.8.1-beta8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

package/.mock/definition/empathic-voice/__package__.yml (changed)

@@ -209,11 +209,77 @@ types:
       duration_secs:
         type: optional<integer>
         docs: Duration in seconds for the timeout.
+  PostedTimeoutSpecsInactivity:
+    docs: >-
+      Specifies the duration of user inactivity (in seconds) after which the EVI
+      WebSocket connection will be automatically disconnected. Default is 600
+      seconds (10 minutes).
+      Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
+    properties:
+      enabled:
+        type: boolean
+        docs: >-
+          Boolean indicating if this timeout is enabled.
+          If set to `false`, EVI will not timeout due to a specified duration of
+          user inactivity being reached. However, the conversation will
+          eventually disconnect after 1,800 seconds (30 minutes), which is the
+          maximum WebSocket duration limit for EVI.
+      duration_secs:
+        type: optional<integer>
+        docs: >-
+          Duration in seconds for the timeout (e.g. 600 seconds represents 10
+          minutes).
+  PostedTimeoutSpecsMaxDuration:
+    docs: >-
+      Specifies the maximum allowed duration (in seconds) for an EVI WebSocket
+      connection before it is automatically disconnected. Default is 1,800
+      seconds (30 minutes).
+      Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
+    properties:
+      enabled:
+        type: boolean
+        docs: >-
+          Boolean indicating if this timeout is enabled.
+          If set to `false`, EVI will not timeout due to a specified maximum
+          duration being reached. However, the conversation will eventually
+          disconnect after 1,800 seconds (30 minutes), which is the maximum
+          WebSocket duration limit for EVI.
+      duration_secs:
+        type: optional<integer>
+        docs: >-
+          Duration in seconds for the timeout (e.g. 600 seconds represents 10
+          minutes).
   PostedTimeoutSpecs:
     docs: Collection of timeout specs to be posted to the server
     properties:
-      inactivity: optional<PostedTimeoutSpec>
-      max_duration: optional<PostedTimeoutSpec>
+      inactivity:
+        type: optional<PostedTimeoutSpecsInactivity>
+        docs: >-
+          Specifies the duration of user inactivity (in seconds) after which the
+          EVI WebSocket connection will be automatically disconnected. Default
+          is 600 seconds (10 minutes).
+          Accepts a minimum value of 1 second and a maximum value of 1,800
+          seconds.
+      max_duration:
+        type: optional<PostedTimeoutSpecsMaxDuration>
+        docs: >-
+          Specifies the maximum allowed duration (in seconds) for an EVI
+          WebSocket connection before it is automatically disconnected. Default
+          is 1,800 seconds (30 minutes).
+          Accepts a minimum value of 1 second and a maximum value of 1,800
+          seconds.
   PostedUserDefinedToolSpec:
     docs: A specific tool identifier to be posted to the server
     properties:
@@ -290,6 +356,7 @@ types:
         type: optional<list<optional<ReturnBuiltinTool>>>
         docs: List of built-in tools associated with this config
       event_messages: optional<ReturnEventMessageSpecs>
+      timeouts: optional<ReturnTimeoutSpecs>
   ReturnEllmModel:
     docs: A specific eLLM Model configuration
     properties:
@@ -742,8 +809,9 @@ types:
       type:
         type: literal<"assistant_input">
         docs: >-
-          The type of message sent through the socket; for an Assistant Input
-          message, this must be `assistant_input`.
+          The type of message sent through the socket; must be `assistant_input`
+          for our server to correctly identify and process it as an Assistant
+          Input message.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -751,26 +819,44 @@ types:
           data, and persist conversations across EVI sessions.
       text:
         type: string
-        docs: Text to be synthesized.
+        docs: >-
+          Assistant text to synthesize into spoken audio and insert into the
+          conversation.
+          EVI uses this text to generate spoken audio using our proprietary
+          expressive text-to-speech model. Our model adds appropriate emotional
+          inflections and tones to the text based on the user’s expressions and
+          the context of the conversation. The synthesized audio is streamed
+          back to the user as an [Assistant
+          Message](/reference/empathic-voice-interface-evi/chat/chat#receive.Assistant%20Message.type).
   AudioConfiguration:
     properties:
       encoding:
         type: Encoding
-        docs: Audio encoding.
+        docs: Encoding format of the audio input, such as `linear16`.
       channels:
         type: integer
-        docs: Number of channels.
+        docs: Number of audio channels.
       sample_rate:
         type: integer
-        docs: Audio sample rate.
+        docs: >-
+          Audio sample rate. Number of samples per second in the audio input,
+          measured in Hertz.
   AudioInput:
     docs: When provided, the input is audio.
     properties:
       type:
         type: literal<"audio_input">
         docs: >-
-          The type of message sent through the socket; for an Audio Input
-          message, this must be `audio_input`.
+          The type of message sent through the socket; must be `audio_input` for
+          our server to correctly identify and process it as an Audio Input
+          message.
+          This message is used for sending audio input data to EVI for
+          processing and expression measurement. Audio data should be sent as a
+          continuous stream, encoded in Base64.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -778,22 +864,68 @@ types:
           data, and persist conversations across EVI sessions.
       data:
         type: string
-        docs: Base64 encoded audio input.
-        validation:
-          format: base64
-  BuiltInTool: literal<"web_search">
+        docs: >-
+          Base64 encoded audio input to insert into the conversation.
+          The audio input must be captured and transmitted to EVI as a
+          continuous stream, with the audio data sent in small chunks for better
+          transcription quality. When capturing audio through the browser, we
+          recommend recording the audio in 100ms intervals and adjusting from
+          there to determine if smaller or larger chunks are needed. These
+          chunks should be continuously sent to EVI as Audio Input messages.
+          The content of an Audio Input message is treated as the user’s speech
+          to EVI. EVI processes the audio, conducts expression measurement using
+          the prosody model, and responds accordingly.
+  BuiltInTool:
+    type: literal<"web_search">
+    docs: >-
+      Name of the built-in tool. Set to `web_search` to equip EVI with the
+      built-in Web Search tool.
   BuiltinToolConfig:
     properties:
       name: BuiltInTool
-      fallback_content: optional<string>
+      fallback_content:
+        type: optional<string>
+        docs: >-
+          Optional text passed to the supplemental LLM if the tool call fails.
+          The LLM then uses this text to generate a response back to the user,
+          ensuring continuity in the conversation.
   Context:
     properties:
       type:
         type: optional<ContextType>
-        docs: The persistence level of the injected context.
+        docs: >-
+          The persistence level of the injected context. Specifies how long the
+          injected context will remain active in the session.
+          There are three possible context types:
+          - **Persistent**: The context is appended to all user messages for the
+          duration of the session.
+          - **Temporary**: The context is appended only to the next user
+          message.
+           - **Editable**: The original context is updated to reflect the new context.
       text:
         type: string
-        docs: User context to inject.
+        docs: >-
+          The context to be injected into the conversation. Helps inform the
+          LLM's response by providing relevant information about the ongoing
+          conversation.
+          This text will be appended to the end of user messages based on the
+          chosen persistence level. For example, if you want to remind EVI of
+          its role as a helpful weather assistant, the context you insert will
+          be appended to the end of user messages as `{Context: You are a
+          helpful weather assistant}`.
   ContextType:
     enum:
       - editable
@@ -809,8 +941,16 @@ types:
       type:
         type: literal<"pause_assistant_message">
         docs: >-
-          The type of message sent through the socket; for a Pause Assistant
-          message, this must be `pause_assistant_message`.
+          The type of message sent through the socket; must be
+          `pause_assistant_message` for our server to correctly identify and
+          process it as a Pause Assistant message.
+          Once this message is sent, EVI will not respond until a [Resume
+          Assistant
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.Resume%20Assistant%20Message.type)
+          is sent. When paused, EVI won’t respond, but transcriptions of your
+          audio inputs will still be recorded.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -824,8 +964,17 @@ types:
       type:
         type: literal<"resume_assistant_message">
         docs: >-
-          The type of message sent through the socket; for a Resume Assistant
-          message, this must be `resume_assistant_message`.
+          The type of message sent through the socket; must be
+          `resume_assistant_message` for our server to correctly identify and
+          process it as a Resume Assistant message.
+          Upon resuming, if any audio input was sent during the pause, EVI will
+          retain context from all messages sent but only respond to the last
+          user message. (e.g., If you ask EVI two questions while paused and
+          then send a `resume_assistant_message`, EVI will respond to the second
+          question and have added the first question to its conversation
+          context.)
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -837,33 +986,123 @@ types:
       type:
         type: literal<"session_settings">
         docs: >-
-          The type of message sent through the socket; for a Session Settings
-          message, this must be `session_settings`.
+          The type of message sent through the socket; must be
+          `session_settings` for our server to correctly identify and process it
+          as a Session Settings message.
+          Session settings are temporary and apply only to the current Chat
+          session. These settings can be adjusted dynamically based on the
+          requirements of each session to ensure optimal performance and user
+          experience.
+          For more information, please refer to the [Session Settings
+          section](/docs/empathic-voice-interface-evi/configuration#session-settings)
+          on the EVI Configuration page.
       custom_session_id:
         type: optional<string>
         docs: >-
-          Used to manage conversational state, correlate frontend and backend
-          data, and persist conversations across EVI sessions.
+          Unique identifier for the session. Used to manage conversational
+          state, correlate frontend and backend data, and persist conversations
+          across EVI sessions.
+          If included, the response sent from Hume to your backend will include
+          this ID. This allows you to correlate frontend users with their
+          incoming messages.
+          It is recommended to pass a `custom_session_id` if you are using a
+          Custom Language Model. Please see our guide to [using a custom
+          language
+          model](/docs/empathic-voice-interface-evi/custom-language-model) with
+          EVI to learn more.
       system_prompt:
         type: optional<string>
         docs: >-
-          Instructions for how the system should respond to the user. Set to
-          null to use the default system prompt.
+          Instructions used to shape EVI’s behavior, responses, and style for
+          the session.
+          When included in a Session Settings message, the provided Prompt
+          overrides the existing one specified in the EVI configuration. If no
+          Prompt was defined in the configuration, this Prompt will be the one
+          used for the session.
+          You can use the Prompt to define a specific goal or role for EVI,
+          specifying how it should act or what it should focus on during the
+          conversation. For example, EVI can be instructed to act as a customer
+          support representative, a fitness coach, or a travel advisor, each
+          with its own set of behaviors and response styles.
+          For help writing a system prompt, see our [Prompting
+          Guide](/docs/empathic-voice-interface-evi/prompting).
       context:
         type: optional<Context>
-        docs: User context to inject. Set to null to disable context injection.
+        docs: >-
+          Allows developers to inject additional context into the conversation,
+          which is appended to the end of user messages for the session.
+          When included in a Session Settings message, the provided context can
+          be used to remind the LLM of its role in every user message, prevent
+          it from forgetting important details, or add new relevant information
+          to the conversation.
+          Set to `null` to disable context injection.
       audio:
         type: optional<AudioConfiguration>
-        docs: Audio configuration.
+        docs: >-
+          Configuration details for the audio input used during the session.
+          Ensures the audio is being correctly set up for processing.
+          This optional field is only required when the audio input is encoded
+          in PCM Linear 16 (16-bit, little-endian, signed PCM WAV data). For
+          detailed instructions on how to configure session settings for PCM
+          Linear 16 audio, please refer to the [Session Settings
+          section](/docs/empathic-voice-interface-evi/configuration#session-settings)
+          on the EVI Configuration page.
       language_model_api_key:
         type: optional<string>
-        docs: Third party API key for the language model used for non-Hume models.
+        docs: >-
+          Third party API key for the supplemental language model.
+          When provided, EVI will use this key instead of Hume’s API key for the
+          supplemental LLM. This allows you to bypass rate limits and utilize
+          your own API key as needed.
       tools:
         type: optional<list<Tool>>
-        docs: List of tools to enable.
+        docs: >-
+          List of user-defined tools to enable for the session.
+          Tools are resources used by EVI to perform various tasks, such as
+          searching the web or calling external APIs. Built-in tools, like web
+          search, are natively integrated, while user-defined tools are created
+          and invoked by the user. To learn more, see our [Tool Use
+          Guide](/docs/empathic-voice-interface-evi/tool-use).
       builtin_tools:
         type: optional<list<BuiltinToolConfig>>
-        docs: List of builtin tools to enable.
+        docs: >-
+          List of built-in tools to enable for the session.
+          Tools are resources used by EVI to perform various tasks, such as
+          searching the web or calling external APIs. Built-in tools, like web
+          search, are natively integrated, while user-defined tools are created
+          and invoked by the user. To learn more, see our [Tool Use
+          Guide](/docs/empathic-voice-interface-evi/tool-use).
+          Currently, the only built-in tool Hume provides is **Web Search**.
+          When enabled, Web Search equips EVI with the ability to search the web
+          for up-to-date information.
       metadata: optional<map<string, unknown>>
       variables:
         type: optional<map<string, string>>
@@ -872,21 +1111,31 @@ types:
     properties:
       type:
         type: ToolType
-        docs: Type of tool.
+        docs: Type of tool. Set to `function` for user-defined tools.
       name:
         type: string
-        docs: Name of the tool.
+        docs: Name of the user-defined tool to be enabled.
       parameters:
         type: string
-        docs: Parameters of the tool. Is a stringified JSON schema.
+        docs: >-
+          Parameters of the tool. Is a stringified JSON schema.
+          These parameters define the inputs needed for the tool’s execution,
+          including the expected data type and description for each input field.
+          Structured as a JSON schema, this format ensures the tool receives
+          data in the expected format.
       description:
         type: optional<string>
-        docs: Description of the function.
+        docs: >-
+          An optional description of what the tool does, used by the
+          supplemental LLM to choose when and how to call the function.
       fallback_content:
         type: optional<string>
         docs: >-
-          Fallback content of the tool, passed to the LLM if the function call
-          response fails.
+          Optional text passed to the supplemental LLM if the tool call fails.
+          The LLM then uses this text to generate a response back to the user,
+          ensuring continuity in the conversation.
   ToolErrorMessage:
     docs: When provided, the output is a function call error.
     properties:
@@ -895,6 +1144,12 @@ types:
         docs: >-
           The type of message sent through the socket; for a Tool Error message,
           this must be `tool_error`.
+          Upon receiving a [Tool Call
+          message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
+          and failing to invoke the function, this message is sent to notify EVI
+          of the tool's failure.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -902,22 +1157,37 @@ types:
           data, and persist conversations across EVI sessions.
       tool_type:
         type: optional<ToolType>
-        docs: Type of tool called, either 'builtin' or 'function'.
+        docs: >-
+          Type of tool called. Either `builtin` for natively implemented tools,
+          like web search, or `function` for user-defined tools.
       tool_call_id:
         type: string
-        docs: ID of the tool call.
+        docs: >-
+          The unique identifier for a specific tool call instance.
+          This ID is used to track the request and response of a particular tool
+          invocation, ensuring that the Tool Error message is linked to the
+          appropriate tool call request. The specified `tool_call_id` must match
+          the one received in the [Tool Call
+          message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type).
       content:
         type: optional<string>
-        docs: The content passed to the LLM in place of the tool response.
+        docs: >-
+          Optional text passed to the supplemental LLM in place of the tool call
+          result. The LLM then uses this text to generate a response back to the
+          user, ensuring continuity in the conversation if the tool errors.
       error:
         type: string
         docs: Error message from the tool call, not exposed to the LLM or user.
       code:
         type: optional<string>
-        docs: Error code.
+        docs: Error code. Identifies the type of error encountered.
       level:
         type: optional<ErrorLevel>
-        docs: Error level.
+        docs: >-
+          Indicates the severity of an error; for a Tool Error message, this
+          must be `warn` to signal an unexpected event.
   ToolResponseMessage:
     docs: When provided, the output is a function call response.
     properties:
@@ -926,6 +1196,12 @@ types:
         docs: >-
           The type of message sent through the socket; for a Tool Response
           message, this must be `tool_response`.
+          Upon receiving a [Tool Call
+          message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
+          and successfully invoking the function, this message is sent to convey
+          the result of the function call back to EVI.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -933,12 +1209,35 @@ types:
           data, and persist conversations across EVI sessions.
       tool_call_id:
         type: string
-        docs: ID of the tool call.
+        docs: >-
+          The unique identifier for a specific tool call instance.
+          This ID is used to track the request and response of a particular tool
+          invocation, ensuring that the correct response is linked to the
+          appropriate request. The specified `tool_call_id` must match the one
+          received in the [Tool Call
+          message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.tool_call_id).
       content:
         type: string
-        docs: Return value of the tool call.
-      tool_name: optional<string>
-      tool_type: optional<ToolType>
+        docs: >-
+          Return value of the tool call. Contains the output generated by the
+          tool to pass back to EVI.
+      tool_name:
+        type: optional<string>
+        docs: >-
+          Name of the tool.
+          Include this optional field to help the supplemental LLM identify
+          which tool generated the response. The specified `tool_name` must
+          match the one received in the [Tool Call
+          message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type).
+      tool_type:
+        type: optional<ToolType>
+        docs: >-
+          Type of tool called. Either `builtin` for natively implemented tools,
+          like web search, or `function` for user-defined tools.
   ToolType:
     enum:
       - builtin
@@ -949,8 +1248,9 @@ types:
       type:
         type: literal<"user_input">
         docs: >-
-          The type of message sent through the socket; for a User Input message,
-          this must be `user_input`.
+          The type of message sent through the socket; must be `user_input` for
+          our server to correctly identify and process it as a User Input
+          message.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -958,7 +1258,15 @@ types:
           data, and persist conversations across EVI sessions.
       text:
         type: string
-        docs: User text to insert into the conversation.
+        docs: >-
+          User text to insert into the conversation. Text sent through a User
+          Input message is treated as the user’s speech to EVI. EVI processes
+          this input and provides a corresponding response.
+          Expression measurement results are not available for User Input
+          messages, as the prosody model relies on audio input and cannot
+          process text alone.
   AssistantEnd:
     docs: When provided, the output is an assistant end message.
     properties:
@@ -967,6 +1275,11 @@ types:
         docs: >-
           The type of message sent through the socket; for an Assistant End
           message, this must be `assistant_end`.
+          This message indicates the conclusion of the assistant’s response,
+          signaling that the assistant has finished speaking for the current
+          conversational turn.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -980,6 +1293,11 @@ types:
         docs: >-
           The type of message sent through the socket; for an Assistant Message,
           this must be `assistant_message`.
+          This message contains both a transcript of the assistant’s response
+          and the expression measurement predictions of the assistant’s audio
+          output.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -987,7 +1305,9 @@ types:
           data, and persist conversations across EVI sessions.
       id:
         type: optional<string>
-        docs: ID of the assistant message.
+        docs: >-
+          ID of the assistant message. Allows the Assistant Message to be
+          tracked and referenced.
       message:
         type: ChatMessage
         docs: Transcript of the message.
@@ -996,7 +1316,10 @@ types:
         docs: Inference model results.
       from_text:
         type: boolean
-        docs: Indicates if this message was constructed from a text input message.
+        docs: >-
+          Indicates if this message was inserted into the conversation as text
+          from an [Assistant Input
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.Assistant%20Input.text).
   AudioOutput:
     docs: When provided, the output is audio.
     properties:
@@ -1012,12 +1335,15 @@ types:
           data, and persist conversations across EVI sessions.
       id:
         type: string
-        docs: ID of the audio output.
+        docs: >-
+          ID of the audio output. Allows the Audio Output message to be tracked
+          and referenced.
       data:
         type: string
-        docs: Base64 encoded audio output.
-        validation:
-          format: base64
+        docs: >-
+          Base64 encoded audio output. This encoded audio is transmitted to the
+          client, where it can be decoded and played back as part of the user
+          interaction.
   ChatMessageToolResult:
     discriminated: false
     docs: Function call response from client.
@@ -1046,6 +1372,11 @@ types:
         docs: >-
           The type of message sent through the socket; for a Chat Metadata
           message, this must be `chat_metadata`.
+          The Chat Metadata message is the first message you receive after
+          establishing a connection with EVI and contains important identifiers
+          for the current Chat session.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -1053,10 +1384,25 @@ types:
           data, and persist conversations across EVI sessions.
       chat_group_id:
         type: string
-        docs: ID of the chat group. Used to resume a chat.
+        docs: >-
+          ID of the Chat Group.
+          Used to resume a Chat when passed in the
+          [resumed_chat_group_id](/reference/empathic-voice-interface-evi/chat/chat#request.query.resumed_chat_group_id)
+          query parameter of a subsequent connection request. This allows EVI to
+          continue the conversation from where it left off within the Chat
+          Group.
+          Learn more about [supporting chat
+          resumability](/docs/empathic-voice-interface-evi/faq#does-evi-support-chat-resumability)
+          from the EVI FAQ.
       chat_id:
         type: string
-        docs: ID of the chat.
+        docs: >-
+          ID of the Chat session. Allows the Chat session to be tracked and
+          referenced.
       request_id:
         type: optional<string>
         docs: ID of the initiating request.
@@ -1118,6 +1464,11 @@ types:
         docs: >-
           The type of message sent through the socket; for a Web Socket Error
           message, this must be `error`.
+          This message indicates a disruption in the WebSocket connection, such
+          as an unexpected disconnection, protocol error, or data transmission
+          issue.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -1125,18 +1476,27 @@ types:
           data, and persist conversations across EVI sessions.
       code:
         type: string
-        docs: Error code.
+        docs: Error code. Identifies the type of error encountered.
       slug:
         type: string
-        docs: Error slug.
+        docs: >-
+          Short, human-readable identifier and description for the error. See a
+          complete list of error slugs on the [Errors
+          page](/docs/resources/errors).
       message:
         type: string
-        docs: Error message.
+        docs: Detailed description of the error.
   Inference:
     properties:
       prosody:
         type: optional<ProsodyInference>
-        docs: Prosody model inference results.
+        docs: >-
+          Prosody model inference results.
+          EVI uses the prosody model to measure 48 expressions related to speech
+          and vocal characteristics. These results contain a detailed emotional
+          and tonal analysis of the audio.
   MillisecondInterval:
     properties:
       begin:
@@ -1147,7 +1507,14 @@ types:
         docs: End time of the interval in milliseconds.
   ProsodyInference:
     properties:
-      scores: EmotionScores
+      scores:
+        type: EmotionScores
+        docs: >-
+          The confidence levels of 48 expressions in a given audio sample.
+          Scores typically range from 0 to 1, with higher values indicating a
+          stronger confidence level in the measured attribute.
   Role:
     enum:
       - assistant
@@ -1163,15 +1530,32 @@ types:
         docs: Name of the tool called.
       parameters:
         type: string
-        docs: Parameters of the tool call. Is a stringified JSON schema.
+        docs: >-
+          Parameters of the tool.
+          These parameters define the inputs needed for the tool’s execution,
+          including the expected data type and description for each input field.
+          Structured as a stringified JSON schema, this format ensures the tool
+          receives data in the expected format.
       tool_call_id:
         type: string
-        docs: ID of the tool call.
+        docs: >-
+          The unique identifier for a specific tool call instance.
+          This ID is used to track the request and response of a particular tool
+          invocation, ensuring that the correct response is linked to the
+          appropriate request.
       type:
         type: literal<"tool_call">
         docs: >-
           The type of message sent through the socket; for a Tool Call message,
           this must be `tool_call`.
+          This message indicates that the supplemental LLM has detected a need
+          to invoke the specified tool.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -1179,10 +1563,17 @@ types:
           data, and persist conversations across EVI sessions.
       tool_type:
         type: optional<ToolType>
-        docs: Type of tool called, either 'builtin' or 'function'.
+        docs: >-
+          Type of tool called. Either `builtin` for natively implemented tools,
+          like web search, or `function` for user-defined tools.
       response_required:
         type: boolean
-        docs: Whether a response is required from the developer.
+        docs: >-
+          Indicates whether a response to the tool call is required from the
+          developer, either in the form of a [Tool Response
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Response%20Message.type)
+          or a [Tool Error
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Error%20Message.type).
   UserInterruption:
     docs: When provided, the output is an interruption.
     properties:
@@ -1191,6 +1582,13 @@ types:
         docs: >-
           The type of message sent through the socket; for a User Interruption
           message, this must be `user_interruption`.
+          This message indicates the user has interrupted the assistant’s
+          response. EVI detects the interruption in real-time and sends this
+          message to signal the interruption event. This message allows the
+          system to stop the current audio playback, clear the audio queue, and
+          prepare to handle new user input.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -1205,8 +1603,18 @@ types:
       type:
         type: literal<"user_message">
         docs: >-
-          The type of message sent through the socket; for a User message, this
+          The type of message sent through the socket; for a User Message, this
           must be `user_message`.
+          This message contains both a transcript of the user’s input and the
+          expression measurement predictions if the input was sent as an [Audio
+          Input
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.Audio%20Input.type).
+          Expression measurement predictions are not provided for a [User Input
+          message](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.type),
+          as the prosody model relies on audio input and cannot process text
+          alone.
       custom_session_id:
         type: optional<string>
         docs: >-
@@ -1223,7 +1631,22 @@ types:
         docs: Start and End time of user message.
       from_text:
         type: boolean
-        docs: Indicates if this message was constructed from a text input message.
+        docs: >-
+          Indicates if this message was inserted into the conversation as text
+          from a [User
+          Input](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.text)
+          message.
+  JsonMessage:
+    discriminated: false
+    union:
+      - AssistantMessage
+      - ChatMetadata
+      - WebSocketError
+      - UserInterruption
+      - UserMessage
+      - ToolCallMessage
+      - ToolResponseMessage
+      - ToolErrorMessage
   TtsInput:
     properties:
       type: optional<literal<"tts">>
@@ -1253,8 +1676,12 @@ types:
   VoiceArgs:
     properties:
       voice: optional<VoiceNameEnum>
-      baseline: optional<boolean>
-      reconstruct: optional<boolean>
+      baseline:
+        type: optional<boolean>
+        default: false
+      reconstruct:
+        type: optional<boolean>
+        default: false
   VoiceNameEnum:
     enum:
       - ITO