hume 0.8.1-beta6 → 0.8.1-beta8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/.mock/definition/empathic-voice/__package__.yml +498 -71
  2. package/.mock/definition/empathic-voice/configs.yml +40 -0
  3. package/.mock/definition/expression-measurement/__package__.yml +13 -11
  4. package/.mock/definition/expression-measurement/stream.yml +4 -0
  5. package/.mock/fern.config.json +1 -1
  6. package/Client.d.ts +1 -0
  7. package/api/resources/empathicVoice/client/Client.d.ts +1 -0
  8. package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -0
  9. package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +1 -0
  10. package/api/resources/empathicVoice/types/AssistantEnd.d.ts +5 -1
  11. package/api/resources/empathicVoice/types/AssistantInput.d.ts +6 -2
  12. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +7 -3
  13. package/api/resources/empathicVoice/types/AudioConfiguration.d.ts +3 -3
  14. package/api/resources/empathicVoice/types/AudioInput.d.ts +12 -2
  15. package/api/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
  16. package/api/resources/empathicVoice/types/BuiltInTool.d.ts +3 -0
  17. package/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -0
  18. package/api/resources/empathicVoice/types/ChatMetadata.d.ts +13 -3
  19. package/api/resources/empathicVoice/types/Context.d.ts +16 -2
  20. package/api/resources/empathicVoice/types/Inference.d.ts +5 -1
  21. package/api/resources/empathicVoice/types/JsonMessage.d.ts +5 -0
  22. package/api/resources/empathicVoice/types/JsonMessage.js +5 -0
  23. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +5 -1
  24. package/api/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +12 -2
  25. package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +18 -0
  26. package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +5 -0
  27. package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +18 -0
  28. package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +5 -0
  29. package/api/resources/empathicVoice/types/ProsodyInference.d.ts +5 -0
  30. package/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +5 -1
  31. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +1 -0
  32. package/api/resources/empathicVoice/types/SessionSettings.d.ts +52 -8
  33. package/api/resources/empathicVoice/types/Tool.d.ts +9 -5
  34. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +17 -5
  35. package/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +14 -6
  36. package/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +17 -3
  37. package/api/resources/empathicVoice/types/UserInput.d.ts +6 -2
  38. package/api/resources/empathicVoice/types/UserInterruption.d.ts +5 -1
  39. package/api/resources/empathicVoice/types/UserMessage.d.ts +6 -2
  40. package/api/resources/empathicVoice/types/WebSocketError.d.ts +8 -4
  41. package/api/resources/empathicVoice/types/index.d.ts +3 -0
  42. package/api/resources/empathicVoice/types/index.js +3 -0
  43. package/dist/Client.d.ts +1 -0
  44. package/dist/api/resources/empathicVoice/client/Client.d.ts +1 -0
  45. package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -0
  46. package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +1 -0
  47. package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +5 -1
  48. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +6 -2
  49. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +7 -3
  50. package/dist/api/resources/empathicVoice/types/AudioConfiguration.d.ts +3 -3
  51. package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +12 -2
  52. package/dist/api/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
  53. package/dist/api/resources/empathicVoice/types/BuiltInTool.d.ts +3 -0
  54. package/dist/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -0
  55. package/dist/api/resources/empathicVoice/types/ChatMetadata.d.ts +13 -3
  56. package/dist/api/resources/empathicVoice/types/Context.d.ts +16 -2
  57. package/dist/api/resources/empathicVoice/types/Inference.d.ts +5 -1
  58. package/dist/api/resources/empathicVoice/types/JsonMessage.d.ts +5 -0
  59. package/dist/api/resources/empathicVoice/types/JsonMessage.js +5 -0
  60. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +5 -1
  61. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +12 -2
  62. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +18 -0
  63. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +5 -0
  64. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +18 -0
  65. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +5 -0
  66. package/dist/api/resources/empathicVoice/types/ProsodyInference.d.ts +5 -0
  67. package/dist/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +5 -1
  68. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +1 -0
  69. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +52 -8
  70. package/dist/api/resources/empathicVoice/types/Tool.d.ts +9 -5
  71. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +17 -5
  72. package/dist/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +14 -6
  73. package/dist/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +17 -3
  74. package/dist/api/resources/empathicVoice/types/UserInput.d.ts +6 -2
  75. package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +5 -1
  76. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +6 -2
  77. package/dist/api/resources/empathicVoice/types/WebSocketError.d.ts +8 -4
  78. package/dist/api/resources/empathicVoice/types/index.d.ts +3 -0
  79. package/dist/api/resources/empathicVoice/types/index.js +3 -0
  80. package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +2 -0
  81. package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.js +2 -0
  82. package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +2 -0
  83. package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.js +2 -0
  84. package/dist/serialization/resources/empathicVoice/types/JsonMessage.d.ts +18 -0
  85. package/dist/serialization/resources/empathicVoice/types/JsonMessage.js +48 -0
  86. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +4 -3
  87. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.js +4 -3
  88. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +13 -0
  89. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +34 -0
  90. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +13 -0
  91. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +34 -0
  92. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -0
  93. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -0
  94. package/dist/serialization/resources/empathicVoice/types/index.d.ts +3 -0
  95. package/dist/serialization/resources/empathicVoice/types/index.js +3 -0
  96. package/package.json +1 -1
  97. package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +2 -0
  98. package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.js +2 -0
  99. package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +2 -0
  100. package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.js +2 -0
  101. package/serialization/resources/empathicVoice/types/JsonMessage.d.ts +18 -0
  102. package/serialization/resources/empathicVoice/types/JsonMessage.js +48 -0
  103. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +4 -3
  104. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.js +4 -3
  105. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +13 -0
  106. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +34 -0
  107. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +13 -0
  108. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +34 -0
  109. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -0
  110. package/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -0
  111. package/serialization/resources/empathicVoice/types/index.d.ts +3 -0
  112. package/serialization/resources/empathicVoice/types/index.js +3 -0
@@ -209,11 +209,77 @@ types:
209
209
  duration_secs:
210
210
  type: optional<integer>
211
211
  docs: Duration in seconds for the timeout.
212
+ PostedTimeoutSpecsInactivity:
213
+ docs: >-
214
+ Specifies the duration of user inactivity (in seconds) after which the EVI
215
+ WebSocket connection will be automatically disconnected. Default is 600
216
+ seconds (10 minutes).
217
+
218
+
219
+ Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
220
+ properties:
221
+ enabled:
222
+ type: boolean
223
+ docs: >-
224
+ Boolean indicating if this timeout is enabled.
225
+
226
+
227
+ If set to `false`, EVI will not time out due to a specified duration of
228
+ user inactivity being reached. However, the conversation will
229
+ eventually disconnect after 1,800 seconds (30 minutes), which is the
230
+ maximum WebSocket duration limit for EVI.
231
+ duration_secs:
232
+ type: optional<integer>
233
+ docs: >-
234
+ Duration in seconds for the timeout (e.g. 600 seconds represents 10
235
+ minutes).
236
+ PostedTimeoutSpecsMaxDuration:
237
+ docs: >-
238
+ Specifies the maximum allowed duration (in seconds) for an EVI WebSocket
239
+ connection before it is automatically disconnected. Default is 1,800
240
+ seconds (30 minutes).
241
+
242
+
243
+ Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
244
+ properties:
245
+ enabled:
246
+ type: boolean
247
+ docs: >-
248
+ Boolean indicating if this timeout is enabled.
249
+
250
+
251
+ If set to `false`, EVI will not time out due to a specified maximum
252
+ duration being reached. However, the conversation will eventually
253
+ disconnect after 1,800 seconds (30 minutes), which is the maximum
254
+ WebSocket duration limit for EVI.
255
+ duration_secs:
256
+ type: optional<integer>
257
+ docs: >-
258
+ Duration in seconds for the timeout (e.g. 600 seconds represents 10
259
+ minutes).
212
260
  PostedTimeoutSpecs:
213
261
  docs: Collection of timeout specs to be posted to the server
214
262
  properties:
215
- inactivity: optional<PostedTimeoutSpec>
216
- max_duration: optional<PostedTimeoutSpec>
263
+ inactivity:
264
+ type: optional<PostedTimeoutSpecsInactivity>
265
+ docs: >-
266
+ Specifies the duration of user inactivity (in seconds) after which the
267
+ EVI WebSocket connection will be automatically disconnected. Default
268
+ is 600 seconds (10 minutes).
269
+
270
+
271
+ Accepts a minimum value of 1 second and a maximum value of 1,800
272
+ seconds.
273
+ max_duration:
274
+ type: optional<PostedTimeoutSpecsMaxDuration>
275
+ docs: >-
276
+ Specifies the maximum allowed duration (in seconds) for an EVI
277
+ WebSocket connection before it is automatically disconnected. Default
278
+ is 1,800 seconds (30 minutes).
279
+
280
+
281
+ Accepts a minimum value of 1 second and a maximum value of 1,800
282
+ seconds.
217
283
  PostedUserDefinedToolSpec:
218
284
  docs: A specific tool identifier to be posted to the server
219
285
  properties:
@@ -290,6 +356,7 @@ types:
290
356
  type: optional<list<optional<ReturnBuiltinTool>>>
291
357
  docs: List of built-in tools associated with this config
292
358
  event_messages: optional<ReturnEventMessageSpecs>
359
+ timeouts: optional<ReturnTimeoutSpecs>
293
360
  ReturnEllmModel:
294
361
  docs: A specific eLLM Model configuration
295
362
  properties:
@@ -742,8 +809,9 @@ types:
742
809
  type:
743
810
  type: literal<"assistant_input">
744
811
  docs: >-
745
- The type of message sent through the socket; for an Assistant Input
746
- message, this must be `assistant_input`.
812
+ The type of message sent through the socket; must be `assistant_input`
813
+ for our server to correctly identify and process it as an Assistant
814
+ Input message.
747
815
  custom_session_id:
748
816
  type: optional<string>
749
817
  docs: >-
@@ -751,26 +819,44 @@ types:
751
819
  data, and persist conversations across EVI sessions.
752
820
  text:
753
821
  type: string
754
- docs: Text to be synthesized.
822
+ docs: >-
823
+ Assistant text to synthesize into spoken audio and insert into the
824
+ conversation.
825
+
826
+
827
+ EVI uses this text to generate spoken audio using our proprietary
828
+ expressive text-to-speech model. Our model adds appropriate emotional
829
+ inflections and tones to the text based on the user’s expressions and
830
+ the context of the conversation. The synthesized audio is streamed
831
+ back to the user as an [Assistant
832
+ Message](/reference/empathic-voice-interface-evi/chat/chat#receive.Assistant%20Message.type).
755
833
  AudioConfiguration:
756
834
  properties:
757
835
  encoding:
758
836
  type: Encoding
759
- docs: Audio encoding.
837
+ docs: Encoding format of the audio input, such as `linear16`.
760
838
  channels:
761
839
  type: integer
762
- docs: Number of channels.
840
+ docs: Number of audio channels.
763
841
  sample_rate:
764
842
  type: integer
765
- docs: Audio sample rate.
843
+ docs: >-
844
+ Audio sample rate. Number of samples per second in the audio input,
845
+ measured in Hertz.
766
846
  AudioInput:
767
847
  docs: When provided, the input is audio.
768
848
  properties:
769
849
  type:
770
850
  type: literal<"audio_input">
771
851
  docs: >-
772
- The type of message sent through the socket; for an Audio Input
773
- message, this must be `audio_input`.
852
+ The type of message sent through the socket; must be `audio_input` for
853
+ our server to correctly identify and process it as an Audio Input
854
+ message.
855
+
856
+
857
+ This message is used for sending audio input data to EVI for
858
+ processing and expression measurement. Audio data should be sent as a
859
+ continuous stream, encoded in Base64.
774
860
  custom_session_id:
775
861
  type: optional<string>
776
862
  docs: >-
@@ -778,22 +864,68 @@ types:
778
864
  data, and persist conversations across EVI sessions.
779
865
  data:
780
866
  type: string
781
- docs: Base64 encoded audio input.
782
- validation:
783
- format: base64
784
- BuiltInTool: literal<"web_search">
867
+ docs: >-
868
+ Base64 encoded audio input to insert into the conversation.
869
+
870
+
871
+ The audio input must be captured and transmitted to EVI as a
872
+ continuous stream, with the audio data sent in small chunks for better
873
+ transcription quality. When capturing audio through the browser, we
874
+ recommend recording the audio in 100ms intervals and adjusting from
875
+ there to determine if smaller or larger chunks are needed. These
876
+ chunks should be continuously sent to EVI as Audio Input messages.
877
+
878
+
879
+ The content of an Audio Input message is treated as the user’s speech
880
+ to EVI. EVI processes the audio, conducts expression measurement using
881
+ the prosody model, and responds accordingly.
882
+ BuiltInTool:
883
+ type: literal<"web_search">
884
+ docs: >-
885
+ Name of the built-in tool. Set to `web_search` to equip EVI with the
886
+ built-in Web Search tool.
785
887
  BuiltinToolConfig:
786
888
  properties:
787
889
  name: BuiltInTool
788
- fallback_content: optional<string>
890
+ fallback_content:
891
+ type: optional<string>
892
+ docs: >-
893
+ Optional text passed to the supplemental LLM if the tool call fails.
894
+ The LLM then uses this text to generate a response back to the user,
895
+ ensuring continuity in the conversation.
789
896
  Context:
790
897
  properties:
791
898
  type:
792
899
  type: optional<ContextType>
793
- docs: The persistence level of the injected context.
900
+ docs: >-
901
+ The persistence level of the injected context. Specifies how long the
902
+ injected context will remain active in the session.
903
+
904
+
905
+ There are three possible context types:
906
+
907
+
908
+ - **Persistent**: The context is appended to all user messages for the
909
+ duration of the session.
910
+
911
+
912
+ - **Temporary**: The context is appended only to the next user
913
+ message.
914
+
915
+ - **Editable**: The original context is updated to reflect the new context.
794
916
  text:
795
917
  type: string
796
- docs: User context to inject.
918
+ docs: >-
919
+ The context to be injected into the conversation. Helps inform the
920
+ LLM's response by providing relevant information about the ongoing
921
+ conversation.
922
+
923
+
924
+ This text will be appended to the end of user messages based on the
925
+ chosen persistence level. For example, if you want to remind EVI of
926
+ its role as a helpful weather assistant, the context you insert will
927
+ be appended to the end of user messages as `{Context: You are a
928
+ helpful weather assistant}`.
797
929
  ContextType:
798
930
  enum:
799
931
  - editable
@@ -809,8 +941,16 @@ types:
809
941
  type:
810
942
  type: literal<"pause_assistant_message">
811
943
  docs: >-
812
- The type of message sent through the socket; for a Pause Assistant
813
- message, this must be `pause_assistant_message`.
944
+ The type of message sent through the socket; must be
945
+ `pause_assistant_message` for our server to correctly identify and
946
+ process it as a Pause Assistant message.
947
+
948
+
949
+ Once this message is sent, EVI will not respond until a [Resume
950
+ Assistant
951
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.Resume%20Assistant%20Message.type)
952
+ is sent. When paused, EVI won’t respond, but transcriptions of your
953
+ audio inputs will still be recorded.
814
954
  custom_session_id:
815
955
  type: optional<string>
816
956
  docs: >-
@@ -824,8 +964,17 @@ types:
824
964
  type:
825
965
  type: literal<"resume_assistant_message">
826
966
  docs: >-
827
- The type of message sent through the socket; for a Resume Assistant
828
- message, this must be `resume_assistant_message`.
967
+ The type of message sent through the socket; must be
968
+ `resume_assistant_message` for our server to correctly identify and
969
+ process it as a Resume Assistant message.
970
+
971
+
972
+ Upon resuming, if any audio input was sent during the pause, EVI will
973
+ retain context from all messages sent but only respond to the last
974
+ user message. (For example, if you ask EVI two questions while paused and
975
+ then send a `resume_assistant_message`, EVI will respond to the second
976
+ question and have added the first question to its conversation
977
+ context.)
829
978
  custom_session_id:
830
979
  type: optional<string>
831
980
  docs: >-
@@ -837,33 +986,123 @@ types:
837
986
  type:
838
987
  type: literal<"session_settings">
839
988
  docs: >-
840
- The type of message sent through the socket; for a Session Settings
841
- message, this must be `session_settings`.
989
+ The type of message sent through the socket; must be
990
+ `session_settings` for our server to correctly identify and process it
991
+ as a Session Settings message.
992
+
993
+
994
+ Session settings are temporary and apply only to the current Chat
995
+ session. These settings can be adjusted dynamically based on the
996
+ requirements of each session to ensure optimal performance and user
997
+ experience.
998
+
999
+
1000
+ For more information, please refer to the [Session Settings
1001
+ section](/docs/empathic-voice-interface-evi/configuration#session-settings)
1002
+ on the EVI Configuration page.
842
1003
  custom_session_id:
843
1004
  type: optional<string>
844
1005
  docs: >-
845
- Used to manage conversational state, correlate frontend and backend
846
- data, and persist conversations across EVI sessions.
1006
+ Unique identifier for the session. Used to manage conversational
1007
+ state, correlate frontend and backend data, and persist conversations
1008
+ across EVI sessions.
1009
+
1010
+
1011
+ If included, the response sent from Hume to your backend will include
1012
+ this ID. This allows you to correlate frontend users with their
1013
+ incoming messages.
1014
+
1015
+
1016
+ It is recommended to pass a `custom_session_id` if you are using a
1017
+ Custom Language Model. Please see our guide to [using a custom
1018
+ language
1019
+ model](/docs/empathic-voice-interface-evi/custom-language-model) with
1020
+ EVI to learn more.
847
1021
  system_prompt:
848
1022
  type: optional<string>
849
1023
  docs: >-
850
- Instructions for how the system should respond to the user. Set to
851
- null to use the default system prompt.
1024
+ Instructions used to shape EVI’s behavior, responses, and style for
1025
+ the session.
1026
+
1027
+
1028
+ When included in a Session Settings message, the provided Prompt
1029
+ overrides the existing one specified in the EVI configuration. If no
1030
+ Prompt was defined in the configuration, this Prompt will be the one
1031
+ used for the session.
1032
+
1033
+
1034
+ You can use the Prompt to define a specific goal or role for EVI,
1035
+ specifying how it should act or what it should focus on during the
1036
+ conversation. For example, EVI can be instructed to act as a customer
1037
+ support representative, a fitness coach, or a travel advisor, each
1038
+ with its own set of behaviors and response styles.
1039
+
1040
+
1041
+ For help writing a system prompt, see our [Prompting
1042
+ Guide](/docs/empathic-voice-interface-evi/prompting).
852
1043
  context:
853
1044
  type: optional<Context>
854
- docs: User context to inject. Set to null to disable context injection.
1045
+ docs: >-
1046
+ Allows developers to inject additional context into the conversation,
1047
+ which is appended to the end of user messages for the session.
1048
+
1049
+
1050
+ When included in a Session Settings message, the provided context can
1051
+ be used to remind the LLM of its role in every user message, prevent
1052
+ it from forgetting important details, or add new relevant information
1053
+ to the conversation.
1054
+
1055
+
1056
+ Set to `null` to disable context injection.
855
1057
  audio:
856
1058
  type: optional<AudioConfiguration>
857
- docs: Audio configuration.
1059
+ docs: >-
1060
+ Configuration details for the audio input used during the session.
1061
+ Ensures the audio is being correctly set up for processing.
1062
+
1063
+
1064
+ This optional field is only required when the audio input is encoded
1065
+ in PCM Linear 16 (16-bit, little-endian, signed PCM WAV data). For
1066
+ detailed instructions on how to configure session settings for PCM
1067
+ Linear 16 audio, please refer to the [Session Settings
1068
+ section](/docs/empathic-voice-interface-evi/configuration#session-settings)
1069
+ on the EVI Configuration page.
858
1070
  language_model_api_key:
859
1071
  type: optional<string>
860
- docs: Third party API key for the language model used for non-Hume models.
1072
+ docs: >-
1073
+ Third party API key for the supplemental language model.
1074
+
1075
+
1076
+ When provided, EVI will use this key instead of Hume’s API key for the
1077
+ supplemental LLM. This allows you to bypass rate limits and utilize
1078
+ your own API key as needed.
861
1079
  tools:
862
1080
  type: optional<list<Tool>>
863
- docs: List of tools to enable.
1081
+ docs: >-
1082
+ List of user-defined tools to enable for the session.
1083
+
1084
+
1085
+ Tools are resources used by EVI to perform various tasks, such as
1086
+ searching the web or calling external APIs. Built-in tools, like web
1087
+ search, are natively integrated, while user-defined tools are created
1088
+ and invoked by the user. To learn more, see our [Tool Use
1089
+ Guide](/docs/empathic-voice-interface-evi/tool-use).
864
1090
  builtin_tools:
865
1091
  type: optional<list<BuiltinToolConfig>>
866
- docs: List of builtin tools to enable.
1092
+ docs: >-
1093
+ List of built-in tools to enable for the session.
1094
+
1095
+
1096
+ Tools are resources used by EVI to perform various tasks, such as
1097
+ searching the web or calling external APIs. Built-in tools, like web
1098
+ search, are natively integrated, while user-defined tools are created
1099
+ and invoked by the user. To learn more, see our [Tool Use
1100
+ Guide](/docs/empathic-voice-interface-evi/tool-use).
1101
+
1102
+
1103
+ Currently, the only built-in tool Hume provides is **Web Search**.
1104
+ When enabled, Web Search equips EVI with the ability to search the web
1105
+ for up-to-date information.
867
1106
  metadata: optional<map<string, unknown>>
868
1107
  variables:
869
1108
  type: optional<map<string, string>>
@@ -872,21 +1111,31 @@ types:
872
1111
  properties:
873
1112
  type:
874
1113
  type: ToolType
875
- docs: Type of tool.
1114
+ docs: Type of tool. Set to `function` for user-defined tools.
876
1115
  name:
877
1116
  type: string
878
- docs: Name of the tool.
1117
+ docs: Name of the user-defined tool to be enabled.
879
1118
  parameters:
880
1119
  type: string
881
- docs: Parameters of the tool. Is a stringified JSON schema.
1120
+ docs: >-
1121
+ Parameters of the tool. Is a stringified JSON schema.
1122
+
1123
+
1124
+ These parameters define the inputs needed for the tool’s execution,
1125
+ including the expected data type and description for each input field.
1126
+ Structured as a JSON schema, this format ensures the tool receives
1127
+ data in the expected format.
882
1128
  description:
883
1129
  type: optional<string>
884
- docs: Description of the function.
1130
+ docs: >-
1131
+ An optional description of what the tool does, used by the
1132
+ supplemental LLM to choose when and how to call the function.
885
1133
  fallback_content:
886
1134
  type: optional<string>
887
1135
  docs: >-
888
- Fallback content of the tool, passed to the LLM if the function call
889
- response fails.
1136
+ Optional text passed to the supplemental LLM if the tool call fails.
1137
+ The LLM then uses this text to generate a response back to the user,
1138
+ ensuring continuity in the conversation.
890
1139
  ToolErrorMessage:
891
1140
  docs: When provided, the output is a function call error.
892
1141
  properties:
@@ -895,6 +1144,12 @@ types:
895
1144
  docs: >-
896
1145
  The type of message sent through the socket; for a Tool Error message,
897
1146
  this must be `tool_error`.
1147
+
1148
+
1149
+ Upon receiving a [Tool Call
1150
+ message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
1151
+ and failing to invoke the function, this message is sent to notify EVI
1152
+ of the tool's failure.
898
1153
  custom_session_id:
899
1154
  type: optional<string>
900
1155
  docs: >-
@@ -902,22 +1157,37 @@ types:
902
1157
  data, and persist conversations across EVI sessions.
903
1158
  tool_type:
904
1159
  type: optional<ToolType>
905
- docs: Type of tool called, either 'builtin' or 'function'.
1160
+ docs: >-
1161
+ Type of tool called. Either `builtin` for natively implemented tools,
1162
+ like web search, or `function` for user-defined tools.
906
1163
  tool_call_id:
907
1164
  type: string
908
- docs: ID of the tool call.
1165
+ docs: >-
1166
+ The unique identifier for a specific tool call instance.
1167
+
1168
+
1169
+ This ID is used to track the request and response of a particular tool
1170
+ invocation, ensuring that the Tool Error message is linked to the
1171
+ appropriate tool call request. The specified `tool_call_id` must match
1172
+ the one received in the [Tool Call
1173
+ message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.tool_call_id).
909
1174
  content:
910
1175
  type: optional<string>
911
- docs: The content passed to the LLM in place of the tool response.
1176
+ docs: >-
1177
+ Optional text passed to the supplemental LLM in place of the tool call
1178
+ result. The LLM then uses this text to generate a response back to the
1179
+ user, ensuring continuity in the conversation if the tool errors.
912
1180
  error:
913
1181
  type: string
914
1182
  docs: Error message from the tool call, not exposed to the LLM or user.
915
1183
  code:
916
1184
  type: optional<string>
917
- docs: Error code.
1185
+ docs: Error code. Identifies the type of error encountered.
918
1186
  level:
919
1187
  type: optional<ErrorLevel>
920
- docs: Error level.
1188
+ docs: >-
1189
+ Indicates the severity of an error; for a Tool Error message, this
1190
+ must be `warn` to signal an unexpected event.
921
1191
  ToolResponseMessage:
922
1192
  docs: When provided, the output is a function call response.
923
1193
  properties:
@@ -926,6 +1196,12 @@ types:
926
1196
  docs: >-
927
1197
  The type of message sent through the socket; for a Tool Response
928
1198
  message, this must be `tool_response`.
1199
+
1200
+
1201
+ Upon receiving a [Tool Call
1202
+ message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
1203
+ and successfully invoking the function, this message is sent to convey
1204
+ the result of the function call back to EVI.
929
1205
  custom_session_id:
930
1206
  type: optional<string>
931
1207
  docs: >-
@@ -933,12 +1209,35 @@ types:
933
1209
  data, and persist conversations across EVI sessions.
934
1210
  tool_call_id:
935
1211
  type: string
936
- docs: ID of the tool call.
1212
+ docs: >-
1213
+ The unique identifier for a specific tool call instance.
1214
+
1215
+
1216
+ This ID is used to track the request and response of a particular tool
1217
+ invocation, ensuring that the correct response is linked to the
1218
+ appropriate request. The specified `tool_call_id` must match the one
1219
+ received in the [Tool Call
1220
+ message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.tool_call_id).
937
1221
  content:
938
1222
  type: string
939
- docs: Return value of the tool call.
940
- tool_name: optional<string>
941
- tool_type: optional<ToolType>
1223
+ docs: >-
1224
+ Return value of the tool call. Contains the output generated by the
1225
+ tool to pass back to EVI.
1226
+ tool_name:
1227
+ type: optional<string>
1228
+ docs: >-
1229
+ Name of the tool.
1230
+
1231
+
1232
+ Include this optional field to help the supplemental LLM identify
1233
+ which tool generated the response. The specified `tool_name` must
1234
+ match the one received in the [Tool Call
1235
+ message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type).
1236
+ tool_type:
1237
+ type: optional<ToolType>
1238
+ docs: >-
1239
+ Type of tool called. Either `builtin` for natively implemented tools,
1240
+ like web search, or `function` for user-defined tools.
942
1241
  ToolType:
943
1242
  enum:
944
1243
  - builtin
@@ -949,8 +1248,9 @@ types:
949
1248
  type:
950
1249
  type: literal<"user_input">
951
1250
  docs: >-
952
- The type of message sent through the socket; for a User Input message,
953
- this must be `user_input`.
1251
+ The type of message sent through the socket; must be `user_input` for
1252
+ our server to correctly identify and process it as a User Input
1253
+ message.
954
1254
  custom_session_id:
955
1255
  type: optional<string>
956
1256
  docs: >-
@@ -958,7 +1258,15 @@ types:
958
1258
  data, and persist conversations across EVI sessions.
959
1259
  text:
960
1260
  type: string
961
- docs: User text to insert into the conversation.
1261
+ docs: >-
1262
+ User text to insert into the conversation. Text sent through a User
1263
+ Input message is treated as the user’s speech to EVI. EVI processes
1264
+ this input and provides a corresponding response.
1265
+
1266
+
1267
+ Expression measurement results are not available for User Input
1268
+ messages, as the prosody model relies on audio input and cannot
1269
+ process text alone.
962
1270
  AssistantEnd:
963
1271
  docs: When provided, the output is an assistant end message.
964
1272
  properties:
@@ -967,6 +1275,11 @@ types:
967
1275
  docs: >-
968
1276
  The type of message sent through the socket; for an Assistant End
969
1277
  message, this must be `assistant_end`.
1278
+
1279
+
1280
+ This message indicates the conclusion of the assistant’s response,
1281
+ signaling that the assistant has finished speaking for the current
1282
+ conversational turn.
970
1283
  custom_session_id:
971
1284
  type: optional<string>
972
1285
  docs: >-
@@ -980,6 +1293,11 @@ types:
980
1293
  docs: >-
981
1294
  The type of message sent through the socket; for an Assistant Message,
982
1295
  this must be `assistant_message`.
1296
+
1297
+
1298
+ This message contains both a transcript of the assistant’s response
1299
+ and the expression measurement predictions of the assistant’s audio
1300
+ output.
983
1301
  custom_session_id:
984
1302
  type: optional<string>
985
1303
  docs: >-
@@ -987,7 +1305,9 @@ types:
987
1305
  data, and persist conversations across EVI sessions.
988
1306
  id:
989
1307
  type: optional<string>
990
- docs: ID of the assistant message.
1308
+ docs: >-
1309
+ ID of the assistant message. Allows the Assistant Message to be
1310
+ tracked and referenced.
991
1311
  message:
992
1312
  type: ChatMessage
993
1313
  docs: Transcript of the message.
@@ -996,7 +1316,10 @@ types:
996
1316
  docs: Inference model results.
997
1317
  from_text:
998
1318
  type: boolean
999
- docs: Indicates if this message was constructed from a text input message.
1319
+ docs: >-
1320
+ Indicates if this message was inserted into the conversation as text
1321
+ from an [Assistant Input
1322
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.Assistant%20Input.text).
1000
1323
  AudioOutput:
1001
1324
  docs: When provided, the output is audio.
1002
1325
  properties:
@@ -1012,12 +1335,15 @@ types:
1012
1335
  data, and persist conversations across EVI sessions.
1013
1336
  id:
1014
1337
  type: string
1015
- docs: ID of the audio output.
1338
+ docs: >-
1339
+ ID of the audio output. Allows the Audio Output message to be tracked
1340
+ and referenced.
1016
1341
  data:
1017
1342
  type: string
1018
- docs: Base64 encoded audio output.
1019
- validation:
1020
- format: base64
1343
+ docs: >-
1344
+ Base64 encoded audio output. This encoded audio is transmitted to the
1345
+ client, where it can be decoded and played back as part of the user
1346
+ interaction.
1021
1347
  ChatMessageToolResult:
1022
1348
  discriminated: false
1023
1349
  docs: Function call response from client.
@@ -1046,6 +1372,11 @@ types:
1046
1372
  docs: >-
1047
1373
  The type of message sent through the socket; for a Chat Metadata
1048
1374
  message, this must be `chat_metadata`.
1375
+
1376
+
1377
+ The Chat Metadata message is the first message you receive after
1378
+ establishing a connection with EVI and contains important identifiers
1379
+ for the current Chat session.
1049
1380
  custom_session_id:
1050
1381
  type: optional<string>
1051
1382
  docs: >-
@@ -1053,10 +1384,25 @@ types:
1053
1384
  data, and persist conversations across EVI sessions.
1054
1385
  chat_group_id:
1055
1386
  type: string
1056
- docs: ID of the chat group. Used to resume a chat.
1387
+ docs: >-
1388
+ ID of the Chat Group.
1389
+
1390
+
1391
+ Used to resume a Chat when passed in the
1392
+ [resumed_chat_group_id](/reference/empathic-voice-interface-evi/chat/chat#request.query.resumed_chat_group_id)
1393
+ query parameter of a subsequent connection request. This allows EVI to
1394
+ continue the conversation from where it left off within the Chat
1395
+ Group.
1396
+
1397
+
1398
+ Learn more about [supporting chat
1399
+ resumability](/docs/empathic-voice-interface-evi/faq#does-evi-support-chat-resumability)
1400
+ from the EVI FAQ.
1057
1401
  chat_id:
1058
1402
  type: string
1059
- docs: ID of the chat.
1403
+ docs: >-
1404
+ ID of the Chat session. Allows the Chat session to be tracked and
1405
+ referenced.
1060
1406
  request_id:
1061
1407
  type: optional<string>
1062
1408
  docs: ID of the initiating request.
@@ -1118,6 +1464,11 @@ types:
1118
1464
  docs: >-
1119
1465
  The type of message sent through the socket; for a Web Socket Error
1120
1466
  message, this must be `error`.
1467
+
1468
+
1469
+ This message indicates a disruption in the WebSocket connection, such
1470
+ as an unexpected disconnection, protocol error, or data transmission
1471
+ issue.
1121
1472
  custom_session_id:
1122
1473
  type: optional<string>
1123
1474
  docs: >-
@@ -1125,18 +1476,27 @@ types:
1125
1476
  data, and persist conversations across EVI sessions.
1126
1477
  code:
1127
1478
  type: string
1128
- docs: Error code.
1479
+ docs: Error code. Identifies the type of error encountered.
1129
1480
  slug:
1130
1481
  type: string
1131
- docs: Error slug.
1482
+ docs: >-
1483
+ Short, human-readable identifier and description for the error. See a
1484
+ complete list of error slugs on the [Errors
1485
+ page](/docs/resources/errors).
1132
1486
  message:
1133
1487
  type: string
1134
- docs: Error message.
1488
+ docs: Detailed description of the error.
1135
1489
  Inference:
1136
1490
  properties:
1137
1491
  prosody:
1138
1492
  type: optional<ProsodyInference>
1139
- docs: Prosody model inference results.
1493
+ docs: >-
1494
+ Prosody model inference results.
1495
+
1496
+
1497
+ EVI uses the prosody model to measure 48 expressions related to speech
1498
+ and vocal characteristics. These results contain a detailed emotional
1499
+ and tonal analysis of the audio.
1140
1500
  MillisecondInterval:
1141
1501
  properties:
1142
1502
  begin:
@@ -1147,7 +1507,14 @@ types:
1147
1507
  docs: End time of the interval in milliseconds.
1148
1508
  ProsodyInference:
1149
1509
  properties:
1150
- scores: EmotionScores
1510
+ scores:
1511
+ type: EmotionScores
1512
+ docs: >-
1513
+ The confidence levels of 48 expressions in a given audio sample.
1514
+
1515
+
1516
+ Scores typically range from 0 to 1, with higher values indicating a
1517
+ stronger confidence level in the measured attribute.
1151
1518
  Role:
1152
1519
  enum:
1153
1520
  - assistant
@@ -1163,15 +1530,32 @@ types:
1163
1530
  docs: Name of the tool called.
1164
1531
  parameters:
1165
1532
  type: string
1166
- docs: Parameters of the tool call. Is a stringified JSON schema.
1533
+ docs: >-
1534
+ Parameters of the tool.
1535
+
1536
+
1537
+ These parameters define the inputs needed for the tool’s execution,
1538
+ including the expected data type and description for each input field.
1539
+ Structured as a stringified JSON schema, this format ensures the tool
1540
+ receives data in the expected format.
1167
1541
  tool_call_id:
1168
1542
  type: string
1169
- docs: ID of the tool call.
1543
+ docs: >-
1544
+ The unique identifier for a specific tool call instance.
1545
+
1546
+
1547
+ This ID is used to track the request and response of a particular tool
1548
+ invocation, ensuring that the correct response is linked to the
1549
+ appropriate request.
1170
1550
  type:
1171
1551
  type: literal<"tool_call">
1172
1552
  docs: >-
1173
1553
  The type of message sent through the socket; for a Tool Call message,
1174
1554
  this must be `tool_call`.
1555
+
1556
+
1557
+ This message indicates that the supplemental LLM has detected a need
1558
+ to invoke the specified tool.
1175
1559
  custom_session_id:
1176
1560
  type: optional<string>
1177
1561
  docs: >-
@@ -1179,10 +1563,17 @@ types:
1179
1563
  data, and persist conversations across EVI sessions.
1180
1564
  tool_type:
1181
1565
  type: optional<ToolType>
1182
- docs: Type of tool called, either 'builtin' or 'function'.
1566
+ docs: >-
1567
+ Type of tool called. Either `builtin` for natively implemented tools,
1568
+ like web search, or `function` for user-defined tools.
1183
1569
  response_required:
1184
1570
  type: boolean
1185
- docs: Whether a response is required from the developer.
1571
+ docs: >-
1572
+ Indicates whether a response to the tool call is required from the
1573
+ developer, either in the form of a [Tool Response
1574
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Response%20Message.type)
1575
+ or a [Tool Error
1576
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Error%20Message.type).
1186
1577
  UserInterruption:
1187
1578
  docs: When provided, the output is an interruption.
1188
1579
  properties:
@@ -1191,6 +1582,13 @@ types:
1191
1582
  docs: >-
1192
1583
  The type of message sent through the socket; for a User Interruption
1193
1584
  message, this must be `user_interruption`.
1585
+
1586
+
1587
+ This message indicates the user has interrupted the assistant’s
1588
+ response. EVI detects the interruption in real time and sends this
1589
+ message to signal the interruption event. This message allows the
1590
+ system to stop the current audio playback, clear the audio queue, and
1591
+ prepare to handle new user input.
1194
1592
  custom_session_id:
1195
1593
  type: optional<string>
1196
1594
  docs: >-
@@ -1205,8 +1603,18 @@ types:
1205
1603
  type:
1206
1604
  type: literal<"user_message">
1207
1605
  docs: >-
1208
- The type of message sent through the socket; for a User message, this
1606
+ The type of message sent through the socket; for a User Message, this
1209
1607
  must be `user_message`.
1608
+
1609
+
1610
+ This message contains both a transcript of the user’s input and the
1611
+ expression measurement predictions if the input was sent as an [Audio
1612
+ Input
1613
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.Audio%20Input.type).
1614
+ Expression measurement predictions are not provided for a [User Input
1615
+ message](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.type),
1616
+ as the prosody model relies on audio input and cannot process text
1617
+ alone.
1210
1618
  custom_session_id:
1211
1619
  type: optional<string>
1212
1620
  docs: >-
@@ -1223,7 +1631,22 @@ types:
1223
1631
  docs: Start and End time of user message.
1224
1632
  from_text:
1225
1633
  type: boolean
1226
- docs: Indicates if this message was constructed from a text input message.
1634
+ docs: >-
1635
+ Indicates if this message was inserted into the conversation as text
1636
+ from a [User
1637
+ Input](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.text)
1638
+ message.
1639
+ JsonMessage:
1640
+ discriminated: false
1641
+ union:
1642
+ - AssistantMessage
1643
+ - ChatMetadata
1644
+ - WebSocketError
1645
+ - UserInterruption
1646
+ - UserMessage
1647
+ - ToolCallMessage
1648
+ - ToolResponseMessage
1649
+ - ToolErrorMessage
1227
1650
  TtsInput:
1228
1651
  properties:
1229
1652
  type: optional<literal<"tts">>
@@ -1253,8 +1676,12 @@ types:
1253
1676
  VoiceArgs:
1254
1677
  properties:
1255
1678
  voice: optional<VoiceNameEnum>
1256
- baseline: optional<boolean>
1257
- reconstruct: optional<boolean>
1679
+ baseline:
1680
+ type: optional<boolean>
1681
+ default: false
1682
+ reconstruct:
1683
+ type: optional<boolean>
1684
+ default: false
1258
1685
  VoiceNameEnum:
1259
1686
  enum:
1260
1687
  - ITO