hume 0.8.1-beta1 → 0.8.1-beta10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mock/definition/empathic-voice/__package__.yml +499 -72
- package/.mock/definition/empathic-voice/chat.yml +81 -4
- package/.mock/definition/empathic-voice/configs.yml +40 -0
- package/.mock/definition/expression-measurement/__package__.yml +13 -11
- package/.mock/definition/expression-measurement/stream.yml +4 -0
- package/.mock/fern.config.json +1 -1
- package/Client.d.ts +4 -3
- package/Client.js +6 -6
- package/api/resources/empathicVoice/client/Client.d.ts +1 -0
- package/api/resources/empathicVoice/client/index.d.ts +1 -0
- package/api/resources/empathicVoice/client/index.js +2 -0
- package/api/resources/empathicVoice/index.d.ts +1 -0
- package/api/resources/empathicVoice/index.js +1 -0
- package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +6 -0
- package/api/resources/empathicVoice/resources/chat/client/Client.js +12 -3
- package/api/resources/empathicVoice/resources/chat/client/Socket.d.ts +8 -0
- package/api/resources/empathicVoice/resources/chat/client/Socket.js +57 -1
- package/api/resources/empathicVoice/resources/chat/index.d.ts +1 -0
- package/api/resources/empathicVoice/resources/chat/index.js +1 -0
- package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -0
- package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +1 -0
- package/api/resources/empathicVoice/types/AssistantEnd.d.ts +6 -2
- package/api/resources/empathicVoice/types/AssistantInput.d.ts +6 -2
- package/api/resources/empathicVoice/types/AssistantMessage.d.ts +7 -3
- package/api/resources/empathicVoice/types/AudioConfiguration.d.ts +3 -3
- package/api/resources/empathicVoice/types/AudioInput.d.ts +12 -2
- package/api/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
- package/api/resources/empathicVoice/types/BuiltInTool.d.ts +3 -0
- package/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -0
- package/api/resources/empathicVoice/types/ChatMetadata.d.ts +13 -3
- package/api/resources/empathicVoice/types/Context.d.ts +16 -2
- package/api/resources/empathicVoice/types/Inference.d.ts +5 -1
- package/api/resources/empathicVoice/types/JsonMessage.d.ts +5 -0
- package/api/resources/empathicVoice/types/JsonMessage.js +5 -0
- package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +5 -1
- package/api/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +12 -2
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +18 -0
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +5 -0
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +18 -0
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +5 -0
- package/api/resources/empathicVoice/types/ProsodyInference.d.ts +5 -0
- package/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +5 -1
- package/api/resources/empathicVoice/types/ReturnConfig.d.ts +1 -0
- package/api/resources/empathicVoice/types/SessionSettings.d.ts +52 -8
- package/api/resources/empathicVoice/types/Tool.d.ts +9 -5
- package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +17 -5
- package/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +14 -6
- package/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +17 -3
- package/api/resources/empathicVoice/types/UserInput.d.ts +6 -2
- package/api/resources/empathicVoice/types/UserInterruption.d.ts +5 -1
- package/api/resources/empathicVoice/types/UserMessage.d.ts +6 -2
- package/api/resources/empathicVoice/types/WebSocketError.d.ts +8 -4
- package/api/resources/empathicVoice/types/index.d.ts +3 -0
- package/api/resources/empathicVoice/types/index.js +3 -0
- package/api/resources/index.d.ts +1 -1
- package/api/resources/index.js +2 -2
- package/core/websocket/ws.js +6 -0
- package/dist/Client.d.ts +4 -3
- package/dist/Client.js +6 -6
- package/dist/api/resources/empathicVoice/client/Client.d.ts +1 -0
- package/dist/api/resources/empathicVoice/client/index.d.ts +1 -0
- package/dist/api/resources/empathicVoice/client/index.js +2 -0
- package/dist/api/resources/empathicVoice/index.d.ts +1 -0
- package/dist/api/resources/empathicVoice/index.js +1 -0
- package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +6 -0
- package/dist/api/resources/empathicVoice/resources/chat/client/Client.js +12 -3
- package/dist/api/resources/empathicVoice/resources/chat/client/Socket.d.ts +8 -0
- package/dist/api/resources/empathicVoice/resources/chat/client/Socket.js +57 -1
- package/dist/api/resources/empathicVoice/resources/chat/index.d.ts +1 -0
- package/dist/api/resources/empathicVoice/resources/chat/index.js +1 -0
- package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -0
- package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +1 -0
- package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +6 -2
- package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +6 -2
- package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +7 -3
- package/dist/api/resources/empathicVoice/types/AudioConfiguration.d.ts +3 -3
- package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +12 -2
- package/dist/api/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/BuiltInTool.d.ts +3 -0
- package/dist/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -0
- package/dist/api/resources/empathicVoice/types/ChatMetadata.d.ts +13 -3
- package/dist/api/resources/empathicVoice/types/Context.d.ts +16 -2
- package/dist/api/resources/empathicVoice/types/Inference.d.ts +5 -1
- package/dist/api/resources/empathicVoice/types/JsonMessage.d.ts +5 -0
- package/dist/api/resources/empathicVoice/types/JsonMessage.js +5 -0
- package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +5 -1
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +12 -2
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +18 -0
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +5 -0
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +18 -0
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +5 -0
- package/dist/api/resources/empathicVoice/types/ProsodyInference.d.ts +5 -0
- package/dist/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +5 -1
- package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +1 -0
- package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +52 -8
- package/dist/api/resources/empathicVoice/types/Tool.d.ts +9 -5
- package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +17 -5
- package/dist/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +14 -6
- package/dist/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +17 -3
- package/dist/api/resources/empathicVoice/types/UserInput.d.ts +6 -2
- package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +5 -1
- package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +6 -2
- package/dist/api/resources/empathicVoice/types/WebSocketError.d.ts +8 -4
- package/dist/api/resources/empathicVoice/types/index.d.ts +3 -0
- package/dist/api/resources/empathicVoice/types/index.js +3 -0
- package/dist/api/resources/index.d.ts +1 -1
- package/dist/api/resources/index.js +2 -2
- package/dist/core/websocket/ws.js +6 -0
- package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +2 -0
- package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.js +2 -0
- package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +2 -0
- package/dist/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.js +2 -0
- package/dist/serialization/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/AssistantEnd.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/JsonMessage.d.ts +18 -0
- package/dist/serialization/resources/empathicVoice/types/JsonMessage.js +48 -0
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +4 -3
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.js +4 -3
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +13 -0
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +34 -0
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +13 -0
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +34 -0
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -0
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -0
- package/dist/serialization/resources/empathicVoice/types/index.d.ts +3 -0
- package/dist/serialization/resources/empathicVoice/types/index.js +3 -0
- package/dist/serialization/resources/index.d.ts +1 -1
- package/dist/serialization/resources/index.js +2 -2
- package/dist/wrapper/checkForAudioTracks.d.ts +8 -0
- package/dist/wrapper/checkForAudioTracks.js +24 -0
- package/dist/wrapper/fetchAccessToken.d.ts +23 -0
- package/dist/wrapper/fetchAccessToken.js +51 -0
- package/dist/wrapper/index.d.ts +2 -0
- package/dist/wrapper/index.js +5 -1
- package/package.json +1 -1
- package/reference.md +444 -444
- package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +2 -0
- package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfig.js +2 -0
- package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.d.ts +2 -0
- package/serialization/resources/empathicVoice/resources/configs/client/requests/PostedConfigVersion.js +2 -0
- package/serialization/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/AssistantEnd.js +1 -1
- package/serialization/resources/empathicVoice/types/JsonMessage.d.ts +18 -0
- package/serialization/resources/empathicVoice/types/JsonMessage.js +48 -0
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.d.ts +4 -3
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecs.js +4 -3
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +13 -0
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +34 -0
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +13 -0
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +34 -0
- package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -0
- package/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -0
- package/serialization/resources/empathicVoice/types/index.d.ts +3 -0
- package/serialization/resources/empathicVoice/types/index.js +3 -0
- package/serialization/resources/index.d.ts +1 -1
- package/serialization/resources/index.js +2 -2
- package/wrapper/checkForAudioTracks.d.ts +8 -0
- package/wrapper/checkForAudioTracks.js +24 -0
- package/wrapper/fetchAccessToken.d.ts +23 -0
- package/wrapper/fetchAccessToken.js +51 -0
- package/wrapper/index.d.ts +2 -0
- package/wrapper/index.js +5 -1
|
@@ -209,11 +209,77 @@ types:
|
|
|
209
209
|
duration_secs:
|
|
210
210
|
type: optional<integer>
|
|
211
211
|
docs: Duration in seconds for the timeout.
|
|
212
|
+
PostedTimeoutSpecsInactivity:
|
|
213
|
+
docs: >-
|
|
214
|
+
Specifies the duration of user inactivity (in seconds) after which the EVI
|
|
215
|
+
WebSocket connection will be automatically disconnected. Default is 600
|
|
216
|
+
seconds (10 minutes).
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
|
|
220
|
+
properties:
|
|
221
|
+
enabled:
|
|
222
|
+
type: boolean
|
|
223
|
+
docs: >-
|
|
224
|
+
Boolean indicating if this timeout is enabled.
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
If set to `false`, EVI will not timeout due to a specified duration of
|
|
228
|
+
user inactivity being reached. However, the conversation will
|
|
229
|
+
eventually disconnect after 1,800 seconds (30 minutes), which is the
|
|
230
|
+
maximum WebSocket duration limit for EVI.
|
|
231
|
+
duration_secs:
|
|
232
|
+
type: optional<integer>
|
|
233
|
+
docs: >-
|
|
234
|
+
Duration in seconds for the timeout (e.g. 600 seconds represents 10
|
|
235
|
+
minutes).
|
|
236
|
+
PostedTimeoutSpecsMaxDuration:
|
|
237
|
+
docs: >-
|
|
238
|
+
Specifies the maximum allowed duration (in seconds) for an EVI WebSocket
|
|
239
|
+
connection before it is automatically disconnected. Default is 1,800
|
|
240
|
+
seconds (30 minutes).
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
Accepts a minimum value of 1 second and a maximum value of 1,800 seconds.
|
|
244
|
+
properties:
|
|
245
|
+
enabled:
|
|
246
|
+
type: boolean
|
|
247
|
+
docs: >-
|
|
248
|
+
Boolean indicating if this timeout is enabled.
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
If set to `false`, EVI will not timeout due to a specified maximum
|
|
252
|
+
duration being reached. However, the conversation will eventually
|
|
253
|
+
disconnect after 1,800 seconds (30 minutes), which is the maximum
|
|
254
|
+
WebSocket duration limit for EVI.
|
|
255
|
+
duration_secs:
|
|
256
|
+
type: optional<integer>
|
|
257
|
+
docs: >-
|
|
258
|
+
Duration in seconds for the timeout (e.g. 600 seconds represents 10
|
|
259
|
+
minutes).
|
|
212
260
|
PostedTimeoutSpecs:
|
|
213
261
|
docs: Collection of timeout specs to be posted to the server
|
|
214
262
|
properties:
|
|
215
|
-
inactivity:
|
|
216
|
-
|
|
263
|
+
inactivity:
|
|
264
|
+
type: optional<PostedTimeoutSpecsInactivity>
|
|
265
|
+
docs: >-
|
|
266
|
+
Specifies the duration of user inactivity (in seconds) after which the
|
|
267
|
+
EVI WebSocket connection will be automatically disconnected. Default
|
|
268
|
+
is 600 seconds (10 minutes).
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
Accepts a minimum value of 1 second and a maximum value of 1,800
|
|
272
|
+
seconds.
|
|
273
|
+
max_duration:
|
|
274
|
+
type: optional<PostedTimeoutSpecsMaxDuration>
|
|
275
|
+
docs: >-
|
|
276
|
+
Specifies the maximum allowed duration (in seconds) for an EVI
|
|
277
|
+
WebSocket connection before it is automatically disconnected. Default
|
|
278
|
+
is 1,800 seconds (30 minutes).
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
Accepts a minimum value of 1 second and a maximum value of 1,800
|
|
282
|
+
seconds.
|
|
217
283
|
PostedUserDefinedToolSpec:
|
|
218
284
|
docs: A specific tool identifier to be posted to the server
|
|
219
285
|
properties:
|
|
@@ -290,6 +356,7 @@ types:
|
|
|
290
356
|
type: optional<list<optional<ReturnBuiltinTool>>>
|
|
291
357
|
docs: List of built-in tools associated with this config
|
|
292
358
|
event_messages: optional<ReturnEventMessageSpecs>
|
|
359
|
+
timeouts: optional<ReturnTimeoutSpecs>
|
|
293
360
|
ReturnEllmModel:
|
|
294
361
|
docs: A specific eLLM Model configuration
|
|
295
362
|
properties:
|
|
@@ -742,8 +809,9 @@ types:
|
|
|
742
809
|
type:
|
|
743
810
|
type: literal<"assistant_input">
|
|
744
811
|
docs: >-
|
|
745
|
-
The type of message sent through the socket;
|
|
746
|
-
|
|
812
|
+
The type of message sent through the socket; must be `assistant_input`
|
|
813
|
+
for our server to correctly identify and process it as an Assistant
|
|
814
|
+
Input message.
|
|
747
815
|
custom_session_id:
|
|
748
816
|
type: optional<string>
|
|
749
817
|
docs: >-
|
|
@@ -751,26 +819,44 @@ types:
|
|
|
751
819
|
data, and persist conversations across EVI sessions.
|
|
752
820
|
text:
|
|
753
821
|
type: string
|
|
754
|
-
docs:
|
|
822
|
+
docs: >-
|
|
823
|
+
Assistant text to synthesize into spoken audio and insert into the
|
|
824
|
+
conversation.
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
EVI uses this text to generate spoken audio using our proprietary
|
|
828
|
+
expressive text-to-speech model. Our model adds appropriate emotional
|
|
829
|
+
inflections and tones to the text based on the user’s expressions and
|
|
830
|
+
the context of the conversation. The synthesized audio is streamed
|
|
831
|
+
back to the user as an [Assistant
|
|
832
|
+
Message](/reference/empathic-voice-interface-evi/chat/chat#receive.Assistant%20Message.type).
|
|
755
833
|
AudioConfiguration:
|
|
756
834
|
properties:
|
|
757
835
|
encoding:
|
|
758
836
|
type: Encoding
|
|
759
|
-
docs:
|
|
837
|
+
docs: Encoding format of the audio input, such as `linear16`.
|
|
760
838
|
channels:
|
|
761
839
|
type: integer
|
|
762
|
-
docs: Number of channels.
|
|
840
|
+
docs: Number of audio channels.
|
|
763
841
|
sample_rate:
|
|
764
842
|
type: integer
|
|
765
|
-
docs:
|
|
843
|
+
docs: >-
|
|
844
|
+
Audio sample rate. Number of samples per second in the audio input,
|
|
845
|
+
measured in Hertz.
|
|
766
846
|
AudioInput:
|
|
767
847
|
docs: When provided, the input is audio.
|
|
768
848
|
properties:
|
|
769
849
|
type:
|
|
770
850
|
type: literal<"audio_input">
|
|
771
851
|
docs: >-
|
|
772
|
-
The type of message sent through the socket;
|
|
773
|
-
|
|
852
|
+
The type of message sent through the socket; must be `audio_input` for
|
|
853
|
+
our server to correctly identify and process it as an Audio Input
|
|
854
|
+
message.
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
This message is used for sending audio input data to EVI for
|
|
858
|
+
processing and expression measurement. Audio data should be sent as a
|
|
859
|
+
continuous stream, encoded in Base64.
|
|
774
860
|
custom_session_id:
|
|
775
861
|
type: optional<string>
|
|
776
862
|
docs: >-
|
|
@@ -778,22 +864,68 @@ types:
|
|
|
778
864
|
data, and persist conversations across EVI sessions.
|
|
779
865
|
data:
|
|
780
866
|
type: string
|
|
781
|
-
docs:
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
867
|
+
docs: >-
|
|
868
|
+
Base64 encoded audio input to insert into the conversation.
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
The audio input must be captured and transmitted to EVI as a
|
|
872
|
+
continuous stream, with the audio data sent in small chunks for better
|
|
873
|
+
transcription quality. When capturing audio through the browser, we
|
|
874
|
+
recommend recording the audio in 100ms intervals and adjusting from
|
|
875
|
+
there to determine if smaller or larger chunks are needed. These
|
|
876
|
+
chunks should be continuously sent to EVI as Audio Input messages.
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
The content of an Audio Input message is treated as the user’s speech
|
|
880
|
+
to EVI. EVI processes the audio, conducts expression measurement using
|
|
881
|
+
the prosody model, and responds accordingly.
|
|
882
|
+
BuiltInTool:
|
|
883
|
+
type: literal<"web_search">
|
|
884
|
+
docs: >-
|
|
885
|
+
Name of the built-in tool. Set to `web_search` to equip EVI with the
|
|
886
|
+
built-in Web Search tool.
|
|
785
887
|
BuiltinToolConfig:
|
|
786
888
|
properties:
|
|
787
889
|
name: BuiltInTool
|
|
788
|
-
fallback_content:
|
|
890
|
+
fallback_content:
|
|
891
|
+
type: optional<string>
|
|
892
|
+
docs: >-
|
|
893
|
+
Optional text passed to the supplemental LLM if the tool call fails.
|
|
894
|
+
The LLM then uses this text to generate a response back to the user,
|
|
895
|
+
ensuring continuity in the conversation.
|
|
789
896
|
Context:
|
|
790
897
|
properties:
|
|
791
898
|
type:
|
|
792
899
|
type: optional<ContextType>
|
|
793
|
-
docs:
|
|
900
|
+
docs: >-
|
|
901
|
+
The persistence level of the injected context. Specifies how long the
|
|
902
|
+
injected context will remain active in the session.
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
There are three possible context types:
|
|
906
|
+
|
|
907
|
+
|
|
908
|
+
- **Persistent**: The context is appended to all user messages for the
|
|
909
|
+
duration of the session.
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
- **Temporary**: The context is appended only to the next user
|
|
913
|
+
message.
|
|
914
|
+
|
|
915
|
+
- **Editable**: The original context is updated to reflect the new context.
|
|
794
916
|
text:
|
|
795
917
|
type: string
|
|
796
|
-
docs:
|
|
918
|
+
docs: >-
|
|
919
|
+
The context to be injected into the conversation. Helps inform the
|
|
920
|
+
LLM's response by providing relevant information about the ongoing
|
|
921
|
+
conversation.
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
This text will be appended to the end of user messages based on the
|
|
925
|
+
chosen persistence level. For example, if you want to remind EVI of
|
|
926
|
+
its role as a helpful weather assistant, the context you insert will
|
|
927
|
+
be appended to the end of user messages as `{Context: You are a
|
|
928
|
+
helpful weather assistant}`.
|
|
797
929
|
ContextType:
|
|
798
930
|
enum:
|
|
799
931
|
- editable
|
|
@@ -809,8 +941,16 @@ types:
|
|
|
809
941
|
type:
|
|
810
942
|
type: literal<"pause_assistant_message">
|
|
811
943
|
docs: >-
|
|
812
|
-
The type of message sent through the socket;
|
|
813
|
-
|
|
944
|
+
The type of message sent through the socket; must be
|
|
945
|
+
`pause_assistant_message` for our server to correctly identify and
|
|
946
|
+
process it as a Pause Assistant message.
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
Once this message is sent, EVI will not respond until a [Resume
|
|
950
|
+
Assistant
|
|
951
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.Resume%20Assistant%20Message.type)
|
|
952
|
+
is sent. When paused, EVI won’t respond, but transcriptions of your
|
|
953
|
+
audio inputs will still be recorded.
|
|
814
954
|
custom_session_id:
|
|
815
955
|
type: optional<string>
|
|
816
956
|
docs: >-
|
|
@@ -824,8 +964,17 @@ types:
|
|
|
824
964
|
type:
|
|
825
965
|
type: literal<"resume_assistant_message">
|
|
826
966
|
docs: >-
|
|
827
|
-
The type of message sent through the socket;
|
|
828
|
-
|
|
967
|
+
The type of message sent through the socket; must be
|
|
968
|
+
`resume_assistant_message` for our server to correctly identify and
|
|
969
|
+
process it as a Resume Assistant message.
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
Upon resuming, if any audio input was sent during the pause, EVI will
|
|
973
|
+
retain context from all messages sent but only respond to the last
|
|
974
|
+
user message. (e.g., If you ask EVI two questions while paused and
|
|
975
|
+
then send a `resume_assistant_message`, EVI will respond to the second
|
|
976
|
+
question and have added the first question to its conversation
|
|
977
|
+
context.)
|
|
829
978
|
custom_session_id:
|
|
830
979
|
type: optional<string>
|
|
831
980
|
docs: >-
|
|
@@ -837,33 +986,123 @@ types:
|
|
|
837
986
|
type:
|
|
838
987
|
type: literal<"session_settings">
|
|
839
988
|
docs: >-
|
|
840
|
-
The type of message sent through the socket;
|
|
841
|
-
|
|
989
|
+
The type of message sent through the socket; must be
|
|
990
|
+
`session_settings` for our server to correctly identify and process it
|
|
991
|
+
as a Session Settings message.
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
Session settings are temporary and apply only to the current Chat
|
|
995
|
+
session. These settings can be adjusted dynamically based on the
|
|
996
|
+
requirements of each session to ensure optimal performance and user
|
|
997
|
+
experience.
|
|
998
|
+
|
|
999
|
+
|
|
1000
|
+
For more information, please refer to the [Session Settings
|
|
1001
|
+
section](/docs/empathic-voice-interface-evi/configuration#session-settings)
|
|
1002
|
+
on the EVI Configuration page.
|
|
842
1003
|
custom_session_id:
|
|
843
1004
|
type: optional<string>
|
|
844
1005
|
docs: >-
|
|
845
|
-
|
|
846
|
-
data, and persist conversations
|
|
1006
|
+
Unique identifier for the session. Used to manage conversational
|
|
1007
|
+
state, correlate frontend and backend data, and persist conversations
|
|
1008
|
+
across EVI sessions.
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
If included, the response sent from Hume to your backend will include
|
|
1012
|
+
this ID. This allows you to correlate frontend users with their
|
|
1013
|
+
incoming messages.
|
|
1014
|
+
|
|
1015
|
+
|
|
1016
|
+
It is recommended to pass a `custom_session_id` if you are using a
|
|
1017
|
+
Custom Language Model. Please see our guide to [using a custom
|
|
1018
|
+
language
|
|
1019
|
+
model](/docs/empathic-voice-interface-evi/custom-language-model) with
|
|
1020
|
+
EVI to learn more.
|
|
847
1021
|
system_prompt:
|
|
848
1022
|
type: optional<string>
|
|
849
1023
|
docs: >-
|
|
850
|
-
Instructions
|
|
851
|
-
|
|
1024
|
+
Instructions used to shape EVI’s behavior, responses, and style for
|
|
1025
|
+
the session.
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
When included in a Session Settings message, the provided Prompt
|
|
1029
|
+
overrides the existing one specified in the EVI configuration. If no
|
|
1030
|
+
Prompt was defined in the configuration, this Prompt will be the one
|
|
1031
|
+
used for the session.
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
You can use the Prompt to define a specific goal or role for EVI,
|
|
1035
|
+
specifying how it should act or what it should focus on during the
|
|
1036
|
+
conversation. For example, EVI can be instructed to act as a customer
|
|
1037
|
+
support representative, a fitness coach, or a travel advisor, each
|
|
1038
|
+
with its own set of behaviors and response styles.
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
For help writing a system prompt, see our [Prompting
|
|
1042
|
+
Guide](/docs/empathic-voice-interface-evi/prompting).
|
|
852
1043
|
context:
|
|
853
1044
|
type: optional<Context>
|
|
854
|
-
docs:
|
|
1045
|
+
docs: >-
|
|
1046
|
+
Allows developers to inject additional context into the conversation,
|
|
1047
|
+
which is appended to the end of user messages for the session.
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
When included in a Session Settings message, the provided context can
|
|
1051
|
+
be used to remind the LLM of its role in every user message, prevent
|
|
1052
|
+
it from forgetting important details, or add new relevant information
|
|
1053
|
+
to the conversation.
|
|
1054
|
+
|
|
1055
|
+
|
|
1056
|
+
Set to `null` to disable context injection.
|
|
855
1057
|
audio:
|
|
856
1058
|
type: optional<AudioConfiguration>
|
|
857
|
-
docs:
|
|
1059
|
+
docs: >-
|
|
1060
|
+
Configuration details for the audio input used during the session.
|
|
1061
|
+
Ensures the audio is being correctly set up for processing.
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
This optional field is only required when the audio input is encoded
|
|
1065
|
+
in PCM Linear 16 (16-bit, little-endian, signed PCM WAV data). For
|
|
1066
|
+
detailed instructions on how to configure session settings for PCM
|
|
1067
|
+
Linear 16 audio, please refer to the [Session Settings
|
|
1068
|
+
section](/docs/empathic-voice-interface-evi/configuration#session-settings)
|
|
1069
|
+
on the EVI Configuration page.
|
|
858
1070
|
language_model_api_key:
|
|
859
1071
|
type: optional<string>
|
|
860
|
-
docs:
|
|
1072
|
+
docs: >-
|
|
1073
|
+
Third party API key for the supplemental language model.
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
When provided, EVI will use this key instead of Hume’s API key for the
|
|
1077
|
+
supplemental LLM. This allows you to bypass rate limits and utilize
|
|
1078
|
+
your own API key as needed.
|
|
861
1079
|
tools:
|
|
862
1080
|
type: optional<list<Tool>>
|
|
863
|
-
docs:
|
|
1081
|
+
docs: >-
|
|
1082
|
+
List of user-defined tools to enable for the session.
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
Tools are resources used by EVI to perform various tasks, such as
|
|
1086
|
+
searching the web or calling external APIs. Built-in tools, like web
|
|
1087
|
+
search, are natively integrated, while user-defined tools are created
|
|
1088
|
+
and invoked by the user. To learn more, see our [Tool Use
|
|
1089
|
+
Guide](/docs/empathic-voice-interface-evi/tool-use).
|
|
864
1090
|
builtin_tools:
|
|
865
1091
|
type: optional<list<BuiltinToolConfig>>
|
|
866
|
-
docs:
|
|
1092
|
+
docs: >-
|
|
1093
|
+
List of built-in tools to enable for the session.
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
Tools are resources used by EVI to perform various tasks, such as
|
|
1097
|
+
searching the web or calling external APIs. Built-in tools, like web
|
|
1098
|
+
search, are natively integrated, while user-defined tools are created
|
|
1099
|
+
and invoked by the user. To learn more, see our [Tool Use
|
|
1100
|
+
Guide](/docs/empathic-voice-interface-evi/tool-use).
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
Currently, the only built-in tool Hume provides is **Web Search**.
|
|
1104
|
+
When enabled, Web Search equips EVI with the ability to search the web
|
|
1105
|
+
for up-to-date information.
|
|
867
1106
|
metadata: optional<map<string, unknown>>
|
|
868
1107
|
variables:
|
|
869
1108
|
type: optional<map<string, string>>
|
|
@@ -872,21 +1111,31 @@ types:
|
|
|
872
1111
|
properties:
|
|
873
1112
|
type:
|
|
874
1113
|
type: ToolType
|
|
875
|
-
docs: Type of tool.
|
|
1114
|
+
docs: Type of tool. Set to `function` for user-defined tools.
|
|
876
1115
|
name:
|
|
877
1116
|
type: string
|
|
878
|
-
docs: Name of the tool.
|
|
1117
|
+
docs: Name of the user-defined tool to be enabled.
|
|
879
1118
|
parameters:
|
|
880
1119
|
type: string
|
|
881
|
-
docs:
|
|
1120
|
+
docs: >-
|
|
1121
|
+
Parameters of the tool. Is a stringified JSON schema.
|
|
1122
|
+
|
|
1123
|
+
|
|
1124
|
+
These parameters define the inputs needed for the tool’s execution,
|
|
1125
|
+
including the expected data type and description for each input field.
|
|
1126
|
+
Structured as a JSON schema, this format ensures the tool receives
|
|
1127
|
+
data in the expected format.
|
|
882
1128
|
description:
|
|
883
1129
|
type: optional<string>
|
|
884
|
-
docs:
|
|
1130
|
+
docs: >-
|
|
1131
|
+
An optional description of what the tool does, used by the
|
|
1132
|
+
supplemental LLM to choose when and how to call the function.
|
|
885
1133
|
fallback_content:
|
|
886
1134
|
type: optional<string>
|
|
887
1135
|
docs: >-
|
|
888
|
-
|
|
889
|
-
response
|
|
1136
|
+
Optional text passed to the supplemental LLM if the tool call fails.
|
|
1137
|
+
The LLM then uses this text to generate a response back to the user,
|
|
1138
|
+
ensuring continuity in the conversation.
|
|
890
1139
|
ToolErrorMessage:
|
|
891
1140
|
docs: When provided, the output is a function call error.
|
|
892
1141
|
properties:
|
|
@@ -895,6 +1144,12 @@ types:
|
|
|
895
1144
|
docs: >-
|
|
896
1145
|
The type of message sent through the socket; for a Tool Error message,
|
|
897
1146
|
this must be `tool_error`.
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
Upon receiving a [Tool Call
|
|
1150
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
|
|
1151
|
+
and failing to invoke the function, this message is sent to notify EVI
|
|
1152
|
+
of the tool's failure.
|
|
898
1153
|
custom_session_id:
|
|
899
1154
|
type: optional<string>
|
|
900
1155
|
docs: >-
|
|
@@ -902,22 +1157,37 @@ types:
|
|
|
902
1157
|
data, and persist conversations across EVI sessions.
|
|
903
1158
|
tool_type:
|
|
904
1159
|
type: optional<ToolType>
|
|
905
|
-
docs:
|
|
1160
|
+
docs: >-
|
|
1161
|
+
Type of tool called. Either `builtin` for natively implemented tools,
|
|
1162
|
+
like web search, or `function` for user-defined tools.
|
|
906
1163
|
tool_call_id:
|
|
907
1164
|
type: string
|
|
908
|
-
docs:
|
|
1165
|
+
docs: >-
|
|
1166
|
+
The unique identifier for a specific tool call instance.
|
|
1167
|
+
|
|
1168
|
+
|
|
1169
|
+
This ID is used to track the request and response of a particular tool
|
|
1170
|
+
invocation, ensuring that the Tool Error message is linked to the
|
|
1171
|
+
appropriate tool call request. The specified `tool_call_id` must match
|
|
1172
|
+
the one received in the [Tool Call
|
|
1173
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.tool_call_id).
|
|
909
1174
|
content:
|
|
910
1175
|
type: optional<string>
|
|
911
|
-
docs:
|
|
1176
|
+
docs: >-
|
|
1177
|
+
Optional text passed to the supplemental LLM in place of the tool call
|
|
1178
|
+
result. The LLM then uses this text to generate a response back to the
|
|
1179
|
+
user, ensuring continuity in the conversation if the tool errors.
|
|
912
1180
|
error:
|
|
913
1181
|
type: string
|
|
914
1182
|
docs: Error message from the tool call, not exposed to the LLM or user.
|
|
915
1183
|
code:
|
|
916
1184
|
type: optional<string>
|
|
917
|
-
docs: Error code.
|
|
1185
|
+
docs: Error code. Identifies the type of error encountered.
|
|
918
1186
|
level:
|
|
919
1187
|
type: optional<ErrorLevel>
|
|
920
|
-
docs:
|
|
1188
|
+
docs: >-
|
|
1189
|
+
Indicates the severity of an error; for a Tool Error message, this
|
|
1190
|
+
must be `warn` to signal an unexpected event.
|
|
921
1191
|
ToolResponseMessage:
|
|
922
1192
|
docs: When provided, the output is a function call response.
|
|
923
1193
|
properties:
|
|
@@ -926,6 +1196,12 @@ types:
|
|
|
926
1196
|
docs: >-
|
|
927
1197
|
The type of message sent through the socket; for a Tool Response
|
|
928
1198
|
message, this must be `tool_response`.
|
|
1199
|
+
|
|
1200
|
+
|
|
1201
|
+
Upon receiving a [Tool Call
|
|
1202
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type)
|
|
1203
|
+
and successfully invoking the function, this message is sent to convey
|
|
1204
|
+
the result of the function call back to EVI.
|
|
929
1205
|
custom_session_id:
|
|
930
1206
|
type: optional<string>
|
|
931
1207
|
docs: >-
|
|
@@ -933,12 +1209,35 @@ types:
|
|
|
933
1209
|
data, and persist conversations across EVI sessions.
|
|
934
1210
|
tool_call_id:
|
|
935
1211
|
type: string
|
|
936
|
-
docs:
|
|
1212
|
+
docs: >-
|
|
1213
|
+
The unique identifier for a specific tool call instance.
|
|
1214
|
+
|
|
1215
|
+
|
|
1216
|
+
This ID is used to track the request and response of a particular tool
|
|
1217
|
+
invocation, ensuring that the correct response is linked to the
|
|
1218
|
+
appropriate request. The specified `tool_call_id` must match the one
|
|
1219
|
+
received in the [Tool Call
|
|
1220
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.tool_call_id).
|
|
937
1221
|
content:
|
|
938
1222
|
type: string
|
|
939
|
-
docs:
|
|
940
|
-
|
|
941
|
-
|
|
1223
|
+
docs: >-
|
|
1224
|
+
Return value of the tool call. Contains the output generated by the
|
|
1225
|
+
tool to pass back to EVI.
|
|
1226
|
+
tool_name:
|
|
1227
|
+
type: optional<string>
|
|
1228
|
+
docs: >-
|
|
1229
|
+
Name of the tool.
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
Include this optional field to help the supplemental LLM identify
|
|
1233
|
+
which tool generated the response. The specified `tool_name` must
|
|
1234
|
+
match the one received in the [Tool Call
|
|
1235
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#receive.Tool%20Call%20Message.type).
|
|
1236
|
+
tool_type:
|
|
1237
|
+
type: optional<ToolType>
|
|
1238
|
+
docs: >-
|
|
1239
|
+
Type of tool called. Either `builtin` for natively implemented tools,
|
|
1240
|
+
like web search, or `function` for user-defined tools.
|
|
942
1241
|
ToolType:
|
|
943
1242
|
enum:
|
|
944
1243
|
- builtin
|
|
@@ -949,8 +1248,9 @@ types:
|
|
|
949
1248
|
type:
|
|
950
1249
|
type: literal<"user_input">
|
|
951
1250
|
docs: >-
|
|
952
|
-
The type of message sent through the socket;
|
|
953
|
-
|
|
1251
|
+
The type of message sent through the socket; must be `user_input` for
|
|
1252
|
+
our server to correctly identify and process it as a User Input
|
|
1253
|
+
message.
|
|
954
1254
|
custom_session_id:
|
|
955
1255
|
type: optional<string>
|
|
956
1256
|
docs: >-
|
|
@@ -958,15 +1258,28 @@ types:
|
|
|
958
1258
|
data, and persist conversations across EVI sessions.
|
|
959
1259
|
text:
|
|
960
1260
|
type: string
|
|
961
|
-
docs:
|
|
1261
|
+
docs: >-
|
|
1262
|
+
User text to insert into the conversation. Text sent through a User
|
|
1263
|
+
Input message is treated as the user’s speech to EVI. EVI processes
|
|
1264
|
+
this input and provides a corresponding response.
|
|
1265
|
+
|
|
1266
|
+
|
|
1267
|
+
Expression measurement results are not available for User Input
|
|
1268
|
+
messages, as the prosody model relies on audio input and cannot
|
|
1269
|
+
process text alone.
|
|
962
1270
|
AssistantEnd:
|
|
963
1271
|
docs: When provided, the output is an assistant end message.
|
|
964
1272
|
properties:
|
|
965
1273
|
type:
|
|
966
|
-
type: literal<"
|
|
1274
|
+
type: literal<"assistant_end">
|
|
967
1275
|
docs: >-
|
|
968
1276
|
The type of message sent through the socket; for an Assistant End
|
|
969
1277
|
message, this must be `assistant_end`.
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
This message indicates the conclusion of the assistant’s response,
|
|
1281
|
+
signaling that the assistant has finished speaking for the current
|
|
1282
|
+
conversational turn.
|
|
970
1283
|
custom_session_id:
|
|
971
1284
|
type: optional<string>
|
|
972
1285
|
docs: >-
|
|
@@ -980,6 +1293,11 @@ types:
|
|
|
980
1293
|
docs: >-
|
|
981
1294
|
The type of message sent through the socket; for an Assistant Message,
|
|
982
1295
|
this must be `assistant_message`.
|
|
1296
|
+
|
|
1297
|
+
|
|
1298
|
+
This message contains both a transcript of the assistant’s response
|
|
1299
|
+
and the expression measurement predictions of the assistant’s audio
|
|
1300
|
+
output.
|
|
983
1301
|
custom_session_id:
|
|
984
1302
|
type: optional<string>
|
|
985
1303
|
docs: >-
|
|
@@ -987,7 +1305,9 @@ types:
|
|
|
987
1305
|
data, and persist conversations across EVI sessions.
|
|
988
1306
|
id:
|
|
989
1307
|
type: optional<string>
|
|
990
|
-
docs:
|
|
1308
|
+
docs: >-
|
|
1309
|
+
ID of the assistant message. Allows the Assistant Message to be
|
|
1310
|
+
tracked and referenced.
|
|
991
1311
|
message:
|
|
992
1312
|
type: ChatMessage
|
|
993
1313
|
docs: Transcript of the message.
|
|
@@ -996,7 +1316,10 @@ types:
|
|
|
996
1316
|
docs: Inference model results.
|
|
997
1317
|
from_text:
|
|
998
1318
|
type: boolean
|
|
999
|
-
docs:
|
|
1319
|
+
docs: >-
|
|
1320
|
+
Indicates if this message was inserted into the conversation as text
|
|
1321
|
+
from an [Assistant Input
|
|
1322
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.Assistant%20Input.text).
|
|
1000
1323
|
AudioOutput:
|
|
1001
1324
|
docs: When provided, the output is audio.
|
|
1002
1325
|
properties:
|
|
@@ -1012,12 +1335,15 @@ types:
|
|
|
1012
1335
|
data, and persist conversations across EVI sessions.
|
|
1013
1336
|
id:
|
|
1014
1337
|
type: string
|
|
1015
|
-
docs:
|
|
1338
|
+
docs: >-
|
|
1339
|
+
ID of the audio output. Allows the Audio Output message to be tracked
|
|
1340
|
+
and referenced.
|
|
1016
1341
|
data:
|
|
1017
1342
|
type: string
|
|
1018
|
-
docs:
|
|
1019
|
-
|
|
1020
|
-
|
|
1343
|
+
docs: >-
|
|
1344
|
+
Base64 encoded audio output. This encoded audio is transmitted to the
|
|
1345
|
+
client, where it can be decoded and played back as part of the user
|
|
1346
|
+
interaction.
|
|
1021
1347
|
ChatMessageToolResult:
|
|
1022
1348
|
discriminated: false
|
|
1023
1349
|
docs: Function call response from client.
|
|
@@ -1046,6 +1372,11 @@ types:
|
|
|
1046
1372
|
docs: >-
|
|
1047
1373
|
The type of message sent through the socket; for a Chat Metadata
|
|
1048
1374
|
message, this must be `chat_metadata`.
|
|
1375
|
+
|
|
1376
|
+
|
|
1377
|
+
The Chat Metadata message is the first message you receive after
|
|
1378
|
+
establishing a connection with EVI and contains important identifiers
|
|
1379
|
+
for the current Chat session.
|
|
1049
1380
|
custom_session_id:
|
|
1050
1381
|
type: optional<string>
|
|
1051
1382
|
docs: >-
|
|
@@ -1053,10 +1384,25 @@ types:
|
|
|
1053
1384
|
data, and persist conversations across EVI sessions.
|
|
1054
1385
|
chat_group_id:
|
|
1055
1386
|
type: string
|
|
1056
|
-
docs:
|
|
1387
|
+
docs: >-
|
|
1388
|
+
ID of the Chat Group.
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
Used to resume a Chat when passed in the
|
|
1392
|
+
[resumed_chat_group_id](/reference/empathic-voice-interface-evi/chat/chat#request.query.resumed_chat_group_id)
|
|
1393
|
+
query parameter of a subsequent connection request. This allows EVI to
|
|
1394
|
+
continue the conversation from where it left off within the Chat
|
|
1395
|
+
Group.
|
|
1396
|
+
|
|
1397
|
+
|
|
1398
|
+
Learn more about [supporting chat
|
|
1399
|
+
resumability](/docs/empathic-voice-interface-evi/faq#does-evi-support-chat-resumability)
|
|
1400
|
+
in the EVI FAQ.
|
|
1057
1401
|
chat_id:
|
|
1058
1402
|
type: string
|
|
1059
|
-
docs:
|
|
1403
|
+
docs: >-
|
|
1404
|
+
ID of the Chat session. Allows the Chat session to be tracked and
|
|
1405
|
+
referenced.
|
|
1060
1406
|
request_id:
|
|
1061
1407
|
type: optional<string>
|
|
1062
1408
|
docs: ID of the initiating request.
|
|
@@ -1118,6 +1464,11 @@ types:
|
|
|
1118
1464
|
docs: >-
|
|
1119
1465
|
The type of message sent through the socket; for a Web Socket Error
|
|
1120
1466
|
message, this must be `error`.
|
|
1467
|
+
|
|
1468
|
+
|
|
1469
|
+
This message indicates a disruption in the WebSocket connection, such
|
|
1470
|
+
as an unexpected disconnection, protocol error, or data transmission
|
|
1471
|
+
issue.
|
|
1121
1472
|
custom_session_id:
|
|
1122
1473
|
type: optional<string>
|
|
1123
1474
|
docs: >-
|
|
@@ -1125,18 +1476,27 @@ types:
|
|
|
1125
1476
|
data, and persist conversations across EVI sessions.
|
|
1126
1477
|
code:
|
|
1127
1478
|
type: string
|
|
1128
|
-
docs: Error code.
|
|
1479
|
+
docs: Error code. Identifies the type of error encountered.
|
|
1129
1480
|
slug:
|
|
1130
1481
|
type: string
|
|
1131
|
-
docs:
|
|
1482
|
+
docs: >-
|
|
1483
|
+
Short, human-readable identifier and description for the error. See a
|
|
1484
|
+
complete list of error slugs on the [Errors
|
|
1485
|
+
page](/docs/resources/errors).
|
|
1132
1486
|
message:
|
|
1133
1487
|
type: string
|
|
1134
|
-
docs:
|
|
1488
|
+
docs: Detailed description of the error.
|
|
1135
1489
|
Inference:
|
|
1136
1490
|
properties:
|
|
1137
1491
|
prosody:
|
|
1138
1492
|
type: optional<ProsodyInference>
|
|
1139
|
-
docs:
|
|
1493
|
+
docs: >-
|
|
1494
|
+
Prosody model inference results.
|
|
1495
|
+
|
|
1496
|
+
|
|
1497
|
+
EVI uses the prosody model to measure 48 expressions related to speech
|
|
1498
|
+
and vocal characteristics. These results contain a detailed emotional
|
|
1499
|
+
and tonal analysis of the audio.
|
|
1140
1500
|
MillisecondInterval:
|
|
1141
1501
|
properties:
|
|
1142
1502
|
begin:
|
|
@@ -1147,7 +1507,14 @@ types:
|
|
|
1147
1507
|
docs: End time of the interval in milliseconds.
|
|
1148
1508
|
ProsodyInference:
|
|
1149
1509
|
properties:
|
|
1150
|
-
scores:
|
|
1510
|
+
scores:
|
|
1511
|
+
type: EmotionScores
|
|
1512
|
+
docs: >-
|
|
1513
|
+
The confidence levels of 48 expressions in a given audio sample.
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
Scores typically range from 0 to 1, with higher values indicating a
|
|
1517
|
+
stronger confidence level in the measured attribute.
|
|
1151
1518
|
Role:
|
|
1152
1519
|
enum:
|
|
1153
1520
|
- assistant
|
|
@@ -1163,15 +1530,32 @@ types:
|
|
|
1163
1530
|
docs: Name of the tool called.
|
|
1164
1531
|
parameters:
|
|
1165
1532
|
type: string
|
|
1166
|
-
docs:
|
|
1533
|
+
docs: >-
|
|
1534
|
+
Parameters of the tool.
|
|
1535
|
+
|
|
1536
|
+
|
|
1537
|
+
These parameters define the inputs needed for the tool’s execution,
|
|
1538
|
+
including the expected data type and description for each input field.
|
|
1539
|
+
Structured as a stringified JSON schema, this format ensures the tool
|
|
1540
|
+
receives data in the expected format.
|
|
1167
1541
|
tool_call_id:
|
|
1168
1542
|
type: string
|
|
1169
|
-
docs:
|
|
1543
|
+
docs: >-
|
|
1544
|
+
The unique identifier for a specific tool call instance.
|
|
1545
|
+
|
|
1546
|
+
|
|
1547
|
+
This ID is used to track the request and response of a particular tool
|
|
1548
|
+
invocation, ensuring that the correct response is linked to the
|
|
1549
|
+
appropriate request.
|
|
1170
1550
|
type:
|
|
1171
1551
|
type: literal<"tool_call">
|
|
1172
1552
|
docs: >-
|
|
1173
1553
|
The type of message sent through the socket; for a Tool Call message,
|
|
1174
1554
|
this must be `tool_call`.
|
|
1555
|
+
|
|
1556
|
+
|
|
1557
|
+
This message indicates that the supplemental LLM has detected a need
|
|
1558
|
+
to invoke the specified tool.
|
|
1175
1559
|
custom_session_id:
|
|
1176
1560
|
type: optional<string>
|
|
1177
1561
|
docs: >-
|
|
@@ -1179,10 +1563,17 @@ types:
|
|
|
1179
1563
|
data, and persist conversations across EVI sessions.
|
|
1180
1564
|
tool_type:
|
|
1181
1565
|
type: optional<ToolType>
|
|
1182
|
-
docs:
|
|
1566
|
+
docs: >-
|
|
1567
|
+
Type of tool called. Either `builtin` for natively implemented tools,
|
|
1568
|
+
like web search, or `function` for user-defined tools.
|
|
1183
1569
|
response_required:
|
|
1184
1570
|
type: boolean
|
|
1185
|
-
docs:
|
|
1571
|
+
docs: >-
|
|
1572
|
+
Indicates whether a response to the tool call is required from the
|
|
1573
|
+
developer, either in the form of a [Tool Response
|
|
1574
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Response%20Message.type)
|
|
1575
|
+
or a [Tool Error
|
|
1576
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.Tool%20Error%20Message.type).
|
|
1186
1577
|
UserInterruption:
|
|
1187
1578
|
docs: When provided, the output is an interruption.
|
|
1188
1579
|
properties:
|
|
@@ -1191,6 +1582,13 @@ types:
|
|
|
1191
1582
|
docs: >-
|
|
1192
1583
|
The type of message sent through the socket; for a User Interruption
|
|
1193
1584
|
message, this must be `user_interruption`.
|
|
1585
|
+
|
|
1586
|
+
|
|
1587
|
+
This message indicates the user has interrupted the assistant’s
|
|
1588
|
+
response. EVI detects the interruption in real-time and sends this
|
|
1589
|
+
message to signal the interruption event. This message allows the
|
|
1590
|
+
system to stop the current audio playback, clear the audio queue, and
|
|
1591
|
+
prepare to handle new user input.
|
|
1194
1592
|
custom_session_id:
|
|
1195
1593
|
type: optional<string>
|
|
1196
1594
|
docs: >-
|
|
@@ -1205,8 +1603,18 @@ types:
|
|
|
1205
1603
|
type:
|
|
1206
1604
|
type: literal<"user_message">
|
|
1207
1605
|
docs: >-
|
|
1208
|
-
The type of message sent through the socket; for a User
|
|
1606
|
+
The type of message sent through the socket; for a User Message, this
|
|
1209
1607
|
must be `user_message`.
|
|
1608
|
+
|
|
1609
|
+
|
|
1610
|
+
This message contains both a transcript of the user’s input and the
|
|
1611
|
+
expression measurement predictions if the input was sent as an [Audio
|
|
1612
|
+
Input
|
|
1613
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.Audio%20Input.type).
|
|
1614
|
+
Expression measurement predictions are not provided for a [User Input
|
|
1615
|
+
message](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.type),
|
|
1616
|
+
as the prosody model relies on audio input and cannot process text
|
|
1617
|
+
alone.
|
|
1210
1618
|
custom_session_id:
|
|
1211
1619
|
type: optional<string>
|
|
1212
1620
|
docs: >-
|
|
@@ -1223,7 +1631,22 @@ types:
|
|
|
1223
1631
|
docs: Start and End time of user message.
|
|
1224
1632
|
from_text:
|
|
1225
1633
|
type: boolean
|
|
1226
|
-
docs:
|
|
1634
|
+
docs: >-
|
|
1635
|
+
Indicates if this message was inserted into the conversation as text
|
|
1636
|
+
from a [User
|
|
1637
|
+
Input](/reference/empathic-voice-interface-evi/chat/chat#send.User%20Input.text)
|
|
1638
|
+
message.
|
|
1639
|
+
JsonMessage:
|
|
1640
|
+
discriminated: false
|
|
1641
|
+
union:
|
|
1642
|
+
- AssistantMessage
|
|
1643
|
+
- ChatMetadata
|
|
1644
|
+
- WebSocketError
|
|
1645
|
+
- UserInterruption
|
|
1646
|
+
- UserMessage
|
|
1647
|
+
- ToolCallMessage
|
|
1648
|
+
- ToolResponseMessage
|
|
1649
|
+
- ToolErrorMessage
|
|
1227
1650
|
TtsInput:
|
|
1228
1651
|
properties:
|
|
1229
1652
|
type: optional<literal<"tts">>
|
|
@@ -1253,8 +1676,12 @@ types:
|
|
|
1253
1676
|
VoiceArgs:
|
|
1254
1677
|
properties:
|
|
1255
1678
|
voice: optional<VoiceNameEnum>
|
|
1256
|
-
baseline:
|
|
1257
|
-
|
|
1679
|
+
baseline:
|
|
1680
|
+
type: optional<boolean>
|
|
1681
|
+
default: false
|
|
1682
|
+
reconstruct:
|
|
1683
|
+
type: optional<boolean>
|
|
1684
|
+
default: false
|
|
1258
1685
|
VoiceNameEnum:
|
|
1259
1686
|
enum:
|
|
1260
1687
|
- ITO
|