hume 0.13.6 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244)
  1. package/.mock/definition/empathic-voice/__package__.yml +669 -657
  2. package/.mock/definition/empathic-voice/chat.yml +27 -27
  3. package/.mock/definition/empathic-voice/chatWebhooks.yml +2 -2
  4. package/.mock/definition/tts/__package__.yml +93 -88
  5. package/api/resources/empathicVoice/types/AssistantEnd.d.ts +2 -2
  6. package/api/resources/empathicVoice/types/AssistantInput.d.ts +2 -2
  7. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +8 -8
  8. package/api/resources/empathicVoice/types/AssistantProsody.d.ts +6 -6
  9. package/api/resources/empathicVoice/types/AudioConfiguration.d.ts +2 -2
  10. package/api/resources/empathicVoice/types/AudioInput.d.ts +6 -6
  11. package/api/resources/empathicVoice/types/AudioOutput.d.ts +4 -4
  12. package/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -1
  13. package/api/resources/empathicVoice/types/ChatMessage.d.ts +2 -2
  14. package/api/resources/empathicVoice/types/ChatMetadata.d.ts +8 -8
  15. package/api/resources/empathicVoice/types/Context.d.ts +6 -6
  16. package/api/resources/empathicVoice/types/LanguageModelType.d.ts +7 -1
  17. package/api/resources/empathicVoice/types/LanguageModelType.js +6 -0
  18. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +2 -2
  19. package/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +2 -2
  20. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
  21. package/api/resources/empathicVoice/types/SessionSettings.d.ts +27 -27
  22. package/api/resources/empathicVoice/types/Tool.d.ts +6 -6
  23. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +6 -6
  24. package/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +16 -16
  25. package/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +8 -8
  26. package/api/resources/empathicVoice/types/UserInput.d.ts +2 -2
  27. package/api/resources/empathicVoice/types/UserInterruption.d.ts +4 -4
  28. package/api/resources/empathicVoice/types/UserMessage.d.ts +12 -12
  29. package/api/resources/empathicVoice/types/WebSocketError.d.ts +10 -10
  30. package/api/resources/empathicVoice/types/WebhookEventChatEnded.d.ts +8 -8
  31. package/api/resources/empathicVoice/types/WebhookEventChatStarted.d.ts +6 -6
  32. package/api/resources/empathicVoice/types/index.d.ts +16 -16
  33. package/api/resources/empathicVoice/types/index.js +16 -16
  34. package/api/resources/index.d.ts +1 -1
  35. package/api/resources/index.js +2 -2
  36. package/api/resources/tts/client/Client.d.ts +6 -6
  37. package/api/resources/tts/client/Client.js +35 -35
  38. package/api/resources/tts/types/OctaveVersion.d.ts +4 -0
  39. package/api/resources/tts/types/OctaveVersion.js +5 -0
  40. package/api/resources/tts/types/PostedTts.d.ts +9 -8
  41. package/api/resources/tts/types/PostedUtterance.d.ts +6 -6
  42. package/api/resources/tts/types/ReturnGeneration.d.ts +5 -5
  43. package/api/resources/tts/types/ReturnTts.d.ts +1 -1
  44. package/api/resources/tts/types/Snippet.d.ts +6 -6
  45. package/api/resources/tts/types/SnippetAudioChunk.d.ts +12 -11
  46. package/api/resources/tts/types/index.d.ts +1 -0
  47. package/api/resources/tts/types/index.js +1 -0
  48. package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +2 -2
  49. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +2 -2
  50. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +8 -8
  51. package/dist/api/resources/empathicVoice/types/AssistantProsody.d.ts +6 -6
  52. package/dist/api/resources/empathicVoice/types/AudioConfiguration.d.ts +2 -2
  53. package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +6 -6
  54. package/dist/api/resources/empathicVoice/types/AudioOutput.d.ts +4 -4
  55. package/dist/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -1
  56. package/dist/api/resources/empathicVoice/types/ChatMessage.d.ts +2 -2
  57. package/dist/api/resources/empathicVoice/types/ChatMetadata.d.ts +8 -8
  58. package/dist/api/resources/empathicVoice/types/Context.d.ts +6 -6
  59. package/dist/api/resources/empathicVoice/types/LanguageModelType.d.ts +7 -1
  60. package/dist/api/resources/empathicVoice/types/LanguageModelType.js +6 -0
  61. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +2 -2
  62. package/dist/api/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +2 -2
  63. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
  64. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +27 -27
  65. package/dist/api/resources/empathicVoice/types/Tool.d.ts +6 -6
  66. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +6 -6
  67. package/dist/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +16 -16
  68. package/dist/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +8 -8
  69. package/dist/api/resources/empathicVoice/types/UserInput.d.ts +2 -2
  70. package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +4 -4
  71. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +12 -12
  72. package/dist/api/resources/empathicVoice/types/WebSocketError.d.ts +10 -10
  73. package/dist/api/resources/empathicVoice/types/WebhookEventChatEnded.d.ts +8 -8
  74. package/dist/api/resources/empathicVoice/types/WebhookEventChatStarted.d.ts +6 -6
  75. package/dist/api/resources/empathicVoice/types/index.d.ts +16 -16
  76. package/dist/api/resources/empathicVoice/types/index.js +16 -16
  77. package/dist/api/resources/index.d.ts +1 -1
  78. package/dist/api/resources/index.js +2 -2
  79. package/dist/api/resources/tts/client/Client.d.ts +6 -6
  80. package/dist/api/resources/tts/client/Client.js +35 -35
  81. package/dist/api/resources/tts/types/OctaveVersion.d.ts +4 -0
  82. package/dist/api/resources/tts/types/OctaveVersion.js +5 -0
  83. package/dist/api/resources/tts/types/PostedTts.d.ts +9 -8
  84. package/dist/api/resources/tts/types/PostedUtterance.d.ts +6 -6
  85. package/dist/api/resources/tts/types/ReturnGeneration.d.ts +5 -5
  86. package/dist/api/resources/tts/types/ReturnTts.d.ts +1 -1
  87. package/dist/api/resources/tts/types/Snippet.d.ts +6 -6
  88. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +12 -11
  89. package/dist/api/resources/tts/types/index.d.ts +1 -0
  90. package/dist/api/resources/tts/types/index.js +1 -0
  91. package/dist/serialization/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  92. package/dist/serialization/resources/empathicVoice/types/AssistantEnd.js +1 -1
  93. package/dist/serialization/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  94. package/dist/serialization/resources/empathicVoice/types/AssistantInput.js +1 -1
  95. package/dist/serialization/resources/empathicVoice/types/AssistantMessage.d.ts +2 -2
  96. package/dist/serialization/resources/empathicVoice/types/AssistantMessage.js +2 -2
  97. package/dist/serialization/resources/empathicVoice/types/AssistantProsody.d.ts +2 -2
  98. package/dist/serialization/resources/empathicVoice/types/AssistantProsody.js +2 -2
  99. package/dist/serialization/resources/empathicVoice/types/AudioConfiguration.d.ts +1 -1
  100. package/dist/serialization/resources/empathicVoice/types/AudioConfiguration.js +1 -1
  101. package/dist/serialization/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  102. package/dist/serialization/resources/empathicVoice/types/AudioInput.js +1 -1
  103. package/dist/serialization/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
  104. package/dist/serialization/resources/empathicVoice/types/AudioOutput.js +2 -2
  105. package/dist/serialization/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -1
  106. package/dist/serialization/resources/empathicVoice/types/BuiltinToolConfig.js +1 -1
  107. package/dist/serialization/resources/empathicVoice/types/ChatMessage.d.ts +1 -1
  108. package/dist/serialization/resources/empathicVoice/types/ChatMessage.js +1 -1
  109. package/dist/serialization/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  110. package/dist/serialization/resources/empathicVoice/types/ChatMetadata.js +2 -2
  111. package/dist/serialization/resources/empathicVoice/types/Context.d.ts +1 -1
  112. package/dist/serialization/resources/empathicVoice/types/Context.js +1 -1
  113. package/dist/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
  114. package/dist/serialization/resources/empathicVoice/types/LanguageModelType.js +6 -0
  115. package/dist/serialization/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  116. package/dist/serialization/resources/empathicVoice/types/PauseAssistantMessage.js +1 -1
  117. package/dist/serialization/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +1 -1
  118. package/dist/serialization/resources/empathicVoice/types/ResumeAssistantMessage.js +1 -1
  119. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
  120. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
  121. package/dist/serialization/resources/empathicVoice/types/SessionSettings.d.ts +8 -8
  122. package/dist/serialization/resources/empathicVoice/types/SessionSettings.js +8 -8
  123. package/dist/serialization/resources/empathicVoice/types/Tool.d.ts +3 -3
  124. package/dist/serialization/resources/empathicVoice/types/Tool.js +3 -3
  125. package/dist/serialization/resources/empathicVoice/types/ToolCallMessage.d.ts +3 -3
  126. package/dist/serialization/resources/empathicVoice/types/ToolCallMessage.js +3 -3
  127. package/dist/serialization/resources/empathicVoice/types/ToolErrorMessage.d.ts +6 -6
  128. package/dist/serialization/resources/empathicVoice/types/ToolErrorMessage.js +6 -6
  129. package/dist/serialization/resources/empathicVoice/types/ToolResponseMessage.d.ts +2 -2
  130. package/dist/serialization/resources/empathicVoice/types/ToolResponseMessage.js +2 -2
  131. package/dist/serialization/resources/empathicVoice/types/UserInput.d.ts +1 -1
  132. package/dist/serialization/resources/empathicVoice/types/UserInput.js +1 -1
  133. package/dist/serialization/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  134. package/dist/serialization/resources/empathicVoice/types/UserInterruption.js +1 -1
  135. package/dist/serialization/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  136. package/dist/serialization/resources/empathicVoice/types/UserMessage.js +3 -3
  137. package/dist/serialization/resources/empathicVoice/types/WebSocketError.d.ts +3 -3
  138. package/dist/serialization/resources/empathicVoice/types/WebSocketError.js +3 -3
  139. package/dist/serialization/resources/empathicVoice/types/WebhookEventChatEnded.d.ts +4 -4
  140. package/dist/serialization/resources/empathicVoice/types/WebhookEventChatEnded.js +4 -4
  141. package/dist/serialization/resources/empathicVoice/types/WebhookEventChatStarted.d.ts +3 -3
  142. package/dist/serialization/resources/empathicVoice/types/WebhookEventChatStarted.js +3 -3
  143. package/dist/serialization/resources/empathicVoice/types/index.d.ts +16 -16
  144. package/dist/serialization/resources/empathicVoice/types/index.js +16 -16
  145. package/dist/serialization/resources/index.d.ts +1 -1
  146. package/dist/serialization/resources/index.js +2 -2
  147. package/dist/serialization/resources/tts/types/OctaveVersion.d.ts +10 -0
  148. package/dist/serialization/resources/tts/types/OctaveVersion.js +41 -0
  149. package/dist/serialization/resources/tts/types/PostedTts.d.ts +5 -3
  150. package/dist/serialization/resources/tts/types/PostedTts.js +5 -3
  151. package/dist/serialization/resources/tts/types/PostedUtterance.d.ts +2 -2
  152. package/dist/serialization/resources/tts/types/PostedUtterance.js +2 -2
  153. package/dist/serialization/resources/tts/types/ReturnGeneration.d.ts +3 -3
  154. package/dist/serialization/resources/tts/types/ReturnGeneration.js +3 -3
  155. package/dist/serialization/resources/tts/types/ReturnTts.d.ts +1 -1
  156. package/dist/serialization/resources/tts/types/ReturnTts.js +1 -1
  157. package/dist/serialization/resources/tts/types/Snippet.d.ts +3 -3
  158. package/dist/serialization/resources/tts/types/Snippet.js +3 -3
  159. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +7 -6
  160. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +7 -6
  161. package/dist/serialization/resources/tts/types/index.d.ts +1 -0
  162. package/dist/serialization/resources/tts/types/index.js +1 -0
  163. package/dist/version.d.ts +1 -1
  164. package/dist/version.js +1 -1
  165. package/dist/wrapper/index.d.ts +1 -1
  166. package/dist/wrapper/index.js +53 -3
  167. package/package.json +1 -1
  168. package/reference.md +706 -706
  169. package/serialization/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  170. package/serialization/resources/empathicVoice/types/AssistantEnd.js +1 -1
  171. package/serialization/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  172. package/serialization/resources/empathicVoice/types/AssistantInput.js +1 -1
  173. package/serialization/resources/empathicVoice/types/AssistantMessage.d.ts +2 -2
  174. package/serialization/resources/empathicVoice/types/AssistantMessage.js +2 -2
  175. package/serialization/resources/empathicVoice/types/AssistantProsody.d.ts +2 -2
  176. package/serialization/resources/empathicVoice/types/AssistantProsody.js +2 -2
  177. package/serialization/resources/empathicVoice/types/AudioConfiguration.d.ts +1 -1
  178. package/serialization/resources/empathicVoice/types/AudioConfiguration.js +1 -1
  179. package/serialization/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  180. package/serialization/resources/empathicVoice/types/AudioInput.js +1 -1
  181. package/serialization/resources/empathicVoice/types/AudioOutput.d.ts +2 -2
  182. package/serialization/resources/empathicVoice/types/AudioOutput.js +2 -2
  183. package/serialization/resources/empathicVoice/types/BuiltinToolConfig.d.ts +1 -1
  184. package/serialization/resources/empathicVoice/types/BuiltinToolConfig.js +1 -1
  185. package/serialization/resources/empathicVoice/types/ChatMessage.d.ts +1 -1
  186. package/serialization/resources/empathicVoice/types/ChatMessage.js +1 -1
  187. package/serialization/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  188. package/serialization/resources/empathicVoice/types/ChatMetadata.js +2 -2
  189. package/serialization/resources/empathicVoice/types/Context.d.ts +1 -1
  190. package/serialization/resources/empathicVoice/types/Context.js +1 -1
  191. package/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
  192. package/serialization/resources/empathicVoice/types/LanguageModelType.js +6 -0
  193. package/serialization/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  194. package/serialization/resources/empathicVoice/types/PauseAssistantMessage.js +1 -1
  195. package/serialization/resources/empathicVoice/types/ResumeAssistantMessage.d.ts +1 -1
  196. package/serialization/resources/empathicVoice/types/ResumeAssistantMessage.js +1 -1
  197. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
  198. package/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
  199. package/serialization/resources/empathicVoice/types/SessionSettings.d.ts +8 -8
  200. package/serialization/resources/empathicVoice/types/SessionSettings.js +8 -8
  201. package/serialization/resources/empathicVoice/types/Tool.d.ts +3 -3
  202. package/serialization/resources/empathicVoice/types/Tool.js +3 -3
  203. package/serialization/resources/empathicVoice/types/ToolCallMessage.d.ts +3 -3
  204. package/serialization/resources/empathicVoice/types/ToolCallMessage.js +3 -3
  205. package/serialization/resources/empathicVoice/types/ToolErrorMessage.d.ts +6 -6
  206. package/serialization/resources/empathicVoice/types/ToolErrorMessage.js +6 -6
  207. package/serialization/resources/empathicVoice/types/ToolResponseMessage.d.ts +2 -2
  208. package/serialization/resources/empathicVoice/types/ToolResponseMessage.js +2 -2
  209. package/serialization/resources/empathicVoice/types/UserInput.d.ts +1 -1
  210. package/serialization/resources/empathicVoice/types/UserInput.js +1 -1
  211. package/serialization/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  212. package/serialization/resources/empathicVoice/types/UserInterruption.js +1 -1
  213. package/serialization/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  214. package/serialization/resources/empathicVoice/types/UserMessage.js +3 -3
  215. package/serialization/resources/empathicVoice/types/WebSocketError.d.ts +3 -3
  216. package/serialization/resources/empathicVoice/types/WebSocketError.js +3 -3
  217. package/serialization/resources/empathicVoice/types/WebhookEventChatEnded.d.ts +4 -4
  218. package/serialization/resources/empathicVoice/types/WebhookEventChatEnded.js +4 -4
  219. package/serialization/resources/empathicVoice/types/WebhookEventChatStarted.d.ts +3 -3
  220. package/serialization/resources/empathicVoice/types/WebhookEventChatStarted.js +3 -3
  221. package/serialization/resources/empathicVoice/types/index.d.ts +16 -16
  222. package/serialization/resources/empathicVoice/types/index.js +16 -16
  223. package/serialization/resources/index.d.ts +1 -1
  224. package/serialization/resources/index.js +2 -2
  225. package/serialization/resources/tts/types/OctaveVersion.d.ts +10 -0
  226. package/serialization/resources/tts/types/OctaveVersion.js +41 -0
  227. package/serialization/resources/tts/types/PostedTts.d.ts +5 -3
  228. package/serialization/resources/tts/types/PostedTts.js +5 -3
  229. package/serialization/resources/tts/types/PostedUtterance.d.ts +2 -2
  230. package/serialization/resources/tts/types/PostedUtterance.js +2 -2
  231. package/serialization/resources/tts/types/ReturnGeneration.d.ts +3 -3
  232. package/serialization/resources/tts/types/ReturnGeneration.js +3 -3
  233. package/serialization/resources/tts/types/ReturnTts.d.ts +1 -1
  234. package/serialization/resources/tts/types/ReturnTts.js +1 -1
  235. package/serialization/resources/tts/types/Snippet.d.ts +3 -3
  236. package/serialization/resources/tts/types/Snippet.js +3 -3
  237. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +7 -6
  238. package/serialization/resources/tts/types/SnippetAudioChunk.js +7 -6
  239. package/serialization/resources/tts/types/index.d.ts +1 -0
  240. package/serialization/resources/tts/types/index.js +1 -0
  241. package/version.d.ts +1 -1
  242. package/version.js +1 -1
  243. package/wrapper/index.d.ts +1 -1
  244. package/wrapper/index.js +53 -3
package/.mock/definition/empathic-voice/chat.yml
@@ -4,6 +4,21 @@ channel:
  auth: false
  docs: Chat with Empathic Voice Interface (EVI)
  query-parameters:
+ access_token:
+ type: optional<string>
+ default: ''
+ docs: >-
+ Access token used for authenticating the client. If not provided, an
+ `api_key` must be provided to authenticate.
+
+
+ The access token is generated using both an API key and a Secret key,
+ which provides an additional layer of security compared to using just an
+ API key.
+
+
+ For more details, refer to the [Authentication Strategies
+ Guide](/docs/introduction/api-key#authentication-strategies).
  config_id:
  type: optional<string>
  docs: >-
@@ -32,6 +47,13 @@ channel:

  Include this parameter to apply a specific version of an EVI
  configuration. If omitted, the latest version will be applied.
+ event_limit:
+ type: optional<integer>
+ docs: >-
+ The maximum number of chat events to return from chat history. By
+ default, the system returns up to 300 events (100 events per page × 3
+ pages). Set this parameter to a smaller value to limit the number of
+ events returned.
  resumed_chat_group_id:
  type: optional<string>
  docs: >-
@@ -76,12 +98,6 @@ channel:
  Use the GET `/v0/evi/chat_groups` endpoint to obtain the Chat Group IDs
  of all Chat Groups associated with an API key. This endpoint returns a
  list of all available chat groups.
- voice_id:
- type: optional<string>
- docs: >-
- The name or ID of the voice from the `Voice Library` to be used as the
- speaker for this EVI session. This will override the speaker set in the
- selected configuration.
  verbose_transcription:
  type: optional<boolean>
  default: false
@@ -93,28 +109,12 @@ channel:
  field on a
  [UserMessage](/reference/speech-to-speech-evi/chat#receive.UserMessage)
  denotes whether the message is "interim" or "final."
- event_limit:
- type: optional<integer>
- docs: >-
- The maximum number of chat events to return from chat history. By
- default, the system returns up to 300 events (100 events per page × 3
- pages). Set this parameter to a smaller value to limit the number of
- events returned.
- access_token:
+ voice_id:
  type: optional<string>
- default: ''
  docs: >-
- Access token used for authenticating the client. If not provided, an
- `api_key` must be provided to authenticate.
-
-
- The access token is generated using both an API key and a Secret key,
- which provides an additional layer of security compared to using just an
- API key.
-
-
- For more details, refer to the [Authentication Strategies
- Guide](/docs/introduction/api-key#authentication-strategies).
+ The name or ID of the voice from the `Voice Library` to be used as the
+ speaker for this EVI session. This will override the speaker set in the
+ selected configuration.
  api_key:
  type: optional<string>
  default: ''
@@ -136,8 +136,8 @@ channel:
  - messages:
  - type: publish
  body:
- type: audio_input
  data: data
+ type: audio_input
  - type: subscribe
  body:
  type: assistant_end
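
The new `access_token` parameter above documents EVI's token-based authentication strategy. A minimal TypeScript sketch of connecting that way — `HumeClient`, `fetchAccessToken`, and `chat.connect` ship with this package, but the exact option names here are camelCase assumptions based on the query parameters in this diff, not verified against 0.13.8:

```typescript
import { HumeClient, fetchAccessToken } from "hume";

async function openEviChat() {
  // Per the docs string above, the token is generated from an API key
  // plus a Secret key (see the Authentication Strategies guide).
  const accessToken = await fetchAccessToken({
    apiKey: process.env.HUME_API_KEY!,
    secretKey: process.env.HUME_SECRET_KEY!,
  });

  const client = new HumeClient({ accessToken });

  // configId and verboseTranscription mirror the config_id and
  // verbose_transcription query parameters documented above.
  const socket = client.empathicVoice.chat.connect({
    configId: "<your-evi-config-id>",
    verboseTranscription: true,
  });

  socket.on("open", () => console.log("EVI chat connected"));
}
```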
package/.mock/definition/empathic-voice/chatWebhooks.yml
@@ -11,9 +11,9 @@ webhooks:
  - payload:
  chat_group_id: chat_group_id
  chat_id: chat_id
- end_time: 1
  duration_seconds: 1
  end_reason: ACTIVE
+ end_time: 1
  docs: Sent when an EVI chat ends.
  chatStarted:
  audiences: []
@@ -25,6 +25,6 @@ webhooks:
  - payload:
  chat_group_id: chat_group_id
  chat_id: chat_id
- start_time: 1
  chat_start_type: new_chat_group
+ start_time: 1
  docs: Sent when an EVI chat is started.
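
Both webhook payloads above only had their fields reordered, so handlers keyed on field names are unaffected. For reference, a small sketch of telling the two events apart by those fields (the Express wiring and route are illustrative; only the payload keys come from this diff):

```typescript
import express from "express";

const app = express();
app.use(express.json());

app.post("/evi-webhooks", (req, res) => {
  const event = req.body;
  if ("end_reason" in event) {
    // chat_ended payload: chat_group_id, chat_id, duration_seconds,
    // end_reason, end_time
    console.log(`Chat ${event.chat_id} ended (${event.end_reason}) after ${event.duration_seconds}s`);
  } else {
    // chat_started payload: chat_group_id, chat_id, chat_start_type, start_time
    console.log(`Chat ${event.chat_id} started (${event.chat_start_type})`);
  }
  res.sendStatus(200);
});

app.listen(3000);
```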
package/.mock/definition/tts/__package__.yml
@@ -120,30 +120,26 @@ service:
  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
  rounded vowels, and a warm, steady tone with an articulate,
  academic quality.
- synthesize-json-streaming:
- path: /v0/tts/stream/json
+ synthesize-file-streaming:
+ path: /v0/tts/stream/file
  method: POST
  auth: true
  docs: >-
  Streams synthesized speech using the specified voice. If no voice is
  provided, a novel voice will be generated dynamically. Optionally,
  additional context can be included to influence the speech's style and
- prosody.
-
-
- The response is a stream of JSON objects including audio encoded in
- base64.
+ prosody.
  source:
  openapi: tts-openapi.json
- display-name: Text-to-speech (Streamed JSON)
+ display-name: Text-to-speech (Streamed File)
  request:
  body:
  type: PostedTts
  content-type: application/json
- response-stream:
- docs: Successful Response
- type: SnippetAudioChunk
- format: json
+ response:
+ docs: OK
+ type: file
+ status-code: 200
  errors:
  - UnprocessableEntityError
  examples:
@@ -155,26 +151,30 @@ service:
  voice:
  name: Male English Actor
  provider: HUME_AI
- synthesize-file-streaming:
- path: /v0/tts/stream/file
+ synthesize-json-streaming:
+ path: /v0/tts/stream/json
  method: POST
  auth: true
  docs: >-
  Streams synthesized speech using the specified voice. If no voice is
  provided, a novel voice will be generated dynamically. Optionally,
  additional context can be included to influence the speech's style and
- prosody.
+ prosody.
+
+
+ The response is a stream of JSON objects including audio encoded in
+ base64.
  source:
  openapi: tts-openapi.json
- display-name: Text-to-speech (Streamed File)
+ display-name: Text-to-speech (Streamed JSON)
  request:
  body:
  type: PostedTts
  content-type: application/json
- response:
- docs: OK
- type: file
- status-code: 200
+ response-stream:
+ docs: Successful Response
+ type: SnippetAudioChunk
+ format: json
  errors:
  - UnprocessableEntityError
  examples:
@@ -319,14 +319,30 @@ types:
  SnippetAudioChunk:
  docs: Metadata for a chunk of generated audio.
  properties:
- request_id:
+ audio:
  type: string
- docs: ID of the initiating request.
+ docs: The generated audio output chunk in the requested format.
+ audio_format:
+ type: AudioFormatType
+ docs: The generated audio output format.
+ chunk_index:
+ type: integer
+ docs: The index of the audio chunk in the snippet.
  generation_id:
  type: string
  docs: >-
  The generation ID of the parent snippet that this chunk corresponds
  to.
+ is_last_chunk:
+ type: boolean
+ docs: >-
+ Whether or not this is the last chunk streamed back from the decoder
+ for one input snippet.
+ request_id:
+ type: string
+ docs: ID of the initiating request.
+ snippet:
+ type: optional<Snippet>
  snippet_id:
  type: string
  docs: The ID of the parent snippet that this chunk corresponds to.
@@ -339,27 +355,13 @@ types:
  The transcribed text of the generated audio of the parent snippet that
  this chunk corresponds to. It is only present if `instant_mode` is set
  to `false`.
- chunk_index:
- type: integer
- docs: The index of the audio chunk in the snippet.
- audio:
- type: string
- docs: The generated audio output chunk in the requested format.
- audio_format:
- type: AudioFormatType
- docs: The generated audio output format.
- is_last_chunk:
- type: boolean
- docs: >-
- Whether or not this is the last chunk streamed back from the decoder
- for one input snippet.
+ type:
+ type: optional<literal<"audio">>
  utterance_index:
  type: optional<integer>
  docs: >-
  The index of the utterance in the request that the parent snippet of
  this chunk corresponds to.
- snippet:
- type: optional<Snippet>
  source:
  openapi: tts-openapi.json
  PostedContextWithGenerationId:
@@ -395,25 +397,25 @@ types:
  openapi: tts-openapi.json
  ReturnGeneration:
  properties:
- generation_id:
+ audio:
  type: string
  docs: >-
- A unique ID associated with this TTS generation that can be used as
- context for generating consistent speech style and prosody across
- multiple requests.
+ The generated audio output in the requested format, encoded as a
+ base64 string.
  duration:
  type: double
  docs: Duration of the generated audio in seconds.
+ encoding:
+ type: AudioEncoding
  file_size:
  type: integer
  docs: Size of the generated audio in bytes.
- encoding:
- type: AudioEncoding
- audio:
+ generation_id:
  type: string
  docs: >-
- The generated audio output in the requested format, encoded as a
- base64 string.
+ A unique ID associated with this TTS generation that can be used as
+ context for generating consistent speech style and prosody across
+ multiple requests.
  snippets:
  docs: >-
  A list of snippet groups where each group corresponds to an utterance
@@ -466,18 +468,9 @@ types:
  Utterances to use as context for generating consistent speech style
  and prosody across multiple requests. These will not be converted to
  speech output.
- utterances:
- docs: >-
- A list of **Utterances** to be converted to speech output.
-
-
- An **Utterance** is a unit of input for
- [Octave](/docs/text-to-speech-tts/overview), and includes input
- `text`, an optional `description` to serve as the prompt for how the
- speech should be delivered, an optional `voice` specification, and
- additional controls to guide delivery for `speed` and
- `trailing_silence`.
- type: list<PostedUtterance>
+ format:
+ type: optional<Format>
+ docs: Specifies the output audio file format.
  num_generations:
  type: optional<integer>
  docs: Number of generations of the audio to produce.
@@ -485,9 +478,6 @@ types:
  validation:
  min: 1
  max: 5
- format:
- type: optional<Format>
- docs: Specifies the output audio file format.
  split_utterances:
  type: optional<boolean>
  docs: >-
@@ -516,6 +506,20 @@ types:
  if disabled, each chunk's audio will be its own audio file, each with
  its own headers (if applicable).
  default: false
+ utterances:
+ docs: >-
+ A list of **Utterances** to be converted to speech output.
+
+
+ An **Utterance** is a unit of input for
+ [Octave](/docs/text-to-speech-tts/overview), and includes input
+ `text`, an optional `description` to serve as the prompt for how the
+ speech should be delivered, an optional `voice` specification, and
+ additional controls to guide delivery for `speed` and
+ `trailing_silence`.
+ type: list<PostedUtterance>
+ version:
+ type: optional<OctaveVersion>
  instant_mode:
  type: optional<boolean>
  docs: >-
@@ -542,16 +546,17 @@ types:
  openapi: tts-openapi.json
  ReturnTts:
  properties:
+ generations:
+ type: list<ReturnGeneration>
  request_id:
  type: optional<string>
  docs: >-
  A unique ID associated with this request for tracking and
  troubleshooting. Use this ID when contacting [support](/support) for
  troubleshooting assistance.
- generations:
- type: list<ReturnGeneration>
  source:
  openapi: tts-openapi.json
+ OctaveVersion: string
  ReturnVoice:
  docs: An Octave voice available for text-to-speech
  properties:
@@ -577,37 +582,32 @@ types:
  openapi: tts-openapi.json
  Snippet:
  properties:
+ audio:
+ type: string
+ docs: >-
+ The segmented audio output in the requested format, encoded as a
+ base64 string.
+ generation_id:
+ type: string
+ docs: The generation ID this snippet corresponds to.
  id:
  type: string
  docs: A unique ID associated with this **Snippet**.
  text:
  type: string
  docs: The text for this **Snippet**.
- generation_id:
- type: string
- docs: The generation ID this snippet corresponds to.
- utterance_index:
- type: optional<integer>
- docs: The index of the utterance in the request this snippet corresponds to.
  transcribed_text:
  type: optional<string>
  docs: >-
  The transcribed text of the generated audio. It is only present if
  `instant_mode` is set to `false`.
- audio:
- type: string
- docs: >-
- The segmented audio output in the requested format, encoded as a
- base64 string.
+ utterance_index:
+ type: optional<integer>
+ docs: The index of the utterance in the request this snippet corresponds to.
  source:
  openapi: tts-openapi.json
  PostedUtterance:
  properties:
- text:
- type: string
- docs: The input text to be synthesized into speech.
- validation:
- maxLength: 5000
  description:
  type: optional<string>
  docs: >-
@@ -629,14 +629,6 @@ types:
  guide](/docs/text-to-speech-tts/prompting) for design tips.
  validation:
  maxLength: 1000
- voice:
- type: optional<PostedUtteranceVoice>
- docs: >-
- The `name` or `id` associated with a **Voice** from the **Voice
- Library** to be used as the speaker for this and all subsequent
- `utterances`, until the `voice` field is updated again.
-
- See our [voices guide](/docs/text-to-speech-tts/voices) for more details on generating and specifying **Voices**.
  speed:
  type: optional<double>
  docs: >-
@@ -646,6 +638,11 @@ types:
  validation:
  min: 0.5
  max: 2
+ text:
+ type: string
+ docs: The input text to be synthesized into speech.
+ validation:
+ maxLength: 5000
  trailing_silence:
  type: optional<double>
  docs: Duration of trailing silence (in seconds) to add to this utterance
@@ -653,6 +650,14 @@ types:
  validation:
  min: 0
  max: 5
+ voice:
+ type: optional<PostedUtteranceVoice>
+ docs: >-
+ The `name` or `id` associated with a **Voice** from the **Voice
+ Library** to be used as the speaker for this and all subsequent
+ `utterances`, until the `voice` field is updated again.
+
+ See our [voices guide](/docs/text-to-speech-tts/voices) for more details on generating and specifying **Voices**.
  source:
  openapi: tts-openapi.json
  ValidationErrorLocItem:
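
Most of the TTS changes above are alphabetical reordering; the substantive additions are the `version: optional<OctaveVersion>` field on `PostedTts`, the new `OctaveVersion` type, and the `type`/`snippet` fields on `SnippetAudioChunk`. A sketch of consuming the streamed-JSON endpoint — the method name is inferred from the `synthesize-json-streaming` endpoint key and the camelCase field names from this SDK's conventions, so treat both as assumptions:

```typescript
import { HumeClient } from "hume";

async function streamSpeech() {
  const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });

  const stream = await client.tts.synthesizeJsonStreaming({
    utterances: [
      {
        text: "Welcome to Octave.",
        description: "A calm, articulate narrator.",
      },
    ],
    // version: the new optional<OctaveVersion> field; its accepted
    // values are not shown in this diff, so it is omitted here.
  });

  for await (const chunk of stream) {
    // SnippetAudioChunk per the reordered type above: audio (base64),
    // audioFormat, chunkIndex, generationId, isLastChunk, snippetId.
    console.log(`chunk ${chunk.chunkIndex} of snippet ${chunk.snippetId}`);
    if (chunk.isLastChunk) {
      console.log("snippet complete");
    }
  }
}
```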
package/api/resources/empathicVoice/types/AssistantEnd.d.ts
@@ -5,12 +5,12 @@
  * When provided, the output is an assistant end message.
  */
  export interface AssistantEnd {
+ /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
+ customSessionId?: string;
  /**
  * The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`.
  *
  * This message indicates the conclusion of the assistant's response, signaling that the assistant has finished speaking for the current conversational turn.
  */
  type: "assistant_end";
- /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
- customSessionId?: string;
  }
package/api/resources/empathicVoice/types/AssistantInput.d.ts
@@ -5,8 +5,6 @@
  * When provided, the input is spoken by EVI.
  */
  export interface AssistantInput {
- /** The type of message sent through the socket; must be `assistant_input` for our server to correctly identify and process it as an Assistant Input message. */
- type: "assistant_input";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
  customSessionId?: string;
  /**
@@ -15,4 +13,6 @@ export interface AssistantInput {
  * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/speech-to-speech-evi/chat#receive.AssistantMessage).
  */
  text: string;
+ /** The type of message sent through the socket; must be `assistant_input` for our server to correctly identify and process it as an Assistant Input message. */
+ type: "assistant_input";
  }
package/api/resources/empathicVoice/types/AssistantMessage.d.ts
@@ -6,20 +6,20 @@ import * as Hume from "../../../index";
  * When provided, the output is an assistant message.
  */
  export interface AssistantMessage {
- /**
- * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
- *
- * This message contains both a transcript of the assistant's response and the expression measurement predictions of the assistant's audio output.
- */
- type: "assistant_message";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
  customSessionId?: string;
+ /** Indicates if this message was inserted into the conversation as text from an [Assistant Input message](/reference/speech-to-speech-evi/chat#send.AssistantInput.text). */
+ fromText: boolean;
  /** ID of the assistant message. Allows the Assistant Message to be tracked and referenced. */
  id?: string;
  /** Transcript of the message. */
  message: Hume.empathicVoice.ChatMessage;
  /** Inference model results. */
  models: Hume.empathicVoice.Inference;
- /** Indicates if this message was inserted into the conversation as text from an [Assistant Input message](/reference/speech-to-speech-evi/chat#send.AssistantInput.text). */
- fromText: boolean;
+ /**
+ * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
+ *
+ * This message contains both a transcript of the assistant's response and the expression measurement predictions of the assistant's audio output.
+ */
+ type: "assistant_message";
  }
package/api/resources/empathicVoice/types/AssistantProsody.d.ts
@@ -6,16 +6,16 @@ import * as Hume from "../../../index";
  * When provided, the output is an Assistant Prosody message.
  */
  export interface AssistantProsody {
+ /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
+ customSessionId?: string;
+ /** Unique identifier for the segment. */
+ id?: string;
+ /** Inference model results. */
+ models: Hume.empathicVoice.Inference;
  /**
  * The type of message sent through the socket; for an Assistant Prosody message, this must be `assistant_PROSODY`.
  *
  * This message the expression measurement predictions of the assistant's audio output.
  */
  type: "assistant_prosody";
- /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
- customSessionId?: string;
- /** Inference model results. */
- models: Hume.empathicVoice.Inference;
- /** Unique identifier for the segment. */
- id?: string;
  }
package/api/resources/empathicVoice/types/AudioConfiguration.d.ts
@@ -3,10 +3,10 @@
  */
  import * as Hume from "../../../index";
  export interface AudioConfiguration {
- /** Encoding format of the audio input, such as `linear16`. */
- encoding: Hume.empathicVoice.Encoding;
  /** Number of audio channels. */
  channels: number;
+ /** Encoding format of the audio input, such as `linear16`. */
+ encoding: Hume.empathicVoice.Encoding;
  /** Audio sample rate. Number of samples per second in the audio input, measured in Hertz. */
  sampleRate: number;
  }
package/api/resources/empathicVoice/types/AudioInput.d.ts
@@ -5,12 +5,6 @@
  * When provided, the input is audio.
  */
  export interface AudioInput {
- /**
- * The type of message sent through the socket; must be `audio_input` for our server to correctly identify and process it as an Audio Input message.
- *
- * This message is used for sending audio input data to EVI for processing and expression measurement. Audio data should be sent as a continuous stream, encoded in Base64.
- */
- type: "audio_input";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
  customSessionId?: string;
  /**
@@ -23,4 +17,10 @@ export interface AudioInput {
  * Hume recommends streaming audio with a buffer window of 20 milliseconds (ms), or 100 milliseconds (ms) for web applications.
  */
  data: string;
+ /**
+ * The type of message sent through the socket; must be `audio_input` for our server to correctly identify and process it as an Audio Input message.
+ *
+ * This message is used for sending audio input data to EVI for processing and expression measurement. Audio data should be sent as a continuous stream, encoded in Base64.
+ */
+ type: "audio_input";
  }
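
The relocated `type` docs on `AudioInput` restate the transport contract: audio is sent as a continuous Base64-encoded stream, ideally in roughly 20 ms buffers (100 ms for web). A sketch, assuming the SDK socket's `sendAudioInput` helper fills in the `audio_input` type; the PCM source here is a stand-in:

```typescript
import { HumeClient } from "hume";

async function streamMicrophone(pcmChunks: AsyncIterable<Buffer>) {
  const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });
  const socket = client.empathicVoice.chat.connect({});

  for await (const pcm of pcmChunks) {
    // AudioInput.data carries the Base64-encoded audio chunk.
    socket.sendAudioInput({ data: pcm.toString("base64") });
  }
}
```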
package/api/resources/empathicVoice/types/AudioOutput.d.ts
@@ -5,14 +5,14 @@
  * The type of message sent through the socket; for an Audio Output message, this must be `audio_output`.
  */
  export interface AudioOutput {
- /** The type of message sent through the socket; for an Audio Output message, this must be `audio_output`. */
- type: "audio_output";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
  customSessionId?: string;
+ /** Base64 encoded audio output. This encoded audio is transmitted to the client, where it can be decoded and played back as part of the user interaction. */
+ data: string;
  /** ID of the audio output. Allows the Audio Output message to be tracked and referenced. */
  id: string;
  /** Index of the chunk of audio relative to the whole audio segment. */
  index: number;
- /** Base64 encoded audio output. This encoded audio is transmitted to the client, where it can be decoded and played back as part of the user interaction. */
- data: string;
+ /** The type of message sent through the socket; for an Audio Output message, this must be `audio_output`. */
+ type: "audio_output";
  }
package/api/resources/empathicVoice/types/BuiltinToolConfig.d.ts
@@ -3,7 +3,7 @@
  */
  import * as Hume from "../../../index";
  export interface BuiltinToolConfig {
- name: Hume.empathicVoice.BuiltInTool;
  /** Optional text passed to the supplemental LLM if the tool call fails. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation. */
  fallbackContent?: string;
+ name: Hume.empathicVoice.BuiltInTool;
  }
package/api/resources/empathicVoice/types/ChatMessage.d.ts
@@ -3,10 +3,10 @@
  */
  import * as Hume from "../../../index";
  export interface ChatMessage {
- /** Role of who is providing the message. */
- role: Hume.empathicVoice.Role;
  /** Transcript of the message. */
  content?: string;
+ /** Role of who is providing the message. */
+ role: Hume.empathicVoice.Role;
  /** Function call name and arguments. */
  toolCall?: Hume.empathicVoice.ToolCallMessage;
  /** Function call response from client. */
package/api/resources/empathicVoice/types/ChatMetadata.d.ts
@@ -5,14 +5,6 @@
  * When provided, the output is a chat metadata message.
  */
  export interface ChatMetadata {
- /**
- * The type of message sent through the socket; for a Chat Metadata message, this must be `chat_metadata`.
- *
- * The Chat Metadata message is the first message you receive after establishing a connection with EVI and contains important identifiers for the current Chat session.
- */
- type: "chat_metadata";
- /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
- customSessionId?: string;
  /**
  * ID of the Chat Group.
  *
@@ -23,6 +15,14 @@ export interface ChatMetadata {
  chatGroupId: string;
  /** ID of the Chat session. Allows the Chat session to be tracked and referenced. */
  chatId: string;
+ /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
+ customSessionId?: string;
  /** ID of the initiating request. */
  requestId?: string;
+ /**
+ * The type of message sent through the socket; for a Chat Metadata message, this must be `chat_metadata`.
+ *
+ * The Chat Metadata message is the first message you receive after establishing a connection with EVI and contains important identifiers for the current Chat session.
+ */
+ type: "chat_metadata";
  }
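
The `ChatMetadata` docs above note it is the first message received after connecting and that it carries the session identifiers. A sketch of persisting `chatGroupId` so a later connection can resume via the `resumed_chat_group_id` query parameter shown earlier; the `SubscribeEvent` union name is an assumption about this SDK's generated types:

```typescript
import type { Hume } from "hume";

let savedChatGroupId: string | undefined;

function handleMessage(message: Hume.empathicVoice.SubscribeEvent): void {
  if (message.type === "chat_metadata") {
    // Reuse later as resumed_chat_group_id to continue this Chat Group.
    savedChatGroupId = message.chatGroupId;
  }
}
```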
package/api/resources/empathicVoice/types/Context.d.ts
@@ -3,6 +3,12 @@
  */
  import * as Hume from "../../../index";
  export interface Context {
+ /**
+ * The context to be injected into the conversation. Helps inform the LLM's response by providing relevant information about the ongoing conversation.
+ *
+ * This text will be appended to the end of [user_messages](/reference/speech-to-speech-evi/chat#receive.UserMessage.message.content) based on the chosen persistence level. For example, if you want to remind EVI of its role as a helpful weather assistant, the context you insert will be appended to the end of user messages as `{Context: You are a helpful weather assistant}`.
+ */
+ text: string;
  /**
  * The persistence level of the injected context. Specifies how long the injected context will remain active in the session.
  *
@@ -11,10 +17,4 @@ export interface Context {
  * - **Persistent**: Context that is applied to all subsequent assistant responses for the remainder of the Chat.
  */
  type?: Hume.empathicVoice.ContextType;
- /**
- * The context to be injected into the conversation. Helps inform the LLM's response by providing relevant information about the ongoing conversation.
- *
- * This text will be appended to the end of [user_messages](/reference/speech-to-speech-evi/chat#receive.UserMessage.message.content) based on the chosen persistence level. For example, if you want to remind EVI of its role as a helpful weather assistant, the context you insert will be appended to the end of user messages as `{Context: You are a helpful weather assistant}`.
- */
- text: string;
  }
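
`Context.text` above is appended to user messages as `{Context: …}` at the chosen persistence level. A hedged sketch of injecting persistent context through session settings — the socket's `sendSessionSettings` helper exists in this SDK, but the payload shape below is assembled from this diff rather than verified against 0.13.8:

```typescript
import type { Hume } from "hume";

// `socket` stands in for the object returned by
// client.empathicVoice.chat.connect().
function injectAssistantRole(socket: {
  sendSessionSettings(settings: Hume.empathicVoice.SessionSettings): void;
}): void {
  socket.sendSessionSettings({
    type: "session_settings",
    context: {
      // Appended to subsequent user messages as `{Context: ...}`.
      text: "You are a helpful weather assistant.",
      type: "persistent", // persists for the remainder of the Chat
    },
  });
}
```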