hume 0.13.3 → 0.13.4

This diff reflects the content of publicly available package versions released to one of the supported registries, and is provided for informational purposes only.
Files changed (65)
  1. package/.mock/definition/empathic-voice/__package__.yml +25 -27
  2. package/.mock/definition/empathic-voice/chat.yml +10 -10
  3. package/.mock/definition/empathic-voice/configs.yml +1 -11
  4. package/.mock/definition/tts/__package__.yml +169 -120
  5. package/.mock/definition/tts/streamInput.yml +56 -0
  6. package/.mock/fern.config.json +1 -1
  7. package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -5
  8. package/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  9. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  10. package/api/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  11. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  12. package/api/resources/empathicVoice/types/SessionSettings.d.ts +7 -7
  13. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  14. package/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +2 -2
  15. package/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +3 -3
  16. package/api/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  17. package/api/resources/tts/types/PublishTts.d.ts +23 -0
  18. package/api/resources/tts/types/PublishTts.js +5 -0
  19. package/api/resources/tts/types/SnippetAudioChunk.d.ts +1 -1
  20. package/api/resources/tts/types/index.d.ts +7 -6
  21. package/api/resources/tts/types/index.js +7 -6
  22. package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -5
  23. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  24. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  25. package/dist/api/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  26. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  27. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +7 -7
  28. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  29. package/dist/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +2 -2
  30. package/dist/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +3 -3
  31. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  32. package/dist/api/resources/tts/types/PublishTts.d.ts +23 -0
  33. package/dist/api/resources/tts/types/PublishTts.js +5 -0
  34. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +1 -1
  35. package/dist/api/resources/tts/types/index.d.ts +7 -6
  36. package/dist/api/resources/tts/types/index.js +7 -6
  37. package/dist/serialization/resources/tts/types/PublishTts.d.ts +19 -0
  38. package/dist/serialization/resources/tts/types/PublishTts.js +50 -0
  39. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -1
  40. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +1 -1
  41. package/dist/serialization/resources/tts/types/index.d.ts +7 -6
  42. package/dist/serialization/resources/tts/types/index.js +7 -6
  43. package/dist/version.d.ts +1 -1
  44. package/dist/version.js +1 -1
  45. package/dist/wrapper/EVIWebAudioPlayer.d.ts +6 -7
  46. package/dist/wrapper/EVIWebAudioPlayer.js +237 -73
  47. package/dist/wrapper/convertFrequencyScale.d.ts +1 -0
  48. package/dist/wrapper/convertFrequencyScale.js +28 -0
  49. package/dist/wrapper/generateEmptyFft.d.ts +1 -0
  50. package/dist/wrapper/generateEmptyFft.js +6 -0
  51. package/package.json +2 -1
  52. package/serialization/resources/tts/types/PublishTts.d.ts +19 -0
  53. package/serialization/resources/tts/types/PublishTts.js +50 -0
  54. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -1
  55. package/serialization/resources/tts/types/SnippetAudioChunk.js +1 -1
  56. package/serialization/resources/tts/types/index.d.ts +7 -6
  57. package/serialization/resources/tts/types/index.js +7 -6
  58. package/version.d.ts +1 -1
  59. package/version.js +1 -1
  60. package/wrapper/EVIWebAudioPlayer.d.ts +6 -7
  61. package/wrapper/EVIWebAudioPlayer.js +237 -73
  62. package/wrapper/convertFrequencyScale.d.ts +1 -0
  63. package/wrapper/convertFrequencyScale.js +28 -0
  64. package/wrapper/generateEmptyFft.d.ts +1 -0
  65. package/wrapper/generateEmptyFft.js +6 -0

package/.mock/definition/empathic-voice/__package__.yml
@@ -32,7 +32,7 @@ types:
  inflections and tones to the text based on the user's expressions and
  the context of the conversation. The synthesized audio is streamed
  back to the user as an [Assistant
- Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
+ Message](/reference/speech-to-speech-evi/chat#receive.AssistantMessage).
  source:
  openapi: evi-asyncapi.json
  AudioConfiguration:
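
The AssistantInput docs above describe text being synthesized and streamed back as an AssistantMessage. A minimal sketch of that round trip with the TypeScript SDK in this package (the `connect` options and the `sendAssistantInput` helper name are assumptions; check the generated client for exact signatures):

```ts
import { HumeClient } from "hume";

const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });
const socket = await client.empathicVoice.chat.connect({ configId: "<config-id>" });

socket.on("message", (message) => {
  if (message.type === "assistant_message") {
    // Synthesized speech for the injected text comes back as an AssistantMessage.
    console.log("assistant:", message.message.content);
  }
});

socket.on("open", () => {
  // Inject assistant text; EVI synthesizes it with its expressive TTS model.
  socket.sendAssistantInput({ text: "Welcome back! Where were we?" });
});
```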
@@ -165,7 +165,7 @@ types:

  Once this message is sent, EVI will not respond until a [Resume
  Assistant
- message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type)
+ message](/reference/speech-to-speech-evi/chat#send.ResumeAssistantMessage)
  is sent. When paused, EVI won't respond, but transcriptions of your
  audio inputs will still be recorded.
  custom_session_id:
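
To exercise the pause/resume behavior documented here, reusing the `socket` from the sketch above (the `sendPauseAssistantMessage`/`sendResumeAssistantMessage` helper names are assumptions about the generated socket client):

```ts
// Pause EVI: it stops responding, but user audio is still transcribed.
socket.sendPauseAssistantMessage({});

// Resume five seconds later; EVI begins responding again.
setTimeout(() => socket.sendResumeAssistantMessage({}), 5_000);
```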
@@ -228,7 +228,7 @@ types:


  For more information, please refer to the [Session Settings
- guide](/docs/empathic-voice-interface-evi/configuration/session-settings).
+ guide](/docs/speech-to-speech-evi/configuration/session-settings).
  custom_session_id:
  type: optional<string>
  docs: >-
@@ -245,8 +245,8 @@ types:
  It is recommended to pass a `custom_session_id` if you are using a
  Custom Language Model. Please see our guide to [using a custom
  language
- model](/docs/empathic-voice-interface-evi/guides/custom-language-model)
- with EVI to learn more.
+ model](/docs/speech-to-speech-evi/guides/custom-language-model) with
+ EVI to learn more.
  system_prompt:
  type: optional<string>
  docs: >-
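
A sketch of sending session settings with a `custom_session_id`, as the docs above recommend when pairing EVI with a custom language model. It continues from the same socket; the camelCase field names assume the SDK's generated `SessionSettings` type (the wire format is snake_case):

```ts
socket.sendSessionSettings({
  customSessionId: "user-1234-session-42", // lets a custom language model correlate requests
  systemPrompt: "You are a concise, friendly assistant.",
});
```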
@@ -268,7 +268,7 @@ types:


  For help writing a system prompt, see our [Prompting
- Guide](/docs/empathic-voice-interface-evi/guides/prompting).
+ Guide](/docs/speech-to-speech-evi/guides/prompting).
  context:
  type: optional<Context>
  docs: >-
@@ -294,7 +294,7 @@ types:
  in PCM Linear 16 (16-bit, little-endian, signed PCM WAV data). For
  detailed instructions on how to configure session settings for PCM
  Linear 16 audio, please refer to the [Session Settings
- guide](/docs/empathic-voice-interface-evi/configuration/session-settings).
+ guide](/docs/speech-to-speech-evi/configuration/session-settings).
  language_model_api_key:
  type: optional<string>
  docs: >-
@@ -314,7 +314,7 @@ types:
  searching the web or calling external APIs. Built-in tools, like web
  search, are natively integrated, while user-defined tools are created
  and invoked by the user. To learn more, see our [Tool Use
- Guide](/docs/empathic-voice-interface-evi/features/tool-use).
+ Guide](/docs/speech-to-speech-evi/features/tool-use).
  builtin_tools:
  type: optional<list<BuiltinToolConfig>>
  docs: >-
@@ -325,7 +325,7 @@ types:
  searching the web or calling external APIs. Built-in tools, like web
  search, are natively integrated, while user-defined tools are created
  and invoked by the user. To learn more, see our [Tool Use
- Guide](/docs/empathic-voice-interface-evi/features/tool-use).
+ Guide](/docs/speech-to-speech-evi/features/tool-use).


  Currently, the only built-in tool Hume provides is **Web Search**.
@@ -350,7 +350,7 @@ types:
  Using this field, you can personalize responses based on
  session-specific details. For more guidance, see our [guide on using
  dynamic
- variables](/docs/empathic-voice-interface-evi/features/dynamic-variables).
+ variables](/docs/speech-to-speech-evi/features/dynamic-variables).
  source:
  openapi: evi-asyncapi.json
  Tool:
@@ -395,7 +395,7 @@ types:


  Upon receiving a [Tool Call
- message](/reference/empathic-voice-interface-evi/chat/chat#receive.ToolCallMessage.type)
+ message](/reference/speech-to-speech-evi/chat#receive.ToolCallMessage)
  and failing to invoke the function, this message is sent to notify EVI
  of the tool's failure.
  custom_session_id:
@@ -418,7 +418,7 @@ types:
  invocation, ensuring that the Tool Error message is linked to the
  appropriate tool call request. The specified `tool_call_id` must match
  the one received in the [Tool Call
- message](/reference/empathic-voice-interface-evi/chat/chat#receive.ToolCallMessage.type).
+ message](/reference/speech-to-speech-evi/chat#receive.ToolCallMessage).
  content:
  type: optional<string>
  docs: >-
@@ -449,7 +449,7 @@ types:


  Upon receiving a [Tool Call
- message](/reference/empathic-voice-interface-evi/chat/chat#receive.ToolCallMessage.type)
+ message](/reference/speech-to-speech-evi/chat#receive.ToolCallMessage)
  and successfully invoking the function, this message is sent to convey
  the result of the function call back to EVI.
  custom_session_id:
@@ -467,7 +467,7 @@ types:
  invocation, ensuring that the correct response is linked to the
  appropriate request. The specified `tool_call_id` must match the one
  received in the [Tool Call
- message](/reference/empathic-voice-interface-evi/chat/chat#receive.ToolCallMessage.tool_call_id).
+ message](/reference/speech-to-speech-evi/chat#receive.ToolCallMessage.tool_call_id).
  content:
  type: string
  docs: >-
@@ -482,7 +482,7 @@ types:
  Include this optional field to help the supplemental LLM identify
  which tool generated the response. The specified `tool_name` must
  match the one received in the [Tool Call
- message](/reference/empathic-voice-interface-evi/chat/chat#receive.ToolCallMessage.type).
+ message](/reference/speech-to-speech-evi/chat#receive.ToolCallMessage).
  tool_type:
  type: optional<ToolType>
  docs: >-
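
The ToolErrorMessage and ToolResponseMessage docs above hinge on echoing the `tool_call_id` from the ToolCallMessage. A sketch of that round trip on the same socket, assuming the `sendToolResponseMessage`/`sendToolErrorMessage` helpers and a hypothetical local `getWeather` function:

```ts
socket.on("message", async (message) => {
  if (message.type === "tool_call" && message.name === "get_weather") {
    try {
      // getWeather is a hypothetical local function implementing the tool.
      const result = await getWeather(JSON.parse(message.parameters));
      socket.sendToolResponseMessage({
        toolCallId: message.toolCallId, // must match the ToolCallMessage
        content: JSON.stringify(result),
        toolName: "get_weather", // optional; helps the supplemental LLM attribute the result
      });
    } catch (err) {
      socket.sendToolErrorMessage({
        toolCallId: message.toolCallId,
        error: "Tool invocation failed",
        content: String(err),
      });
    }
  }
});
```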
@@ -584,7 +584,7 @@ types:
  docs: >-
  Indicates if this message was inserted into the conversation as text
  from an [Assistant Input
- message](/reference/empathic-voice-interface-evi/chat/chat#send.AssistantInput.text).
+ message](/reference/speech-to-speech-evi/chat#send.AssistantInput.text).
  source:
  openapi: evi-asyncapi.json
  AssistantProsody:
@@ -693,14 +693,14 @@ types:


  Used to resume a Chat when passed in the
- [resumed_chat_group_id](/reference/empathic-voice-interface-evi/chat/chat#request.query.resumed_chat_group_id)
+ [resumed_chat_group_id](/reference/speech-to-speech-evi/chat#request.query.resumed_chat_group_id)
  query parameter of a subsequent connection request. This allows EVI to
  continue the conversation from where it left off within the Chat
  Group.


  Learn more about [supporting chat
- resumability](/docs/empathic-voice-interface-evi/faq#does-evi-support-chat-resumability)
+ resumability](/docs/speech-to-speech-evi/faq#does-evi-support-chat-resumability)
  from the EVI FAQ.
  chat_id:
  type: string
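
A sketch of the resumption flow this hunk documents: capture the `chat_group_id` from the ChatMetadata message, then pass it back on a later connection. The `resumedChatGroupId` option name is an assumption based on the SDK's camelCase conventions:

```ts
let chatGroupId: string | undefined;
socket.on("message", (message) => {
  if (message.type === "chat_metadata") chatGroupId = message.chatGroupId;
});

// Later, in a new session: EVI continues from where the chat group left off.
const resumed = await client.empathicVoice.chat.connect({
  configId: "<config-id>",
  resumedChatGroupId: chatGroupId,
});
```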
@@ -897,9 +897,9 @@ types:
  docs: >-
  Indicates whether a response to the tool call is required from the
  developer, either in the form of a [Tool Response
- message](/reference/empathic-voice-interface-evi/chat/chat#send.ToolResponseMessage.type)
+ message](/reference/speech-to-speech-evi/chat#send.ToolResponseMessage)
  or a [Tool Error
- message](/reference/empathic-voice-interface-evi/chat/chat#send.ToolErrorMessage.type).
+ message](/reference/speech-to-speech-evi/chat#send.ToolErrorMessage).
  source:
  openapi: evi-asyncapi.json
  UserInterruption:
@@ -939,12 +939,10 @@ types:

  This message contains both a transcript of the user's input and the
  expression measurement predictions if the input was sent as an [Audio
- Input
- message](/reference/empathic-voice-interface-evi/chat/chat#send.AudioInput.type).
+ Input message](/reference/speech-to-speech-evi/chat#send.AudioInput).
  Expression measurement predictions are not provided for a [User Input
- message](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.type),
- as the prosody model relies on audio input and cannot process text
- alone.
+ message](/reference/speech-to-speech-evi/chat#send.UserInput), as the
+ prosody model relies on audio input and cannot process text alone.
  custom_session_id:
  type: optional<string>
  docs: >-
@@ -964,7 +962,7 @@ types:
  docs: >-
  Indicates if this message was inserted into the conversation as text
  from a [User
- Input](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.text)
+ Input](/reference/speech-to-speech-evi/chat#send.UserInput.text)
  message.
  interim:
  type: boolean
@@ -981,7 +979,7 @@ types:


  Interim transcripts are only sent when the
- [`verbose_transcription`](/reference/empathic-voice-interface-evi/chat/chat#request.query.verbose_transcription)
+ [`verbose_transcription`](/reference/speech-to-speech-evi/chat#request.query.verbose_transcription)
  query parameter is set to `true` in the initial handshake.
  source:
  openapi: evi-asyncapi.json

package/.mock/definition/empathic-voice/chat.yml
@@ -13,11 +13,11 @@ channel:
  Include this ID in your connection request to equip EVI with the Prompt,
  Language Model, Voice, and Tools associated with the specified
  configuration. If omitted, EVI will apply [default configuration
- settings](/docs/empathic-voice-interface-evi/configuration/build-a-configuration#default-configuration).
+ settings](/docs/speech-to-speech-evi/configuration/build-a-configuration#default-configuration).


  For help obtaining this ID, see our [Configuration
- Guide](/docs/empathic-voice-interface-evi/configuration).
+ Guide](/docs/speech-to-speech-evi/configuration).
  config_version:
  type: optional<integer>
  docs: >-
@@ -57,7 +57,7 @@ channel:


  - [Chat
- Metadata](/reference/empathic-voice-interface-evi/chat/chat#receive.ChatMetadata.type):
+ Metadata](/reference/speech-to-speech-evi/chat#receive.ChatMetadata):
  Upon establishing a WebSocket connection with EVI, the user receives a
  Chat Metadata message. This message contains a `chat_group_id`, which
  can be used to resume conversations within this chat group in future
@@ -65,14 +65,14 @@ channel:


  - [List Chats
- endpoint](/reference/empathic-voice-interface-evi/chats/list-chats): Use
- the GET `/v0/evi/chats` endpoint to obtain the Chat Group ID of
- individual Chat sessions. This endpoint lists all available Chat
- sessions and their associated Chat Group ID.
+ endpoint](/reference/speech-to-speech-evi/chats/list-chats): Use the GET
+ `/v0/evi/chats` endpoint to obtain the Chat Group ID of individual Chat
+ sessions. This endpoint lists all available Chat sessions and their
+ associated Chat Group ID.


  - [List Chat Groups
- endpoint](/reference/empathic-voice-interface-evi/chat-groups/list-chat-groups):
+ endpoint](/reference/speech-to-speech-evi/chat-groups/list-chat-groups):
  Use the GET `/v0/evi/chat_groups` endpoint to obtain the Chat Group IDs
  of all Chat Groups associated with an API key. This endpoint returns a
  list of all available chat groups.
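
Recovering a Chat Group ID after the fact, per the endpoints listed above. A sketch assuming `listChats` returns an async-iterable page, as recent versions of the generated SDK do:

```ts
const chats = await client.empathicVoice.chats.listChats({ pageNumber: 0, pageSize: 10 });
for await (const chat of chats) {
  console.log(chat.id, "belongs to chat group", chat.chatGroupId);
}
```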
@@ -89,9 +89,9 @@ channel:
  A flag to enable verbose transcription. Set this query parameter to
  `true` to have unfinalized user transcripts be sent to the client as
  interim UserMessage messages. The
- [interim](/reference/empathic-voice-interface-evi/chat/chat#receive.UserMessage.interim)
+ [interim](/reference/speech-to-speech-evi/chat#receive.UserMessage.interim)
  field on a
- [UserMessage](/reference/empathic-voice-interface-evi/chat/chat#receive.UserMessage.type)
+ [UserMessage](/reference/speech-to-speech-evi/chat#receive.UserMessage)
  denotes whether the message is "interim" or "final."
  event_limit:
  type: optional<integer>
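
A sketch of verbose transcription as described above: connect with the flag enabled (the `verboseTranscription` option name is an assumption) and branch on the `interim` field of each UserMessage:

```ts
const verboseSocket = await client.empathicVoice.chat.connect({
  configId: "<config-id>",
  verboseTranscription: true,
});

verboseSocket.on("message", (message) => {
  if (message.type === "user_message") {
    if (message.interim) {
      process.stdout.write(`\r(interim) ${message.message.content}`); // unfinalized transcript
    } else {
      console.log(`\n(final) ${message.message.content}`);
    }
  }
});
```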

package/.mock/definition/empathic-voice/configs.yml
@@ -140,17 +140,7 @@ service:
  properties:
  evi_version:
  type: string
- docs: >-
- Specifies the EVI version to use. See our [EVI Version
- Guide](/docs/speech-to-speech-evi/configuration/evi-version) for
- differences between versions.
-
-
- **We're officially sunsetting EVI versions 1 and 2 on August 30,
- 2025**. To keep things running smoothly, be sure to [migrate to
- EVI
- 3](/docs/speech-to-speech-evi/configuration/evi-version#migrating-to-evi-3)
- before then.
+ docs: EVI version to use. Only version `3` is supported.
  name:
  type: string
  docs: Name applied to all versions of a particular Config.

package/.mock/definition/tts/__package__.yml
@@ -189,6 +189,175 @@ service:
  source:
  openapi: tts-openapi.json
  types:
+ PublishTts:
+ docs: Input message type for the TTS stream.
+ properties:
+ text:
+ type: optional<string>
+ docs: The input text to be converted to speech output.
+ default: ''
+ validation:
+ maxLength: 5000
+ description:
+ type: optional<string>
+ docs: >-
+ Natural language instructions describing how the text should be spoken
+ by the model (e.g., `"a soft, gentle voice with a strong British
+ accent"`).
+ validation:
+ maxLength: 1000
+ voice:
+ type: optional<PostedUtteranceVoice>
+ docs: >-
+ The name or ID of the voice from the `Voice Library` to be used as the
+ speaker for this and all subsequent utterances, until the `"voice"`
+ field is updated again.
+ speed:
+ type: optional<double>
+ docs: A relative measure of how fast this utterance should be spoken.
+ default: 1
+ validation:
+ min: 0.25
+ max: 3
+ trailing_silence:
+ type: optional<double>
+ docs: Duration of trailing silence (in seconds) to add to this utterance
+ default: 0
+ validation:
+ min: 0
+ max: 5
+ flush:
+ type: optional<boolean>
+ docs: >-
+ Force the generation of audio regardless of how much text has been
+ supplied.
+ default: false
+ close:
+ type: optional<boolean>
+ docs: Force the generation of audio and close the stream.
+ default: false
+ source:
+ openapi: tts-asyncapi.json
+ PostedUtteranceVoiceWithId:
+ properties:
+ id:
+ type: string
+ docs: The unique ID associated with the **Voice**.
+ provider:
+ type: optional<VoiceProvider>
+ docs: >-
+ Specifies the source provider associated with the chosen voice.
+
+
+ - **`HUME_AI`**: Select voices from Hume's [Voice
+ Library](https://platform.hume.ai/tts/voice-library), containing a
+ variety of preset, shared voices.
+
+ - **`CUSTOM_VOICE`**: Select from voices you've personally generated
+ and saved in your account.
+
+
+ If no provider is explicitly set, the default provider is
+ `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
+ must explicitly set the provider to `HUME_AI`.
+
+
+ Preset voices from Hume's **Voice Library** are accessible by all
+ users. In contrast, your custom voices are private and accessible only
+ via requests authenticated with your API key.
+ source:
+ openapi: tts-openapi.json
+ PostedUtteranceVoiceWithName:
+ properties:
+ name:
+ type: string
+ docs: The name of a **Voice**.
+ provider:
+ type: optional<VoiceProvider>
+ docs: >-
+ Specifies the source provider associated with the chosen voice.
+
+
+ - **`HUME_AI`**: Select voices from Hume's [Voice
+ Library](https://platform.hume.ai/tts/voice-library), containing a
+ variety of preset, shared voices.
+
+ - **`CUSTOM_VOICE`**: Select from voices you've personally generated
+ and saved in your account.
+
+
+ If no provider is explicitly set, the default provider is
+ `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
+ must explicitly set the provider to `HUME_AI`.
+
+
+ Preset voices from Hume's **Voice Library** are accessible by all
+ users. In contrast, your custom voices are private and accessible only
+ via requests authenticated with your API key.
+ source:
+ openapi: tts-openapi.json
+ VoiceProvider:
+ enum:
+ - HUME_AI
+ - CUSTOM_VOICE
+ source:
+ openapi: tts-openapi.json
+ PostedUtteranceVoice:
+ discriminated: false
+ union:
+ - type: PostedUtteranceVoiceWithId
+ - type: PostedUtteranceVoiceWithName
+ source:
+ openapi: tts-openapi.json
+ AudioFormatType:
+ enum:
+ - mp3
+ - pcm
+ - wav
+ source:
+ openapi: tts-openapi.json
+ SnippetAudioChunk:
+ properties:
+ generation_id:
+ type: string
+ docs: >-
+ The generation ID of the parent snippet that this chunk corresponds
+ to.
+ snippet_id:
+ type: string
+ docs: The ID of the parent snippet that this chunk corresponds to.
+ text:
+ type: string
+ docs: The text of the parent snippet that this chunk corresponds to.
+ transcribed_text:
+ type: optional<string>
+ docs: >-
+ The transcribed text of the generated audio of the parent snippet that
+ this chunk corresponds to. It is only present if `instant_mode` is set
+ to `false`.
+ chunk_index:
+ type: integer
+ docs: The index of the audio chunk in the snippet.
+ audio:
+ type: string
+ docs: The generated audio output chunk in the requested format.
+ audio_format:
+ type: AudioFormatType
+ docs: The generated audio output format.
+ is_last_chunk:
+ type: boolean
+ docs: >-
+ Whether or not this is the last chunk streamed back from the decoder
+ for one input snippet.
+ utterance_index:
+ type: optional<integer>
+ docs: >-
+ The index of the utterance in the request that the parent snippet of
+ this chunk corresponds to.
+ snippet:
+ type: optional<Snippet>
+ source:
+ openapi: tts-openapi.json
  PostedContextWithGenerationId:
  properties:
  generation_id:
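
The headline addition above is `PublishTts`, the client-to-server message for the new TTS input stream. On the wire it is snake_case JSON matching the YAML schema; a typical session streams partial text, flushes to force synthesis, then closes. A sketch of the message bodies (the voice name is a placeholder):

```ts
// Messages a client would publish over the TTS input stream, in order.
const publishMessages = [
  { text: "Hello there, ", voice: { name: "<voice-name>", provider: "HUME_AI" } },
  { text: "welcome to streaming TTS.", speed: 1.1, trailing_silence: 0.5 },
  { flush: true }, // force generation of whatever text has been buffered
  { close: true }, // force generation and end the stream
];
```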
@@ -220,13 +389,6 @@ types:
  is `48000 Hz`.
  source:
  openapi: tts-openapi.json
- AudioFormatType:
- enum:
- - mp3
- - pcm
- - wav
- source:
- openapi: tts-openapi.json
  ReturnGeneration:
  properties:
  generation_id:
@@ -435,48 +597,6 @@ types:
  base64 string.
  source:
  openapi: tts-openapi.json
- SnippetAudioChunk:
- properties:
- generation_id:
- type: string
- docs: >-
- The generation ID of the parent snippet that this chunk corresponds
- to.
- snippet_id:
- type: string
- docs: The ID of the parent snippet that this chunk corresponds to.
- text:
- type: string
- docs: The text of the parent snippet that this chunk corresponds to.
- transcribed_text:
- type: optional<string>
- docs: >-
- The transcribed text of the generated audio of the parent snippet that
- this chunk corresponds to. It is only present if `instant_mode` is set
- to `false`.
- chunk_index:
- type: integer
- docs: The index of the audio chunk in the snippet.
- audio:
- type: string
- docs: The generated audio output chunk in the requested format.
- audio_format:
- type: AudioFormatType
- docs: The generated audio output format.
- is_last_chunk:
- type: boolean
- docs: >-
- Whether or not this is the last chunk streamed back from the decoder
- for one input snippet.
- utterance_index:
- type: optional<integer>
- docs: >-
- The index of the utterance in the request that the parent snippet of
- this chunk corresponds to.
- snippet:
- type: Snippet
- source:
- openapi: tts-openapi.json
  PostedUtterance:
  properties:
  text:
@@ -547,77 +667,6 @@ types:
  type: string
  source:
  openapi: tts-openapi.json
- PostedUtteranceVoiceWithId:
- properties:
- id:
- type: string
- docs: The unique ID associated with the **Voice**.
- provider:
- type: optional<VoiceProvider>
- docs: >-
- Specifies the source provider associated with the chosen voice.
-
-
- - **`HUME_AI`**: Select voices from Hume's [Voice
- Library](https://platform.hume.ai/tts/voice-library), containing a
- variety of preset, shared voices.
-
- - **`CUSTOM_VOICE`**: Select from voices you've personally generated
- and saved in your account.
-
-
- If no provider is explicitly set, the default provider is
- `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
- must explicitly set the provider to `HUME_AI`.
-
-
- Preset voices from Hume's **Voice Library** are accessible by all
- users. In contrast, your custom voices are private and accessible only
- via requests authenticated with your API key.
- source:
- openapi: tts-openapi.json
- PostedUtteranceVoiceWithName:
- properties:
- name:
- type: string
- docs: The name of a **Voice**.
- provider:
- type: optional<VoiceProvider>
- docs: >-
- Specifies the source provider associated with the chosen voice.
-
-
- - **`HUME_AI`**: Select voices from Hume's [Voice
- Library](https://platform.hume.ai/tts/voice-library), containing a
- variety of preset, shared voices.
-
- - **`CUSTOM_VOICE`**: Select from voices you've personally generated
- and saved in your account.
-
-
- If no provider is explicitly set, the default provider is
- `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
- must explicitly set the provider to `HUME_AI`.
-
-
- Preset voices from Hume's **Voice Library** are accessible by all
- users. In contrast, your custom voices are private and accessible only
- via requests authenticated with your API key.
- source:
- openapi: tts-openapi.json
- VoiceProvider:
- enum:
- - HUME_AI
- - CUSTOM_VOICE
- source:
- openapi: tts-openapi.json
- PostedUtteranceVoice:
- discriminated: false
- union:
- - type: PostedUtteranceVoiceWithId
- - type: PostedUtteranceVoiceWithName
- source:
- openapi: tts-openapi.json
  FormatWav:
  properties: {}
  source:

package/.mock/definition/tts/streamInput.yml
@@ -0,0 +1,56 @@
+ imports:
+ root: __package__.yml
+ channel:
+ path: /stream/input
+ url: prod
+ auth: false
+ docs: Generate emotionally expressive speech.
+ query-parameters:
+ context_generation_id:
+ type: optional<string>
+ docs: >-
+ The ID of a prior TTS generation to use as context for generating
+ consistent speech style and prosody across multiple requests. Including
+ context may increase audio generation times.
+ format_type: root.AudioFormatType
+ strip_headers:
+ type: optional<boolean>
+ default: false
+ docs: >-
+ If enabled, the audio for all the chunks of a generation, once
+ concatenated together, will constitute a single audio file. Otherwise,
+ if disabled, each chunk's audio will be its own audio file, each with
+ its own headers (if applicable).
+ instant_mode:
+ type: optional<boolean>
+ default: true
+ docs: >-
+ Accelerates processing to reduce streaming latency. Incurs approximately
+ 10% additional cost while preserving full voice quality.
+ no_binary:
+ type: optional<boolean>
+ default: false
+ docs: If enabled, no binary websocket messages will be sent to the client.
+ messages:
+ publish:
+ origin: client
+ body:
+ type: root.PublishTts
+ subscribe:
+ origin: server
+ body:
+ type: root.SnippetAudioChunk
+ examples:
+ - messages:
+ - type: publish
+ body: {}
+ - type: subscribe
+ body:
+ request_id: request_id
+ generation_id: generation_id
+ snippet_id: snippet_id
+ text: text
+ chunk_index: 1
+ audio: audio
+ audio_format: mp3
+ is_last_chunk: true
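
Putting the new channel together end to end: a sketch that connects directly over WebSocket, publishes `PublishTts` messages, and collects `SnippetAudioChunk` responses. The URL and `api_key` query-parameter auth are assumptions modeled on Hume's other sockets; this version's file list shows no generated socket client for the channel yet.

```ts
import WebSocket from "ws";

const url =
  "wss://api.hume.ai/v0/tts/stream/input" +
  `?api_key=${process.env.HUME_API_KEY}&format_type=pcm&instant_mode=true&no_binary=true`;
const ws = new WebSocket(url);

const chunks: Buffer[] = [];
ws.on("open", () => {
  ws.send(JSON.stringify({ text: "Streaming text to speech, chunk by chunk." }));
  ws.send(JSON.stringify({ close: true })); // flush remaining text and end the stream
});
ws.on("message", (data) => {
  const chunk = JSON.parse(data.toString()); // a SnippetAudioChunk
  chunks.push(Buffer.from(chunk.audio, "base64")); // audio is base64 in the requested format
  if (chunk.is_last_chunk) {
    console.log(`snippet ${chunk.snippet_id}: ${chunks.length} chunks received`);
  }
});
```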

package/.mock/fern.config.json
@@ -1,4 +1,4 @@
  {
  "organization" : "hume",
- "version" : "0.65.42"
+ "version" : "0.66.15"
  }

package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts
@@ -37,11 +37,7 @@ import * as Hume from "../../../../../../index";
  * }
  */
  export interface PostedConfig {
- /**
- * Specifies the EVI version to use. See our [EVI Version Guide](/docs/speech-to-speech-evi/configuration/evi-version) for differences between versions.
- *
- * **We're officially sunsetting EVI versions 1 and 2 on August 30, 2025**. To keep things running smoothly, be sure to [migrate to EVI 3](/docs/speech-to-speech-evi/configuration/evi-version#migrating-to-evi-3) before then.
- */
+ /** EVI version to use. Only version `3` is supported. */
  eviVersion: string;
  /** Name applied to all versions of a particular Config. */
  name: string;
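
With the sunset complete, a config request now only accepts `eviVersion: "3"`. A sketch using the SDK's `createConfig` (response field names assumed from the generated `ReturnConfig` type):

```ts
import { HumeClient } from "hume";

const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });
const config = await client.empathicVoice.configs.createConfig({
  eviVersion: "3", // the only accepted value as of this release
  name: "weather-assistant-config",
});
console.log("created config", config.id, "at version", config.version);
```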

package/api/resources/empathicVoice/types/AssistantInput.d.ts
@@ -12,7 +12,7 @@ export interface AssistantInput {
  /**
  * Assistant text to synthesize into spoken audio and insert into the conversation.
  *
- * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
+ * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/speech-to-speech-evi/chat#receive.AssistantMessage).
  */
  text: string;
  }