hume 0.13.3 → 0.13.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -5
  2. package/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  3. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  4. package/api/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  5. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  6. package/api/resources/empathicVoice/types/SessionSettings.d.ts +9 -7
  7. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  8. package/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +2 -2
  9. package/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +3 -3
  10. package/api/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  11. package/api/resources/tts/types/PublishTts.d.ts +23 -0
  12. package/api/resources/tts/types/PublishTts.js +5 -0
  13. package/api/resources/tts/types/SnippetAudioChunk.d.ts +6 -1
  14. package/api/resources/tts/types/index.d.ts +7 -6
  15. package/api/resources/tts/types/index.js +7 -6
  16. package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -5
  17. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  18. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  19. package/dist/api/resources/empathicVoice/types/ChatMetadata.d.ts +2 -2
  20. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  21. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +9 -7
  22. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  23. package/dist/api/resources/empathicVoice/types/ToolErrorMessage.d.ts +2 -2
  24. package/dist/api/resources/empathicVoice/types/ToolResponseMessage.d.ts +3 -3
  25. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +3 -3
  26. package/dist/api/resources/tts/types/PublishTts.d.ts +23 -0
  27. package/dist/api/resources/tts/types/PublishTts.js +5 -0
  28. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +6 -1
  29. package/dist/api/resources/tts/types/index.d.ts +7 -6
  30. package/dist/api/resources/tts/types/index.js +7 -6
  31. package/dist/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
  32. package/dist/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
  33. package/dist/serialization/resources/tts/types/PublishTts.d.ts +19 -0
  34. package/dist/serialization/resources/tts/types/PublishTts.js +50 -0
  35. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +2 -1
  36. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +2 -1
  37. package/dist/serialization/resources/tts/types/index.d.ts +7 -6
  38. package/dist/serialization/resources/tts/types/index.js +7 -6
  39. package/dist/version.d.ts +1 -1
  40. package/dist/version.js +1 -1
  41. package/dist/wrapper/EVIWebAudioPlayer.d.ts +6 -7
  42. package/dist/wrapper/EVIWebAudioPlayer.js +237 -73
  43. package/dist/wrapper/SilenceFiller.d.ts +85 -0
  44. package/dist/wrapper/SilenceFiller.js +203 -0
  45. package/dist/wrapper/collate.d.ts +36 -0
  46. package/dist/wrapper/collate.js +126 -0
  47. package/dist/wrapper/convertFrequencyScale.d.ts +1 -0
  48. package/dist/wrapper/convertFrequencyScale.js +28 -0
  49. package/dist/wrapper/generateEmptyFft.d.ts +1 -0
  50. package/dist/wrapper/generateEmptyFft.js +6 -0
  51. package/dist/wrapper/index.d.ts +2 -0
  52. package/dist/wrapper/index.js +5 -1
  53. package/package.json +2 -1
  54. package/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
  55. package/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
  56. package/serialization/resources/tts/types/PublishTts.d.ts +19 -0
  57. package/serialization/resources/tts/types/PublishTts.js +50 -0
  58. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +2 -1
  59. package/serialization/resources/tts/types/SnippetAudioChunk.js +2 -1
  60. package/serialization/resources/tts/types/index.d.ts +7 -6
  61. package/serialization/resources/tts/types/index.js +7 -6
  62. package/version.d.ts +1 -1
  63. package/version.js +1 -1
  64. package/wrapper/EVIWebAudioPlayer.d.ts +6 -7
  65. package/wrapper/EVIWebAudioPlayer.js +237 -73
  66. package/wrapper/SilenceFiller.d.ts +85 -0
  67. package/wrapper/SilenceFiller.js +203 -0
  68. package/wrapper/collate.d.ts +36 -0
  69. package/wrapper/collate.js +126 -0
  70. package/wrapper/convertFrequencyScale.d.ts +1 -0
  71. package/wrapper/convertFrequencyScale.js +28 -0
  72. package/wrapper/generateEmptyFft.d.ts +1 -0
  73. package/wrapper/generateEmptyFft.js +6 -0
  74. package/wrapper/index.d.ts +2 -0
  75. package/wrapper/index.js +5 -1
  76. package/.mock/definition/api.yml +0 -12
  77. package/.mock/definition/empathic-voice/__package__.yml +0 -2973
  78. package/.mock/definition/empathic-voice/chat.yml +0 -175
  79. package/.mock/definition/empathic-voice/chatGroups.yml +0 -627
  80. package/.mock/definition/empathic-voice/chatWebhooks.yml +0 -30
  81. package/.mock/definition/empathic-voice/chats.yml +0 -506
  82. package/.mock/definition/empathic-voice/configs.yml +0 -852
  83. package/.mock/definition/empathic-voice/prompts.yml +0 -558
  84. package/.mock/definition/empathic-voice/tools.yml +0 -626
  85. package/.mock/definition/expression-measurement/__package__.yml +0 -1
  86. package/.mock/definition/expression-measurement/batch/__package__.yml +0 -1803
  87. package/.mock/definition/expression-measurement/stream/__package__.yml +0 -113
  88. package/.mock/definition/expression-measurement/stream/stream.yml +0 -438
  89. package/.mock/definition/tts/__package__.yml +0 -660
  90. package/.mock/definition/tts/voices.yml +0 -143
  91. package/.mock/fern.config.json +0 -4
@@ -1,660 +0,0 @@
1
- errors:
2
- UnprocessableEntityError:
3
- status-code: 422
4
- type: HTTPValidationError
5
- docs: Validation Error
6
- examples:
7
- - value: {}
8
- BadRequestError:
9
- status-code: 400
10
- type: ErrorResponse
11
- docs: Bad Request
12
- examples:
13
- - value: {}
14
- service:
15
- auth: false
16
- base-path: ''
17
- endpoints:
18
- synthesize-json:
19
- path: /v0/tts
20
- method: POST
21
- auth: true
22
- docs: >-
23
- Synthesizes one or more input texts into speech using the specified
24
- voice. If no voice is provided, a novel voice will be generated
25
- dynamically. Optionally, additional context can be included to influence
26
- the speech's style and prosody.
27
-
28
-
29
- The response includes the base64-encoded audio and metadata in JSON
30
- format.
31
- source:
32
- openapi: tts-openapi.json
33
- display-name: Text-to-speech (Json)
34
- request:
35
- body:
36
- type: PostedTts
37
- content-type: application/json
38
- response:
39
- docs: Successful Response
40
- type: ReturnTts
41
- status-code: 200
42
- errors:
43
- - UnprocessableEntityError
44
- examples:
45
- - request:
46
- context:
47
- utterances:
48
- - text: How can people see beauty so differently?
49
- description: >-
50
- A curious student with a clear and respectful tone, seeking
51
- clarification on Hume's ideas with a straightforward
52
- question.
53
- format:
54
- type: mp3
55
- num_generations: 1
56
- utterances:
57
- - text: >-
58
- Beauty is no quality in things themselves: It exists merely in
59
- the mind which contemplates them.
60
- description: >-
61
- Middle-aged masculine voice with a clear, rhythmic Scots lilt,
62
- rounded vowels, and a warm, steady tone with an articulate,
63
- academic quality.
64
- response:
65
- body:
66
- generations:
67
- - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
68
- duration: 7.44225
69
- encoding:
70
- format: mp3
71
- sample_rate: 48000
72
- file_size: 120192
73
- generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
74
- snippets:
75
- - - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
76
- generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
77
- id: 37b1b1b1-1b1b-1b1b-1b1b-1b1b1b1b1b1b
78
- text: >-
79
- Beauty is no quality in things themselves: It exists
80
- merely in the mind which contemplates them.
81
- utterance_index: 0
82
- request_id: 66e01f90-4501-4aa0-bbaf-74f45dc15aa725906
83
- synthesize-file:
84
- path: /v0/tts/file
85
- method: POST
86
- auth: true
87
- docs: >-
88
- Synthesizes one or more input texts into speech using the specified
89
- voice. If no voice is provided, a novel voice will be generated
90
- dynamically. Optionally, additional context can be included to influence
91
- the speech's style and prosody.
92
-
93
-
94
- The response contains the generated audio file in the requested format.
95
- source:
96
- openapi: tts-openapi.json
97
- display-name: Text-to-speech (File)
98
- request:
99
- body:
100
- type: PostedTts
101
- content-type: application/json
102
- response:
103
- docs: OK
104
- type: file
105
- status-code: 200
106
- errors:
107
- - UnprocessableEntityError
108
- examples:
109
- - request:
110
- context:
111
- generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
112
- format:
113
- type: mp3
114
- num_generations: 1
115
- utterances:
116
- - text: >-
117
- Beauty is no quality in things themselves: It exists merely in
118
- the mind which contemplates them.
119
- description: >-
120
- Middle-aged masculine voice with a clear, rhythmic Scots lilt,
121
- rounded vowels, and a warm, steady tone with an articulate,
122
- academic quality.
123
- synthesize-json-streaming:
124
- path: /v0/tts/stream/json
125
- method: POST
126
- auth: true
127
- docs: >-
128
- Streams synthesized speech using the specified voice. If no voice is
129
- provided, a novel voice will be generated dynamically. Optionally,
130
- additional context can be included to influence the speech's style and
131
- prosody.
132
-
133
-
134
- The response is a stream of JSON objects including audio encoded in
135
- base64.
136
- source:
137
- openapi: tts-openapi.json
138
- display-name: Text-to-speech (Streamed JSON)
139
- request:
140
- body:
141
- type: PostedTts
142
- content-type: application/json
143
- response-stream:
144
- docs: Successful Response
145
- type: SnippetAudioChunk
146
- format: json
147
- errors:
148
- - UnprocessableEntityError
149
- examples:
150
- - request:
151
- utterances:
152
- - text: >-
153
- Beauty is no quality in things themselves: It exists merely in
154
- the mind which contemplates them.
155
- voice:
156
- name: Male English Actor
157
- provider: HUME_AI
158
- synthesize-file-streaming:
159
- path: /v0/tts/stream/file
160
- method: POST
161
- auth: true
162
- docs: >-
163
- Streams synthesized speech using the specified voice. If no voice is
164
- provided, a novel voice will be generated dynamically. Optionally,
165
- additional context can be included to influence the speech's style and
166
- prosody.
167
- source:
168
- openapi: tts-openapi.json
169
- display-name: Text-to-speech (Streamed File)
170
- request:
171
- body:
172
- type: PostedTts
173
- content-type: application/json
174
- response:
175
- docs: OK
176
- type: file
177
- status-code: 200
178
- errors:
179
- - UnprocessableEntityError
180
- examples:
181
- - request:
182
- utterances:
183
- - text: >-
184
- Beauty is no quality in things themselves: It exists merely in
185
- the mind which contemplates them.
186
- voice:
187
- name: Male English Actor
188
- provider: HUME_AI
189
- source:
190
- openapi: tts-openapi.json
191
- types:
192
- PostedContextWithGenerationId:
193
- properties:
194
- generation_id:
195
- type: string
196
- docs: >-
197
- The ID of a prior TTS generation to use as context for generating
198
- consistent speech style and prosody across multiple requests.
199
- Including context may increase audio generation times.
200
- source:
201
- openapi: tts-openapi.json
202
- PostedContextWithUtterances:
203
- properties:
204
- utterances:
205
- type: list<PostedUtterance>
206
- source:
207
- openapi: tts-openapi.json
208
- AudioEncoding:
209
- docs: >-
210
- Encoding information about the generated audio, including the `format` and
211
- `sample_rate`.
212
- properties:
213
- format:
214
- type: AudioFormatType
215
- docs: Format for the output audio.
216
- sample_rate:
217
- type: integer
218
- docs: >-
219
- The sample rate (`Hz`) of the generated audio. The default sample rate
220
- is `48000 Hz`.
221
- source:
222
- openapi: tts-openapi.json
223
- AudioFormatType:
224
- enum:
225
- - mp3
226
- - pcm
227
- - wav
228
- source:
229
- openapi: tts-openapi.json
230
- ReturnGeneration:
231
- properties:
232
- generation_id:
233
- type: string
234
- docs: >-
235
- A unique ID associated with this TTS generation that can be used as
236
- context for generating consistent speech style and prosody across
237
- multiple requests.
238
- duration:
239
- type: double
240
- docs: Duration of the generated audio in seconds.
241
- file_size:
242
- type: integer
243
- docs: Size of the generated audio in bytes.
244
- encoding:
245
- type: AudioEncoding
246
- audio:
247
- type: string
248
- docs: >-
249
- The generated audio output in the requested format, encoded as a
250
- base64 string.
251
- snippets:
252
- docs: >-
253
- A list of snippet groups where each group corresponds to an utterance
254
- in the request. Each group contains segmented snippets that represent
255
- the original utterance divided into more natural-sounding units
256
- optimized for speech delivery.
257
- type: list<list<Snippet>>
258
- source:
259
- openapi: tts-openapi.json
260
- HTTPValidationError:
261
- properties:
262
- detail:
263
- type: optional<list<ValidationError>>
264
- source:
265
- openapi: tts-openapi.json
266
- FormatMp3:
267
- properties: {}
268
- source:
269
- openapi: tts-openapi.json
270
- PostedContext:
271
- discriminated: false
272
- docs: >-
273
- Utterances to use as context for generating consistent speech style and
274
- prosody across multiple requests. These will not be converted to speech
275
- output.
276
- union:
277
- - type: PostedContextWithGenerationId
278
- - type: PostedContextWithUtterances
279
- source:
280
- openapi: tts-openapi.json
281
- inline: true
282
- Format:
283
- discriminant: type
284
- base-properties: {}
285
- docs: Specifies the output audio file format.
286
- union:
287
- mp3:
288
- type: FormatMp3
289
- pcm:
290
- type: FormatPcm
291
- wav:
292
- type: FormatWav
293
- source:
294
- openapi: tts-openapi.json
295
- PostedTts:
296
- properties:
297
- context:
298
- type: optional<PostedContext>
299
- docs: >-
300
- Utterances to use as context for generating consistent speech style
301
- and prosody across multiple requests. These will not be converted to
302
- speech output.
303
- utterances:
304
- docs: >-
305
- A list of **Utterances** to be converted to speech output.
306
-
307
-
308
- An **Utterance** is a unit of input for
309
- [Octave](/docs/text-to-speech-tts/overview), and includes input
310
- `text`, an optional `description` to serve as the prompt for how the
311
- speech should be delivered, an optional `voice` specification, and
312
- additional controls to guide delivery for `speed` and
313
- `trailing_silence`.
314
- type: list<PostedUtterance>
315
- num_generations:
316
- type: optional<integer>
317
- docs: Number of generations of the audio to produce.
318
- default: 1
319
- validation:
320
- min: 1
321
- max: 5
322
- format:
323
- type: optional<Format>
324
- docs: Specifies the output audio file format.
325
- split_utterances:
326
- type: optional<boolean>
327
- docs: >-
328
- Controls how audio output is segmented in the response.
329
-
330
-
331
- - When **enabled** (`true`), input utterances are automatically split
332
- into natural-sounding speech segments.
333
-
334
-
335
- - When **disabled** (`false`), the response maintains a strict
336
- one-to-one mapping between input utterances and output snippets.
337
-
338
-
339
- This setting affects how the `snippets` array is structured in the
340
- response, which may be important for applications that need to track
341
- the relationship between input text and generated audio segments. When
342
- setting to `false`, avoid including utterances with long `text`, as
343
- this can result in distorted output.
344
- default: true
345
- strip_headers:
346
- type: optional<boolean>
347
- docs: >-
348
- If enabled, the audio for all the chunks of a generation, once
349
- concatenated together, will constitute a single audio file. Otherwise,
350
- if disabled, each chunk's audio will be its own audio file, each with
351
- its own headers (if applicable).
352
- default: false
353
- instant_mode:
354
- type: optional<boolean>
355
- docs: >-
356
- Enables ultra-low latency streaming, significantly reducing the time
357
- until the first audio chunk is received. Recommended for real-time
358
- applications requiring immediate audio playback. For further details,
359
- see our documentation on [instant
360
- mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
361
-
362
- - A
363
- [voice](/reference/text-to-speech-tts/synthesize-json-streaming#request.body.utterances.voice)
364
- must be specified when instant mode is enabled. Dynamic voice
365
- generation is not supported with this mode.
366
-
367
- - Instant mode is only supported for streaming endpoints (e.g.,
368
- [/v0/tts/stream/json](/reference/text-to-speech-tts/synthesize-json-streaming),
369
- [/v0/tts/stream/file](/reference/text-to-speech-tts/synthesize-file-streaming)).
370
-
371
- - Ensure only a single generation is requested
372
- ([num_generations](/reference/text-to-speech-tts/synthesize-json-streaming#request.body.num_generations)
373
- must be `1` or omitted).
374
- default: true
375
- source:
376
- openapi: tts-openapi.json
377
- ReturnTts:
378
- properties:
379
- request_id:
380
- type: optional<string>
381
- docs: >-
382
- A unique ID associated with this request for tracking and
383
- troubleshooting. Use this ID when contacting [support](/support) for
384
- troubleshooting assistance.
385
- generations:
386
- type: list<ReturnGeneration>
387
- source:
388
- openapi: tts-openapi.json
389
- ReturnVoice:
390
- docs: An Octave voice available for text-to-speech
391
- properties:
392
- id:
393
- type: optional<string>
394
- docs: ID of the voice in the `Voice Library`.
395
- name:
396
- type: optional<string>
397
- docs: Name of the voice in the `Voice Library`.
398
- provider:
399
- type: optional<VoiceProvider>
400
- docs: >-
401
- The provider associated with the created voice.
402
-
403
-
404
- Voices created through this endpoint will always have the provider set
405
- to `CUSTOM_VOICE`, indicating a custom voice stored in your account.
406
- source:
407
- openapi: tts-openapi.json
408
- FormatPcm:
409
- properties: {}
410
- source:
411
- openapi: tts-openapi.json
412
- Snippet:
413
- properties:
414
- id:
415
- type: string
416
- docs: A unique ID associated with this **Snippet**.
417
- text:
418
- type: string
419
- docs: The text for this **Snippet**.
420
- generation_id:
421
- type: string
422
- docs: The generation ID this snippet corresponds to.
423
- utterance_index:
424
- type: optional<integer>
425
- docs: The index of the utterance in the request this snippet corresponds to.
426
- transcribed_text:
427
- type: optional<string>
428
- docs: >-
429
- The transcribed text of the generated audio. It is only present if
430
- `instant_mode` is set to `false`.
431
- audio:
432
- type: string
433
- docs: >-
434
- The segmented audio output in the requested format, encoded as a
435
- base64 string.
436
- source:
437
- openapi: tts-openapi.json
438
- SnippetAudioChunk:
439
- properties:
440
- generation_id:
441
- type: string
442
- docs: >-
443
- The generation ID of the parent snippet that this chunk corresponds
444
- to.
445
- snippet_id:
446
- type: string
447
- docs: The ID of the parent snippet that this chunk corresponds to.
448
- text:
449
- type: string
450
- docs: The text of the parent snippet that this chunk corresponds to.
451
- transcribed_text:
452
- type: optional<string>
453
- docs: >-
454
- The transcribed text of the generated audio of the parent snippet that
455
- this chunk corresponds to. It is only present if `instant_mode` is set
456
- to `false`.
457
- chunk_index:
458
- type: integer
459
- docs: The index of the audio chunk in the snippet.
460
- audio:
461
- type: string
462
- docs: The generated audio output chunk in the requested format.
463
- audio_format:
464
- type: AudioFormatType
465
- docs: The generated audio output format.
466
- is_last_chunk:
467
- type: boolean
468
- docs: >-
469
- Whether or not this is the last chunk streamed back from the decoder
470
- for one input snippet.
471
- utterance_index:
472
- type: optional<integer>
473
- docs: >-
474
- The index of the utterance in the request that the parent snippet of
475
- this chunk corresponds to.
476
- snippet:
477
- type: Snippet
478
- source:
479
- openapi: tts-openapi.json
480
- PostedUtterance:
481
- properties:
482
- text:
483
- type: string
484
- docs: The input text to be synthesized into speech.
485
- validation:
486
- maxLength: 5000
487
- description:
488
- type: optional<string>
489
- docs: >-
490
- Natural language instructions describing how the synthesized speech
491
- should sound, including but not limited to tone, intonation, pacing,
492
- and accent.
493
-
494
-
495
- **This field behaves differently depending on whether a voice is
496
- specified**:
497
-
498
- - **Voice specified**: the description will serve as acting directions
499
- for delivery. Keep directions concise—100 characters or fewer—for best
500
- results. See our guide on [acting
501
- instructions](/docs/text-to-speech-tts/acting-instructions).
502
-
503
- - **Voice not specified**: the description will serve as a voice
504
- prompt for generating a voice. See our [prompting
505
- guide](/docs/text-to-speech-tts/prompting) for design tips.
506
- validation:
507
- maxLength: 1000
508
- voice:
509
- type: optional<PostedUtteranceVoice>
510
- docs: >-
511
- The `name` or `id` associated with a **Voice** from the **Voice
512
- Library** to be used as the speaker for this and all subsequent
513
- `utterances`, until the `voice` field is updated again.
514
-
515
- See our [voices guide](/docs/text-to-speech-tts/voices) for more details on generating and specifying **Voices**.
516
- speed:
517
- type: optional<double>
518
- docs: >-
519
- Speed multiplier for the synthesized speech. Extreme values below 0.75
520
- and above 1.5 may sometimes cause instability in the generated output.
521
- default: 1
522
- validation:
523
- min: 0.5
524
- max: 2
525
- trailing_silence:
526
- type: optional<double>
527
- docs: Duration of trailing silence (in seconds) to add to this utterance.
528
- default: 0
529
- validation:
530
- min: 0
531
- max: 5
532
- source:
533
- openapi: tts-openapi.json
534
- ValidationErrorLocItem:
535
- discriminated: false
536
- union:
537
- - string
538
- - integer
539
- source:
540
- openapi: tts-openapi.json
541
- inline: true
542
- ValidationError:
543
- properties:
544
- loc:
545
- type: list<ValidationErrorLocItem>
546
- msg: string
547
- type: string
548
- source:
549
- openapi: tts-openapi.json
550
- PostedUtteranceVoiceWithId:
551
- properties:
552
- id:
553
- type: string
554
- docs: The unique ID associated with the **Voice**.
555
- provider:
556
- type: optional<VoiceProvider>
557
- docs: >-
558
- Specifies the source provider associated with the chosen voice.
559
-
560
-
561
- - **`HUME_AI`**: Select voices from Hume's [Voice
562
- Library](https://platform.hume.ai/tts/voice-library), containing a
563
- variety of preset, shared voices.
564
-
565
- - **`CUSTOM_VOICE`**: Select from voices you've personally generated
566
- and saved in your account.
567
-
568
-
569
- If no provider is explicitly set, the default provider is
570
- `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
571
- must explicitly set the provider to `HUME_AI`.
572
-
573
-
574
- Preset voices from Hume's **Voice Library** are accessible by all
575
- users. In contrast, your custom voices are private and accessible only
576
- via requests authenticated with your API key.
577
- source:
578
- openapi: tts-openapi.json
579
- PostedUtteranceVoiceWithName:
580
- properties:
581
- name:
582
- type: string
583
- docs: The name of a **Voice**.
584
- provider:
585
- type: optional<VoiceProvider>
586
- docs: >-
587
- Specifies the source provider associated with the chosen voice.
588
-
589
-
590
- - **`HUME_AI`**: Select voices from Hume's [Voice
591
- Library](https://platform.hume.ai/tts/voice-library), containing a
592
- variety of preset, shared voices.
593
-
594
- - **`CUSTOM_VOICE`**: Select from voices you've personally generated
595
- and saved in your account.
596
-
597
-
598
- If no provider is explicitly set, the default provider is
599
- `CUSTOM_VOICE`. When using voices from Hume's **Voice Library**, you
600
- must explicitly set the provider to `HUME_AI`.
601
-
602
-
603
- Preset voices from Hume's **Voice Library** are accessible by all
604
- users. In contrast, your custom voices are private and accessible only
605
- via requests authenticated with your API key.
606
- source:
607
- openapi: tts-openapi.json
608
- VoiceProvider:
609
- enum:
610
- - HUME_AI
611
- - CUSTOM_VOICE
612
- source:
613
- openapi: tts-openapi.json
614
- PostedUtteranceVoice:
615
- discriminated: false
616
- union:
617
- - type: PostedUtteranceVoiceWithId
618
- - type: PostedUtteranceVoiceWithName
619
- source:
620
- openapi: tts-openapi.json
621
- FormatWav:
622
- properties: {}
623
- source:
624
- openapi: tts-openapi.json
625
- ErrorResponse:
626
- properties:
627
- error: optional<string>
628
- message: optional<string>
629
- code: optional<string>
630
- source:
631
- openapi: tts-openapi.json
632
- ReturnPagedVoices:
633
- docs: A paginated list of Octave voices available for text-to-speech
634
- properties:
635
- page_number:
636
- type: optional<integer>
637
- docs: >-
638
- The page number of the returned list.
639
-
640
-
641
- This value corresponds to the `page_number` parameter specified in the
642
- request. Pagination uses zero-based indexing.
643
- page_size:
644
- type: optional<integer>
645
- docs: >-
646
- The maximum number of items returned per page.
647
-
648
-
649
- This value corresponds to the `page_size` parameter specified in the
650
- request.
651
- total_pages:
652
- type: optional<integer>
653
- docs: The total number of pages in the collection.
654
- voices_page:
655
- type: optional<list<ReturnVoice>>
656
- docs: >-
657
- List of voices returned for the specified `page_number` and
658
- `page_size`.
659
- source:
660
- openapi: tts-openapi.json