hume 0.13.0 → 0.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mock/definition/empathic-voice/__package__.yml +68 -61
- package/.mock/definition/empathic-voice/chatWebhooks.yml +8 -12
- package/.mock/definition/empathic-voice/prompts.yml +2 -2
- package/.mock/definition/empathic-voice/tools.yml +2 -2
- package/.mock/definition/tts/__package__.yml +85 -47
- package/.mock/definition/tts/voices.yml +9 -9
- package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
- package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
- package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
- package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
- package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
- package/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
- package/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
- package/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
- package/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
- package/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
- package/api/resources/empathicVoice/types/ContextType.js +1 -1
- package/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
- package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
- package/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
- package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
- package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
- package/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
- package/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
- package/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
- package/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
- package/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
- package/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
- package/api/resources/empathicVoice/types/Tool.d.ts +1 -1
- package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
- package/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
- package/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
- package/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
- package/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
- package/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
- package/api/resources/empathicVoice/types/index.d.ts +4 -3
- package/api/resources/empathicVoice/types/index.js +4 -3
- package/api/resources/tts/client/Client.d.ts +5 -5
- package/api/resources/tts/client/Client.js +5 -5
- package/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
- package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
- package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
- package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
- package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
- package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/ContextType.js +1 -1
- package/dist/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
- package/dist/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
- package/dist/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
- package/dist/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
- package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/Tool.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
- package/dist/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
- package/dist/api/resources/empathicVoice/types/index.d.ts +4 -3
- package/dist/api/resources/empathicVoice/types/index.js +4 -3
- package/dist/api/resources/tts/client/Client.d.ts +5 -5
- package/dist/api/resources/tts/client/Client.js +5 -5
- package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
- package/dist/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/ContextType.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
- package/dist/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
- package/dist/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
- package/dist/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
- package/dist/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/index.d.ts +4 -3
- package/dist/serialization/resources/empathicVoice/types/index.js +4 -3
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
- package/reference.md +14 -14
- package/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/ContextType.js +1 -1
- package/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
- package/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
- package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
- package/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
- package/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
- package/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
- package/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
- package/serialization/resources/empathicVoice/types/index.d.ts +4 -3
- package/serialization/resources/empathicVoice/types/index.js +4 -3
- package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
- package/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
package/.mock/definition/tts/__package__.yml
CHANGED

@@ -29,7 +29,7 @@ service:
         The response includes the base64-encoded audio and metadata in JSON
         format.
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Json)
       request:
         body:
@@ -43,14 +43,6 @@ service:
         - UnprocessableEntityError
       examples:
         - request:
-            utterances:
-              - text: >-
-                  Beauty is no quality in things themselves: It exists merely in
-                  the mind which contemplates them.
-                description: >-
-                  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
-                  rounded vowels, and a warm, steady tone with an articulate,
-                  academic quality.
             context:
               utterances:
                 - text: How can people see beauty so differently?
@@ -61,16 +53,24 @@ service:
             format:
               type: mp3
             num_generations: 1
+            utterances:
+              - text: >-
+                  Beauty is no quality in things themselves: It exists merely in
+                  the mind which contemplates them.
+                description: >-
+                  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
+                  rounded vowels, and a warm, steady tone with an articulate,
+                  academic quality.
           response:
             body:
               generations:
-                -
+                - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
                   duration: 7.44225
-                  file_size: 120192
                   encoding:
                     format: mp3
                     sample_rate: 48000
-
+                  file_size: 120192
+                  generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
               snippets:
                 - - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
                     generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
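For orientation, the corrected synthesize-json example corresponds to a request like the one below through the TypeScript client. This is a minimal sketch, not taken from the diff: the `HumeClient` constructor and the `tts.synthesizeJson` method are assumed from the SDK's existing public surface, and the API key is a placeholder.

    import { HumeClient } from "hume";

    const hume = new HumeClient({ apiKey: "<YOUR_API_KEY>" });

    // Mirrors the reordered example body: context first, then format,
    // num_generations, and the utterances to synthesize.
    const result = await hume.tts.synthesizeJson({
        context: {
            utterances: [{ text: "How can people see beauty so differently?" }],
        },
        format: { type: "mp3" },
        numGenerations: 1,
        utterances: [
            {
                text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
                description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt.",
            },
        ],
    });

    // Each generation carries base64 audio plus encoding metadata,
    // matching the corrected response example above.
    for (const generation of result.generations) {
        console.log(generation.generationId, generation.duration, generation.fileSize);
    }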
@@ -93,7 +93,7 @@ service:

         The response contains the generated audio file in the requested format.
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Text-to-speech (File)
       request:
         body:
@@ -107,6 +107,11 @@ service:
         - UnprocessableEntityError
       examples:
         - request:
+            context:
+              generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
+            format:
+              type: mp3
+            num_generations: 1
             utterances:
               - text: >-
                   Beauty is no quality in things themselves: It exists merely in
@@ -115,11 +120,6 @@ service:
                   Middle-aged masculine voice with a clear, rhythmic Scots lilt,
                   rounded vowels, and a warm, steady tone with an articulate,
                   academic quality.
-            context:
-              generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
-            format:
-              type: mp3
-            num_generations: 1
     synthesize-json-streaming:
       path: /v0/tts/stream/json
       method: POST
@@ -134,7 +134,7 @@ service:
         The response is a stream of JSON objects including audio encoded in
         base64.
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Streamed JSON)
       request:
         body:
@@ -165,7 +165,7 @@ service:
         additional context can be included to influence the speech's style and
         prosody.
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Streamed File)
       request:
         body:
@@ -187,7 +187,7 @@ service:
           name: Male English Actor
           provider: HUME_AI
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
 types:
   PostedContextWithGenerationId:
     properties:
@@ -198,13 +198,13 @@ types:
         consistent speech style and prosody across multiple requests.
         Including context may increase audio generation times.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedContextWithUtterances:
     properties:
       utterances:
         type: list<PostedUtterance>
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   AudioEncoding:
     docs: >-
       Encoding information about the generated audio, including the `format` and
@@ -219,14 +219,14 @@ types:
       The sample rate (`Hz`) of the generated audio. The default sample rate
       is `48000 Hz`.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   AudioFormatType:
     enum:
       - mp3
       - pcm
       - wav
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ReturnGeneration:
     properties:
       generation_id:
@@ -256,17 +256,17 @@ types:
         optimized for speech delivery.
       type: list<list<Snippet>>
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   HTTPValidationError:
     properties:
       detail:
         type: optional<list<ValidationError>>
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   FormatMp3:
     properties: {}
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedContext:
     discriminated: false
     docs: >-
@@ -277,7 +277,7 @@ types:
       - type: PostedContextWithGenerationId
       - type: PostedContextWithUtterances
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
     inline: true
   Format:
     discriminant: type
@@ -291,7 +291,7 @@ types:
       wav:
         type: FormatWav
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedTts:
     properties:
       context:
@@ -373,7 +373,7 @@ types:
         must be `1` or omitted).
       default: true
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ReturnTts:
     properties:
       request_id:
@@ -385,7 +385,7 @@ types:
       generations:
         type: list<ReturnGeneration>
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ReturnVoice:
     docs: An Octave voice available for text-to-speech
     properties:
@@ -404,11 +404,11 @@ types:
       Voices created through this endpoint will always have the provider set
       to `CUSTOM_VOICE`, indicating a custom voice stored in your account.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   FormatPcm:
     properties: {}
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   Snippet:
     properties:
       id:
@@ -434,11 +434,49 @@ types:
         The segmented audio output in the requested format, encoded as a
         base64 string.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   SnippetAudioChunk:
-    properties:
+    properties:
+      generation_id:
+        type: string
+        docs: >-
+          The generation ID of the parent snippet that this chunk corresponds
+          to.
+      snippet_id:
+        type: string
+        docs: The ID of the parent snippet that this chunk corresponds to.
+      text:
+        type: string
+        docs: The text of the parent snippet that this chunk corresponds to.
+      transcribed_text:
+        type: optional<string>
+        docs: >-
+          The transcribed text of the generated audio of the parent snippet that
+          this chunk corresponds to. It is only present if `instant_mode` is set
+          to `false`.
+      chunk_index:
+        type: integer
+        docs: The index of the audio chunk in the snippet.
+      audio:
+        type: string
+        docs: The generated audio output chunk in the requested format.
+      audio_format:
+        type: AudioFormatType
+        docs: The generated audio output format.
+      is_last_chunk:
+        type: boolean
+        docs: >-
+          Whether or not this is the last chunk streamed back from the decoder
+          for one input snippet.
+      utterance_index:
+        type: optional<integer>
+        docs: >-
+          The index of the utterance in the request that the parent snippet of
+          this chunk corresponds to.
+      snippet:
+        type: Snippet
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedUtterance:
     properties:
       text:
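The `SnippetAudioChunk` block above is the per-chunk payload for the streamed-JSON endpoint. Below is a minimal consumption sketch, assuming the SDK's `tts.synthesizeJsonStreaming` method returns an async-iterable stream of these chunks (the API key and utterance text are placeholders):

    import { HumeClient } from "hume";

    const hume = new HumeClient({ apiKey: "<YOUR_API_KEY>" });

    const stream = await hume.tts.synthesizeJsonStreaming({
        utterances: [{ text: "Hello from Octave." }],
    });

    // Each chunk names its parent snippet and carries one base64 audio slice;
    // is_last_chunk (isLastChunk in the TS client) closes out a snippet.
    const audioParts: Buffer[] = [];
    for await (const chunk of stream) {
        audioParts.push(Buffer.from(chunk.audio, "base64"));
        if (chunk.isLastChunk) {
            console.log(`snippet ${chunk.snippetId} done after ${chunk.chunkIndex + 1} chunks`);
        }
    }
    const audio = Buffer.concat(audioParts);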
@@ -492,14 +530,14 @@ types:
         min: 0
         max: 5
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ValidationErrorLocItem:
     discriminated: false
     union:
       - string
       - integer
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
     inline: true
   ValidationError:
     properties:
@@ -508,7 +546,7 @@ types:
       msg: string
       type: string
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedUtteranceVoiceWithId:
     properties:
       id:
@@ -537,7 +575,7 @@ types:
       users. In contrast, your custom voices are private and accessible only
       via requests authenticated with your API key.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedUtteranceVoiceWithName:
     properties:
       name:
@@ -566,31 +604,31 @@ types:
       users. In contrast, your custom voices are private and accessible only
       via requests authenticated with your API key.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   VoiceProvider:
     enum:
       - HUME_AI
       - CUSTOM_VOICE
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   PostedUtteranceVoice:
     discriminated: false
     union:
       - type: PostedUtteranceVoiceWithId
       - type: PostedUtteranceVoiceWithName
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   FormatWav:
     properties: {}
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ErrorResponse:
     properties:
       error: optional<string>
       message: optional<string>
       code: optional<string>
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
   ReturnPagedVoices:
     docs: A paginated list Octave voices available for text-to-speech
     properties:
@@ -619,4 +657,4 @@ types:
       List of voices returned for the specified `page_number` and
       `page_size`.
     source:
-      openapi: tts-openapi.
+      openapi: tts-openapi.json
package/.mock/definition/tts/voices.yml
CHANGED

@@ -15,7 +15,7 @@ service:
         offset: $request.page_number
         results: $response.voices_page
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: List voices
       request:
         name: VoicesListRequest
@@ -70,11 +70,11 @@ service:
             page_size: 10
             total_pages: 1
             voices_page:
-              -
-
+              - id: c42352c0-4566-455d-b180-0f654b65b525
+                name: David Hume
                 provider: CUSTOM_VOICE
-              -
-
+              - id: d87352b0-26a3-4b11-081b-d157a5674d19
+                name: Goliath Hume
                 provider: CUSTOM_VOICE
     create:
       path: /v0/tts/voices
@@ -89,7 +89,7 @@ service:
         ensuring consistent speech style and prosody. For more details on voice
         creation, see the [Voices Guide](/docs/text-to-speech-tts/voices).
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Create voice
       request:
         name: PostedVoice
@@ -117,8 +117,8 @@ service:
             name: David Hume
           response:
             body:
-              name: David Hume
               id: c42352c0-4566-455d-b180-0f654b65b525
+              name: David Hume
               provider: CUSTOM_VOICE
     delete:
       path: /v0/tts/voices
@@ -126,7 +126,7 @@ service:
       auth: true
       docs: Deletes a previously generated custom voice.
       source:
-        openapi: tts-openapi.
+        openapi: tts-openapi.json
       display-name: Delete voice
       request:
         name: VoicesDeleteRequest
@@ -140,4 +140,4 @@ service:
       - query-parameters:
           name: David Hume
         source:
-          openapi: tts-openapi.
+          openapi: tts-openapi.json
package/api/resources/empathicVoice/resources/chat/client/Client.d.ts
CHANGED

@@ -22,6 +22,8 @@ export declare namespace Chat {
         resumedChatGroupId?: string;
         /** A flag to enable verbose transcription. Set this query parameter to `true` to have unfinalized user transcripts be sent to the client as interim UserMessage messages. The [interim](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.interim) field on a [UserMessage](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.type) denotes whether the message is "interim" or "final." */
         verboseTranscription?: boolean;
+        /** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
+        voiceId?: string;
         /** Extra query parameters sent at WebSocket connection */
         queryParams?: Record<string, string | string[] | object | object[]>;
     }
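The new `voiceId` option lets a caller override the voice set in the referenced Config at connection time. A minimal sketch of passing it, assuming the SDK's existing `empathicVoice.chat.connect` entry point (all IDs and the API key are placeholders):

    import { HumeClient } from "hume";

    const hume = new HumeClient({ apiKey: "<YOUR_API_KEY>" });

    // voiceId overrides whatever voice the referenced Config specifies.
    const socket = hume.empathicVoice.chat.connect({
        configId: "<YOUR_CONFIG_ID>",
        voiceId: "<YOUR_VOICE_ID>",
    });

    socket.on("message", (message) => {
        if (message.type === "assistant_message") {
            console.log(message.message.content);
        }
    });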
package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts
CHANGED

@@ -14,7 +14,7 @@ export interface PostedPrompt {
     /** An optional description of the Prompt version. */
     versionDescription?: string;
     /**
-     * Instructions used to shape EVI
+     * Instructions used to shape EVI's behavior, responses, and style.
      *
      * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
      *
package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts
CHANGED

@@ -12,7 +12,7 @@ export interface PostedPromptVersion {
     /** An optional description of the Prompt version. */
     versionDescription?: string;
     /**
-     * Instructions used to shape EVI
+     * Instructions used to shape EVI's behavior, responses, and style for this version of the Prompt.
      *
      * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
      *
package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts
CHANGED

@@ -21,7 +21,7 @@ export interface PostedUserDefinedTool {
     /**
      * Stringified JSON defining the parameters used by this version of the Tool.
      *
-     * These parameters define the inputs needed for the Tool
+     * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
      */
     parameters: string;
     /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */

package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts
CHANGED

@@ -18,7 +18,7 @@ export interface PostedUserDefinedToolVersion {
     /**
      * Stringified JSON defining the parameters used by this version of the Tool.
      *
-     * These parameters define the inputs needed for the Tool
+     * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
      */
     parameters: string;
     /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */
package/api/resources/empathicVoice/types/AssistantEnd.d.ts
CHANGED

@@ -8,7 +8,7 @@ export interface AssistantEnd {
     /**
      * The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`.
      *
-     * This message indicates the conclusion of the assistant
+     * This message indicates the conclusion of the assistant's response, signaling that the assistant has finished speaking for the current conversational turn.
      */
     type: "assistant_end";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */

package/api/resources/empathicVoice/types/AssistantInput.d.ts
CHANGED

@@ -12,7 +12,7 @@ export interface AssistantInput {
     /**
      * Assistant text to synthesize into spoken audio and insert into the conversation.
      *
-     * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user
+     * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
      */
     text: string;
 }

package/api/resources/empathicVoice/types/AssistantMessage.d.ts
CHANGED

@@ -9,7 +9,7 @@ export interface AssistantMessage {
     /**
      * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
      *
-     * This message contains both a transcript of the assistant
+     * This message contains both a transcript of the assistant's response and the expression measurement predictions of the assistant's audio output.
      */
     type: "assistant_message";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */

package/api/resources/empathicVoice/types/AudioInput.d.ts
CHANGED

@@ -16,7 +16,7 @@ export interface AudioInput {
     /**
      * Base64 encoded audio input to insert into the conversation.
      *
-     * The content of an Audio Input message is treated as the user
+     * The content of an Audio Input message is treated as the user's speech to EVI and must be streamed continuously. Pre-recorded audio files are not supported.
      *
      * For optimal transcription quality, the audio data should be transmitted in small chunks.
      *
package/api/resources/empathicVoice/types/ContextType.d.ts
CHANGED

@@ -1,8 +1,8 @@
 /**
  * This file was auto-generated by Fern from our API Definition.
  */
-export type ContextType = "
+export type ContextType = "persistent" | "temporary";
 export declare const ContextType: {
-    readonly Temporary: "temporary";
     readonly Persistent: "persistent";
+    readonly Temporary: "temporary";
 };
package/api/resources/empathicVoice/types/JsonMessage.d.ts
CHANGED

@@ -2,4 +2,4 @@
  * This file was auto-generated by Fern from our API Definition.
  */
 import * as Hume from "../../../index";
-export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage
+export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.AssistantProsody | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage;
package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts
CHANGED

@@ -8,7 +8,7 @@ export interface PauseAssistantMessage {
     /**
      * The type of message sent through the socket; must be `pause_assistant_message` for our server to correctly identify and process it as a Pause Assistant message.
      *
-     * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI won
+     * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI won't respond, but transcriptions of your audio inputs will still be recorded.
      */
     type: "pause_assistant_message";
     /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */

package/api/resources/empathicVoice/types/PostedLanguageModel.d.ts
CHANGED

@@ -13,7 +13,7 @@ export interface PostedLanguageModel {
     /**
      * The model temperature, with values between 0 to 1 (inclusive).
      *
-     * Controls the randomness of the LLM
+     * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
      */
     temperature?: number;
 }
package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts
CHANGED

@@ -7,12 +7,12 @@
  * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
  */
 export interface PostedTimeoutSpecsInactivity {
+    /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+    durationSecs?: number;
     /**
      * Boolean indicating if this timeout is enabled.
      *
      * If set to false, EVI will not timeout due to a specified duration of user inactivity being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
      */
     enabled: boolean;
-    /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
-    durationSecs?: number;
 }

package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts
CHANGED

@@ -7,12 +7,12 @@
  * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
  */
 export interface PostedTimeoutSpecsMaxDuration {
+    /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+    durationSecs?: number;
     /**
      * Boolean indicating if this timeout is enabled.
      *
      * If set to false, EVI will not timeout due to a specified maximum duration being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
      */
     enabled: boolean;
-    /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
-    durationSecs?: number;
 }
package/api/resources/empathicVoice/types/ReturnConfig.d.ts
CHANGED

@@ -45,7 +45,8 @@ export interface ReturnConfig {
      * Hume's eLLM (empathic Large Language Model) is a multimodal language model that takes into account both expression measures and language. The eLLM generates short, empathic language responses and guides text-to-speech (TTS) prosody.
      */
     ellmModel?: Hume.empathicVoice.ReturnEllmModel;
-    voice
+    /** A voice specification associated with this Config. */
+    voice?: Hume.empathicVoice.ReturnVoice;
     prompt?: Hume.empathicVoice.ReturnPrompt;
     /** Map of webhooks associated with this config. */
     webhooks?: (Hume.empathicVoice.ReturnWebhookSpec | undefined)[];

package/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts
CHANGED

@@ -13,7 +13,7 @@ export interface ReturnLanguageModel {
     /**
      * The model temperature, with values between 0 to 1 (inclusive).
      *
-     * Controls the randomness of the LLM
+     * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
      */
     temperature?: number;
 }
package/api/resources/empathicVoice/types/ReturnPrompt.d.ts
CHANGED

@@ -11,11 +11,9 @@ export interface ReturnPrompt {
     /** Identifier for a Prompt. Formatted as a UUID. */
     id: string;
     /**
-     * Instructions used to shape EVI
+     * Instructions used to shape EVI's behavior, responses, and style.
      *
-     * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
-     *
-     * For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
+     * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles. For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
      */
     text: string;
     /**

package/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts
CHANGED

@@ -35,7 +35,7 @@ export interface ReturnUserDefinedTool {
     /**
      * Stringified JSON defining the parameters used by this version of the Tool.
      *
-     * These parameters define the inputs needed for the Tool
+     * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
      */
     parameters: string;
 }
package/api/resources/empathicVoice/types/ReturnVoice.d.ts
ADDED

@@ -0,0 +1,12 @@
+/**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+import * as Hume from "../../../index";
+/**
+ * An Octave voice available for text-to-speech
+ */
+export interface ReturnVoice {
+    id?: string;
+    name?: string;
+    provider?: Hume.empathicVoice.VoiceProvider;
+}
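With `ReturnVoice` in place, the `voice` field on a returned Config (see the ReturnConfig.d.ts hunk above) is a typed object rather than a truncated stub. A hedged sketch of reading it, assuming the SDK's existing `configs.getConfigVersion` method (the config ID and API key are placeholders):

    import { HumeClient } from "hume";

    const hume = new HumeClient({ apiKey: "<YOUR_API_KEY>" });

    // The voice on a returned Config now surfaces id, name, and provider.
    const config = await hume.empathicVoice.configs.getConfigVersion("<CONFIG_ID>", 0);
    if (config.voice) {
        console.log(`${config.voice.name} (${config.voice.provider})`);
    }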
package/api/resources/empathicVoice/types/SessionSettings.d.ts
CHANGED

@@ -23,7 +23,7 @@ export interface SessionSettings {
      */
     customSessionId?: string;
     /**
-     * Instructions used to shape EVI
+     * Instructions used to shape EVI's behavior, responses, and style for the session.
      *
      * When included in a Session Settings message, the provided Prompt overrides the existing one specified in the EVI configuration. If no Prompt was defined in the configuration, this Prompt will be the one used for the session.
      *
@@ -49,7 +49,7 @@ export interface SessionSettings {
     /**
      * Third party API key for the supplemental language model.
      *
-     * When provided, EVI will use this key instead of Hume
+     * When provided, EVI will use this key instead of Hume's API key for the supplemental LLM. This allows you to bypass rate limits and utilize your own API key as needed.
      */
     languageModelApiKey?: string;
     /**