hume 0.13.0 → 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/.mock/definition/empathic-voice/__package__.yml +68 -61
  2. package/.mock/definition/empathic-voice/chatWebhooks.yml +8 -12
  3. package/.mock/definition/empathic-voice/prompts.yml +2 -2
  4. package/.mock/definition/empathic-voice/tools.yml +2 -2
  5. package/.mock/definition/tts/__package__.yml +85 -47
  6. package/.mock/definition/tts/voices.yml +9 -9
  7. package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
  8. package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
  9. package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
  10. package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
  11. package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
  12. package/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  13. package/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  14. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  15. package/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  16. package/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
  17. package/api/resources/empathicVoice/types/ContextType.js +1 -1
  18. package/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
  19. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  20. package/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
  21. package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
  22. package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
  23. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  24. package/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
  25. package/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
  26. package/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
  27. package/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
  28. package/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
  29. package/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
  30. package/api/resources/empathicVoice/types/Tool.d.ts +1 -1
  31. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  32. package/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
  33. package/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  34. package/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
  35. package/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
  36. package/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
  37. package/api/resources/empathicVoice/types/index.d.ts +4 -3
  38. package/api/resources/empathicVoice/types/index.js +4 -3
  39. package/api/resources/tts/client/Client.d.ts +5 -5
  40. package/api/resources/tts/client/Client.js +5 -5
  41. package/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
  42. package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
  43. package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
  44. package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
  45. package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
  46. package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
  47. package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  48. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  49. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  50. package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  51. package/dist/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
  52. package/dist/api/resources/empathicVoice/types/ContextType.js +1 -1
  53. package/dist/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
  54. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  55. package/dist/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
  56. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
  57. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
  58. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  59. package/dist/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
  60. package/dist/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
  61. package/dist/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
  62. package/dist/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
  63. package/dist/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
  64. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
  65. package/dist/api/resources/empathicVoice/types/Tool.d.ts +1 -1
  66. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  67. package/dist/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
  68. package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  69. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
  70. package/dist/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
  71. package/dist/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
  72. package/dist/api/resources/empathicVoice/types/index.d.ts +4 -3
  73. package/dist/api/resources/empathicVoice/types/index.js +4 -3
  74. package/dist/api/resources/tts/client/Client.d.ts +5 -5
  75. package/dist/api/resources/tts/client/Client.js +5 -5
  76. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
  77. package/dist/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
  78. package/dist/serialization/resources/empathicVoice/types/ContextType.js +1 -1
  79. package/dist/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
  80. package/dist/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
  81. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
  82. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
  83. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
  84. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
  85. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  86. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
  87. package/dist/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
  88. package/dist/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
  89. package/dist/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
  90. package/dist/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
  91. package/dist/serialization/resources/empathicVoice/types/index.d.ts +4 -3
  92. package/dist/serialization/resources/empathicVoice/types/index.js +4 -3
  93. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
  94. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
  95. package/dist/version.d.ts +1 -1
  96. package/dist/version.js +1 -1
  97. package/package.json +1 -1
  98. package/reference.md +14 -14
  99. package/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
  100. package/serialization/resources/empathicVoice/types/ContextType.js +1 -1
  101. package/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
  102. package/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
  103. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
  104. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
  105. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
  106. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
  107. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  108. package/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
  109. package/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
  110. package/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
  111. package/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
  112. package/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
  113. package/serialization/resources/empathicVoice/types/index.d.ts +4 -3
  114. package/serialization/resources/empathicVoice/types/index.js +4 -3
  115. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
  116. package/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
  117. package/version.d.ts +1 -1
  118. package/version.js +1 -1
@@ -29,7 +29,7 @@ service:
  The response includes the base64-encoded audio and metadata in JSON
  format.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Text-to-speech (Json)
  request:
  body:
@@ -43,14 +43,6 @@ service:
  - UnprocessableEntityError
  examples:
  - request:
- utterances:
- - text: >-
- Beauty is no quality in things themselves: It exists merely in
- the mind which contemplates them.
- description: >-
- Middle-aged masculine voice with a clear, rhythmic Scots lilt,
- rounded vowels, and a warm, steady tone with an articulate,
- academic quality.
  context:
  utterances:
  - text: How can people see beauty so differently?
@@ -61,16 +53,24 @@ service:
  format:
  type: mp3
  num_generations: 1
+ utterances:
+ - text: >-
+ Beauty is no quality in things themselves: It exists merely in
+ the mind which contemplates them.
+ description: >-
+ Middle-aged masculine voice with a clear, rhythmic Scots lilt,
+ rounded vowels, and a warm, steady tone with an articulate,
+ academic quality.
  response:
  body:
  generations:
- - generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
+ - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
  duration: 7.44225
- file_size: 120192
  encoding:
  format: mp3
  sample_rate: 48000
- audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
+ file_size: 120192
+ generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
  snippets:
  - - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
  generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
@@ -93,7 +93,7 @@ service:

  The response contains the generated audio file in the requested format.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Text-to-speech (File)
  request:
  body:
@@ -107,6 +107,11 @@ service:
  - UnprocessableEntityError
  examples:
  - request:
+ context:
+ generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
+ format:
+ type: mp3
+ num_generations: 1
  utterances:
  - text: >-
  Beauty is no quality in things themselves: It exists merely in
@@ -115,11 +120,6 @@ service:
  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
  rounded vowels, and a warm, steady tone with an articulate,
  academic quality.
- context:
- generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
- format:
- type: mp3
- num_generations: 1
  synthesize-json-streaming:
  path: /v0/tts/stream/json
  method: POST
@@ -134,7 +134,7 @@ service:
  The response is a stream of JSON objects including audio encoded in
  base64.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Text-to-speech (Streamed JSON)
  request:
  body:
@@ -165,7 +165,7 @@ service:
  additional context can be included to influence the speech's style and
  prosody.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Text-to-speech (Streamed File)
  request:
  body:
@@ -187,7 +187,7 @@ service:
  name: Male English Actor
  provider: HUME_AI
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  types:
  PostedContextWithGenerationId:
  properties:
@@ -198,13 +198,13 @@ types:
  consistent speech style and prosody across multiple requests.
  Including context may increase audio generation times.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedContextWithUtterances:
  properties:
  utterances:
  type: list<PostedUtterance>
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  AudioEncoding:
  docs: >-
  Encoding information about the generated audio, including the `format` and
@@ -219,14 +219,14 @@ types:
  The sample rate (`Hz`) of the generated audio. The default sample rate
  is `48000 Hz`.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  AudioFormatType:
  enum:
  - mp3
  - pcm
  - wav
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ReturnGeneration:
  properties:
  generation_id:
@@ -256,17 +256,17 @@ types:
  optimized for speech delivery.
  type: list<list<Snippet>>
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  HTTPValidationError:
  properties:
  detail:
  type: optional<list<ValidationError>>
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  FormatMp3:
  properties: {}
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedContext:
  discriminated: false
  docs: >-
@@ -277,7 +277,7 @@ types:
  - type: PostedContextWithGenerationId
  - type: PostedContextWithUtterances
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  inline: true
  Format:
  discriminant: type
@@ -291,7 +291,7 @@ types:
  wav:
  type: FormatWav
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedTts:
  properties:
  context:
@@ -373,7 +373,7 @@ types:
  must be `1` or omitted).
  default: true
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ReturnTts:
  properties:
  request_id:
@@ -385,7 +385,7 @@ types:
  generations:
  type: list<ReturnGeneration>
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ReturnVoice:
  docs: An Octave voice available for text-to-speech
  properties:
@@ -404,11 +404,11 @@ types:
  Voices created through this endpoint will always have the provider set
  to `CUSTOM_VOICE`, indicating a custom voice stored in your account.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  FormatPcm:
  properties: {}
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  Snippet:
  properties:
  id:
@@ -434,11 +434,49 @@ types:
  The segmented audio output in the requested format, encoded as a
  base64 string.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  SnippetAudioChunk:
- properties: {}
+ properties:
+ generation_id:
+ type: string
+ docs: >-
+ The generation ID of the parent snippet that this chunk corresponds
+ to.
+ snippet_id:
+ type: string
+ docs: The ID of the parent snippet that this chunk corresponds to.
+ text:
+ type: string
+ docs: The text of the parent snippet that this chunk corresponds to.
+ transcribed_text:
+ type: optional<string>
+ docs: >-
+ The transcribed text of the generated audio of the parent snippet that
+ this chunk corresponds to. It is only present if `instant_mode` is set
+ to `false`.
+ chunk_index:
+ type: integer
+ docs: The index of the audio chunk in the snippet.
+ audio:
+ type: string
+ docs: The generated audio output chunk in the requested format.
+ audio_format:
+ type: AudioFormatType
+ docs: The generated audio output format.
+ is_last_chunk:
+ type: boolean
+ docs: >-
+ Whether or not this is the last chunk streamed back from the decoder
+ for one input snippet.
+ utterance_index:
+ type: optional<integer>
+ docs: >-
+ The index of the utterance in the request that the parent snippet of
+ this chunk corresponds to.
+ snippet:
+ type: Snippet
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedUtterance:
  properties:
  text:
@@ -492,14 +530,14 @@ types:
  min: 0
  max: 5
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ValidationErrorLocItem:
  discriminated: false
  union:
  - string
  - integer
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  inline: true
  ValidationError:
  properties:
@@ -508,7 +546,7 @@ types:
  msg: string
  type: string
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedUtteranceVoiceWithId:
  properties:
  id:
@@ -537,7 +575,7 @@ types:
  users. In contrast, your custom voices are private and accessible only
  via requests authenticated with your API key.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedUtteranceVoiceWithName:
  properties:
  name:
@@ -566,31 +604,31 @@ types:
  users. In contrast, your custom voices are private and accessible only
  via requests authenticated with your API key.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  VoiceProvider:
  enum:
  - HUME_AI
  - CUSTOM_VOICE
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  PostedUtteranceVoice:
  discriminated: false
  union:
  - type: PostedUtteranceVoiceWithId
  - type: PostedUtteranceVoiceWithName
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  FormatWav:
  properties: {}
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ErrorResponse:
  properties:
  error: optional<string>
  message: optional<string>
  code: optional<string>
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  ReturnPagedVoices:
  docs: A paginated list Octave voices available for text-to-speech
  properties:
@@ -619,4 +657,4 @@ types:
  List of voices returned for the specified `page_number` and
  `page_size`.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
@@ -15,7 +15,7 @@ service:
  offset: $request.page_number
  results: $response.voices_page
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: List voices
  request:
  name: VoicesListRequest
@@ -70,11 +70,11 @@ service:
  page_size: 10
  total_pages: 1
  voices_page:
- - name: David Hume
- id: c42352c0-4566-455d-b180-0f654b65b525
+ - id: c42352c0-4566-455d-b180-0f654b65b525
+ name: David Hume
  provider: CUSTOM_VOICE
- - name: Goliath Hume
- id: d87352b0-26a3-4b11-081b-d157a5674d19
+ - id: d87352b0-26a3-4b11-081b-d157a5674d19
+ name: Goliath Hume
  provider: CUSTOM_VOICE
  create:
  path: /v0/tts/voices
@@ -89,7 +89,7 @@ service:
  ensuring consistent speech style and prosody. For more details on voice
  creation, see the [Voices Guide](/docs/text-to-speech-tts/voices).
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Create voice
  request:
  name: PostedVoice
@@ -117,8 +117,8 @@ service:
  name: David Hume
  response:
  body:
- name: David Hume
  id: c42352c0-4566-455d-b180-0f654b65b525
+ name: David Hume
  provider: CUSTOM_VOICE
  delete:
  path: /v0/tts/voices
@@ -126,7 +126,7 @@ service:
  auth: true
  docs: Deletes a previously generated custom voice.
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
  display-name: Delete voice
  request:
  name: VoicesDeleteRequest
@@ -140,4 +140,4 @@ service:
  - query-parameters:
  name: David Hume
  source:
- openapi: tts-openapi.yml
+ openapi: tts-openapi.json
@@ -22,6 +22,8 @@ export declare namespace Chat {
  resumedChatGroupId?: string;
  /** A flag to enable verbose transcription. Set this query parameter to `true` to have unfinalized user transcripts be sent to the client as interim UserMessage messages. The [interim](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.interim) field on a [UserMessage](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.type) denotes whether the message is "interim" or "final." */
  verboseTranscription?: boolean;
+ /** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
+ voiceId?: string;
  /** Extra query parameters sent at WebSocket connection */
  queryParams?: Record<string, string | string[] | object | object[]>;
  }
@@ -14,7 +14,7 @@ export interface PostedPrompt {
  /** An optional description of the Prompt version. */
  versionDescription?: string;
  /**
- * Instructions used to shape EVIs behavior, responses, and style.
+ * Instructions used to shape EVI's behavior, responses, and style.
  *
  * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
  *
@@ -12,7 +12,7 @@ export interface PostedPromptVersion {
  /** An optional description of the Prompt version. */
  versionDescription?: string;
  /**
- * Instructions used to shape EVIs behavior, responses, and style for this version of the Prompt.
+ * Instructions used to shape EVI's behavior, responses, and style for this version of the Prompt.
  *
  * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
  *
@@ -21,7 +21,7 @@ export interface PostedUserDefinedTool {
  /**
  * Stringified JSON defining the parameters used by this version of the Tool.
  *
- * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
+ * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
  */
  parameters: string;
  /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */
@@ -18,7 +18,7 @@ export interface PostedUserDefinedToolVersion {
  /**
  * Stringified JSON defining the parameters used by this version of the Tool.
  *
- * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
+ * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
  */
  parameters: string;
  /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */
@@ -8,7 +8,7 @@ export interface AssistantEnd {
  /**
  * The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`.
  *
- * This message indicates the conclusion of the assistants response, signaling that the assistant has finished speaking for the current conversational turn.
+ * This message indicates the conclusion of the assistant's response, signaling that the assistant has finished speaking for the current conversational turn.
  */
  type: "assistant_end";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -12,7 +12,7 @@ export interface AssistantInput {
  /**
  * Assistant text to synthesize into spoken audio and insert into the conversation.
  *
- * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the users expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
+ * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
  */
  text: string;
  }
@@ -9,7 +9,7 @@ export interface AssistantMessage {
  /**
  * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
  *
- * This message contains both a transcript of the assistants response and the expression measurement predictions of the assistants audio output.
+ * This message contains both a transcript of the assistant's response and the expression measurement predictions of the assistant's audio output.
  */
  type: "assistant_message";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -16,7 +16,7 @@ export interface AudioInput {
  /**
  * Base64 encoded audio input to insert into the conversation.
  *
- * The content of an Audio Input message is treated as the users speech to EVI and must be streamed continuously. Pre-recorded audio files are not supported.
+ * The content of an Audio Input message is treated as the user's speech to EVI and must be streamed continuously. Pre-recorded audio files are not supported.
  *
  * For optimal transcription quality, the audio data should be transmitted in small chunks.
  *
@@ -1,8 +1,8 @@
  /**
  * This file was auto-generated by Fern from our API Definition.
  */
- export type ContextType = "temporary" | "persistent";
+ export type ContextType = "persistent" | "temporary";
  export declare const ContextType: {
- readonly Temporary: "temporary";
  readonly Persistent: "persistent";
+ readonly Temporary: "temporary";
  };
@@ -5,6 +5,6 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.ContextType = void 0;
  exports.ContextType = {
- Temporary: "temporary",
  Persistent: "persistent",
+ Temporary: "temporary",
  };
@@ -2,4 +2,4 @@
  * This file was auto-generated by Fern from our API Definition.
  */
  import * as Hume from "../../../index";
- export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage | Hume.empathicVoice.AssistantProsody;
+ export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.AssistantProsody | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage;
@@ -8,7 +8,7 @@ export interface PauseAssistantMessage {
  /**
  * The type of message sent through the socket; must be `pause_assistant_message` for our server to correctly identify and process it as a Pause Assistant message.
  *
- * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI wont respond, but transcriptions of your audio inputs will still be recorded.
+ * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI won't respond, but transcriptions of your audio inputs will still be recorded.
  */
  type: "pause_assistant_message";
  /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -13,7 +13,7 @@ export interface PostedLanguageModel {
  /**
  * The model temperature, with values between 0 to 1 (inclusive).
  *
- * Controls the randomness of the LLMs output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
+ * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
  */
  temperature?: number;
  }
@@ -7,12 +7,12 @@
  * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
  */
  export interface PostedTimeoutSpecsInactivity {
+ /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+ durationSecs?: number;
  /**
  * Boolean indicating if this timeout is enabled.
  *
  * If set to false, EVI will not timeout due to a specified duration of user inactivity being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
  */
  enabled: boolean;
- /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
- durationSecs?: number;
  }
@@ -7,12 +7,12 @@
  * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
  */
  export interface PostedTimeoutSpecsMaxDuration {
+ /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+ durationSecs?: number;
  /**
  * Boolean indicating if this timeout is enabled.
  *
  * If set to false, EVI will not timeout due to a specified maximum duration being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
  */
  enabled: boolean;
- /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
- durationSecs?: number;
  }
@@ -45,7 +45,8 @@ export interface ReturnConfig {
  * Hume's eLLM (empathic Large Language Model) is a multimodal language model that takes into account both expression measures and language. The eLLM generates short, empathic language responses and guides text-to-speech (TTS) prosody.
  */
  ellmModel?: Hume.empathicVoice.ReturnEllmModel;
- voice?: unknown;
+ /** A voice specification associated with this Config. */
+ voice?: Hume.empathicVoice.ReturnVoice;
  prompt?: Hume.empathicVoice.ReturnPrompt;
  /** Map of webhooks associated with this config. */
  webhooks?: (Hume.empathicVoice.ReturnWebhookSpec | undefined)[];
@@ -13,7 +13,7 @@ export interface ReturnLanguageModel {
  /**
  * The model temperature, with values between 0 to 1 (inclusive).
  *
- * Controls the randomness of the LLMs output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
+ * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
  */
  temperature?: number;
  }
@@ -11,11 +11,9 @@ export interface ReturnPrompt {
  /** Identifier for a Prompt. Formatted as a UUID. */
  id: string;
  /**
- * Instructions used to shape EVIs behavior, responses, and style.
+ * Instructions used to shape EVI's behavior, responses, and style.
  *
- * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
- *
- * For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
+ * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles. For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
  */
  text: string;
  /**
@@ -35,7 +35,7 @@ export interface ReturnUserDefinedTool {
  /**
  * Stringified JSON defining the parameters used by this version of the Tool.
  *
- * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
+ * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
  */
  parameters: string;
  }
@@ -0,0 +1,12 @@
+ /**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+ import * as Hume from "../../../index";
+ /**
+ * An Octave voice available for text-to-speech
+ */
+ export interface ReturnVoice {
+ id?: string;
+ name?: string;
+ provider?: Hume.empathicVoice.VoiceProvider;
+ }
@@ -0,0 +1,5 @@
+ "use strict";
+ /**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -23,7 +23,7 @@ export interface SessionSettings {
  */
  customSessionId?: string;
  /**
- * Instructions used to shape EVIs behavior, responses, and style for the session.
+ * Instructions used to shape EVI's behavior, responses, and style for the session.
  *
  * When included in a Session Settings message, the provided Prompt overrides the existing one specified in the EVI configuration. If no Prompt was defined in the configuration, this Prompt will be the one used for the session.
  *
@@ -49,7 +49,7 @@ export interface SessionSettings {
  /**
  * Third party API key for the supplemental language model.
  *
- * When provided, EVI will use this key instead of Humes API key for the supplemental LLM. This allows you to bypass rate limits and utilize your own API key as needed.
+ * When provided, EVI will use this key instead of Hume's API key for the supplemental LLM. This allows you to bypass rate limits and utilize your own API key as needed.
  */
  languageModelApiKey?: string;
  /**
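
For SDK users, the two empathic-voice changes above that affect call sites are the new `voiceId` chat connect option and the retyping of `ReturnConfig.voice` from `unknown` to the new `ReturnVoice` interface. A minimal TypeScript sketch of how these might be used, assuming the published `HumeClient` and its documented `empathicVoice.chat.connect` entry point; the IDs are placeholders:

```typescript
import { HumeClient, Hume } from "hume";

const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });

// New in 0.13.x: `voiceId` on the chat connect options. Per the doc comment
// added in Client.d.ts above, it overrides the voice set in the Config.
const socket = client.empathicVoice.chat.connect({
    configId: "<config-id>", // placeholder
    voiceId: "<voice-id>",   // placeholder
});

socket.on("open", () => {
    console.log("EVI chat connected");
});

// `ReturnConfig.voice` is now typed as `ReturnVoice` (previously `unknown`),
// so provider checks compile without casts.
function isCustomVoice(voice?: Hume.empathicVoice.ReturnVoice): boolean {
    return voice?.provider === "CUSTOM_VOICE";
}
```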