hume 0.13.8 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/.mock/definition/empathic-voice/__package__.yml +13 -9
  2. package/.mock/definition/empathic-voice/chat.yml +105 -0
  3. package/.mock/definition/empathic-voice/configs.yml +3 -1
  4. package/.mock/definition/tts/__package__.yml +147 -78
  5. package/.mock/definition/tts/streamInput.yml +27 -36
  6. package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +4 -0
  7. package/api/resources/empathicVoice/resources/chat/client/Client.js +47 -4
  8. package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -1
  9. package/api/resources/empathicVoice/types/LanguageModelType.d.ts +3 -1
  10. package/api/resources/empathicVoice/types/LanguageModelType.js +2 -0
  11. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +4 -4
  12. package/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -2
  13. package/api/resources/tts/client/Client.d.ts +1 -1
  14. package/api/resources/tts/client/Client.js +1 -1
  15. package/api/resources/tts/types/MillisecondInterval.d.ts +9 -0
  16. package/api/resources/tts/types/MillisecondInterval.js +5 -0
  17. package/api/resources/tts/types/OctaveVersion.d.ts +5 -1
  18. package/api/resources/tts/types/OctaveVersion.js +5 -0
  19. package/api/resources/tts/types/PostedTts.d.ts +3 -0
  20. package/api/resources/tts/types/PublishTts.d.ts +8 -8
  21. package/api/resources/tts/types/Snippet.d.ts +3 -0
  22. package/api/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
  23. package/api/resources/tts/types/Timestamp.d.ts +9 -0
  24. package/api/resources/tts/types/Timestamp.js +5 -0
  25. package/api/resources/tts/types/TimestampMessage.d.ts +17 -0
  26. package/api/resources/tts/types/TimestampMessage.js +5 -0
  27. package/api/resources/tts/types/TimestampType.d.ts +8 -0
  28. package/api/resources/tts/types/TimestampType.js +10 -0
  29. package/api/resources/tts/types/TtsOutput.d.ts +13 -0
  30. package/api/resources/tts/types/TtsOutput.js +5 -0
  31. package/api/resources/tts/types/index.d.ts +8 -3
  32. package/api/resources/tts/types/index.js +8 -3
  33. package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +4 -0
  34. package/dist/api/resources/empathicVoice/resources/chat/client/Client.js +47 -4
  35. package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -1
  36. package/dist/api/resources/empathicVoice/types/LanguageModelType.d.ts +3 -1
  37. package/dist/api/resources/empathicVoice/types/LanguageModelType.js +2 -0
  38. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +4 -4
  39. package/dist/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -2
  40. package/dist/api/resources/tts/client/Client.d.ts +1 -1
  41. package/dist/api/resources/tts/client/Client.js +1 -1
  42. package/dist/api/resources/tts/types/MillisecondInterval.d.ts +9 -0
  43. package/dist/api/resources/tts/types/MillisecondInterval.js +5 -0
  44. package/dist/api/resources/tts/types/OctaveVersion.d.ts +5 -1
  45. package/dist/api/resources/tts/types/OctaveVersion.js +5 -0
  46. package/dist/api/resources/tts/types/PostedTts.d.ts +3 -0
  47. package/dist/api/resources/tts/types/PublishTts.d.ts +8 -8
  48. package/dist/api/resources/tts/types/Snippet.d.ts +3 -0
  49. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
  50. package/dist/api/resources/tts/types/Timestamp.d.ts +9 -0
  51. package/dist/api/resources/tts/types/Timestamp.js +5 -0
  52. package/dist/api/resources/tts/types/TimestampMessage.d.ts +17 -0
  53. package/dist/api/resources/tts/types/TimestampMessage.js +5 -0
  54. package/dist/api/resources/tts/types/TimestampType.d.ts +8 -0
  55. package/dist/api/resources/tts/types/TimestampType.js +10 -0
  56. package/dist/api/resources/tts/types/TtsOutput.d.ts +13 -0
  57. package/dist/api/resources/tts/types/TtsOutput.js +5 -0
  58. package/dist/api/resources/tts/types/index.d.ts +8 -3
  59. package/dist/api/resources/tts/types/index.js +8 -3
  60. package/dist/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
  61. package/dist/serialization/resources/empathicVoice/types/LanguageModelType.js +2 -0
  62. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +3 -3
  63. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +3 -3
  64. package/dist/serialization/resources/empathicVoice/types/ReturnPrompt.d.ts +1 -1
  65. package/dist/serialization/resources/empathicVoice/types/ReturnPrompt.js +1 -1
  66. package/dist/serialization/resources/tts/types/MillisecondInterval.d.ts +13 -0
  67. package/dist/serialization/resources/tts/types/MillisecondInterval.js +44 -0
  68. package/dist/serialization/resources/tts/types/OctaveVersion.d.ts +1 -1
  69. package/dist/serialization/resources/tts/types/OctaveVersion.js +1 -1
  70. package/dist/serialization/resources/tts/types/PostedTts.d.ts +2 -0
  71. package/dist/serialization/resources/tts/types/PostedTts.js +2 -0
  72. package/dist/serialization/resources/tts/types/PublishTts.d.ts +4 -4
  73. package/dist/serialization/resources/tts/types/PublishTts.js +4 -4
  74. package/dist/serialization/resources/tts/types/Snippet.d.ts +2 -0
  75. package/dist/serialization/resources/tts/types/Snippet.js +2 -0
  76. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
  77. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +0 -1
  78. package/dist/serialization/resources/tts/types/Timestamp.d.ts +16 -0
  79. package/dist/serialization/resources/tts/types/Timestamp.js +47 -0
  80. package/dist/serialization/resources/tts/types/TimestampMessage.d.ts +16 -0
  81. package/dist/serialization/resources/tts/types/TimestampMessage.js +47 -0
  82. package/dist/serialization/resources/tts/types/TimestampType.d.ts +10 -0
  83. package/dist/serialization/resources/tts/types/TimestampType.js +41 -0
  84. package/dist/serialization/resources/tts/types/TtsOutput.d.ts +18 -0
  85. package/dist/serialization/resources/tts/types/TtsOutput.js +51 -0
  86. package/dist/serialization/resources/tts/types/index.d.ts +8 -3
  87. package/dist/serialization/resources/tts/types/index.js +8 -3
  88. package/dist/version.d.ts +1 -1
  89. package/dist/version.js +1 -1
  90. package/package.json +1 -1
  91. package/reference.md +1 -1
  92. package/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
  93. package/serialization/resources/empathicVoice/types/LanguageModelType.js +2 -0
  94. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +3 -3
  95. package/serialization/resources/empathicVoice/types/ReturnConfig.js +3 -3
  96. package/serialization/resources/empathicVoice/types/ReturnPrompt.d.ts +1 -1
  97. package/serialization/resources/empathicVoice/types/ReturnPrompt.js +1 -1
  98. package/serialization/resources/tts/types/MillisecondInterval.d.ts +13 -0
  99. package/serialization/resources/tts/types/MillisecondInterval.js +44 -0
  100. package/serialization/resources/tts/types/OctaveVersion.d.ts +1 -1
  101. package/serialization/resources/tts/types/OctaveVersion.js +1 -1
  102. package/serialization/resources/tts/types/PostedTts.d.ts +2 -0
  103. package/serialization/resources/tts/types/PostedTts.js +2 -0
  104. package/serialization/resources/tts/types/PublishTts.d.ts +4 -4
  105. package/serialization/resources/tts/types/PublishTts.js +4 -4
  106. package/serialization/resources/tts/types/Snippet.d.ts +2 -0
  107. package/serialization/resources/tts/types/Snippet.js +2 -0
  108. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
  109. package/serialization/resources/tts/types/SnippetAudioChunk.js +0 -1
  110. package/serialization/resources/tts/types/Timestamp.d.ts +16 -0
  111. package/serialization/resources/tts/types/Timestamp.js +47 -0
  112. package/serialization/resources/tts/types/TimestampMessage.d.ts +16 -0
  113. package/serialization/resources/tts/types/TimestampMessage.js +47 -0
  114. package/serialization/resources/tts/types/TimestampType.d.ts +10 -0
  115. package/serialization/resources/tts/types/TimestampType.js +41 -0
  116. package/serialization/resources/tts/types/TtsOutput.d.ts +18 -0
  117. package/serialization/resources/tts/types/TtsOutput.js +51 -0
  118. package/serialization/resources/tts/types/index.d.ts +8 -3
  119. package/serialization/resources/tts/types/index.js +8 -3
  120. package/version.d.ts +1 -1
  121. package/version.js +1 -1
@@ -1027,6 +1027,8 @@ types:
1027
1027
  name: Claude3Haiku20240307
1028
1028
  - value: claude-sonnet-4-20250514
1029
1029
  name: ClaudeSonnet420250514
1030
+ - value: claude-sonnet-4-5-20250929
1031
+ name: ClaudeSonnet4520250929
1030
1032
  - value: us.anthropic.claude-3-5-haiku-20241022-v1:0
1031
1033
  name: UsAnthropicClaude35Haiku20241022V10
1032
1034
  - value: us.anthropic.claude-3-5-sonnet-20240620-v1:0
@@ -1119,6 +1121,8 @@ types:
1119
1121
  name: Llama4Maverick17B128EInstruct
1120
1122
  - value: Qwen3-32B
1121
1123
  name: Qwen332B
1124
+ - value: grok-4-fast-non-reasoning-latest
1125
+ name: Grok4FastNonReasoningLatest
1122
1126
  - ellm
1123
1127
  - value: custom-language-model
1124
1128
  name: CustomLanguageModel
@@ -1470,9 +1474,6 @@ types:
1470
1474
  Version numbers are integer values representing different iterations
1471
1475
  of the Prompt. Each update to the Prompt increments its version
1472
1476
  number.
1473
- version_description:
1474
- type: optional<string>
1475
- docs: An optional description of the Prompt version.
1476
1477
  version_type:
1477
1478
  type: ReturnPromptVersionType
1478
1479
  docs: >-
@@ -1488,6 +1489,9 @@ types:
1488
1489
  docs: >-
1489
1490
  Time at which the Prompt was last modified. Measured in seconds since
1490
1491
  the Unix epoch.
1492
+ version_description:
1493
+ type: optional<string>
1494
+ docs: An optional description of the Prompt version.
1491
1495
  source:
1492
1496
  openapi: evi-openapi.json
1493
1497
  ReturnPagedConfigs:
@@ -1543,12 +1547,6 @@ types:
1543
1547
  Version numbers are integer values representing different iterations
1544
1548
  of the Config. Each update to the Config increments its version
1545
1549
  number.
1546
- tools:
1547
- type: optional<list<optional<ReturnUserDefinedTool>>>
1548
- docs: List of user-defined tools associated with this Config.
1549
- version_description:
1550
- type: optional<string>
1551
- docs: An optional description of the Config version.
1552
1550
  language_model:
1553
1551
  type: optional<ReturnLanguageModel>
1554
1552
  docs: >-
@@ -1603,6 +1601,12 @@ types:
1603
1601
  docs: >-
1604
1602
  Time at which the Config was last modified. Measured in seconds since
1605
1603
  the Unix epoch.
1604
+ version_description:
1605
+ type: optional<string>
1606
+ docs: An optional description of the Config version.
1607
+ tools:
1608
+ type: optional<list<optional<ReturnUserDefinedTool>>>
1609
+ docs: List of user-defined tools associated with this Config.
1606
1610
  source:
1607
1611
  openapi: evi-openapi.json
1608
1612
  ReturnPagedChatsPaginationDirection:
@@ -98,6 +98,111 @@ channel:
98
98
  Use the GET `/v0/evi/chat_groups` endpoint to obtain the Chat Group IDs
99
99
  of all Chat Groups associated with an API key. This endpoint returns a
100
100
  list of all available chat groups.
101
+ session_settings[audio][channels]:
102
+ type: optional<integer>
103
+ docs: Sets number of audio channels for audio input.
104
+ session_settings[audio][encoding]:
105
+ type: optional<string>
106
+ docs: Sets encoding format of the audio input, such as `linear16`.
107
+ session_settings[audio][sample_rate]:
108
+ type: optional<integer>
109
+ docs: >-
110
+ Sets the sample rate for audio input. (Number of samples per second in
111
+ the audio input, measured in Hertz.)
112
+ session_settings[context][text]:
113
+ type: optional<string>
114
+ docs: >-
115
+ The context to be injected into the conversation. Helps inform the LLM's
116
+ response by providing relevant information about the ongoing
117
+ conversation.
118
+
119
+
120
+ This text will be appended to the end of
121
+ [user_messages](/reference/speech-to-speech-evi/chat#receive.UserMessage.message.content)
122
+ based on the chosen persistence level. For example, if you want to
123
+ remind EVI of its role as a helpful weather assistant, the context you
124
+ insert will be appended to the end of user messages as `{Context: You
125
+ are a helpful weather assistant}`.
126
+ session_settings[context][type]:
127
+ type: optional<string>
128
+ docs: >-
129
+ The persistence level of the injected context. Specifies how long the
130
+ injected context will remain active in the session.
131
+
132
+
133
+ - **Temporary**: Context that is only applied to the following assistant
134
+ response.
135
+
136
+
137
+ - **Persistent**: Context that is applied to all subsequent assistant
138
+ responses for the remainder of the Chat.
139
+ session_settings[custom_session_id]:
140
+ type: optional<string>
141
+ docs: >-
142
+ Used to manage conversational state, correlate frontend and backend
143
+ data, and persist conversations across EVI sessions.
144
+ session_settings[event_limit]:
145
+ type: optional<integer>
146
+ docs: >-
147
+ The maximum number of chat events to return from chat history. By
148
+ default, the system returns up to 300 events (100 events per page × 3
149
+ pages). Set this parameter to a smaller value to limit the number of
150
+ events returned.
151
+ session_settings[language_model_api_key]:
152
+ type: optional<string>
153
+ docs: >-
154
+ Third party API key for the supplemental language model.
155
+
156
+
157
+ When provided, EVI will use this key instead of Hume's API key for the
158
+ supplemental LLM. This allows you to bypass rate limits and utilize your
159
+ own API key as needed.
160
+ session_settings[system_prompt]:
161
+ type: optional<string>
162
+ docs: >-
163
+ Instructions used to shape EVI's behavior, responses, and style for the
164
+ session.
165
+
166
+
167
+ When included in a Session Settings message, the provided Prompt
168
+ overrides the existing one specified in the EVI configuration. If no
169
+ Prompt was defined in the configuration, this Prompt will be the one
170
+ used for the session.
171
+
172
+
173
+ You can use the Prompt to define a specific goal or role for EVI,
174
+ specifying how it should act or what it should focus on during the
175
+ conversation. For example, EVI can be instructed to act as a customer
176
+ support representative, a fitness coach, or a travel advisor, each with
177
+ its own set of behaviors and response styles.
178
+
179
+
180
+ For help writing a system prompt, see our [Prompting
181
+ Guide](/docs/speech-to-speech-evi/guides/prompting).
182
+ session_settings[variables]:
183
+ type: optional<string>
184
+ docs: >-
185
+ This field allows you to assign values to dynamic variables referenced
186
+ in your system prompt.
187
+
188
+
189
+ Each key represents the variable name, and the corresponding value is
190
+ the specific content you wish to assign to that variable within the
191
+ session. While the values for variables can be strings, numbers, or
192
+ booleans, the value will ultimately be converted to a string when
193
+ injected into your system prompt.
194
+
195
+
196
+ Using this field, you can personalize responses based on
197
+ session-specific details. For more guidance, see our [guide on using
198
+ dynamic
199
+ variables](/docs/speech-to-speech-evi/features/dynamic-variables).
200
+ session_settings[voice_id]:
201
+ type: optional<string>
202
+ docs: >-
203
+ The name or ID of the voice from the `Voice Library` to be used as the
204
+ speaker for this EVI session. This will override the speaker set in the
205
+ selected configuration.
101
206
  verbose_transcription:
102
207
  type: optional<boolean>
103
208
  default: false
@@ -140,7 +140,9 @@ service:
140
140
  properties:
141
141
  evi_version:
142
142
  type: string
143
- docs: EVI version to use. Only version `3` is supported.
143
+ docs: >-
144
+ EVI version to use. Only versions `3` and `4-mini` are
145
+ supported.
144
146
  name:
145
147
  type: string
146
148
  docs: Name applied to all versions of a particular Config.
@@ -30,7 +30,7 @@ service:
30
30
  format.
31
31
  source:
32
32
  openapi: tts-openapi.json
33
- display-name: Text-to-speech (Json)
33
+ display-name: Text-to-Speech (Json)
34
34
  request:
35
35
  body:
36
36
  type: PostedTts
@@ -79,6 +79,7 @@ service:
79
79
  Beauty is no quality in things themselves: It exists
80
80
  merely in the mind which contemplates them.
81
81
  utterance_index: 0
82
+ timestamps: []
82
83
  request_id: 66e01f90-4501-4aa0-bbaf-74f45dc15aa725906
83
84
  synthesize-file:
84
85
  path: /v0/tts/file
@@ -94,7 +95,7 @@ service:
94
95
  The response contains the generated audio file in the requested format.
95
96
  source:
96
97
  openapi: tts-openapi.json
97
- display-name: Text-to-speech (File)
98
+ display-name: Text-to-Speech (File)
98
99
  request:
99
100
  body:
100
101
  type: PostedTts
@@ -131,7 +132,7 @@ service:
131
132
  prosody.
132
133
  source:
133
134
  openapi: tts-openapi.json
134
- display-name: Text-to-speech (Streamed File)
135
+ display-name: Text-to-Speech (Streamed File)
135
136
  request:
136
137
  body:
137
138
  type: PostedTts
@@ -166,14 +167,14 @@ service:
166
167
  base64.
167
168
  source:
168
169
  openapi: tts-openapi.json
169
- display-name: Text-to-speech (Streamed JSON)
170
+ display-name: Text-to-Speech (Streamed JSON)
170
171
  request:
171
172
  body:
172
173
  type: PostedTts
173
174
  content-type: application/json
174
175
  response-stream:
175
176
  docs: Successful Response
176
- type: SnippetAudioChunk
177
+ type: TtsOutput
177
178
  format: json
178
179
  errors:
179
180
  - UnprocessableEntityError
@@ -189,15 +190,30 @@ service:
189
190
  source:
190
191
  openapi: tts-openapi.json
191
192
  types:
193
+ TtsOutput:
194
+ discriminant: type
195
+ base-properties: {}
196
+ union:
197
+ timestamp:
198
+ type: TimestampMessage
199
+ audio:
200
+ type: SnippetAudioChunk
201
+ source:
202
+ openapi: tts-openapi.json
203
+ AudioFormatType:
204
+ enum:
205
+ - mp3
206
+ - pcm
207
+ - wav
208
+ source:
209
+ openapi: tts-openapi.json
192
210
  PublishTts:
193
211
  docs: Input message type for the TTS stream.
194
212
  properties:
195
- text:
196
- type: optional<string>
197
- docs: The input text to be converted to speech output.
198
- default: ''
199
- validation:
200
- maxLength: 5000
213
+ close:
214
+ type: optional<boolean>
215
+ docs: Force the generation of audio and close the stream.
216
+ default: false
201
217
  description:
202
218
  type: optional<string>
203
219
  docs: >-
@@ -206,12 +222,12 @@ types:
206
222
  accent"`).
207
223
  validation:
208
224
  maxLength: 1000
209
- voice:
210
- type: optional<PostedUtteranceVoice>
225
+ flush:
226
+ type: optional<boolean>
211
227
  docs: >-
212
- The name or ID of the voice from the `Voice Library` to be used as the
213
- speaker for this and all subsequent utterances, until the `"voice"`
214
- field is updated again.
228
+ Force the generation of audio regardless of how much text has been
229
+ supplied.
230
+ default: false
215
231
  speed:
216
232
  type: optional<double>
217
233
  docs: A relative measure of how fast this utterance should be spoken.
@@ -219,6 +235,12 @@ types:
219
235
  validation:
220
236
  min: 0.25
221
237
  max: 3
238
+ text:
239
+ type: optional<string>
240
+ docs: The input text to be converted to speech output.
241
+ default: ''
242
+ validation:
243
+ maxLength: 5000
222
244
  trailing_silence:
223
245
  type: optional<double>
224
246
  docs: Duration of trailing silence (in seconds) to add to this utterance
@@ -226,18 +248,104 @@ types:
226
248
  validation:
227
249
  min: 0
228
250
  max: 5
229
- flush:
230
- type: optional<boolean>
251
+ voice:
252
+ type: optional<PostedUtteranceVoice>
231
253
  docs: >-
232
- Force the generation of audio regardless of how much text has been
233
- supplied.
234
- default: false
235
- close:
236
- type: optional<boolean>
237
- docs: Force the generation of audio and close the stream.
238
- default: false
254
+ The name or ID of the voice from the `Voice Library` to be used as the
255
+ speaker for this and all subsequent utterances, until the `"voice"`
256
+ field is updated again.
239
257
  source:
240
258
  openapi: tts-asyncapi.json
259
+ MillisecondInterval:
260
+ properties:
261
+ begin:
262
+ type: integer
263
+ docs: Start time of the interval in milliseconds.
264
+ end:
265
+ type: integer
266
+ docs: End time of the interval in milliseconds.
267
+ source:
268
+ openapi: tts-openapi.json
269
+ TimestampMessage:
270
+ docs: A word or phoneme level timestamp for the generated audio.
271
+ properties:
272
+ generation_id:
273
+ type: string
274
+ docs: >-
275
+ The generation ID of the parent snippet that this chunk corresponds
276
+ to.
277
+ request_id:
278
+ type: string
279
+ docs: ID of the initiating request.
280
+ snippet_id:
281
+ type: string
282
+ docs: The ID of the parent snippet that this chunk corresponds to.
283
+ timestamp:
284
+ type: Timestamp
285
+ docs: A word or phoneme level timestamp for the generated audio.
286
+ source:
287
+ openapi: tts-openapi.json
288
+ SnippetAudioChunk:
289
+ docs: Metadata for a chunk of generated audio.
290
+ properties:
291
+ audio:
292
+ type: string
293
+ docs: The generated audio output chunk in the requested format.
294
+ audio_format:
295
+ type: AudioFormatType
296
+ docs: The generated audio output format.
297
+ chunk_index:
298
+ type: integer
299
+ docs: The index of the audio chunk in the snippet.
300
+ generation_id:
301
+ type: string
302
+ docs: >-
303
+ The generation ID of the parent snippet that this chunk corresponds
304
+ to.
305
+ is_last_chunk:
306
+ type: boolean
307
+ docs: >-
308
+ Whether or not this is the last chunk streamed back from the decoder
309
+ for one input snippet.
310
+ request_id:
311
+ type: string
312
+ docs: ID of the initiating request.
313
+ snippet:
314
+ type: optional<Snippet>
315
+ snippet_id:
316
+ type: string
317
+ docs: The ID of the parent snippet that this chunk corresponds to.
318
+ text:
319
+ type: string
320
+ docs: The text of the parent snippet that this chunk corresponds to.
321
+ transcribed_text:
322
+ type: optional<string>
323
+ docs: >-
324
+ The transcribed text of the generated audio of the parent snippet that
325
+ this chunk corresponds to. It is only present if `instant_mode` is set
326
+ to `false`.
327
+ utterance_index:
328
+ type: optional<integer>
329
+ docs: >-
330
+ The index of the utterance in the request that the parent snippet of
331
+ this chunk corresponds to.
332
+ source:
333
+ openapi: tts-openapi.json
334
+ Timestamp:
335
+ properties:
336
+ text: string
337
+ time:
338
+ type: MillisecondInterval
339
+ type:
340
+ type: TimestampType
341
+ source:
342
+ openapi: tts-openapi.json
343
+ TimestampType:
344
+ enum:
345
+ - word
346
+ - phoneme
347
+ source:
348
+ openapi: tts-openapi.json
241
349
  PostedUtteranceVoiceWithId:
242
350
  properties:
243
351
  id:
@@ -309,59 +417,12 @@ types:
309
417
  - type: PostedUtteranceVoiceWithName
310
418
  source:
311
419
  openapi: tts-openapi.json
312
- AudioFormatType:
420
+ OctaveVersion:
313
421
  enum:
314
- - mp3
315
- - pcm
316
- - wav
317
- source:
318
- openapi: tts-openapi.json
319
- SnippetAudioChunk:
320
- docs: Metadata for a chunk of generated audio.
321
- properties:
322
- audio:
323
- type: string
324
- docs: The generated audio output chunk in the requested format.
325
- audio_format:
326
- type: AudioFormatType
327
- docs: The generated audio output format.
328
- chunk_index:
329
- type: integer
330
- docs: The index of the audio chunk in the snippet.
331
- generation_id:
332
- type: string
333
- docs: >-
334
- The generation ID of the parent snippet that this chunk corresponds
335
- to.
336
- is_last_chunk:
337
- type: boolean
338
- docs: >-
339
- Whether or not this is the last chunk streamed back from the decoder
340
- for one input snippet.
341
- request_id:
342
- type: string
343
- docs: ID of the initiating request.
344
- snippet:
345
- type: optional<Snippet>
346
- snippet_id:
347
- type: string
348
- docs: The ID of the parent snippet that this chunk corresponds to.
349
- text:
350
- type: string
351
- docs: The text of the parent snippet that this chunk corresponds to.
352
- transcribed_text:
353
- type: optional<string>
354
- docs: >-
355
- The transcribed text of the generated audio of the parent snippet that
356
- this chunk corresponds to. It is only present if `instant_mode` is set
357
- to `false`.
358
- type:
359
- type: optional<literal<"audio">>
360
- utterance_index:
361
- type: optional<integer>
362
- docs: >-
363
- The index of the utterance in the request that the parent snippet of
364
- this chunk corresponds to.
422
+ - value: '1'
423
+ name: One
424
+ - value: '2'
425
+ name: Two
365
426
  source:
366
427
  openapi: tts-openapi.json
367
428
  PostedContextWithGenerationId:
@@ -471,6 +532,9 @@ types:
471
532
  format:
472
533
  type: optional<Format>
473
534
  docs: Specifies the output audio file format.
535
+ include_timestamp_types:
536
+ type: optional<list<TimestampType>>
537
+ docs: The set of timestamp types to include in the response.
474
538
  num_generations:
475
539
  type: optional<integer>
476
540
  docs: Number of generations of the audio to produce.
@@ -520,6 +584,9 @@ types:
520
584
  type: list<PostedUtterance>
521
585
  version:
522
586
  type: optional<OctaveVersion>
587
+ docs: >-
588
+ The version of the Octave Model to use. 1 for the legacy model, 2 for
589
+ the new model.
523
590
  instant_mode:
524
591
  type: optional<boolean>
525
592
  docs: >-
@@ -556,7 +623,6 @@ types:
556
623
  troubleshooting assistance.
557
624
  source:
558
625
  openapi: tts-openapi.json
559
- OctaveVersion: string
560
626
  ReturnVoice:
561
627
  docs: An Octave voice available for text-to-speech
562
628
  properties:
@@ -596,6 +662,9 @@ types:
596
662
  text:
597
663
  type: string
598
664
  docs: The text for this **Snippet**.
665
+ timestamps:
666
+ docs: A list of word or phoneme level timestamps for the generated audio.
667
+ type: list<Timestamp>
599
668
  transcribed_text:
600
669
  type: optional<string>
601
670
  docs: >-
@@ -6,6 +6,21 @@ channel:
6
6
  auth: false
7
7
  docs: Generate emotionally expressive speech.
8
8
  query-parameters:
9
+ access_token:
10
+ type: optional<string>
11
+ default: ''
12
+ docs: >-
13
+ Access token used for authenticating the client. If not provided, an
14
+ `api_key` must be provided to authenticate.
15
+
16
+
17
+ The access token is generated using both an API key and a Secret key,
18
+ which provides an additional layer of security compared to using just an
19
+ API key.
20
+
21
+
22
+ For more details, refer to the [Authentication Strategies
23
+ Guide](/docs/introduction/api-key#authentication-strategies).
9
24
  context_generation_id:
10
25
  type: optional<string>
11
26
  docs: >-
@@ -13,14 +28,10 @@ channel:
13
28
  consistent speech style and prosody across multiple requests. Including
14
29
  context may increase audio generation times.
15
30
  format_type: root.AudioFormatType
16
- strip_headers:
17
- type: optional<boolean>
18
- default: false
19
- docs: >-
20
- If enabled, the audio for all the chunks of a generation, once
21
- concatenated together, will constitute a single audio file. Otherwise,
22
- if disabled, each chunk's audio will be its own audio file, each with
23
- its own headers (if applicable).
31
+ include_timestamp_types:
32
+ type: optional<root.TimestampType>
33
+ allow-multiple: true
34
+ docs: The set of timestamp types to include in the response.
24
35
  instant_mode:
25
36
  type: optional<boolean>
26
37
  default: true
@@ -34,21 +45,15 @@ channel:
34
45
  type: optional<boolean>
35
46
  default: false
36
47
  docs: If enabled, no binary websocket messages will be sent to the client.
37
- access_token:
38
- type: optional<string>
39
- default: ''
48
+ strip_headers:
49
+ type: optional<boolean>
50
+ default: false
40
51
  docs: >-
41
- Access token used for authenticating the client. If not provided, an
42
- `api_key` must be provided to authenticate.
43
-
44
-
45
- The access token is generated using both an API key and a Secret key,
46
- which provides an additional layer of security compared to using just an
47
- API key.
48
-
49
-
50
- For more details, refer to the [Authentication Strategies
51
- Guide](/docs/introduction/api-key#authentication-strategies).
52
+ If enabled, the audio for all the chunks of a generation, once
53
+ concatenated together, will constitute a single audio file. Otherwise,
54
+ if disabled, each chunk's audio will be its own audio file, each with
55
+ its own headers (if applicable).
56
+ version: root.OctaveVersion
52
57
  api_key:
53
58
  type: optional<string>
54
59
  default: ''
@@ -64,21 +69,7 @@ channel:
64
69
  origin: client
65
70
  body:
66
71
  type: root.PublishTts
67
- subscribe:
68
- origin: server
69
- body:
70
- type: root.SnippetAudioChunk
71
72
  examples:
72
73
  - messages:
73
74
  - type: publish
74
75
  body: {}
75
- - type: subscribe
76
- body:
77
- request_id: request_id
78
- generation_id: generation_id
79
- snippet_id: snippet_id
80
- text: text
81
- chunk_index: 1
82
- audio: audio
83
- audio_format: mp3
84
- is_last_chunk: true
@@ -2,6 +2,7 @@
2
2
  import * as environments from "../../../../../../environments";
3
3
  import * as core from "../../../../../../core";
4
4
  import { ChatSocket } from "./Socket";
5
+ import { SessionSettings } from "../../../types/SessionSettings";
5
6
  export declare function createHostnameWithProtocol(environment: string): string;
6
7
  export declare namespace Chat {
7
8
  interface Options {
@@ -24,6 +25,9 @@ export declare namespace Chat {
24
25
  verboseTranscription?: boolean;
25
26
  /** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
26
27
  voiceId?: string;
28
+ sessionSettings?: Pick<SessionSettings, Exclude<keyof SessionSettings, "builtinTools" | "type" | "metadata" | "tools">> & {
29
+ eventLimit?: number;
30
+ };
27
31
  /** Extra query parameters sent at WebSocket connection */
28
32
  queryParams?: Record<string, string | string[] | object | object[]>;
29
33
  }