hume 0.13.8 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mock/definition/empathic-voice/__package__.yml +13 -9
- package/.mock/definition/empathic-voice/chat.yml +105 -0
- package/.mock/definition/empathic-voice/configs.yml +3 -1
- package/.mock/definition/tts/__package__.yml +147 -78
- package/.mock/definition/tts/streamInput.yml +27 -36
- package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +4 -0
- package/api/resources/empathicVoice/resources/chat/client/Client.js +47 -4
- package/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -1
- package/api/resources/empathicVoice/types/LanguageModelType.d.ts +3 -1
- package/api/resources/empathicVoice/types/LanguageModelType.js +2 -0
- package/api/resources/empathicVoice/types/ReturnConfig.d.ts +4 -4
- package/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -2
- package/api/resources/tts/client/Client.d.ts +1 -1
- package/api/resources/tts/client/Client.js +1 -1
- package/api/resources/tts/types/MillisecondInterval.d.ts +9 -0
- package/api/resources/tts/types/MillisecondInterval.js +5 -0
- package/api/resources/tts/types/OctaveVersion.d.ts +5 -1
- package/api/resources/tts/types/OctaveVersion.js +5 -0
- package/api/resources/tts/types/PostedTts.d.ts +3 -0
- package/api/resources/tts/types/PublishTts.d.ts +8 -8
- package/api/resources/tts/types/Snippet.d.ts +3 -0
- package/api/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
- package/api/resources/tts/types/Timestamp.d.ts +9 -0
- package/api/resources/tts/types/Timestamp.js +5 -0
- package/api/resources/tts/types/TimestampMessage.d.ts +17 -0
- package/api/resources/tts/types/TimestampMessage.js +5 -0
- package/api/resources/tts/types/TimestampType.d.ts +8 -0
- package/api/resources/tts/types/TimestampType.js +10 -0
- package/api/resources/tts/types/TtsOutput.d.ts +13 -0
- package/api/resources/tts/types/TtsOutput.js +5 -0
- package/api/resources/tts/types/index.d.ts +8 -3
- package/api/resources/tts/types/index.js +8 -3
- package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +4 -0
- package/dist/api/resources/empathicVoice/resources/chat/client/Client.js +47 -4
- package/dist/api/resources/empathicVoice/resources/configs/client/requests/PostedConfig.d.ts +1 -1
- package/dist/api/resources/empathicVoice/types/LanguageModelType.d.ts +3 -1
- package/dist/api/resources/empathicVoice/types/LanguageModelType.js +2 -0
- package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +4 -4
- package/dist/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -2
- package/dist/api/resources/tts/client/Client.d.ts +1 -1
- package/dist/api/resources/tts/client/Client.js +1 -1
- package/dist/api/resources/tts/types/MillisecondInterval.d.ts +9 -0
- package/dist/api/resources/tts/types/MillisecondInterval.js +5 -0
- package/dist/api/resources/tts/types/OctaveVersion.d.ts +5 -1
- package/dist/api/resources/tts/types/OctaveVersion.js +5 -0
- package/dist/api/resources/tts/types/PostedTts.d.ts +3 -0
- package/dist/api/resources/tts/types/PublishTts.d.ts +8 -8
- package/dist/api/resources/tts/types/Snippet.d.ts +3 -0
- package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
- package/dist/api/resources/tts/types/Timestamp.d.ts +9 -0
- package/dist/api/resources/tts/types/Timestamp.js +5 -0
- package/dist/api/resources/tts/types/TimestampMessage.d.ts +17 -0
- package/dist/api/resources/tts/types/TimestampMessage.js +5 -0
- package/dist/api/resources/tts/types/TimestampType.d.ts +8 -0
- package/dist/api/resources/tts/types/TimestampType.js +10 -0
- package/dist/api/resources/tts/types/TtsOutput.d.ts +13 -0
- package/dist/api/resources/tts/types/TtsOutput.js +5 -0
- package/dist/api/resources/tts/types/index.d.ts +8 -3
- package/dist/api/resources/tts/types/index.js +8 -3
- package/dist/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/LanguageModelType.js +2 -0
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +3 -3
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +3 -3
- package/dist/serialization/resources/empathicVoice/types/ReturnPrompt.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/ReturnPrompt.js +1 -1
- package/dist/serialization/resources/tts/types/MillisecondInterval.d.ts +13 -0
- package/dist/serialization/resources/tts/types/MillisecondInterval.js +44 -0
- package/dist/serialization/resources/tts/types/OctaveVersion.d.ts +1 -1
- package/dist/serialization/resources/tts/types/OctaveVersion.js +1 -1
- package/dist/serialization/resources/tts/types/PostedTts.d.ts +2 -0
- package/dist/serialization/resources/tts/types/PostedTts.js +2 -0
- package/dist/serialization/resources/tts/types/PublishTts.d.ts +4 -4
- package/dist/serialization/resources/tts/types/PublishTts.js +4 -4
- package/dist/serialization/resources/tts/types/Snippet.d.ts +2 -0
- package/dist/serialization/resources/tts/types/Snippet.js +2 -0
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +0 -1
- package/dist/serialization/resources/tts/types/Timestamp.d.ts +16 -0
- package/dist/serialization/resources/tts/types/Timestamp.js +47 -0
- package/dist/serialization/resources/tts/types/TimestampMessage.d.ts +16 -0
- package/dist/serialization/resources/tts/types/TimestampMessage.js +47 -0
- package/dist/serialization/resources/tts/types/TimestampType.d.ts +10 -0
- package/dist/serialization/resources/tts/types/TimestampType.js +41 -0
- package/dist/serialization/resources/tts/types/TtsOutput.d.ts +18 -0
- package/dist/serialization/resources/tts/types/TtsOutput.js +51 -0
- package/dist/serialization/resources/tts/types/index.d.ts +8 -3
- package/dist/serialization/resources/tts/types/index.js +8 -3
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +1 -1
- package/reference.md +1 -1
- package/serialization/resources/empathicVoice/types/LanguageModelType.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/LanguageModelType.js +2 -0
- package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +3 -3
- package/serialization/resources/empathicVoice/types/ReturnConfig.js +3 -3
- package/serialization/resources/empathicVoice/types/ReturnPrompt.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/ReturnPrompt.js +1 -1
- package/serialization/resources/tts/types/MillisecondInterval.d.ts +13 -0
- package/serialization/resources/tts/types/MillisecondInterval.js +44 -0
- package/serialization/resources/tts/types/OctaveVersion.d.ts +1 -1
- package/serialization/resources/tts/types/OctaveVersion.js +1 -1
- package/serialization/resources/tts/types/PostedTts.d.ts +2 -0
- package/serialization/resources/tts/types/PostedTts.js +2 -0
- package/serialization/resources/tts/types/PublishTts.d.ts +4 -4
- package/serialization/resources/tts/types/PublishTts.js +4 -4
- package/serialization/resources/tts/types/Snippet.d.ts +2 -0
- package/serialization/resources/tts/types/Snippet.js +2 -0
- package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +0 -1
- package/serialization/resources/tts/types/SnippetAudioChunk.js +0 -1
- package/serialization/resources/tts/types/Timestamp.d.ts +16 -0
- package/serialization/resources/tts/types/Timestamp.js +47 -0
- package/serialization/resources/tts/types/TimestampMessage.d.ts +16 -0
- package/serialization/resources/tts/types/TimestampMessage.js +47 -0
- package/serialization/resources/tts/types/TimestampType.d.ts +10 -0
- package/serialization/resources/tts/types/TimestampType.js +41 -0
- package/serialization/resources/tts/types/TtsOutput.d.ts +18 -0
- package/serialization/resources/tts/types/TtsOutput.js +51 -0
- package/serialization/resources/tts/types/index.d.ts +8 -3
- package/serialization/resources/tts/types/index.js +8 -3
- package/version.d.ts +1 -1
- package/version.js +1 -1
|
@@ -1027,6 +1027,8 @@ types:
|
|
|
1027
1027
|
name: Claude3Haiku20240307
|
|
1028
1028
|
- value: claude-sonnet-4-20250514
|
|
1029
1029
|
name: ClaudeSonnet420250514
|
|
1030
|
+
- value: claude-sonnet-4-5-20250929
|
|
1031
|
+
name: ClaudeSonnet4520250929
|
|
1030
1032
|
- value: us.anthropic.claude-3-5-haiku-20241022-v1:0
|
|
1031
1033
|
name: UsAnthropicClaude35Haiku20241022V10
|
|
1032
1034
|
- value: us.anthropic.claude-3-5-sonnet-20240620-v1:0
|
|
@@ -1119,6 +1121,8 @@ types:
|
|
|
1119
1121
|
name: Llama4Maverick17B128EInstruct
|
|
1120
1122
|
- value: Qwen3-32B
|
|
1121
1123
|
name: Qwen332B
|
|
1124
|
+
- value: grok-4-fast-non-reasoning-latest
|
|
1125
|
+
name: Grok4FastNonReasoningLatest
|
|
1122
1126
|
- ellm
|
|
1123
1127
|
- value: custom-language-model
|
|
1124
1128
|
name: CustomLanguageModel
|
|
@@ -1470,9 +1474,6 @@ types:
|
|
|
1470
1474
|
Version numbers are integer values representing different iterations
|
|
1471
1475
|
of the Prompt. Each update to the Prompt increments its version
|
|
1472
1476
|
number.
|
|
1473
|
-
version_description:
|
|
1474
|
-
type: optional<string>
|
|
1475
|
-
docs: An optional description of the Prompt version.
|
|
1476
1477
|
version_type:
|
|
1477
1478
|
type: ReturnPromptVersionType
|
|
1478
1479
|
docs: >-
|
|
@@ -1488,6 +1489,9 @@ types:
|
|
|
1488
1489
|
docs: >-
|
|
1489
1490
|
Time at which the Prompt was last modified. Measured in seconds since
|
|
1490
1491
|
the Unix epoch.
|
|
1492
|
+
version_description:
|
|
1493
|
+
type: optional<string>
|
|
1494
|
+
docs: An optional description of the Prompt version.
|
|
1491
1495
|
source:
|
|
1492
1496
|
openapi: evi-openapi.json
|
|
1493
1497
|
ReturnPagedConfigs:
|
|
@@ -1543,12 +1547,6 @@ types:
|
|
|
1543
1547
|
Version numbers are integer values representing different iterations
|
|
1544
1548
|
of the Config. Each update to the Config increments its version
|
|
1545
1549
|
number.
|
|
1546
|
-
tools:
|
|
1547
|
-
type: optional<list<optional<ReturnUserDefinedTool>>>
|
|
1548
|
-
docs: List of user-defined tools associated with this Config.
|
|
1549
|
-
version_description:
|
|
1550
|
-
type: optional<string>
|
|
1551
|
-
docs: An optional description of the Config version.
|
|
1552
1550
|
language_model:
|
|
1553
1551
|
type: optional<ReturnLanguageModel>
|
|
1554
1552
|
docs: >-
|
|
@@ -1603,6 +1601,12 @@ types:
|
|
|
1603
1601
|
docs: >-
|
|
1604
1602
|
Time at which the Config was last modified. Measured in seconds since
|
|
1605
1603
|
the Unix epoch.
|
|
1604
|
+
version_description:
|
|
1605
|
+
type: optional<string>
|
|
1606
|
+
docs: An optional description of the Config version.
|
|
1607
|
+
tools:
|
|
1608
|
+
type: optional<list<optional<ReturnUserDefinedTool>>>
|
|
1609
|
+
docs: List of user-defined tools associated with this Config.
|
|
1606
1610
|
source:
|
|
1607
1611
|
openapi: evi-openapi.json
|
|
1608
1612
|
ReturnPagedChatsPaginationDirection:
|
|
@@ -98,6 +98,111 @@ channel:
|
|
|
98
98
|
Use the GET `/v0/evi/chat_groups` endpoint to obtain the Chat Group IDs
|
|
99
99
|
of all Chat Groups associated with an API key. This endpoint returns a
|
|
100
100
|
list of all available chat groups.
|
|
101
|
+
session_settings[audio][channels]:
|
|
102
|
+
type: optional<integer>
|
|
103
|
+
docs: Sets number of audio channels for audio input.
|
|
104
|
+
session_settings[audio][encoding]:
|
|
105
|
+
type: optional<string>
|
|
106
|
+
docs: Sets encoding format of the audio input, such as `linear16`.
|
|
107
|
+
session_settings[audio][sample_rate]:
|
|
108
|
+
type: optional<integer>
|
|
109
|
+
docs: >-
|
|
110
|
+
Sets the sample rate for audio input. (Number of samples per second in
|
|
111
|
+
the audio input, measured in Hertz.)
|
|
112
|
+
session_settings[context][text]:
|
|
113
|
+
type: optional<string>
|
|
114
|
+
docs: >-
|
|
115
|
+
The context to be injected into the conversation. Helps inform the LLM's
|
|
116
|
+
response by providing relevant information about the ongoing
|
|
117
|
+
conversation.
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
This text will be appended to the end of
|
|
121
|
+
[user_messages](/reference/speech-to-speech-evi/chat#receive.UserMessage.message.content)
|
|
122
|
+
based on the chosen persistence level. For example, if you want to
|
|
123
|
+
remind EVI of its role as a helpful weather assistant, the context you
|
|
124
|
+
insert will be appended to the end of user messages as `{Context: You
|
|
125
|
+
are a helpful weather assistant}`.
|
|
126
|
+
session_settings[context][type]:
|
|
127
|
+
type: optional<string>
|
|
128
|
+
docs: >-
|
|
129
|
+
The persistence level of the injected context. Specifies how long the
|
|
130
|
+
injected context will remain active in the session.
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
- **Temporary**: Context that is only applied to the following assistant
|
|
134
|
+
response.
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
- **Persistent**: Context that is applied to all subsequent assistant
|
|
138
|
+
responses for the remainder of the Chat.
|
|
139
|
+
session_settings[custom_session_id]:
|
|
140
|
+
type: optional<string>
|
|
141
|
+
docs: >-
|
|
142
|
+
Used to manage conversational state, correlate frontend and backend
|
|
143
|
+
data, and persist conversations across EVI sessions.
|
|
144
|
+
session_settings[event_limit]:
|
|
145
|
+
type: optional<integer>
|
|
146
|
+
docs: >-
|
|
147
|
+
The maximum number of chat events to return from chat history. By
|
|
148
|
+
default, the system returns up to 300 events (100 events per page × 3
|
|
149
|
+
pages). Set this parameter to a smaller value to limit the number of
|
|
150
|
+
events returned.
|
|
151
|
+
session_settings[language_model_api_key]:
|
|
152
|
+
type: optional<string>
|
|
153
|
+
docs: >-
|
|
154
|
+
Third party API key for the supplemental language model.
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
When provided, EVI will use this key instead of Hume's API key for the
|
|
158
|
+
supplemental LLM. This allows you to bypass rate limits and utilize your
|
|
159
|
+
own API key as needed.
|
|
160
|
+
session_settings[system_prompt]:
|
|
161
|
+
type: optional<string>
|
|
162
|
+
docs: >-
|
|
163
|
+
Instructions used to shape EVI's behavior, responses, and style for the
|
|
164
|
+
session.
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
When included in a Session Settings message, the provided Prompt
|
|
168
|
+
overrides the existing one specified in the EVI configuration. If no
|
|
169
|
+
Prompt was defined in the configuration, this Prompt will be the one
|
|
170
|
+
used for the session.
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
You can use the Prompt to define a specific goal or role for EVI,
|
|
174
|
+
specifying how it should act or what it should focus on during the
|
|
175
|
+
conversation. For example, EVI can be instructed to act as a customer
|
|
176
|
+
support representative, a fitness coach, or a travel advisor, each with
|
|
177
|
+
its own set of behaviors and response styles.
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
For help writing a system prompt, see our [Prompting
|
|
181
|
+
Guide](/docs/speech-to-speech-evi/guides/prompting).
|
|
182
|
+
session_settings[variables]:
|
|
183
|
+
type: optional<string>
|
|
184
|
+
docs: >-
|
|
185
|
+
This field allows you to assign values to dynamic variables referenced
|
|
186
|
+
in your system prompt.
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
Each key represents the variable name, and the corresponding value is
|
|
190
|
+
the specific content you wish to assign to that variable within the
|
|
191
|
+
session. While the values for variables can be strings, numbers, or
|
|
192
|
+
booleans, the value will ultimately be converted to a string when
|
|
193
|
+
injected into your system prompt.
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
Using this field, you can personalize responses based on
|
|
197
|
+
session-specific details. For more guidance, see our [guide on using
|
|
198
|
+
dynamic
|
|
199
|
+
variables](/docs/speech-to-speech-evi/features/dynamic-variables).
|
|
200
|
+
session_settings[voice_id]:
|
|
201
|
+
type: optional<string>
|
|
202
|
+
docs: >-
|
|
203
|
+
The name or ID of the voice from the `Voice Library` to be used as the
|
|
204
|
+
speaker for this EVI session. This will override the speaker set in the
|
|
205
|
+
selected configuration.
|
|
101
206
|
verbose_transcription:
|
|
102
207
|
type: optional<boolean>
|
|
103
208
|
default: false
|
|
@@ -140,7 +140,9 @@ service:
|
|
|
140
140
|
properties:
|
|
141
141
|
evi_version:
|
|
142
142
|
type: string
|
|
143
|
-
docs:
|
|
143
|
+
docs: >-
|
|
144
|
+
EVI version to use. Only versions `3` and `4-mini` are
|
|
145
|
+
supported.
|
|
144
146
|
name:
|
|
145
147
|
type: string
|
|
146
148
|
docs: Name applied to all versions of a particular Config.
|
|
@@ -30,7 +30,7 @@ service:
|
|
|
30
30
|
format.
|
|
31
31
|
source:
|
|
32
32
|
openapi: tts-openapi.json
|
|
33
|
-
display-name: Text-to-
|
|
33
|
+
display-name: Text-to-Speech (Json)
|
|
34
34
|
request:
|
|
35
35
|
body:
|
|
36
36
|
type: PostedTts
|
|
@@ -79,6 +79,7 @@ service:
|
|
|
79
79
|
Beauty is no quality in things themselves: It exists
|
|
80
80
|
merely in the mind which contemplates them.
|
|
81
81
|
utterance_index: 0
|
|
82
|
+
timestamps: []
|
|
82
83
|
request_id: 66e01f90-4501-4aa0-bbaf-74f45dc15aa725906
|
|
83
84
|
synthesize-file:
|
|
84
85
|
path: /v0/tts/file
|
|
@@ -94,7 +95,7 @@ service:
|
|
|
94
95
|
The response contains the generated audio file in the requested format.
|
|
95
96
|
source:
|
|
96
97
|
openapi: tts-openapi.json
|
|
97
|
-
display-name: Text-to-
|
|
98
|
+
display-name: Text-to-Speech (File)
|
|
98
99
|
request:
|
|
99
100
|
body:
|
|
100
101
|
type: PostedTts
|
|
@@ -131,7 +132,7 @@ service:
|
|
|
131
132
|
prosody.
|
|
132
133
|
source:
|
|
133
134
|
openapi: tts-openapi.json
|
|
134
|
-
display-name: Text-to-
|
|
135
|
+
display-name: Text-to-Speech (Streamed File)
|
|
135
136
|
request:
|
|
136
137
|
body:
|
|
137
138
|
type: PostedTts
|
|
@@ -166,14 +167,14 @@ service:
|
|
|
166
167
|
base64.
|
|
167
168
|
source:
|
|
168
169
|
openapi: tts-openapi.json
|
|
169
|
-
display-name: Text-to-
|
|
170
|
+
display-name: Text-to-Speech (Streamed JSON)
|
|
170
171
|
request:
|
|
171
172
|
body:
|
|
172
173
|
type: PostedTts
|
|
173
174
|
content-type: application/json
|
|
174
175
|
response-stream:
|
|
175
176
|
docs: Successful Response
|
|
176
|
-
type:
|
|
177
|
+
type: TtsOutput
|
|
177
178
|
format: json
|
|
178
179
|
errors:
|
|
179
180
|
- UnprocessableEntityError
|
|
@@ -189,15 +190,30 @@ service:
|
|
|
189
190
|
source:
|
|
190
191
|
openapi: tts-openapi.json
|
|
191
192
|
types:
|
|
193
|
+
TtsOutput:
|
|
194
|
+
discriminant: type
|
|
195
|
+
base-properties: {}
|
|
196
|
+
union:
|
|
197
|
+
timestamp:
|
|
198
|
+
type: TimestampMessage
|
|
199
|
+
audio:
|
|
200
|
+
type: SnippetAudioChunk
|
|
201
|
+
source:
|
|
202
|
+
openapi: tts-openapi.json
|
|
203
|
+
AudioFormatType:
|
|
204
|
+
enum:
|
|
205
|
+
- mp3
|
|
206
|
+
- pcm
|
|
207
|
+
- wav
|
|
208
|
+
source:
|
|
209
|
+
openapi: tts-openapi.json
|
|
192
210
|
PublishTts:
|
|
193
211
|
docs: Input message type for the TTS stream.
|
|
194
212
|
properties:
|
|
195
|
-
|
|
196
|
-
type: optional<
|
|
197
|
-
docs:
|
|
198
|
-
default:
|
|
199
|
-
validation:
|
|
200
|
-
maxLength: 5000
|
|
213
|
+
close:
|
|
214
|
+
type: optional<boolean>
|
|
215
|
+
docs: Force the generation of audio and close the stream.
|
|
216
|
+
default: false
|
|
201
217
|
description:
|
|
202
218
|
type: optional<string>
|
|
203
219
|
docs: >-
|
|
@@ -206,12 +222,12 @@ types:
|
|
|
206
222
|
accent"`).
|
|
207
223
|
validation:
|
|
208
224
|
maxLength: 1000
|
|
209
|
-
|
|
210
|
-
type: optional<
|
|
225
|
+
flush:
|
|
226
|
+
type: optional<boolean>
|
|
211
227
|
docs: >-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
228
|
+
Force the generation of audio regardless of how much text has been
|
|
229
|
+
supplied.
|
|
230
|
+
default: false
|
|
215
231
|
speed:
|
|
216
232
|
type: optional<double>
|
|
217
233
|
docs: A relative measure of how fast this utterance should be spoken.
|
|
@@ -219,6 +235,12 @@ types:
|
|
|
219
235
|
validation:
|
|
220
236
|
min: 0.25
|
|
221
237
|
max: 3
|
|
238
|
+
text:
|
|
239
|
+
type: optional<string>
|
|
240
|
+
docs: The input text to be converted to speech output.
|
|
241
|
+
default: ''
|
|
242
|
+
validation:
|
|
243
|
+
maxLength: 5000
|
|
222
244
|
trailing_silence:
|
|
223
245
|
type: optional<double>
|
|
224
246
|
docs: Duration of trailing silence (in seconds) to add to this utterance
|
|
@@ -226,18 +248,104 @@ types:
|
|
|
226
248
|
validation:
|
|
227
249
|
min: 0
|
|
228
250
|
max: 5
|
|
229
|
-
|
|
230
|
-
type: optional<
|
|
251
|
+
voice:
|
|
252
|
+
type: optional<PostedUtteranceVoice>
|
|
231
253
|
docs: >-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
close:
|
|
236
|
-
type: optional<boolean>
|
|
237
|
-
docs: Force the generation of audio and close the stream.
|
|
238
|
-
default: false
|
|
254
|
+
The name or ID of the voice from the `Voice Library` to be used as the
|
|
255
|
+
speaker for this and all subsequent utterances, until the `"voice"`
|
|
256
|
+
field is updated again.
|
|
239
257
|
source:
|
|
240
258
|
openapi: tts-asyncapi.json
|
|
259
|
+
MillisecondInterval:
|
|
260
|
+
properties:
|
|
261
|
+
begin:
|
|
262
|
+
type: integer
|
|
263
|
+
docs: Start time of the interval in milliseconds.
|
|
264
|
+
end:
|
|
265
|
+
type: integer
|
|
266
|
+
docs: End time of the interval in milliseconds.
|
|
267
|
+
source:
|
|
268
|
+
openapi: tts-openapi.json
|
|
269
|
+
TimestampMessage:
|
|
270
|
+
docs: A word or phoneme level timestamp for the generated audio.
|
|
271
|
+
properties:
|
|
272
|
+
generation_id:
|
|
273
|
+
type: string
|
|
274
|
+
docs: >-
|
|
275
|
+
The generation ID of the parent snippet that this chunk corresponds
|
|
276
|
+
to.
|
|
277
|
+
request_id:
|
|
278
|
+
type: string
|
|
279
|
+
docs: ID of the initiating request.
|
|
280
|
+
snippet_id:
|
|
281
|
+
type: string
|
|
282
|
+
docs: The ID of the parent snippet that this chunk corresponds to.
|
|
283
|
+
timestamp:
|
|
284
|
+
type: Timestamp
|
|
285
|
+
docs: A word or phoneme level timestamp for the generated audio.
|
|
286
|
+
source:
|
|
287
|
+
openapi: tts-openapi.json
|
|
288
|
+
SnippetAudioChunk:
|
|
289
|
+
docs: Metadata for a chunk of generated audio.
|
|
290
|
+
properties:
|
|
291
|
+
audio:
|
|
292
|
+
type: string
|
|
293
|
+
docs: The generated audio output chunk in the requested format.
|
|
294
|
+
audio_format:
|
|
295
|
+
type: AudioFormatType
|
|
296
|
+
docs: The generated audio output format.
|
|
297
|
+
chunk_index:
|
|
298
|
+
type: integer
|
|
299
|
+
docs: The index of the audio chunk in the snippet.
|
|
300
|
+
generation_id:
|
|
301
|
+
type: string
|
|
302
|
+
docs: >-
|
|
303
|
+
The generation ID of the parent snippet that this chunk corresponds
|
|
304
|
+
to.
|
|
305
|
+
is_last_chunk:
|
|
306
|
+
type: boolean
|
|
307
|
+
docs: >-
|
|
308
|
+
Whether or not this is the last chunk streamed back from the decoder
|
|
309
|
+
for one input snippet.
|
|
310
|
+
request_id:
|
|
311
|
+
type: string
|
|
312
|
+
docs: ID of the initiating request.
|
|
313
|
+
snippet:
|
|
314
|
+
type: optional<Snippet>
|
|
315
|
+
snippet_id:
|
|
316
|
+
type: string
|
|
317
|
+
docs: The ID of the parent snippet that this chunk corresponds to.
|
|
318
|
+
text:
|
|
319
|
+
type: string
|
|
320
|
+
docs: The text of the parent snippet that this chunk corresponds to.
|
|
321
|
+
transcribed_text:
|
|
322
|
+
type: optional<string>
|
|
323
|
+
docs: >-
|
|
324
|
+
The transcribed text of the generated audio of the parent snippet that
|
|
325
|
+
this chunk corresponds to. It is only present if `instant_mode` is set
|
|
326
|
+
to `false`.
|
|
327
|
+
utterance_index:
|
|
328
|
+
type: optional<integer>
|
|
329
|
+
docs: >-
|
|
330
|
+
The index of the utterance in the request that the parent snippet of
|
|
331
|
+
this chunk corresponds to.
|
|
332
|
+
source:
|
|
333
|
+
openapi: tts-openapi.json
|
|
334
|
+
Timestamp:
|
|
335
|
+
properties:
|
|
336
|
+
text: string
|
|
337
|
+
time:
|
|
338
|
+
type: MillisecondInterval
|
|
339
|
+
type:
|
|
340
|
+
type: TimestampType
|
|
341
|
+
source:
|
|
342
|
+
openapi: tts-openapi.json
|
|
343
|
+
TimestampType:
|
|
344
|
+
enum:
|
|
345
|
+
- word
|
|
346
|
+
- phoneme
|
|
347
|
+
source:
|
|
348
|
+
openapi: tts-openapi.json
|
|
241
349
|
PostedUtteranceVoiceWithId:
|
|
242
350
|
properties:
|
|
243
351
|
id:
|
|
@@ -309,59 +417,12 @@ types:
|
|
|
309
417
|
- type: PostedUtteranceVoiceWithName
|
|
310
418
|
source:
|
|
311
419
|
openapi: tts-openapi.json
|
|
312
|
-
|
|
420
|
+
OctaveVersion:
|
|
313
421
|
enum:
|
|
314
|
-
-
|
|
315
|
-
|
|
316
|
-
-
|
|
317
|
-
|
|
318
|
-
openapi: tts-openapi.json
|
|
319
|
-
SnippetAudioChunk:
|
|
320
|
-
docs: Metadata for a chunk of generated audio.
|
|
321
|
-
properties:
|
|
322
|
-
audio:
|
|
323
|
-
type: string
|
|
324
|
-
docs: The generated audio output chunk in the requested format.
|
|
325
|
-
audio_format:
|
|
326
|
-
type: AudioFormatType
|
|
327
|
-
docs: The generated audio output format.
|
|
328
|
-
chunk_index:
|
|
329
|
-
type: integer
|
|
330
|
-
docs: The index of the audio chunk in the snippet.
|
|
331
|
-
generation_id:
|
|
332
|
-
type: string
|
|
333
|
-
docs: >-
|
|
334
|
-
The generation ID of the parent snippet that this chunk corresponds
|
|
335
|
-
to.
|
|
336
|
-
is_last_chunk:
|
|
337
|
-
type: boolean
|
|
338
|
-
docs: >-
|
|
339
|
-
Whether or not this is the last chunk streamed back from the decoder
|
|
340
|
-
for one input snippet.
|
|
341
|
-
request_id:
|
|
342
|
-
type: string
|
|
343
|
-
docs: ID of the initiating request.
|
|
344
|
-
snippet:
|
|
345
|
-
type: optional<Snippet>
|
|
346
|
-
snippet_id:
|
|
347
|
-
type: string
|
|
348
|
-
docs: The ID of the parent snippet that this chunk corresponds to.
|
|
349
|
-
text:
|
|
350
|
-
type: string
|
|
351
|
-
docs: The text of the parent snippet that this chunk corresponds to.
|
|
352
|
-
transcribed_text:
|
|
353
|
-
type: optional<string>
|
|
354
|
-
docs: >-
|
|
355
|
-
The transcribed text of the generated audio of the parent snippet that
|
|
356
|
-
this chunk corresponds to. It is only present if `instant_mode` is set
|
|
357
|
-
to `false`.
|
|
358
|
-
type:
|
|
359
|
-
type: optional<literal<"audio">>
|
|
360
|
-
utterance_index:
|
|
361
|
-
type: optional<integer>
|
|
362
|
-
docs: >-
|
|
363
|
-
The index of the utterance in the request that the parent snippet of
|
|
364
|
-
this chunk corresponds to.
|
|
422
|
+
- value: '1'
|
|
423
|
+
name: One
|
|
424
|
+
- value: '2'
|
|
425
|
+
name: Two
|
|
365
426
|
source:
|
|
366
427
|
openapi: tts-openapi.json
|
|
367
428
|
PostedContextWithGenerationId:
|
|
@@ -471,6 +532,9 @@ types:
|
|
|
471
532
|
format:
|
|
472
533
|
type: optional<Format>
|
|
473
534
|
docs: Specifies the output audio file format.
|
|
535
|
+
include_timestamp_types:
|
|
536
|
+
type: optional<list<TimestampType>>
|
|
537
|
+
docs: The set of timestamp types to include in the response.
|
|
474
538
|
num_generations:
|
|
475
539
|
type: optional<integer>
|
|
476
540
|
docs: Number of generations of the audio to produce.
|
|
@@ -520,6 +584,9 @@ types:
|
|
|
520
584
|
type: list<PostedUtterance>
|
|
521
585
|
version:
|
|
522
586
|
type: optional<OctaveVersion>
|
|
587
|
+
docs: >-
|
|
588
|
+
The version of the Octave Model to use. 1 for the legacy model, 2 for
|
|
589
|
+
the new model.
|
|
523
590
|
instant_mode:
|
|
524
591
|
type: optional<boolean>
|
|
525
592
|
docs: >-
|
|
@@ -556,7 +623,6 @@ types:
|
|
|
556
623
|
troubleshooting assistance.
|
|
557
624
|
source:
|
|
558
625
|
openapi: tts-openapi.json
|
|
559
|
-
OctaveVersion: string
|
|
560
626
|
ReturnVoice:
|
|
561
627
|
docs: An Octave voice available for text-to-speech
|
|
562
628
|
properties:
|
|
@@ -596,6 +662,9 @@ types:
|
|
|
596
662
|
text:
|
|
597
663
|
type: string
|
|
598
664
|
docs: The text for this **Snippet**.
|
|
665
|
+
timestamps:
|
|
666
|
+
docs: A list of word or phoneme level timestamps for the generated audio.
|
|
667
|
+
type: list<Timestamp>
|
|
599
668
|
transcribed_text:
|
|
600
669
|
type: optional<string>
|
|
601
670
|
docs: >-
|
|
@@ -6,6 +6,21 @@ channel:
|
|
|
6
6
|
auth: false
|
|
7
7
|
docs: Generate emotionally expressive speech.
|
|
8
8
|
query-parameters:
|
|
9
|
+
access_token:
|
|
10
|
+
type: optional<string>
|
|
11
|
+
default: ''
|
|
12
|
+
docs: >-
|
|
13
|
+
Access token used for authenticating the client. If not provided, an
|
|
14
|
+
`api_key` must be provided to authenticate.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
The access token is generated using both an API key and a Secret key,
|
|
18
|
+
which provides an additional layer of security compared to using just an
|
|
19
|
+
API key.
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
For more details, refer to the [Authentication Strategies
|
|
23
|
+
Guide](/docs/introduction/api-key#authentication-strategies).
|
|
9
24
|
context_generation_id:
|
|
10
25
|
type: optional<string>
|
|
11
26
|
docs: >-
|
|
@@ -13,14 +28,10 @@ channel:
|
|
|
13
28
|
consistent speech style and prosody across multiple requests. Including
|
|
14
29
|
context may increase audio generation times.
|
|
15
30
|
format_type: root.AudioFormatType
|
|
16
|
-
|
|
17
|
-
type: optional<
|
|
18
|
-
|
|
19
|
-
docs:
|
|
20
|
-
If enabled, the audio for all the chunks of a generation, once
|
|
21
|
-
concatenated together, will constitute a single audio file. Otherwise,
|
|
22
|
-
if disabled, each chunk's audio will be its own audio file, each with
|
|
23
|
-
its own headers (if applicable).
|
|
31
|
+
include_timestamp_types:
|
|
32
|
+
type: optional<root.TimestampType>
|
|
33
|
+
allow-multiple: true
|
|
34
|
+
docs: The set of timestamp types to include in the response.
|
|
24
35
|
instant_mode:
|
|
25
36
|
type: optional<boolean>
|
|
26
37
|
default: true
|
|
@@ -34,21 +45,15 @@ channel:
|
|
|
34
45
|
type: optional<boolean>
|
|
35
46
|
default: false
|
|
36
47
|
docs: If enabled, no binary websocket messages will be sent to the client.
|
|
37
|
-
|
|
38
|
-
type: optional<
|
|
39
|
-
default:
|
|
48
|
+
strip_headers:
|
|
49
|
+
type: optional<boolean>
|
|
50
|
+
default: false
|
|
40
51
|
docs: >-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
which provides an additional layer of security compared to using just an
|
|
47
|
-
API key.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
For more details, refer to the [Authentication Strategies
|
|
51
|
-
Guide](/docs/introduction/api-key#authentication-strategies).
|
|
52
|
+
If enabled, the audio for all the chunks of a generation, once
|
|
53
|
+
concatenated together, will constitute a single audio file. Otherwise,
|
|
54
|
+
if disabled, each chunk's audio will be its own audio file, each with
|
|
55
|
+
its own headers (if applicable).
|
|
56
|
+
version: root.OctaveVersion
|
|
52
57
|
api_key:
|
|
53
58
|
type: optional<string>
|
|
54
59
|
default: ''
|
|
@@ -64,21 +69,7 @@ channel:
|
|
|
64
69
|
origin: client
|
|
65
70
|
body:
|
|
66
71
|
type: root.PublishTts
|
|
67
|
-
subscribe:
|
|
68
|
-
origin: server
|
|
69
|
-
body:
|
|
70
|
-
type: root.SnippetAudioChunk
|
|
71
72
|
examples:
|
|
72
73
|
- messages:
|
|
73
74
|
- type: publish
|
|
74
75
|
body: {}
|
|
75
|
-
- type: subscribe
|
|
76
|
-
body:
|
|
77
|
-
request_id: request_id
|
|
78
|
-
generation_id: generation_id
|
|
79
|
-
snippet_id: snippet_id
|
|
80
|
-
text: text
|
|
81
|
-
chunk_index: 1
|
|
82
|
-
audio: audio
|
|
83
|
-
audio_format: mp3
|
|
84
|
-
is_last_chunk: true
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import * as environments from "../../../../../../environments";
|
|
3
3
|
import * as core from "../../../../../../core";
|
|
4
4
|
import { ChatSocket } from "./Socket";
|
|
5
|
+
import { SessionSettings } from "../../../types/SessionSettings";
|
|
5
6
|
export declare function createHostnameWithProtocol(environment: string): string;
|
|
6
7
|
export declare namespace Chat {
|
|
7
8
|
interface Options {
|
|
@@ -24,6 +25,9 @@ export declare namespace Chat {
|
|
|
24
25
|
verboseTranscription?: boolean;
|
|
25
26
|
/** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
|
|
26
27
|
voiceId?: string;
|
|
28
|
+
sessionSettings?: Pick<SessionSettings, Exclude<keyof SessionSettings, "builtinTools" | "type" | "metadata" | "tools">> & {
|
|
29
|
+
eventLimit?: number;
|
|
30
|
+
};
|
|
27
31
|
/** Extra query parameters sent at WebSocket connection */
|
|
28
32
|
queryParams?: Record<string, string | string[] | object | object[]>;
|
|
29
33
|
}
|