hume 0.13.0 → 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/.mock/definition/empathic-voice/__package__.yml +68 -61
  2. package/.mock/definition/empathic-voice/chatWebhooks.yml +8 -12
  3. package/.mock/definition/empathic-voice/prompts.yml +2 -2
  4. package/.mock/definition/empathic-voice/tools.yml +2 -2
  5. package/.mock/definition/tts/__package__.yml +85 -47
  6. package/.mock/definition/tts/voices.yml +9 -9
  7. package/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
  8. package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
  9. package/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
  10. package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
  11. package/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
  12. package/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  13. package/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  14. package/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  15. package/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  16. package/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
  17. package/api/resources/empathicVoice/types/ContextType.js +1 -1
  18. package/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
  19. package/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  20. package/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
  21. package/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
  22. package/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
  23. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  24. package/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
  25. package/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
  26. package/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
  27. package/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
  28. package/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
  29. package/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
  30. package/api/resources/empathicVoice/types/Tool.d.ts +1 -1
  31. package/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  32. package/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
  33. package/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  34. package/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
  35. package/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
  36. package/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
  37. package/api/resources/empathicVoice/types/index.d.ts +4 -3
  38. package/api/resources/empathicVoice/types/index.js +4 -3
  39. package/api/resources/tts/client/Client.d.ts +5 -5
  40. package/api/resources/tts/client/Client.js +5 -5
  41. package/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
  42. package/dist/api/resources/empathicVoice/resources/chat/client/Client.d.ts +2 -0
  43. package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPrompt.d.ts +1 -1
  44. package/dist/api/resources/empathicVoice/resources/prompts/client/requests/PostedPromptVersion.d.ts +1 -1
  45. package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedTool.d.ts +1 -1
  46. package/dist/api/resources/empathicVoice/resources/tools/client/requests/PostedUserDefinedToolVersion.d.ts +1 -1
  47. package/dist/api/resources/empathicVoice/types/AssistantEnd.d.ts +1 -1
  48. package/dist/api/resources/empathicVoice/types/AssistantInput.d.ts +1 -1
  49. package/dist/api/resources/empathicVoice/types/AssistantMessage.d.ts +1 -1
  50. package/dist/api/resources/empathicVoice/types/AudioInput.d.ts +1 -1
  51. package/dist/api/resources/empathicVoice/types/ContextType.d.ts +2 -2
  52. package/dist/api/resources/empathicVoice/types/ContextType.js +1 -1
  53. package/dist/api/resources/empathicVoice/types/JsonMessage.d.ts +1 -1
  54. package/dist/api/resources/empathicVoice/types/PauseAssistantMessage.d.ts +1 -1
  55. package/dist/api/resources/empathicVoice/types/PostedLanguageModel.d.ts +1 -1
  56. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +2 -2
  57. package/dist/api/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +2 -2
  58. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  59. package/dist/api/resources/empathicVoice/types/ReturnLanguageModel.d.ts +1 -1
  60. package/dist/api/resources/empathicVoice/types/ReturnPrompt.d.ts +2 -4
  61. package/dist/api/resources/empathicVoice/types/ReturnUserDefinedTool.d.ts +1 -1
  62. package/dist/api/resources/empathicVoice/types/ReturnVoice.d.ts +12 -0
  63. package/dist/api/resources/empathicVoice/types/ReturnVoice.js +5 -0
  64. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -2
  65. package/dist/api/resources/empathicVoice/types/Tool.d.ts +1 -1
  66. package/dist/api/resources/empathicVoice/types/ToolCallMessage.d.ts +1 -1
  67. package/dist/api/resources/empathicVoice/types/UserInput.d.ts +1 -1
  68. package/dist/api/resources/empathicVoice/types/UserInterruption.d.ts +1 -1
  69. package/dist/api/resources/empathicVoice/types/UserMessage.d.ts +1 -1
  70. package/dist/api/resources/empathicVoice/types/VoiceProvider.d.ts +1 -2
  71. package/dist/api/resources/empathicVoice/types/VoiceProvider.js +0 -1
  72. package/dist/api/resources/empathicVoice/types/index.d.ts +4 -3
  73. package/dist/api/resources/empathicVoice/types/index.js +4 -3
  74. package/dist/api/resources/tts/client/Client.d.ts +5 -5
  75. package/dist/api/resources/tts/client/Client.js +5 -5
  76. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +20 -0
  77. package/dist/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
  78. package/dist/serialization/resources/empathicVoice/types/ContextType.js +1 -1
  79. package/dist/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
  80. package/dist/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
  81. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
  82. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
  83. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
  84. package/dist/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
  85. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  86. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
  87. package/dist/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
  88. package/dist/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
  89. package/dist/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
  90. package/dist/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
  91. package/dist/serialization/resources/empathicVoice/types/index.d.ts +4 -3
  92. package/dist/serialization/resources/empathicVoice/types/index.js +4 -3
  93. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
  94. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
  95. package/dist/version.d.ts +1 -1
  96. package/dist/version.js +1 -1
  97. package/package.json +1 -1
  98. package/reference.md +14 -14
  99. package/serialization/resources/empathicVoice/types/ContextType.d.ts +1 -1
  100. package/serialization/resources/empathicVoice/types/ContextType.js +1 -1
  101. package/serialization/resources/empathicVoice/types/JsonMessage.d.ts +2 -2
  102. package/serialization/resources/empathicVoice/types/JsonMessage.js +2 -2
  103. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.d.ts +1 -1
  104. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsInactivity.js +1 -1
  105. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.d.ts +1 -1
  106. package/serialization/resources/empathicVoice/types/PostedTimeoutSpecsMaxDuration.js +1 -1
  107. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +2 -1
  108. package/serialization/resources/empathicVoice/types/ReturnConfig.js +2 -1
  109. package/serialization/resources/empathicVoice/types/ReturnVoice.d.ts +15 -0
  110. package/serialization/resources/empathicVoice/types/ReturnVoice.js +46 -0
  111. package/serialization/resources/empathicVoice/types/VoiceProvider.d.ts +1 -1
  112. package/serialization/resources/empathicVoice/types/VoiceProvider.js +1 -1
  113. package/serialization/resources/empathicVoice/types/index.d.ts +4 -3
  114. package/serialization/resources/empathicVoice/types/index.js +4 -3
  115. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +12 -0
  116. package/serialization/resources/tts/types/SnippetAudioChunk.js +14 -1
  117. package/version.d.ts +1 -1
  118. package/version.js +1 -1
@@ -10,7 +10,7 @@ export interface Tool {
      /**
       * Parameters of the tool. Is a stringified JSON schema.
       *
-      * These parameters define the inputs needed for the tools execution, including the expected data type and description for each input field. Structured as a JSON schema, this format ensures the tool receives data in the expected format.
+      * These parameters define the inputs needed for the tool's execution, including the expected data type and description for each input field. Structured as a JSON schema, this format ensures the tool receives data in the expected format.
       */
      parameters: string;
      /** An optional description of what the tool does, used by the supplemental LLM to choose when and how to call the function. */
@@ -11,7 +11,7 @@ export interface ToolCallMessage {
      /**
       * Parameters of the tool.
       *
-      * These parameters define the inputs needed for the tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
+      * These parameters define the inputs needed for the tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
       */
      parameters: string;
      /**
@@ -12,7 +12,7 @@ export interface UserInput {
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
      customSessionId?: string;
      /**
-      * User text to insert into the conversation. Text sent through a User Input message is treated as the users speech to EVI. EVI processes this input and provides a corresponding response.
+      * User text to insert into the conversation. Text sent through a User Input message is treated as the user's speech to EVI. EVI processes this input and provides a corresponding response.
       *
       * Expression measurement results are not available for User Input messages, as the prosody model relies on audio input and cannot process text alone.
       */
@@ -8,7 +8,7 @@ export interface UserInterruption {
      /**
       * The type of message sent through the socket; for a User Interruption message, this must be `user_interruption`.
       *
-      * This message indicates the user has interrupted the assistants response. EVI detects the interruption in real-time and sends this message to signal the interruption event. This message allows the system to stop the current audio playback, clear the audio queue, and prepare to handle new user input.
+      * This message indicates the user has interrupted the assistant's response. EVI detects the interruption in real-time and sends this message to signal the interruption event. This message allows the system to stop the current audio playback, clear the audio queue, and prepare to handle new user input.
       */
      type: "user_interruption";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -9,7 +9,7 @@ export interface UserMessage {
      /**
       * The type of message sent through the socket; for a User Message, this must be `user_message`.
       *
-      * This message contains both a transcript of the users input and the expression measurement predictions if the input was sent as an [Audio Input message](/reference/empathic-voice-interface-evi/chat/chat#send.AudioInput.type). Expression measurement predictions are not provided for a [User Input message](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.type), as the prosody model relies on audio input and cannot process text alone.
+      * This message contains both a transcript of the user's input and the expression measurement predictions if the input was sent as an [Audio Input message](/reference/empathic-voice-interface-evi/chat/chat#send.AudioInput.type). Expression measurement predictions are not provided for a [User Input message](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.type), as the prosody model relies on audio input and cannot process text alone.
       */
      type: "user_message";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -1,9 +1,8 @@
  /**
   * This file was auto-generated by Fern from our API Definition.
   */
- export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE" | "OCTAVE_COMBINED";
+ export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE";
  export declare const VoiceProvider: {
      readonly HumeAi: "HUME_AI";
      readonly CustomVoice: "CUSTOM_VOICE";
-     readonly OctaveCombined: "OCTAVE_COMBINED";
  };
@@ -7,5 +7,4 @@ exports.VoiceProvider = void 0;
  exports.VoiceProvider = {
      HumeAi: "HUME_AI",
      CustomVoice: "CUSTOM_VOICE",
-     OctaveCombined: "OCTAVE_COMBINED",
  };
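The `OCTAVE_COMBINED` member is removed from both the `VoiceProvider` type union and its runtime constant, so callers that matched on it must now handle only the two remaining values. A minimal sketch under that assumption (the helper name and descriptions are illustrative, and the `Hume` namespace import from the package root is assumed):

```typescript
import { Hume } from "hume";

// Sketch against the narrowed VoiceProvider union ("HUME_AI" | "CUSTOM_VOICE").
// The helper and its messages are illustrative, not part of the SDK.
function describeProvider(provider: Hume.empathicVoice.VoiceProvider): string {
    switch (provider) {
        case "HUME_AI":
            return "Voice from Hume's voice library";
        case "CUSTOM_VOICE":
            return "Custom voice owned by this account";
        default: {
            // Exhaustiveness check: fails to compile if a new member is added.
            const unhandled: never = provider;
            throw new Error(`Unhandled voice provider: ${unhandled}`);
        }
    }
}
```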
@@ -38,6 +38,8 @@ export * from "./LanguageModelType";
  export * from "./ModelProviderEnum";
  export * from "./ValidationErrorLocItem";
  export * from "./ValidationError";
+ export * from "./VoiceId";
+ export * from "./VoiceName";
  export * from "./WebhookEventChatEnded";
  export * from "./WebhookEventChatStartType";
  export * from "./WebhookEventChatStarted";
@@ -102,7 +104,6 @@ export * from "./PostedEventMessageSpec";
  export * from "./PostedTimeoutSpec";
  export * from "./ReturnEventMessageSpec";
  export * from "./ReturnTimeoutSpec";
- export * from "./VoiceProvider";
- export * from "./VoiceId";
- export * from "./VoiceName";
  export * from "./VoiceRef";
+ export * from "./ReturnVoice";
+ export * from "./VoiceProvider";
@@ -54,6 +54,8 @@ __exportStar(require("./LanguageModelType"), exports);
  __exportStar(require("./ModelProviderEnum"), exports);
  __exportStar(require("./ValidationErrorLocItem"), exports);
  __exportStar(require("./ValidationError"), exports);
+ __exportStar(require("./VoiceId"), exports);
+ __exportStar(require("./VoiceName"), exports);
  __exportStar(require("./WebhookEventChatEnded"), exports);
  __exportStar(require("./WebhookEventChatStartType"), exports);
  __exportStar(require("./WebhookEventChatStarted"), exports);
@@ -118,7 +120,6 @@ __exportStar(require("./PostedEventMessageSpec"), exports);
  __exportStar(require("./PostedTimeoutSpec"), exports);
  __exportStar(require("./ReturnEventMessageSpec"), exports);
  __exportStar(require("./ReturnTimeoutSpec"), exports);
- __exportStar(require("./VoiceProvider"), exports);
- __exportStar(require("./VoiceId"), exports);
- __exportStar(require("./VoiceName"), exports);
  __exportStar(require("./VoiceRef"), exports);
+ __exportStar(require("./ReturnVoice"), exports);
+ __exportStar(require("./VoiceProvider"), exports);
@@ -44,10 +44,6 @@ export declare class Tts {
      *
      * @example
      *     await client.tts.synthesizeJson({
-      *         utterances: [{
-      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
-      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
-      *         }],
      *         context: {
      *             utterances: [{
      *                 text: "How can people see beauty so differently?",
@@ -57,7 +53,11 @@ export declare class Tts {
      *         format: {
      *             type: "mp3"
      *         },
-      *         numGenerations: 1
+      *         numGenerations: 1,
+      *         utterances: [{
+      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
+      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
+      *         }]
      *     })
      */
     synthesizeJson(request: Hume.tts.PostedTts, requestOptions?: Tts.RequestOptions): core.HttpResponsePromise<Hume.tts.ReturnTts>;
@@ -77,10 +77,6 @@ class Tts {
      *
      * @example
      *     await client.tts.synthesizeJson({
-      *         utterances: [{
-      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
-      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
-      *         }],
      *         context: {
      *             utterances: [{
      *                 text: "How can people see beauty so differently?",
@@ -90,7 +86,11 @@
      *         format: {
      *             type: "mp3"
      *         },
-      *         numGenerations: 1
+      *         numGenerations: 1,
+      *         utterances: [{
+      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
+      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
+      *         }]
      *     })
      */
     synthesizeJson(request, requestOptions) {
@@ -1,5 +1,25 @@
  /**
   * This file was auto-generated by Fern from our API Definition.
   */
+ import * as Hume from "../../../index";
  export interface SnippetAudioChunk {
+     /** The generation ID of the parent snippet that this chunk corresponds to. */
+     generationId: string;
+     /** The ID of the parent snippet that this chunk corresponds to. */
+     snippetId: string;
+     /** The text of the parent snippet that this chunk corresponds to. */
+     text: string;
+     /** The transcribed text of the generated audio of the parent snippet that this chunk corresponds to. It is only present if `instant_mode` is set to `false`. */
+     transcribedText?: string;
+     /** The index of the audio chunk in the snippet. */
+     chunkIndex: number;
+     /** The generated audio output chunk in the requested format. */
+     audio: string;
+     /** The generated audio output format. */
+     audioFormat: Hume.tts.AudioFormatType;
+     /** Whether or not this is the last chunk streamed back from the decoder for one input snippet. */
+     isLastChunk: boolean;
+     /** The index of the utterance in the request that the parent snippet of this chunk corresponds to. */
+     utteranceIndex?: number;
+     snippet: Hume.tts.Snippet;
  }
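With this addition, each streamed TTS chunk is now typed as `SnippetAudioChunk`. Below is a minimal sketch of a chunk handler; the buffering strategy and function name are illustrative, and treating the `audio` string as base64 is an assumption (the field comment above only says "in the requested format"):

```typescript
import { Hume } from "hume";

// Illustrative handler for streamed TTS chunks: collects decoded audio in order
// and logs when a snippet finishes. Assumes `audio` is base64-encoded and that
// Node's Buffer is available.
function handleChunk(chunk: Hume.tts.SnippetAudioChunk, buffers: Buffer[]): void {
    buffers.push(Buffer.from(chunk.audio, "base64"));
    if (chunk.isLastChunk) {
        console.log(
            `snippet ${chunk.snippetId} (generation ${chunk.generationId}) complete: ` +
                `${chunk.chunkIndex + 1} chunk(s) of ${chunk.audioFormat} audio`
        );
    }
}
```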
@@ -22,6 +22,8 @@ export declare namespace Chat {
          resumedChatGroupId?: string;
          /** A flag to enable verbose transcription. Set this query parameter to `true` to have unfinalized user transcripts be sent to the client as interim UserMessage messages. The [interim](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.interim) field on a [UserMessage](/reference/empathic-voice-interface-evi/chat/chat#receive.User%20Message.type) denotes whether the message is "interim" or "final." */
          verboseTranscription?: boolean;
+         /** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
+         voiceId?: string;
          /** Extra query parameters sent at WebSocket connection */
          queryParams?: Record<string, string | string[] | object | object[]>;
      }
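The chat connect options gain an optional `voiceId` that overrides the voice set in the Config. A hedged sketch of passing it at connection time, assuming the usual `HumeClient` construction and chat socket event API; the config and voice IDs are placeholders:

```typescript
import { HumeClient } from "hume";

// Sketch only: "<config-id>" and "<voice-id>" are placeholders, and the
// connect()/on("message") shape is assumed to match the SDK's chat socket.
const client = new HumeClient({ apiKey: process.env.HUME_API_KEY! });

const socket = client.empathicVoice.chat.connect({
    configId: "<config-id>",
    voiceId: "<voice-id>", // new in this version range: overrides the Config's voice
    verboseTranscription: true,
});

socket.on("message", (message) => {
    // every incoming EVI message carries a `type` discriminant
    console.log(message.type);
});
```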
@@ -14,7 +14,7 @@ export interface PostedPrompt {
      /** An optional description of the Prompt version. */
      versionDescription?: string;
      /**
-      * Instructions used to shape EVIs behavior, responses, and style.
+      * Instructions used to shape EVI's behavior, responses, and style.
       *
       * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
       *
@@ -12,7 +12,7 @@ export interface PostedPromptVersion {
      /** An optional description of the Prompt version. */
      versionDescription?: string;
      /**
-      * Instructions used to shape EVIs behavior, responses, and style for this version of the Prompt.
+      * Instructions used to shape EVI's behavior, responses, and style for this version of the Prompt.
       *
       * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
       *
@@ -21,7 +21,7 @@ export interface PostedUserDefinedTool {
      /**
       * Stringified JSON defining the parameters used by this version of the Tool.
       *
-      * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
+      * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
       */
      parameters: string;
      /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */
@@ -18,7 +18,7 @@ export interface PostedUserDefinedToolVersion {
      /**
       * Stringified JSON defining the parameters used by this version of the Tool.
       *
-      * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
+      * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the Tool receives data in the expected format.
       */
      parameters: string;
      /** Optional text passed to the supplemental LLM in place of the tool call result. The LLM then uses this text to generate a response back to the user, ensuring continuity in the conversation if the Tool errors. */
@@ -8,7 +8,7 @@ export interface AssistantEnd {
      /**
       * The type of message sent through the socket; for an Assistant End message, this must be `assistant_end`.
       *
-      * This message indicates the conclusion of the assistants response, signaling that the assistant has finished speaking for the current conversational turn.
+      * This message indicates the conclusion of the assistant's response, signaling that the assistant has finished speaking for the current conversational turn.
       */
      type: "assistant_end";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -12,7 +12,7 @@ export interface AssistantInput {
      /**
       * Assistant text to synthesize into spoken audio and insert into the conversation.
       *
-      * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the users expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
+      * EVI uses this text to generate spoken audio using our proprietary expressive text-to-speech model. Our model adds appropriate emotional inflections and tones to the text based on the user's expressions and the context of the conversation. The synthesized audio is streamed back to the user as an [Assistant Message](/reference/empathic-voice-interface-evi/chat/chat#receive.AssistantMessage.type).
       */
      text: string;
  }
@@ -9,7 +9,7 @@ export interface AssistantMessage {
      /**
       * The type of message sent through the socket; for an Assistant Message, this must be `assistant_message`.
       *
-      * This message contains both a transcript of the assistants response and the expression measurement predictions of the assistants audio output.
+      * This message contains both a transcript of the assistant's response and the expression measurement predictions of the assistant's audio output.
       */
      type: "assistant_message";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -16,7 +16,7 @@ export interface AudioInput {
      /**
       * Base64 encoded audio input to insert into the conversation.
       *
-      * The content of an Audio Input message is treated as the users speech to EVI and must be streamed continuously. Pre-recorded audio files are not supported.
+      * The content of an Audio Input message is treated as the user's speech to EVI and must be streamed continuously. Pre-recorded audio files are not supported.
       *
       * For optimal transcription quality, the audio data should be transmitted in small chunks.
       *
@@ -1,8 +1,8 @@
  /**
   * This file was auto-generated by Fern from our API Definition.
   */
- export type ContextType = "temporary" | "persistent";
+ export type ContextType = "persistent" | "temporary";
  export declare const ContextType: {
-     readonly Temporary: "temporary";
      readonly Persistent: "persistent";
+     readonly Temporary: "temporary";
  };
@@ -5,6 +5,6 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.ContextType = void 0;
  exports.ContextType = {
-     Temporary: "temporary",
      Persistent: "persistent",
+     Temporary: "temporary",
  };
@@ -2,4 +2,4 @@
   * This file was auto-generated by Fern from our API Definition.
   */
  import * as Hume from "../../../index";
- export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage | Hume.empathicVoice.AssistantProsody;
+ export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.AssistantProsody | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage;
@@ -8,7 +8,7 @@ export interface PauseAssistantMessage {
      /**
       * The type of message sent through the socket; must be `pause_assistant_message` for our server to correctly identify and process it as a Pause Assistant message.
       *
-      * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI wont respond, but transcriptions of your audio inputs will still be recorded.
+      * Once this message is sent, EVI will not respond until a [Resume Assistant message](/reference/empathic-voice-interface-evi/chat/chat#send.ResumeAssistantMessage.type) is sent. When paused, EVI won't respond, but transcriptions of your audio inputs will still be recorded.
       */
      type: "pause_assistant_message";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -13,7 +13,7 @@ export interface PostedLanguageModel {
      /**
       * The model temperature, with values between 0 to 1 (inclusive).
       *
-      * Controls the randomness of the LLMs output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
+      * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
       */
      temperature?: number;
  }
@@ -7,12 +7,12 @@
   * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
   */
  export interface PostedTimeoutSpecsInactivity {
+     /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+     durationSecs?: number;
      /**
       * Boolean indicating if this timeout is enabled.
       *
       * If set to false, EVI will not timeout due to a specified duration of user inactivity being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
       */
      enabled: boolean;
-     /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
-     durationSecs?: number;
  }
@@ -7,12 +7,12 @@
   * Accepts a minimum value of 30 seconds and a maximum value of 1,800 seconds.
   */
  export interface PostedTimeoutSpecsMaxDuration {
+     /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
+     durationSecs?: number;
      /**
       * Boolean indicating if this timeout is enabled.
       *
       * If set to false, EVI will not timeout due to a specified maximum duration being reached. However, the conversation will eventually disconnect after 1,800 seconds (30 minutes), which is the maximum WebSocket duration limit for EVI.
       */
      enabled: boolean;
-     /** Duration in seconds for the timeout (e.g. 600 seconds represents 10 minutes). */
-     durationSecs?: number;
  }
@@ -45,7 +45,8 @@ export interface ReturnConfig {
       * Hume's eLLM (empathic Large Language Model) is a multimodal language model that takes into account both expression measures and language. The eLLM generates short, empathic language responses and guides text-to-speech (TTS) prosody.
       */
      ellmModel?: Hume.empathicVoice.ReturnEllmModel;
-     voice?: unknown;
+     /** A voice specification associated with this Config. */
+     voice?: Hume.empathicVoice.ReturnVoice;
      prompt?: Hume.empathicVoice.ReturnPrompt;
      /** Map of webhooks associated with this config. */
      webhooks?: (Hume.empathicVoice.ReturnWebhookSpec | undefined)[];
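`ReturnConfig.voice` is now typed as the new `ReturnVoice` instead of `unknown`, so a fetched Config's voice can be read without casting. A minimal sketch (the helper name is illustrative; all `ReturnVoice` fields are optional per the type above):

```typescript
import { Hume } from "hume";

// Illustrative helper: previously `config.voice` was `unknown` and required a
// cast; now its id/name/provider fields are typed directly (all optional).
function describeConfigVoice(config: Hume.empathicVoice.ReturnConfig): string {
    const voice = config.voice;
    if (voice === undefined) {
        return "no voice attached to this config";
    }
    return `${voice.name ?? "unnamed voice"} (${voice.id ?? "no id"}) from ${voice.provider ?? "unknown provider"}`;
}
```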
@@ -13,7 +13,7 @@ export interface ReturnLanguageModel {
      /**
       * The model temperature, with values between 0 to 1 (inclusive).
       *
-      * Controls the randomness of the LLMs output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
+      * Controls the randomness of the LLM's output, with values closer to 0 yielding focused, deterministic responses and values closer to 1 producing more creative, diverse responses.
       */
      temperature?: number;
  }
@@ -11,11 +11,9 @@ export interface ReturnPrompt {
      /** Identifier for a Prompt. Formatted as a UUID. */
      id: string;
      /**
-      * Instructions used to shape EVIs behavior, responses, and style.
+      * Instructions used to shape EVI's behavior, responses, and style.
       *
-      * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles.
-      *
-      * For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
+      * You can use the Prompt to define a specific goal or role for EVI, specifying how it should act or what it should focus on during the conversation. For example, EVI can be instructed to act as a customer support representative, a fitness coach, or a travel advisor, each with its own set of behaviors and response styles. For help writing a system prompt, see our [Prompting Guide](/docs/speech-to-speech-evi/guides/prompting).
       */
      text: string;
      /**
@@ -35,7 +35,7 @@ export interface ReturnUserDefinedTool {
      /**
       * Stringified JSON defining the parameters used by this version of the Tool.
       *
-      * These parameters define the inputs needed for the Tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
+      * These parameters define the inputs needed for the Tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
       */
      parameters: string;
  }
@@ -0,0 +1,12 @@
+ /**
+  * This file was auto-generated by Fern from our API Definition.
+  */
+ import * as Hume from "../../../index";
+ /**
+  * An Octave voice available for text-to-speech
+  */
+ export interface ReturnVoice {
+     id?: string;
+     name?: string;
+     provider?: Hume.empathicVoice.VoiceProvider;
+ }
@@ -0,0 +1,5 @@
+ "use strict";
+ /**
+  * This file was auto-generated by Fern from our API Definition.
+  */
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -23,7 +23,7 @@ export interface SessionSettings {
       */
      customSessionId?: string;
      /**
-      * Instructions used to shape EVIs behavior, responses, and style for the session.
+      * Instructions used to shape EVI's behavior, responses, and style for the session.
       *
       * When included in a Session Settings message, the provided Prompt overrides the existing one specified in the EVI configuration. If no Prompt was defined in the configuration, this Prompt will be the one used for the session.
       *
@@ -49,7 +49,7 @@ export interface SessionSettings {
      /**
       * Third party API key for the supplemental language model.
       *
-      * When provided, EVI will use this key instead of Humes API key for the supplemental LLM. This allows you to bypass rate limits and utilize your own API key as needed.
+      * When provided, EVI will use this key instead of Hume's API key for the supplemental LLM. This allows you to bypass rate limits and utilize your own API key as needed.
       */
      languageModelApiKey?: string;
      /**
@@ -10,7 +10,7 @@ export interface Tool {
      /**
       * Parameters of the tool. Is a stringified JSON schema.
       *
-      * These parameters define the inputs needed for the tools execution, including the expected data type and description for each input field. Structured as a JSON schema, this format ensures the tool receives data in the expected format.
+      * These parameters define the inputs needed for the tool's execution, including the expected data type and description for each input field. Structured as a JSON schema, this format ensures the tool receives data in the expected format.
       */
      parameters: string;
      /** An optional description of what the tool does, used by the supplemental LLM to choose when and how to call the function. */
@@ -11,7 +11,7 @@ export interface ToolCallMessage {
      /**
       * Parameters of the tool.
       *
-      * These parameters define the inputs needed for the tools execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
+      * These parameters define the inputs needed for the tool's execution, including the expected data type and description for each input field. Structured as a stringified JSON schema, this format ensures the tool receives data in the expected format.
       */
      parameters: string;
      /**
@@ -12,7 +12,7 @@ export interface UserInput {
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
      customSessionId?: string;
      /**
-      * User text to insert into the conversation. Text sent through a User Input message is treated as the users speech to EVI. EVI processes this input and provides a corresponding response.
+      * User text to insert into the conversation. Text sent through a User Input message is treated as the user's speech to EVI. EVI processes this input and provides a corresponding response.
       *
       * Expression measurement results are not available for User Input messages, as the prosody model relies on audio input and cannot process text alone.
       */
@@ -8,7 +8,7 @@ export interface UserInterruption {
      /**
       * The type of message sent through the socket; for a User Interruption message, this must be `user_interruption`.
       *
-      * This message indicates the user has interrupted the assistants response. EVI detects the interruption in real-time and sends this message to signal the interruption event. This message allows the system to stop the current audio playback, clear the audio queue, and prepare to handle new user input.
+      * This message indicates the user has interrupted the assistant's response. EVI detects the interruption in real-time and sends this message to signal the interruption event. This message allows the system to stop the current audio playback, clear the audio queue, and prepare to handle new user input.
       */
      type: "user_interruption";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -9,7 +9,7 @@ export interface UserMessage {
      /**
       * The type of message sent through the socket; for a User Message, this must be `user_message`.
       *
-      * This message contains both a transcript of the users input and the expression measurement predictions if the input was sent as an [Audio Input message](/reference/empathic-voice-interface-evi/chat/chat#send.AudioInput.type). Expression measurement predictions are not provided for a [User Input message](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.type), as the prosody model relies on audio input and cannot process text alone.
+      * This message contains both a transcript of the user's input and the expression measurement predictions if the input was sent as an [Audio Input message](/reference/empathic-voice-interface-evi/chat/chat#send.AudioInput.type). Expression measurement predictions are not provided for a [User Input message](/reference/empathic-voice-interface-evi/chat/chat#send.UserInput.type), as the prosody model relies on audio input and cannot process text alone.
       */
      type: "user_message";
      /** Used to manage conversational state, correlate frontend and backend data, and persist conversations across EVI sessions. */
@@ -1,9 +1,8 @@
  /**
   * This file was auto-generated by Fern from our API Definition.
   */
- export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE" | "OCTAVE_COMBINED";
+ export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE";
  export declare const VoiceProvider: {
      readonly HumeAi: "HUME_AI";
      readonly CustomVoice: "CUSTOM_VOICE";
-     readonly OctaveCombined: "OCTAVE_COMBINED";
  };
@@ -7,5 +7,4 @@ exports.VoiceProvider = void 0;
  exports.VoiceProvider = {
      HumeAi: "HUME_AI",
      CustomVoice: "CUSTOM_VOICE",
-     OctaveCombined: "OCTAVE_COMBINED",
  };
@@ -38,6 +38,8 @@ export * from "./LanguageModelType";
  export * from "./ModelProviderEnum";
  export * from "./ValidationErrorLocItem";
  export * from "./ValidationError";
+ export * from "./VoiceId";
+ export * from "./VoiceName";
  export * from "./WebhookEventChatEnded";
  export * from "./WebhookEventChatStartType";
  export * from "./WebhookEventChatStarted";
@@ -102,7 +104,6 @@ export * from "./PostedEventMessageSpec";
  export * from "./PostedTimeoutSpec";
  export * from "./ReturnEventMessageSpec";
  export * from "./ReturnTimeoutSpec";
- export * from "./VoiceProvider";
- export * from "./VoiceId";
- export * from "./VoiceName";
  export * from "./VoiceRef";
+ export * from "./ReturnVoice";
+ export * from "./VoiceProvider";
@@ -54,6 +54,8 @@ __exportStar(require("./LanguageModelType"), exports);
  __exportStar(require("./ModelProviderEnum"), exports);
  __exportStar(require("./ValidationErrorLocItem"), exports);
  __exportStar(require("./ValidationError"), exports);
+ __exportStar(require("./VoiceId"), exports);
+ __exportStar(require("./VoiceName"), exports);
  __exportStar(require("./WebhookEventChatEnded"), exports);
  __exportStar(require("./WebhookEventChatStartType"), exports);
  __exportStar(require("./WebhookEventChatStarted"), exports);
@@ -118,7 +120,6 @@ __exportStar(require("./PostedEventMessageSpec"), exports);
  __exportStar(require("./PostedTimeoutSpec"), exports);
  __exportStar(require("./ReturnEventMessageSpec"), exports);
  __exportStar(require("./ReturnTimeoutSpec"), exports);
- __exportStar(require("./VoiceProvider"), exports);
- __exportStar(require("./VoiceId"), exports);
- __exportStar(require("./VoiceName"), exports);
  __exportStar(require("./VoiceRef"), exports);
+ __exportStar(require("./ReturnVoice"), exports);
+ __exportStar(require("./VoiceProvider"), exports);
@@ -44,10 +44,6 @@ export declare class Tts {
      *
      * @example
      *     await client.tts.synthesizeJson({
-      *         utterances: [{
-      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
-      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
-      *         }],
      *         context: {
      *             utterances: [{
      *                 text: "How can people see beauty so differently?",
@@ -57,7 +53,11 @@ export declare class Tts {
      *         format: {
      *             type: "mp3"
      *         },
-      *         numGenerations: 1
+      *         numGenerations: 1,
+      *         utterances: [{
+      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
+      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
+      *         }]
      *     })
      */
     synthesizeJson(request: Hume.tts.PostedTts, requestOptions?: Tts.RequestOptions): core.HttpResponsePromise<Hume.tts.ReturnTts>;
@@ -77,10 +77,6 @@ class Tts {
      *
      * @example
      *     await client.tts.synthesizeJson({
-      *         utterances: [{
-      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
-      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
-      *         }],
      *         context: {
      *             utterances: [{
      *                 text: "How can people see beauty so differently?",
@@ -90,7 +86,11 @@
      *         format: {
      *             type: "mp3"
      *         },
-      *         numGenerations: 1
+      *         numGenerations: 1,
+      *         utterances: [{
+      *             text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
+      *             description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
+      *         }]
      *     })
      */
     synthesizeJson(request, requestOptions) {