npm - hume - Versions diffs - 0.13.8 → 0.14.1 - Mend

hume 0.13.8 → 0.14.1

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (121) hide show

package/.mock/definition/empathic-voice/__package__.yml CHANGED Viewed

@@ -1027,6 +1027,8 @@ types:
         name: Claude3Haiku20240307
       - value: claude-sonnet-4-20250514
         name: ClaudeSonnet420250514
+      - value: claude-sonnet-4-5-20250929
+        name: ClaudeSonnet4520250929
       - value: us.anthropic.claude-3-5-haiku-20241022-v1:0
         name: UsAnthropicClaude35Haiku20241022V10
       - value: us.anthropic.claude-3-5-sonnet-20240620-v1:0
@@ -1119,6 +1121,8 @@ types:
         name: Llama4Maverick17B128EInstruct
       - value: Qwen3-32B
         name: Qwen332B
+      - value: grok-4-fast-non-reasoning-latest
+        name: Grok4FastNonReasoningLatest
       - ellm
       - value: custom-language-model
         name: CustomLanguageModel
@@ -1470,9 +1474,6 @@ types:
           Version numbers are integer values representing different iterations
           of the Prompt. Each update to the Prompt increments its version
           number.
-      version_description:
-        type: optional<string>
-        docs: An optional description of the Prompt version.
       version_type:
         type: ReturnPromptVersionType
         docs: >-
@@ -1488,6 +1489,9 @@ types:
         docs: >-
           Time at which the Prompt was last modified. Measured in seconds since
           the Unix epoch.
+      version_description:
+        type: optional<string>
+        docs: An optional description of the Prompt version.
     source:
       openapi: evi-openapi.json
   ReturnPagedConfigs:
@@ -1543,12 +1547,6 @@ types:
           Version numbers are integer values representing different iterations
           of the Config. Each update to the Config increments its version
           number.
-      tools:
-        type: optional<list<optional<ReturnUserDefinedTool>>>
-        docs: List of user-defined tools associated with this Config.
-      version_description:
-        type: optional<string>
-        docs: An optional description of the Config version.
       language_model:
         type: optional<ReturnLanguageModel>
         docs: >-
@@ -1603,6 +1601,12 @@ types:
         docs: >-
           Time at which the Config was last modified. Measured in seconds since
           the Unix epoch.
+      version_description:
+        type: optional<string>
+        docs: An optional description of the Config version.
+      tools:
+        type: optional<list<optional<ReturnUserDefinedTool>>>
+        docs: List of user-defined tools associated with this Config.
     source:
       openapi: evi-openapi.json
   ReturnPagedChatsPaginationDirection:

package/.mock/definition/empathic-voice/chat.yml CHANGED Viewed

@@ -98,6 +98,111 @@ channel:
         Use the GET `/v0/evi/chat_groups` endpoint to obtain the Chat Group IDs
         of all Chat Groups associated with an API key. This endpoint returns a
         list of all available chat groups.
+    session_settings[audio][channels]:
+      type: optional<integer>
+      docs: Sets number of audio channels for audio input.
+    session_settings[audio][encoding]:
+      type: optional<string>
+      docs: Sets encoding format of the audio input, such as `linear16`.
+    session_settings[audio][sample_rate]:
+      type: optional<integer>
+      docs: >-
+        Sets the sample rate for audio input. (Number of samples per second in
+        the audio input, measured in Hertz.)
+    session_settings[context][text]:
+      type: optional<string>
+      docs: >-
+        The context to be injected into the conversation. Helps inform the LLM's
+        response by providing relevant information about the ongoing
+        conversation.
+        This text will be appended to the end of
+        [user_messages](/reference/speech-to-speech-evi/chat#receive.UserMessage.message.content)
+        based on the chosen persistence level. For example, if you want to
+        remind EVI of its role as a helpful weather assistant, the context you
+        insert will be appended to the end of user messages as `{Context: You
+        are a helpful weather assistant}`.
+    session_settings[context][type]:
+      type: optional<string>
+      docs: >-
+        The persistence level of the injected context. Specifies how long the
+        injected context will remain active in the session.
+        - **Temporary**: Context that is only applied to the following assistant
+        response.
+        - **Persistent**: Context that is applied to all subsequent assistant
+        responses for the remainder of the Chat.
+    session_settings[custom_session_id]:
+      type: optional<string>
+      docs: >-
+        Used to manage conversational state, correlate frontend and backend
+        data, and persist conversations across EVI sessions.
+    session_settings[event_limit]:
+      type: optional<integer>
+      docs: >-
+        The maximum number of chat events to return from chat history. By
+        default, the system returns up to 300 events (100 events per page × 3
+        pages). Set this parameter to a smaller value to limit the number of
+        events returned.
+    session_settings[language_model_api_key]:
+      type: optional<string>
+      docs: >-
+        Third party API key for the supplemental language model.
+        When provided, EVI will use this key instead of Hume's API key for the
+        supplemental LLM. This allows you to bypass rate limits and utilize your
+        own API key as needed.
+    session_settings[system_prompt]:
+      type: optional<string>
+      docs: >-
+        Instructions used to shape EVI's behavior, responses, and style for the
+        session.
+        When included in a Session Settings message, the provided Prompt
+        overrides the existing one specified in the EVI configuration. If no
+        Prompt was defined in the configuration, this Prompt will be the one
+        used for the session.
+        You can use the Prompt to define a specific goal or role for EVI,
+        specifying how it should act or what it should focus on during the
+        conversation. For example, EVI can be instructed to act as a customer
+        support representative, a fitness coach, or a travel advisor, each with
+        its own set of behaviors and response styles.
+        For help writing a system prompt, see our [Prompting
+        Guide](/docs/speech-to-speech-evi/guides/prompting).
+    session_settings[variables]:
+      type: optional<string>
+      docs: >-
+        This field allows you to assign values to dynamic variables referenced
+        in your system prompt.
+        Each key represents the variable name, and the corresponding value is
+        the specific content you wish to assign to that variable within the
+        session. While the values for variables can be strings, numbers, or
+        booleans, the value will ultimately be converted to a string when
+        injected into your system prompt.
+        Using this field, you can personalize responses based on
+        session-specific details. For more guidance, see our [guide on using
+        dynamic
+        variables](/docs/speech-to-speech-evi/features/dynamic-variables).
+    session_settings[voice_id]:
+      type: optional<string>
+      docs: >-
+        The name or ID of the voice from the `Voice Library` to be used as the
+        speaker for this EVI session. This will override the speaker set in the
+        selected configuration.
     verbose_transcription:
       type: optional<boolean>
       default: false

package/.mock/definition/empathic-voice/configs.yml CHANGED Viewed

@@ -140,7 +140,9 @@ service:
           properties:
             evi_version:
               type: string
-              docs: EVI version to use. Only version `3` is supported.
+              docs: >-
+                EVI version to use. Only versions `3` and `4-mini` are
+                supported.
             name:
               type: string
               docs: Name applied to all versions of a particular Config.

package/.mock/definition/tts/__package__.yml CHANGED Viewed

@@ -30,7 +30,7 @@ service:
         format.
       source:
         openapi: tts-openapi.json
-      display-name: Text-to-speech (Json)
+      display-name: Text-to-Speech (Json)
       request:
         body:
           type: PostedTts
@@ -79,6 +79,7 @@ service:
                           Beauty is no quality in things themselves: It exists
                           merely in the mind which contemplates them.
                         utterance_index: 0
+                        timestamps: []
               request_id: 66e01f90-4501-4aa0-bbaf-74f45dc15aa725906
     synthesize-file:
       path: /v0/tts/file
@@ -94,7 +95,7 @@ service:
         The response contains the generated audio file in the requested format.
       source:
         openapi: tts-openapi.json
-      display-name: Text-to-speech (File)
+      display-name: Text-to-Speech (File)
       request:
         body:
           type: PostedTts
@@ -131,7 +132,7 @@ service:
         prosody.
       source:
         openapi: tts-openapi.json
-      display-name: Text-to-speech (Streamed File)
+      display-name: Text-to-Speech (Streamed File)
       request:
         body:
           type: PostedTts
@@ -166,14 +167,14 @@ service:
         base64.
       source:
         openapi: tts-openapi.json
-      display-name: Text-to-speech (Streamed JSON)
+      display-name: Text-to-Speech (Streamed JSON)
       request:
         body:
           type: PostedTts
         content-type: application/json
       response-stream:
         docs: Successful Response
-        type: SnippetAudioChunk
+        type: TtsOutput
         format: json
       errors:
         - UnprocessableEntityError
@@ -189,15 +190,30 @@ service:
   source:
     openapi: tts-openapi.json
 types:
+  TtsOutput:
+    discriminant: type
+    base-properties: {}
+    union:
+      timestamp:
+        type: TimestampMessage
+      audio:
+        type: SnippetAudioChunk
+    source:
+      openapi: tts-openapi.json
+  AudioFormatType:
+    enum:
+      - mp3
+      - pcm
+      - wav
+    source:
+      openapi: tts-openapi.json
   PublishTts:
     docs: Input message type for the TTS stream.
     properties:
-      text:
-        type: optional<string>
-        docs: The input text to be converted to speech output.
-        default: ''
-        validation:
-          maxLength: 5000
+      close:
+        type: optional<boolean>
+        docs: Force the generation of audio and close the stream.
+        default: false
       description:
         type: optional<string>
         docs: >-
@@ -206,12 +222,12 @@ types:
           accent"`).
         validation:
           maxLength: 1000
-      voice:
-        type: optional<PostedUtteranceVoice>
+      flush:
+        type: optional<boolean>
         docs: >-
-          The name or ID of the voice from the `Voice Library` to be used as the
-          speaker for this and all subsequent utterances, until the `"voice"`
-          field is updated again.
+          Force the generation of audio regardless of how much text has been
+          supplied.
+        default: false
       speed:
         type: optional<double>
         docs: A relative measure of how fast this utterance should be spoken.
@@ -219,6 +235,12 @@ types:
         validation:
           min: 0.25
           max: 3
+      text:
+        type: optional<string>
+        docs: The input text to be converted to speech output.
+        default: ''
+        validation:
+          maxLength: 5000
       trailing_silence:
         type: optional<double>
         docs: Duration of trailing silence (in seconds) to add to this utterance
@@ -226,18 +248,104 @@ types:
         validation:
           min: 0
           max: 5
-      flush:
-        type: optional<boolean>
+      voice:
+        type: optional<PostedUtteranceVoice>
         docs: >-
-          Force the generation of audio regardless of how much text has been
-          supplied.
-        default: false
-      close:
-        type: optional<boolean>
-        docs: Force the generation of audio and close the stream.
-        default: false
+          The name or ID of the voice from the `Voice Library` to be used as the
+          speaker for this and all subsequent utterances, until the `"voice"`
+          field is updated again.
     source:
       openapi: tts-asyncapi.json
+  MillisecondInterval:
+    properties:
+      begin:
+        type: integer
+        docs: Start time of the interval in milliseconds.
+      end:
+        type: integer
+        docs: End time of the interval in milliseconds.
+    source:
+      openapi: tts-openapi.json
+  TimestampMessage:
+    docs: A word or phoneme level timestamp for the generated audio.
+    properties:
+      generation_id:
+        type: string
+        docs: >-
+          The generation ID of the parent snippet that this chunk corresponds
+          to.
+      request_id:
+        type: string
+        docs: ID of the initiating request.
+      snippet_id:
+        type: string
+        docs: The ID of the parent snippet that this chunk corresponds to.
+      timestamp:
+        type: Timestamp
+        docs: A word or phoneme level timestamp for the generated audio.
+    source:
+      openapi: tts-openapi.json
+  SnippetAudioChunk:
+    docs: Metadata for a chunk of generated audio.
+    properties:
+      audio:
+        type: string
+        docs: The generated audio output chunk in the requested format.
+      audio_format:
+        type: AudioFormatType
+        docs: The generated audio output format.
+      chunk_index:
+        type: integer
+        docs: The index of the audio chunk in the snippet.
+      generation_id:
+        type: string
+        docs: >-
+          The generation ID of the parent snippet that this chunk corresponds
+          to.
+      is_last_chunk:
+        type: boolean
+        docs: >-
+          Whether or not this is the last chunk streamed back from the decoder
+          for one input snippet.
+      request_id:
+        type: string
+        docs: ID of the initiating request.
+      snippet:
+        type: optional<Snippet>
+      snippet_id:
+        type: string
+        docs: The ID of the parent snippet that this chunk corresponds to.
+      text:
+        type: string
+        docs: The text of the parent snippet that this chunk corresponds to.
+      transcribed_text:
+        type: optional<string>
+        docs: >-
+          The transcribed text of the generated audio of the parent snippet that
+          this chunk corresponds to. It is only present if `instant_mode` is set
+          to `false`.
+      utterance_index:
+        type: optional<integer>
+        docs: >-
+          The index of the utterance in the request that the parent snippet of
+          this chunk corresponds to.
+    source:
+      openapi: tts-openapi.json
+  Timestamp:
+    properties:
+      text: string
+      time:
+        type: MillisecondInterval
+      type:
+        type: TimestampType
+    source:
+      openapi: tts-openapi.json
+  TimestampType:
+    enum:
+      - word
+      - phoneme
+    source:
+      openapi: tts-openapi.json
   PostedUtteranceVoiceWithId:
     properties:
       id:
@@ -309,59 +417,12 @@ types:
       - type: PostedUtteranceVoiceWithName
     source:
       openapi: tts-openapi.json
-  AudioFormatType:
+  OctaveVersion:
     enum:
-      - mp3
-      - pcm
-      - wav
-    source:
-      openapi: tts-openapi.json
-  SnippetAudioChunk:
-    docs: Metadata for a chunk of generated audio.
-    properties:
-      audio:
-        type: string
-        docs: The generated audio output chunk in the requested format.
-      audio_format:
-        type: AudioFormatType
-        docs: The generated audio output format.
-      chunk_index:
-        type: integer
-        docs: The index of the audio chunk in the snippet.
-      generation_id:
-        type: string
-        docs: >-
-          The generation ID of the parent snippet that this chunk corresponds
-          to.
-      is_last_chunk:
-        type: boolean
-        docs: >-
-          Whether or not this is the last chunk streamed back from the decoder
-          for one input snippet.
-      request_id:
-        type: string
-        docs: ID of the initiating request.
-      snippet:
-        type: optional<Snippet>
-      snippet_id:
-        type: string
-        docs: The ID of the parent snippet that this chunk corresponds to.
-      text:
-        type: string
-        docs: The text of the parent snippet that this chunk corresponds to.
-      transcribed_text:
-        type: optional<string>
-        docs: >-
-          The transcribed text of the generated audio of the parent snippet that
-          this chunk corresponds to. It is only present if `instant_mode` is set
-          to `false`.
-      type:
-        type: optional<literal<"audio">>
-      utterance_index:
-        type: optional<integer>
-        docs: >-
-          The index of the utterance in the request that the parent snippet of
-          this chunk corresponds to.
+      - value: '1'
+        name: One
+      - value: '2'
+        name: Two
     source:
       openapi: tts-openapi.json
   PostedContextWithGenerationId:
@@ -471,6 +532,9 @@ types:
       format:
         type: optional<Format>
         docs: Specifies the output audio file format.
+      include_timestamp_types:
+        type: optional<list<TimestampType>>
+        docs: The set of timestamp types to include in the response.
       num_generations:
         type: optional<integer>
         docs: Number of generations of the audio to produce.
@@ -520,6 +584,9 @@ types:
         type: list<PostedUtterance>
       version:
         type: optional<OctaveVersion>
+        docs: >-
+          The version of the Octave Model to use. 1 for the legacy model, 2 for
+          the new model.
       instant_mode:
         type: optional<boolean>
         docs: >-
@@ -556,7 +623,6 @@ types:
           troubleshooting assistance.
     source:
       openapi: tts-openapi.json
-  OctaveVersion: string
   ReturnVoice:
     docs: An Octave voice available for text-to-speech
     properties:
@@ -596,6 +662,9 @@ types:
       text:
         type: string
         docs: The text for this **Snippet**.
+      timestamps:
+        docs: A list of word or phoneme level timestamps for the generated audio.
+        type: list<Timestamp>
       transcribed_text:
         type: optional<string>
         docs: >-

package/.mock/definition/tts/streamInput.yml CHANGED Viewed

@@ -6,6 +6,21 @@ channel:
   auth: false
   docs: Generate emotionally expressive speech.
   query-parameters:
+    access_token:
+      type: optional<string>
+      default: ''
+      docs: >-
+        Access token used for authenticating the client. If not provided, an
+        `api_key` must be provided to authenticate.
+        The access token is generated using both an API key and a Secret key,
+        which provides an additional layer of security compared to using just an
+        API key.
+        For more details, refer to the [Authentication Strategies
+        Guide](/docs/introduction/api-key#authentication-strategies).
     context_generation_id:
       type: optional<string>
       docs: >-
@@ -13,14 +28,10 @@ channel:
         consistent speech style and prosody across multiple requests. Including
         context may increase audio generation times.
     format_type: root.AudioFormatType
-    strip_headers:
-      type: optional<boolean>
-      default: false
-      docs: >-
-        If enabled, the audio for all the chunks of a generation, once
-        concatenated together, will constitute a single audio file. Otherwise,
-        if disabled, each chunk's audio will be its own audio file, each with
-        its own headers (if applicable).
+    include_timestamp_types:
+      type: optional<root.TimestampType>
+      allow-multiple: true
+      docs: The set of timestamp types to include in the response.
     instant_mode:
       type: optional<boolean>
       default: true
@@ -34,21 +45,15 @@ channel:
       type: optional<boolean>
       default: false
       docs: If enabled, no binary websocket messages will be sent to the client.
-    access_token:
-      type: optional<string>
-      default: ''
+    strip_headers:
+      type: optional<boolean>
+      default: false
       docs: >-
-        Access token used for authenticating the client. If not provided, an
-        `api_key` must be provided to authenticate.
-        The access token is generated using both an API key and a Secret key,
-        which provides an additional layer of security compared to using just an
-        API key.
-        For more details, refer to the [Authentication Strategies
-        Guide](/docs/introduction/api-key#authentication-strategies).
+        If enabled, the audio for all the chunks of a generation, once
+        concatenated together, will constitute a single audio file. Otherwise,
+        if disabled, each chunk's audio will be its own audio file, each with
+        its own headers (if applicable).
+    version: root.OctaveVersion
     api_key:
       type: optional<string>
       default: ''
@@ -64,21 +69,7 @@ channel:
       origin: client
       body:
         type: root.PublishTts
-    subscribe:
-      origin: server
-      body:
-        type: root.SnippetAudioChunk
   examples:
     - messages:
         - type: publish
           body: {}
-        - type: subscribe
-          body:
-            request_id: request_id
-            generation_id: generation_id
-            snippet_id: snippet_id
-            text: text
-            chunk_index: 1
-            audio: audio
-            audio_format: mp3
-            is_last_chunk: true

package/api/resources/empathicVoice/resources/chat/client/Client.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@
 import * as environments from "../../../../../../environments";
 import * as core from "../../../../../../core";
 import { ChatSocket } from "./Socket";
+import { SessionSettings } from "../../../types/SessionSettings";
 export declare function createHostnameWithProtocol(environment: string): string;
 export declare namespace Chat {
     interface Options {
@@ -24,6 +25,9 @@ export declare namespace Chat {
         verboseTranscription?: boolean;
         /** ID of the Voice to use for this chat. If specified, will override the voice set in the Config */
         voiceId?: string;
+        sessionSettings?: Pick<SessionSettings, Exclude<keyof SessionSettings, "builtinTools" | "type" | "metadata" | "tools">> & {
+            eventLimit?: number;
+        };
         /** Extra query parameters sent at WebSocket connection */
         queryParams?: Record<string, string | string[] | object | object[]>;
     }