npm - hume - Versions diffs - 0.13.1 → 0.13.3 - Mend

hume 0.13.1 → 0.13.3

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (65) hide show

package/.mock/definition/empathic-voice/__package__.yml CHANGED Viewed

@@ -142,8 +142,8 @@ types:
       openapi: evi-asyncapi.json
   ContextType:
     enum:
-      - temporary
       - persistent
+      - temporary
     source:
       openapi: evi-asyncapi.json
   Encoding:
@@ -990,6 +990,7 @@ types:
     union:
       - type: AssistantEnd
       - type: AssistantMessage
+      - type: AssistantProsody
       - type: ChatMetadata
       - type: WebSocketError
       - type: UserInterruption
@@ -997,7 +998,6 @@ types:
       - type: ToolCallMessage
       - type: ToolResponseMessage
       - type: ToolErrorMessage
-      - type: AssistantProsody
     source:
       openapi: evi-asyncapi.json
   HTTPValidationError:
@@ -1142,6 +1142,26 @@ types:
       type: string
     source:
       openapi: evi-openapi.json
+  VoiceId:
+    properties:
+      id:
+        type: string
+        docs: ID of the voice in the `Voice Library`.
+      provider:
+        type: optional<VoiceProvider>
+        docs: Model provider associated with this Voice ID.
+    source:
+      openapi: evi-openapi.json
+  VoiceName:
+    properties:
+      name:
+        type: string
+        docs: Name of the voice in the `Voice Library`.
+      provider:
+        type: optional<VoiceProvider>
+        docs: Model provider associated with this Voice Name.
+    source:
+      openapi: evi-openapi.json
   WebhookEventChatEnded:
     properties:
       event_name:
@@ -1551,7 +1571,9 @@ types:
           model that takes into account both expression measures and language.
           The eLLM generates short, empathic language responses and guides
           text-to-speech (TTS) prosody.
-      voice: optional<unknown>
+      voice:
+        type: optional<ReturnVoice>
+        docs: A voice specification associated with this Config.
       prompt: optional<ReturnPrompt>
       webhooks:
         type: optional<list<optional<ReturnWebhookSpec>>>
@@ -2928,33 +2950,6 @@ types:
           minutes).
     source:
       openapi: evi-openapi.json
-  VoiceProvider:
-    enum:
-      - HUME_AI
-      - CUSTOM_VOICE
-      - OCTAVE_COMBINED
-    source:
-      openapi: evi-openapi.json
-  VoiceId:
-    properties:
-      id:
-        type: string
-        docs: ID of the voice in the `Voice Library`.
-      provider:
-        type: optional<VoiceProvider>
-        docs: Model provider associated with this Voice ID.
-    source:
-      openapi: evi-openapi.json
-  VoiceName:
-    properties:
-      name:
-        type: string
-        docs: Name of the voice in the `Voice Library`.
-      provider:
-        type: optional<VoiceProvider>
-        docs: Model provider associated with this Voice Name.
-    source:
-      openapi: evi-openapi.json
   VoiceRef:
     discriminated: false
     union:
@@ -2962,3 +2957,17 @@ types:
       - type: VoiceName
     source:
       openapi: evi-openapi.json
+  ReturnVoice:
+    docs: An Octave voice available for text-to-speech
+    properties:
+      id: optional<string>
+      name: optional<string>
+      provider: optional<VoiceProvider>
+    source:
+      openapi: evi-openapi.json
+  VoiceProvider:
+    enum:
+      - HUME_AI
+      - CUSTOM_VOICE
+    source:
+      openapi: evi-openapi.json

package/.mock/definition/tts/__package__.yml CHANGED Viewed

@@ -29,7 +29,7 @@ service:
         The response includes the base64-encoded audio and metadata in JSON
         format.
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Json)
       request:
         body:
@@ -43,14 +43,6 @@ service:
         - UnprocessableEntityError
       examples:
         - request:
-            utterances:
-              - text: >-
-                  Beauty is no quality in things themselves: It exists merely in
-                  the mind which contemplates them.
-                description: >-
-                  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
-                  rounded vowels, and a warm, steady tone with an articulate,
-                  academic quality.
             context:
               utterances:
                 - text: How can people see beauty so differently?
@@ -61,16 +53,24 @@ service:
             format:
               type: mp3
             num_generations: 1
+            utterances:
+              - text: >-
+                  Beauty is no quality in things themselves: It exists merely in
+                  the mind which contemplates them.
+                description: >-
+                  Middle-aged masculine voice with a clear, rhythmic Scots lilt,
+                  rounded vowels, and a warm, steady tone with an articulate,
+                  academic quality.
           response:
             body:
               generations:
-                - generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
+                - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
                   duration: 7.44225
-                  file_size: 120192
                   encoding:
                     format: mp3
                     sample_rate: 48000
-                  audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
+                  file_size: 120192
+                  generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
                   snippets:
                     - - audio: //PExAA0DDYRvkpNfhv3JI5JZ...etc.
                         generation_id: 795c949a-1510-4a80-9646-7d0863b023ab
@@ -93,7 +93,7 @@ service:
         The response contains the generated audio file in the requested format.
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Text-to-speech (File)
       request:
         body:
@@ -107,6 +107,11 @@ service:
         - UnprocessableEntityError
       examples:
         - request:
+            context:
+              generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
+            format:
+              type: mp3
+            num_generations: 1
             utterances:
               - text: >-
                   Beauty is no quality in things themselves: It exists merely in
@@ -115,11 +120,6 @@ service:
                   Middle-aged masculine voice with a clear, rhythmic Scots lilt,
                   rounded vowels, and a warm, steady tone with an articulate,
                   academic quality.
-            context:
-              generation_id: 09ad914d-8e7f-40f8-a279-e34f07f7dab2
-            format:
-              type: mp3
-            num_generations: 1
     synthesize-json-streaming:
       path: /v0/tts/stream/json
       method: POST
@@ -134,7 +134,7 @@ service:
         The response is a stream of JSON objects including audio encoded in
         base64.
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Streamed JSON)
       request:
         body:
@@ -165,7 +165,7 @@ service:
         additional context can be included to influence the speech's style and
         prosody.
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Text-to-speech (Streamed File)
       request:
         body:
@@ -187,7 +187,7 @@ service:
                   name: Male English Actor
                   provider: HUME_AI
   source:
-    openapi: tts-openapi.yml
+    openapi: tts-openapi.json
 types:
   PostedContextWithGenerationId:
     properties:
@@ -198,13 +198,13 @@ types:
           consistent speech style and prosody across multiple requests.
           Including context may increase audio generation times.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedContextWithUtterances:
     properties:
       utterances:
         type: list<PostedUtterance>
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   AudioEncoding:
     docs: >-
       Encoding information about the generated audio, including the `format` and
@@ -219,14 +219,14 @@ types:
           The sample rate (`Hz`) of the generated audio. The default sample rate
           is `48000 Hz`.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   AudioFormatType:
     enum:
       - mp3
       - pcm
       - wav
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ReturnGeneration:
     properties:
       generation_id:
@@ -256,17 +256,17 @@ types:
           optimized for speech delivery.
         type: list<list<Snippet>>
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   HTTPValidationError:
     properties:
       detail:
         type: optional<list<ValidationError>>
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   FormatMp3:
     properties: {}
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedContext:
     discriminated: false
     docs: >-
@@ -277,7 +277,7 @@ types:
       - type: PostedContextWithGenerationId
       - type: PostedContextWithUtterances
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
     inline: true
   Format:
     discriminant: type
@@ -291,7 +291,7 @@ types:
       wav:
         type: FormatWav
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedTts:
     properties:
       context:
@@ -373,7 +373,7 @@ types:
           must be `1` or omitted).
         default: true
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ReturnTts:
     properties:
       request_id:
@@ -385,7 +385,7 @@ types:
       generations:
         type: list<ReturnGeneration>
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ReturnVoice:
     docs: An Octave voice available for text-to-speech
     properties:
@@ -404,11 +404,11 @@ types:
           Voices created through this endpoint will always have the provider set
           to `CUSTOM_VOICE`, indicating a custom voice stored in your account.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   FormatPcm:
     properties: {}
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   Snippet:
     properties:
       id:
@@ -434,11 +434,49 @@ types:
           The segmented audio output in the requested format, encoded as a
           base64 string.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   SnippetAudioChunk:
-    properties: {}
+    properties:
+      generation_id:
+        type: string
+        docs: >-
+          The generation ID of the parent snippet that this chunk corresponds
+          to.
+      snippet_id:
+        type: string
+        docs: The ID of the parent snippet that this chunk corresponds to.
+      text:
+        type: string
+        docs: The text of the parent snippet that this chunk corresponds to.
+      transcribed_text:
+        type: optional<string>
+        docs: >-
+          The transcribed text of the generated audio of the parent snippet that
+          this chunk corresponds to. It is only present if `instant_mode` is set
+          to `false`.
+      chunk_index:
+        type: integer
+        docs: The index of the audio chunk in the snippet.
+      audio:
+        type: string
+        docs: The generated audio output chunk in the requested format.
+      audio_format:
+        type: AudioFormatType
+        docs: The generated audio output format.
+      is_last_chunk:
+        type: boolean
+        docs: >-
+          Whether or not this is the last chunk streamed back from the decoder
+          for one input snippet.
+      utterance_index:
+        type: optional<integer>
+        docs: >-
+          The index of the utterance in the request that the parent snippet of
+          this chunk corresponds to.
+      snippet:
+        type: Snippet
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedUtterance:
     properties:
       text:
@@ -492,14 +530,14 @@ types:
           min: 0
           max: 5
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ValidationErrorLocItem:
     discriminated: false
     union:
       - string
       - integer
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
     inline: true
   ValidationError:
     properties:
@@ -508,7 +546,7 @@ types:
       msg: string
       type: string
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedUtteranceVoiceWithId:
     properties:
       id:
@@ -537,7 +575,7 @@ types:
           users. In contrast, your custom voices are private and accessible only
           via requests authenticated with your API key.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedUtteranceVoiceWithName:
     properties:
       name:
@@ -566,31 +604,31 @@ types:
           users. In contrast, your custom voices are private and accessible only
           via requests authenticated with your API key.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   VoiceProvider:
     enum:
       - HUME_AI
       - CUSTOM_VOICE
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   PostedUtteranceVoice:
     discriminated: false
     union:
       - type: PostedUtteranceVoiceWithId
       - type: PostedUtteranceVoiceWithName
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   FormatWav:
     properties: {}
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ErrorResponse:
     properties:
       error: optional<string>
       message: optional<string>
       code: optional<string>
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json
   ReturnPagedVoices:
     docs: A paginated list Octave voices available for text-to-speech
     properties:
@@ -619,4 +657,4 @@ types:
           List of voices returned for the specified `page_number` and
           `page_size`.
     source:
-      openapi: tts-openapi.yml
+      openapi: tts-openapi.json

package/.mock/definition/tts/voices.yml CHANGED Viewed

@@ -15,7 +15,7 @@ service:
         offset: $request.page_number
         results: $response.voices_page
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: List voices
       request:
         name: VoicesListRequest
@@ -70,11 +70,11 @@ service:
               page_size: 10
               total_pages: 1
               voices_page:
-                - name: David Hume
-                  id: c42352c0-4566-455d-b180-0f654b65b525
+                - id: c42352c0-4566-455d-b180-0f654b65b525
+                  name: David Hume
                   provider: CUSTOM_VOICE
-                - name: Goliath Hume
-                  id: d87352b0-26a3-4b11-081b-d157a5674d19
+                - id: d87352b0-26a3-4b11-081b-d157a5674d19
+                  name: Goliath Hume
                   provider: CUSTOM_VOICE
     create:
       path: /v0/tts/voices
@@ -89,7 +89,7 @@ service:
         ensuring consistent speech style and prosody. For more details on voice
         creation, see the [Voices Guide](/docs/text-to-speech-tts/voices).
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Create voice
       request:
         name: PostedVoice
@@ -117,8 +117,8 @@ service:
             name: David Hume
           response:
             body:
-              name: David Hume
               id: c42352c0-4566-455d-b180-0f654b65b525
+              name: David Hume
               provider: CUSTOM_VOICE
     delete:
       path: /v0/tts/voices
@@ -126,7 +126,7 @@ service:
       auth: true
       docs: Deletes a previously generated custom voice.
       source:
-        openapi: tts-openapi.yml
+        openapi: tts-openapi.json
       display-name: Delete voice
       request:
         name: VoicesDeleteRequest
@@ -140,4 +140,4 @@ service:
         - query-parameters:
             name: David Hume
   source:
-    openapi: tts-openapi.yml
+    openapi: tts-openapi.json

package/api/resources/empathicVoice/resources/chat/client/Client.js CHANGED Viewed

@@ -82,6 +82,9 @@ class Chat {
         if (args.verboseTranscription !== null) {
             queryParams["verbose_transcription"] = args.verboseTranscription ? "true" : "false";
         }
+        if (args.voiceId !== null && args.voiceId !== undefined && args.voiceId !== "") {
+            queryParams["voice_id"] = args.voiceId;
+        }
         if (args.queryParams !== null && args.queryParams !== undefined) {
             for (const [name, value] of Object.entries(args.queryParams)) {
                 queryParams[name] = value;

package/api/resources/empathicVoice/types/ContextType.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 /**
  * This file was auto-generated by Fern from our API Definition.
  */
-export type ContextType = "temporary" | "persistent";
+export type ContextType = "persistent" | "temporary";
 export declare const ContextType: {
-    readonly Temporary: "temporary";
     readonly Persistent: "persistent";
+    readonly Temporary: "temporary";
 };

package/api/resources/empathicVoice/types/ContextType.js CHANGED Viewed

@@ -5,6 +5,6 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ContextType = void 0;
 exports.ContextType = {
-    Temporary: "temporary",
     Persistent: "persistent",
+    Temporary: "temporary",
 };

package/api/resources/empathicVoice/types/JsonMessage.d.ts CHANGED Viewed

@@ -2,4 +2,4 @@
  * This file was auto-generated by Fern from our API Definition.
  */
 import * as Hume from "../../../index";
-export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage | Hume.empathicVoice.AssistantProsody;
+export type JsonMessage = Hume.empathicVoice.AssistantEnd | Hume.empathicVoice.AssistantMessage | Hume.empathicVoice.AssistantProsody | Hume.empathicVoice.ChatMetadata | Hume.empathicVoice.WebSocketError | Hume.empathicVoice.UserInterruption | Hume.empathicVoice.UserMessage | Hume.empathicVoice.ToolCallMessage | Hume.empathicVoice.ToolResponseMessage | Hume.empathicVoice.ToolErrorMessage;

package/api/resources/empathicVoice/types/ReturnConfig.d.ts CHANGED Viewed

@@ -45,7 +45,8 @@ export interface ReturnConfig {
      * Hume's eLLM (empathic Large Language Model) is a multimodal language model that takes into account both expression measures and language. The eLLM generates short, empathic language responses and guides text-to-speech (TTS) prosody.
      */
     ellmModel?: Hume.empathicVoice.ReturnEllmModel;
-    voice?: unknown;
+    /** A voice specification associated with this Config. */
+    voice?: Hume.empathicVoice.ReturnVoice;
     prompt?: Hume.empathicVoice.ReturnPrompt;
     /** Map of webhooks associated with this config. */
     webhooks?: (Hume.empathicVoice.ReturnWebhookSpec | undefined)[];

package/api/resources/empathicVoice/types/ReturnVoice.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+import * as Hume from "../../../index";
+/**
+ * An Octave voice available for text-to-speech
+ */
+export interface ReturnVoice {
+    id?: string;
+    name?: string;
+    provider?: Hume.empathicVoice.VoiceProvider;
+}

package/api/resources/empathicVoice/types/ReturnVoice.js ADDED Viewed

@@ -0,0 +1,5 @@
+"use strict";
+/**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });

package/api/resources/empathicVoice/types/VoiceProvider.d.ts CHANGED Viewed

@@ -1,9 +1,8 @@
 /**
  * This file was auto-generated by Fern from our API Definition.
  */
-export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE" | "OCTAVE_COMBINED";
+export type VoiceProvider = "HUME_AI" | "CUSTOM_VOICE";
 export declare const VoiceProvider: {
     readonly HumeAi: "HUME_AI";
     readonly CustomVoice: "CUSTOM_VOICE";
-    readonly OctaveCombined: "OCTAVE_COMBINED";
 };

package/api/resources/empathicVoice/types/VoiceProvider.js CHANGED Viewed

@@ -7,5 +7,4 @@ exports.VoiceProvider = void 0;
 exports.VoiceProvider = {
     HumeAi: "HUME_AI",
     CustomVoice: "CUSTOM_VOICE",
-    OctaveCombined: "OCTAVE_COMBINED",
 };

package/api/resources/empathicVoice/types/index.d.ts CHANGED Viewed

@@ -38,6 +38,8 @@ export * from "./LanguageModelType";
 export * from "./ModelProviderEnum";
 export * from "./ValidationErrorLocItem";
 export * from "./ValidationError";
+export * from "./VoiceId";
+export * from "./VoiceName";
 export * from "./WebhookEventChatEnded";
 export * from "./WebhookEventChatStartType";
 export * from "./WebhookEventChatStarted";
@@ -102,7 +104,6 @@ export * from "./PostedEventMessageSpec";
 export * from "./PostedTimeoutSpec";
 export * from "./ReturnEventMessageSpec";
 export * from "./ReturnTimeoutSpec";
-export * from "./VoiceProvider";
-export * from "./VoiceId";
-export * from "./VoiceName";
 export * from "./VoiceRef";
+export * from "./ReturnVoice";
+export * from "./VoiceProvider";

package/api/resources/empathicVoice/types/index.js CHANGED Viewed

@@ -54,6 +54,8 @@ __exportStar(require("./LanguageModelType"), exports);
 __exportStar(require("./ModelProviderEnum"), exports);
 __exportStar(require("./ValidationErrorLocItem"), exports);
 __exportStar(require("./ValidationError"), exports);
+__exportStar(require("./VoiceId"), exports);
+__exportStar(require("./VoiceName"), exports);
 __exportStar(require("./WebhookEventChatEnded"), exports);
 __exportStar(require("./WebhookEventChatStartType"), exports);
 __exportStar(require("./WebhookEventChatStarted"), exports);
@@ -118,7 +120,6 @@ __exportStar(require("./PostedEventMessageSpec"), exports);
 __exportStar(require("./PostedTimeoutSpec"), exports);
 __exportStar(require("./ReturnEventMessageSpec"), exports);
 __exportStar(require("./ReturnTimeoutSpec"), exports);
-__exportStar(require("./VoiceProvider"), exports);
-__exportStar(require("./VoiceId"), exports);
-__exportStar(require("./VoiceName"), exports);
 __exportStar(require("./VoiceRef"), exports);
+__exportStar(require("./ReturnVoice"), exports);
+__exportStar(require("./VoiceProvider"), exports);

package/api/resources/tts/client/Client.d.ts CHANGED Viewed

@@ -44,10 +44,6 @@ export declare class Tts {
      *
      * @example
      *     await client.tts.synthesizeJson({
-     *         utterances: [{
-     *                 text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
-     *                 description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
-     *             }],
      *         context: {
      *             utterances: [{
      *                     text: "How can people see beauty so differently?",
@@ -57,7 +53,11 @@ export declare class Tts {
      *         format: {
      *             type: "mp3"
      *         },
-     *         numGenerations: 1
+     *         numGenerations: 1,
+     *         utterances: [{
+     *                 text: "Beauty is no quality in things themselves: It exists merely in the mind which contemplates them.",
+     *                 description: "Middle-aged masculine voice with a clear, rhythmic Scots lilt, rounded vowels, and a warm, steady tone with an articulate, academic quality."
+     *             }]
      *     })
      */
     synthesizeJson(request: Hume.tts.PostedTts, requestOptions?: Tts.RequestOptions): core.HttpResponsePromise<Hume.tts.ReturnTts>;