RubyGems - elevenlabs_client - Versions diffs - 0.3.0 → 0.5.0 - Mend

elevenlabs_client 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +52 -1
data/README.md +78 -1
data/lib/elevenlabs_client/client.rb +63 -1
data/lib/elevenlabs_client/endpoints/audio_isolation.rb +71 -0
data/lib/elevenlabs_client/endpoints/audio_native.rb +103 -0
data/lib/elevenlabs_client/endpoints/dubs.rb +208 -2
data/lib/elevenlabs_client/endpoints/forced_alignment.rb +41 -0
data/lib/elevenlabs_client/endpoints/speech_to_speech.rb +125 -0
data/lib/elevenlabs_client/endpoints/speech_to_text.rb +108 -0
data/lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb +50 -0
data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb +1 -0
data/lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb +75 -0
data/lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb +73 -0
data/lib/elevenlabs_client/endpoints/voices.rb +362 -0
data/lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb +250 -0
data/lib/elevenlabs_client/version.rb +1 -1
data/lib/elevenlabs_client.rb +9 -2
metadata +25 -2

data/lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb ADDED Viewed

@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+require "uri"
+module ElevenlabsClient
+  # Endpoint wrapper for text-to-speech generation that also returns
+  # character-level timing information alongside the audio.
+  class TextToSpeechWithTimestamps
+    # Query-string options that are forwarded only when given a truthy value.
+    TRUTHY_QUERY_OPTIONS = %i[optimize_streaming_latency output_format].freeze
+    # Request-body options that are forwarded only when given a truthy value.
+    TRUTHY_BODY_OPTIONS = %i[
+      model_id language_code voice_settings pronunciation_dictionary_locators seed
+      previous_text next_text previous_request_ids next_request_ids apply_text_normalization
+    ].freeze
+    # Boolean request-body options: only nil is skipped, so an explicit
+    # false still reaches the API.
+    BOOLEAN_BODY_OPTIONS = %i[apply_language_text_normalization use_pvc_as_ivc].freeze
+    # @param client [Object] HTTP client; this class only calls #post(endpoint, body) on it
+    def initialize(client)
+      @client = client
+    end
+    # POST /v1/text-to-speech/{voice_id}/with-timestamps
+    # Generate speech from text with precise character-level timing information
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/with-timestamps
+    #
+    # :enable_logging, :optimize_streaming_latency and :output_format travel in
+    # the URL query string (form-encoded); every other option is sent in the
+    # request body. Options that are absent are not sent at all.
+    #
+    # @param voice_id [String] Voice ID to be used
+    # @param text [String] The text that will get converted into speech
+    # @param options [Hash] Optional TTS parameters
+    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
+    # @option options [String] :language_code ISO 639-1 language code for text normalization
+    # @option options [Hash] :voice_settings Voice settings overriding stored settings
+    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+    # @option options [String] :previous_text Text that came before current request
+    # @option options [String] :next_text Text that comes after current request
+    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
+    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
+    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+    # @option options [Boolean] :apply_language_text_normalization Language text normalization
+    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
+    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
+    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
+    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+    # @return [Hash] Response containing audio_base64, alignment, and normalized_alignment
+    def generate(voice_id, text, **options)
+      # enable_logging is a boolean flag, so false must be distinguishable from "not given".
+      query = {}
+      query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
+      TRUTHY_QUERY_OPTIONS.each { |key| query[key] = options[key] if options[key] }
+      endpoint = "/v1/text-to-speech/#{voice_id}/with-timestamps"
+      # encode_www_form escapes reserved characters instead of splicing raw values into the URL.
+      endpoint += "?#{URI.encode_www_form(query)}" unless query.empty?
+      body = { text: text }
+      TRUTHY_BODY_OPTIONS.each { |key| body[key] = options[key] if options[key] }
+      BOOLEAN_BODY_OPTIONS.each { |key| body[key] = options[key] unless options[key].nil? }
+      @client.post(endpoint, body)
+    end
+    # Alias for backward compatibility
+    alias_method :text_to_speech_with_timestamps, :generate
+    private
+    attr_reader :client
+  end
+end

data/lib/elevenlabs_client/endpoints/voices.rb CHANGED Viewed

@@ -133,12 +133,374 @@ module ElevenlabsClient
       false
     end
+    # POST /v1/similar-voices
+    # Returns a list of shared voices similar to the provided audio sample
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/similar-voices
+    #
+    # @param audio_file [IO, File] Audio file to find similar voices for
+    # @param filename [String] Original filename for the audio file
+    # @param options [Hash] Optional parameters
+    # @option options [Float] :similarity_threshold Threshold for voice similarity (0-2)
+    # @option options [Integer] :top_k Number of most similar voices to return (1-100)
+    # @return [Hash] Response containing similar voices
+    def find_similar(audio_file, filename, **options)
+      # The uploaded sample is the only mandatory multipart field.
+      form = { audio_file: @client.file_part(audio_file, filename) }
+      # Tuning knobs are forwarded only when the caller supplied a truthy value.
+      %i[similarity_threshold top_k].each do |key|
+        form[key] = options[key] if options[key]
+      end
+      @client.post_multipart("/v1/similar-voices", form)
+    end
+    # POST /v1/voices/add
+    # Creates a new IVC (Instant Voice Cloning) voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/add-voice
+    #
+    # @param name [String] Name of the voice
+    # @param audio_files [Array<IO, File>] Array of audio files for voice cloning
+    # @param filenames [Array<String>] Array of original filenames
+    # @param options [Hash] Optional parameters
+    # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
+    # @option options [String] :description Description of the voice
+    # @option options [String] :labels Serialized labels dictionary
+    # @return [Hash] Response containing voice_id and requires_verification status
+    def create_ivc(name, audio_files, filenames, **options)
+      payload = { name: name }
+      # remove_background_noise is a boolean: forward an explicit false, skip only nil.
+      payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?
+      payload[:description] = options[:description] if options[:description]
+      payload[:labels] = options[:labels] if options[:labels]
+      # Build one part per file; a missing filename falls back to "audio_<index>.mp3".
+      parts = audio_files.each_with_index.map do |file, index|
+        @client.file_part(file, filenames[index] || "audio_#{index}.mp3")
+      end
+      # All parts go under ONE key as an array. Assigning payload["files[]"] inside
+      # the loop overwrote the previous entry, so only the last file was uploaded.
+      # NOTE(review): assumes post_multipart is backed by Faraday's multipart
+      # middleware, which expands an Array under :files into repeated "files[]"
+      # parts - confirm against the client implementation.
+      payload[:files] = parts unless parts.empty?
+      @client.post_multipart("/v1/voices/add", payload)
+    end
+    # GET /v1/voices/settings/default
+    # Gets the default settings for voices
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/default-settings
+    #
+    # @return [Hash] Default voice settings
+    def get_default_settings
+      # Fixed path, no parameters: simply delegate to the client's GET.
+      @client.get("/v1/voices/settings/default")
+    end
+    # GET /v1/voices/{voice_id}/settings
+    # Returns the settings for a specific voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-settings
+    #
+    # @param voice_id [String] Voice ID
+    # @return [Hash] Voice settings
+    def get_settings(voice_id)
+      # The voice ID is spliced into the path as-is.
+      path = format("/v1/voices/%s/settings", voice_id)
+      @client.get(path)
+    end
+    # POST /v1/voices/{voice_id}/settings/edit
+    # Edit settings for a specific voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/edit-settings
+    #
+    # @param voice_id [String] Voice ID
+    # @param options [Hash] Voice settings to update
+    # @option options [Float] :stability Stability setting (0.0-1.0)
+    # @option options [Boolean] :use_speaker_boost Enable speaker boost
+    # @option options [Float] :similarity_boost Similarity boost setting (0.0-1.0)
+    # @option options [Float] :style Style exaggeration (0.0-1.0)
+    # @option options [Float] :speed Speed adjustment (0.25-4.0)
+    # @return [Hash] Response with status
+    def edit_settings(voice_id, **options)
+      settings = {}
+      %i[stability use_speaker_boost similarity_boost style speed].each do |key|
+        value = options[key]
+        # use_speaker_boost is a boolean, so an explicit false is forwarded;
+        # every other setting is sent only when truthy.
+        wanted = key == :use_speaker_boost ? !value.nil? : value
+        settings[key] = value if wanted
+      end
+      @client.post("/v1/voices/#{voice_id}/settings/edit", settings)
+    end
+    # GET /v1/voices/{voice_id}/samples/{sample_id}/audio
+    # Returns the audio corresponding to a sample attached to a voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-sample-audio
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @return [String] Binary audio data
+    def get_sample_audio(voice_id, sample_id)
+      # Path addresses one sample of one voice; the client's GET result is returned untouched.
+      path = format("/v1/voices/%s/samples/%s/audio", voice_id, sample_id)
+      @client.get(path)
+    end
+    # POST /v1/voices/pvc
+    # Creates a new PVC (Professional Voice Cloning) voice with metadata but no samples
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/create-pvc
+    #
+    # @param name [String] Name of the voice (max 100 characters)
+    # @param language [String] Language used in the samples
+    # @param options [Hash] Optional parameters
+    # @option options [String] :description Description (max 500 characters)
+    # @option options [Hash] :labels Labels dictionary for the voice
+    # @return [Hash] Response containing voice_id
+    def create_pvc(name, language, **options)
+      # name and language are always sent; the rest only when truthy.
+      body = { name: name, language: language }
+      %i[description labels].each { |key| body[key] = options[key] if options[key] }
+      @client.post("/v1/voices/pvc", body)
+    end
+    # POST /v1/voices/pvc/{voice_id}
+    # Edit PVC voice metadata
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc
+    #
+    # @param voice_id [String] Voice ID
+    # @param options [Hash] Parameters to update
+    # @option options [String] :name New name (max 100 characters)
+    # @option options [String] :language New language
+    # @option options [String] :description New description (max 500 characters)
+    # @option options [Hash] :labels New labels dictionary
+    # @return [Hash] Response containing voice_id
+    def update_pvc(voice_id, **options)
+      # Collect only the metadata fields the caller supplied with a truthy value.
+      changes = %i[name language description labels].each_with_object({}) do |key, acc|
+        acc[key] = options[key] if options[key]
+      end
+      @client.post("/v1/voices/pvc/#{voice_id}", changes)
+    end
+    # POST /v1/voices/pvc/{voice_id}/train
+    # Start PVC training process for a voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/train-pvc
+    #
+    # @param voice_id [String] Voice ID
+    # @param options [Hash] Optional parameters
+    # @option options [String] :model_id Model ID to use for conversion
+    # @return [Hash] Response with status
+    def train_pvc(voice_id, **options)
+      model = options[:model_id]
+      # An empty body is posted when no (truthy) model_id was supplied.
+      body = model ? { model_id: model } : {}
+      @client.post("/v1/voices/pvc/#{voice_id}/train", body)
+    end
+    # POST /v1/voices/pvc/{voice_id}/samples
+    # Add audio samples to a PVC voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/add-pvc-samples
+    #
+    # @param voice_id [String] Voice ID
+    # @param audio_files [Array<IO, File>] Audio files for the voice
+    # @param filenames [Array<String>] Original filenames
+    # @param options [Hash] Optional parameters
+    # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
+    # @return [Array<Hash>] Array of sample information
+    def add_pvc_samples(voice_id, audio_files, filenames, **options)
+      payload = {}
+      # remove_background_noise is a boolean: forward an explicit false, skip only nil.
+      payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?
+      # Build one part per file; a missing filename falls back to "audio_<index>.mp3".
+      parts = audio_files.each_with_index.map do |file, index|
+        @client.file_part(file, filenames[index] || "audio_#{index}.mp3")
+      end
+      # All parts go under ONE key as an array. Assigning payload["files[]"] inside
+      # the loop overwrote the previous entry, so only the last sample was uploaded.
+      # NOTE(review): assumes post_multipart is backed by Faraday's multipart
+      # middleware, which expands an Array under :files into repeated "files[]"
+      # parts - confirm against the client implementation.
+      payload[:files] = parts unless parts.empty?
+      @client.post_multipart("/v1/voices/pvc/#{voice_id}/samples", payload)
+    end
+    # POST /v1/voices/pvc/{voice_id}/samples/{sample_id}
+    # Update a PVC voice sample - apply noise removal or select speaker
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc-sample
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @param options [Hash] Update parameters
+    # @option options [Boolean] :remove_background_noise Remove background noise
+    # @option options [Array<String>] :selected_speaker_ids Speaker IDs for training
+    # @option options [Integer] :trim_start_time Start time in milliseconds
+    # @option options [Integer] :trim_end_time End time in milliseconds
+    # @return [Hash] Response containing voice_id
+    def update_pvc_sample(voice_id, sample_id, **options)
+      noise_flag = options[:remove_background_noise]
+      # The boolean flag is sent whenever it is non-nil (false included).
+      body = noise_flag.nil? ? {} : { remove_background_noise: noise_flag }
+      # The remaining fields are sent only when truthy.
+      %i[selected_speaker_ids trim_start_time trim_end_time].each do |key|
+        body[key] = options[key] if options[key]
+      end
+      @client.post("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}", body)
+    end
+    # DELETE /v1/voices/pvc/{voice_id}/samples/{sample_id}
+    # Delete a sample from a PVC voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/delete-pvc-sample
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @return [Hash] Response with status
+    def delete_pvc_sample(voice_id, sample_id)
+      # Issues a DELETE against the sample's own resource path.
+      @client.delete("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}")
+    end
+    # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/audio
+    # Retrieve voice sample audio with or without noise removal
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-sample-audio
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @param options [Hash] Optional parameters
+    # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
+    # @return [Hash] Response with base64 audio data and metadata
+    def get_pvc_sample_audio(voice_id, sample_id, **options)
+      noise_flag = options[:remove_background_noise]
+      # Pass the flag as a GET parameter whenever it is non-nil (false included).
+      query = noise_flag.nil? ? {} : { remove_background_noise: noise_flag }
+      @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/audio", query)
+    end
+    # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/waveform
+    # Retrieve the visual waveform of a voice sample
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-waveform
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @return [Hash] Response with sample_id and visual_waveform array
+    def get_pvc_sample_waveform(voice_id, sample_id)
+      # Plain GET on the sample's waveform sub-resource.
+      @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/waveform")
+    end
+    # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers
+    # Retrieve speaker separation status and detected speakers
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-speakers
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @return [Hash] Response with separation status and speakers
+    def get_pvc_speaker_separation_status(voice_id, sample_id)
+      # Plain GET on the sample's speakers sub-resource.
+      @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/speakers")
+    end
+    # POST /v1/voices/pvc/{voice_id}/samples/{sample_id}/separate-speakers
+    # Start speaker separation process for a sample
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/start-speaker-separation
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @return [Hash] Response with status
+    def start_pvc_speaker_separation(voice_id, sample_id)
+      # POST with the path only; no body argument is handed to the client.
+      @client.post("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/separate-speakers")
+    end
+    # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers/{speaker_id}/audio
+    # Retrieve separated audio for a specific speaker
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-separated-speaker-audio
+    #
+    # @param voice_id [String] Voice ID
+    # @param sample_id [String] Sample ID
+    # @param speaker_id [String] Speaker ID
+    # @return [Hash] Response with base64 audio data and metadata
+    def get_pvc_separated_speaker_audio(voice_id, sample_id, speaker_id)
+      # Path narrows voice -> sample -> speaker before requesting the audio.
+      path = format("/v1/voices/pvc/%s/samples/%s/speakers/%s/audio", voice_id, sample_id, speaker_id)
+      @client.get(path)
+    end
+    # POST /v1/voices/pvc/{voice_id}/verification
+    # Request manual verification for a PVC voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/request-pvc-verification
+    #
+    # @param voice_id [String] Voice ID
+    # @param verification_files [Array<IO, File>] Verification documents
+    # @param filenames [Array<String>] Original filenames
+    # @param options [Hash] Optional parameters
+    # @option options [String] :extra_text Extra text for verification process
+    # @return [Hash] Response with status
+    def request_pvc_verification(voice_id, verification_files, filenames, **options)
+      payload = {}
+      payload[:extra_text] = options[:extra_text] if options[:extra_text]
+      # Build one part per document; a missing filename falls back to "verification_<index>.pdf".
+      parts = verification_files.each_with_index.map do |file, index|
+        @client.file_part(file, filenames[index] || "verification_#{index}.pdf")
+      end
+      # All parts go under ONE key as an array. Assigning payload["files[]"] inside
+      # the loop overwrote the previous entry, so only the last document was uploaded.
+      # NOTE(review): assumes post_multipart is backed by Faraday's multipart
+      # middleware, which expands an Array under :files into repeated "files[]"
+      # parts - confirm against the client implementation.
+      payload[:files] = parts unless parts.empty?
+      @client.post_multipart("/v1/voices/pvc/#{voice_id}/verification", payload)
+    end
+    # GET /v1/voices/pvc/{voice_id}/captcha
+    # Get captcha for PVC voice verification
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-captcha
+    #
+    # @param voice_id [String] Voice ID
+    # @return [Hash] Captcha data
+    def get_pvc_captcha(voice_id)
+      # Same path as verify_pvc_captcha, but fetched with GET.
+      @client.get("/v1/voices/pvc/#{voice_id}/captcha")
+    end
+    # POST /v1/voices/pvc/{voice_id}/captcha
+    # Submit captcha verification for PVC voice
+    # Documentation: https://elevenlabs.io/docs/api-reference/voices/verify-pvc-captcha
+    #
+    # @param voice_id [String] Voice ID
+    # @param recording_file [IO, File] Audio recording of the user
+    # @param filename [String] Original filename for the recording
+    # @return [Hash] Response with status
+    def verify_pvc_captcha(voice_id, recording_file, filename)
+      # The recording is wrapped as a file part and is the sole multipart field.
+      recording_part = @client.file_part(recording_file, filename)
+      @client.post_multipart("/v1/voices/pvc/#{voice_id}/captcha", { recording: recording_part })
+    end
     # Alias methods for backward compatibility and convenience
     alias_method :get_voice, :get
     alias_method :list_voices, :list
     alias_method :create_voice, :create
     alias_method :edit_voice, :edit
     alias_method :delete_voice, :delete
+    # Shorter names for the similar-voice search and the voice-settings helpers
+    alias_method :similar_voices, :find_similar
+    alias_method :default_settings, :get_default_settings
+    alias_method :voice_settings, :get_settings
+    alias_method :update_settings, :edit_settings
     private