RubyGems - eleven_rb - Versions diffs - 0.3.0 → 0.4.0 - Mend

eleven_rb 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/CHANGELOG.md +14 -0
data/README.md +24 -1
data/lib/eleven_rb/client.rb +8 -0
data/lib/eleven_rb/http/client.rb +4 -3
data/lib/eleven_rb/resources/base.rb +4 -3
data/lib/eleven_rb/resources/speech_to_speech.rb +94 -0
data/lib/eleven_rb/version.rb +1 -1
data/lib/eleven_rb.rb +1 -0
metadata +4 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8f8b7a2ab5d7ebe900552e83ae3e8499545f788da927ffc60d54185464d3bbc3
-  data.tar.gz: ee1b62923b6fc88304f4c78a60f68164617fd0f46d68cca09ad1496f14dda10d
+  metadata.gz: fa71eff851a0c6b80f139e801962bceaf1bb371f6e9a0cd325c47b2ee6f4994c
+  data.tar.gz: aa640970faba75afe3cbacdc16958ee63c1f2ecfb009512cd9e8182117e16d29
 SHA512:
-  metadata.gz: 3079784a64fe6d3bff8e2c631d46763dcc6e352da16e9a7599d0ef0df7be45694c3b00279d3362f6459f7168c72f26b0e460eb80bd07239b675394979fd9cc59
-  data.tar.gz: 224530d093fdbd9b489adf6199cfd4cd19e22b2e9074bd42f59345fd0a5efcd06930c9ef9e9bfe9ab9034621c54eaf6c9443ba1b5aee91296c7365347fbcbe67
+  metadata.gz: a537ba9de014afc366c348a71613f257b6380ae784979fb42cd55522610c85661e0e34907c770642fa268051cbb91b24110e00d5e8b7305d5c913a81e04705a6
+  data.tar.gz: c1f4e236fb327b737b4e6346bf3a71bfec357d4f7f09a0e1eaf9c8889ef2251ff40b8eeb3be959e98b58e2a76ff6305e7391d64f0c33a9ce1fb38897b958f1fa

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.4.0] - 2026-03-10
+### Added
+- Speech-to-Speech voice conversion via `client.sts.convert` (`POST /v1/speech-to-speech/{voice_id}`)
+- `Client#speech_to_speech` resource with `sts` alias
+- Accepts file paths (String) or IO objects (IO, StringIO, Tempfile) for audio input
+- Multipart upload with binary response support
+- Default model: `eleven_english_sts_v2`
+### Changed
+- `Resources::Base#post_multipart` and `HTTP::Client#post_multipart` now accept `response_type:` parameter (defaults to `:json`, backwards-compatible)
 ## [0.3.0] - 2026-02-08
 ### Added

data/README.md CHANGED Viewed

@@ -4,11 +4,12 @@
 [![CI](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml/badge.svg)](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Sound Effects, and Music API.
+A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Sound Effects, and Music API.
 ## Features
 - Text-to-Speech generation and streaming
+- Speech-to-Speech voice conversion
 - Sound effects generation from text descriptions
 - Music generation from prompts or composition plans
 - Voice management (list, get, create, update, delete)
@@ -88,6 +89,28 @@ File.open("output.mp3", "wb") do |file|
 end
 ```
+### Speech-to-Speech
+```ruby
+# Convert audio to a different voice
+audio = client.sts.convert("input.mp3", voice_id: "voice_id")
+audio.save_to_file("output.mp3")
+# With options
+audio = client.sts.convert(
+  "input.mp3",
+  voice_id: "voice_id",
+  model_id: "eleven_english_sts_v2",
+  voice_settings: { stability: 0.5, similarity_boost: 0.75 },
+  remove_background_noise: true,
+  output_format: "mp3_44100_192"
+)
+# From an IO object
+io = File.open("input.mp3", "rb")
+audio = client.sts.convert(io, voice_id: "voice_id")
+```
 ### Sound Effects
 ```ruby

data/lib/eleven_rb/client.rb CHANGED Viewed

@@ -79,6 +79,14 @@ module ElevenRb
       @user ||= Resources::User.new(http_client)
     end
+    # Speech-to-speech resource
+    #
+    # @return [Resources::SpeechToSpeech]
+    def speech_to_speech
+      @speech_to_speech ||= Resources::SpeechToSpeech.new(http_client)
+    end
+    alias sts speech_to_speech
     # Sound effects resource
     #
     # @return [Resources::SoundEffects]

data/lib/eleven_rb/http/client.rb CHANGED Viewed

@@ -49,9 +49,10 @@ module ElevenRb
       #
       # @param path [String] the API path
       # @param params [Hash] form parameters including files
-      # @return [Hash] parsed JSON response
-      def post_multipart(path, params)
-        request(:post, path, body: params, multipart: true)
+      # @param response_type [Symbol] :json or :binary
+      # @return [Hash, String] parsed JSON or binary response
+      def post_multipart(path, params, response_type: :json)
+        request(:post, path, body: params, multipart: true, response_type: response_type)
       end
       # Make a streaming POST request

data/lib/eleven_rb/resources/base.rb CHANGED Viewed

@@ -64,9 +64,10 @@ module ElevenRb
       #
       # @param path [String]
       # @param params [Hash]
-      # @return [Hash]
-      def post_multipart(path, params)
-        http_client.post_multipart(path, params)
+      # @param response_type [Symbol] :json or :binary
+      # @return [Hash, String]
+      def post_multipart(path, params, response_type: :json)
+        http_client.post_multipart(path, params, response_type: response_type)
       end
       # Validate presence of a value

data/lib/eleven_rb/resources/speech_to_speech.rb ADDED Viewed

@@ -0,0 +1,94 @@
+# frozen_string_literal: true
+module ElevenRb
+  module Resources
+    # Speech-to-speech voice conversion resource
+    #
+    # Converts audio from one voice to another while preserving timing,
+    # cadence, and emotion. Uses ElevenLabs STS API with multipart upload.
+    #
+    # @example Convert a file
+    #   audio = client.sts.convert("input.mp3", voice_id: "abc123")
+    #   audio.save_to_file("output.mp3")
+    #
+    # @example Convert an IO object
+    #   io = File.open("input.mp3", "rb")
+    #   audio = client.sts.convert(io, voice_id: "abc123")
+    class SpeechToSpeech < Base
+      DEFAULT_MODEL = 'eleven_english_sts_v2'
+      MAX_INPUT_BYTES = 50 * 1024 * 1024 # 50 MB
+      # Convert speech from one voice to another
+      #
+      # @param audio_input [String, IO, Tempfile] file path or IO object of source audio
+      # @param voice_id [String] target voice ID to convert into
+      # @param model_id [String] STS model (default: eleven_english_sts_v2)
+      # @param voice_settings [Hash, nil] override voice settings (stability, similarity_boost)
+      # @param remove_background_noise [Boolean] isolate speech before conversion
+      # @param output_format [String] audio output format
+      # @param seed [Integer, nil] for reproducible results
+      # @return [Objects::Audio]
+      def convert(audio_input, voice_id:, model_id: DEFAULT_MODEL,
+                  voice_settings: nil, remove_background_noise: false,
+                  output_format: 'mp3_44100_128', seed: nil)
+        validate_presence!(voice_id, 'voice_id')
+        file = prepare_upload(audio_input)
+        params = {
+          audio: file,
+          model_id: model_id
+        }
+        params[:voice_settings] = voice_settings.to_json if voice_settings
+        params[:remove_background_noise] = remove_background_noise.to_s
+        params[:seed] = seed.to_s if seed
+        path = "/speech-to-speech/#{voice_id}?output_format=#{output_format}"
+        response = post_multipart(path, params, response_type: :binary)
+        audio = Objects::Audio.new(
+          data: response,
+          format: output_format,
+          voice_id: voice_id,
+          text: '[speech-to-speech]',
+          model_id: model_id
+        )
+        notify_audio_generated(audio, voice_id: voice_id, model_id: model_id)
+        audio
+      ensure
+        file&.close if file.respond_to?(:close) && audio_input.is_a?(String)
+      end
+      private
+      def notify_audio_generated(audio, voice_id:, model_id:)
+        cost_info = Objects::CostInfo.new(text: '[sts]', voice_id: voice_id, model_id: model_id)
+        http_client.config.trigger(
+          :on_audio_generated,
+          audio: audio,
+          voice_id: voice_id,
+          text: '[speech-to-speech]',
+          cost_info: cost_info.to_h
+        )
+      end
+      # Prepare the audio input for multipart upload
+      #
+      # @param input [String, IO, StringIO, Tempfile] file path or IO object
+      # @return [IO] file handle ready for upload
+      def prepare_upload(input)
+        case input
+        when String
+          raise Errors::ValidationError, "File not found: #{input}" unless File.exist?(input)
+          File.open(input, 'rb')
+        when IO, StringIO, Tempfile
+          input
+        else
+          raise ArgumentError, "Expected file path or IO object, got #{input.class}"
+        end
+      end
+    end
+  end
+end

data/lib/eleven_rb/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module ElevenRb
-  VERSION = '0.3.0'
+  VERSION = '0.4.0'
 end

data/lib/eleven_rb.rb CHANGED Viewed

@@ -108,6 +108,7 @@ require_relative 'eleven_rb/resources/models'
 require_relative 'eleven_rb/resources/user'
 require_relative 'eleven_rb/resources/sound_effects'
 require_relative 'eleven_rb/resources/music'
+require_relative 'eleven_rb/resources/speech_to_speech'
 # High-level components
 require_relative 'eleven_rb/voice_slot_manager'

metadata CHANGED Viewed

@@ -1,14 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: eleven_rb
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.4.0
 platform: ruby
 authors:
 - Web Ventures Ltd
-autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-02-08 00:00:00.000000000 Z
+date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: base64
@@ -158,6 +157,7 @@ files:
 - lib/eleven_rb/resources/models.rb
 - lib/eleven_rb/resources/music.rb
 - lib/eleven_rb/resources/sound_effects.rb
+- lib/eleven_rb/resources/speech_to_speech.rb
 - lib/eleven_rb/resources/text_to_speech.rb
 - lib/eleven_rb/resources/user.rb
 - lib/eleven_rb/resources/voice_library.rb
@@ -173,7 +173,6 @@ metadata:
   source_code_uri: https://github.com/webventures/eleven_rb
   changelog_uri: https://github.com/webventures/eleven_rb/blob/main/CHANGELOG.md
   rubygems_mfa_required: 'true'
-post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -188,8 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.5.3
-signing_key:
+rubygems_version: 3.6.9
 specification_version: 4
 summary: Ruby client for the ElevenLabs Text-to-Speech API
 test_files: []