eleven_rb 0.3.0 → 0.4.0

This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8f8b7a2ab5d7ebe900552e83ae3e8499545f788da927ffc60d54185464d3bbc3
4
- data.tar.gz: ee1b62923b6fc88304f4c78a60f68164617fd0f46d68cca09ad1496f14dda10d
3
+ metadata.gz: fa71eff851a0c6b80f139e801962bceaf1bb371f6e9a0cd325c47b2ee6f4994c
4
+ data.tar.gz: aa640970faba75afe3cbacdc16958ee63c1f2ecfb009512cd9e8182117e16d29
5
5
  SHA512:
6
- metadata.gz: 3079784a64fe6d3bff8e2c631d46763dcc6e352da16e9a7599d0ef0df7be45694c3b00279d3362f6459f7168c72f26b0e460eb80bd07239b675394979fd9cc59
7
- data.tar.gz: 224530d093fdbd9b489adf6199cfd4cd19e22b2e9074bd42f59345fd0a5efcd06930c9ef9e9bfe9ab9034621c54eaf6c9443ba1b5aee91296c7365347fbcbe67
6
+ metadata.gz: a537ba9de014afc366c348a71613f257b6380ae784979fb42cd55522610c85661e0e34907c770642fa268051cbb91b24110e00d5e8b7305d5c913a81e04705a6
7
+ data.tar.gz: c1f4e236fb327b737b4e6346bf3a71bfec357d4f7f09a0e1eaf9c8889ef2251ff40b8eeb3be959e98b58e2a76ff6305e7391d64f0c33a9ce1fb38897b958f1fa
data/CHANGELOG.md CHANGED
@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.4.0] - 2026-03-10
11
+
12
+ ### Added
13
+
14
+ - Speech-to-Speech voice conversion via `client.sts.convert` (`POST /v1/speech-to-speech/{voice_id}`)
15
+ - `Client#speech_to_speech` resource with `sts` alias
16
+ - Accepts file paths (String) or IO objects (IO, StringIO, Tempfile) for audio input
17
+ - Multipart upload with binary response support
18
+ - Default model: `eleven_english_sts_v2`
19
+
20
+ ### Changed
21
+
22
+ - `Resources::Base#post_multipart` and `HTTP::Client#post_multipart` now accept a `response_type:` keyword argument (`:json` or `:binary`; defaults to `:json`, so existing callers are unaffected)
23
+
10
24
  ## [0.3.0] - 2026-02-08
11
25
 
12
26
  ### Added
data/README.md CHANGED
@@ -4,11 +4,12 @@
4
4
  [![CI](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml/badge.svg)](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml)
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
- A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Sound Effects, and Music API.
7
+ A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Sound Effects, and Music API.
8
8
 
9
9
  ## Features
10
10
 
11
11
  - Text-to-Speech generation and streaming
12
+ - Speech-to-Speech voice conversion
12
13
  - Sound effects generation from text descriptions
13
14
  - Music generation from prompts or composition plans
14
15
  - Voice management (list, get, create, update, delete)
@@ -88,6 +89,28 @@ File.open("output.mp3", "wb") do |file|
88
89
  end
89
90
  ```
90
91
 
92
+ ### Speech-to-Speech
93
+
94
+ ```ruby
95
+ # Convert audio to a different voice
96
+ audio = client.sts.convert("input.mp3", voice_id: "voice_id")
97
+ audio.save_to_file("output.mp3")
98
+
99
+ # With options
100
+ audio = client.sts.convert(
101
+ "input.mp3",
102
+ voice_id: "voice_id",
103
+ model_id: "eleven_english_sts_v2",
104
+ voice_settings: { stability: 0.5, similarity_boost: 0.75 },
105
+ remove_background_noise: true,
106
+ output_format: "mp3_44100_192"
107
+ )
108
+
109
+ # From an IO object
110
+ io = File.open("input.mp3", "rb")
111
+ audio = client.sts.convert(io, voice_id: "voice_id")
112
+ ```
113
+
91
114
  ### Sound Effects
92
115
 
93
116
  ```ruby
@@ -79,6 +79,14 @@ module ElevenRb
79
79
  @user ||= Resources::User.new(http_client)
80
80
  end
81
81
 
82
+ # Speech-to-speech resource
83
+ #
84
+ # @return [Resources::SpeechToSpeech]
85
+ def speech_to_speech
86
+ @speech_to_speech ||= Resources::SpeechToSpeech.new(http_client)
87
+ end
88
+ alias sts speech_to_speech
89
+
82
90
  # Sound effects resource
83
91
  #
84
92
  # @return [Resources::SoundEffects]
@@ -49,9 +49,10 @@ module ElevenRb
49
49
  #
50
50
  # @param path [String] the API path
51
51
  # @param params [Hash] form parameters including files
52
- # @return [Hash] parsed JSON response
53
- def post_multipart(path, params)
54
- request(:post, path, body: params, multipart: true)
52
+ # @param response_type [Symbol] :json or :binary
53
+ # @return [Hash, String] parsed JSON or binary response
54
+ def post_multipart(path, params, response_type: :json)
55
+ request(:post, path, body: params, multipart: true, response_type: response_type)
55
56
  end
56
57
 
57
58
  # Make a streaming POST request
@@ -64,9 +64,10 @@ module ElevenRb
64
64
  #
65
65
  # @param path [String]
66
66
  # @param params [Hash]
67
- # @return [Hash]
68
- def post_multipart(path, params)
69
- http_client.post_multipart(path, params)
67
+ # @param response_type [Symbol] :json or :binary
68
+ # @return [Hash, String]
69
+ def post_multipart(path, params, response_type: :json)
70
+ http_client.post_multipart(path, params, response_type: response_type)
70
71
  end
71
72
 
72
73
  # Validate presence of a value
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ElevenRb
4
+ module Resources
5
+ # Speech-to-speech voice conversion resource
6
+ #
7
+ # Converts audio from one voice to another while preserving timing,
8
+ # cadence, and emotion. Uses the ElevenLabs STS API with a multipart upload.
9
+ #
10
+ # @example Convert a file
11
+ # audio = client.sts.convert("input.mp3", voice_id: "abc123")
12
+ # audio.save_to_file("output.mp3")
13
+ #
14
+ # @example Convert an IO object
15
+ # io = File.open("input.mp3", "rb")
16
+ # audio = client.sts.convert(io, voice_id: "abc123")
17
+ class SpeechToSpeech < Base
18
+ DEFAULT_MODEL = 'eleven_english_sts_v2'
19
+ MAX_INPUT_BYTES = 50 * 1024 * 1024 # 50 MB
20
+
21
+ # Convert speech from one voice to another
22
+ #
23
+ # @param audio_input [String, IO, StringIO, Tempfile] file path or IO object of source audio
24
+ # @param voice_id [String] target voice ID to convert into
25
+ # @param model_id [String] STS model (default: eleven_english_sts_v2)
26
+ # @param voice_settings [Hash, nil] override voice settings (stability, similarity_boost)
27
+ # @param remove_background_noise [Boolean] isolate speech before conversion
28
+ # @param output_format [String] audio output format
29
+ # @param seed [Integer, nil] for reproducible results
30
+ # @return [Objects::Audio]
31
+ def convert(audio_input, voice_id:, model_id: DEFAULT_MODEL,
32
+ voice_settings: nil, remove_background_noise: false,
33
+ output_format: 'mp3_44100_128', seed: nil)
34
+ validate_presence!(voice_id, 'voice_id')
35
+
36
+ file = prepare_upload(audio_input)
37
+
38
+ params = {
39
+ audio: file,
40
+ model_id: model_id
41
+ }
42
+ params[:voice_settings] = voice_settings.to_json if voice_settings
43
+ params[:remove_background_noise] = remove_background_noise.to_s
44
+ params[:seed] = seed.to_s if seed
45
+
46
+ path = "/speech-to-speech/#{voice_id}?output_format=#{output_format}"
47
+ response = post_multipart(path, params, response_type: :binary)
48
+
49
+ audio = Objects::Audio.new(
50
+ data: response,
51
+ format: output_format,
52
+ voice_id: voice_id,
53
+ text: '[speech-to-speech]',
54
+ model_id: model_id
55
+ )
56
+
57
+ notify_audio_generated(audio, voice_id: voice_id, model_id: model_id)
58
+ audio
59
+ ensure
60
+ file&.close if file.respond_to?(:close) && audio_input.is_a?(String)
61
+ end
62
+
63
+ private
64
+
65
+ def notify_audio_generated(audio, voice_id:, model_id:)
66
+ cost_info = Objects::CostInfo.new(text: '[sts]', voice_id: voice_id, model_id: model_id)
67
+ http_client.config.trigger(
68
+ :on_audio_generated,
69
+ audio: audio,
70
+ voice_id: voice_id,
71
+ text: '[speech-to-speech]',
72
+ cost_info: cost_info.to_h
73
+ )
74
+ end
75
+
76
+ # Prepare the audio input for multipart upload
77
+ #
78
+ # @param input [String, IO, StringIO, Tempfile] file path or IO object
79
+ # @return [IO] file handle ready for upload
80
+ def prepare_upload(input)
81
+ case input
82
+ when String
83
+ raise Errors::ValidationError, "File not found: #{input}" unless File.exist?(input)
84
+
85
+ File.open(input, 'rb')
86
+ when IO, StringIO, Tempfile
87
+ input
88
+ else
89
+ raise ArgumentError, "Expected file path or IO object, got #{input.class}"
90
+ end
91
+ end
92
+ end
93
+ end
94
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ElevenRb
4
- VERSION = '0.3.0'
4
+ VERSION = '0.4.0'
5
5
  end
data/lib/eleven_rb.rb CHANGED
@@ -108,6 +108,7 @@ require_relative 'eleven_rb/resources/models'
108
108
  require_relative 'eleven_rb/resources/user'
109
109
  require_relative 'eleven_rb/resources/sound_effects'
110
110
  require_relative 'eleven_rb/resources/music'
111
+ require_relative 'eleven_rb/resources/speech_to_speech'
111
112
 
112
113
  # High-level components
113
114
  require_relative 'eleven_rb/voice_slot_manager'
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: eleven_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Web Ventures Ltd
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2026-02-08 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: base64
@@ -158,6 +157,7 @@ files:
158
157
  - lib/eleven_rb/resources/models.rb
159
158
  - lib/eleven_rb/resources/music.rb
160
159
  - lib/eleven_rb/resources/sound_effects.rb
160
+ - lib/eleven_rb/resources/speech_to_speech.rb
161
161
  - lib/eleven_rb/resources/text_to_speech.rb
162
162
  - lib/eleven_rb/resources/user.rb
163
163
  - lib/eleven_rb/resources/voice_library.rb
@@ -173,7 +173,6 @@ metadata:
173
173
  source_code_uri: https://github.com/webventures/eleven_rb
174
174
  changelog_uri: https://github.com/webventures/eleven_rb/blob/main/CHANGELOG.md
175
175
  rubygems_mfa_required: 'true'
176
- post_install_message:
177
176
  rdoc_options: []
178
177
  require_paths:
179
178
  - lib
@@ -188,8 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
188
187
  - !ruby/object:Gem::Version
189
188
  version: '0'
190
189
  requirements: []
191
- rubygems_version: 3.5.3
192
- signing_key:
190
+ rubygems_version: 3.6.9
193
191
  specification_version: 4
194
192
  summary: Ruby client for the ElevenLabs Text-to-Speech API
195
193
  test_files: []