eleven_rb 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +24 -1
- data/lib/eleven_rb/client.rb +8 -0
- data/lib/eleven_rb/http/client.rb +4 -3
- data/lib/eleven_rb/resources/base.rb +4 -3
- data/lib/eleven_rb/resources/speech_to_speech.rb +94 -0
- data/lib/eleven_rb/version.rb +1 -1
- data/lib/eleven_rb.rb +1 -0
- metadata +4 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fa71eff851a0c6b80f139e801962bceaf1bb371f6e9a0cd325c47b2ee6f4994c
|
|
4
|
+
data.tar.gz: aa640970faba75afe3cbacdc16958ee63c1f2ecfb009512cd9e8182117e16d29
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a537ba9de014afc366c348a71613f257b6380ae784979fb42cd55522610c85661e0e34907c770642fa268051cbb91b24110e00d5e8b7305d5c913a81e04705a6
|
|
7
|
+
data.tar.gz: c1f4e236fb327b737b4e6346bf3a71bfec357d4f7f09a0e1eaf9c8889ef2251ff40b8eeb3be959e98b58e2a76ff6305e7391d64f0c33a9ce1fb38897b958f1fa
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.0] - 2026-03-10
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Speech-to-Speech voice conversion via `client.sts.convert` (`POST /v1/speech-to-speech/{voice_id}`)
|
|
15
|
+
- `Client#speech_to_speech` resource with `sts` alias
|
|
16
|
+
- Accepts file paths (String) or IO objects (IO, StringIO, Tempfile) for audio input
|
|
17
|
+
- Multipart upload with binary response support
|
|
18
|
+
- Default model: `eleven_english_sts_v2`
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- `Resources::Base#post_multipart` and `HTTP::Client#post_multipart` now accept `response_type:` parameter (defaults to `:json`, backwards-compatible)
|
|
23
|
+
|
|
10
24
|
## [0.3.0] - 2026-02-08
|
|
11
25
|
|
|
12
26
|
### Added
|
data/README.md
CHANGED
|
@@ -4,11 +4,12 @@
|
|
|
4
4
|
[CI status](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml)
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
|
-
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Sound Effects, and Music API.
|
|
7
|
+
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Sound Effects, and Music API.
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
11
|
- Text-to-Speech generation and streaming
|
|
12
|
+
- Speech-to-Speech voice conversion
|
|
12
13
|
- Sound effects generation from text descriptions
|
|
13
14
|
- Music generation from prompts or composition plans
|
|
14
15
|
- Voice management (list, get, create, update, delete)
|
|
@@ -88,6 +89,28 @@ File.open("output.mp3", "wb") do |file|
|
|
|
88
89
|
end
|
|
89
90
|
```
|
|
90
91
|
|
|
92
|
+
### Speech-to-Speech
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
# Convert audio to a different voice
|
|
96
|
+
audio = client.sts.convert("input.mp3", voice_id: "voice_id")
|
|
97
|
+
audio.save_to_file("output.mp3")
|
|
98
|
+
|
|
99
|
+
# With options
|
|
100
|
+
audio = client.sts.convert(
|
|
101
|
+
"input.mp3",
|
|
102
|
+
voice_id: "voice_id",
|
|
103
|
+
model_id: "eleven_english_sts_v2",
|
|
104
|
+
voice_settings: { stability: 0.5, similarity_boost: 0.75 },
|
|
105
|
+
remove_background_noise: true,
|
|
106
|
+
output_format: "mp3_44100_192"
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
# From an IO object
|
|
110
|
+
io = File.open("input.mp3", "rb")
|
|
111
|
+
audio = client.sts.convert(io, voice_id: "voice_id")
|
|
112
|
+
```
|
|
113
|
+
|
|
91
114
|
### Sound Effects
|
|
92
115
|
|
|
93
116
|
```ruby
|
data/lib/eleven_rb/client.rb
CHANGED
|
@@ -79,6 +79,14 @@ module ElevenRb
|
|
|
79
79
|
@user ||= Resources::User.new(http_client)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
+
# Speech-to-speech resource
|
|
83
|
+
#
|
|
84
|
+
# @return [Resources::SpeechToSpeech]
|
|
85
|
+
def speech_to_speech
|
|
86
|
+
@speech_to_speech ||= Resources::SpeechToSpeech.new(http_client)
|
|
87
|
+
end
|
|
88
|
+
alias sts speech_to_speech
|
|
89
|
+
|
|
82
90
|
# Sound effects resource
|
|
83
91
|
#
|
|
84
92
|
# @return [Resources::SoundEffects]
|
data/lib/eleven_rb/http/client.rb
CHANGED
|
@@ -49,9 +49,10 @@ module ElevenRb
|
|
|
49
49
|
#
|
|
50
50
|
# @param path [String] the API path
|
|
51
51
|
# @param params [Hash] form parameters including files
|
|
52
|
-
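# @return [Hash] parsed JSON response
|
|
53
|
-
def post_multipart(path, params)
|
|
54
|
-
request(:post, path, body: params, multipart: true)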
|
|
52
|
+
# @param response_type [Symbol] :json or :binary
|
|
53
|
+
# @return [Hash, String] parsed JSON or binary response
|
|
54
|
+
def post_multipart(path, params, response_type: :json)
|
|
55
|
+
request(:post, path, body: params, multipart: true, response_type: response_type)
|
|
55
56
|
end
|
|
56
57
|
|
|
57
58
|
# Make a streaming POST request
|
data/lib/eleven_rb/resources/base.rb
CHANGED
|
@@ -64,9 +64,10 @@ module ElevenRb
|
|
|
64
64
|
#
|
|
65
65
|
# @param path [String]
|
|
66
66
|
# @param params [Hash]
|
|
67
|
-
# @return [Hash]
|
|
68
|
-
def post_multipart(path, params)
|
|
69
|
-
http_client.post_multipart(path, params)
|
|
67
|
+
# @param response_type [Symbol] :json or :binary
|
|
68
|
+
# @return [Hash, String]
|
|
69
|
+
def post_multipart(path, params, response_type: :json)
|
|
70
|
+
http_client.post_multipart(path, params, response_type: response_type)
|
|
70
71
|
end
|
|
71
72
|
|
|
72
73
|
# Validate presence of a value
|
data/lib/eleven_rb/resources/speech_to_speech.rb
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ElevenRb
|
|
4
|
+
module Resources
|
|
5
|
+
# Speech-to-speech voice conversion resource
|
|
6
|
+
#
|
|
7
|
+
# Converts audio from one voice to another while preserving timing,
|
|
8
|
+
# cadence, and emotion. Uses ElevenLabs STS API with multipart upload.
|
|
9
|
+
#
|
|
10
|
+
# @example Convert a file
|
|
11
|
+
# audio = client.sts.convert("input.mp3", voice_id: "abc123")
|
|
12
|
+
# audio.save_to_file("output.mp3")
|
|
13
|
+
#
|
|
14
|
+
# @example Convert an IO object
|
|
15
|
+
# io = File.open("input.mp3", "rb")
|
|
16
|
+
# audio = client.sts.convert(io, voice_id: "abc123")
|
|
17
|
+
class SpeechToSpeech < Base
|
|
18
|
+
DEFAULT_MODEL = 'eleven_english_sts_v2'
|
|
19
|
+
MAX_INPUT_BYTES = 50 * 1024 * 1024 # 50 MB
|
|
20
|
+
|
|
21
|
+
# Convert speech from one voice to another
|
|
22
|
+
#
|
|
23
|
+
# @param audio_input [String, IO, Tempfile] file path or IO object of source audio
|
|
24
|
+
# @param voice_id [String] target voice ID to convert into
|
|
25
|
+
# @param model_id [String] STS model (default: eleven_english_sts_v2)
|
|
26
|
+
# @param voice_settings [Hash, nil] override voice settings (stability, similarity_boost)
|
|
27
|
+
# @param remove_background_noise [Boolean] isolate speech before conversion
|
|
28
|
+
# @param output_format [String] audio output format
|
|
29
|
+
# @param seed [Integer, nil] for reproducible results
|
|
30
|
+
# @return [Objects::Audio]
|
|
31
|
+
def convert(audio_input, voice_id:, model_id: DEFAULT_MODEL,
|
|
32
|
+
voice_settings: nil, remove_background_noise: false,
|
|
33
|
+
output_format: 'mp3_44100_128', seed: nil)
|
|
34
|
+
validate_presence!(voice_id, 'voice_id')
|
|
35
|
+
|
|
36
|
+
file = prepare_upload(audio_input)
|
|
37
|
+
|
|
38
|
+
params = {
|
|
39
|
+
audio: file,
|
|
40
|
+
model_id: model_id
|
|
41
|
+
}
|
|
42
|
+
params[:voice_settings] = voice_settings.to_json if voice_settings
|
|
43
|
+
params[:remove_background_noise] = remove_background_noise.to_s
|
|
44
|
+
params[:seed] = seed.to_s if seed
|
|
45
|
+
|
|
46
|
+
path = "/speech-to-speech/#{voice_id}?output_format=#{output_format}"
|
|
47
|
+
response = post_multipart(path, params, response_type: :binary)
|
|
48
|
+
|
|
49
|
+
audio = Objects::Audio.new(
|
|
50
|
+
data: response,
|
|
51
|
+
format: output_format,
|
|
52
|
+
voice_id: voice_id,
|
|
53
|
+
text: '[speech-to-speech]',
|
|
54
|
+
model_id: model_id
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
notify_audio_generated(audio, voice_id: voice_id, model_id: model_id)
|
|
58
|
+
audio
|
|
59
|
+
ensure
|
|
60
|
+
file&.close if file.respond_to?(:close) && audio_input.is_a?(String)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
def notify_audio_generated(audio, voice_id:, model_id:)
|
|
66
|
+
cost_info = Objects::CostInfo.new(text: '[sts]', voice_id: voice_id, model_id: model_id)
|
|
67
|
+
http_client.config.trigger(
|
|
68
|
+
:on_audio_generated,
|
|
69
|
+
audio: audio,
|
|
70
|
+
voice_id: voice_id,
|
|
71
|
+
text: '[speech-to-speech]',
|
|
72
|
+
cost_info: cost_info.to_h
|
|
73
|
+
)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Prepare the audio input for multipart upload
|
|
77
|
+
#
|
|
78
|
+
# @param input [String, IO, StringIO, Tempfile] file path or IO object
|
|
79
|
+
# @return [IO] file handle ready for upload
|
|
80
|
+
def prepare_upload(input)
|
|
81
|
+
case input
|
|
82
|
+
when String
|
|
83
|
+
raise Errors::ValidationError, "File not found: #{input}" unless File.exist?(input)
|
|
84
|
+
|
|
85
|
+
File.open(input, 'rb')
|
|
86
|
+
when IO, StringIO, Tempfile
|
|
87
|
+
input
|
|
88
|
+
else
|
|
89
|
+
raise ArgumentError, "Expected file path or IO object, got #{input.class}"
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
data/lib/eleven_rb/version.rb
CHANGED
data/lib/eleven_rb.rb
CHANGED
|
@@ -108,6 +108,7 @@ require_relative 'eleven_rb/resources/models'
|
|
|
108
108
|
require_relative 'eleven_rb/resources/user'
|
|
109
109
|
require_relative 'eleven_rb/resources/sound_effects'
|
|
110
110
|
require_relative 'eleven_rb/resources/music'
|
|
111
|
+
require_relative 'eleven_rb/resources/speech_to_speech'
|
|
111
112
|
|
|
112
113
|
# High-level components
|
|
113
114
|
require_relative 'eleven_rb/voice_slot_manager'
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eleven_rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Web Ventures Ltd
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: base64
|
|
@@ -158,6 +157,7 @@ files:
|
|
|
158
157
|
- lib/eleven_rb/resources/models.rb
|
|
159
158
|
- lib/eleven_rb/resources/music.rb
|
|
160
159
|
- lib/eleven_rb/resources/sound_effects.rb
|
|
160
|
+
- lib/eleven_rb/resources/speech_to_speech.rb
|
|
161
161
|
- lib/eleven_rb/resources/text_to_speech.rb
|
|
162
162
|
- lib/eleven_rb/resources/user.rb
|
|
163
163
|
- lib/eleven_rb/resources/voice_library.rb
|
|
@@ -173,7 +173,6 @@ metadata:
|
|
|
173
173
|
source_code_uri: https://github.com/webventures/eleven_rb
|
|
174
174
|
changelog_uri: https://github.com/webventures/eleven_rb/blob/main/CHANGELOG.md
|
|
175
175
|
rubygems_mfa_required: 'true'
|
|
176
|
-
post_install_message:
|
|
177
176
|
rdoc_options: []
|
|
178
177
|
require_paths:
|
|
179
178
|
- lib
|
|
@@ -188,8 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
188
187
|
- !ruby/object:Gem::Version
|
|
189
188
|
version: '0'
|
|
190
189
|
requirements: []
|
|
191
|
-
rubygems_version: 3.
|
|
192
|
-
signing_key:
|
|
190
|
+
rubygems_version: 3.6.9
|
|
193
191
|
specification_version: 4
|
|
194
192
|
summary: Ruby client for the ElevenLabs Text-to-Speech API
|
|
195
193
|
test_files: []
|