eleven_rb 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/README.md +65 -2
- data/lib/eleven_rb/client.rb +16 -0
- data/lib/eleven_rb/http/client.rb +4 -3
- data/lib/eleven_rb/objects/cost_info.rb +5 -3
- data/lib/eleven_rb/resources/base.rb +4 -3
- data/lib/eleven_rb/resources/models.rb +7 -0
- data/lib/eleven_rb/resources/speech_to_speech.rb +94 -0
- data/lib/eleven_rb/resources/text_to_dialogue.rb +113 -0
- data/lib/eleven_rb/version.rb +1 -1
- data/lib/eleven_rb.rb +2 -0
- metadata +9 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ed711abcce18771ad13f10bcb29754605be61f7d02f7114f0e0b28b0dad4d556
|
|
4
|
+
data.tar.gz: 146285726bc80b0c3eab0b307a7ec4b788a8f3465903992bb12fc2b34bc1694b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6bf8e216c83287bb099e4a6bbed4ef718329f361fb7dfb4c70bf122f2512c74916eb1540fe6a1dfd4ae01e0edc53edc05408a017946f504c09611a54a6c2370b
|
|
7
|
+
data.tar.gz: 1839c52e3adf4efed58c410f08fa5c5e4818fde0964922e0752019b6606d726ed66a4f48a767b4e2110aa3b0cf98e7f4666eaef64552ec9f0f976378b1ef5094
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.0] - 2026-03-10
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Text-to-Dialogue multi-speaker audio generation via `client.text_to_dialogue.generate` (`POST /v1/text-to-dialogue`)
|
|
15
|
+
- `Client#text_to_dialogue` resource with `dialogue` alias
|
|
16
|
+
- Multi-speaker input validation (max 10 unique voices, 5000 total characters across all inputs)
|
|
17
|
+
- `eleven_v3` model added to `CostInfo::COST_PER_1K_CHARS` ($0.30/1K chars)
|
|
18
|
+
- `Models#latest` method returning the most capable model (`eleven_v3`), falling back to the default model
|
|
19
|
+
- Audio tags support via v3 model (`[laughs]`, `[whispers]`, `[excited]`, etc.)
|
|
20
|
+
- `CostInfo` now accepts `character_count:` keyword as alternative to `text:`
|
|
21
|
+
- TTS generation with word-level timestamps via `client.tts.generate_with_timestamps`
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- `CostInfo#initialize` signature: `text:` is now optional when `character_count:` is provided (backwards-compatible)
|
|
26
|
+
|
|
27
|
+
## [0.4.0] - 2026-03-10
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- Speech-to-Speech voice conversion via `client.sts.convert` (`POST /v1/speech-to-speech/{voice_id}`)
|
|
32
|
+
- `Client#speech_to_speech` resource with `sts` alias
|
|
33
|
+
- Accepts file paths (String) or IO objects (IO, StringIO, Tempfile) for audio input
|
|
34
|
+
- Multipart upload with binary response support
|
|
35
|
+
- Default model: `eleven_english_sts_v2`
|
|
36
|
+
|
|
37
|
+
### Changed
|
|
38
|
+
|
|
39
|
+
- `Resources::Base#post_multipart` and `HTTP::Client#post_multipart` now accept `response_type:` parameter (defaults to `:json`, backwards-compatible)
|
|
40
|
+
|
|
10
41
|
## [0.3.0] - 2026-02-08
|
|
11
42
|
|
|
12
43
|
### Added
|
data/README.md
CHANGED
|
@@ -4,11 +4,13 @@
|
|
|
4
4
|
[![CI](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml/badge.svg)](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml)
|
|
5
5
|
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
|
-
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Sound Effects, and Music API.
|
|
7
|
+
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Text-to-Dialogue, Sound Effects, and Music API.
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
11
|
- Text-to-Speech generation and streaming
|
|
12
|
+
- Speech-to-Speech voice conversion
|
|
13
|
+
- Text-to-Dialogue multi-speaker generation with audio tags
|
|
12
14
|
- Sound effects generation from text descriptions
|
|
13
15
|
- Music generation from prompts or composition plans
|
|
14
16
|
- Voice management (list, get, create, update, delete)
|
|
@@ -72,7 +74,7 @@ audio.save_to_file("output.mp3")
|
|
|
72
74
|
audio = client.tts.generate(
|
|
73
75
|
"Hello world",
|
|
74
76
|
voice_id: "voice_id",
|
|
75
|
-
model_id: "
|
|
77
|
+
model_id: "eleven_v3", # Most expressive, 70+ languages, audio tags
|
|
76
78
|
voice_settings: {
|
|
77
79
|
stability: 0.5,
|
|
78
80
|
similarity_boost: 0.75
|
|
@@ -88,6 +90,64 @@ File.open("output.mp3", "wb") do |file|
|
|
|
88
90
|
end
|
|
89
91
|
```
|
|
90
92
|
|
|
93
|
+
### Speech-to-Speech
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
# Convert audio to a different voice
|
|
97
|
+
audio = client.sts.convert("input.mp3", voice_id: "voice_id")
|
|
98
|
+
audio.save_to_file("output.mp3")
|
|
99
|
+
|
|
100
|
+
# With options
|
|
101
|
+
audio = client.sts.convert(
|
|
102
|
+
"input.mp3",
|
|
103
|
+
voice_id: "voice_id",
|
|
104
|
+
model_id: "eleven_english_sts_v2",
|
|
105
|
+
voice_settings: { stability: 0.5, similarity_boost: 0.75 },
|
|
106
|
+
remove_background_noise: true,
|
|
107
|
+
output_format: "mp3_44100_192"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# From an IO object
|
|
111
|
+
io = File.open("input.mp3", "rb")
|
|
112
|
+
audio = client.sts.convert(io, voice_id: "voice_id")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Text-to-Dialogue
|
|
116
|
+
|
|
117
|
+
```ruby
|
|
118
|
+
# Generate multi-speaker dialogue
|
|
119
|
+
audio = client.text_to_dialogue.generate([
|
|
120
|
+
{ text: "[excited] Welcome to the show!", voice_id: "voice_abc" },
|
|
121
|
+
{ text: "[laughs] Thanks for having me.", voice_id: "voice_xyz" },
|
|
122
|
+
{ text: "So tell us about your project...", voice_id: "voice_abc" }
|
|
123
|
+
])
|
|
124
|
+
audio.save_to_file("dialogue.mp3")
|
|
125
|
+
|
|
126
|
+
# With options
|
|
127
|
+
audio = client.dialogue.generate(
|
|
128
|
+
inputs,
|
|
129
|
+
model_id: "eleven_v3",
|
|
130
|
+
language_code: "en",
|
|
131
|
+
settings: { stability: 0.5 },
|
|
132
|
+
seed: 42,
|
|
133
|
+
output_format: "mp3_44100_192"
|
|
134
|
+
)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Audio Tags
|
|
138
|
+
|
|
139
|
+
The `eleven_v3` model supports inline audio tags for expressive speech:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
audio = client.tts.generate(
|
|
143
|
+
"[excited] Oh wow, this is AMAZING! [laughs] I can't believe it...",
|
|
144
|
+
voice_id: "voice_id",
|
|
145
|
+
model_id: "eleven_v3"
|
|
146
|
+
)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Supported tags include `[laughs]`, `[whispers]`, `[sighs]`, `[excited]`, `[sarcastic]`, `[curious]`, `[pause]`, and more. Use CAPS for emphasis, `...` for pauses, and `—` for interruptions. See the [ElevenLabs v3 documentation](https://elevenlabs.io/docs/guides/audio-tags) for the full list.
|
|
150
|
+
|
|
91
151
|
### Sound Effects
|
|
92
152
|
|
|
93
153
|
```ruby
|
|
@@ -251,6 +311,9 @@ client = ElevenRb::Client.new(
|
|
|
251
311
|
models = client.models.list
|
|
252
312
|
models.each { |m| puts "#{m.name} (#{m.model_id})" }
|
|
253
313
|
|
|
314
|
+
# Get the latest/most capable model
|
|
315
|
+
client.models.latest # => Objects::Model for "eleven_v3" (falls back to the default model)
|
|
316
|
+
|
|
254
317
|
# Get multilingual models
|
|
255
318
|
client.models.multilingual
|
|
256
319
|
|
data/lib/eleven_rb/client.rb
CHANGED
|
@@ -79,6 +79,14 @@ module ElevenRb
|
|
|
79
79
|
@user ||= Resources::User.new(http_client)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
+
# Speech-to-speech resource
|
|
83
|
+
#
|
|
84
|
+
# @return [Resources::SpeechToSpeech]
|
|
85
|
+
def speech_to_speech
|
|
86
|
+
@speech_to_speech ||= Resources::SpeechToSpeech.new(http_client)
|
|
87
|
+
end
|
|
88
|
+
alias sts speech_to_speech
|
|
89
|
+
|
|
82
90
|
# Sound effects resource
|
|
83
91
|
#
|
|
84
92
|
# @return [Resources::SoundEffects]
|
|
@@ -93,6 +101,14 @@ module ElevenRb
|
|
|
93
101
|
@music ||= Resources::Music.new(http_client)
|
|
94
102
|
end
|
|
95
103
|
|
|
104
|
+
# Text-to-dialogue resource
|
|
105
|
+
#
|
|
106
|
+
# @return [Resources::TextToDialogue]
|
|
107
|
+
def text_to_dialogue
|
|
108
|
+
@text_to_dialogue ||= Resources::TextToDialogue.new(http_client)
|
|
109
|
+
end
|
|
110
|
+
alias dialogue text_to_dialogue
|
|
111
|
+
|
|
96
112
|
# Voice slot manager
|
|
97
113
|
#
|
|
98
114
|
# @return [VoiceSlotManager]
|
|
@@ -49,9 +49,10 @@ module ElevenRb
|
|
|
49
49
|
#
|
|
50
50
|
# @param path [String] the API path
|
|
51
51
|
# @param params [Hash] form parameters including files
|
|
52
|
-
# @
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
# @param response_type [Symbol] :json or :binary
|
|
53
|
+
# @return [Hash, String] parsed JSON or binary response
|
|
54
|
+
def post_multipart(path, params, response_type: :json)
|
|
55
|
+
request(:post, path, body: params, multipart: true, response_type: response_type)
|
|
55
56
|
end
|
|
56
57
|
|
|
57
58
|
# Make a streaming POST request
|
|
@@ -12,6 +12,7 @@ module ElevenRb
|
|
|
12
12
|
'eleven_monolingual_v1' => 0.30,
|
|
13
13
|
'eleven_multilingual_v1' => 0.30,
|
|
14
14
|
'eleven_multilingual_v2' => 0.30,
|
|
15
|
+
'eleven_v3' => 0.30,
|
|
15
16
|
'eleven_turbo_v2' => 0.18,
|
|
16
17
|
'eleven_turbo_v2_5' => 0.18,
|
|
17
18
|
'eleven_english_sts_v2' => 0.30,
|
|
@@ -23,11 +24,12 @@ module ElevenRb
|
|
|
23
24
|
|
|
24
25
|
# Initialize cost info
|
|
25
26
|
#
|
|
26
|
-
# @param text [String] the text being converted
|
|
27
|
+
# @param text [String, nil] the text being converted
|
|
28
|
+
# @param character_count [Integer, nil] direct character count (alternative to text)
|
|
27
29
|
# @param voice_id [String] the voice ID
|
|
28
30
|
# @param model_id [String] the model ID
|
|
29
|
-
def initialize(
|
|
30
|
-
@character_count = text
|
|
31
|
+
def initialize(voice_id:, model_id:, text: nil, character_count: nil)
|
|
32
|
+
@character_count = character_count || text&.length || 0
|
|
31
33
|
@voice_id = voice_id
|
|
32
34
|
@model_id = model_id
|
|
33
35
|
end
|
|
@@ -64,9 +64,10 @@ module ElevenRb
|
|
|
64
64
|
#
|
|
65
65
|
# @param path [String]
|
|
66
66
|
# @param params [Hash]
|
|
67
|
-
# @
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
# @param response_type [Symbol] :json or :binary
|
|
68
|
+
# @return [Hash, String]
|
|
69
|
+
def post_multipart(path, params, response_type: :json)
|
|
70
|
+
http_client.post_multipart(path, params, response_type: response_type)
|
|
70
71
|
end
|
|
71
72
|
|
|
72
73
|
# Validate presence of a value
|
|
@@ -54,6 +54,13 @@ module ElevenRb
|
|
|
54
54
|
get('eleven_multilingual_v2') || tts_capable.first
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
+
# Get the latest/most capable model
|
|
58
|
+
#
|
|
59
|
+
# @return [Objects::Model, nil]
|
|
60
|
+
def latest
|
|
61
|
+
get('eleven_v3') || default
|
|
62
|
+
end
|
|
63
|
+
|
|
57
64
|
# Get model IDs as array
|
|
58
65
|
#
|
|
59
66
|
# @return [Array<String>]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ElevenRb
|
|
4
|
+
module Resources
|
|
5
|
+
# Speech-to-speech voice conversion resource
|
|
6
|
+
#
|
|
7
|
+
# Converts audio from one voice to another while preserving timing,
|
|
8
|
+
# cadence, and emotion. Uses ElevenLabs STS API with multipart upload.
|
|
9
|
+
#
|
|
10
|
+
# @example Convert a file
|
|
11
|
+
# audio = client.sts.convert("input.mp3", voice_id: "abc123")
|
|
12
|
+
# audio.save_to_file("output.mp3")
|
|
13
|
+
#
|
|
14
|
+
# @example Convert an IO object
|
|
15
|
+
# io = File.open("input.mp3", "rb")
|
|
16
|
+
# audio = client.sts.convert(io, voice_id: "abc123")
|
|
17
|
+
class SpeechToSpeech < Base
|
|
18
|
+
DEFAULT_MODEL = 'eleven_english_sts_v2'
|
|
19
|
+
MAX_INPUT_BYTES = 50 * 1024 * 1024 # 50 MB
|
|
20
|
+
|
|
21
|
+
# Convert speech from one voice to another
|
|
22
|
+
#
|
|
23
|
+
# @param audio_input [String, IO, StringIO, Tempfile] file path or IO object of source audio
|
|
24
|
+
# @param voice_id [String] target voice ID to convert into
|
|
25
|
+
# @param model_id [String] STS model (default: eleven_english_sts_v2)
|
|
26
|
+
# @param voice_settings [Hash, nil] override voice settings (stability, similarity_boost)
|
|
27
|
+
# @param remove_background_noise [Boolean] isolate speech before conversion
|
|
28
|
+
# @param output_format [String] audio output format
|
|
29
|
+
# @param seed [Integer, nil] for reproducible results
|
|
30
|
+
# @return [Objects::Audio]
|
|
31
|
+
def convert(audio_input, voice_id:, model_id: DEFAULT_MODEL,
|
|
32
|
+
voice_settings: nil, remove_background_noise: false,
|
|
33
|
+
output_format: 'mp3_44100_128', seed: nil)
|
|
34
|
+
validate_presence!(voice_id, 'voice_id')
|
|
35
|
+
|
|
36
|
+
file = prepare_upload(audio_input)
|
|
37
|
+
|
|
38
|
+
params = {
|
|
39
|
+
audio: file,
|
|
40
|
+
model_id: model_id
|
|
41
|
+
}
|
|
42
|
+
params[:voice_settings] = voice_settings.to_json if voice_settings
|
|
43
|
+
params[:remove_background_noise] = remove_background_noise.to_s
|
|
44
|
+
params[:seed] = seed.to_s if seed
|
|
45
|
+
|
|
46
|
+
path = "/speech-to-speech/#{voice_id}?output_format=#{output_format}"
|
|
47
|
+
response = post_multipart(path, params, response_type: :binary)
|
|
48
|
+
|
|
49
|
+
audio = Objects::Audio.new(
|
|
50
|
+
data: response,
|
|
51
|
+
format: output_format,
|
|
52
|
+
voice_id: voice_id,
|
|
53
|
+
text: '[speech-to-speech]',
|
|
54
|
+
model_id: model_id
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
notify_audio_generated(audio, voice_id: voice_id, model_id: model_id)
|
|
58
|
+
audio
|
|
59
|
+
ensure
|
|
60
|
+
file&.close if file.respond_to?(:close) && audio_input.is_a?(String)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
def notify_audio_generated(audio, voice_id:, model_id:)
|
|
66
|
+
cost_info = Objects::CostInfo.new(text: '[sts]', voice_id: voice_id, model_id: model_id)
|
|
67
|
+
http_client.config.trigger(
|
|
68
|
+
:on_audio_generated,
|
|
69
|
+
audio: audio,
|
|
70
|
+
voice_id: voice_id,
|
|
71
|
+
text: '[speech-to-speech]',
|
|
72
|
+
cost_info: cost_info.to_h
|
|
73
|
+
)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Prepare the audio input for multipart upload
|
|
77
|
+
#
|
|
78
|
+
# @param input [String, IO, StringIO, Tempfile] file path or IO object
|
|
79
|
+
# @return [IO, StringIO, Tempfile] file opened in binary mode for a path, or the given IO object unchanged
|
|
80
|
+
def prepare_upload(input)
|
|
81
|
+
case input
|
|
82
|
+
when String
|
|
83
|
+
raise Errors::ValidationError, "File not found: #{input}" unless File.exist?(input)
|
|
84
|
+
|
|
85
|
+
File.open(input, 'rb')
|
|
86
|
+
when IO, StringIO, Tempfile
|
|
87
|
+
input
|
|
88
|
+
else
|
|
89
|
+
raise ArgumentError, "Expected file path or IO object, got #{input.class}"
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ElevenRb
|
|
4
|
+
module Resources
|
|
5
|
+
# Text-to-dialogue resource for multi-speaker audio generation
|
|
6
|
+
#
|
|
7
|
+
# @example Generate dialogue
|
|
8
|
+
# audio = client.text_to_dialogue.generate([
|
|
9
|
+
# { text: "[excited] Welcome!", voice_id: "voice_abc" },
|
|
10
|
+
# { text: "[laughs] Thanks!", voice_id: "voice_xyz" }
|
|
11
|
+
# ])
|
|
12
|
+
# audio.save_to_file("dialogue.mp3")
|
|
13
|
+
class TextToDialogue < Base
|
|
14
|
+
DEFAULT_MODEL = 'eleven_v3'
|
|
15
|
+
MAX_VOICES_PER_REQUEST = 10
|
|
16
|
+
MAX_TEXT_LENGTH = 5000
|
|
17
|
+
|
|
18
|
+
# Generate dialogue audio from multiple speaker inputs
|
|
19
|
+
#
|
|
20
|
+
# @param inputs [Array<Hash>] Array of { text:, voice_id: } hashes with symbol keys (max 10 unique voices, 5000 total characters)
|
|
21
|
+
# @param model_id [String] Model to use (only eleven_v3 supported)
|
|
22
|
+
# @param language_code [String, nil] ISO 639-1 language code
|
|
23
|
+
# @param settings [Hash, nil] Generation settings (stability: 0.0-1.0)
|
|
24
|
+
# @param seed [Integer, nil] Seed for reproducibility
|
|
25
|
+
# @param output_format [String] Audio output format
|
|
26
|
+
# @param apply_text_normalization [String] "auto", "on", or "off"
|
|
27
|
+
# @return [Objects::Audio]
|
|
28
|
+
def generate(
|
|
29
|
+
inputs,
|
|
30
|
+
model_id: DEFAULT_MODEL,
|
|
31
|
+
language_code: nil,
|
|
32
|
+
settings: nil,
|
|
33
|
+
seed: nil,
|
|
34
|
+
output_format: 'mp3_44100_128',
|
|
35
|
+
apply_text_normalization: 'auto'
|
|
36
|
+
)
|
|
37
|
+
validate_inputs!(inputs)
|
|
38
|
+
|
|
39
|
+
body = build_request_body(inputs, model_id, language_code, settings, seed,
|
|
40
|
+
apply_text_normalization)
|
|
41
|
+
|
|
42
|
+
response = post_binary(
|
|
43
|
+
"/text-to-dialogue?output_format=#{output_format}",
|
|
44
|
+
body
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
build_audio_response(response, inputs, output_format, model_id)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
def build_request_body(inputs, model_id, language_code, settings, seed,
|
|
53
|
+
apply_text_normalization)
|
|
54
|
+
body = {
|
|
55
|
+
inputs: inputs.map { |i| { text: i[:text], voice_id: i[:voice_id] } },
|
|
56
|
+
model_id: model_id,
|
|
57
|
+
apply_text_normalization: apply_text_normalization
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
body[:language_code] = language_code if language_code
|
|
61
|
+
body[:settings] = settings if settings
|
|
62
|
+
body[:seed] = seed if seed
|
|
63
|
+
body
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def build_audio_response(response, inputs, output_format, model_id)
|
|
67
|
+
total_text = inputs.map { |i| i[:text] }.join("\n")
|
|
68
|
+
total_chars = inputs.sum { |i| i[:text].length }
|
|
69
|
+
primary_voice = inputs.first[:voice_id]
|
|
70
|
+
|
|
71
|
+
audio = Objects::Audio.new(
|
|
72
|
+
data: response, format: output_format,
|
|
73
|
+
voice_id: primary_voice, text: total_text, model_id: model_id
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
cost_info = Objects::CostInfo.new(
|
|
77
|
+
character_count: total_chars, voice_id: primary_voice, model_id: model_id
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
http_client.config.trigger(
|
|
81
|
+
:on_audio_generated,
|
|
82
|
+
audio: audio, voice_id: primary_voice,
|
|
83
|
+
text: total_text, cost_info: cost_info.to_h
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
audio
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def validate_inputs!(inputs)
|
|
90
|
+
raise Errors::ValidationError, 'inputs must be a non-empty array' unless inputs.is_a?(Array) && !inputs.empty?
|
|
91
|
+
|
|
92
|
+
inputs.each_with_index do |input, i|
|
|
93
|
+
validate_presence!(input[:text], "inputs[#{i}].text")
|
|
94
|
+
validate_presence!(input[:voice_id], "inputs[#{i}].voice_id")
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
unique_voices = inputs.map { |i| i[:voice_id] }.uniq
|
|
98
|
+
if unique_voices.length > MAX_VOICES_PER_REQUEST
|
|
99
|
+
raise Errors::ValidationError,
|
|
100
|
+
"Maximum #{MAX_VOICES_PER_REQUEST} unique voices per request " \
|
|
101
|
+
"(got #{unique_voices.length})"
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
total_chars = inputs.sum { |i| i[:text].length }
|
|
105
|
+
return unless total_chars > MAX_TEXT_LENGTH
|
|
106
|
+
|
|
107
|
+
raise Errors::ValidationError,
|
|
108
|
+
"Total text length #{total_chars} exceeds maximum " \
|
|
109
|
+
"#{MAX_TEXT_LENGTH} characters"
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
data/lib/eleven_rb/version.rb
CHANGED
data/lib/eleven_rb.rb
CHANGED
|
@@ -108,6 +108,8 @@ require_relative 'eleven_rb/resources/models'
|
|
|
108
108
|
require_relative 'eleven_rb/resources/user'
|
|
109
109
|
require_relative 'eleven_rb/resources/sound_effects'
|
|
110
110
|
require_relative 'eleven_rb/resources/music'
|
|
111
|
+
require_relative 'eleven_rb/resources/speech_to_speech'
|
|
112
|
+
require_relative 'eleven_rb/resources/text_to_dialogue'
|
|
111
113
|
|
|
112
114
|
# High-level components
|
|
113
115
|
require_relative 'eleven_rb/voice_slot_manager'
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eleven_rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Web Ventures Ltd
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: base64
|
|
@@ -123,9 +122,9 @@ dependencies:
|
|
|
123
122
|
- !ruby/object:Gem::Version
|
|
124
123
|
version: '0.9'
|
|
125
124
|
description: |
|
|
126
|
-
A
|
|
127
|
-
|
|
128
|
-
|
|
125
|
+
A comprehensive Ruby client for ElevenLabs covering Text-to-Speech,
|
|
126
|
+
Speech-to-Speech, Text-to-Dialogue, Sound Effects, and Music generation
|
|
127
|
+
with voice management, streaming, and built-in cost tracking.
|
|
129
128
|
email:
|
|
130
129
|
- gems@dev.webven.nz
|
|
131
130
|
executables: []
|
|
@@ -158,6 +157,8 @@ files:
|
|
|
158
157
|
- lib/eleven_rb/resources/models.rb
|
|
159
158
|
- lib/eleven_rb/resources/music.rb
|
|
160
159
|
- lib/eleven_rb/resources/sound_effects.rb
|
|
160
|
+
- lib/eleven_rb/resources/speech_to_speech.rb
|
|
161
|
+
- lib/eleven_rb/resources/text_to_dialogue.rb
|
|
161
162
|
- lib/eleven_rb/resources/text_to_speech.rb
|
|
162
163
|
- lib/eleven_rb/resources/user.rb
|
|
163
164
|
- lib/eleven_rb/resources/voice_library.rb
|
|
@@ -173,7 +174,6 @@ metadata:
|
|
|
173
174
|
source_code_uri: https://github.com/webventures/eleven_rb
|
|
174
175
|
changelog_uri: https://github.com/webventures/eleven_rb/blob/main/CHANGELOG.md
|
|
175
176
|
rubygems_mfa_required: 'true'
|
|
176
|
-
post_install_message:
|
|
177
177
|
rdoc_options: []
|
|
178
178
|
require_paths:
|
|
179
179
|
- lib
|
|
@@ -188,8 +188,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
188
188
|
- !ruby/object:Gem::Version
|
|
189
189
|
version: '0'
|
|
190
190
|
requirements: []
|
|
191
|
-
rubygems_version: 3.
|
|
192
|
-
signing_key:
|
|
191
|
+
rubygems_version: 3.6.9
|
|
193
192
|
specification_version: 4
|
|
194
|
-
summary: Ruby client for the ElevenLabs
|
|
193
|
+
summary: Ruby client for the ElevenLabs Audio AI API
|
|
195
194
|
test_files: []
|