eleven_rb 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +42 -2
- data/lib/eleven_rb/client.rb +8 -0
- data/lib/eleven_rb/objects/cost_info.rb +5 -3
- data/lib/eleven_rb/resources/models.rb +7 -0
- data/lib/eleven_rb/resources/text_to_dialogue.rb +113 -0
- data/lib/eleven_rb/version.rb +1 -1
- data/lib/eleven_rb.rb +1 -0
- metadata +6 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ed711abcce18771ad13f10bcb29754605be61f7d02f7114f0e0b28b0dad4d556
|
|
4
|
+
data.tar.gz: 146285726bc80b0c3eab0b307a7ec4b788a8f3465903992bb12fc2b34bc1694b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6bf8e216c83287bb099e4a6bbed4ef718329f361fb7dfb4c70bf122f2512c74916eb1540fe6a1dfd4ae01e0edc53edc05408a017946f504c09611a54a6c2370b
|
|
7
|
+
data.tar.gz: 1839c52e3adf4efed58c410f08fa5c5e4818fde0964922e0752019b6606d726ed66a4f48a767b4e2110aa3b0cf98e7f4666eaef64552ec9f0f976378b1ef5094
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.0] - 2026-03-10
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Text-to-Dialogue multi-speaker audio generation via `client.text_to_dialogue.generate` (`POST /v1/text-to-dialogue`)
|
|
15
|
+
- `Client#text_to_dialogue` resource with `dialogue` alias
|
|
16
|
+
- Multi-speaker input validation (max 10 unique voices, 5000 character limit)
|
|
17
|
+
- `eleven_v3` model added to `CostInfo::COST_PER_1K_CHARS` ($0.30/1K chars)
|
|
18
|
+
- `Models#latest` method returning the most capable model (`eleven_v3`)
|
|
19
|
+
- Audio tags support via v3 model (`[laughs]`, `[whispers]`, `[excited]`, etc.)
|
|
20
|
+
- `CostInfo` now accepts `character_count:` keyword as alternative to `text:`
|
|
21
|
+
- TTS generation with word-level timestamps via `client.tts.generate_with_timestamps`
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- `CostInfo#initialize` signature: `text:` is now optional when `character_count:` is provided (backwards-compatible)
|
|
26
|
+
|
|
10
27
|
## [0.4.0] - 2026-03-10
|
|
11
28
|
|
|
12
29
|
### Added
|
data/README.md
CHANGED
|
@@ -4,12 +4,13 @@
|
|
|
4
4
|
[CI status](https://github.com/webventures/eleven_rb/actions/workflows/ci.yml)
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
|
-
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Sound Effects, and Music API.
|
|
7
|
+
A Ruby client for the [ElevenLabs](https://try.elevenlabs.io/qyk2j8gumrjz) Text-to-Speech, Speech-to-Speech, Text-to-Dialogue, Sound Effects, and Music API.
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
11
|
- Text-to-Speech generation and streaming
|
|
12
12
|
- Speech-to-Speech voice conversion
|
|
13
|
+
- Text-to-Dialogue multi-speaker generation with audio tags
|
|
13
14
|
- Sound effects generation from text descriptions
|
|
14
15
|
- Music generation from prompts or composition plans
|
|
15
16
|
- Voice management (list, get, create, update, delete)
|
|
@@ -73,7 +74,7 @@ audio.save_to_file("output.mp3")
|
|
|
73
74
|
audio = client.tts.generate(
|
|
74
75
|
"Hello world",
|
|
75
76
|
voice_id: "voice_id",
|
|
76
|
-
model_id: "
|
|
77
|
+
model_id: "eleven_v3", # Most expressive, 70+ languages, audio tags
|
|
77
78
|
voice_settings: {
|
|
78
79
|
stability: 0.5,
|
|
79
80
|
similarity_boost: 0.75
|
|
@@ -111,6 +112,42 @@ io = File.open("input.mp3", "rb")
|
|
|
111
112
|
audio = client.sts.convert(io, voice_id: "voice_id")
|
|
112
113
|
```
|
|
113
114
|
|
|
115
|
+
### Text-to-Dialogue
|
|
116
|
+
|
|
117
|
+
```ruby
|
|
118
|
+
# Generate multi-speaker dialogue
|
|
119
|
+
audio = client.text_to_dialogue.generate([
|
|
120
|
+
{ text: "[excited] Welcome to the show!", voice_id: "voice_abc" },
|
|
121
|
+
{ text: "[laughs] Thanks for having me.", voice_id: "voice_xyz" },
|
|
122
|
+
{ text: "So tell us about your project...", voice_id: "voice_abc" }
|
|
123
|
+
])
|
|
124
|
+
audio.save_to_file("dialogue.mp3")
|
|
125
|
+
|
|
126
|
+
# With options
|
|
127
|
+
audio = client.dialogue.generate(
|
|
128
|
+
inputs,
|
|
129
|
+
model_id: "eleven_v3",
|
|
130
|
+
language_code: "en",
|
|
131
|
+
settings: { stability: 0.5 },
|
|
132
|
+
seed: 42,
|
|
133
|
+
output_format: "mp3_44100_192"
|
|
134
|
+
)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Audio Tags
|
|
138
|
+
|
|
139
|
+
The `eleven_v3` model supports inline audio tags for expressive speech:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
audio = client.tts.generate(
|
|
143
|
+
"[excited] Oh wow, this is AMAZING! [laughs] I can't believe it...",
|
|
144
|
+
voice_id: "voice_id",
|
|
145
|
+
model_id: "eleven_v3"
|
|
146
|
+
)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Supported tags include `[laughs]`, `[whispers]`, `[sighs]`, `[excited]`, `[sarcastic]`, `[curious]`, `[pause]`, and more. Use CAPS for emphasis, `...` for pauses, and `—` for interruptions. See the [ElevenLabs v3 documentation](https://elevenlabs.io/docs/guides/audio-tags) for the full list.
|
|
150
|
+
|
|
114
151
|
### Sound Effects
|
|
115
152
|
|
|
116
153
|
```ruby
|
|
@@ -274,6 +311,9 @@ client = ElevenRb::Client.new(
|
|
|
274
311
|
models = client.models.list
|
|
275
312
|
models.each { |m| puts "#{m.name} (#{m.model_id})" }
|
|
276
313
|
|
|
314
|
+
# Get the latest/most capable model
|
|
315
|
+
client.models.latest # => Objects::Model for "eleven_v3" (falls back to the default model)
|
|
316
|
+
|
|
277
317
|
# Get multilingual models
|
|
278
318
|
client.models.multilingual
|
|
279
319
|
|
data/lib/eleven_rb/client.rb
CHANGED
|
@@ -101,6 +101,14 @@ module ElevenRb
|
|
|
101
101
|
@music ||= Resources::Music.new(http_client)
|
|
102
102
|
end
|
|
103
103
|
|
|
104
|
+
# Text-to-dialogue resource
|
|
105
|
+
#
|
|
106
|
+
# @return [Resources::TextToDialogue]
|
|
107
|
+
def text_to_dialogue
|
|
108
|
+
@text_to_dialogue ||= Resources::TextToDialogue.new(http_client)
|
|
109
|
+
end
|
|
110
|
+
alias dialogue text_to_dialogue
|
|
111
|
+
|
|
104
112
|
# Voice slot manager
|
|
105
113
|
#
|
|
106
114
|
# @return [VoiceSlotManager]
|
data/lib/eleven_rb/objects/cost_info.rb
CHANGED
|
@@ -12,6 +12,7 @@ module ElevenRb
|
|
|
12
12
|
'eleven_monolingual_v1' => 0.30,
|
|
13
13
|
'eleven_multilingual_v1' => 0.30,
|
|
14
14
|
'eleven_multilingual_v2' => 0.30,
|
|
15
|
+
'eleven_v3' => 0.30,
|
|
15
16
|
'eleven_turbo_v2' => 0.18,
|
|
16
17
|
'eleven_turbo_v2_5' => 0.18,
|
|
17
18
|
'eleven_english_sts_v2' => 0.30,
|
|
@@ -23,11 +24,12 @@ module ElevenRb
|
|
|
23
24
|
|
|
24
25
|
# Initialize cost info
|
|
25
26
|
#
|
|
26
|
-
# @param text [String] the text being converted
|
|
27
|
+
# @param text [String, nil] the text being converted
|
|
28
|
+
# @param character_count [Integer, nil] direct character count (alternative to text)
|
|
27
29
|
# @param voice_id [String] the voice ID
|
|
28
30
|
# @param model_id [String] the model ID
|
|
29
|
-
def initialize(
|
|
30
|
-
@character_count = text
|
|
31
|
+
def initialize(voice_id:, model_id:, text: nil, character_count: nil)
|
|
32
|
+
@character_count = character_count || text&.length || 0
|
|
31
33
|
@voice_id = voice_id
|
|
32
34
|
@model_id = model_id
|
|
33
35
|
end
|
data/lib/eleven_rb/resources/models.rb
CHANGED
|
@@ -54,6 +54,13 @@ module ElevenRb
|
|
|
54
54
|
get('eleven_multilingual_v2') || tts_capable.first
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
+
# Get the latest/most capable model
|
|
58
|
+
#
|
|
59
|
+
# @return [Objects::Model, nil]
|
|
60
|
+
def latest
|
|
61
|
+
get('eleven_v3') || default
|
|
62
|
+
end
|
|
63
|
+
|
|
57
64
|
# Get model IDs as array
|
|
58
65
|
#
|
|
59
66
|
# @return [Array<String>]
|
data/lib/eleven_rb/resources/text_to_dialogue.rb
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ElevenRb
|
|
4
|
+
module Resources
|
|
5
|
+
# Text-to-dialogue resource for multi-speaker audio generation
|
|
6
|
+
#
|
|
7
|
+
# @example Generate dialogue
|
|
8
|
+
# audio = client.text_to_dialogue.generate([
|
|
9
|
+
# { text: "[excited] Welcome!", voice_id: "voice_abc" },
|
|
10
|
+
# { text: "[laughs] Thanks!", voice_id: "voice_xyz" }
|
|
11
|
+
# ])
|
|
12
|
+
# audio.save_to_file("dialogue.mp3")
|
|
13
|
+
class TextToDialogue < Base
|
|
14
|
+
DEFAULT_MODEL = 'eleven_v3'
|
|
15
|
+
MAX_VOICES_PER_REQUEST = 10
|
|
16
|
+
MAX_TEXT_LENGTH = 5000
|
|
17
|
+
|
|
18
|
+
# Generate dialogue audio from multiple speaker inputs
|
|
19
|
+
#
|
|
20
|
+
# @param inputs [Array<Hash>] Array of { text:, voice_id: } hashes
|
|
21
|
+
# @param model_id [String] Model to use (only eleven_v3 supported)
|
|
22
|
+
# @param language_code [String, nil] ISO 639-1 language code
|
|
23
|
+
# @param settings [Hash, nil] Generation settings (stability: 0.0-1.0)
|
|
24
|
+
# @param seed [Integer, nil] Seed for reproducibility
|
|
25
|
+
# @param output_format [String] Audio output format
|
|
26
|
+
# @param apply_text_normalization [String] "auto", "on", or "off"
|
|
27
|
+
# @return [Objects::Audio]
|
|
28
|
+
def generate(
|
|
29
|
+
inputs,
|
|
30
|
+
model_id: DEFAULT_MODEL,
|
|
31
|
+
language_code: nil,
|
|
32
|
+
settings: nil,
|
|
33
|
+
seed: nil,
|
|
34
|
+
output_format: 'mp3_44100_128',
|
|
35
|
+
apply_text_normalization: 'auto'
|
|
36
|
+
)
|
|
37
|
+
validate_inputs!(inputs)
|
|
38
|
+
|
|
39
|
+
body = build_request_body(inputs, model_id, language_code, settings, seed,
|
|
40
|
+
apply_text_normalization)
|
|
41
|
+
|
|
42
|
+
response = post_binary(
|
|
43
|
+
"/text-to-dialogue?output_format=#{output_format}",
|
|
44
|
+
body
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
build_audio_response(response, inputs, output_format, model_id)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
def build_request_body(inputs, model_id, language_code, settings, seed,
|
|
53
|
+
apply_text_normalization)
|
|
54
|
+
body = {
|
|
55
|
+
inputs: inputs.map { |i| { text: i[:text], voice_id: i[:voice_id] } },
|
|
56
|
+
model_id: model_id,
|
|
57
|
+
apply_text_normalization: apply_text_normalization
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
body[:language_code] = language_code if language_code
|
|
61
|
+
body[:settings] = settings if settings
|
|
62
|
+
body[:seed] = seed if seed
|
|
63
|
+
body
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def build_audio_response(response, inputs, output_format, model_id)
|
|
67
|
+
total_text = inputs.map { |i| i[:text] }.join("\n")
|
|
68
|
+
total_chars = inputs.sum { |i| i[:text].length }
|
|
69
|
+
primary_voice = inputs.first[:voice_id]
|
|
70
|
+
|
|
71
|
+
audio = Objects::Audio.new(
|
|
72
|
+
data: response, format: output_format,
|
|
73
|
+
voice_id: primary_voice, text: total_text, model_id: model_id
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
cost_info = Objects::CostInfo.new(
|
|
77
|
+
character_count: total_chars, voice_id: primary_voice, model_id: model_id
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
http_client.config.trigger(
|
|
81
|
+
:on_audio_generated,
|
|
82
|
+
audio: audio, voice_id: primary_voice,
|
|
83
|
+
text: total_text, cost_info: cost_info.to_h
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
audio
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def validate_inputs!(inputs)
|
|
90
|
+
raise Errors::ValidationError, 'inputs must be a non-empty array' unless inputs.is_a?(Array) && !inputs.empty?
|
|
91
|
+
|
|
92
|
+
inputs.each_with_index do |input, i|
|
|
93
|
+
validate_presence!(input[:text], "inputs[#{i}].text")
|
|
94
|
+
validate_presence!(input[:voice_id], "inputs[#{i}].voice_id")
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
unique_voices = inputs.map { |i| i[:voice_id] }.uniq
|
|
98
|
+
if unique_voices.length > MAX_VOICES_PER_REQUEST
|
|
99
|
+
raise Errors::ValidationError,
|
|
100
|
+
"Maximum #{MAX_VOICES_PER_REQUEST} unique voices per request " \
|
|
101
|
+
"(got #{unique_voices.length})"
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
total_chars = inputs.sum { |i| i[:text].length }
|
|
105
|
+
return unless total_chars > MAX_TEXT_LENGTH
|
|
106
|
+
|
|
107
|
+
raise Errors::ValidationError,
|
|
108
|
+
"Total text length #{total_chars} exceeds maximum " \
|
|
109
|
+
"#{MAX_TEXT_LENGTH} characters"
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
data/lib/eleven_rb/version.rb
CHANGED
data/lib/eleven_rb.rb
CHANGED
|
@@ -109,6 +109,7 @@ require_relative 'eleven_rb/resources/user'
|
|
|
109
109
|
require_relative 'eleven_rb/resources/sound_effects'
|
|
110
110
|
require_relative 'eleven_rb/resources/music'
|
|
111
111
|
require_relative 'eleven_rb/resources/speech_to_speech'
|
|
112
|
+
require_relative 'eleven_rb/resources/text_to_dialogue'
|
|
112
113
|
|
|
113
114
|
# High-level components
|
|
114
115
|
require_relative 'eleven_rb/voice_slot_manager'
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: eleven_rb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Web Ventures Ltd
|
|
@@ -122,9 +122,9 @@ dependencies:
|
|
|
122
122
|
- !ruby/object:Gem::Version
|
|
123
123
|
version: '0.9'
|
|
124
124
|
description: |
|
|
125
|
-
A
|
|
126
|
-
|
|
127
|
-
|
|
125
|
+
A comprehensive Ruby client for ElevenLabs covering Text-to-Speech,
|
|
126
|
+
Speech-to-Speech, Text-to-Dialogue, Sound Effects, and Music generation
|
|
127
|
+
with voice management, streaming, and built-in cost tracking.
|
|
128
128
|
email:
|
|
129
129
|
- gems@dev.webven.nz
|
|
130
130
|
executables: []
|
|
@@ -158,6 +158,7 @@ files:
|
|
|
158
158
|
- lib/eleven_rb/resources/music.rb
|
|
159
159
|
- lib/eleven_rb/resources/sound_effects.rb
|
|
160
160
|
- lib/eleven_rb/resources/speech_to_speech.rb
|
|
161
|
+
- lib/eleven_rb/resources/text_to_dialogue.rb
|
|
161
162
|
- lib/eleven_rb/resources/text_to_speech.rb
|
|
162
163
|
- lib/eleven_rb/resources/user.rb
|
|
163
164
|
- lib/eleven_rb/resources/voice_library.rb
|
|
@@ -189,5 +190,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
189
190
|
requirements: []
|
|
190
191
|
rubygems_version: 3.6.9
|
|
191
192
|
specification_version: 4
|
|
192
|
-
summary: Ruby client for the ElevenLabs
|
|
193
|
+
summary: Ruby client for the ElevenLabs Audio AI API
|
|
193
194
|
test_files: []
|