elevenlabs 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +180 -50
- data/lib/elevenlabs/client.rb +86 -0
- data/lib/elevenlabs/errors.rb +1 -1
- data/lib/elevenlabs.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59c61b4f80cd6efaaa39b93d6ab8ebfdc3a3edce59d2012a984ce8b9f032352d
|
4
|
+
data.tar.gz: 06c3253d6d21cd59fa5942620de9dee173a3ac926c484dcd17ca378793f6920e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f1cd0d00b602fe88356591d3292dfc6154d722eab9904dea0ec0cd99a5340651be30f44e0470783c1504cabeeed4a0e4dcfab6732c742a650c3c4161129c775
|
7
|
+
data.tar.gz: b9e3f4821d6e355f811f0d783b36cec64dd103a83713eb13a3ff3ae8c886009294371df5de825ff9287be969b92489b19c90ce426584a81ddb34d8f6e5e97182
|
data/README.md
CHANGED
@@ -12,6 +12,8 @@ This gem provides an easy-to-use interface for:
|
|
12
12
|
- **Editing an existing voice**
|
13
13
|
- **Deleting a voice**
|
14
14
|
- **Converting text to speech** and retrieving the generated audio
|
15
|
+
- **Designing a voice** based on a text description
|
16
|
+
- **Streaming text-to-speech audio**
|
15
17
|
|
16
18
|
All requests are handled via [Faraday](https://github.com/lostisland/faraday).
|
17
19
|
|
@@ -39,6 +41,7 @@ All requests are handled via [Faraday](https://github.com/lostisland/faraday).
|
|
39
41
|
|
40
42
|
- **Simple and intuitive API client** for ElevenLabs.
|
41
43
|
- **Multipart file uploads** for training custom voices.
|
44
|
+
- **Voice design** via text prompts to generate voice previews.
|
42
45
|
- **Automatic authentication** via API key configuration.
|
43
46
|
- **Error handling** with custom exceptions.
|
44
47
|
- **Rails integration support** (including credentials storage).
|
@@ -52,16 +55,25 @@ Add the gem to your `Gemfile`:
|
|
52
55
|
```ruby
|
53
56
|
gem "elevenlabs"
|
54
57
|
```
|
58
|
+
|
55
59
|
Then run:
|
56
|
-
|
60
|
+
|
61
|
+
```bash
|
57
62
|
bundle install
|
58
63
|
```
|
64
|
+
|
59
65
|
Or install it directly using:
|
60
|
-
|
66
|
+
|
67
|
+
```bash
|
61
68
|
gem install elevenlabs
|
62
69
|
```
|
63
|
-
|
64
|
-
|
70
|
+
|
71
|
+
---
|
72
|
+
|
73
|
+
## Usage
|
74
|
+
|
75
|
+
### Basic Example (Standalone Ruby)
|
76
|
+
|
65
77
|
```ruby
|
66
78
|
require "elevenlabs"
|
67
79
|
|
@@ -85,27 +97,58 @@ audio_data = client.text_to_speech(voice_id, text)
|
|
85
97
|
# 5. Save the audio file
|
86
98
|
File.open("output.mp3", "wb") { |f| f.write(audio_data) }
|
87
99
|
puts "Audio file saved to output.mp3"
|
100
|
+
|
101
|
+
# 6. Design a voice with a text prompt
|
102
|
+
response = client.design_voice(
|
103
|
+
"A deep, resonant male voice with a British accent, suitable for storytelling",
|
104
|
+
output_format: "mp3_44100_192",
|
105
|
+
model_id: "eleven_multilingual_ttv_v2",
|
106
|
+
text: "In a land far away, where the mountains meet the sky, a great adventure began. Brave heroes embarked on a quest to find the lost artifact, facing challenges and forging bonds that would last a lifetime. Their journey took them through enchanted forests, across raging rivers, and into the heart of ancient ruins.",
|
107
|
+
auto_generate_text: false,
|
108
|
+
loudness: 0.5,
|
109
|
+
seed: 12345,
|
110
|
+
guidance_scale: 5.0,
|
111
|
+
stream_previews: false
|
112
|
+
)
|
113
|
+
|
114
|
+
# 7. Save voice preview audio
|
115
|
+
require "base64"
|
116
|
+
response["previews"].each_with_index do |preview, index|
|
117
|
+
audio_data = Base64.decode64(preview["audio_base_64"])
|
118
|
+
File.open("preview_#{index}.mp3", "wb") { |f| f.write(audio_data) }
|
119
|
+
puts "Saved preview #{index + 1} to preview_#{index}.mp3"
|
120
|
+
end
|
88
121
|
```
|
122
|
+
|
89
123
|
Note: You can override the API key per request:
|
124
|
+
|
90
125
|
```ruby
|
91
126
|
client = Elevenlabs::Client.new(api_key: "DIFFERENT_API_KEY")
|
92
127
|
```
|
93
|
-
|
94
|
-
|
128
|
+
|
129
|
+
### Rails Integration
|
130
|
+
|
131
|
+
#### Store API Key in Rails Credentials
|
132
|
+
|
95
133
|
1. Open your encrypted credentials:
|
96
|
-
|
134
|
+
|
135
|
+
```bash
|
97
136
|
EDITOR=vim rails credentials:edit
|
98
137
|
```
|
99
138
|
|
100
139
|
2. Add the ElevenLabs API key:
|
101
|
-
|
140
|
+
|
141
|
+
```yaml
|
102
142
|
eleven_labs:
|
103
143
|
api_key: YOUR_SECURE_KEY
|
104
144
|
```
|
145
|
+
|
105
146
|
3. Save and exit. Rails will securely encrypt your API key.
|
106
147
|
|
107
|
-
Rails Initializer
|
108
|
-
|
148
|
+
#### Rails Initializer
|
149
|
+
|
150
|
+
Create an initializer file: `config/initializers/elevenlabs.rb`
|
151
|
+
|
109
152
|
```ruby
|
110
153
|
# config/initializers/elevenlabs.rb
|
111
154
|
require "elevenlabs"
|
@@ -116,59 +159,104 @@ Rails.application.config.to_prepare do
|
|
116
159
|
end
|
117
160
|
end
|
118
161
|
```
|
162
|
+
|
119
163
|
Now you can simply call:
|
164
|
+
|
120
165
|
```ruby
|
121
166
|
client = Elevenlabs::Client.new
|
122
167
|
```
|
168
|
+
|
123
169
|
without manually providing an API key.
|
124
170
|
|
125
|
-
|
126
|
-
|
171
|
+
#### Controller Example
|
172
|
+
|
173
|
+
```ruby
|
174
|
+
class AudioController < ApplicationController
|
175
|
+
def generate
|
176
|
+
client = Elevenlabs::Client.new
|
177
|
+
voice_id = params[:voice_id]
|
178
|
+
text = params[:text]
|
179
|
+
|
180
|
+
begin
|
181
|
+
audio_data = client.text_to_speech(voice_id, text)
|
182
|
+
send_data audio_data, type: "audio/mpeg", disposition: "attachment", filename: "output.mp3"
|
183
|
+
rescue Elevenlabs::APIError => e
|
184
|
+
render json: { error: e.message }, status: :bad_request
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
```
|
189
|
+
|
190
|
+
---
|
191
|
+
|
192
|
+
## Endpoints
|
193
|
+
|
194
|
+
1. **List Voices**
|
195
|
+
|
127
196
|
```ruby
|
128
197
|
client.list_voices
|
129
198
|
# => { "voices" => [...] }
|
130
199
|
```
|
131
|
-
|
200
|
+
|
201
|
+
2. **Get Voice Details**
|
202
|
+
|
132
203
|
```ruby
|
133
204
|
client.get_voice("VOICE_ID")
|
134
205
|
# => { "voice_id" => "...", "name" => "...", ... }
|
135
206
|
```
|
136
|
-
|
207
|
+
|
208
|
+
3. **Create a Custom Voice**
|
209
|
+
|
137
210
|
```ruby
|
138
211
|
sample_files = [File.open("sample1.mp3", "rb")]
|
139
212
|
client.create_voice("Custom Voice", sample_files, description: "My custom AI voice")
|
140
213
|
# => JSON response with new voice details
|
141
214
|
```
|
142
|
-
|
215
|
+
|
216
|
+
4. **Check if a Voice is Banned**
|
217
|
+
|
143
218
|
```ruby
|
144
219
|
sample_files = [File.open("trump.mp3", "rb")]
|
145
220
|
client.create_voice("Donald Trump", sample_files, description: "My Trump Voice")
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
=> true
|
221
|
+
# => {"voice_id"=>"<RETURNED_VOICE_ID>", "requires_verification"=>false}
|
222
|
+
trump = "<RETURNED_VOICE_ID>"
|
223
|
+
client.banned?(trump)
|
224
|
+
# => true
|
150
225
|
```
|
151
|
-
|
226
|
+
|
227
|
+
5. **Edit a Voice**
|
228
|
+
|
152
229
|
```ruby
|
153
230
|
client.edit_voice("VOICE_ID", name: "Updated Voice Name")
|
154
231
|
# => JSON response with updated details
|
155
232
|
```
|
156
|
-
|
233
|
+
|
234
|
+
6. **Delete a Voice**
|
235
|
+
|
157
236
|
```ruby
|
158
237
|
client.delete_voice("VOICE_ID")
|
159
238
|
# => JSON response acknowledging deletion
|
160
239
|
```
|
161
|
-
|
240
|
+
|
241
|
+
7. **Convert Text to Speech**
|
242
|
+
|
162
243
|
```ruby
|
163
244
|
audio_data = client.text_to_speech("VOICE_ID", "Hello world!")
|
164
245
|
File.open("output.mp3", "wb") { |f| f.write(audio_data) }
|
165
246
|
```
|
166
|
-
8 Stream Text to Speech
|
167
|
-
stream from terminal
|
168
|
-
```ruby
|
169
|
-
Mac: brew install sox
|
170
|
-
Linux: sudo apt install sox
|
171
247
|
|
248
|
+
8. **Stream Text to Speech**
|
249
|
+
|
250
|
+
Stream from terminal:
|
251
|
+
|
252
|
+
```bash
|
253
|
+
# Mac: Install sox
|
254
|
+
brew install sox
|
255
|
+
# Linux: Install sox
|
256
|
+
sudo apt install sox
|
257
|
+
```
|
258
|
+
|
259
|
+
```ruby
|
172
260
|
IO.popen("play -t mp3 -", "wb") do |audio_pipe| # Notice "wb" (write binary)
|
173
261
|
client.text_to_speech_stream("VOICE_ID", "Some text to stream back in chunks") do |chunk|
|
174
262
|
audio_pipe.write(chunk.b) # Ensure chunk is written as binary
|
@@ -176,19 +264,45 @@ IO.popen("play -t mp3 -", "wb") do |audio_pipe| # Notice "wb" (write binary)
|
|
176
264
|
end
|
177
265
|
```
|
178
266
|
|
179
|
-
|
267
|
+
9. **Design a Voice**
|
268
|
+
|
269
|
+
Generate voice previews based on a text description:
|
270
|
+
|
271
|
+
```ruby
|
272
|
+
response = client.design_voice(
|
273
|
+
"A deep, resonant male voice with a British accent, suitable for storytelling",
|
274
|
+
output_format: "mp3_44100_192",
|
275
|
+
model_id: "eleven_multilingual_ttv_v2",
|
276
|
+
text: "In a land far away, where the mountains meet the sky, a great adventure began. Brave heroes embarked on a quest to find the lost artifact, facing challenges and forging bonds that would last a lifetime. Their journey took them through enchanted forests, across raging rivers, and into the heart of ancient ruins.",
|
277
|
+
auto_generate_text: false
|
278
|
+
)
|
279
|
+
# Save the first preview to an MP3 file
|
280
|
+
require "base64"
|
281
|
+
audio_data = Base64.decode64(response["previews"][0]["audio_base_64"])
|
282
|
+
File.open("voice_preview.mp3", "wb") { |f| f.write(audio_data) }
|
283
|
+
```
|
284
|
+
|
285
|
+
---
|
286
|
+
|
287
|
+
## Error Handling
|
288
|
+
|
180
289
|
When the API returns an error, the gem raises specific exceptions:
|
181
290
|
|
182
|
-
Exception
|
183
|
-
|
184
|
-
Elevenlabs::
|
185
|
-
Elevenlabs::
|
186
|
-
Elevenlabs::
|
291
|
+
| Exception | Meaning |
|
292
|
+
|-------------------------------|----------------------------------|
|
293
|
+
| `Elevenlabs::BadRequestError` | Invalid request parameters |
|
294
|
+
| `Elevenlabs::AuthenticationError` | Invalid API key |
|
295
|
+
| `Elevenlabs::NotFoundError` | Resource (voice) not found |
|
296
|
+
| `Elevenlabs::UnprocessableEntityError` | Unprocessable entity (e.g., invalid input format) |
|
297
|
+
| `Elevenlabs::APIError` | General API failure |
|
298
|
+
|
187
299
|
Example:
|
188
300
|
|
189
301
|
```ruby
|
190
302
|
begin
|
191
|
-
client.
|
303
|
+
client.design_voice("Short description") # Too short, will raise error
|
304
|
+
rescue Elevenlabs::UnprocessableEntityError => e
|
305
|
+
puts "Validation error: #{e.message}"
|
192
306
|
rescue Elevenlabs::AuthenticationError => e
|
193
307
|
puts "Invalid API key: #{e.message}"
|
194
308
|
rescue Elevenlabs::NotFoundError => e
|
@@ -198,38 +312,54 @@ rescue Elevenlabs::APIError => e
|
|
198
312
|
end
|
199
313
|
```
|
200
314
|
|
201
|
-
|
202
|
-
|
315
|
+
---
|
316
|
+
|
317
|
+
## Development
|
318
|
+
|
319
|
+
Clone this repository:
|
320
|
+
|
203
321
|
```bash
|
204
322
|
git clone https://github.com/your-username/elevenlabs.git
|
205
323
|
cd elevenlabs
|
206
324
|
```
|
207
|
-
|
325
|
+
|
326
|
+
Install dependencies:
|
327
|
+
|
208
328
|
```bash
|
209
329
|
bundle install
|
210
330
|
```
|
211
|
-
|
331
|
+
|
332
|
+
Build the gem:
|
333
|
+
|
212
334
|
```bash
|
213
335
|
gem build elevenlabs.gemspec
|
214
336
|
```
|
215
|
-
|
337
|
+
|
338
|
+
Install the gem locally:
|
339
|
+
|
216
340
|
```bash
|
217
|
-
gem install ./elevenlabs-0.0.
|
341
|
+
gem install ./elevenlabs-0.0.5.gem
|
218
342
|
```
|
219
|
-
|
343
|
+
|
344
|
+
---
|
345
|
+
|
346
|
+
## Contributing
|
347
|
+
|
220
348
|
Contributions are welcome! Please follow these steps:
|
221
349
|
|
222
|
-
Fork the repository
|
223
|
-
Create a feature branch (git checkout -b feature/my-new-feature)
|
224
|
-
Commit your changes (git commit -am 'Add new feature')
|
225
|
-
Push to your branch (git push origin feature/my-new-feature)
|
226
|
-
Create a Pull Request describing your changes
|
350
|
+
1. Fork the repository
|
351
|
+
2. Create a feature branch (`git checkout -b feature/my-new-feature`)
|
352
|
+
3. Commit your changes (`git commit -am 'Add new feature'`)
|
353
|
+
4. Push to your branch (`git push origin feature/my-new-feature`)
|
354
|
+
5. Create a Pull Request describing your changes
|
355
|
+
|
227
356
|
For bug reports, please open an issue with details.
|
228
357
|
|
229
|
-
|
358
|
+
---
|
359
|
+
|
360
|
+
## License
|
361
|
+
|
230
362
|
This project is licensed under the MIT License. See the LICENSE file for details.
|
231
363
|
|
232
|
-
⭐ Thank you for using the Elevenlabs Ruby Gem!
|
364
|
+
⭐ Thank you for using the Elevenlabs Ruby Gem!
|
233
365
|
If you have any questions or suggestions, feel free to open an issue or submit a Pull Request!
|
234
|
-
|
235
|
-
# elevenlabs
|
data/lib/elevenlabs/client.rb
CHANGED
@@ -88,6 +88,92 @@ module Elevenlabs
|
|
88
88
|
handle_error(e)
|
89
89
|
end
|
90
90
|
|
91
|
+
#####################################################
|
92
|
+
# Design a Voice #
|
93
|
+
# (POST /v1/text-to-voice/design) #
|
94
|
+
#####################################################
|
95
|
+
|
96
|
+
# Designs a voice based on a description
|
97
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-voice/design
|
98
|
+
#
|
99
|
+
# @param [String] voice_description - Description of the voice (20-1000 characters)
|
100
|
+
# @param [Hash] options - Optional parameters
|
101
|
+
# :output_format => String (e.g., "mp3_44100_192", default: "mp3_44100_192")
|
102
|
+
# :model_id => String (e.g., "eleven_multilingual_ttv_v2", "eleven_ttv_v3")
|
103
|
+
# :text => String (100-1000 characters, optional)
|
104
|
+
# :auto_generate_text => Boolean (default: false)
|
105
|
+
# :loudness => Float (-1 to 1, default: 0.5)
|
106
|
+
# :seed => Integer (0 to 2147483647, optional)
|
107
|
+
# :guidance_scale => Float (0 to 100, default: 5)
|
108
|
+
# :stream_previews => Boolean (default: false)
|
109
|
+
# :remixing_session_id => String (optional)
|
110
|
+
# :remixing_session_iteration_id => String (optional)
|
111
|
+
# :quality => Float (-1 to 1, optional)
|
112
|
+
# :reference_audio_base64 => String (base64 encoded audio, optional, requires eleven_ttv_v3)
|
113
|
+
# :prompt_strength => Float (0 to 1, optional, requires eleven_ttv_v3)
|
114
|
+
#
|
115
|
+
# @return [Hash] JSON response containing previews and text
|
116
|
+
def design_voice(voice_description, options = {})
|
117
|
+
endpoint = "/v1/text-to-voice/design"
|
118
|
+
request_body = { voice_description: voice_description }
|
119
|
+
|
120
|
+
# Add optional parameters if provided
|
121
|
+
request_body[:output_format] = options[:output_format] if options[:output_format]
|
122
|
+
request_body[:model_id] = options[:model_id] if options[:model_id]
|
123
|
+
request_body[:text] = options[:text] if options[:text]
|
124
|
+
request_body[:auto_generate_text] = options[:auto_generate_text] unless options[:auto_generate_text].nil?
|
125
|
+
request_body[:loudness] = options[:loudness] if options[:loudness]
|
126
|
+
request_body[:seed] = options[:seed] if options[:seed]
|
127
|
+
request_body[:guidance_scale] = options[:guidance_scale] if options[:guidance_scale]
|
128
|
+
request_body[:stream_previews] = options[:stream_previews] unless options[:stream_previews].nil?
|
129
|
+
request_body[:remixing_session_id] = options[:remixing_session_id] if options[:remixing_session_id]
|
130
|
+
request_body[:remixing_session_iteration_id] = options[:remixing_session_iteration_id] if options[:remixing_session_iteration_id]
|
131
|
+
request_body[:quality] = options[:quality] if options[:quality]
|
132
|
+
request_body[:reference_audio_base64] = options[:reference_audio_base64] if options[:reference_audio_base64]
|
133
|
+
request_body[:prompt_strength] = options[:prompt_strength] if options[:prompt_strength]
|
134
|
+
|
135
|
+
response = @connection.post(endpoint) do |req|
|
136
|
+
req.headers = default_headers
|
137
|
+
req.body = request_body.to_json
|
138
|
+
end
|
139
|
+
|
140
|
+
JSON.parse(response.body)
|
141
|
+
rescue Faraday::ClientError => e
|
142
|
+
handle_error(e)
|
143
|
+
end
|
144
|
+
|
145
|
+
#####################################################
|
146
|
+
# Create a Voice #
|
147
|
+
# (POST /v1/text-to-voice) #
|
148
|
+
#####################################################
|
149
|
+
# Creates a voice from a generated_voice_id returned by design_voice
|
150
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-voice
|
151
|
+
#
|
152
|
+
# @param [String] voice_name - Name of the voice
|
153
|
+
# @param [String] voice_description - Description of the voice (20-1000 characters)
|
154
|
+
# @param [String] generated_voice_id - The generated voice ID from design_voice
|
155
|
+
# @param [Hash] labels - Optional metadata for the voice
|
156
|
+
# @param [Array<String>] played_not_selected_voice_ids - Optional list of voice IDs played but not selected
|
157
|
+
#
|
158
|
+
# @return [Hash] JSON response containing voice_id and other voice details
|
159
|
+
def create_from_generated_voice(voice_name, voice_description, generated_voice_id, labels: nil, played_not_selected_voice_ids: nil)
|
160
|
+
endpoint = "/v1/text-to-voice"
|
161
|
+
request_body = {
|
162
|
+
voice_name: voice_name,
|
163
|
+
voice_description: voice_description,
|
164
|
+
generated_voice_id: generated_voice_id,
|
165
|
+
labels: labels,
|
166
|
+
played_not_selected_voice_ids: played_not_selected_voice_ids
|
167
|
+
}.compact
|
168
|
+
|
169
|
+
response = @connection.post(endpoint) do |req|
|
170
|
+
req.headers = default_headers
|
171
|
+
req.body = request_body.to_json
|
172
|
+
end
|
173
|
+
JSON.parse(response.body)
|
174
|
+
rescue Faraday::ClientError => e
|
175
|
+
handle_error(e)
|
176
|
+
end
|
91
177
|
|
92
178
|
#####################################################
|
93
179
|
# GET Voices #
|
data/lib/elevenlabs/errors.rb
CHANGED
data/lib/elevenlabs.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elevenlabs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- hackliteracy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.1'
|
41
41
|
description: This gem provides a convenient Ruby interface to the ElevenLabs TTS,
|
42
|
-
Voice Cloning, and Streaming endpoints.
|
42
|
+
Voice Cloning, Voice Design and Streaming endpoints.
|
43
43
|
email:
|
44
44
|
- hackliteracy@gmail.com
|
45
45
|
executables: []
|