elevenlabs 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +77 -19
- data/lib/elevenlabs/client.rb +164 -0
- data/lib/elevenlabs.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2daafae7b6dbf3724b93ce2022b2fe6ac3703bfbcac12326b75e1a37cd188a39
|
|
4
|
+
data.tar.gz: ba2227a765efc7538e4aadbe0fcb0917a55c1ba70540a2660b4c75b2545f85da
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 07d40969dd5fdf8926c2f09c21359df4b5060b1f212797de03ef16c4fcf0dc2b6495c476a9a0741247dccf95dd30fde8d6da7404370b6acc4b61a0ea0ce8f7cd
|
|
7
|
+
data.tar.gz: 927e01fdc01e4f985466b62e2725f676117f757d3f809d8b4da7ea420e54c0e0a57ab2279cf031dd466451ce8c7a96dd76219da4a7e3d02ada842682a20246f8
|
data/README.md
CHANGED
|
@@ -14,6 +14,7 @@ This gem provides an easy-to-use interface for:
|
|
|
14
14
|
- **Converting text to speech** and retrieving the generated audio
|
|
15
15
|
- **Designing a voice** based on a text description
|
|
16
16
|
- **Streaming text-to-speech audio**
|
|
17
|
+
- **Music Generation**
|
|
17
18
|
|
|
18
19
|
All requests are handled via [Faraday](https://github.com/lostisland/faraday).
|
|
19
20
|
|
|
@@ -196,16 +197,20 @@ end
|
|
|
196
197
|
```ruby
|
|
197
198
|
client.list_voices
|
|
198
199
|
# => { "voices" => [...] }
|
|
199
|
-
```
|
|
200
200
|
|
|
201
|
-
2.
|
|
201
|
+
2. **List Models**
|
|
202
|
+
|
|
203
|
+
client.list_models
|
|
204
|
+
# => [...]
|
|
205
|
+
|
|
206
|
+
3. **Get Voice Details**
|
|
202
207
|
|
|
203
208
|
```ruby
|
|
204
209
|
client.get_voice("VOICE_ID")
|
|
205
210
|
# => { "voice_id" => "...", "name" => "...", ... }
|
|
206
211
|
```
|
|
207
212
|
|
|
208
|
-
|
|
213
|
+
4. **Create a Custom Voice**
|
|
209
214
|
|
|
210
215
|
```ruby
|
|
211
216
|
sample_files = [File.open("sample1.mp3", "rb")]
|
|
@@ -213,7 +218,7 @@ client.create_voice("Custom Voice", sample_files, description: "My custom AI voi
|
|
|
213
218
|
# => JSON response with new voice details
|
|
214
219
|
```
|
|
215
220
|
|
|
216
|
-
|
|
221
|
+
5. **Check if a Voice is Banned**
|
|
217
222
|
|
|
218
223
|
```ruby
|
|
219
224
|
sample_files = [File.open("trump.mp3", "rb")]
|
|
@@ -224,28 +229,28 @@ client.banned?(trump)
|
|
|
224
229
|
# => true
|
|
225
230
|
```
|
|
226
231
|
|
|
227
|
-
|
|
232
|
+
6. **Edit a Voice**
|
|
228
233
|
|
|
229
234
|
```ruby
|
|
230
235
|
client.edit_voice("VOICE_ID", name: "Updated Voice Name")
|
|
231
236
|
# => JSON response with updated details
|
|
232
237
|
```
|
|
233
238
|
|
|
234
|
-
|
|
239
|
+
7. **Delete a Voice**
|
|
235
240
|
|
|
236
241
|
```ruby
|
|
237
242
|
client.delete_voice("VOICE_ID")
|
|
238
243
|
# => JSON response acknowledging deletion
|
|
239
244
|
```
|
|
240
245
|
|
|
241
|
-
|
|
246
|
+
8. **Convert Text to Speech**
|
|
242
247
|
|
|
243
248
|
```ruby
|
|
244
249
|
audio_data = client.text_to_speech("VOICE_ID", "Hello world!")
|
|
245
250
|
File.open("output.mp3", "wb") { |f| f.write(audio_data) }
|
|
246
251
|
```
|
|
247
252
|
|
|
248
|
-
|
|
253
|
+
9. **Stream Text to Speech**
|
|
249
254
|
|
|
250
255
|
Stream from terminal:
|
|
251
256
|
|
|
@@ -264,22 +269,75 @@ IO.popen("play -t mp3 -", "wb") do |audio_pipe| # Notice "wb" (write binary)
|
|
|
264
269
|
end
|
|
265
270
|
```
|
|
266
271
|
|
|
267
|
-
|
|
272
|
+
10. **Create a Voice from a Design**
|
|
268
273
|
|
|
269
|
-
|
|
274
|
+
Once you’ve generated a voice design using client.design_voice, you can turn it into a permanent voice in your account by passing its generated_voice_id to client.create_from_generated_voice.
|
|
270
275
|
|
|
276
|
+
Step 1: Design a voice (returns previews + generated_voice_id)
|
|
271
277
|
```ruby
|
|
272
|
-
|
|
273
|
-
"A
|
|
274
|
-
output_format: "mp3_44100_192",
|
|
278
|
+
design_response = client.design_voice(
|
|
279
|
+
"A warm, friendly female voice with a slight Australian accent",
|
|
275
280
|
model_id: "eleven_multilingual_ttv_v2",
|
|
276
|
-
text: "
|
|
281
|
+
text: "Welcome to our podcast, where every story is an adventure, taking you on a journey through fascinating worlds, inspiring voices, and unforgettable moments.",
|
|
277
282
|
auto_generate_text: false
|
|
278
283
|
)
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
284
|
+
|
|
285
|
+
generated_voice_id = design_response["previews"].first["generated_voice_id"] #three previews are given, but for this example we will use the first to create a voice here
|
|
286
|
+
|
|
287
|
+
# Step 2: Create the permanent voice
|
|
288
|
+
create_response = client.create_from_generated_voice(
|
|
289
|
+
"Friendly Aussie",
|
|
290
|
+
"A warm, friendly Australian-accented voice for podcasts",
|
|
291
|
+
generated_voice_id,
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
voice_id = create_response["voice_id"] # This is the ID you can use for TTS
|
|
295
|
+
|
|
296
|
+
# Step 3: Use the new voice for TTS
|
|
297
|
+
audio_data = client.text_to_speech(voice_id, "This is my new permanent designed voice.")
|
|
298
|
+
File.open("friendly_aussie.mp3", "wb") { |f| f.write(audio_data) }
|
|
299
|
+
```
|
|
300
|
+
Important notes:
|
|
301
|
+
|
|
302
|
+
Always store the voice_id returned by client.create_from_generated_voice. This is the permanent identifier for TTS.
|
|
303
|
+
|
|
304
|
+
Designed voices cannot be used for TTS until they are created in your account.
|
|
305
|
+
|
|
306
|
+
If the voice is not immediately available for TTS, wait a few seconds or check its status via client.get_voice(voice_id) until it’s "active".
|
|
307
|
+
|
|
308
|
+
11. **Create a Multi-Speaker Dialogue**
|
|
309
|
+
```ruby
|
|
310
|
+
inputs = [{text: "It smells like updog in here", voice_id: "TX3LPaxmHKxFdv7VOQHJ"}, {text: "What's updog?", voice_id: "RILOU7YmBhvwJGDGjNmP"}, {text: "Not much, you?", voice_id: "TX3LPaxmHKxFdv7VOQHJ"}]
|
|
311
|
+
|
|
312
|
+
audio_data = client.text_to_dialogue(inputs)
|
|
313
|
+
File.open("what's updog.mp3", "wb") { |f| f.write(audio_data) }
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
12. **Generate Music from prompt**
|
|
317
|
+
```ruby
|
|
318
|
+
audio = client.compose_music(prompt: "Lo-fi hip hop beat", music_length_ms: 30000)
|
|
319
|
+
File.binwrite("lofi.mp3", audio)
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
13. **Stream Music Generated from prompt**
|
|
323
|
+
```ruby
|
|
324
|
+
File.open("epic_stream.mp3", "wb") do |f|
|
|
325
|
+
client.compose_music_stream(prompt: "Epic orchestral build", music_length_ms: 60000) do |chunk|
|
|
326
|
+
f.write(chunk)
|
|
327
|
+
end
|
|
328
|
+
end
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
14. **Generate Music with Detailed Metadata (metadata + audio) from prompt**
|
|
332
|
+
```ruby
|
|
333
|
+
result = client.compose_music_detailed(prompt: "Jazz piano trio", music_length_ms: 20000)
|
|
334
|
+
puts result # raw multipart data (needs parsing)
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
15. **Create a music composition plan from prompt**
|
|
338
|
+
```ruby
|
|
339
|
+
plan = client.create_music_plan(prompt: "Upbeat pop song with verse and chorus", music_length_ms: 60000)
|
|
340
|
+
puts plan[:sections]
|
|
283
341
|
```
|
|
284
342
|
|
|
285
343
|
---
|
|
@@ -338,7 +396,7 @@ gem build elevenlabs.gemspec
|
|
|
338
396
|
Install the gem locally:
|
|
339
397
|
|
|
340
398
|
```bash
|
|
341
|
-
gem install ./elevenlabs-0.0.
|
|
399
|
+
gem install ./elevenlabs-0.0.7.gem
|
|
342
400
|
```
|
|
343
401
|
|
|
344
402
|
---
|
data/lib/elevenlabs/client.rb
CHANGED
|
@@ -88,6 +88,47 @@ module Elevenlabs
|
|
|
88
88
|
handle_error(e)
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
+
#####################################################
|
|
92
|
+
# Text-to-Dialogue #
|
|
93
|
+
# (POST /v1/text-to-dialogue) #
|
|
94
|
+
#####################################################
|
|
95
|
+
|
|
96
|
+
# Converts a list of text and voice ID pairs into speech (dialogue) and returns audio.
|
|
97
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/convert
|
|
98
|
+
#
|
|
99
|
+
# @param [Array<Hash>] inputs - A list of dialogue inputs, each a hash containing the text and the voice ID used to convert it into speech
|
|
100
|
+
# :text => String
|
|
101
|
+
# :voice_id => String
|
|
102
|
+
# @param [String] model_id - optional Identifier of the model to be used
|
|
103
|
+
# @param [Hash] settings - optional Settings controlling the dialogue generation
|
|
104
|
+
# :stability => double - 0.0 = Creative, 0.5 = Natural, 1.0 = Robust
|
|
105
|
+
# :use_speaker_boost => boolean
|
|
106
|
+
# @param [Integer] seed - optional Best effort to sample deterministically.
|
|
107
|
+
#
|
|
108
|
+
# @return [String] The binary audio data (usually an MP3).
|
|
109
|
+
def text_to_dialogue(inputs, model_id = nil, settings = {}, seed = nil)
|
|
110
|
+
endpoint = "/v1/text-to-dialogue"
|
|
111
|
+
request_body = {}.tap do |r|
|
|
112
|
+
r[:inputs] = inputs
|
|
113
|
+
r[:model_id] = model_id if model_id
|
|
114
|
+
r[:settings] = settings unless settings.empty?
|
|
115
|
+
r[:seed] = seed if seed
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
headers = default_headers
|
|
119
|
+
headers["Accept"] = "audio/mpeg"
|
|
120
|
+
|
|
121
|
+
response = @connection.post(endpoint) do |req|
|
|
122
|
+
req.headers = headers
|
|
123
|
+
req.body = request_body.to_json
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# Returns raw binary data (often MP3)
|
|
127
|
+
response.body
|
|
128
|
+
rescue Faraday::ClientError => e
|
|
129
|
+
handle_error(e)
|
|
130
|
+
end
|
|
131
|
+
|
|
91
132
|
#####################################################
|
|
92
133
|
# Design a Voice #
|
|
93
134
|
# (POST /v1/text-to-voice/design) #
|
|
@@ -194,6 +235,25 @@ module Elevenlabs
|
|
|
194
235
|
handle_error(e)
|
|
195
236
|
end
|
|
196
237
|
|
|
238
|
+
#####################################################
|
|
239
|
+
# GET models #
|
|
240
|
+
# (GET /v1/models) #
|
|
241
|
+
#####################################################
|
|
242
|
+
|
|
243
|
+
# Gets a list of available models
|
|
244
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/models/list
|
|
245
|
+
#
|
|
246
|
+
# @return [Array<Hash>] The parsed JSON response: an array of available models
|
|
247
|
+
def list_models
|
|
248
|
+
endpoint = "/v1/models"
|
|
249
|
+
response = @connection.get(endpoint) do |req|
|
|
250
|
+
req.headers = default_headers
|
|
251
|
+
end
|
|
252
|
+
JSON.parse(response.body)
|
|
253
|
+
rescue Faraday::ClientError => e
|
|
254
|
+
handle_error(e)
|
|
255
|
+
end
|
|
256
|
+
|
|
197
257
|
#####################################################
|
|
198
258
|
# GET a Single Voice #
|
|
199
259
|
# (GET /v1/voices/{voice_id}) #
|
|
@@ -345,6 +405,110 @@ module Elevenlabs
|
|
|
345
405
|
voice_id.in?(active_voices)
|
|
346
406
|
end
|
|
347
407
|
|
|
408
|
+
#####################################################
|
|
409
|
+
# Music API #
|
|
410
|
+
#####################################################
|
|
411
|
+
|
|
412
|
+
# 1. Compose music (basic)
|
|
413
|
+
# POST /v1/music
|
|
414
|
+
def compose_music(options = {})
|
|
415
|
+
endpoint = "/v1/music"
|
|
416
|
+
request_body = {
|
|
417
|
+
prompt: options[:prompt],
|
|
418
|
+
composition_plan: options[:composition_plan],
|
|
419
|
+
music_length_ms: options[:music_length_ms],
|
|
420
|
+
model_id: options[:model_id] || "music_v1"
|
|
421
|
+
}.compact
|
|
422
|
+
|
|
423
|
+
headers = default_headers.merge("Accept" => "audio/mpeg")
|
|
424
|
+
query = {}
|
|
425
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
426
|
+
|
|
427
|
+
response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
428
|
+
req.headers = headers
|
|
429
|
+
req.body = request_body.to_json
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
response.body # raw binary audio
|
|
433
|
+
rescue Faraday::ClientError => e
|
|
434
|
+
handle_error(e)
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
# 2. Stream music
|
|
438
|
+
# POST /v1/music/stream
|
|
439
|
+
def compose_music_stream(options = {}, &block)
|
|
440
|
+
endpoint = "/v1/music/stream"
|
|
441
|
+
request_body = {
|
|
442
|
+
prompt: options[:prompt],
|
|
443
|
+
composition_plan: options[:composition_plan],
|
|
444
|
+
music_length_ms: options[:music_length_ms],
|
|
445
|
+
model_id: options[:model_id] || "music_v1"
|
|
446
|
+
}.compact
|
|
447
|
+
|
|
448
|
+
headers = default_headers.merge("Accept" => "audio/mpeg")
|
|
449
|
+
query = {}
|
|
450
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
451
|
+
|
|
452
|
+
@connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
453
|
+
req.options.on_data = Proc.new do |chunk, _|
|
|
454
|
+
block.call(chunk) if block
|
|
455
|
+
end
|
|
456
|
+
req.headers = headers
|
|
457
|
+
req.body = request_body.to_json
|
|
458
|
+
end
|
|
459
|
+
|
|
460
|
+
nil # audio streamed via block
|
|
461
|
+
rescue Faraday::ClientError => e
|
|
462
|
+
handle_error(e)
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
# 3. Compose detailed music (metadata + audio)
|
|
466
|
+
# POST /v1/music/detailed
|
|
467
|
+
def compose_music_detailed(options = {})
|
|
468
|
+
endpoint = "/v1/music/detailed"
|
|
469
|
+
request_body = {
|
|
470
|
+
prompt: options[:prompt],
|
|
471
|
+
composition_plan: options[:composition_plan],
|
|
472
|
+
music_length_ms: options[:music_length_ms],
|
|
473
|
+
model_id: options[:model_id] || "music_v1"
|
|
474
|
+
}.compact
|
|
475
|
+
|
|
476
|
+
headers = default_headers
|
|
477
|
+
query = {}
|
|
478
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
479
|
+
|
|
480
|
+
response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
481
|
+
req.headers = headers
|
|
482
|
+
req.body = request_body.to_json
|
|
483
|
+
end
|
|
484
|
+
|
|
485
|
+
response.body # multipart/mixed with JSON + binary audio
|
|
486
|
+
rescue Faraday::ClientError => e
|
|
487
|
+
handle_error(e)
|
|
488
|
+
end
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# 4. Create a composition plan
|
|
492
|
+
# POST /v1/music/plan
|
|
493
|
+
def create_music_plan(options = {})
|
|
494
|
+
endpoint = "/v1/music/plan"
|
|
495
|
+
request_body = {
|
|
496
|
+
prompt: options[:prompt],
|
|
497
|
+
music_length_ms: options[:music_length_ms],
|
|
498
|
+
source_composition_plan: options[:source_composition_plan],
|
|
499
|
+
model_id: options[:model_id] || "music_v1"
|
|
500
|
+
}.compact
|
|
501
|
+
|
|
502
|
+
response = @connection.post(endpoint) do |req|
|
|
503
|
+
req.headers = default_headers
|
|
504
|
+
req.body = request_body.to_json
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
JSON.parse(response.body, symbolize_names: true)
|
|
508
|
+
rescue Faraday::ClientError => e
|
|
509
|
+
handle_error(e)
|
|
510
|
+
end
|
|
511
|
+
|
|
348
512
|
private
|
|
349
513
|
|
|
350
514
|
# Common headers needed by Elevenlabs
|
data/lib/elevenlabs.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: elevenlabs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- hackliteracy
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-08-
|
|
11
|
+
date: 2025-08-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|
|
@@ -39,7 +39,8 @@ dependencies:
|
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '1.1'
|
|
41
41
|
description: This gem provides a convenient Ruby interface to the ElevenLabs TTS,
|
|
42
|
-
Voice Cloning, Voice Design
|
|
42
|
+
Voice Cloning, Voice Design, Voice dialogues, TTS Streaming, Music Generation and
|
|
43
|
+
Streaming endpoints.
|
|
43
44
|
email:
|
|
44
45
|
- hackliteracy@gmail.com
|
|
45
46
|
executables: []
|