elevenlabs 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +76 -2
- data/lib/elevenlabs/client.rb +149 -1
- data/lib/elevenlabs.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ea563591f2116a24c1911bd964ed30bfee9dd468e9890db98a7f38ad84138bd4
|
|
4
|
+
data.tar.gz: e1c155f3dc9f5daaff7ceb283ccd7493335f639a1beb66df93adaf14ac8fac96
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8559ecdf9dc45be3f7018afb389cb784603ba6c255db024f7276df60490504da366f462cbf2fb3886eeaf39f305ff530ef5ee4f9dcee577df883e62e48684af7
|
|
7
|
+
data.tar.gz: 62c761c794c8641940d5b297a0252b37dc9f41e2afcaaad777701acf7c94ee8ec3089f76559eaffd26bdf4292930cf2a9f5827f5d7ed5c523445356fa3e085b4
|
data/README.md
CHANGED
|
@@ -14,6 +14,8 @@ This gem provides an easy-to-use interface for:
|
|
|
14
14
|
- **Converting text to speech** and retrieving the generated audio
|
|
15
15
|
- **Designing a voice** based on a text description
|
|
16
16
|
- **Streaming text-to-speech audio**
|
|
17
|
+
- **Music Generation**
|
|
18
|
+
- **Sound Effect Generation**
|
|
17
19
|
|
|
18
20
|
All requests are handled via [Faraday](https://github.com/lostisland/faraday).
|
|
19
21
|
|
|
@@ -304,7 +306,7 @@ Designed voices cannot be used for TTS until they are created in your account.
|
|
|
304
306
|
|
|
305
307
|
If the voice is not immediately available for TTS, wait a few seconds or check its status via client.get_voice(voice_id) until it’s "active".
|
|
306
308
|
|
|
307
|
-
|
|
309
|
+
11. Create a multi-speaker dialogue
|
|
308
310
|
```ruby
|
|
309
311
|
inputs = [{text: "It smells like updog in here", voice_id: "TX3LPaxmHKxFdv7VOQHJ"}, {text: "What's updog?", voice_id: "RILOU7YmBhvwJGDGjNmP"}, {text: "Not much, you?", voice_id: "TX3LPaxmHKxFdv7VOQHJ"}]
|
|
310
312
|
|
|
@@ -312,6 +314,78 @@ audio_data = client.text_to_dialogue(inputs)
|
|
|
312
314
|
File.open("what's updog.mp3", "wb") { |f| f.write(audio_data) }
|
|
313
315
|
```
|
|
314
316
|
|
|
317
|
+
12. **Generate Music from prompt**
|
|
318
|
+
```ruby
|
|
319
|
+
audio = client.compose_music(prompt: "Lo-fi hip hop beat", music_length_ms: 30000)
|
|
320
|
+
File.binwrite("lofi.mp3", audio)
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
12. **Stream Music Generated from prompt**
|
|
324
|
+
```ruby
|
|
325
|
+
File.open("epic_stream.mp3", "wb") do |f|
|
|
326
|
+
client.compose_music_stream(prompt: "Epic orchestral build", music_length_ms: 60000) do |chunk|
|
|
327
|
+
f.write(chunk)
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
13. **Generate Music with Detailed Metadata (metadata + audio) from prompt**
|
|
333
|
+
```ruby
|
|
334
|
+
result = client.compose_music_detailed(prompt: "Jazz piano trio", music_length_ms: 20000)
|
|
335
|
+
puts result # raw multipart data (needs parsing)
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
14. **Create a music composition plan from prompt**
|
|
339
|
+
```ruby
|
|
340
|
+
plan = client.create_music_plan(prompt: "Upbeat pop song with verse and chorus", music_length_ms: 60000)
|
|
341
|
+
puts plan[:sections]
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
15. **Create sound effects from a prompt**
|
|
345
|
+
|
|
346
|
+
Basic Usage: Simple Prompt
|
|
347
|
+
Generate a sound effect with only a text prompt, using default settings (output_format: "mp3_44100_128", duration_seconds: nil (auto-detected), prompt_influence: 0.3).
|
|
348
|
+
|
|
349
|
+
```ruby
|
|
350
|
+
audio_data = client.sound_generation("Futuristic laser blast in a space battle")
|
|
351
|
+
|
|
352
|
+
# Save the audio to a file
|
|
353
|
+
File.open("laser_blast.mp3", "wb") { |f| f.write(audio_data) }
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
Advanced Usage: Custom Duration, Influence, and Format
|
|
357
|
+
Specify duration_seconds, prompt_influence, and output_format for precise control over the sound effect.
|
|
358
|
+
Generate a roaring dragon sound with specific settings:
|
|
359
|
+
```ruby
|
|
360
|
+
audio_data = client.sound_generation(
|
|
361
|
+
"Roaring dragon in a fantasy cave",
|
|
362
|
+
duration_seconds: 3.0,
|
|
363
|
+
prompt_influence: 0.7, # Higher influence for closer adherence to the prompt
|
|
364
|
+
output_format: "mp3_22050_32"
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Save the audio to a file
|
|
368
|
+
File.open("dragon_roar.mp3", "wb") { |f| f.write(audio_data) }
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
Looping Sound Effect
|
|
373
|
+
Create a looping sound effect for continuous playback, such as background ambiance in a video game.
|
|
374
|
+
Generate a looping ambient sound for a haunted forest:
|
|
375
|
+
```ruby
|
|
376
|
+
audio_data = client.sound_generation(
|
|
377
|
+
"Eerie wind and distant owl hoots in a haunted forest",
|
|
378
|
+
loop: true,
|
|
379
|
+
duration_seconds: 10.0,
|
|
380
|
+
prompt_influence: 0.5,
|
|
381
|
+
output_format: "mp3_22050_32"
|
|
382
|
+
)
|
|
383
|
+
# Save the audio to a file
|
|
384
|
+
File.open("haunted_forest_loop.mp3", "wb") { |f| f.write(audio_data) }
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
For more details, see the [ElevenLabs Sound Generation API documentation](https://elevenlabs.io/docs/api-reference/sound-generation).
|
|
388
|
+
|
|
315
389
|
---
|
|
316
390
|
|
|
317
391
|
## Error Handling
|
|
@@ -368,7 +442,7 @@ gem build elevenlabs.gemspec
|
|
|
368
442
|
Install the gem locally:
|
|
369
443
|
|
|
370
444
|
```bash
|
|
371
|
-
gem install ./elevenlabs-0.0.
|
|
445
|
+
gem install ./elevenlabs-0.0.8.gem
|
|
372
446
|
```
|
|
373
447
|
|
|
374
448
|
---
|
data/lib/elevenlabs/client.rb
CHANGED
|
@@ -9,13 +9,15 @@ module Elevenlabs
|
|
|
9
9
|
BASE_URL = "https://api.elevenlabs.io"
|
|
10
10
|
|
|
11
11
|
# Note the default param: `api_key: nil`
|
|
12
|
-
def initialize(api_key: nil)
|
|
12
|
+
def initialize(api_key: nil, open_timeout: 5, read_timeout: 120)
|
|
13
13
|
# If the caller doesn’t provide an api_key, use the gem-wide config
|
|
14
14
|
@api_key = api_key || Elevenlabs.configuration&.api_key
|
|
15
15
|
|
|
16
16
|
@connection = Faraday.new(url: BASE_URL) do |conn|
|
|
17
17
|
conn.request :url_encoded
|
|
18
18
|
conn.response :raise_error
|
|
19
|
+
conn.options.open_timeout = open_timeout # time to open connection
|
|
20
|
+
conn.options.timeout = read_timeout # time to wait for response
|
|
19
21
|
conn.adapter Faraday.default_adapter
|
|
20
22
|
end
|
|
21
23
|
end
|
|
@@ -129,6 +131,48 @@ module Elevenlabs
|
|
|
129
131
|
handle_error(e)
|
|
130
132
|
end
|
|
131
133
|
|
|
134
|
+
#####################################################
|
|
135
|
+
# Sound Generation #
|
|
136
|
+
# (POST /v1/sound-generation) #
|
|
137
|
+
#####################################################
|
|
138
|
+
|
|
139
|
+
# Convert text to sound effects and retrieve audio (binary data)
|
|
140
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/sound-generation
|
|
141
|
+
#
|
|
142
|
+
# @param [String] text - text prompt describing the sound effect
|
|
143
|
+
# @param [Hash] options - optional parameters
|
|
144
|
+
# :loop => Boolean (whether to create a looping sound effect, default: false)
|
|
145
|
+
# :duration_seconds => Float (0.5 to 30 seconds, default: nil for auto-detection)
|
|
146
|
+
# :prompt_influence => Float (0.0 to 1.0, default: 0.3)
|
|
147
|
+
# :output_format => String (e.g., "mp3_22050_32", default: "mp3_44100_128")
|
|
148
|
+
#
|
|
149
|
+
# @return [String] The binary audio data (usually an MP3).
|
|
150
|
+
def sound_generation(text, options = {})
|
|
151
|
+
endpoint = "/v1/sound-generation"
|
|
152
|
+
request_body = { text: text }
|
|
153
|
+
|
|
154
|
+
# Add optional parameters if provided
|
|
155
|
+
request_body[:loop] = options[:loop] unless options[:loop].nil?
|
|
156
|
+
request_body[:duration_seconds] = options[:duration_seconds] if options[:duration_seconds]
|
|
157
|
+
request_body[:prompt_influence] = options[:prompt_influence] if options[:prompt_influence]
|
|
158
|
+
|
|
159
|
+
headers = default_headers
|
|
160
|
+
headers["Accept"] = "audio/mpeg"
|
|
161
|
+
|
|
162
|
+
query = {}
|
|
163
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
164
|
+
|
|
165
|
+
response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
166
|
+
req.headers = headers
|
|
167
|
+
req.body = request_body.to_json
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Returns raw binary data (often MP3)
|
|
171
|
+
response.body
|
|
172
|
+
rescue Faraday::ClientError => e
|
|
173
|
+
handle_error(e)
|
|
174
|
+
end
|
|
175
|
+
|
|
132
176
|
#####################################################
|
|
133
177
|
# Design a Voice #
|
|
134
178
|
# (POST /v1/text-to-voice/design) #
|
|
@@ -405,6 +449,110 @@ module Elevenlabs
|
|
|
405
449
|
voice_id.in?(active_voices)
|
|
406
450
|
end
|
|
407
451
|
|
|
452
|
+
#####################################################
|
|
453
|
+
# Music API #
|
|
454
|
+
#####################################################
|
|
455
|
+
|
|
456
|
+
# 1. Compose music (basic)
|
|
457
|
+
# POST /v1/music
|
|
458
|
+
def compose_music(options = {})
|
|
459
|
+
endpoint = "/v1/music"
|
|
460
|
+
request_body = {
|
|
461
|
+
prompt: options[:prompt],
|
|
462
|
+
composition_plan: options[:composition_plan],
|
|
463
|
+
music_length_ms: options[:music_length_ms],
|
|
464
|
+
model_id: options[:model_id] || "music_v1"
|
|
465
|
+
}.compact
|
|
466
|
+
|
|
467
|
+
headers = default_headers.merge("Accept" => "audio/mpeg")
|
|
468
|
+
query = {}
|
|
469
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
470
|
+
|
|
471
|
+
response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
472
|
+
req.headers = headers
|
|
473
|
+
req.body = request_body.to_json
|
|
474
|
+
end
|
|
475
|
+
|
|
476
|
+
response.body # raw binary audio
|
|
477
|
+
rescue Faraday::ClientError => e
|
|
478
|
+
handle_error(e)
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
# 2. Stream music
|
|
482
|
+
# POST /v1/music/stream
|
|
483
|
+
def compose_music_stream(options = {}, &block)
|
|
484
|
+
endpoint = "/v1/music/stream"
|
|
485
|
+
request_body = {
|
|
486
|
+
prompt: options[:prompt],
|
|
487
|
+
composition_plan: options[:composition_plan],
|
|
488
|
+
music_length_ms: options[:music_length_ms],
|
|
489
|
+
model_id: options[:model_id] || "music_v1"
|
|
490
|
+
}.compact
|
|
491
|
+
|
|
492
|
+
headers = default_headers.merge("Accept" => "audio/mpeg")
|
|
493
|
+
query = {}
|
|
494
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
495
|
+
|
|
496
|
+
@connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
497
|
+
req.options.on_data = Proc.new do |chunk, _|
|
|
498
|
+
block.call(chunk) if block
|
|
499
|
+
end
|
|
500
|
+
req.headers = headers
|
|
501
|
+
req.body = request_body.to_json
|
|
502
|
+
end
|
|
503
|
+
|
|
504
|
+
nil # audio streamed via block
|
|
505
|
+
rescue Faraday::ClientError => e
|
|
506
|
+
handle_error(e)
|
|
507
|
+
end
|
|
508
|
+
|
|
509
|
+
# 3. Compose detailed music (metadata + audio)
|
|
510
|
+
# POST /v1/music/detailed
|
|
511
|
+
def compose_music_detailed(options = {})
|
|
512
|
+
endpoint = "/v1/music/detailed"
|
|
513
|
+
request_body = {
|
|
514
|
+
prompt: options[:prompt],
|
|
515
|
+
composition_plan: options[:composition_plan],
|
|
516
|
+
music_length_ms: options[:music_length_ms],
|
|
517
|
+
model_id: options[:model_id] || "music_v1"
|
|
518
|
+
}.compact
|
|
519
|
+
|
|
520
|
+
headers = default_headers
|
|
521
|
+
query = {}
|
|
522
|
+
query[:output_format] = options[:output_format] if options[:output_format]
|
|
523
|
+
|
|
524
|
+
response = @connection.post("#{endpoint}?#{URI.encode_www_form(query)}") do |req|
|
|
525
|
+
req.headers = headers
|
|
526
|
+
req.body = request_body.to_json
|
|
527
|
+
end
|
|
528
|
+
|
|
529
|
+
response.body # multipart/mixed with JSON + binary audio
|
|
530
|
+
rescue Faraday::ClientError => e
|
|
531
|
+
handle_error(e)
|
|
532
|
+
end
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
# 4. Create a composition plan
|
|
536
|
+
# POST /v1/music/plan
|
|
537
|
+
def create_music_plan(options = {})
|
|
538
|
+
endpoint = "/v1/music/plan"
|
|
539
|
+
request_body = {
|
|
540
|
+
prompt: options[:prompt],
|
|
541
|
+
music_length_ms: options[:music_length_ms],
|
|
542
|
+
source_composition_plan: options[:source_composition_plan],
|
|
543
|
+
model_id: options[:model_id] || "music_v1"
|
|
544
|
+
}.compact
|
|
545
|
+
|
|
546
|
+
response = @connection.post(endpoint) do |req|
|
|
547
|
+
req.headers = default_headers
|
|
548
|
+
req.body = request_body.to_json
|
|
549
|
+
end
|
|
550
|
+
|
|
551
|
+
JSON.parse(response.body, symbolize_names: true)
|
|
552
|
+
rescue Faraday::ClientError => e
|
|
553
|
+
handle_error(e)
|
|
554
|
+
end
|
|
555
|
+
|
|
408
556
|
private
|
|
409
557
|
|
|
410
558
|
# Common headers needed by Elevenlabs
|
data/lib/elevenlabs.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: elevenlabs
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- hackliteracy
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-
|
|
11
|
+
date: 2025-09-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|
|
@@ -39,7 +39,8 @@ dependencies:
|
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '1.1'
|
|
41
41
|
description: This gem provides a convenient Ruby interface to the ElevenLabs TTS,
|
|
42
|
-
Voice Cloning, Voice Design, Voice dialogues
|
|
42
|
+
Voice Cloning, Voice Design, Voice dialogues, TTS Streaming, Music Generation and
|
|
43
|
+
Streaming endpoints.
|
|
43
44
|
email:
|
|
44
45
|
- hackliteracy@gmail.com
|
|
45
46
|
executables: []
|