elevenlabs_client 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +52 -1
- data/README.md +78 -1
- data/lib/elevenlabs_client/client.rb +63 -1
- data/lib/elevenlabs_client/endpoints/audio_isolation.rb +71 -0
- data/lib/elevenlabs_client/endpoints/audio_native.rb +103 -0
- data/lib/elevenlabs_client/endpoints/dubs.rb +208 -2
- data/lib/elevenlabs_client/endpoints/forced_alignment.rb +41 -0
- data/lib/elevenlabs_client/endpoints/speech_to_speech.rb +125 -0
- data/lib/elevenlabs_client/endpoints/speech_to_text.rb +108 -0
- data/lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb +50 -0
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb +1 -0
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb +75 -0
- data/lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb +73 -0
- data/lib/elevenlabs_client/endpoints/voices.rb +362 -0
- data/lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb +250 -0
- data/lib/elevenlabs_client/version.rb +1 -1
- data/lib/elevenlabs_client.rb +9 -2
- metadata +25 -2
@@ -0,0 +1,250 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
require 'uri'

require 'websocket-client-simple'
|
5
|
+
|
6
|
+
module ElevenlabsClient
  # Thin wrapper around the ElevenLabs text-to-speech WebSocket endpoints
  # (single-stream and multi-context). It opens the socket with the client's
  # API key and offers helpers that serialise the protocol messages to JSON.
  class WebSocketTextToSpeech
    # Query parameters accepted by both stream-input endpoints.
    QUERY_PARAM_KEYS = %i[
      model_id language_code enable_logging enable_ssml_parsing output_format
      inactivity_timeout sync_alignment auto_mode apply_text_normalization seed
    ].freeze

    # Parameters that are omitted from the query when given as an empty string.
    SKIP_WHEN_BLANK = %i[language_code apply_text_normalization].freeze

    # @param client [#base_url, #api_key] configured API client
    def initialize(client)
      @client = client
      # Only the leading scheme is rewritten (http -> ws, https -> wss).
      @base_url = client.base_url.sub(%r{\Ahttp(s?)://}, 'ws\1://')
    end

    # Creates a WebSocket connection for real-time text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/text-to-speech
    #
    # Unknown option keys and nil values are ignored; the remaining options are
    # sent as URL-encoded query parameters in the order they were given.
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters
    # @option options [String] :model_id The model ID to use
    # @option options [String] :language_code ISO 639-1 language code
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [Boolean] :enable_ssml_parsing Enable SSML parsing (default: false)
    # @option options [String] :output_format Output audio format
    # @option options [Integer] :inactivity_timeout Timeout in seconds (default: 20, max: 180)
    # @option options [Boolean] :sync_alignment Include timing data (default: false)
    # @option options [Boolean] :auto_mode Reduce latency mode (default: false)
    # @option options [String] :apply_text_normalization Text normalization ("auto", "on", "off")
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_stream_input(voice_id, **options)
      open_socket("/v1/text-to-speech/#{voice_id}/stream-input", options)
    end

    # Creates a WebSocket connection for multi-context text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/multi-context
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters (same as connect_stream_input)
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_multi_stream_input(voice_id, **options)
      open_socket("/v1/text-to-speech/#{voice_id}/multi-stream-input", options)
    end

    # Sends the initialization message for a single stream.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param options [Hash] Initialization options
    # @option options [String] :text Initial text (defaults to a single space)
    # @option options [Hash] :voice_settings Voice settings hash (defaults to {})
    # @option options [String] :xi_api_key API key (will use client's key if not provided)
    def send_initialize_connection(ws, **options)
      send_json(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        xi_api_key: options[:xi_api_key] || @client.api_key
      )
    end

    # Sends a chunk of text on a single stream.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :try_trigger_generation Try to trigger generation
    #   (sent whenever it is not nil, so an explicit false is transmitted)
    # @option options [Hash] :voice_settings Voice settings override
    def send_text(ws, text, **options)
      message = { text: text }
      trigger = options[:try_trigger_generation]
      message[:try_trigger_generation] = trigger unless trigger.nil?
      message[:voice_settings] = options[:voice_settings] if options[:voice_settings]

      send_json(ws, message)
    end

    # Signals the end of input on a single stream by sending empty text.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_connection(ws)
      send_json(ws, text: "")
    end

    # Sends the initialization message for a multi-context stream.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Initialization options (:text, :voice_settings)
    def send_initialize_connection_multi(ws, context_id, **options)
      send_json(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        context_id: context_id
      )
    end

    # Initializes a new context in a multi-context stream.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Context options (:voice_settings, :model_id, :language_code)
    def send_initialize_context(ws, context_id, **options)
      message = {
        context_id: context_id,
        voice_settings: options[:voice_settings] || {}
      }
      message[:model_id] = options[:model_id] if options[:model_id]
      message[:language_code] = options[:language_code] if options[:language_code]

      send_json(ws, message)
    end

    # Sends text for one context of a multi-context stream.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :flush Included in the message whenever it is not nil
    def send_text_multi(ws, context_id, text, **options)
      message = { text: text, context_id: context_id }
      message[:flush] = options[:flush] unless options[:flush].nil?

      send_json(ws, message)
    end

    # Flushes a context.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_flush_context(ws, context_id)
      send_json(ws, context_id: context_id, flush: true)
    end

    # Closes a specific context.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_close_context(ws, context_id)
      send_json(ws, context_id: context_id, close_context: true)
    end

    # Keeps a context alive.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_keep_context_alive(ws, context_id)
      send_json(ws, context_id: context_id, keep_context_alive: true)
    end

    # Closes the entire socket.
    #
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_socket(ws)
      send_json(ws, close_socket: true)
    end

    # Convenience method to run a complete single-stream session: on open it
    # sends the initialization message, every text chunk (only the last one
    # asks to trigger generation) and the close message; each incoming message
    # that carries audio is decoded and handed to the block.
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param text_chunks [Array<String>] Array of text chunks to stream
    # @param options [Hash] Connection and voice options
    # @yieldparam audio_data [String] Base64-decoded audio bytes
    # @yieldparam data [Hash] The full parsed message
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    # @raise [APIError] from the socket's error handler
    def stream_text_to_speech(voice_id, text_chunks, **options, &block)
      ws = connect_stream_input(voice_id, **options)

      # NOTE(review): websocket-client-simple dispatches events through the
      # event_emitter gem, which appears to run handlers with the socket as
      # `self` (confirm for the pinned version). Calling through an explicit
      # receiver works regardless of how the handler is invoked.
      tts = self
      last_index = text_chunks.length - 1

      ws.on :open do
        tts.send_initialize_connection(ws, **options)

        text_chunks.each_with_index do |chunk, index|
          tts.send_text(ws, chunk, try_trigger_generation: index == last_index)
        end

        tts.send_close_connection(ws)
      end

      ws.on :message do |msg|
        data = begin
          JSON.parse(msg.data)
        rescue JSON::ParserError, TypeError
          nil # frames without a JSON payload carry no audio; skip them
        end
        next unless data.is_a?(Hash) && data['audio'] && block

        # unpack1('m') is the decoding Base64.decode64 performs; using it
        # avoids depending on the base64 library, which this file never required.
        block.call(data['audio'].unpack1('m'), data)
      end

      ws.on :error do |e|
        raise APIError, "WebSocket error: #{e.message}"
      end

      ws
    end

    # Alias methods for convenience
    alias_method :connect_single_stream, :connect_stream_input
    alias_method :connect_multi_context, :connect_multi_stream_input

    private

    attr_reader :client

    # Opens a socket to +path+ on the WebSocket base URL, appending the
    # permitted options as a query string and sending the API key header.
    def open_socket(path, options)
      query = build_query(options)
      url = "#{@base_url}#{path}"
      url += "?#{query}" unless query.empty?

      WebSocket::Client::Simple.connect(url, headers: { "xi-api-key" => @client.api_key })
    end

    # Builds the URL-encoded query string from the permitted, non-nil options,
    # keeping the caller's ordering. Returns "" when nothing qualifies.
    def build_query(options)
      permitted = options.select do |key, value|
        next false unless QUERY_PARAM_KEYS.include?(key)
        next false if value.nil?

        !(SKIP_WHEN_BLANK.include?(key) && value.to_s.empty?)
      end

      URI.encode_www_form(permitted)
    end

    # Serialises +payload+ to JSON and writes it to the socket.
    def send_json(ws, payload)
      ws.send(payload.to_json)
    end
  end
end
|
data/lib/elevenlabs_client.rb
CHANGED
@@ -6,17 +6,24 @@ require_relative "elevenlabs_client/settings"
|
|
6
6
|
require_relative "elevenlabs_client/endpoints/dubs"
|
7
7
|
require_relative "elevenlabs_client/endpoints/text_to_speech"
|
8
8
|
require_relative "elevenlabs_client/endpoints/text_to_speech_stream"
|
9
|
+
require_relative "elevenlabs_client/endpoints/text_to_speech_with_timestamps"
|
10
|
+
require_relative "elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps"
|
9
11
|
require_relative "elevenlabs_client/endpoints/text_to_dialogue"
|
12
|
+
require_relative "elevenlabs_client/endpoints/text_to_dialogue_stream"
|
10
13
|
require_relative "elevenlabs_client/endpoints/sound_generation"
|
11
14
|
require_relative "elevenlabs_client/endpoints/text_to_voice"
|
12
15
|
require_relative "elevenlabs_client/endpoints/models"
|
13
16
|
require_relative "elevenlabs_client/endpoints/voices"
|
14
17
|
require_relative "elevenlabs_client/endpoints/music"
|
18
|
+
require_relative "elevenlabs_client/endpoints/audio_isolation"
|
19
|
+
require_relative "elevenlabs_client/endpoints/audio_native"
|
20
|
+
require_relative "elevenlabs_client/endpoints/forced_alignment"
|
21
|
+
require_relative "elevenlabs_client/endpoints/speech_to_speech"
|
22
|
+
require_relative "elevenlabs_client/endpoints/speech_to_text"
|
23
|
+
require_relative "elevenlabs_client/endpoints/websocket_text_to_speech"
|
15
24
|
require_relative "elevenlabs_client/client"
|
16
25
|
|
17
26
|
module ElevenlabsClient
|
18
|
-
class Error < StandardError; end
|
19
|
-
|
20
27
|
# Convenience method to create a new client
|
21
28
|
def self.new(**options)
|
22
29
|
Client.new(**options)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elevenlabs_client
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vitor Oliveira
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-09-
|
11
|
+
date: 2025-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '1.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: websocket-client-simple
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.8'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.8'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: bundler
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -121,15 +135,24 @@ files:
|
|
121
135
|
- README.md
|
122
136
|
- lib/elevenlabs_client.rb
|
123
137
|
- lib/elevenlabs_client/client.rb
|
138
|
+
- lib/elevenlabs_client/endpoints/audio_isolation.rb
|
139
|
+
- lib/elevenlabs_client/endpoints/audio_native.rb
|
124
140
|
- lib/elevenlabs_client/endpoints/dubs.rb
|
141
|
+
- lib/elevenlabs_client/endpoints/forced_alignment.rb
|
125
142
|
- lib/elevenlabs_client/endpoints/models.rb
|
126
143
|
- lib/elevenlabs_client/endpoints/music.rb
|
127
144
|
- lib/elevenlabs_client/endpoints/sound_generation.rb
|
145
|
+
- lib/elevenlabs_client/endpoints/speech_to_speech.rb
|
146
|
+
- lib/elevenlabs_client/endpoints/speech_to_text.rb
|
128
147
|
- lib/elevenlabs_client/endpoints/text_to_dialogue.rb
|
148
|
+
- lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb
|
129
149
|
- lib/elevenlabs_client/endpoints/text_to_speech.rb
|
130
150
|
- lib/elevenlabs_client/endpoints/text_to_speech_stream.rb
|
151
|
+
- lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb
|
152
|
+
- lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb
|
131
153
|
- lib/elevenlabs_client/endpoints/text_to_voice.rb
|
132
154
|
- lib/elevenlabs_client/endpoints/voices.rb
|
155
|
+
- lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb
|
133
156
|
- lib/elevenlabs_client/errors.rb
|
134
157
|
- lib/elevenlabs_client/settings.rb
|
135
158
|
- lib/elevenlabs_client/version.rb
|