elevenlabs_client 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
require 'json'
require 'uri'
require 'websocket-client-simple'
5
+
6
module ElevenlabsClient
  # Thin wrapper around the ElevenLabs text-to-speech WebSocket endpoints.
  #
  # The connect_* methods open a socket, the send_* helpers serialize a Hash
  # to JSON and push it over a socket handed in by the caller, and
  # #stream_text_to_speech wires both together for a one-shot session.
  class WebSocketTextToSpeech
    # Query-string parameters accepted by the stream-input endpoints; any
    # other option passed to a connect_* method is ignored.
    STREAM_QUERY_KEYS = %i[
      model_id language_code enable_logging enable_ssml_parsing output_format
      inactivity_timeout sync_alignment auto_mode apply_text_normalization seed
    ].freeze

    # Parameters that are omitted from the query when given as an empty string.
    BLANK_SKIPPED_KEYS = %i[language_code apply_text_normalization].freeze

    private_constant :STREAM_QUERY_KEYS, :BLANK_SKIPPED_KEYS

    # @param client [#base_url, #api_key] Configured API client
    def initialize(client)
      @client = client
      # Only rewrite the leading scheme (http -> ws, https -> wss); an
      # "http://" occurring later in the URL must be left untouched.
      @base_url = client.base_url.sub(%r{\Ahttp(?=s?://)}, 'ws')
    end

    # Creates a WebSocket connection for real-time text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/text-to-speech
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters
    # @option options [String] :model_id The model ID to use
    # @option options [String] :language_code ISO 639-1 language code
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [Boolean] :enable_ssml_parsing Enable SSML parsing (default: false)
    # @option options [String] :output_format Output audio format
    # @option options [Integer] :inactivity_timeout Timeout in seconds (default: 20, max: 180)
    # @option options [Boolean] :sync_alignment Include timing data (default: false)
    # @option options [Boolean] :auto_mode Reduce latency mode (default: false)
    # @option options [String] :apply_text_normalization Text normalization ("auto", "on", "off")
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @param block [Proc] Optional block forwarded to WebSocket::Client::Simple.connect
    #   (NOTE(review): the library is expected to yield the client before the
    #   handshake starts so handlers can be attached early - confirm)
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_stream_input(voice_id, **options, &block)
      open_socket("/v1/text-to-speech/#{voice_id}/stream-input", options, &block)
    end

    # Creates a WebSocket connection for multi-context text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/multi-context
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters (same as connect_stream_input)
    # @param block [Proc] Optional block forwarded to WebSocket::Client::Simple.connect
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_multi_stream_input(voice_id, **options, &block)
      open_socket("/v1/text-to-speech/#{voice_id}/multi-stream-input", options, &block)
    end

    # Helper method to send initialization message for single stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param options [Hash] Initialization options
    # @option options [String] :text Initial text (usually a space)
    # @option options [Hash] :voice_settings Voice settings hash
    # @option options [String] :xi_api_key API key (will use client's key if not provided)
    def send_initialize_connection(ws, **options)
      deliver(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        xi_api_key: options[:xi_api_key] || @client.api_key
      )
    end

    # Helper method to send text for single stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :try_trigger_generation Try to trigger generation
    # @option options [Hash] :voice_settings Voice settings override
    def send_text(ws, text, **options)
      message = { text: text }
      # nil means "not specified"; an explicit false must still be sent.
      trigger = options[:try_trigger_generation]
      message[:try_trigger_generation] = trigger unless trigger.nil?
      message[:voice_settings] = options[:voice_settings] if options[:voice_settings]

      deliver(ws, message)
    end

    # Helper method to close connection for single stream (sends an empty text)
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_connection(ws)
      deliver(ws, text: "")
    end

    # Helper method to send initialization message for multi-context stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Initialization options
    # @option options [String] :text Initial text (defaults to a single space)
    # @option options [Hash] :voice_settings Voice settings hash
    def send_initialize_connection_multi(ws, context_id, **options)
      deliver(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        context_id: context_id
      )
    end

    # Helper method to initialize a new context in multi-stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Context options
    # @option options [Hash] :voice_settings Voice settings hash
    # @option options [String] :model_id Model ID, sent only when given
    # @option options [String] :language_code Language code, sent only when given
    def send_initialize_context(ws, context_id, **options)
      message = {
        context_id: context_id,
        voice_settings: options[:voice_settings] || {}
      }
      message[:model_id] = options[:model_id] if options[:model_id]
      message[:language_code] = options[:language_code] if options[:language_code]

      deliver(ws, message)
    end

    # Helper method to send text for multi-context stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :flush Flush flag, sent whenever it is not nil
    def send_text_multi(ws, context_id, text, **options)
      message = { text: text, context_id: context_id }
      message[:flush] = options[:flush] unless options[:flush].nil?

      deliver(ws, message)
    end

    # Helper method to flush a context
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_flush_context(ws, context_id)
      deliver(ws, context_id: context_id, flush: true)
    end

    # Helper method to close a specific context
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_close_context(ws, context_id)
      deliver(ws, context_id: context_id, close_context: true)
    end

    # Helper method to keep a context alive
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_keep_context_alive(ws, context_id)
      deliver(ws, context_id: context_id, keep_context_alive: true)
    end

    # Helper method to close the entire socket
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_socket(ws)
      deliver(ws, close_socket: true)
    end

    # Convenience method to create a complete streaming session: on open it
    # sends the initialization message, every chunk (only the last one with
    # try_trigger_generation set to true) and the closing empty text.
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param text_chunks [Array<String>] Array of text chunks to stream
    # @param options [Hash] Connection and voice options
    # @param block [Proc] Block to handle audio chunks; called with the decoded
    #   audio bytes and the parsed message Hash
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def stream_text_to_speech(voice_id, text_chunks, **options, &block)
      # Prefer attaching handlers from inside the connect block so :open
      # cannot fire before a listener exists. If the socket library did not
      # yield, fall back to attaching right after connect returns.
      attached = false
      ws = connect_stream_input(voice_id, **options) do |socket|
        attach_stream_handlers(socket, text_chunks, options, block)
        attached = true
      end
      attach_stream_handlers(ws, text_chunks, options, block) unless attached

      ws
    end

    # Alias methods for convenience
    alias_method :connect_single_stream, :connect_stream_input
    alias_method :connect_multi_context, :connect_multi_stream_input

    private

    attr_reader :client

    # Opens an authenticated socket for the given endpoint path.
    def open_socket(path, options, &block)
      headers = { "xi-api-key" => @client.api_key }
      WebSocket::Client::Simple.connect(stream_url(path, options), headers: headers, &block)
    end

    # Builds the full ws(s) URL, appending the allowed options as an escaped
    # query string in the order the caller supplied them.
    def stream_url(path, options)
      params = options.select do |key, value|
        next false unless STREAM_QUERY_KEYS.include?(key)
        next false if value.nil?

        !(BLANK_SKIPPED_KEYS.include?(key) && value.to_s.empty?)
      end

      query = URI.encode_www_form(params)
      query.empty? ? "#{@base_url}#{path}" : "#{@base_url}#{path}?#{query}"
    end

    # Serializes the payload to JSON and sends it over the socket.
    def deliver(ws, payload)
      ws.send(payload.to_json)
    end

    # Registers the :open, :message and :error handlers used by
    # #stream_text_to_speech on the given socket.
    def attach_stream_handlers(ws, text_chunks, options, audio_handler)
      # NOTE(review): the socket library appears to run handlers with the
      # socket as `self`; calling through an explicit receiver works either way.
      tts = self
      last_index = text_chunks.length - 1

      ws.on :open do
        tts.send_initialize_connection(ws, **options)
        text_chunks.each_with_index do |chunk, index|
          tts.send_text(ws, chunk, try_trigger_generation: index == last_index)
        end
        tts.send_close_connection(ws)
      end

      ws.on :message do |msg|
        data = JSON.parse(msg.data)
        audio = data['audio']
        # unpack1('m') is the core-Ruby base64 decoder, so no Base64 require is needed.
        audio_handler.call(audio.unpack1('m'), data) if audio && audio_handler
      end

      ws.on :error do |e|
        # NOTE(review): this raises on whatever thread the socket library
        # emits from, so callers may not be able to rescue it - confirm.
        raise APIError, "WebSocket error: #{e.message}"
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ElevenlabsClient
4
- VERSION = "0.3.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -6,17 +6,24 @@ require_relative "elevenlabs_client/settings"
6
6
  require_relative "elevenlabs_client/endpoints/dubs"
7
7
  require_relative "elevenlabs_client/endpoints/text_to_speech"
8
8
  require_relative "elevenlabs_client/endpoints/text_to_speech_stream"
9
+ require_relative "elevenlabs_client/endpoints/text_to_speech_with_timestamps"
10
+ require_relative "elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps"
9
11
  require_relative "elevenlabs_client/endpoints/text_to_dialogue"
12
+ require_relative "elevenlabs_client/endpoints/text_to_dialogue_stream"
10
13
  require_relative "elevenlabs_client/endpoints/sound_generation"
11
14
  require_relative "elevenlabs_client/endpoints/text_to_voice"
12
15
  require_relative "elevenlabs_client/endpoints/models"
13
16
  require_relative "elevenlabs_client/endpoints/voices"
14
17
  require_relative "elevenlabs_client/endpoints/music"
18
+ require_relative "elevenlabs_client/endpoints/audio_isolation"
19
+ require_relative "elevenlabs_client/endpoints/audio_native"
20
+ require_relative "elevenlabs_client/endpoints/forced_alignment"
21
+ require_relative "elevenlabs_client/endpoints/speech_to_speech"
22
+ require_relative "elevenlabs_client/endpoints/speech_to_text"
23
+ require_relative "elevenlabs_client/endpoints/websocket_text_to_speech"
15
24
  require_relative "elevenlabs_client/client"
16
25
 
17
26
  module ElevenlabsClient
18
- class Error < StandardError; end
19
-
20
27
  # Convenience method to create a new client
21
28
  def self.new(**options)
22
29
  Client.new(**options)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elevenlabs_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vitor Oliveira
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-09-13 00:00:00.000000000 Z
11
+ date: 2025-09-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: websocket-client-simple
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.8'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.8'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: bundler
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -121,15 +135,24 @@ files:
121
135
  - README.md
122
136
  - lib/elevenlabs_client.rb
123
137
  - lib/elevenlabs_client/client.rb
138
+ - lib/elevenlabs_client/endpoints/audio_isolation.rb
139
+ - lib/elevenlabs_client/endpoints/audio_native.rb
124
140
  - lib/elevenlabs_client/endpoints/dubs.rb
141
+ - lib/elevenlabs_client/endpoints/forced_alignment.rb
125
142
  - lib/elevenlabs_client/endpoints/models.rb
126
143
  - lib/elevenlabs_client/endpoints/music.rb
127
144
  - lib/elevenlabs_client/endpoints/sound_generation.rb
145
+ - lib/elevenlabs_client/endpoints/speech_to_speech.rb
146
+ - lib/elevenlabs_client/endpoints/speech_to_text.rb
128
147
  - lib/elevenlabs_client/endpoints/text_to_dialogue.rb
148
+ - lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb
129
149
  - lib/elevenlabs_client/endpoints/text_to_speech.rb
130
150
  - lib/elevenlabs_client/endpoints/text_to_speech_stream.rb
151
+ - lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb
152
+ - lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb
131
153
  - lib/elevenlabs_client/endpoints/text_to_voice.rb
132
154
  - lib/elevenlabs_client/endpoints/voices.rb
155
+ - lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb
133
156
  - lib/elevenlabs_client/errors.rb
134
157
  - lib/elevenlabs_client/settings.rb
135
158
  - lib/elevenlabs_client/version.rb