elevenlabs_client 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +140 -0
- data/README.md +163 -5
- data/lib/elevenlabs_client/client.rb +80 -3
- data/lib/elevenlabs_client/endpoints/admin/history.rb +106 -0
- data/lib/elevenlabs_client/endpoints/admin/models.rb +27 -0
- data/lib/elevenlabs_client/endpoints/admin/usage.rb +46 -0
- data/lib/elevenlabs_client/endpoints/admin/user.rb +28 -0
- data/lib/elevenlabs_client/endpoints/admin/voice_library.rb +86 -0
- data/lib/elevenlabs_client/endpoints/audio_isolation.rb +71 -0
- data/lib/elevenlabs_client/endpoints/audio_native.rb +103 -0
- data/lib/elevenlabs_client/endpoints/dubs.rb +52 -2
- data/lib/elevenlabs_client/endpoints/forced_alignment.rb +41 -0
- data/lib/elevenlabs_client/endpoints/sound_generation.rb +0 -1
- data/lib/elevenlabs_client/endpoints/speech_to_speech.rb +125 -0
- data/lib/elevenlabs_client/endpoints/speech_to_text.rb +121 -0
- data/lib/elevenlabs_client/endpoints/text_to_dialogue.rb +34 -1
- data/lib/elevenlabs_client/endpoints/text_to_speech.rb +147 -1
- data/lib/elevenlabs_client/endpoints/text_to_voice.rb +13 -1
- data/lib/elevenlabs_client/endpoints/voices.rb +368 -7
- data/lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb +250 -0
- data/lib/elevenlabs_client/version.rb +1 -1
- data/lib/elevenlabs_client.rb +11 -4
- metadata +41 -4
- data/lib/elevenlabs_client/endpoints/models.rb +0 -26
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb +0 -42
@@ -110,6 +110,374 @@ module ElevenlabsClient
|
|
110
110
|
@client.delete(endpoint)
|
111
111
|
end
|
112
112
|
|
113
|
+
# POST /v1/similar-voices
|
114
|
+
# Returns a list of shared voices similar to the provided audio sample
|
115
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/similar-voices
|
116
|
+
#
|
117
|
+
# @param audio_file [IO, File] Audio file to find similar voices for
|
118
|
+
# @param filename [String] Original filename for the audio file
|
119
|
+
# @param options [Hash] Optional parameters
|
120
|
+
# @option options [Float] :similarity_threshold Threshold for voice similarity (0-2)
|
121
|
+
# @option options [Integer] :top_k Number of most similar voices to return (1-100)
|
122
|
+
# @return [Hash] Response containing similar voices
|
123
|
+
def find_similar(audio_file, filename, **options)
  # The audio sample is the only mandatory multipart field.
  form = { audio_file: @client.file_part(audio_file, filename) }

  # Tuning options are forwarded only when the caller supplied a truthy value.
  %i[similarity_threshold top_k].each do |key|
    value = options[key]
    form[key] = value if value
  end

  @client.post_multipart("/v1/similar-voices", form)
end
|
135
|
+
|
136
|
+
# POST /v1/voices/add
|
137
|
+
# Creates a new IVC (Instant Voice Cloning) voice
|
138
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/add-voice
|
139
|
+
#
|
140
|
+
# @param name [String] Name of the voice
|
141
|
+
# @param audio_files [Array<IO, File>] Array of audio files for voice cloning
|
142
|
+
# @param filenames [Array<String>] Array of original filenames
|
143
|
+
# @param options [Hash] Optional parameters
|
144
|
+
# @option options [Boolean] :remove_background_noise Remove background noise (default: false)
|
145
|
+
# @option options [String] :description Description of the voice
|
146
|
+
# @option options [String] :labels Serialized labels dictionary
|
147
|
+
# @return [Hash] Response containing voice_id and requires_verification status
|
148
|
+
def create_ivc(name, audio_files, filenames, **options)
  endpoint = "/v1/voices/add"

  payload = { name: name }

  # remove_background_noise is a boolean, so an explicit false must still be sent;
  # the remaining options are forwarded only when truthy.
  payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?
  payload[:description] = options[:description] if options[:description]
  payload[:labels] = options[:labels] if options[:labels]

  # Build one multipart part per sample. A missing filename (or a nil
  # filenames argument) falls back to a positional default name.
  names = Array(filenames)
  parts = audio_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "audio_#{index}.mp3")
  end

  # Previously each iteration assigned payload["files[]"] again, so only the
  # LAST file was ever uploaded. Keep every part instead. A single file is
  # still sent as a bare part, exactly as before.
  # NOTE(review): for several files this passes an Array under "files[]";
  # confirm that post_multipart's encoder emits one part per element and that
  # the resulting field name is the one the API expects.
  payload["files[]"] = parts.length == 1 ? parts.first : parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
|
166
|
+
|
167
|
+
# GET /v1/voices/settings/default
|
168
|
+
# Gets the default settings for voices
|
169
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/default-settings
|
170
|
+
#
|
171
|
+
# @return [Hash] Default voice settings
|
172
|
+
def get_default_settings
  # Plain GET with no parameters; the client's response is returned as-is.
  @client.get("/v1/voices/settings/default")
end
|
176
|
+
|
177
|
+
# GET /v1/voices/{voice_id}/settings
|
178
|
+
# Returns the settings for a specific voice
|
179
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-settings
|
180
|
+
#
|
181
|
+
# @param voice_id [String] Voice ID
|
182
|
+
# @return [Hash] Voice settings
|
183
|
+
def get_settings(voice_id)
  # The voice ID is interpolated into the path unchanged.
  settings_path = "/v1/voices/#{voice_id}/settings"
  @client.get(settings_path)
end
|
187
|
+
|
188
|
+
# POST /v1/voices/{voice_id}/settings/edit
|
189
|
+
# Edit settings for a specific voice
|
190
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/edit-settings
|
191
|
+
#
|
192
|
+
# @param voice_id [String] Voice ID
|
193
|
+
# @param options [Hash] Voice settings to update
|
194
|
+
# @option options [Float] :stability Stability setting (0.0-1.0)
|
195
|
+
# @option options [Boolean] :use_speaker_boost Enable speaker boost
|
196
|
+
# @option options [Float] :similarity_boost Similarity boost setting (0.0-1.0)
|
197
|
+
# @option options [Float] :style Style exaggeration (0.0-1.0)
|
198
|
+
# @option options [Float] :speed Speed adjustment (0.25-4.0)
|
199
|
+
# @return [Hash] Response with status
|
200
|
+
def edit_settings(voice_id, **options)
  settings = {}

  # Keys are visited in a fixed order so the payload layout never depends on
  # the order in which the caller passed them.
  %i[stability use_speaker_boost similarity_boost style speed].each do |key|
    value = options[key]
    # use_speaker_boost is a boolean and is sent whenever it is not nil (so an
    # explicit false goes through); every other setting is sent only if truthy.
    wanted = key == :use_speaker_boost ? !value.nil? : value
    settings[key] = value if wanted
  end

  @client.post("/v1/voices/#{voice_id}/settings/edit", settings)
end
|
212
|
+
|
213
|
+
# GET /v1/voices/{voice_id}/samples/{sample_id}/audio
|
214
|
+
# Returns the audio corresponding to a sample attached to a voice
|
215
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-sample-audio
|
216
|
+
#
|
217
|
+
# @param voice_id [String] Voice ID
|
218
|
+
# @param sample_id [String] Sample ID
|
219
|
+
# @return [String] Binary audio data
|
220
|
+
def get_sample_audio(voice_id, sample_id)
  # Both identifiers are interpolated into the path unchanged.
  audio_path = "/v1/voices/#{voice_id}/samples/#{sample_id}/audio"
  @client.get(audio_path)
end
|
224
|
+
|
225
|
+
# POST /v1/voices/pvc
|
226
|
+
# Creates a new PVC (Professional Voice Cloning) voice with metadata but no samples
|
227
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/create-pvc
|
228
|
+
#
|
229
|
+
# @param name [String] Name of the voice (max 100 characters)
|
230
|
+
# @param language [String] Language used in the samples
|
231
|
+
# @param options [Hash] Optional parameters
|
232
|
+
# @option options [String] :description Description (max 500 characters)
|
233
|
+
# @option options [Hash] :labels Serialized labels dictionary
|
234
|
+
# @return [Hash] Response containing voice_id
|
235
|
+
def create_pvc(name, language, **options)
  # name and language are always sent, even when nil.
  body = { name: name, language: language }

  # Optional metadata is added only when the caller supplied a truthy value.
  %i[description labels].each do |key|
    body[key] = options[key] if options[key]
  end

  @client.post("/v1/voices/pvc", body)
end
|
248
|
+
|
249
|
+
# POST /v1/voices/pvc/{voice_id}
|
250
|
+
# Edit PVC voice metadata
|
251
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc
|
252
|
+
#
|
253
|
+
# @param voice_id [String] Voice ID
|
254
|
+
# @param options [Hash] Parameters to update
|
255
|
+
# @option options [String] :name New name (max 100 characters)
|
256
|
+
# @option options [String] :language New language
|
257
|
+
# @option options [String] :description New description (max 500 characters)
|
258
|
+
# @option options [Hash] :labels New labels dictionary
|
259
|
+
# @return [Hash] Response containing voice_id
|
260
|
+
def update_pvc(voice_id, **options)
  # Only the four known metadata fields are forwarded, in a fixed order, and
  # only when the supplied value is truthy; unknown options are ignored.
  changes = %i[name language description labels].each_with_object({}) do |key, acc|
    acc[key] = options[key] if options[key]
  end

  @client.post("/v1/voices/pvc/#{voice_id}", changes)
end
|
271
|
+
|
272
|
+
# POST /v1/voices/pvc/{voice_id}/train
|
273
|
+
# Start PVC training process for a voice
|
274
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/train-pvc
|
275
|
+
#
|
276
|
+
# @param voice_id [String] Voice ID
|
277
|
+
# @param options [Hash] Optional parameters
|
278
|
+
# @option options [String] :model_id Model ID to use for conversion
|
279
|
+
# @return [Hash] Response with status
|
280
|
+
def train_pvc(voice_id, **options)
  # model_id is the only recognised option; an empty body is posted without it.
  model = options[:model_id]
  body = model ? { model_id: model } : {}

  @client.post("/v1/voices/pvc/#{voice_id}/train", body)
end
|
288
|
+
|
289
|
+
# POST /v1/voices/pvc/{voice_id}/samples
|
290
|
+
# Add audio samples to a PVC voice
|
291
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/add-pvc-samples
|
292
|
+
#
|
293
|
+
# @param voice_id [String] Voice ID
|
294
|
+
# @param audio_files [Array<IO, File>] Audio files for the voice
|
295
|
+
# @param filenames [Array<String>] Original filenames
|
296
|
+
# @param options [Hash] Optional parameters
|
297
|
+
# @option options [Boolean] :remove_background_noise Remove background noise (default: false)
|
298
|
+
# @return [Array<Hash>] Array of sample information
|
299
|
+
def add_pvc_samples(voice_id, audio_files, filenames, **options)
  endpoint = "/v1/voices/pvc/#{voice_id}/samples"

  payload = {}
  # Boolean flag: forwarded whenever the caller set it, including an explicit false.
  payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?

  # Build one multipart part per sample. A missing filename (or a nil
  # filenames argument) falls back to a positional default name.
  names = Array(filenames)
  parts = audio_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "audio_#{index}.mp3")
  end

  # Previously each iteration assigned payload["files[]"] again, so only the
  # LAST sample was ever uploaded. Keep every part instead. A single file is
  # still sent as a bare part, exactly as before.
  # NOTE(review): for several files this passes an Array under "files[]";
  # confirm that post_multipart's encoder emits one part per element and that
  # the resulting field name is the one the API expects.
  payload["files[]"] = parts.length == 1 ? parts.first : parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
|
313
|
+
|
314
|
+
# POST /v1/voices/pvc/{voice_id}/samples/{sample_id}
|
315
|
+
# Update a PVC voice sample - apply noise removal or select speaker
|
316
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc-sample
|
317
|
+
#
|
318
|
+
# @param voice_id [String] Voice ID
|
319
|
+
# @param sample_id [String] Sample ID
|
320
|
+
# @param options [Hash] Update parameters
|
321
|
+
# @option options [Boolean] :remove_background_noise Remove background noise
|
322
|
+
# @option options [Array<String>] :selected_speaker_ids Speaker IDs for training
|
323
|
+
# @option options [Integer] :trim_start_time Start time in milliseconds
|
324
|
+
# @option options [Integer] :trim_end_time End time in milliseconds
|
325
|
+
# @return [Hash] Response containing voice_id
|
326
|
+
def update_pvc_sample(voice_id, sample_id, **options)
  changes = {}

  # Boolean flag: forwarded whenever the caller set it, including an explicit false.
  noise_flag = options[:remove_background_noise]
  changes[:remove_background_noise] = noise_flag unless noise_flag.nil?

  # The remaining fields are forwarded only when truthy (0 counts as truthy in Ruby).
  %i[selected_speaker_ids trim_start_time trim_end_time].each do |key|
    changes[key] = options[key] if options[key]
  end

  @client.post("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}", changes)
end
|
337
|
+
|
338
|
+
# DELETE /v1/voices/pvc/{voice_id}/samples/{sample_id}
|
339
|
+
# Delete a sample from a PVC voice
|
340
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/delete-pvc-sample
|
341
|
+
#
|
342
|
+
# @param voice_id [String] Voice ID
|
343
|
+
# @param sample_id [String] Sample ID
|
344
|
+
# @return [Hash] Response with status
|
345
|
+
def delete_pvc_sample(voice_id, sample_id)
  # Same path as update_pvc_sample, issued as a DELETE.
  sample_path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}"
  @client.delete(sample_path)
end
|
349
|
+
|
350
|
+
# GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/audio
|
351
|
+
# Retrieve voice sample audio with or without noise removal
|
352
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-sample-audio
|
353
|
+
#
|
354
|
+
# @param voice_id [String] Voice ID
|
355
|
+
# @param sample_id [String] Sample ID
|
356
|
+
# @param options [Hash] Optional parameters
|
357
|
+
# @option options [Boolean] :remove_background_noise Remove background noise (default: false)
|
358
|
+
# @return [Hash] Response with base64 audio data and metadata
|
359
|
+
def get_pvc_sample_audio(voice_id, sample_id, **options)
  # The flag becomes a query parameter whenever it is not nil, so an explicit
  # false is still sent; otherwise an empty parameter hash is passed.
  noise_flag = options[:remove_background_noise]
  query = noise_flag.nil? ? {} : { remove_background_noise: noise_flag }

  @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/audio", query)
end
|
367
|
+
|
368
|
+
# GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/waveform
|
369
|
+
# Retrieve the visual waveform of a voice sample
|
370
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-waveform
|
371
|
+
#
|
372
|
+
# @param voice_id [String] Voice ID
|
373
|
+
# @param sample_id [String] Sample ID
|
374
|
+
# @return [Hash] Response with sample_id and visual_waveform array
|
375
|
+
def get_pvc_sample_waveform(voice_id, sample_id)
  # Plain GET; no query parameters are sent.
  waveform_path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/waveform"
  @client.get(waveform_path)
end
|
379
|
+
|
380
|
+
# GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers
|
381
|
+
# Retrieve speaker separation status and detected speakers
|
382
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-speakers
|
383
|
+
#
|
384
|
+
# @param voice_id [String] Voice ID
|
385
|
+
# @param sample_id [String] Sample ID
|
386
|
+
# @return [Hash] Response with separation status and speakers
|
387
|
+
def get_pvc_speaker_separation_status(voice_id, sample_id)
  # Plain GET; no query parameters are sent.
  speakers_path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/speakers"
  @client.get(speakers_path)
end
|
391
|
+
|
392
|
+
# POST /v1/voices/pvc/{voice_id}/samples/{sample_id}/separate-speakers
|
393
|
+
# Start speaker separation process for a sample
|
394
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/start-speaker-separation
|
395
|
+
#
|
396
|
+
# @param voice_id [String] Voice ID
|
397
|
+
# @param sample_id [String] Sample ID
|
398
|
+
# @return [Hash] Response with status
|
399
|
+
def start_pvc_speaker_separation(voice_id, sample_id)
  # POST with no body argument: the client is called with the path only.
  separation_path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/separate-speakers"
  @client.post(separation_path)
end
|
403
|
+
|
404
|
+
# GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers/{speaker_id}/audio
|
405
|
+
# Retrieve separated audio for a specific speaker
|
406
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-separated-speaker-audio
|
407
|
+
#
|
408
|
+
# @param voice_id [String] Voice ID
|
409
|
+
# @param sample_id [String] Sample ID
|
410
|
+
# @param speaker_id [String] Speaker ID
|
411
|
+
# @return [Hash] Response with base64 audio data and metadata
|
412
|
+
def get_pvc_separated_speaker_audio(voice_id, sample_id, speaker_id)
  # All three identifiers are interpolated into the path unchanged.
  speaker_audio_path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/speakers/#{speaker_id}/audio"
  @client.get(speaker_audio_path)
end
|
416
|
+
|
417
|
+
# POST /v1/voices/pvc/{voice_id}/verification
|
418
|
+
# Request manual verification for a PVC voice
|
419
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/request-pvc-verification
|
420
|
+
#
|
421
|
+
# @param voice_id [String] Voice ID
|
422
|
+
# @param verification_files [Array<IO, File>] Verification documents
|
423
|
+
# @param filenames [Array<String>] Original filenames
|
424
|
+
# @param options [Hash] Optional parameters
|
425
|
+
# @option options [String] :extra_text Extra text for verification process
|
426
|
+
# @return [Hash] Response with status
|
427
|
+
def request_pvc_verification(voice_id, verification_files, filenames, **options)
  endpoint = "/v1/voices/pvc/#{voice_id}/verification"

  payload = {}
  payload[:extra_text] = options[:extra_text] if options[:extra_text]

  # Build one multipart part per document. A missing filename (or a nil
  # filenames argument) falls back to a positional default name.
  names = Array(filenames)
  parts = verification_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "verification_#{index}.pdf")
  end

  # Previously each iteration assigned payload["files[]"] again, so only the
  # LAST document was ever uploaded. Keep every part instead. A single file is
  # still sent as a bare part, exactly as before.
  # NOTE(review): for several files this passes an Array under "files[]";
  # confirm that post_multipart's encoder emits one part per element and that
  # the resulting field name is the one the API expects.
  payload["files[]"] = parts.length == 1 ? parts.first : parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
|
441
|
+
|
442
|
+
# GET /v1/voices/pvc/{voice_id}/captcha
|
443
|
+
# Get captcha for PVC voice verification
|
444
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-captcha
|
445
|
+
#
|
446
|
+
# @param voice_id [String] Voice ID
|
447
|
+
# @return [Hash] Captcha data
|
448
|
+
def get_pvc_captcha(voice_id)
  # Same path as verify_pvc_captcha, issued as a GET.
  captcha_path = "/v1/voices/pvc/#{voice_id}/captcha"
  @client.get(captcha_path)
end
|
452
|
+
|
453
|
+
# POST /v1/voices/pvc/{voice_id}/captcha
|
454
|
+
# Submit captcha verification for PVC voice
|
455
|
+
# Documentation: https://elevenlabs.io/docs/api-reference/voices/verify-pvc-captcha
|
456
|
+
#
|
457
|
+
# @param voice_id [String] Voice ID
|
458
|
+
# @param recording_file [IO, File] Audio recording of the user
|
459
|
+
# @param filename [String] Original filename for the recording
|
460
|
+
# @return [Hash] Response with status
|
461
|
+
def verify_pvc_captcha(voice_id, recording_file, filename)
  # The recording is the only multipart field and is sent under :recording.
  recording_part = @client.file_part(recording_file, filename)

  @client.post_multipart("/v1/voices/pvc/#{voice_id}/captcha", { recording: recording_part })
end
|
470
|
+
|
471
|
+
# Longer, more descriptive names for the public methods defined above.
alias get_voice get
alias list_voices list
alias create_voice create
alias edit_voice edit
alias delete_voice delete
alias similar_voices find_similar
alias default_settings get_default_settings
alias voice_settings get_settings
alias update_settings edit_settings
|
480
|
+
|
113
481
|
# Check if a voice is banned (safety control)
|
114
482
|
# @param voice_id [String] The ID of the voice to check
|
115
483
|
# @return [Boolean] True if the voice is banned
|
@@ -133,13 +501,6 @@ module ElevenlabsClient
|
|
133
501
|
false
|
134
502
|
end
|
135
503
|
|
136
|
-
# Alias methods for backward compatibility and convenience
|
137
|
-
alias_method :get_voice, :get
|
138
|
-
alias_method :list_voices, :list
|
139
|
-
alias_method :create_voice, :create
|
140
|
-
alias_method :edit_voice, :edit
|
141
|
-
alias_method :delete_voice, :delete
|
142
|
-
|
143
504
|
private
|
144
505
|
|
145
506
|
attr_reader :client
|
@@ -0,0 +1,250 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'
require 'uri'
require 'websocket-client-simple'
|
5
|
+
|
6
|
+
module ElevenlabsClient
  # Helper for the ElevenLabs text-to-speech WebSocket endpoints.
  #
  # It opens sockets via WebSocket::Client::Simple and builds the JSON control
  # messages that are written to them. Every +send_*+ helper serialises a Hash
  # with +to_json+ and passes the result to +ws.send+.
  class WebSocketTextToSpeech
    # Query parameters accepted by both connect methods; any other option is ignored.
    STREAM_QUERY_KEYS = %i[
      model_id language_code enable_logging enable_ssml_parsing output_format
      inactivity_timeout sync_alignment auto_mode apply_text_normalization seed
    ].freeze

    # Parameters that are additionally dropped when given as an empty string.
    BLANK_SKIPPED_KEYS = %i[language_code apply_text_normalization].freeze

    # @param client [Object] API client; must respond to +base_url+ and +api_key+
    def initialize(client)
      @client = client
      # Swap only the leading scheme (https -> wss, http -> ws); anchoring keeps
      # any "http://" appearing later in the URL untouched.
      @base_url = client.base_url.sub(%r{\Ahttps://}, 'wss://').sub(%r{\Ahttp://}, 'ws://')
    end

    # Creates a WebSocket connection for real-time text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/text-to-speech
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters
    # @option options [String] :model_id The model ID to use
    # @option options [String] :language_code ISO 639-1 language code
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [Boolean] :enable_ssml_parsing Enable SSML parsing (default: false)
    # @option options [String] :output_format Output audio format
    # @option options [Integer] :inactivity_timeout Timeout in seconds (default: 20, max: 180)
    # @option options [Boolean] :sync_alignment Include timing data (default: false)
    # @option options [Boolean] :auto_mode Reduce latency mode (default: false)
    # @option options [String] :apply_text_normalization Text normalization ("auto", "on", "off")
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_stream_input(voice_id, **options)
      open_socket("/v1/text-to-speech/#{voice_id}/stream-input", options)
    end

    # Creates a WebSocket connection for multi-context text-to-speech streaming
    # Documentation: https://elevenlabs.io/docs/api-reference/websockets/multi-context
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param options [Hash] Optional parameters (same as connect_stream_input)
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def connect_multi_stream_input(voice_id, **options)
      open_socket("/v1/text-to-speech/#{voice_id}/multi-stream-input", options)
    end

    # Helper method to send initialization message for single stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param options [Hash] Initialization options
    # @option options [String] :text Initial text (defaults to a single space)
    # @option options [Hash] :voice_settings Voice settings hash (defaults to {})
    # @option options [String] :xi_api_key API key (will use client's key if not provided)
    def send_initialize_connection(ws, **options)
      send_json(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        xi_api_key: options[:xi_api_key] || @client.api_key
      )
    end

    # Helper method to send text for single stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :try_trigger_generation Try to trigger generation
    #   (sent whenever it is not nil, so an explicit false is included)
    # @option options [Hash] :voice_settings Voice settings override
    def send_text(ws, text, **options)
      message = { text: text }
      message[:try_trigger_generation] = options[:try_trigger_generation] unless options[:try_trigger_generation].nil?
      message[:voice_settings] = options[:voice_settings] if options[:voice_settings]

      send_json(ws, message)
    end

    # Helper method to close connection for single stream (sends an empty text message)
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_connection(ws)
      send_json(ws, text: "")
    end

    # Helper method to send initialization message for multi-context stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Initialization options (:text, :voice_settings)
    def send_initialize_connection_multi(ws, context_id, **options)
      send_json(
        ws,
        text: options[:text] || " ",
        voice_settings: options[:voice_settings] || {},
        context_id: context_id
      )
    end

    # Helper method to initialize a new context in multi-stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param options [Hash] Context options (:voice_settings, :model_id, :language_code)
    def send_initialize_context(ws, context_id, **options)
      message = {
        context_id: context_id,
        voice_settings: options[:voice_settings] || {}
      }
      message[:model_id] = options[:model_id] if options[:model_id]
      message[:language_code] = options[:language_code] if options[:language_code]

      send_json(ws, message)
    end

    # Helper method to send text for multi-context stream
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    # @param text [String] Text to convert to speech
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :flush Sent whenever it is not nil
    def send_text_multi(ws, context_id, text, **options)
      message = { text: text, context_id: context_id }
      message[:flush] = options[:flush] unless options[:flush].nil?

      send_json(ws, message)
    end

    # Helper method to flush a context
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_flush_context(ws, context_id)
      send_json(ws, context_id: context_id, flush: true)
    end

    # Helper method to close a specific context
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_close_context(ws, context_id)
      send_json(ws, context_id: context_id, close_context: true)
    end

    # Helper method to keep a context alive
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    # @param context_id [String] Context identifier
    def send_keep_context_alive(ws, context_id)
      send_json(ws, context_id: context_id, keep_context_alive: true)
    end

    # Helper method to close the entire socket
    # @param ws [WebSocket::Client::Simple::Client] WebSocket client
    def send_close_socket(ws)
      send_json(ws, close_socket: true)
    end

    # Convenience method to create a complete streaming session: connects,
    # and on open sends the initialization message, every text chunk (only the
    # last one with try_trigger_generation: true) and the close message.
    #
    # NOTE(review): handlers are registered after connect returns; confirm the
    # socket library cannot emit :open before they are attached.
    #
    # @param voice_id [String] The unique identifier for the voice
    # @param text_chunks [Array<String>] Array of text chunks to stream
    # @param options [Hash] Connection and voice options
    # @param block [Proc] Called with (decoded_audio_bytes, parsed_message) for
    #   every message that carries an "audio" field
    # @return [WebSocket::Client::Simple::Client] WebSocket client instance
    def stream_text_to_speech(voice_id, text_chunks, **options, &block)
      ws = connect_stream_input(voice_id, **options)

      # Handlers may be evaluated with the socket as self (presumably via
      # instance_exec in the event library), so helpers are always called on an
      # explicit receiver instead of relying on the block's self.
      tts = self
      last_index = text_chunks.length - 1

      ws.on :open do
        tts.send_initialize_connection(ws, **options)

        text_chunks.each_with_index do |chunk, index|
          tts.send_text(ws, chunk, try_trigger_generation: index == last_index)
        end

        tts.send_close_connection(ws)
      end

      ws.on :message do |msg|
        raw = msg.data.to_s
        # Frames with an empty or non-JSON payload carry no audio; skip them
        # rather than raising a parser error inside the socket callback.
        next if raw.empty?

        data = begin
          JSON.parse(raw)
        rescue JSON::ParserError
          nil
        end
        next unless data.is_a?(Hash)

        if data['audio'] && block
          # unpack1('m') is the Base64 decoding that Base64.decode64 performs,
          # without depending on the base64 library being loaded.
          block.call(data['audio'].unpack1('m'), data)
        end
      end

      ws.on :error do |e|
        raise APIError, "WebSocket error: #{e.message}"
      end

      ws
    end

    # Alias methods for convenience
    alias_method :connect_single_stream, :connect_stream_input
    alias_method :connect_multi_context, :connect_multi_stream_input

    private

    attr_reader :client

    # Opens a socket for +endpoint+, appending the filtered query string (if
    # any) and authenticating with the client's API key header.
    def open_socket(endpoint, options)
      query = stream_query(options)
      url = "#{@base_url}#{endpoint}"
      url += "?#{query}" unless query.empty?

      WebSocket::Client::Simple.connect(url, headers: { "xi-api-key" => @client.api_key })
    end

    # Builds the URL-encoded query string from the recognised, non-nil options,
    # preserving the order in which the caller passed them. Returns "" when
    # nothing qualifies.
    def stream_query(options)
      pairs = options.select do |key, value|
        next false unless STREAM_QUERY_KEYS.include?(key)
        next false if value.nil?

        !(BLANK_SKIPPED_KEYS.include?(key) && value.to_s.empty?)
      end

      # Values were previously interpolated raw, so characters such as spaces,
      # "&" or "=" produced a malformed URL.
      URI.encode_www_form(pairs)
    end

    # Serialises +message+ as JSON and writes it to the socket.
    def send_json(ws, message)
      ws.send(message.to_json)
    end
  end
end
|
data/lib/elevenlabs_client.rb
CHANGED
@@ -5,18 +5,25 @@ require_relative "elevenlabs_client/errors"
|
|
5
5
|
require_relative "elevenlabs_client/settings"
|
6
6
|
require_relative "elevenlabs_client/endpoints/dubs"
|
7
7
|
require_relative "elevenlabs_client/endpoints/text_to_speech"
|
8
|
-
require_relative "elevenlabs_client/endpoints/text_to_speech_stream"
|
9
8
|
require_relative "elevenlabs_client/endpoints/text_to_dialogue"
|
10
9
|
require_relative "elevenlabs_client/endpoints/sound_generation"
|
11
10
|
require_relative "elevenlabs_client/endpoints/text_to_voice"
|
12
|
-
require_relative "elevenlabs_client/endpoints/models"
|
11
|
+
require_relative "elevenlabs_client/endpoints/admin/models"
|
12
|
+
require_relative "elevenlabs_client/endpoints/admin/history"
|
13
|
+
require_relative "elevenlabs_client/endpoints/admin/usage"
|
14
|
+
require_relative "elevenlabs_client/endpoints/admin/user"
|
15
|
+
require_relative "elevenlabs_client/endpoints/admin/voice_library"
|
13
16
|
require_relative "elevenlabs_client/endpoints/voices"
|
14
17
|
require_relative "elevenlabs_client/endpoints/music"
|
18
|
+
require_relative "elevenlabs_client/endpoints/audio_isolation"
|
19
|
+
require_relative "elevenlabs_client/endpoints/audio_native"
|
20
|
+
require_relative "elevenlabs_client/endpoints/forced_alignment"
|
21
|
+
require_relative "elevenlabs_client/endpoints/speech_to_speech"
|
22
|
+
require_relative "elevenlabs_client/endpoints/speech_to_text"
|
23
|
+
require_relative "elevenlabs_client/endpoints/websocket_text_to_speech"
|
15
24
|
require_relative "elevenlabs_client/client"
|
16
25
|
|
17
26
|
module ElevenlabsClient
|
18
|
-
class Error < StandardError; end
|
19
|
-
|
20
27
|
# Convenience method to create a new client
|
21
28
|
def self.new(**options)
|
22
29
|
Client.new(**options)
|