elevenlabs_client 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
# frozen_string_literal: true

require "uri"

module ElevenlabsClient
  # Wrapper around the "text-to-speech with timestamps" endpoint.
  # All HTTP work is delegated to the client object given to the constructor;
  # this class only assembles the endpoint URL and the request body.
  class TextToSpeechWithTimestamps
    # Options that travel in the URL query string, in the order they are emitted.
    QUERY_OPTIONS = %i[enable_logging optimize_streaming_latency output_format].freeze

    # Options that travel in the request body, in the order they are emitted.
    BODY_OPTIONS = %i[
      model_id
      language_code
      voice_settings
      pronunciation_dictionary_locators
      seed
      previous_text
      next_text
      previous_request_ids
      next_request_ids
      apply_text_normalization
      apply_language_text_normalization
      use_pvc_as_ivc
    ].freeze

    # Boolean flags: an explicit +false+ is meaningful and must be forwarded,
    # so these are skipped only when they are +nil+.
    BOOLEAN_OPTIONS = %i[enable_logging apply_language_text_normalization use_pvc_as_ivc].freeze

    private_constant :QUERY_OPTIONS, :BODY_OPTIONS, :BOOLEAN_OPTIONS

    # @param client [Object] object responding to +post(endpoint, body)+
    def initialize(client)
      @client = client
    end

    # POST /v1/text-to-speech/{voice_id}/with-timestamps
    # Generate speech from text with precise character-level timing information
    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/with-timestamps
    #
    # @param voice_id [String] Voice ID to be used
    # @param text [String] The text that will get converted into speech
    # @param options [Hash] Optional TTS parameters
    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
    # @option options [String] :language_code ISO 639-1 language code for text normalization
    # @option options [Hash] :voice_settings Voice settings overriding stored settings
    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @option options [String] :previous_text Text that came before current request
    # @option options [String] :next_text Text that comes after current request
    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
    # @option options [Boolean] :apply_language_text_normalization Language text normalization
    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
    # @return [Hash] Response containing audio_base64, alignment, and normalized_alignment
    def generate(voice_id, text, **options)
      endpoint = "/v1/text-to-speech/#{voice_id}/with-timestamps"

      query_params = pick_options(options, QUERY_OPTIONS)
      # encode_www_form percent-encodes keys and values, so characters such as
      # "&", "=" or spaces in a value cannot break the query string apart.
      endpoint = "#{endpoint}?#{URI.encode_www_form(query_params)}" unless query_params.empty?

      # +text+ always comes first; optional fields follow in BODY_OPTIONS order.
      request_body = { text: text }.merge(pick_options(options, BODY_OPTIONS))

      @client.post(endpoint, request_body)
    end

    # Alias for backward compatibility
    alias_method :text_to_speech_with_timestamps, :generate

    private

    attr_reader :client

    # Selects the supplied entries of +options+ for the given +keys+.
    # Boolean flags are kept unless nil; every other option is kept only when truthy.
    #
    # @param options [Hash] caller-supplied options
    # @param keys [Array<Symbol>] option names to consider, in output order
    # @return [Hash] the subset of +options+ that should be sent
    def pick_options(options, keys)
      keys.each_with_object({}) do |key, picked|
        value = options[key]
        supplied = BOOLEAN_OPTIONS.include?(key) ? !value.nil? : value
        picked[key] = value if supplied
      end
    end
  end
end
@@ -133,12 +133,374 @@ module ElevenlabsClient
133
133
  false
134
134
  end
135
135
 
136
+ # POST /v1/similar-voices
137
+ # Returns a list of shared voices similar to the provided audio sample
138
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/similar-voices
139
+ #
140
+ # @param audio_file [IO, File] Audio file to find similar voices for
141
+ # @param filename [String] Original filename for the audio file
142
+ # @param options [Hash] Optional parameters
143
+ # @option options [Float] :similarity_threshold Threshold for voice similarity (0-2)
144
+ # @option options [Integer] :top_k Number of most similar voices to return (1-100)
145
+ # @return [Hash] Response containing similar voices
146
def find_similar(audio_file, filename, **options)
  # The audio sample is always part of the form; the tuning options are
  # attached only when the caller supplied them.
  form = { audio_file: @client.file_part(audio_file, filename) }
  %i[similarity_threshold top_k].each do |key|
    form[key] = options[key] if options[key]
  end

  @client.post_multipart("/v1/similar-voices", form)
end
158
+
159
+ # POST /v1/voices/add
160
+ # Creates a new IVC (Instant Voice Cloning) voice
161
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/add-voice
162
+ #
163
+ # @param name [String] Name of the voice
164
+ # @param audio_files [Array<IO, File>] Array of audio files for voice cloning
165
+ # @param filenames [Array<String>] Array of original filenames
166
+ # @param options [Hash] Optional parameters
167
+ # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
168
+ # @option options [String] :description Description of the voice
169
+ # @option options [String] :labels Serialized labels dictionary
170
+ # @return [Hash] Response containing voice_id and requires_verification status
171
def create_ivc(name, audio_files, filenames, **options)
  endpoint = "/v1/voices/add"

  payload = { name: name }

  # remove_background_noise is a boolean, so an explicit false is forwarded.
  payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?
  payload[:description] = options[:description] if options[:description]
  payload[:labels] = options[:labels] if options[:labels]

  # Build one multipart part per audio file. Assigning each part to the same
  # hash key inside the loop would keep only the last file, so the parts are
  # collected into an array instead.
  names = filenames || []
  parts = audio_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "audio_#{index}.mp3")
  end

  # NOTE(review): assumes the multipart encoder behind @client.post_multipart
  # expands an Array value into repeated parts (Faraday's multipart middleware
  # emits an Array under "files" as repeated "files[]" fields) -- confirm
  # against the client implementation.
  payload["files"] = parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
189
+
190
+ # GET /v1/voices/settings/default
191
+ # Gets the default settings for voices
192
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/default-settings
193
+ #
194
+ # @return [Hash] Default voice settings
195
def get_default_settings
  # Fixed path, no parameters: the client's GET result is returned unchanged.
  @client.get("/v1/voices/settings/default")
end
199
+
200
+ # GET /v1/voices/{voice_id}/settings
201
+ # Returns the settings for a specific voice
202
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-settings
203
+ #
204
+ # @param voice_id [String] Voice ID
205
+ # @return [Hash] Voice settings
206
def get_settings(voice_id)
  # voice_id is interpolated into the path as-is; the GET result is returned unchanged.
  @client.get("/v1/voices/#{voice_id}/settings")
end
210
+
211
+ # POST /v1/voices/{voice_id}/settings/edit
212
+ # Edit settings for a specific voice
213
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/edit-settings
214
+ #
215
+ # @param voice_id [String] Voice ID
216
+ # @param options [Hash] Voice settings to update
217
+ # @option options [Float] :stability Stability setting (0.0-1.0)
218
+ # @option options [Boolean] :use_speaker_boost Enable speaker boost
219
+ # @option options [Float] :similarity_boost Similarity boost setting (0.0-1.0)
220
+ # @option options [Float] :style Style exaggeration (0.0-1.0)
221
+ # @option options [Float] :speed Speed adjustment (0.25-4.0)
222
+ # @return [Hash] Response with status
223
def edit_settings(voice_id, **options)
  path = "/v1/voices/#{voice_id}/settings/edit"

  body = %i[stability use_speaker_boost similarity_boost style speed].each_with_object({}) do |key, acc|
    value = options[key]
    # use_speaker_boost is a boolean flag, so an explicit false is still sent;
    # every other setting is sent only when truthy.
    wanted = key == :use_speaker_boost ? !value.nil? : value
    acc[key] = value if wanted
  end

  @client.post(path, body)
end
235
+
236
+ # GET /v1/voices/{voice_id}/samples/{sample_id}/audio
237
+ # Returns the audio corresponding to a sample attached to a voice
238
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-sample-audio
239
+ #
240
+ # @param voice_id [String] Voice ID
241
+ # @param sample_id [String] Sample ID
242
+ # @return [String] Binary audio data
243
def get_sample_audio(voice_id, sample_id)
  # Both ids are interpolated into the path as-is; the GET result is returned unchanged.
  @client.get("/v1/voices/#{voice_id}/samples/#{sample_id}/audio")
end
247
+
248
+ # POST /v1/voices/pvc
249
+ # Creates a new PVC (Professional Voice Cloning) voice with metadata but no samples
250
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/create-pvc
251
+ #
252
+ # @param name [String] Name of the voice (max 100 characters)
253
+ # @param language [String] Language used in the samples
254
+ # @param options [Hash] Optional parameters
255
+ # @option options [String] :description Description (max 500 characters)
256
+ # @option options [Hash] :labels Serialized labels dictionary
257
+ # @return [Hash] Response containing voice_id
258
def create_pvc(name, language, **options)
  # name and language are always sent; description and labels only when supplied.
  body = { name: name, language: language }
  %i[description labels].each do |key|
    body[key] = options[key] if options[key]
  end

  @client.post("/v1/voices/pvc", body)
end
271
+
272
+ # POST /v1/voices/pvc/{voice_id}
273
+ # Edit PVC voice metadata
274
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc
275
+ #
276
+ # @param voice_id [String] Voice ID
277
+ # @param options [Hash] Parameters to update
278
+ # @option options [String] :name New name (max 100 characters)
279
+ # @option options [String] :language New language
280
+ # @option options [String] :description New description (max 500 characters)
281
+ # @option options [Hash] :labels New labels dictionary
282
+ # @return [Hash] Response containing voice_id
283
def update_pvc(voice_id, **options)
  path = "/v1/voices/pvc/#{voice_id}"

  # Only the fields the caller supplied (truthy values) are sent.
  changes = %i[name language description labels].each_with_object({}) do |field, acc|
    acc[field] = options[field] if options[field]
  end

  @client.post(path, changes)
end
294
+
295
+ # POST /v1/voices/pvc/{voice_id}/train
296
+ # Start PVC training process for a voice
297
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/train-pvc
298
+ #
299
+ # @param voice_id [String] Voice ID
300
+ # @param options [Hash] Optional parameters
301
+ # @option options [String] :model_id Model ID to use for conversion
302
+ # @return [Hash] Response with status
303
def train_pvc(voice_id, **options)
  path = "/v1/voices/pvc/#{voice_id}/train"
  # An empty body is posted when no model_id was given.
  body = options[:model_id] ? { model_id: options[:model_id] } : {}

  @client.post(path, body)
end
311
+
312
+ # POST /v1/voices/pvc/{voice_id}/samples
313
+ # Add audio samples to a PVC voice
314
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/add-pvc-samples
315
+ #
316
+ # @param voice_id [String] Voice ID
317
+ # @param audio_files [Array<IO, File>] Audio files for the voice
318
+ # @param filenames [Array<String>] Original filenames
319
+ # @param options [Hash] Optional parameters
320
+ # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
321
+ # @return [Array<Hash>] Array of sample information
322
def add_pvc_samples(voice_id, audio_files, filenames, **options)
  endpoint = "/v1/voices/pvc/#{voice_id}/samples"

  payload = {}
  # remove_background_noise is a boolean, so an explicit false is forwarded.
  payload[:remove_background_noise] = options[:remove_background_noise] unless options[:remove_background_noise].nil?

  # Build one multipart part per audio file. Assigning each part to the same
  # hash key inside the loop would keep only the last file, so the parts are
  # collected into an array instead.
  names = filenames || []
  parts = audio_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "audio_#{index}.mp3")
  end

  # NOTE(review): assumes the multipart encoder behind @client.post_multipart
  # expands an Array value into repeated parts (Faraday's multipart middleware
  # emits an Array under "files" as repeated "files[]" fields) -- confirm
  # against the client implementation.
  payload["files"] = parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
336
+
337
+ # POST /v1/voices/pvc/{voice_id}/samples/{sample_id}
338
+ # Update a PVC voice sample - apply noise removal or select speaker
339
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/update-pvc-sample
340
+ #
341
+ # @param voice_id [String] Voice ID
342
+ # @param sample_id [String] Sample ID
343
+ # @param options [Hash] Update parameters
344
+ # @option options [Boolean] :remove_background_noise Remove background noise
345
+ # @option options [Array<String>] :selected_speaker_ids Speaker IDs for training
346
+ # @option options [Integer] :trim_start_time Start time in milliseconds
347
+ # @option options [Integer] :trim_end_time End time in milliseconds
348
+ # @return [Hash] Response containing voice_id
349
def update_pvc_sample(voice_id, sample_id, **options)
  path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}"

  body = {}
  # The noise flag is a boolean: forwarded whenever it is not nil, so false is sent too.
  noise_flag = options[:remove_background_noise]
  body[:remove_background_noise] = noise_flag unless noise_flag.nil?

  # The remaining options are forwarded only when truthy.
  %i[selected_speaker_ids trim_start_time trim_end_time].each do |key|
    body[key] = options[key] if options[key]
  end

  @client.post(path, body)
end
360
+
361
+ # DELETE /v1/voices/pvc/{voice_id}/samples/{sample_id}
362
+ # Delete a sample from a PVC voice
363
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/delete-pvc-sample
364
+ #
365
+ # @param voice_id [String] Voice ID
366
+ # @param sample_id [String] Sample ID
367
+ # @return [Hash] Response with status
368
def delete_pvc_sample(voice_id, sample_id)
  # Issues a DELETE on the sample path; the client's result is returned unchanged.
  @client.delete("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}")
end
372
+
373
+ # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/audio
374
+ # Retrieve voice sample audio with or without noise removal
375
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-sample-audio
376
+ #
377
+ # @param voice_id [String] Voice ID
378
+ # @param sample_id [String] Sample ID
379
+ # @param options [Hash] Optional parameters
380
+ # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
381
+ # @return [Hash] Response with base64 audio data and metadata
382
def get_pvc_sample_audio(voice_id, sample_id, **options)
  path = "/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/audio"

  # The flag is passed along whenever it is not nil, so an explicit false is sent too.
  noise_flag = options[:remove_background_noise]
  query = noise_flag.nil? ? {} : { remove_background_noise: noise_flag }

  @client.get(path, query)
end
390
+
391
+ # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/waveform
392
+ # Retrieve the visual waveform of a voice sample
393
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-waveform
394
+ #
395
+ # @param voice_id [String] Voice ID
396
+ # @param sample_id [String] Sample ID
397
+ # @return [Hash] Response with sample_id and visual_waveform array
398
def get_pvc_sample_waveform(voice_id, sample_id)
  # Plain GET on the sample's waveform path; the client's result is returned unchanged.
  @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/waveform")
end
402
+
403
+ # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers
404
+ # Retrieve speaker separation status and detected speakers
405
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-speakers
406
+ #
407
+ # @param voice_id [String] Voice ID
408
+ # @param sample_id [String] Sample ID
409
+ # @return [Hash] Response with separation status and speakers
410
def get_pvc_speaker_separation_status(voice_id, sample_id)
  # Plain GET on the sample's speakers path; the client's result is returned unchanged.
  @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/speakers")
end
414
+
415
+ # POST /v1/voices/pvc/{voice_id}/samples/{sample_id}/separate-speakers
416
+ # Start speaker separation process for a sample
417
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/start-speaker-separation
418
+ #
419
+ # @param voice_id [String] Voice ID
420
+ # @param sample_id [String] Sample ID
421
+ # @return [Hash] Response with status
422
def start_pvc_speaker_separation(voice_id, sample_id)
  # POST with the endpoint as the only argument; no request body is passed.
  @client.post("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/separate-speakers")
end
426
+
427
+ # GET /v1/voices/pvc/{voice_id}/samples/{sample_id}/speakers/{speaker_id}/audio
428
+ # Retrieve separated audio for a specific speaker
429
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-separated-speaker-audio
430
+ #
431
+ # @param voice_id [String] Voice ID
432
+ # @param sample_id [String] Sample ID
433
+ # @param speaker_id [String] Speaker ID
434
+ # @return [Hash] Response with base64 audio data and metadata
435
def get_pvc_separated_speaker_audio(voice_id, sample_id, speaker_id)
  # All three ids are interpolated into the path as-is; the GET result is returned unchanged.
  @client.get("/v1/voices/pvc/#{voice_id}/samples/#{sample_id}/speakers/#{speaker_id}/audio")
end
439
+
440
+ # POST /v1/voices/pvc/{voice_id}/verification
441
+ # Request manual verification for a PVC voice
442
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/request-pvc-verification
443
+ #
444
+ # @param voice_id [String] Voice ID
445
+ # @param verification_files [Array<IO, File>] Verification documents
446
+ # @param filenames [Array<String>] Original filenames
447
+ # @param options [Hash] Optional parameters
448
+ # @option options [String] :extra_text Extra text for verification process
449
+ # @return [Hash] Response with status
450
def request_pvc_verification(voice_id, verification_files, filenames, **options)
  endpoint = "/v1/voices/pvc/#{voice_id}/verification"

  payload = {}
  payload[:extra_text] = options[:extra_text] if options[:extra_text]

  # Build one multipart part per verification document. Assigning each part
  # to the same hash key inside the loop would keep only the last document,
  # so the parts are collected into an array instead.
  names = filenames || []
  parts = verification_files.each_with_index.map do |file, index|
    @client.file_part(file, names[index] || "verification_#{index}.pdf")
  end

  # NOTE(review): assumes the multipart encoder behind @client.post_multipart
  # expands an Array value into repeated parts (Faraday's multipart middleware
  # emits an Array under "files" as repeated "files[]" fields) -- confirm
  # against the client implementation.
  payload["files"] = parts unless parts.empty?

  @client.post_multipart(endpoint, payload)
end
464
+
465
+ # GET /v1/voices/pvc/{voice_id}/captcha
466
+ # Get captcha for PVC voice verification
467
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/get-pvc-captcha
468
+ #
469
+ # @param voice_id [String] Voice ID
470
+ # @return [Hash] Captcha data
471
def get_pvc_captcha(voice_id)
  # Plain GET on the voice's captcha path; the client's result is returned unchanged.
  @client.get("/v1/voices/pvc/#{voice_id}/captcha")
end
475
+
476
+ # POST /v1/voices/pvc/{voice_id}/captcha
477
+ # Submit captcha verification for PVC voice
478
+ # Documentation: https://elevenlabs.io/docs/api-reference/voices/verify-pvc-captcha
479
+ #
480
+ # @param voice_id [String] Voice ID
481
+ # @param recording_file [IO, File] Audio recording of the user
482
+ # @param filename [String] Original filename for the recording
483
+ # @return [Hash] Response with status
484
def verify_pvc_captcha(voice_id, recording_file, filename)
  path = "/v1/voices/pvc/#{voice_id}/captcha"
  # The recording is the only form field; it is wrapped by the client's file_part helper.
  recording_part = @client.file_part(recording_file, filename)

  @client.post_multipart(path, { recording: recording_part })
end
493
+
136
494
  # Alias methods for backward compatibility and convenience
137
495
  alias_method :get_voice, :get
138
496
  alias_method :list_voices, :list
139
497
  alias_method :create_voice, :create
140
498
  alias_method :edit_voice, :edit
141
499
  alias_method :delete_voice, :delete
500
+ alias_method :similar_voices, :find_similar
501
+ alias_method :default_settings, :get_default_settings
502
+ alias_method :voice_settings, :get_settings
503
+ alias_method :update_settings, :edit_settings
142
504
 
143
505
  private
144
506