elevenlabs_client 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +108 -0
- data/README.md +85 -4
- data/lib/elevenlabs_client/client.rb +35 -6
- data/lib/elevenlabs_client/endpoints/admin/history.rb +106 -0
- data/lib/elevenlabs_client/endpoints/admin/models.rb +27 -0
- data/lib/elevenlabs_client/endpoints/admin/usage.rb +46 -0
- data/lib/elevenlabs_client/endpoints/admin/user.rb +28 -0
- data/lib/elevenlabs_client/endpoints/admin/voice_library.rb +86 -0
- data/lib/elevenlabs_client/endpoints/sound_generation.rb +0 -1
- data/lib/elevenlabs_client/endpoints/speech_to_text.rb +13 -0
- data/lib/elevenlabs_client/endpoints/text_to_dialogue.rb +34 -1
- data/lib/elevenlabs_client/endpoints/text_to_speech.rb +147 -1
- data/lib/elevenlabs_client/endpoints/text_to_voice.rb +13 -1
- data/lib/elevenlabs_client/endpoints/voices.rb +23 -24
- data/lib/elevenlabs_client/version.rb +1 -1
- data/lib/elevenlabs_client.rb +5 -5
- metadata +21 -7
- data/lib/elevenlabs_client/endpoints/models.rb +0 -26
- data/lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb +0 -50
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb +0 -43
- data/lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb +0 -75
- data/lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb +0 -73
@@ -96,10 +96,23 @@ module ElevenlabsClient
       @client.get(endpoint)
     end

+    # DELETE /v1/speech-to-text/transcripts/:transcription_id
+    # Delete a previously generated transcript by its ID
+    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text/delete-transcript
+    #
+    # @param transcription_id [String] The unique ID of the transcript to delete
+    # @return [Hash] Delete confirmation response
+    def delete_transcript(transcription_id)
+      endpoint = "/v1/speech-to-text/transcripts/#{transcription_id}"
+      @client.delete(endpoint)
+    end
+
     # Alias methods for convenience
     alias_method :transcribe, :create
     alias_method :get_transcription, :get_transcript
     alias_method :retrieve_transcript, :get_transcript
+    alias_method :delete_transcription, :delete_transcript
+    alias_method :remove_transcript, :delete_transcript

     private

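A hedged usage sketch of the new delete_transcript call follows. Only the endpoint method itself appears in the hunk above; the `ElevenlabsClient::Client.new(api_key:)` constructor and the `speech_to_text` accessor are assumptions, not confirmed by this diff.

require "elevenlabs_client"

# Assumed constructor and accessor names; only delete_transcript is shown in the diff above.
client = ElevenlabsClient::Client.new(api_key: ENV["ELEVENLABS_API_KEY"])

# Remove a transcript by its ID; returns the delete confirmation Hash.
client.speech_to_text.delete_transcript("transcription_id_123")

# The new aliases point at the same method:
client.speech_to_text.remove_transcript("transcription_id_123")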
data/lib/elevenlabs_client/endpoints/text_to_dialogue.rb
CHANGED
@@ -30,7 +30,40 @@ module ElevenlabsClient
       @client.post_binary(endpoint, request_body)
     end

-    #
+    # POST /v1/text-to-dialogue/stream
+    # Converts a list of text and voice ID pairs into speech (dialogue) and returns an audio stream.
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
+    #
+    # @param inputs [Array<Hash>] A list of dialogue inputs, each containing text and a voice ID
+    # @param options [Hash] Optional parameters
+    # @option options [String] :model_id Identifier of the model to be used (default: "eleven_v3")
+    # @option options [String] :language_code ISO 639-1 language code
+    # @option options [Hash] :settings Settings controlling the dialogue generation
+    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+    # @param block [Proc] Block to handle each audio chunk
+    # @return [Faraday::Response] The response object
+    def stream(inputs, **options, &block)
+      # Build endpoint with optional query params
+      output_format = options[:output_format] || "mp3_44100_128"
+      endpoint = "/v1/text-to-dialogue/stream?output_format=#{output_format}"
+
+      # Build request body
+      request_body = { inputs: inputs }
+      request_body[:model_id] = options[:model_id] if options[:model_id]
+      request_body[:language_code] = options[:language_code] if options[:language_code]
+      request_body[:settings] = options[:settings] if options[:settings]
+      request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+      request_body[:seed] = options[:seed] if options[:seed]
+      request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+
+      @client.post_streaming(endpoint, request_body, &block)
+    end
+
+    # Alias for convenience
+    alias_method :text_to_dialogue_stream, :stream
     alias_method :text_to_dialogue, :convert

     private
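A hedged sketch of the streaming dialogue call now living on TextToDialogue. The `text_to_dialogue` accessor and the `text`/`voice_id` keys inside each input hash are assumptions; the stream signature and the block-per-audio-chunk behavior come from the hunk above. Reuses the `client` object from the earlier sketch.

inputs = [
  { text: "Hello there!", voice_id: "voice_a" },   # key names assumed
  { text: "Hi! How are you?", voice_id: "voice_b" }
]

File.open("dialogue.mp3", "wb") do |file|
  client.text_to_dialogue.stream(inputs, model_id: "eleven_v3", output_format: "mp3_44100_128") do |chunk|
    file.write(chunk)  # each yielded chunk is a piece of the audio stream
  end
end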
data/lib/elevenlabs_client/endpoints/text_to_speech.rb
CHANGED
@@ -33,7 +33,153 @@ module ElevenlabsClient
       end
     end

-    #
+    # POST /v1/text-to-speech/{voice_id}/with-timestamps
+    # Generate speech from text with precise character-level timing information
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/with-timestamps
+    #
+    # @param voice_id [String] Voice ID to be used
+    # @param text [String] The text that will get converted into speech
+    # @param options [Hash] Optional TTS parameters
+    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
+    # @option options [String] :language_code ISO 639-1 language code for text normalization
+    # @option options [Hash] :voice_settings Voice settings overriding stored settings
+    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+    # @option options [String] :previous_text Text that came before current request
+    # @option options [String] :next_text Text that comes after current request
+    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
+    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
+    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+    # @option options [Boolean] :apply_language_text_normalization Language text normalization
+    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
+    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
+    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
+    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+    # @return [Hash] Response containing audio_base64, alignment, and normalized_alignment
+    def convert_with_timestamps(voice_id, text, **options)
+      # Build query parameters
+      query_params = {}
+      query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
+      query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
+      query_params[:output_format] = options[:output_format] if options[:output_format]
+
+      # Build endpoint with query parameters
+      endpoint = "/v1/text-to-speech/#{voice_id}/with-timestamps"
+      if query_params.any?
+        query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
+        endpoint += "?#{query_string}"
+      end
+
+      # Build request body
+      request_body = { text: text }
+
+      # Add optional body parameters
+      request_body[:model_id] = options[:model_id] if options[:model_id]
+      request_body[:language_code] = options[:language_code] if options[:language_code]
+      request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+      request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+      request_body[:seed] = options[:seed] if options[:seed]
+      request_body[:previous_text] = options[:previous_text] if options[:previous_text]
+      request_body[:next_text] = options[:next_text] if options[:next_text]
+      request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
+      request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
+      request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+      request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
+      request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
+
+      @client.post(endpoint, request_body)
+    end
+
+    alias_method :text_to_speech_with_timestamps, :convert_with_timestamps
+
+    # POST /v1/text-to-speech/{voice_id}/stream
+    # Stream text-to-speech audio in real-time chunks
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
+    #
+    # @param voice_id [String] The ID of the voice to use
+    # @param text [String] Text to synthesize
+    # @param options [Hash] Optional TTS parameters
+    # @option options [String] :model_id Model to use (defaults to "eleven_multilingual_v2")
+    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+    # @option options [Hash] :voice_settings Voice configuration
+    # @param block [Proc] Block to handle each audio chunk
+    # @return [Faraday::Response] The response object
+    def stream(voice_id, text, **options, &block)
+      output_format = options[:output_format] || "mp3_44100_128"
+      endpoint = "/v1/text-to-speech/#{voice_id}/stream?output_format=#{output_format}"
+
+      request_body = {
+        text: text,
+        model_id: options[:model_id] || "eleven_multilingual_v2"
+      }
+
+      # Add voice_settings if provided
+      request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+
+      @client.post_streaming(endpoint, request_body, &block)
+    end
+
+    # POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
+    # Stream text-to-speech audio with character-level timing information
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
+    #
+    # @param voice_id [String] Voice ID to be used
+    # @param text [String] The text that will get converted into speech
+    # @param options [Hash] Optional TTS parameters
+    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
+    # @option options [String] :language_code ISO 639-1 language code for text normalization
+    # @option options [Hash] :voice_settings Voice settings overriding stored settings
+    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+    # @option options [String] :previous_text Text that came before current request
+    # @option options [String] :next_text Text that comes after current request
+    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
+    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
+    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+    # @option options [Boolean] :apply_language_text_normalization Language text normalization
+    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
+    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
+    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
+    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+    # @param block [Proc] Block to handle each streaming chunk containing audio and timing data
+    # @return [Faraday::Response] The response object
+    def stream_with_timestamps(voice_id, text, **options, &block)
+      # Build query parameters
+      query_params = {}
+      query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
+      query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
+      query_params[:output_format] = options[:output_format] if options[:output_format]
+
+      # Build endpoint with query parameters
+      endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
+      if query_params.any?
+        query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
+        endpoint += "?#{query_string}"
+      end
+
+      # Build request body
+      request_body = { text: text }
+
+      # Add optional body parameters
+      request_body[:model_id] = options[:model_id] if options[:model_id]
+      request_body[:language_code] = options[:language_code] if options[:language_code]
+      request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+      request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+      request_body[:seed] = options[:seed] if options[:seed]
+      request_body[:previous_text] = options[:previous_text] if options[:previous_text]
+      request_body[:next_text] = options[:next_text] if options[:next_text]
+      request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
+      request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
+      request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+      request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
+      request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
+
+      # Use streaming method with JSON parsing for timestamp data
+      @client.post_streaming_with_timestamps(endpoint, request_body, &block)
+    end
+
+    alias_method :text_to_speech_stream_with_timestamps, :stream_with_timestamps
+    alias_method :text_to_speech_stream, :stream
     alias_method :text_to_speech, :convert

     private
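A hedged sketch of the three methods the hunk above adds to TextToSpeech. The `text_to_speech` accessor is an assumption; the method names, options, and the audio_base64/alignment response keys come from the diff itself. Reuses the `client` object from the first sketch.

require "base64"

# 1) Full response with character-level timing data
result = client.text_to_speech.convert_with_timestamps("voice_id_123", "Hello world")
File.binwrite("hello.mp3", Base64.decode64(result["audio_base64"]))
alignment = result["alignment"]  # character timing, per the @return doc above

# 2) Plain audio streaming
File.open("hello_stream.mp3", "wb") do |file|
  client.text_to_speech.stream("voice_id_123", "Hello world", model_id: "eleven_multilingual_v2") do |chunk|
    file.write(chunk)
  end
end

# 3) Streaming with timestamps: each yielded chunk carries audio plus timing data
client.text_to_speech.stream_with_timestamps("voice_id_123", "Hello world") do |chunk|
  # see the stream-with-timestamps docs linked above for the chunk structure
end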
data/lib/elevenlabs_client/endpoints/text_to_voice.rb
CHANGED
@@ -74,6 +74,18 @@ module ElevenlabsClient
       @client.post(endpoint, request_body)
     end

+    # GET /v1/text-to-voice/:generated_voice_id/stream
+    # Stream a voice preview that was created via the /v1/text-to-voice/design endpoint
+    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-voice/stream-voice-preview
+    #
+    # @param generated_voice_id [String] The generated_voice_id to stream
+    # @param block [Proc] Block to handle each streaming chunk
+    # @return [Faraday::Response] The response object
+    def stream_preview(generated_voice_id, &block)
+      endpoint = "/v1/text-to-voice/#{generated_voice_id}/stream"
+      @client.get_streaming(endpoint, &block)
+    end
+
     # GET /v1/voices
     # Retrieves all voices associated with your Elevenlabs account
     # Documentation: https://elevenlabs.io/docs/api-reference/voices
@@ -84,9 +96,9 @@ module ElevenlabsClient
       @client.get(endpoint)
     end

-    # Alias methods for backward compatibility and convenience
     alias_method :design_voice, :design
     alias_method :create_from_generated_voice, :create
+    alias_method :stream_voice_preview, :stream_preview

     private

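A hedged sketch of the new preview streaming call; the `text_to_voice` accessor is an assumption, while stream_preview and its stream_voice_preview alias come from the two hunks above. Reuses the `client` object from the first sketch.

File.open("preview.mp3", "wb") do |file|
  client.text_to_voice.stream_preview("generated_voice_id_123") do |chunk|
    file.write(chunk)
  end
end

# Equivalent via the new alias:
# client.text_to_voice.stream_voice_preview("generated_voice_id_123") { |chunk| ... }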
data/lib/elevenlabs_client/endpoints/voices.rb
CHANGED
@@ -110,29 +110,6 @@ module ElevenlabsClient
       @client.delete(endpoint)
     end

-    # Check if a voice is banned (safety control)
-    # @param voice_id [String] The ID of the voice to check
-    # @return [Boolean] True if the voice is banned
-    def banned?(voice_id)
-      voice = get(voice_id)
-      voice["safety_control"] == "BAN"
-    rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
-      # If we can't get the voice, assume it's not banned
-      false
-    end
-
-    # Check if a voice is active (exists in the voice list)
-    # @param voice_id [String] The ID of the voice to check
-    # @return [Boolean] True if the voice is active
-    def active?(voice_id)
-      voices = list
-      active_voice_ids = voices["voices"].map { |voice| voice["voice_id"] }
-      active_voice_ids.include?(voice_id)
-    rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
-      # If we can't get the voice list, assume it's not active
-      false
-    end
-
     # POST /v1/similar-voices
     # Returns a list of shared voices similar to the provided audio sample
     # Documentation: https://elevenlabs.io/docs/api-reference/voices/similar-voices
@@ -491,7 +468,6 @@ module ElevenlabsClient
       @client.post_multipart(endpoint, payload)
     end

-    # Alias methods for backward compatibility and convenience
     alias_method :get_voice, :get
     alias_method :list_voices, :list
     alias_method :create_voice, :create
@@ -502,6 +478,29 @@ module ElevenlabsClient
     alias_method :voice_settings, :get_settings
     alias_method :update_settings, :edit_settings

+    # Check if a voice is banned (safety control)
+    # @param voice_id [String] The ID of the voice to check
+    # @return [Boolean] True if the voice is banned
+    def banned?(voice_id)
+      voice = get(voice_id)
+      voice["safety_control"] == "BAN"
+    rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
+      # If we can't get the voice, assume it's not banned
+      false
+    end
+
+    # Check if a voice is active (exists in the voice list)
+    # @param voice_id [String] The ID of the voice to check
+    # @return [Boolean] True if the voice is active
+    def active?(voice_id)
+      voices = list
+      active_voice_ids = voices["voices"].map { |voice| voice["voice_id"] }
+      active_voice_ids.include?(voice_id)
+    rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
+      # If we can't get the voice list, assume it's not active
+      false
+    end
+
     private

     attr_reader :client
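Note that banned? and active? are only relocated below the alias definitions, not removed; their bodies are unchanged between the two voices.rb hunks. A hedged sketch, with `voices` as an assumed accessor on the client from the first sketch:

# Both predicates rescue ValidationError/APIError/NotFoundError and return false, as shown above.
if client.voices.active?("voice_id_123") && !client.voices.banned?("voice_id_123")
  puts "Voice is available for synthesis"
end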
data/lib/elevenlabs_client.rb
CHANGED
@@ -5,14 +5,14 @@ require_relative "elevenlabs_client/errors"
 require_relative "elevenlabs_client/settings"
 require_relative "elevenlabs_client/endpoints/dubs"
 require_relative "elevenlabs_client/endpoints/text_to_speech"
-require_relative "elevenlabs_client/endpoints/text_to_speech_stream"
-require_relative "elevenlabs_client/endpoints/text_to_speech_with_timestamps"
-require_relative "elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps"
 require_relative "elevenlabs_client/endpoints/text_to_dialogue"
-require_relative "elevenlabs_client/endpoints/text_to_dialogue_stream"
 require_relative "elevenlabs_client/endpoints/sound_generation"
 require_relative "elevenlabs_client/endpoints/text_to_voice"
-require_relative "elevenlabs_client/endpoints/models"
+require_relative "elevenlabs_client/endpoints/admin/models"
+require_relative "elevenlabs_client/endpoints/admin/history"
+require_relative "elevenlabs_client/endpoints/admin/usage"
+require_relative "elevenlabs_client/endpoints/admin/user"
+require_relative "elevenlabs_client/endpoints/admin/voice_library"
 require_relative "elevenlabs_client/endpoints/voices"
 require_relative "elevenlabs_client/endpoints/music"
 require_relative "elevenlabs_client/endpoints/audio_isolation"
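Migration sketch (hedged): in 0.6.0 the standalone streaming classes are no longer required here; their methods live on the consolidated endpoint classes shown in the earlier hunks, and the models/history/usage/user/voice_library endpoints move under admin/. Instantiating the endpoint classes directly with a client mirrors the removed classes' `initialize(client)` signature, but the 0.6.0 initializers themselves are not shown in this diff, so treat these calls as assumptions.

# 0.5.0 (classes deleted below):
#   ElevenlabsClient::TextToSpeechStream.new(client).stream(voice_id, text) { |chunk| ... }
#   ElevenlabsClient::TextToDialogueStream.new(client).stream(inputs) { |chunk| ... }
#   ElevenlabsClient::Models.new(client).list

# 0.6.0 equivalents (class names and initializers assumed from the 0.5.0 pattern):
ElevenlabsClient::TextToSpeech.new(client).stream("voice_id_123", "Hello") { |chunk| print chunk }
ElevenlabsClient::TextToDialogue.new(client).stream([{ text: "Hi", voice_id: "voice_a" }]) { |chunk| print chunk }
# Models now loads from endpoints/admin/models.rb; its class namespace is not shown in this diff.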
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: elevenlabs_client
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Vitor Oliveira
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-09-
+date: 2025-09-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: faraday
@@ -122,6 +122,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: bundler-audit
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
 description: A Ruby client library for interacting with ElevenLabs dubbing and voice
   synthesis APIs
 email:
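The metadata block above registers bundler-audit as a development dependency. In gemspec DSL terms it corresponds roughly to the following sketch; the gem's actual .gemspec source is not part of this diff.

Gem::Specification.new do |spec|
  # ... existing fields elided ...
  spec.add_development_dependency "bundler-audit", "~> 0.9"
end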
@@ -135,21 +149,21 @@ files:
 - README.md
 - lib/elevenlabs_client.rb
 - lib/elevenlabs_client/client.rb
+- lib/elevenlabs_client/endpoints/admin/history.rb
+- lib/elevenlabs_client/endpoints/admin/models.rb
+- lib/elevenlabs_client/endpoints/admin/usage.rb
+- lib/elevenlabs_client/endpoints/admin/user.rb
+- lib/elevenlabs_client/endpoints/admin/voice_library.rb
 - lib/elevenlabs_client/endpoints/audio_isolation.rb
 - lib/elevenlabs_client/endpoints/audio_native.rb
 - lib/elevenlabs_client/endpoints/dubs.rb
 - lib/elevenlabs_client/endpoints/forced_alignment.rb
-- lib/elevenlabs_client/endpoints/models.rb
 - lib/elevenlabs_client/endpoints/music.rb
 - lib/elevenlabs_client/endpoints/sound_generation.rb
 - lib/elevenlabs_client/endpoints/speech_to_speech.rb
 - lib/elevenlabs_client/endpoints/speech_to_text.rb
 - lib/elevenlabs_client/endpoints/text_to_dialogue.rb
-- lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb
 - lib/elevenlabs_client/endpoints/text_to_speech.rb
-- lib/elevenlabs_client/endpoints/text_to_speech_stream.rb
-- lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb
-- lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb
 - lib/elevenlabs_client/endpoints/text_to_voice.rb
 - lib/elevenlabs_client/endpoints/voices.rb
 - lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb
data/lib/elevenlabs_client/endpoints/models.rb
DELETED
@@ -1,26 +0,0 @@
-# frozen_string_literal: true
-
-module ElevenlabsClient
-  class Models
-    def initialize(client)
-      @client = client
-    end
-
-    # GET /v1/models
-    # Gets a list of available models
-    # Documentation: https://elevenlabs.io/docs/api-reference/models/list
-    #
-    # @return [Hash] The JSON response containing an array of models
-    def list
-      endpoint = "/v1/models"
-      @client.get(endpoint)
-    end
-
-    # Alias for backward compatibility and convenience
-    alias_method :list_models, :list
-
-    private
-
-    attr_reader :client
-  end
-end
data/lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb
DELETED
@@ -1,50 +0,0 @@
-# frozen_string_literal: true
-
-module ElevenlabsClient
-  class TextToDialogueStream
-    def initialize(client)
-      @client = client
-    end
-
-    # POST /v1/text-to-dialogue/stream
-    # Converts a list of text and voice ID pairs into speech (dialogue) and returns an audio stream.
-    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
-    #
-    # @param inputs [Array<Hash>] A list of dialogue inputs, each containing text and a voice ID
-    # @param options [Hash] Optional parameters
-    # @option options [String] :model_id Identifier of the model to be used (default: "eleven_v3")
-    # @option options [String] :language_code ISO 639-1 language code
-    # @option options [Hash] :settings Settings controlling the dialogue generation
-    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
-    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
-    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
-    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-    # @param block [Proc] Block to handle each audio chunk
-    # @return [Faraday::Response] The response object
-    def stream(inputs, **options, &block)
-      # Build endpoint with optional query params
-      output_format = options[:output_format] || "mp3_44100_128"
-      endpoint = "/v1/text-to-dialogue/stream?output_format=#{output_format}"
-
-      # Build request body
-      request_body = { inputs: inputs }
-      request_body[:model_id] = options[:model_id] if options[:model_id]
-      request_body[:language_code] = options[:language_code] if options[:language_code]
-      request_body[:settings] = options[:settings] if options[:settings]
-      request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
-      request_body[:seed] = options[:seed] if options[:seed]
-      request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
-
-      @client.post_streaming(endpoint, request_body, &block)
-    end
-
-    # Alias for convenience
-    alias_method :text_to_dialogue_stream, :stream
-
-    private
-
-    attr_reader :client
-  end
-end
-
-
data/lib/elevenlabs_client/endpoints/text_to_speech_stream.rb
DELETED
@@ -1,43 +0,0 @@
-# frozen_string_literal: true
-
-module ElevenlabsClient
-  class TextToSpeechStream
-    def initialize(client)
-      @client = client
-    end
-
-    # POST /v1/text-to-speech/{voice_id}/stream
-    # Stream text-to-speech audio in real-time chunks
-    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
-    #
-    # @param voice_id [String] The ID of the voice to use
-    # @param text [String] Text to synthesize
-    # @param options [Hash] Optional TTS parameters
-    # @option options [String] :model_id Model to use (defaults to "eleven_multilingual_v2")
-    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-    # @option options [Hash] :voice_settings Voice configuration
-    # @param block [Proc] Block to handle each audio chunk
-    # @return [Faraday::Response] The response object
-    def stream(voice_id, text, **options, &block)
-      output_format = options[:output_format] || "mp3_44100_128"
-      endpoint = "/v1/text-to-speech/#{voice_id}/stream?output_format=#{output_format}"
-
-      request_body = {
-        text: text,
-        model_id: options[:model_id] || "eleven_multilingual_v2"
-      }
-
-      # Add voice_settings if provided
-      request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
-
-      @client.post_streaming(endpoint, request_body, &block)
-    end
-
-    # Alias for backward compatibility
-    alias_method :text_to_speech_stream, :stream
-
-    private
-
-    attr_reader :client
-  end
-end
data/lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb
DELETED
@@ -1,75 +0,0 @@
-# frozen_string_literal: true
-
-module ElevenlabsClient
-  class TextToSpeechStreamWithTimestamps
-    def initialize(client)
-      @client = client
-    end
-
-    # POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
-    # Stream text-to-speech audio with character-level timing information
-    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
-    #
-    # @param voice_id [String] Voice ID to be used
-    # @param text [String] The text that will get converted into speech
-    # @param options [Hash] Optional TTS parameters
-    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
-    # @option options [String] :language_code ISO 639-1 language code for text normalization
-    # @option options [Hash] :voice_settings Voice settings overriding stored settings
-    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
-    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
-    # @option options [String] :previous_text Text that came before current request
-    # @option options [String] :next_text Text that comes after current request
-    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
-    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
-    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
-    # @option options [Boolean] :apply_language_text_normalization Language text normalization
-    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
-    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
-    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
-    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-    # @param block [Proc] Block to handle each streaming chunk containing audio and timing data
-    # @return [Faraday::Response] The response object
-    def stream(voice_id, text, **options, &block)
-      # Build query parameters
-      query_params = {}
-      query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
-      query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
-      query_params[:output_format] = options[:output_format] if options[:output_format]
-
-      # Build endpoint with query parameters
-      endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
-      if query_params.any?
-        query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
-        endpoint += "?#{query_string}"
-      end
-
-      # Build request body
-      request_body = { text: text }
-
-      # Add optional body parameters
-      request_body[:model_id] = options[:model_id] if options[:model_id]
-      request_body[:language_code] = options[:language_code] if options[:language_code]
-      request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
-      request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
-      request_body[:seed] = options[:seed] if options[:seed]
-      request_body[:previous_text] = options[:previous_text] if options[:previous_text]
-      request_body[:next_text] = options[:next_text] if options[:next_text]
-      request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
-      request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
-      request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
-      request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
-      request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
-
-      # Use streaming method with JSON parsing for timestamp data
-      @client.post_streaming_with_timestamps(endpoint, request_body, &block)
-    end
-
-    # Alias for backward compatibility
-    alias_method :text_to_speech_stream_with_timestamps, :stream
-
-    private
-
-    attr_reader :client
-  end
-end