elevenlabs_client 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
# frozen_string_literal: true

module ElevenlabsClient
  # Wrapper around the ElevenLabs speech-to-speech ("voice changer") API.
  # Converts the voice in an existing recording while keeping its emotion,
  # timing and delivery.
  class SpeechToSpeech
    # @param client [Object] transport client exposing +file_part+ and
    #   +post_multipart+ (and, for streaming, +api_key+ / +handle_response+)
    def initialize(client)
      @client = client
    end

    # POST /v1/speech-to-speech/:voice_id
    # Transforms audio from one voice to another.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-speech
    #
    # @param voice_id [String] ID of the voice to be used
    # @param audio_file [IO, File] Audio carrying the content and emotion
    # @param filename [String] Original filename for the audio file
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [Integer] :optimize_streaming_latency Latency level (0-4, deprecated)
    # @option options [String] :output_format Output format (default: "mp3_44100_128")
    # @option options [String] :model_id Model identifier (default: "eleven_english_sts_v2")
    # @option options [String] :voice_settings JSON encoded voice settings
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @option options [Boolean] :remove_background_noise Strip background noise (default: false)
    # @option options [String] :file_format Input format ("pcm_s16le_16" or "other")
    # @return [String] Binary audio data
    def convert(voice_id, audio_file, filename, **options)
      path = with_query("/v1/speech-to-speech/#{voice_id}", options)
      @client.post_multipart(path, multipart_payload(audio_file, filename, options))
    end

    # POST /v1/speech-to-speech/:voice_id/stream
    # Streams the converted audio chunk by chunk.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-speech/stream
    #
    # Accepts the same options as {#convert}.
    # @param voice_id [String] ID of the voice to be used
    # @param audio_file [IO, File] Audio carrying the content and emotion
    # @param filename [String] Original filename for the audio file
    # @param block [Proc] Invoked with each chunk of streamed audio
    # @return [Faraday::Response] Response object for streaming
    def convert_stream(voice_id, audio_file, filename, **options, &block)
      path = with_query("/v1/speech-to-speech/#{voice_id}/stream", options)
      payload = multipart_payload(audio_file, filename, options)

      # NOTE(review): reaches into the client's internals (@conn and the
      # private handle_response) because no public streaming-multipart helper
      # is visible on the client — confirm before refactoring.
      connection = @client.instance_variable_get(:@conn)
      response = connection.post(path) do |req|
        req.headers["xi-api-key"] = @client.api_key
        req.body = payload
        # Forward each received chunk to the caller's block, if any.
        req.options.on_data = proc { |chunk, _| block.call(chunk) } if block
      end

      @client.send(:handle_response, response)
    end

    # Convenience aliases
    alias_method :voice_changer, :convert
    alias_method :voice_changer_stream, :convert_stream

    private

    attr_reader :client

    # Appends enable_logging / optimize_streaming_latency / output_format as
    # query parameters. enable_logging is forwarded even when false; the
    # other two only when truthy.
    def with_query(path, options)
      query = {}
      query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
      %i[optimize_streaming_latency output_format].each do |key|
        query[key] = options[key] if options[key]
      end
      return path if query.empty?

      "#{path}?#{query.map { |k, v| "#{k}=#{v}" }.join('&')}"
    end

    # Builds the multipart form body shared by both endpoints, preserving the
    # field order the API expects. remove_background_noise is forwarded even
    # when false.
    def multipart_payload(audio_file, filename, options)
      payload = { audio: @client.file_part(audio_file, filename) }
      %i[model_id voice_settings seed].each do |key|
        payload[key] = options[key] if options[key]
      end
      unless options[:remove_background_noise].nil?
        payload[:remove_background_noise] = options[:remove_background_noise]
      end
      payload[:file_format] = options[:file_format] if options[:file_format]
      payload
    end
  end
end
@@ -0,0 +1,121 @@
1
# frozen_string_literal: true

module ElevenlabsClient
  # Wrapper around the ElevenLabs speech-to-text (transcription) endpoints.
  class SpeechToText
    # @param client [Object] transport client used to perform HTTP requests
    def initialize(client)
      @client = client
    end

    # POST /v1/speech-to-text
    # Transcribes an audio or video file.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text
    #
    # @param model_id [String] ID of the transcription model
    # @param options [Hash] Optional parameters
    # @option options [IO, File] :file File to transcribe (requires :filename)
    # @option options [String] :filename Original filename for :file
    # @option options [String] :cloud_storage_url HTTPS URL of the file (alternative to :file)
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [String] :language_code ISO-639-1 or ISO-639-3 language code
    # @option options [Boolean] :tag_audio_events Tag events like (laughter) (default: true)
    # @option options [Integer] :num_speakers Maximum number of speakers (1-32)
    # @option options [String] :timestamps_granularity "none", "word" or "character"
    # @option options [Boolean] :diarize Annotate which speaker is talking (default: false)
    # @option options [Float] :diarization_threshold Diarization threshold (0.1-0.4)
    # @option options [Array] :additional_formats Additional export formats
    # @option options [String] :file_format "pcm_s16le_16" or "other"
    # @option options [Boolean] :webhook Deliver the result via webhook (default: false)
    # @option options [String] :webhook_id Specific webhook ID
    # @option options [Float] :temperature Randomness control (0.0-2.0)
    # @option options [Integer] :seed Deterministic sampling seed (0-2147483647)
    # @option options [Boolean] :use_multi_channel Multi-channel processing (default: false)
    # @option options [String, Hash] :webhook_metadata Metadata forwarded to the webhook
    # @raise [ArgumentError] when neither :file/:filename nor :cloud_storage_url is given
    # @return [Hash] Transcription result or webhook acknowledgement
    def create(model_id, **options)
      path = "/v1/speech-to-text"
      # enable_logging is the only query-string parameter; false must be sent.
      unless options[:enable_logging].nil?
        path = "#{path}?enable_logging=#{options[:enable_logging]}"
      end

      form = { model_id: model_id }
      attach_source!(form, options)
      attach_optional_fields!(form, options)

      @client.post_multipart(path, form)
    end

    # GET /v1/speech-to-text/transcripts/:transcription_id
    # Retrieves a previously generated transcript.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text/get-transcript
    #
    # @param transcription_id [String] ID of the transcript to fetch
    # @return [Hash] The transcript data
    def get_transcript(transcription_id)
      @client.get("/v1/speech-to-text/transcripts/#{transcription_id}")
    end

    # DELETE /v1/speech-to-text/transcripts/:transcription_id
    # Deletes a previously generated transcript.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text/delete-transcript
    #
    # @param transcription_id [String] ID of the transcript to delete
    # @return [Hash] Delete confirmation response
    def delete_transcript(transcription_id)
      @client.delete("/v1/speech-to-text/transcripts/#{transcription_id}")
    end

    # Convenience aliases
    alias_method :transcribe, :create
    alias_method :get_transcription, :get_transcript
    alias_method :retrieve_transcript, :get_transcript
    alias_method :delete_transcription, :delete_transcript
    alias_method :remove_transcript, :delete_transcript

    private

    attr_reader :client

    # Adds exactly one audio source: an uploaded file or a cloud storage URL.
    def attach_source!(form, options)
      if options[:file] && options[:filename]
        form[:file] = @client.file_part(options[:file], options[:filename])
      elsif options[:cloud_storage_url]
        form[:cloud_storage_url] = options[:cloud_storage_url]
      else
        raise ArgumentError, "Either :file with :filename or :cloud_storage_url must be provided"
      end
    end

    # Copies optional form fields in the API's expected order. :not_nil
    # fields are forwarded even when false; :truthy fields only when truthy.
    def attach_optional_fields!(form, options)
      [
        [:language_code, :truthy],
        [:tag_audio_events, :not_nil],
        [:num_speakers, :truthy],
        [:timestamps_granularity, :truthy],
        [:diarize, :not_nil],
        [:diarization_threshold, :truthy],
        [:additional_formats, :truthy],
        [:file_format, :truthy],
        [:webhook, :not_nil],
        [:webhook_id, :truthy],
        [:temperature, :truthy],
        [:seed, :truthy],
        [:use_multi_channel, :not_nil]
      ].each do |key, mode|
        value = options[key]
        form[key] = value if (mode == :truthy && value) || (mode == :not_nil && !value.nil?)
      end

      # webhook_metadata may arrive as a Hash (serialized here) or a String.
      meta = options[:webhook_metadata]
      form[:webhook_metadata] = meta.is_a?(Hash) ? meta.to_json : meta if meta
    end
  end
end
@@ -30,7 +30,40 @@ module ElevenlabsClient
30
30
  @client.post_binary(endpoint, request_body)
31
31
  end
32
32
 
33
- # Alias for backward compatibility and convenience
33
# POST /v1/text-to-dialogue/stream
# Converts a list of text/voice-ID pairs into dialogue speech and streams the audio.
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
#
# @param inputs [Array<Hash>] Dialogue inputs, each with text and a voice ID
# @param options [Hash] Optional parameters
# @option options [String] :model_id Model identifier (default: "eleven_v3")
# @option options [String] :language_code ISO 639-1 language code
# @option options [Hash] :settings Dialogue generation settings
# @option options [Array<Hash>] :pronunciation_dictionary_locators Locators (max 3)
# @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
# @option options [String] :apply_text_normalization "auto", "on" or "off"
# @option options [String] :output_format Output format (default: "mp3_44100_128")
# @param block [Proc] Invoked with each audio chunk
# @return [Faraday::Response] The response object
def stream(inputs, **options, &block)
  format = options[:output_format] || "mp3_44100_128"
  endpoint = "/v1/text-to-dialogue/stream?output_format=#{format}"

  # Forward only the optional keys the caller actually supplied.
  body = { inputs: inputs }
  %i[model_id language_code settings pronunciation_dictionary_locators
     seed apply_text_normalization].each do |key|
    body[key] = options[key] if options[key]
  end

  @client.post_streaming(endpoint, body, &block)
end
64
+
65
+ # Alias for convenience
66
+ alias_method :text_to_dialogue_stream, :stream
34
67
  alias_method :text_to_dialogue, :convert
35
68
 
36
69
  private
@@ -33,7 +33,153 @@ module ElevenlabsClient
33
33
  end
34
34
  end
35
35
 
36
- # Alias for backward compatibility and convenience
36
# POST /v1/text-to-speech/{voice_id}/with-timestamps
# Generates speech with precise character-level timing information.
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/with-timestamps
#
# @param voice_id [String] Voice ID to be used
# @param text [String] Text to convert into speech
# @param options [Hash] Optional TTS parameters
# @option options [String] :model_id Model identifier (default: "eleven_multilingual_v2")
# @option options [String] :language_code ISO 639-1 language code
# @option options [Hash] :voice_settings Overrides stored voice settings
# @option options [Array<Hash>] :pronunciation_dictionary_locators Locators (max 3)
# @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
# @option options [String] :previous_text Text preceding this request
# @option options [String] :next_text Text following this request
# @option options [Array<String>] :previous_request_ids Previous request IDs (max 3)
# @option options [Array<String>] :next_request_ids Next request IDs (max 3)
# @option options [String] :apply_text_normalization "auto", "on" or "off"
# @option options [Boolean] :apply_language_text_normalization Language normalization
# @option options [Boolean] :use_pvc_as_ivc Use IVC instead of PVC (deprecated)
# @option options [Boolean] :enable_logging Enable logging (default: true)
# @option options [Integer] :optimize_streaming_latency Latency level (0-4, deprecated)
# @option options [String] :output_format Output format (default: "mp3_44100_128")
# @return [Hash] audio_base64, alignment and normalized_alignment
def convert_with_timestamps(voice_id, text, **options)
  # enable_logging must be sent even when false; the others only when truthy.
  query = {}
  query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
  %i[optimize_streaming_latency output_format].each do |key|
    query[key] = options[key] if options[key]
  end

  endpoint = "/v1/text-to-speech/#{voice_id}/with-timestamps"
  endpoint = "#{endpoint}?#{query.map { |k, v| "#{k}=#{v}" }.join('&')}" unless query.empty?

  body = { text: text }
  %i[model_id language_code voice_settings pronunciation_dictionary_locators seed
     previous_text next_text previous_request_ids next_request_ids
     apply_text_normalization].each do |key|
    body[key] = options[key] if options[key]
  end
  # Boolean flags are forwarded even when explicitly false.
  %i[apply_language_text_normalization use_pvc_as_ivc].each do |key|
    body[key] = options[key] unless options[key].nil?
  end

  @client.post(endpoint, body)
end
92
+
93
+ alias_method :text_to_speech_with_timestamps, :convert_with_timestamps
94
+
95
# POST /v1/text-to-speech/{voice_id}/stream
# Streams synthesized speech in real-time chunks.
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
#
# @param voice_id [String] The ID of the voice to use
# @param text [String] Text to synthesize
# @param options [Hash] Optional TTS parameters
# @option options [String] :model_id Model to use (default: "eleven_multilingual_v2")
# @option options [String] :output_format Output format (default: "mp3_44100_128")
# @option options [Hash] :voice_settings Voice configuration
# @param block [Proc] Invoked with each audio chunk
# @return [Faraday::Response] The response object
def stream(voice_id, text, **options, &block)
  format = options[:output_format] || "mp3_44100_128"
  endpoint = "/v1/text-to-speech/#{voice_id}/stream?output_format=#{format}"

  body = { text: text, model_id: options[:model_id] || "eleven_multilingual_v2" }
  body[:voice_settings] = options[:voice_settings] if options[:voice_settings]

  @client.post_streaming(endpoint, body, &block)
end
121
+
122
# POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
# Streams speech together with character-level timing information.
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
#
# @param voice_id [String] Voice ID to be used
# @param text [String] Text to convert into speech
# @param options [Hash] Optional TTS parameters
# @option options [String] :model_id Model identifier (default: "eleven_multilingual_v2")
# @option options [String] :language_code ISO 639-1 language code
# @option options [Hash] :voice_settings Overrides stored voice settings
# @option options [Array<Hash>] :pronunciation_dictionary_locators Locators (max 3)
# @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
# @option options [String] :previous_text Text preceding this request
# @option options [String] :next_text Text following this request
# @option options [Array<String>] :previous_request_ids Previous request IDs (max 3)
# @option options [Array<String>] :next_request_ids Next request IDs (max 3)
# @option options [String] :apply_text_normalization "auto", "on" or "off"
# @option options [Boolean] :apply_language_text_normalization Language normalization
# @option options [Boolean] :use_pvc_as_ivc Use IVC instead of PVC (deprecated)
# @option options [Boolean] :enable_logging Enable logging (default: true)
# @option options [Integer] :optimize_streaming_latency Latency level (0-4, deprecated)
# @option options [String] :output_format Output format (default: "mp3_44100_128")
# @param block [Proc] Invoked with each streamed chunk of audio plus timing data
# @return [Faraday::Response] The response object
def stream_with_timestamps(voice_id, text, **options, &block)
  # enable_logging must be sent even when false; the others only when truthy.
  query = {}
  query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
  %i[optimize_streaming_latency output_format].each do |key|
    query[key] = options[key] if options[key]
  end

  endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
  endpoint = "#{endpoint}?#{query.map { |k, v| "#{k}=#{v}" }.join('&')}" unless query.empty?

  body = { text: text }
  %i[model_id language_code voice_settings pronunciation_dictionary_locators seed
     previous_text next_text previous_request_ids next_request_ids
     apply_text_normalization].each do |key|
    body[key] = options[key] if options[key]
  end
  # Boolean flags are forwarded even when explicitly false.
  %i[apply_language_text_normalization use_pvc_as_ivc].each do |key|
    body[key] = options[key] unless options[key].nil?
  end

  # Streaming variant that parses JSON timestamp payloads per chunk.
  @client.post_streaming_with_timestamps(endpoint, body, &block)
end
+ end
180
+
181
+ alias_method :text_to_speech_stream_with_timestamps, :stream_with_timestamps
182
+ alias_method :text_to_speech_stream, :stream
37
183
  alias_method :text_to_speech, :convert
38
184
 
39
185
  private
@@ -74,6 +74,18 @@ module ElevenlabsClient
74
74
  @client.post(endpoint, request_body)
75
75
  end
76
76
 
77
# GET /v1/text-to-voice/:generated_voice_id/stream
# Streams a voice preview created via the /v1/text-to-voice/design endpoint.
# Documentation: https://elevenlabs.io/docs/api-reference/text-to-voice/stream-voice-preview
#
# @param generated_voice_id [String] The generated_voice_id to stream
# @param block [Proc] Invoked with each streamed chunk
# @return [Faraday::Response] The response object
def stream_preview(generated_voice_id, &block)
  @client.get_streaming("/v1/text-to-voice/#{generated_voice_id}/stream", &block)
end
88
+
77
89
  # GET /v1/voices
78
90
  # Retrieves all voices associated with your Elevenlabs account
79
91
  # Documentation: https://elevenlabs.io/docs/api-reference/voices
@@ -84,9 +96,9 @@ module ElevenlabsClient
84
96
  @client.get(endpoint)
85
97
  end
86
98
 
87
- # Alias methods for backward compatibility and convenience
88
99
  alias_method :design_voice, :design
89
100
  alias_method :create_from_generated_voice, :create
101
+ alias_method :stream_voice_preview, :stream_preview
90
102
 
91
103
  private
92
104