elevenlabs_client 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ module ElevenlabsClient
8
8
 
9
9
  # POST /v1/dubbing (multipart)
10
10
  # Creates a new dubbing job
11
+ # Documentation: https://elevenlabs.io/docs/api-reference/dubbing/create
11
12
  #
12
13
  # @param file_io [IO] The audio/video file to dub
13
14
  # @param filename [String] Original filename
@@ -19,8 +20,9 @@ module ElevenlabsClient
19
20
  payload = {
20
21
  file: @client.file_part(file_io, filename),
21
22
  mode: "automatic",
22
- target_languages: target_languages,
23
- name: name
23
+ name: name,
24
+ target_lang: target_languages.first,
25
+ num_speakers: 1
24
26
  }.compact.merge(options)
25
27
 
26
28
  @client.post_multipart("/v1/dubbing", payload)
@@ -28,6 +30,7 @@ module ElevenlabsClient
28
30
 
29
31
  # GET /v1/dubbing/{id}
30
32
  # Retrieves dubbing job details
33
+ # Documentation: https://elevenlabs.io/docs/api-reference/dubbing/get
31
34
  #
32
35
  # @param dubbing_id [String] The dubbing job ID
33
36
  # @return [Hash] Dubbing job details
@@ -37,6 +40,7 @@ module ElevenlabsClient
37
40
 
38
41
  # GET /v1/dubbing
39
42
  # Lists dubbing jobs
43
+ # Documentation: https://elevenlabs.io/docs/api-reference/dubbing
40
44
  #
41
45
  # @param params [Hash] Query parameters (dubbing_status, page_size, etc.)
42
46
  # @return [Hash] List of dubbing jobs
@@ -46,6 +50,7 @@ module ElevenlabsClient
46
50
 
47
51
  # GET /v1/dubbing/{id}/resources
48
52
  # Retrieves dubbing resources for editing (if dubbing_studio: true was used)
53
+ # Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/get-resource
49
54
  #
50
55
  # @param dubbing_id [String] The dubbing job ID
51
56
  # @return [Hash] Dubbing resources
@@ -53,6 +58,207 @@ module ElevenlabsClient
53
58
  @client.get("/v1/dubbing/#{dubbing_id}/resources")
54
59
  end
55
60
 
61
# DELETE /v1/dubbing/{id}
# Removes a dubbing project by issuing a DELETE through the wrapped client.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/delete
#
# @param dubbing_id [String] The dubbing job ID (interpolated into the path as-is)
# @return [Hash] Response with status, as returned by the client's `delete`
def delete(dubbing_id)
  endpoint = "/v1/dubbing/#{dubbing_id}"
  @client.delete(endpoint)
end
70
+
71
# GET /v1/dubbing/resource/{dubbing_id}
# Fetches the dubbing resource (per the API docs: segments, speakers, etc.).
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/get-resource
#
# @param dubbing_id [String] The dubbing job ID (interpolated into the path as-is)
# @return [Hash] Detailed dubbing resource information, as returned by the client's `get`
def get_resource(dubbing_id)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}"
  @client.get(endpoint)
end
80
+
81
# POST /v1/dubbing/resource/{dubbing_id}/speaker/{speaker_id}/segment
# Adds a new segment for a speaker in a dubbing resource.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/create-segment
#
# Any argument whose value is nil is left out of the request body.
#
# @param dubbing_id [String] The dubbing job ID
# @param speaker_id [String] The speaker ID
# @param start_time [Float] Start time of the segment
# @param end_time [Float] End time of the segment
# @param text [String, nil] Optional text for the segment
# @param translations [Hash, nil] Optional translations map
# @return [Hash] Response with version and new segment ID, as returned by the client's `post`
def create_segment(dubbing_id:, speaker_id:, start_time:, end_time:, text: nil, translations: nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/speaker/#{speaker_id}/segment"
  fields = { start_time: start_time, end_time: end_time, text: text, translations: translations }
  body = fields.reject { |_key, value| value.nil? }

  @client.post(endpoint, body)
end
102
+
103
# DELETE /v1/dubbing/resource/{dubbing_id}/segment/{segment_id}
# Removes one segment from a dubbing resource.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/delete-segment
#
# @param dubbing_id [String] The dubbing job ID
# @param segment_id [String] The segment ID
# @return [Hash] Response with version, as returned by the client's `delete`
def delete_segment(dubbing_id, segment_id)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/segment/#{segment_id}"
  @client.delete(endpoint)
end
113
+
114
# PATCH /v1/dubbing/resource/{dubbing_id}/segment/{segment_id}/{language}
# Changes the text and/or start/end times of a single segment.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/update-segment
#
# Only the fields given a non-nil value are included in the request body.
#
# @param dubbing_id [String] The dubbing job ID
# @param segment_id [String] The segment ID
# @param language [String] The language ID
# @param start_time [Float, nil] Optional new start time
# @param end_time [Float, nil] Optional new end time
# @param text [String, nil] Optional new text
# @return [Hash] Response with version, as returned by the client's `patch`
def update_segment(dubbing_id:, segment_id:, language:, start_time: nil, end_time: nil, text: nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/segment/#{segment_id}/#{language}"
  changes = { start_time: start_time, end_time: end_time, text: text }
  body = changes.reject { |_key, value| value.nil? }

  @client.patch(endpoint, body)
end
134
+
135
# POST /v1/dubbing/resource/{dubbing_id}/transcribe
# Requests fresh transcriptions for the given segments.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/transcribe-segment
#
# @param dubbing_id [String] The dubbing job ID
# @param segments [Array<String>] List of segment IDs to transcribe (sent as-is, even when nil)
# @return [Hash] Response with version, as returned by the client's `post`
def transcribe_segment(dubbing_id, segments)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/transcribe"
  @client.post(endpoint, { segments: segments })
end
146
+
147
# POST /v1/dubbing/resource/{dubbing_id}/translate
# Requests fresh translations for the given segments/languages.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/translate-segment
#
# Arguments that are nil are left out of the request body.
#
# @param dubbing_id [String] The dubbing job ID
# @param segments [Array<String>] List of segment IDs to translate
# @param languages [Array<String>, nil] Optional list of languages to translate
# @return [Hash] Response with version, as returned by the client's `post`
def translate_segment(dubbing_id, segments, languages = nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/translate"
  body = { segments: segments, languages: languages }.reject { |_key, value| value.nil? }

  @client.post(endpoint, body)
end
163
+
164
# POST /v1/dubbing/resource/{dubbing_id}/dub
# Requests fresh dubs for the given segments/languages.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/dub-segment
#
# Arguments that are nil are left out of the request body.
#
# @param dubbing_id [String] The dubbing job ID
# @param segments [Array<String>] List of segment IDs to dub
# @param languages [Array<String>, nil] Optional list of languages to dub
# @return [Hash] Response with version, as returned by the client's `post`
def dub_segment(dubbing_id, segments, languages = nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/dub"
  body = { segments: segments, languages: languages }.reject { |_key, value| value.nil? }

  @client.post(endpoint, body)
end
180
+
181
# POST /v1/dubbing/resource/{dubbing_id}/render/{language}
# Requests a render of the output media for one language.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/render-project
#
# @param dubbing_id [String] The dubbing job ID
# @param language [String] The language to render
# @param render_type [String] The type of render (mp4, aac, mp3, wav, aaf, tracks_zip, clips_zip)
# @param normalize_volume [Boolean, nil] Whether to normalize volume; omitted from the
#   body when nil (the API docs state it then defaults to false)
# @return [Hash] Response with version and render_id, as returned by the client's `post`
def render_project(dubbing_id:, language:, render_type:, normalize_volume: nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/render/#{language}"
  settings = { render_type: render_type, normalize_volume: normalize_volume }
  body = settings.reject { |_key, value| value.nil? }

  @client.post(endpoint, body)
end
198
+
199
# PATCH /v1/dubbing/resource/{dubbing_id}/speaker/{speaker_id}
# Changes speaker metadata such as the voice.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/update-speaker
#
# Arguments that are nil are left out of the request body.
#
# @param dubbing_id [String] The dubbing job ID
# @param speaker_id [String] The speaker ID
# @param voice_id [String, nil] Voice ID from library or 'track-clone'/'clip-clone'
# @param languages [Array<String>, nil] Languages to apply changes to
# @return [Hash] Response with version, as returned by the client's `patch`
def update_speaker(dubbing_id:, speaker_id:, voice_id: nil, languages: nil)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/speaker/#{speaker_id}"
  body = { voice_id: voice_id, languages: languages }.reject { |_key, value| value.nil? }

  @client.patch(endpoint, body)
end
216
+
217
# GET /v1/dubbing/resource/{dubbing_id}/speaker/{speaker_id}/similar-voices
# Looks up voices similar to a speaker's.
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/resources/get-similar-voices
#
# @param dubbing_id [String] The dubbing job ID
# @param speaker_id [String] The speaker ID
# @return [Hash] Response with list of similar voices, as returned by the client's `get`
def get_similar_voices(dubbing_id, speaker_id)
  endpoint = "/v1/dubbing/resource/#{dubbing_id}/speaker/#{speaker_id}/similar-voices"
  @client.get(endpoint)
end
227
+
228
# GET /v1/dubbing/{dubbing_id}/audio/{language_code}
# Fetches the dub for a language (per the API docs: a streamed MP3 or MP4 file).
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/audio/get
#
# @param dubbing_id [String] ID of the dubbing project
# @param language_code [String] ID of the language
# @return [String] Binary audio/video data, as returned by the client's `get`
def get_dubbed_audio(dubbing_id, language_code)
  @client.get("/v1/dubbing/#{dubbing_id}/audio/#{language_code}")
end
239
+
240
# GET /v1/dubbing/{dubbing_id}/transcript/{language_code}
# Fetches the transcript for a dub (per the API docs: an SRT or WEBVTT file).
# Documentation: https://elevenlabs.io/docs/api-reference/dubbing/transcript/get-transcript-for-dub
#
# Only :format_type is forwarded as a query parameter; other options are ignored.
#
# @param dubbing_id [String] ID of the dubbing project
# @param language_code [String] ID of the language
# @param options [Hash] Optional parameters
# @option options [String] :format_type Format to use ("srt" or "webvtt", default: "srt")
# @return [String] Transcript in specified format, as returned by the client's `get`
def get_dubbed_transcript(dubbing_id, language_code, **options)
  format_type = options[:format_type]
  query = format_type ? { format_type: format_type } : {}

  @client.get("/v1/dubbing/#{dubbing_id}/transcript/#{language_code}", query)
end
257
+
258
+ # Alias methods for convenience
259
+ alias_method :dubbed_audio, :get_dubbed_audio
260
+ alias_method :dubbed_transcript, :get_dubbed_transcript
261
+
56
262
  private
57
263
 
58
264
  attr_reader :client
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module ElevenlabsClient
  # Thin wrapper over the forced-alignment endpoint; all HTTP work is delegated
  # to the client object supplied at construction.
  class ForcedAlignment
    # @param client [Object] client exposing `file_part` and `post_multipart`
    def initialize(client)
      @client = client
    end

    # POST /v1/forced-alignment
    # Force align an audio file to text. Get timing information for each character and word
    # Documentation: https://elevenlabs.io/docs/api-reference/forced-alignment
    #
    # @param audio_file [IO, File] The audio file to align (must be less than 1GB)
    # @param filename [String] Original filename for the audio file
    # @param text [String] The text to align with the audio
    # @param options [Hash] Optional parameters (only :enabled_spooled_file is read)
    # @option options [Boolean] :enabled_spooled_file Stream file in chunks for large files (defaults to false)
    # @return [Hash] JSON response containing characters, words arrays with timing info, and loss score
    def create(audio_file, filename, text, **options)
      form = {
        file: @client.file_part(audio_file, filename),
        text: text
      }

      # An explicit false is still forwarded; only an absent/nil flag is skipped.
      spooled = options[:enabled_spooled_file]
      form[:enabled_spooled_file] = spooled unless spooled.nil?

      @client.post_multipart("/v1/forced-alignment", form)
    end

    # Convenience aliases for #create
    alias align create
    alias force_align create

    private

    attr_reader :client
  end
end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
require "uri"

module ElevenlabsClient
  # Wrapper over the speech-to-speech (voice changer) endpoints. HTTP work is
  # delegated to the client object supplied at construction.
  class SpeechToSpeech
    # @param client [Object] client exposing `file_part`, `post_multipart` and `api_key`
    def initialize(client)
      @client = client
    end

    # POST /v1/speech-to-speech/:voice_id
    # Transform audio from one voice to another. Maintain full control over emotion, timing and delivery.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-speech
    #
    # :enable_logging, :optimize_streaming_latency and :output_format are sent
    # as URL-encoded query parameters; the remaining options are multipart
    # form fields.
    #
    # @param voice_id [String] ID of the voice to be used
    # @param audio_file [IO, File] The audio file which holds the content and emotion
    # @param filename [String] Original filename for the audio file
    # @param options [Hash] Optional parameters
    # @option options [Boolean] :enable_logging Enable logging (default: true)
    # @option options [Integer] :optimize_streaming_latency Latency optimization level (0-4, deprecated)
    # @option options [String] :output_format Output format (default: "mp3_44100_128")
    # @option options [String] :model_id Model identifier (default: "eleven_english_sts_v2")
    # @option options [String] :voice_settings JSON encoded voice settings
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @option options [Boolean] :remove_background_noise Remove background noise (default: false)
    # @option options [String] :file_format Input file format ("pcm_s16le_16" or "other")
    # @return [String] Binary audio data, as returned by the client's `post_multipart`
    def convert(voice_id, audio_file, filename, **options)
      endpoint = endpoint_with_query("/v1/speech-to-speech/#{voice_id}", options)
      payload = multipart_payload(audio_file, filename, options)

      @client.post_multipart(endpoint, payload)
    end

    # POST /v1/speech-to-speech/:voice_id/stream
    # Stream audio from one voice to another. Maintain full control over emotion, timing and delivery.
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-speech/stream
    #
    # Accepts the same options as #convert.
    #
    # @param voice_id [String] ID of the voice to be used
    # @param audio_file [IO, File] The audio file which holds the content and emotion
    # @param filename [String] Original filename for the audio file
    # @param options [Hash] Optional parameters (see #convert)
    # @param block [Proc] Block to handle each chunk of streaming audio data
    # @return [Faraday::Response] Response object for streaming, after passing
    #   through the client's `handle_response`
    def convert_stream(voice_id, audio_file, filename, **options, &block)
      endpoint = endpoint_with_query("/v1/speech-to-speech/#{voice_id}/stream", options)
      payload = multipart_payload(audio_file, filename, options)

      # NOTE(review): this reaches into the client's @conn and its
      # handle_response via instance_variable_get/send; the client class is not
      # visible here, so the access pattern is kept exactly as it was.
      response = @client.instance_variable_get(:@conn).post(endpoint) do |req|
        req.headers["xi-api-key"] = @client.api_key
        req.body = payload

        # Only install the streaming callback when the caller supplied a block.
        req.options.on_data = proc { |chunk, _| block.call(chunk) } if block
      end

      @client.send(:handle_response, response)
    end

    # Alias methods for convenience
    alias_method :voice_changer, :convert
    alias_method :voice_changer_stream, :convert_stream

    private

    attr_reader :client

    # Appends the supported query options to +path+, URL-encoding keys and values.
    def endpoint_with_query(path, options)
      query = {}
      # enable_logging may legitimately be false, so only nil means "not given".
      query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
      query[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
      query[:output_format] = options[:output_format] if options[:output_format]

      query.empty? ? path : "#{path}?#{URI.encode_www_form(query)}"
    end

    # Builds the multipart form: the audio part plus any supplied form options.
    def multipart_payload(audio_file, filename, options)
      payload = { audio: @client.file_part(audio_file, filename) }

      %i[model_id voice_settings seed].each do |key|
        payload[key] = options[key] if options[key]
      end
      # An explicit false must still be sent for this flag.
      unless options[:remove_background_noise].nil?
        payload[:remove_background_noise] = options[:remove_background_noise]
      end
      payload[:file_format] = options[:file_format] if options[:file_format]

      payload
    end
  end
end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
require "json"
require "uri"

module ElevenlabsClient
  # Wrapper over the speech-to-text endpoints. HTTP work is delegated to the
  # client object supplied at construction.
  class SpeechToText
    # Optional form fields, in the order they are added to the payload.
    FORM_FIELDS = %i[
      language_code tag_audio_events num_speakers timestamps_granularity
      diarize diarization_threshold additional_formats file_format
      webhook webhook_id temperature seed use_multi_channel
    ].freeze

    # Fields for which an explicit false is meaningful and must be sent;
    # every other field is forwarded only when truthy.
    BOOLEAN_FIELDS = %i[tag_audio_events diarize webhook use_multi_channel].freeze

    private_constant :FORM_FIELDS, :BOOLEAN_FIELDS

    # @param client [Object] client exposing `file_part`, `post_multipart` and `get`
    def initialize(client)
      @client = client
    end

    # POST /v1/speech-to-text
    # Transcribe an audio or video file
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text
    #
    # The source is chosen as follows: when both :file and :filename are given
    # the file is uploaded (even if :cloud_storage_url is also present);
    # otherwise :cloud_storage_url is used; otherwise ArgumentError is raised.
    #
    # @param model_id [String] The ID of the model to use for transcription
    # @param options [Hash] Optional parameters
    # @option options [IO, File] :file The file to transcribe (required if no cloud_storage_url)
    # @option options [String] :filename Original filename (required if file provided)
    # @option options [String] :cloud_storage_url HTTPS URL of file to transcribe (required if no file)
    # @option options [Boolean] :enable_logging Enable logging (default: true); sent as a query parameter
    # @option options [String] :language_code ISO-639-1 or ISO-639-3 language code
    # @option options [Boolean] :tag_audio_events Tag audio events like (laughter) (default: true)
    # @option options [Integer] :num_speakers Maximum number of speakers (1-32)
    # @option options [String] :timestamps_granularity Timestamp granularity ("none", "word", "character")
    # @option options [Boolean] :diarize Annotate which speaker is talking (default: false)
    # @option options [Float] :diarization_threshold Diarization threshold (0.1-0.4)
    # @option options [Array] :additional_formats Additional export formats
    # @option options [String] :file_format Input file format ("pcm_s16le_16" or "other")
    # @option options [Boolean] :webhook Send result to webhook (default: false)
    # @option options [String] :webhook_id Specific webhook ID
    # @option options [Float] :temperature Randomness control (0.0-2.0)
    # @option options [Integer] :seed Deterministic sampling seed (0-2147483647)
    # @option options [Boolean] :use_multi_channel Multi-channel processing (default: false)
    # @option options [String, Hash] :webhook_metadata Metadata for webhook (a Hash is JSON-encoded)
    # @return [Hash] Transcription result or webhook response, as returned by the client's `post_multipart`
    # @raise [ArgumentError] when neither (:file and :filename) nor :cloud_storage_url is given
    def create(model_id, **options)
      endpoint = "/v1/speech-to-text"
      # enable_logging may legitimately be false, so only nil means "not given".
      unless options[:enable_logging].nil?
        endpoint = "#{endpoint}?#{URI.encode_www_form(enable_logging: options[:enable_logging])}"
      end

      payload = { model_id: model_id }

      if options[:file] && options[:filename]
        payload[:file] = @client.file_part(options[:file], options[:filename])
      elsif options[:cloud_storage_url]
        payload[:cloud_storage_url] = options[:cloud_storage_url]
      else
        raise ArgumentError, "Either :file with :filename or :cloud_storage_url must be provided"
      end

      FORM_FIELDS.each do |key|
        value = options[key]
        forward = BOOLEAN_FIELDS.include?(key) ? !value.nil? : value
        payload[key] = value if forward
      end

      # webhook_metadata may be a ready-made string or a Hash to serialize.
      metadata = options[:webhook_metadata]
      if metadata
        payload[:webhook_metadata] = metadata.is_a?(Hash) ? metadata.to_json : metadata
      end

      @client.post_multipart(endpoint, payload)
    end

    # GET /v1/speech-to-text/transcripts/:transcription_id
    # Retrieve a previously generated transcript by its ID
    # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text/get-transcript
    #
    # @param transcription_id [String] The unique ID of the transcript to retrieve
    # @return [Hash] The transcript data, as returned by the client's `get`
    def get_transcript(transcription_id)
      @client.get("/v1/speech-to-text/transcripts/#{transcription_id}")
    end

    # Alias methods for convenience
    alias_method :transcribe, :create
    alias_method :get_transcription, :get_transcript
    alias_method :retrieve_transcript, :get_transcript

    private

    attr_reader :client
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
require "uri"

module ElevenlabsClient
  # Wrapper over the streaming text-to-dialogue endpoint. HTTP work is
  # delegated to the client object supplied at construction.
  class TextToDialogueStream
    # Optional request-body fields, forwarded only when truthy.
    BODY_FIELDS = %i[
      model_id language_code settings pronunciation_dictionary_locators
      seed apply_text_normalization
    ].freeze
    private_constant :BODY_FIELDS

    # @param client [Object] client exposing `post_streaming`
    def initialize(client)
      @client = client
    end

    # POST /v1/text-to-dialogue/stream
    # Converts a list of text and voice ID pairs into speech (dialogue) and returns an audio stream.
    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
    #
    # The output format is always sent as a URL-encoded `output_format` query
    # parameter, falling back to "mp3_44100_128" when not supplied.
    #
    # @param inputs [Array<Hash>] A list of dialogue inputs, each containing text and a voice ID
    # @param options [Hash] Optional parameters
    # @option options [String] :model_id Identifier of the model to be used (default: "eleven_v3")
    # @option options [String] :language_code ISO 639-1 language code
    # @option options [Hash] :settings Settings controlling the dialogue generation
    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
    # @param block [Proc] Block to handle each audio chunk (forwarded to the client)
    # @return [Faraday::Response] The response object, as returned by the client's `post_streaming`
    def stream(inputs, **options, &block)
      output_format = options[:output_format] || "mp3_44100_128"
      query = URI.encode_www_form(output_format: output_format)
      endpoint = "/v1/text-to-dialogue/stream?#{query}"

      request_body = { inputs: inputs }
      BODY_FIELDS.each do |key|
        request_body[key] = options[key] if options[key]
      end

      @client.post_streaming(endpoint, request_body, &block)
    end

    # Alias for convenience
    alias_method :text_to_dialogue_stream, :stream

    private

    attr_reader :client
  end
end
49
+
50
+
@@ -8,6 +8,7 @@ module ElevenlabsClient
8
8
 
9
9
  # POST /v1/text-to-speech/{voice_id}/stream
10
10
  # Stream text-to-speech audio in real-time chunks
11
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
11
12
  #
12
13
  # @param voice_id [String] The ID of the voice to use
13
14
  # @param text [String] Text to synthesize
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
require "uri"

module ElevenlabsClient
  # Wrapper over the streaming text-to-speech-with-timestamps endpoint. HTTP
  # work is delegated to the client object supplied at construction.
  class TextToSpeechStreamWithTimestamps
    # Optional body fields forwarded only when truthy, in payload order.
    BODY_FIELDS = %i[
      model_id language_code voice_settings pronunciation_dictionary_locators
      seed previous_text next_text previous_request_ids next_request_ids
      apply_text_normalization
    ].freeze

    # Optional body flags forwarded whenever not nil (explicit false is sent).
    BODY_FLAGS = %i[apply_language_text_normalization use_pvc_as_ivc].freeze

    private_constant :BODY_FIELDS, :BODY_FLAGS

    # @param client [Object] client exposing `post_streaming_with_timestamps`
    def initialize(client)
      @client = client
    end

    # POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
    # Stream text-to-speech audio with character-level timing information
    # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
    #
    # :enable_logging, :optimize_streaming_latency and :output_format are sent
    # as URL-encoded query parameters; the remaining options go in the body.
    #
    # @param voice_id [String] Voice ID to be used
    # @param text [String] The text that will get converted into speech
    # @param options [Hash] Optional TTS parameters
    # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
    # @option options [String] :language_code ISO 639-1 language code for text normalization
    # @option options [Hash] :voice_settings Voice settings overriding stored settings
    # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
    # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
    # @option options [String] :previous_text Text that came before current request
    # @option options [String] :next_text Text that comes after current request
    # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
    # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
    # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
    # @option options [Boolean] :apply_language_text_normalization Language text normalization
    # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
    # @option options [Boolean] :enable_logging Enable logging (defaults to true)
    # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
    # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
    # @param block [Proc] Block to handle each streaming chunk containing audio and timing data
    # @return [Faraday::Response] The response object, as returned by the
    #   client's `post_streaming_with_timestamps`
    def stream(voice_id, text, **options, &block)
      query = {}
      # enable_logging may legitimately be false, so only nil means "not given".
      query[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
      query[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
      query[:output_format] = options[:output_format] if options[:output_format]

      endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
      endpoint = "#{endpoint}?#{URI.encode_www_form(query)}" unless query.empty?

      request_body = { text: text }
      BODY_FIELDS.each do |key|
        request_body[key] = options[key] if options[key]
      end
      BODY_FLAGS.each do |key|
        request_body[key] = options[key] unless options[key].nil?
      end

      @client.post_streaming_with_timestamps(endpoint, request_body, &block)
    end

    # Alias for backward compatibility
    alias_method :text_to_speech_stream_with_timestamps, :stream

    private

    attr_reader :client
  end
end