elevenlabs_client 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -96,10 +96,23 @@ module ElevenlabsClient
  @client.get(endpoint)
  end

+ # DELETE /v1/speech-to-text/transcripts/:transcription_id
+ # Delete a previously generated transcript by its ID
+ # Documentation: https://elevenlabs.io/docs/api-reference/speech-to-text/delete-transcript
+ #
+ # @param transcription_id [String] The unique ID of the transcript to delete
+ # @return [Hash] Delete confirmation response
+ def delete_transcript(transcription_id)
+   endpoint = "/v1/speech-to-text/transcripts/#{transcription_id}"
+   @client.delete(endpoint)
+ end
+
  # Alias methods for convenience
  alias_method :transcribe, :create
  alias_method :get_transcription, :get_transcript
  alias_method :retrieve_transcript, :get_transcript
+ alias_method :delete_transcription, :delete_transcript
+ alias_method :remove_transcript, :delete_transcript

  private

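As a usage sketch for the new deletion endpoint, assuming `speech_to_text` is an instance of the speech-to-text endpoint class this hunk modifies (its constant name is not shown here), wired to a configured client; the transcript ID is a placeholder:

  # Delete a stored transcript by its ID.
  speech_to_text.delete_transcript("transcription_abc123")
  # The aliases added in this release point at the same method:
  speech_to_text.delete_transcription("transcription_abc123")
  speech_to_text.remove_transcript("transcription_abc123")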
@@ -30,7 +30,40 @@ module ElevenlabsClient
  @client.post_binary(endpoint, request_body)
  end

- # Alias for backward compatibility and convenience
+ # POST /v1/text-to-dialogue/stream
+ # Converts a list of text and voice ID pairs into speech (dialogue) and returns an audio stream.
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
+ #
+ # @param inputs [Array<Hash>] A list of dialogue inputs, each containing text and a voice ID
+ # @param options [Hash] Optional parameters
+ # @option options [String] :model_id Identifier of the model to be used (default: "eleven_v3")
+ # @option options [String] :language_code ISO 639-1 language code
+ # @option options [Hash] :settings Settings controlling the dialogue generation
+ # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+ # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+ # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+ # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+ # @param block [Proc] Block to handle each audio chunk
+ # @return [Faraday::Response] The response object
+ def stream(inputs, **options, &block)
+   # Build endpoint with optional query params
+   output_format = options[:output_format] || "mp3_44100_128"
+   endpoint = "/v1/text-to-dialogue/stream?output_format=#{output_format}"
+
+   # Build request body
+   request_body = { inputs: inputs }
+   request_body[:model_id] = options[:model_id] if options[:model_id]
+   request_body[:language_code] = options[:language_code] if options[:language_code]
+   request_body[:settings] = options[:settings] if options[:settings]
+   request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+   request_body[:seed] = options[:seed] if options[:seed]
+   request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+
+   @client.post_streaming(endpoint, request_body, &block)
+ end
+
+ # Alias for convenience
+ alias_method :text_to_dialogue_stream, :stream
  alias_method :text_to_dialogue, :convert

  private
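A minimal usage sketch for the new dialogue streaming method, assuming `text_to_dialogue` is an instance of the endpoint class modified here and that each input pairs `text` with a `voice_id` as described in the @param tag above; the voice IDs and filename are placeholders:

  inputs = [
    { text: "Hello, how are you today?", voice_id: "voice_id_one" },
    { text: "Doing great, thanks for asking!", voice_id: "voice_id_two" }
  ]

  File.open("dialogue.mp3", "wb") do |file|
    text_to_dialogue.stream(inputs, model_id: "eleven_v3") do |chunk|
      file.write(chunk) # each yielded chunk is a piece of the audio stream
    end
  end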
@@ -33,7 +33,153 @@ module ElevenlabsClient
  end
  end

- # Alias for backward compatibility and convenience
+ # POST /v1/text-to-speech/{voice_id}/with-timestamps
+ # Generate speech from text with precise character-level timing information
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/with-timestamps
+ #
+ # @param voice_id [String] Voice ID to be used
+ # @param text [String] The text that will get converted into speech
+ # @param options [Hash] Optional TTS parameters
+ # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
+ # @option options [String] :language_code ISO 639-1 language code for text normalization
+ # @option options [Hash] :voice_settings Voice settings overriding stored settings
+ # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+ # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+ # @option options [String] :previous_text Text that came before current request
+ # @option options [String] :next_text Text that comes after current request
+ # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
+ # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
+ # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+ # @option options [Boolean] :apply_language_text_normalization Language text normalization
+ # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
+ # @option options [Boolean] :enable_logging Enable logging (defaults to true)
+ # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
+ # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+ # @return [Hash] Response containing audio_base64, alignment, and normalized_alignment
+ def convert_with_timestamps(voice_id, text, **options)
+   # Build query parameters
+   query_params = {}
+   query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
+   query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
+   query_params[:output_format] = options[:output_format] if options[:output_format]
+
+   # Build endpoint with query parameters
+   endpoint = "/v1/text-to-speech/#{voice_id}/with-timestamps"
+   if query_params.any?
+     query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
+     endpoint += "?#{query_string}"
+   end
+
+   # Build request body
+   request_body = { text: text }
+
+   # Add optional body parameters
+   request_body[:model_id] = options[:model_id] if options[:model_id]
+   request_body[:language_code] = options[:language_code] if options[:language_code]
+   request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+   request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+   request_body[:seed] = options[:seed] if options[:seed]
+   request_body[:previous_text] = options[:previous_text] if options[:previous_text]
+   request_body[:next_text] = options[:next_text] if options[:next_text]
+   request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
+   request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
+   request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+   request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
+   request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
+
+   @client.post(endpoint, request_body)
+ end
+
+ alias_method :text_to_speech_with_timestamps, :convert_with_timestamps
+
+ # POST /v1/text-to-speech/{voice_id}/stream
+ # Stream text-to-speech audio in real-time chunks
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
+ #
+ # @param voice_id [String] The ID of the voice to use
+ # @param text [String] Text to synthesize
+ # @param options [Hash] Optional TTS parameters
+ # @option options [String] :model_id Model to use (defaults to "eleven_multilingual_v2")
+ # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+ # @option options [Hash] :voice_settings Voice configuration
+ # @param block [Proc] Block to handle each audio chunk
+ # @return [Faraday::Response] The response object
+ def stream(voice_id, text, **options, &block)
+   output_format = options[:output_format] || "mp3_44100_128"
+   endpoint = "/v1/text-to-speech/#{voice_id}/stream?output_format=#{output_format}"
+
+   request_body = {
+     text: text,
+     model_id: options[:model_id] || "eleven_multilingual_v2"
+   }
+
+   # Add voice_settings if provided
+   request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+
+   @client.post_streaming(endpoint, request_body, &block)
+ end
+
+ # POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
+ # Stream text-to-speech audio with character-level timing information
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
+ #
+ # @param voice_id [String] Voice ID to be used
+ # @param text [String] The text that will get converted into speech
+ # @param options [Hash] Optional TTS parameters
+ # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
+ # @option options [String] :language_code ISO 639-1 language code for text normalization
+ # @option options [Hash] :voice_settings Voice settings overriding stored settings
+ # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
+ # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
+ # @option options [String] :previous_text Text that came before current request
+ # @option options [String] :next_text Text that comes after current request
+ # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
+ # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
+ # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
+ # @option options [Boolean] :apply_language_text_normalization Language text normalization
+ # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
+ # @option options [Boolean] :enable_logging Enable logging (defaults to true)
+ # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
+ # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
+ # @param block [Proc] Block to handle each streaming chunk containing audio and timing data
+ # @return [Faraday::Response] The response object
+ def stream_with_timestamps(voice_id, text, **options, &block)
+   # Build query parameters
+   query_params = {}
+   query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
+   query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
+   query_params[:output_format] = options[:output_format] if options[:output_format]
+
+   # Build endpoint with query parameters
+   endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
+   if query_params.any?
+     query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
+     endpoint += "?#{query_string}"
+   end
+
+   # Build request body
+   request_body = { text: text }
+
+   # Add optional body parameters
+   request_body[:model_id] = options[:model_id] if options[:model_id]
+   request_body[:language_code] = options[:language_code] if options[:language_code]
+   request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
+   request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
+   request_body[:seed] = options[:seed] if options[:seed]
+   request_body[:previous_text] = options[:previous_text] if options[:previous_text]
+   request_body[:next_text] = options[:next_text] if options[:next_text]
+   request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
+   request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
+   request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
+   request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
+   request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
+
+   # Use streaming method with JSON parsing for timestamp data
+   @client.post_streaming_with_timestamps(endpoint, request_body, &block)
+ end
+
+ alias_method :text_to_speech_stream_with_timestamps, :stream_with_timestamps
+ alias_method :text_to_speech_stream, :stream
  alias_method :text_to_speech, :convert

  private
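A usage sketch for the three methods consolidated into this class, assuming `text_to_speech` is an instance of the endpoint class modified here; the voice ID, text, and filenames are placeholders:

  require "base64"

  # Non-streaming call with character-level timing; the response keys follow
  # the @return tag above (audio_base64, alignment, normalized_alignment).
  result = text_to_speech.convert_with_timestamps("voice_id_here", "Hello world")
  File.binwrite("speech.mp3", Base64.decode64(result["audio_base64"]))

  # Plain audio streaming: each yielded chunk is a piece of the audio stream.
  File.open("stream.mp3", "wb") do |file|
    text_to_speech.stream("voice_id_here", "Hello world", output_format: "mp3_44100_128") do |chunk|
      file.write(chunk)
    end
  end

  # Streaming with timestamps: per the @param block description above, each
  # yielded chunk carries audio plus timing data rather than raw bytes.
  text_to_speech.stream_with_timestamps("voice_id_here", "Hello world") do |chunk|
    # handle each chunk of audio and alignment data here
  end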
@@ -74,6 +74,18 @@ module ElevenlabsClient
  @client.post(endpoint, request_body)
  end

+ # GET /v1/text-to-voice/:generated_voice_id/stream
+ # Stream a voice preview that was created via the /v1/text-to-voice/design endpoint
+ # Documentation: https://elevenlabs.io/docs/api-reference/text-to-voice/stream-voice-preview
+ #
+ # @param generated_voice_id [String] The generated_voice_id to stream
+ # @param block [Proc] Block to handle each streaming chunk
+ # @return [Faraday::Response] The response object
+ def stream_preview(generated_voice_id, &block)
+   endpoint = "/v1/text-to-voice/#{generated_voice_id}/stream"
+   @client.get_streaming(endpoint, &block)
+ end
+
  # GET /v1/voices
  # Retrieves all voices associated with your Elevenlabs account
  # Documentation: https://elevenlabs.io/docs/api-reference/voices
@@ -84,9 +96,9 @@ module ElevenlabsClient
  @client.get(endpoint)
  end

- # Alias methods for backward compatibility and convenience
  alias_method :design_voice, :design
  alias_method :create_from_generated_voice, :create
+ alias_method :stream_voice_preview, :stream_preview

  private

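A brief sketch of the new preview streaming call, assuming `text_to_voice` is an instance of the endpoint class modified above and that a preview was already generated via the design endpoint; the generated voice ID and filename are placeholders:

  File.open("voice_preview.mp3", "wb") do |file|
    text_to_voice.stream_preview("generated_voice_id_here") do |chunk|
      file.write(chunk) # raw preview audio as it arrives
    end
  end
  # The alias added above points at the same method:
  # text_to_voice.stream_voice_preview("generated_voice_id_here") { |chunk| ... }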
@@ -110,29 +110,6 @@ module ElevenlabsClient
  @client.delete(endpoint)
  end

- # Check if a voice is banned (safety control)
- # @param voice_id [String] The ID of the voice to check
- # @return [Boolean] True if the voice is banned
- def banned?(voice_id)
-   voice = get(voice_id)
-   voice["safety_control"] == "BAN"
- rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
-   # If we can't get the voice, assume it's not banned
-   false
- end
-
- # Check if a voice is active (exists in the voice list)
- # @param voice_id [String] The ID of the voice to check
- # @return [Boolean] True if the voice is active
- def active?(voice_id)
-   voices = list
-   active_voice_ids = voices["voices"].map { |voice| voice["voice_id"] }
-   active_voice_ids.include?(voice_id)
- rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
-   # If we can't get the voice list, assume it's not active
-   false
- end
-
  # POST /v1/similar-voices
  # Returns a list of shared voices similar to the provided audio sample
  # Documentation: https://elevenlabs.io/docs/api-reference/voices/similar-voices
@@ -491,7 +468,6 @@ module ElevenlabsClient
  @client.post_multipart(endpoint, payload)
  end

- # Alias methods for backward compatibility and convenience
  alias_method :get_voice, :get
  alias_method :list_voices, :list
  alias_method :create_voice, :create
@@ -502,6 +478,29 @@ module ElevenlabsClient
  alias_method :voice_settings, :get_settings
  alias_method :update_settings, :edit_settings

+ # Check if a voice is banned (safety control)
+ # @param voice_id [String] The ID of the voice to check
+ # @return [Boolean] True if the voice is banned
+ def banned?(voice_id)
+   voice = get(voice_id)
+   voice["safety_control"] == "BAN"
+ rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
+   # If we can't get the voice, assume it's not banned
+   false
+ end
+
+ # Check if a voice is active (exists in the voice list)
+ # @param voice_id [String] The ID of the voice to check
+ # @return [Boolean] True if the voice is active
+ def active?(voice_id)
+   voices = list
+   active_voice_ids = voices["voices"].map { |voice| voice["voice_id"] }
+   active_voice_ids.include?(voice_id)
+ rescue ElevenlabsClient::ValidationError, ElevenlabsClient::APIError, ElevenlabsClient::NotFoundError
+   # If we can't get the voice list, assume it's not active
+   false
+ end
+
  private

  attr_reader :client
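These two predicates are relocated below the alias definitions rather than changed. As a usage sketch, assuming `voices` is an instance of the voices endpoint class: both rescue lookup errors and fall back to false, so they are safe to call with IDs that may no longer exist.

  voice_id = "voice_id_here" # placeholder
  if voices.active?(voice_id) && !voices.banned?(voice_id)
    puts "Voice #{voice_id} is available for synthesis"
  end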
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module ElevenlabsClient
-   VERSION = "0.5.0"
+   VERSION = "0.6.0"
  end
@@ -5,14 +5,14 @@ require_relative "elevenlabs_client/errors"
  require_relative "elevenlabs_client/settings"
  require_relative "elevenlabs_client/endpoints/dubs"
  require_relative "elevenlabs_client/endpoints/text_to_speech"
- require_relative "elevenlabs_client/endpoints/text_to_speech_stream"
- require_relative "elevenlabs_client/endpoints/text_to_speech_with_timestamps"
- require_relative "elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps"
  require_relative "elevenlabs_client/endpoints/text_to_dialogue"
- require_relative "elevenlabs_client/endpoints/text_to_dialogue_stream"
  require_relative "elevenlabs_client/endpoints/sound_generation"
  require_relative "elevenlabs_client/endpoints/text_to_voice"
- require_relative "elevenlabs_client/endpoints/models"
+ require_relative "elevenlabs_client/endpoints/admin/models"
+ require_relative "elevenlabs_client/endpoints/admin/history"
+ require_relative "elevenlabs_client/endpoints/admin/usage"
+ require_relative "elevenlabs_client/endpoints/admin/user"
+ require_relative "elevenlabs_client/endpoints/admin/voice_library"
  require_relative "elevenlabs_client/endpoints/voices"
  require_relative "elevenlabs_client/endpoints/music"
  require_relative "elevenlabs_client/endpoints/audio_isolation"
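The former top-level models endpoint is replaced here by a group of admin endpoint files. A hypothetical sketch of the relocated model listing, assuming the class keeps the same `list` interface as the removed Models class and is namespaced to match its new path (neither the constant name nor a client accessor is shown in this diff):

  # `client` is assumed to be a configured ElevenlabsClient::Client.
  models = ElevenlabsClient::Admin::Models.new(client) # hypothetical constant
  models.list # GET /v1/models, as in the removed top-level Models class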
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: elevenlabs_client
  version: !ruby/object:Gem::Version
-   version: 0.5.0
+   version: 0.6.0
  platform: ruby
  authors:
  - Vitor Oliveira
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2025-09-14 00:00:00.000000000 Z
+ date: 2025-09-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: faraday
@@ -122,6 +122,20 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: '3.0'
+ - !ruby/object:Gem::Dependency
+ name: bundler-audit
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '0.9'
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: '0.9'
  description: A Ruby client library for interacting with ElevenLabs dubbing and voice
  synthesis APIs
  email:
@@ -135,21 +149,21 @@ files:
  - README.md
  - lib/elevenlabs_client.rb
  - lib/elevenlabs_client/client.rb
+ - lib/elevenlabs_client/endpoints/admin/history.rb
+ - lib/elevenlabs_client/endpoints/admin/models.rb
+ - lib/elevenlabs_client/endpoints/admin/usage.rb
+ - lib/elevenlabs_client/endpoints/admin/user.rb
+ - lib/elevenlabs_client/endpoints/admin/voice_library.rb
  - lib/elevenlabs_client/endpoints/audio_isolation.rb
  - lib/elevenlabs_client/endpoints/audio_native.rb
  - lib/elevenlabs_client/endpoints/dubs.rb
  - lib/elevenlabs_client/endpoints/forced_alignment.rb
- - lib/elevenlabs_client/endpoints/models.rb
  - lib/elevenlabs_client/endpoints/music.rb
  - lib/elevenlabs_client/endpoints/sound_generation.rb
  - lib/elevenlabs_client/endpoints/speech_to_speech.rb
  - lib/elevenlabs_client/endpoints/speech_to_text.rb
  - lib/elevenlabs_client/endpoints/text_to_dialogue.rb
- - lib/elevenlabs_client/endpoints/text_to_dialogue_stream.rb
  - lib/elevenlabs_client/endpoints/text_to_speech.rb
- - lib/elevenlabs_client/endpoints/text_to_speech_stream.rb
- - lib/elevenlabs_client/endpoints/text_to_speech_stream_with_timestamps.rb
- - lib/elevenlabs_client/endpoints/text_to_speech_with_timestamps.rb
  - lib/elevenlabs_client/endpoints/text_to_voice.rb
  - lib/elevenlabs_client/endpoints/voices.rb
  - lib/elevenlabs_client/endpoints/websocket_text_to_speech.rb
@@ -1,26 +0,0 @@
- # frozen_string_literal: true
-
- module ElevenlabsClient
-   class Models
-     def initialize(client)
-       @client = client
-     end
-
-     # GET /v1/models
-     # Gets a list of available models
-     # Documentation: https://elevenlabs.io/docs/api-reference/models/list
-     #
-     # @return [Hash] The JSON response containing an array of models
-     def list
-       endpoint = "/v1/models"
-       @client.get(endpoint)
-     end
-
-     # Alias for backward compatibility and convenience
-     alias_method :list_models, :list
-
-     private
-
-     attr_reader :client
-   end
- end
@@ -1,50 +0,0 @@
- # frozen_string_literal: true
-
- module ElevenlabsClient
-   class TextToDialogueStream
-     def initialize(client)
-       @client = client
-     end
-
-     # POST /v1/text-to-dialogue/stream
-     # Converts a list of text and voice ID pairs into speech (dialogue) and returns an audio stream.
-     # Documentation: https://elevenlabs.io/docs/api-reference/text-to-dialogue/stream
-     #
-     # @param inputs [Array<Hash>] A list of dialogue inputs, each containing text and a voice ID
-     # @param options [Hash] Optional parameters
-     # @option options [String] :model_id Identifier of the model to be used (default: "eleven_v3")
-     # @option options [String] :language_code ISO 639-1 language code
-     # @option options [Hash] :settings Settings controlling the dialogue generation
-     # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
-     # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
-     # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
-     # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-     # @param block [Proc] Block to handle each audio chunk
-     # @return [Faraday::Response] The response object
-     def stream(inputs, **options, &block)
-       # Build endpoint with optional query params
-       output_format = options[:output_format] || "mp3_44100_128"
-       endpoint = "/v1/text-to-dialogue/stream?output_format=#{output_format}"
-
-       # Build request body
-       request_body = { inputs: inputs }
-       request_body[:model_id] = options[:model_id] if options[:model_id]
-       request_body[:language_code] = options[:language_code] if options[:language_code]
-       request_body[:settings] = options[:settings] if options[:settings]
-       request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
-       request_body[:seed] = options[:seed] if options[:seed]
-       request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
-
-       @client.post_streaming(endpoint, request_body, &block)
-     end
-
-     # Alias for convenience
-     alias_method :text_to_dialogue_stream, :stream
-
-     private
-
-     attr_reader :client
-   end
- end
-
-
@@ -1,43 +0,0 @@
- # frozen_string_literal: true
-
- module ElevenlabsClient
-   class TextToSpeechStream
-     def initialize(client)
-       @client = client
-     end
-
-     # POST /v1/text-to-speech/{voice_id}/stream
-     # Stream text-to-speech audio in real-time chunks
-     # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream
-     #
-     # @param voice_id [String] The ID of the voice to use
-     # @param text [String] Text to synthesize
-     # @param options [Hash] Optional TTS parameters
-     # @option options [String] :model_id Model to use (defaults to "eleven_multilingual_v2")
-     # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-     # @option options [Hash] :voice_settings Voice configuration
-     # @param block [Proc] Block to handle each audio chunk
-     # @return [Faraday::Response] The response object
-     def stream(voice_id, text, **options, &block)
-       output_format = options[:output_format] || "mp3_44100_128"
-       endpoint = "/v1/text-to-speech/#{voice_id}/stream?output_format=#{output_format}"
-
-       request_body = {
-         text: text,
-         model_id: options[:model_id] || "eleven_multilingual_v2"
-       }
-
-       # Add voice_settings if provided
-       request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
-
-       @client.post_streaming(endpoint, request_body, &block)
-     end
-
-     # Alias for backward compatibility
-     alias_method :text_to_speech_stream, :stream
-
-     private
-
-     attr_reader :client
-   end
- end
@@ -1,75 +0,0 @@
- # frozen_string_literal: true
-
- module ElevenlabsClient
-   class TextToSpeechStreamWithTimestamps
-     def initialize(client)
-       @client = client
-     end
-
-     # POST /v1/text-to-speech/{voice_id}/stream/with-timestamps
-     # Stream text-to-speech audio with character-level timing information
-     # Documentation: https://elevenlabs.io/docs/api-reference/text-to-speech/stream-with-timestamps
-     #
-     # @param voice_id [String] Voice ID to be used
-     # @param text [String] The text that will get converted into speech
-     # @param options [Hash] Optional TTS parameters
-     # @option options [String] :model_id Model identifier (defaults to "eleven_multilingual_v2")
-     # @option options [String] :language_code ISO 639-1 language code for text normalization
-     # @option options [Hash] :voice_settings Voice settings overriding stored settings
-     # @option options [Array<Hash>] :pronunciation_dictionary_locators Pronunciation dictionary locators (max 3)
-     # @option options [Integer] :seed Deterministic sampling seed (0-4294967295)
-     # @option options [String] :previous_text Text that came before current request
-     # @option options [String] :next_text Text that comes after current request
-     # @option options [Array<String>] :previous_request_ids Request IDs of previous samples (max 3)
-     # @option options [Array<String>] :next_request_ids Request IDs of next samples (max 3)
-     # @option options [String] :apply_text_normalization Text normalization mode ("auto", "on", "off")
-     # @option options [Boolean] :apply_language_text_normalization Language text normalization
-     # @option options [Boolean] :use_pvc_as_ivc Use IVC version instead of PVC (deprecated)
-     # @option options [Boolean] :enable_logging Enable logging (defaults to true)
-     # @option options [Integer] :optimize_streaming_latency Latency optimizations (0-4, deprecated)
-     # @option options [String] :output_format Output format (defaults to "mp3_44100_128")
-     # @param block [Proc] Block to handle each streaming chunk containing audio and timing data
-     # @return [Faraday::Response] The response object
-     def stream(voice_id, text, **options, &block)
-       # Build query parameters
-       query_params = {}
-       query_params[:enable_logging] = options[:enable_logging] unless options[:enable_logging].nil?
-       query_params[:optimize_streaming_latency] = options[:optimize_streaming_latency] if options[:optimize_streaming_latency]
-       query_params[:output_format] = options[:output_format] if options[:output_format]
-
-       # Build endpoint with query parameters
-       endpoint = "/v1/text-to-speech/#{voice_id}/stream/with-timestamps"
-       if query_params.any?
-         query_string = query_params.map { |k, v| "#{k}=#{v}" }.join("&")
-         endpoint += "?#{query_string}"
-       end
-
-       # Build request body
-       request_body = { text: text }
-
-       # Add optional body parameters
-       request_body[:model_id] = options[:model_id] if options[:model_id]
-       request_body[:language_code] = options[:language_code] if options[:language_code]
-       request_body[:voice_settings] = options[:voice_settings] if options[:voice_settings]
-       request_body[:pronunciation_dictionary_locators] = options[:pronunciation_dictionary_locators] if options[:pronunciation_dictionary_locators]
-       request_body[:seed] = options[:seed] if options[:seed]
-       request_body[:previous_text] = options[:previous_text] if options[:previous_text]
-       request_body[:next_text] = options[:next_text] if options[:next_text]
-       request_body[:previous_request_ids] = options[:previous_request_ids] if options[:previous_request_ids]
-       request_body[:next_request_ids] = options[:next_request_ids] if options[:next_request_ids]
-       request_body[:apply_text_normalization] = options[:apply_text_normalization] if options[:apply_text_normalization]
-       request_body[:apply_language_text_normalization] = options[:apply_language_text_normalization] unless options[:apply_language_text_normalization].nil?
-       request_body[:use_pvc_as_ivc] = options[:use_pvc_as_ivc] unless options[:use_pvc_as_ivc].nil?
-
-       # Use streaming method with JSON parsing for timestamp data
-       @client.post_streaming_with_timestamps(endpoint, request_body, &block)
-     end
-
-     # Alias for backward compatibility
-     alias_method :text_to_speech_stream_with_timestamps, :stream
-
-     private
-
-     attr_reader :client
-   end
- end