ruby_llm-agents 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/app/controllers/ruby_llm/agents/agents_controller.rb +16 -14
  4. data/app/controllers/ruby_llm/agents/dashboard_controller.rb +20 -20
  5. data/app/controllers/ruby_llm/agents/executions_controller.rb +5 -7
  6. data/app/helpers/ruby_llm/agents/application_helper.rb +57 -58
  7. data/app/models/ruby_llm/agents/execution/analytics.rb +27 -27
  8. data/app/models/ruby_llm/agents/execution/scopes.rb +4 -6
  9. data/app/models/ruby_llm/agents/execution.rb +25 -25
  10. data/app/models/ruby_llm/agents/tenant/budgetable.rb +16 -10
  11. data/app/models/ruby_llm/agents/tenant/resettable.rb +12 -12
  12. data/app/models/ruby_llm/agents/tenant/trackable.rb +7 -7
  13. data/app/services/ruby_llm/agents/agent_registry.rb +6 -6
  14. data/lib/generators/ruby_llm_agents/agent_generator.rb +4 -4
  15. data/lib/generators/ruby_llm_agents/background_remover_generator.rb +6 -6
  16. data/lib/generators/ruby_llm_agents/embedder_generator.rb +4 -4
  17. data/lib/generators/ruby_llm_agents/image_analyzer_generator.rb +7 -7
  18. data/lib/generators/ruby_llm_agents/image_editor_generator.rb +4 -4
  19. data/lib/generators/ruby_llm_agents/image_generator_generator.rb +6 -6
  20. data/lib/generators/ruby_llm_agents/image_pipeline_generator.rb +9 -9
  21. data/lib/generators/ruby_llm_agents/image_transformer_generator.rb +6 -6
  22. data/lib/generators/ruby_llm_agents/image_upscaler_generator.rb +4 -4
  23. data/lib/generators/ruby_llm_agents/image_variator_generator.rb +4 -4
  24. data/lib/generators/ruby_llm_agents/install_generator.rb +3 -3
  25. data/lib/generators/ruby_llm_agents/migrate_structure_generator.rb +4 -4
  26. data/lib/generators/ruby_llm_agents/multi_tenancy_generator.rb +2 -2
  27. data/lib/generators/ruby_llm_agents/restructure_generator.rb +13 -13
  28. data/lib/generators/ruby_llm_agents/speaker_generator.rb +6 -6
  29. data/lib/generators/ruby_llm_agents/transcriber_generator.rb +4 -4
  30. data/lib/generators/ruby_llm_agents/upgrade_generator.rb +2 -2
  31. data/lib/ruby_llm/agents/audio/speaker.rb +40 -31
  32. data/lib/ruby_llm/agents/audio/speech_client.rb +328 -0
  33. data/lib/ruby_llm/agents/audio/speech_pricing.rb +273 -0
  34. data/lib/ruby_llm/agents/audio/transcriber.rb +33 -33
  35. data/lib/ruby_llm/agents/base_agent.rb +14 -14
  36. data/lib/ruby_llm/agents/core/base/callbacks.rb +3 -3
  37. data/lib/ruby_llm/agents/core/configuration.rb +86 -73
  38. data/lib/ruby_llm/agents/core/errors.rb +27 -2
  39. data/lib/ruby_llm/agents/core/instrumentation.rb +64 -66
  40. data/lib/ruby_llm/agents/core/llm_tenant.rb +7 -7
  41. data/lib/ruby_llm/agents/core/version.rb +1 -1
  42. data/lib/ruby_llm/agents/dsl/base.rb +3 -3
  43. data/lib/ruby_llm/agents/dsl/reliability.rb +9 -9
  44. data/lib/ruby_llm/agents/image/analyzer/dsl.rb +1 -1
  45. data/lib/ruby_llm/agents/image/analyzer/execution.rb +4 -4
  46. data/lib/ruby_llm/agents/image/background_remover/dsl.rb +1 -1
  47. data/lib/ruby_llm/agents/image/background_remover/execution.rb +3 -3
  48. data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +8 -8
  49. data/lib/ruby_llm/agents/image/editor/execution.rb +1 -1
  50. data/lib/ruby_llm/agents/image/generator/pricing.rb +9 -10
  51. data/lib/ruby_llm/agents/image/generator.rb +6 -6
  52. data/lib/ruby_llm/agents/image/pipeline/dsl.rb +6 -6
  53. data/lib/ruby_llm/agents/image/pipeline/execution.rb +9 -9
  54. data/lib/ruby_llm/agents/image/pipeline.rb +1 -1
  55. data/lib/ruby_llm/agents/image/transformer/execution.rb +1 -1
  56. data/lib/ruby_llm/agents/image/upscaler/dsl.rb +1 -1
  57. data/lib/ruby_llm/agents/image/upscaler/execution.rb +3 -5
  58. data/lib/ruby_llm/agents/image/variator/execution.rb +1 -1
  59. data/lib/ruby_llm/agents/infrastructure/alert_manager.rb +4 -4
  60. data/lib/ruby_llm/agents/infrastructure/attempt_tracker.rb +4 -4
  61. data/lib/ruby_llm/agents/infrastructure/budget/budget_query.rb +9 -9
  62. data/lib/ruby_llm/agents/infrastructure/budget/config_resolver.rb +3 -3
  63. data/lib/ruby_llm/agents/infrastructure/budget/forecaster.rb +1 -1
  64. data/lib/ruby_llm/agents/infrastructure/budget/spend_recorder.rb +17 -17
  65. data/lib/ruby_llm/agents/infrastructure/circuit_breaker.rb +1 -0
  66. data/lib/ruby_llm/agents/infrastructure/execution_logger_job.rb +1 -1
  67. data/lib/ruby_llm/agents/infrastructure/reliability.rb +6 -6
  68. data/lib/ruby_llm/agents/pipeline/builder.rb +11 -11
  69. data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +3 -3
  70. data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +4 -4
  71. data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +34 -22
  72. data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +2 -3
  73. data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +7 -7
  74. data/lib/ruby_llm/agents/results/background_removal_result.rb +6 -6
  75. data/lib/ruby_llm/agents/results/embedding_result.rb +15 -15
  76. data/lib/ruby_llm/agents/results/image_analysis_result.rb +7 -7
  77. data/lib/ruby_llm/agents/results/image_edit_result.rb +4 -4
  78. data/lib/ruby_llm/agents/results/image_generation_result.rb +5 -5
  79. data/lib/ruby_llm/agents/results/image_pipeline_result.rb +4 -4
  80. data/lib/ruby_llm/agents/results/image_transform_result.rb +4 -4
  81. data/lib/ruby_llm/agents/results/image_upscale_result.rb +5 -5
  82. data/lib/ruby_llm/agents/results/image_variation_result.rb +4 -4
  83. data/lib/ruby_llm/agents/results/transcription_result.rb +1 -1
  84. data/lib/ruby_llm/agents/text/embedder.rb +13 -13
  85. metadata +3 -1
@@ -17,17 +17,17 @@ module RubyLlmAgents
17
17
  source_root File.expand_path("templates", __dir__)
18
18
 
19
19
  class_option :provider, type: :string, default: "openai",
20
- desc: "The TTS provider to use (openai, elevenlabs)"
20
+ desc: "The TTS provider to use (openai, elevenlabs)"
21
21
  class_option :model, type: :string, default: nil,
22
- desc: "The TTS model to use"
22
+ desc: "The TTS model to use"
23
23
  class_option :voice, type: :string, default: "nova",
24
- desc: "The voice to use"
24
+ desc: "The voice to use"
25
25
  class_option :speed, type: :numeric, default: 1.0,
26
- desc: "Speech speed (0.25-4.0 for OpenAI)"
26
+ desc: "Speech speed (0.25-4.0 for OpenAI)"
27
27
  class_option :format, type: :string, default: "mp3",
28
- desc: "Output format (mp3, wav, ogg, flac)"
28
+ desc: "Output format (mp3, wav, ogg, flac)"
29
29
  class_option :cache, type: :string, default: nil,
30
- desc: "Cache TTL (e.g., '7.days')"
30
+ desc: "Cache TTL (e.g., '7.days')"
31
31
 
32
32
  def ensure_base_class_and_skill_file
33
33
  audio_dir = "app/agents/audio"
@@ -17,13 +17,13 @@ module RubyLlmAgents
17
17
  source_root File.expand_path("templates", __dir__)
18
18
 
19
19
  class_option :model, type: :string, default: "whisper-1",
20
- desc: "The transcription model to use"
20
+ desc: "The transcription model to use"
21
21
  class_option :language, type: :string, default: nil,
22
- desc: "Language code (e.g., 'en', 'es')"
22
+ desc: "Language code (e.g., 'en', 'es')"
23
23
  class_option :output_format, type: :string, default: "text",
24
- desc: "Output format (text, srt, vtt, json)"
24
+ desc: "Output format (text, srt, vtt, json)"
25
25
  class_option :cache, type: :string, default: nil,
26
- desc: "Cache TTL (e.g., '30.days')"
26
+ desc: "Cache TTL (e.g., '30.days')"
27
27
 
28
28
  def ensure_base_class_and_skill_file
29
29
  audio_dir = "app/agents/audio"
@@ -164,13 +164,13 @@ module RubyLlmAgents
164
164
  return false unless ActiveRecord::Base.connection.table_exists?(table)
165
165
 
166
166
  ActiveRecord::Base.connection.column_exists?(table, column)
167
- rescue StandardError
167
+ rescue
168
168
  false
169
169
  end
170
170
 
171
171
  def table_exists?(table)
172
172
  ActiveRecord::Base.connection.table_exists?(table)
173
- rescue StandardError
173
+ rescue
174
174
  false
175
175
  end
176
176
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require "digest"
4
4
  require_relative "../results/speech_result"
5
+ require_relative "speech_client"
6
+ require_relative "speech_pricing"
5
7
 
6
8
  module RubyLLM
7
9
  module Agents
@@ -194,19 +196,19 @@ module RubyLLM
194
196
 
195
197
  def default_tts_provider
196
198
  RubyLLM::Agents.configuration.default_tts_provider
197
- rescue StandardError
199
+ rescue
198
200
  :openai
199
201
  end
200
202
 
201
203
  def default_tts_model
202
204
  RubyLLM::Agents.configuration.default_tts_model
203
- rescue StandardError
205
+ rescue
204
206
  "tts-1"
205
207
  end
206
208
 
207
209
  def default_tts_voice
208
210
  RubyLLM::Agents.configuration.default_tts_voice
209
- rescue StandardError
211
+ rescue
210
212
  "nova"
211
213
  end
212
214
  end
@@ -410,7 +412,15 @@ module RubyLLM
410
412
 
411
413
  # Executes standard (non-streaming) speech synthesis
412
414
  def execute_standard_speech(text, options)
413
- response = RubyLLM.speak(text, **options)
415
+ response = speech_client.speak(
416
+ text,
417
+ model: options[:model],
418
+ voice: options[:voice],
419
+ voice_id: resolved_voice_id,
420
+ speed: options[:speed],
421
+ response_format: options[:response_format] || "mp3",
422
+ voice_settings: options[:voice_settings]
423
+ )
414
424
 
415
425
  {
416
426
  audio: response.audio,
@@ -428,9 +438,17 @@ module RubyLLM
428
438
  def execute_streaming_speech(text, options)
429
439
  audio_chunks = []
430
440
 
431
- RubyLLM.speak(text, **options.merge(stream: true)) do |chunk|
441
+ speech_client.speak_streaming(
442
+ text,
443
+ model: options[:model],
444
+ voice: options[:voice],
445
+ voice_id: resolved_voice_id,
446
+ speed: options[:speed],
447
+ response_format: options[:response_format] || "mp3",
448
+ voice_settings: options[:voice_settings]
449
+ ) do |chunk|
432
450
  audio_chunks << chunk.audio if chunk.respond_to?(:audio)
433
- @streaming_block.call(chunk) if @streaming_block
451
+ @streaming_block&.call(chunk)
434
452
  end
435
453
 
436
454
  {
@@ -445,7 +463,7 @@ module RubyLLM
445
463
  }
446
464
  end
447
465
 
448
- # Builds options for RubyLLM.speak
466
+ # Builds options for SpeechClient
449
467
  def build_speak_options
450
468
  options = {
451
469
  model: resolved_model,
@@ -453,13 +471,11 @@ module RubyLLM
453
471
  }
454
472
 
455
473
  speed = resolved_speed
456
- options[:speed] = speed if speed && speed != 1.0
474
+ options[:speed] = speed if speed && (speed - 1.0).abs > Float::EPSILON
457
475
  options[:response_format] = resolved_output_format.to_s
458
476
 
459
- if resolved_provider == :elevenlabs
460
- voice_settings = self.class.voice_settings_config
461
- options[:voice_settings] = voice_settings.to_h if voice_settings
462
- end
477
+ voice_settings = self.class.voice_settings_config
478
+ options[:voice_settings] = voice_settings.to_h if voice_settings
463
479
 
464
480
  options
465
481
  end
@@ -488,29 +504,17 @@ module RubyLLM
488
504
 
489
505
  # Calculates cost for speech synthesis
490
506
  def calculate_cost(raw_result)
491
- characters = raw_result[:characters] || 0
492
-
493
- if raw_result[:raw_response].respond_to?(:cost) && raw_result[:raw_response].cost
507
+ if raw_result[:raw_response].respond_to?(:cost) && raw_result[:raw_response]&.cost
494
508
  return raw_result[:raw_response].cost
495
509
  end
496
510
 
497
- provider = raw_result[:provider]
498
- model_name = raw_result[:model].to_s
499
-
500
- price_per_1k_chars = case provider
501
- when :openai
502
- model_name.include?("hd") ? 0.030 : 0.015
503
- when :elevenlabs
504
- 0.30
505
- when :google
506
- 0.016
507
- when :polly
508
- 0.016
509
- else
510
- 0.015
511
- end
511
+ characters = raw_result[:characters] || 0
512
512
 
513
- (characters / 1000.0) * price_per_1k_chars
513
+ Audio::SpeechPricing.calculate_cost(
514
+ provider: raw_result[:provider],
515
+ model_id: raw_result[:model].to_s,
516
+ characters: characters
517
+ )
514
518
  end
515
519
 
516
520
  # Resolves the provider to use
@@ -547,6 +551,11 @@ module RubyLLM
547
551
  def streaming_enabled?
548
552
  @runtime_streaming || self.class.streaming?
549
553
  end
554
+
555
+ # Returns a SpeechClient for the resolved provider
556
+ def speech_client
557
+ @speech_client ||= Audio::SpeechClient.new(provider: resolved_provider)
558
+ end
550
559
  end
551
560
  end
552
561
  end
@@ -0,0 +1,328 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "faraday"
4
+ require "json"
5
+
6
+ module RubyLLM
7
+ module Agents
8
+ module Audio
9
+ # Direct HTTP client for text-to-speech APIs.
10
+ #
11
+ # Supports OpenAI and ElevenLabs providers, bypassing the need for
12
+ # a RubyLLM.speak() method that does not exist in the base gem.
13
+ #
14
+ # @example OpenAI
15
+ # client = SpeechClient.new(provider: :openai)
16
+ # response = client.speak("Hello", model: "tts-1", voice: "nova")
17
+ # response.audio # => binary audio data
18
+ #
19
+ # @example ElevenLabs
20
+ # client = SpeechClient.new(provider: :elevenlabs)
21
+ # response = client.speak("Hello",
22
+ # model: "eleven_v3",
23
+ # voice: "Rachel",
24
+ # voice_id: "21m00Tcm4TlvDq8ikWAM",
25
+ # voice_settings: { stability: 0.5, similarity_boost: 0.75 }
26
+ # )
27
+ #
28
+ class SpeechClient
29
+ SUPPORTED_PROVIDERS = %i[openai elevenlabs].freeze
30
+
31
+ Response = Struct.new(:audio, :format, :model, :voice, keyword_init: true) do
32
+ def duration
33
+ nil
34
+ end
35
+
36
+ def cost
37
+ nil
38
+ end
39
+ end
40
+
41
+ StreamChunk = Struct.new(:audio, keyword_init: true)
42
+
43
+ # @param provider [Symbol] :openai or :elevenlabs
44
+ # @raise [UnsupportedProviderError] if provider is not supported
45
+ def initialize(provider:)
46
+ validate_provider!(provider)
47
+ @provider = provider
48
+ end
49
+
50
+ # Synthesize speech (non-streaming)
51
+ #
52
+ # @param text [String] text to convert
53
+ # @param model [String] model identifier
54
+ # @param voice [String] voice name
55
+ # @param voice_id [String, nil] voice ID (required for ElevenLabs)
56
+ # @param speed [Float, nil] speed multiplier
57
+ # @param response_format [String] output format
58
+ # @param voice_settings [Hash, nil] ElevenLabs voice settings
59
+ # @return [Response]
60
+ def speak(text, model:, voice:, voice_id: nil, speed: nil,
61
+ response_format: "mp3", voice_settings: nil)
62
+ case @provider
63
+ when :openai
64
+ openai_speak(text, model: model, voice: voice_id || voice,
65
+ speed: speed, response_format: response_format)
66
+ when :elevenlabs
67
+ elevenlabs_speak(text, model: model, voice_id: voice_id || voice,
68
+ speed: speed, response_format: response_format,
69
+ voice_settings: voice_settings)
70
+ end
71
+ end
72
+
73
+ # Synthesize speech with streaming
74
+ #
75
+ # @param text [String] text to convert
76
+ # @param model [String] model identifier
77
+ # @param voice [String] voice name
78
+ # @param voice_id [String, nil] voice ID
79
+ # @param speed [Float, nil] speed multiplier
80
+ # @param response_format [String] output format
81
+ # @param voice_settings [Hash, nil] ElevenLabs voice settings
82
+ # @yield [StreamChunk] each audio chunk as it arrives
83
+ # @return [Response]
84
+ def speak_streaming(text, model:, voice:, voice_id: nil, speed: nil,
85
+ response_format: "mp3", voice_settings: nil, &block)
86
+ case @provider
87
+ when :openai
88
+ openai_speak_streaming(text, model: model, voice: voice_id || voice,
89
+ speed: speed, response_format: response_format,
90
+ &block)
91
+ when :elevenlabs
92
+ elevenlabs_speak_streaming(text, model: model,
93
+ voice_id: voice_id || voice,
94
+ speed: speed,
95
+ response_format: response_format,
96
+ voice_settings: voice_settings, &block)
97
+ end
98
+ end
99
+
100
+ private
101
+
102
+ # ============================================================
103
+ # Provider validation
104
+ # ============================================================
105
+
106
+ def validate_provider!(provider)
107
+ return if SUPPORTED_PROVIDERS.include?(provider)
108
+
109
+ raise UnsupportedProviderError.new(
110
+ "Provider :#{provider} is not yet supported for text-to-speech. " \
111
+ "Supported providers: #{SUPPORTED_PROVIDERS.map { |p| ":#{p}" }.join(", ")}.",
112
+ provider: provider
113
+ )
114
+ end
115
+
116
+ # ============================================================
117
+ # OpenAI implementation
118
+ # ============================================================
119
+
120
+ def openai_speak(text, model:, voice:, speed:, response_format:)
121
+ body = openai_request_body(text, model: model, voice: voice,
122
+ speed: speed, response_format: response_format)
123
+
124
+ response = openai_connection.post("/v1/audio/speech") do |req|
125
+ req.headers["Content-Type"] = "application/json"
126
+ req.body = body.to_json
127
+ end
128
+
129
+ handle_error_response!(response) unless response.success?
130
+
131
+ Response.new(
132
+ audio: response.body,
133
+ format: response_format.to_sym,
134
+ model: model,
135
+ voice: voice
136
+ )
137
+ end
138
+
139
+ def openai_speak_streaming(text, model:, voice:, speed:,
140
+ response_format:, &block)
141
+ body = openai_request_body(text, model: model, voice: voice,
142
+ speed: speed, response_format: response_format)
143
+ chunks = []
144
+
145
+ openai_connection.post("/v1/audio/speech") do |req|
146
+ req.headers["Content-Type"] = "application/json"
147
+ req.body = body.to_json
148
+ req.options.on_data = proc do |chunk, _size, env|
149
+ if env.status == 200
150
+ chunk_obj = StreamChunk.new(audio: chunk)
151
+ chunks << chunk
152
+ block&.call(chunk_obj)
153
+ end
154
+ end
155
+ end
156
+
157
+ Response.new(
158
+ audio: chunks.join,
159
+ format: response_format.to_sym,
160
+ model: model,
161
+ voice: voice
162
+ )
163
+ end
164
+
165
+ def openai_request_body(text, model:, voice:, speed:, response_format:)
166
+ body = {
167
+ model: model,
168
+ input: text,
169
+ voice: voice,
170
+ response_format: response_format.to_s
171
+ }
172
+ body[:speed] = speed if speed && (speed - 1.0).abs > Float::EPSILON
173
+ body
174
+ end
175
+
176
+ def openai_connection
177
+ @openai_connection ||= Faraday.new(url: openai_api_base) do |f|
178
+ f.headers["Authorization"] = "Bearer #{openai_api_key}"
179
+ f.adapter Faraday.default_adapter
180
+ f.options.timeout = 120
181
+ f.options.open_timeout = 30
182
+ end
183
+ end
184
+
185
+ def openai_api_key
186
+ key = RubyLLM.config.openai_api_key
187
+ unless key
188
+ raise ConfigurationError,
189
+ "OpenAI API key is required for text-to-speech. " \
190
+ "Set it via: RubyLLM.configure { |c| c.openai_api_key = 'sk-...' }"
191
+ end
192
+ key
193
+ end
194
+
195
+ def openai_api_base
196
+ base = RubyLLM.config.openai_api_base
197
+ (base && !base.empty?) ? base : "https://api.openai.com"
198
+ end
199
+
200
+ # ============================================================
201
+ # ElevenLabs implementation
202
+ # ============================================================
203
+
204
+ def elevenlabs_speak(text, model:, voice_id:, speed:,
205
+ response_format:, voice_settings:)
206
+ path = "/v1/text-to-speech/#{voice_id}"
207
+ body = elevenlabs_request_body(text, model: model, speed: speed,
208
+ voice_settings: voice_settings)
209
+ format_param = elevenlabs_output_format(response_format)
210
+
211
+ response = elevenlabs_connection.post(path) do |req|
212
+ req.headers["Content-Type"] = "application/json"
213
+ req.params["output_format"] = format_param
214
+ req.body = body.to_json
215
+ end
216
+
217
+ handle_error_response!(response) unless response.success?
218
+
219
+ Response.new(
220
+ audio: response.body,
221
+ format: response_format.to_sym,
222
+ model: model,
223
+ voice: voice_id
224
+ )
225
+ end
226
+
227
+ def elevenlabs_speak_streaming(text, model:, voice_id:, speed:,
228
+ response_format:, voice_settings:, &block)
229
+ path = "/v1/text-to-speech/#{voice_id}/stream"
230
+ body = elevenlabs_request_body(text, model: model, speed: speed,
231
+ voice_settings: voice_settings)
232
+ format_param = elevenlabs_output_format(response_format)
233
+ chunks = []
234
+
235
+ elevenlabs_connection.post(path) do |req|
236
+ req.headers["Content-Type"] = "application/json"
237
+ req.params["output_format"] = format_param
238
+ req.body = body.to_json
239
+ req.options.on_data = proc do |chunk, _size, env|
240
+ if env.status == 200
241
+ chunk_obj = StreamChunk.new(audio: chunk)
242
+ chunks << chunk
243
+ block&.call(chunk_obj)
244
+ end
245
+ end
246
+ end
247
+
248
+ Response.new(
249
+ audio: chunks.join,
250
+ format: response_format.to_sym,
251
+ model: model,
252
+ voice: voice_id
253
+ )
254
+ end
255
+
256
+ def elevenlabs_request_body(text, model:, speed:, voice_settings:)
257
+ body = {
258
+ text: text,
259
+ model_id: model
260
+ }
261
+
262
+ vs = voice_settings&.dup || {}
263
+ vs[:speed] = speed if speed && (speed - 1.0).abs > Float::EPSILON
264
+ body[:voice_settings] = vs unless vs.empty?
265
+
266
+ body
267
+ end
268
+
269
+ ELEVENLABS_FORMAT_MAP = {
270
+ "mp3" => "mp3_44100_128",
271
+ "pcm" => "pcm_44100",
272
+ "ulaw" => "ulaw_8000"
273
+ }.freeze
274
+
275
+ def elevenlabs_output_format(format)
276
+ ELEVENLABS_FORMAT_MAP[format.to_s] || "mp3_44100_128"
277
+ end
278
+
279
+ def elevenlabs_connection
280
+ @elevenlabs_connection ||= Faraday.new(url: elevenlabs_api_base) do |f|
281
+ f.headers["xi-api-key"] = elevenlabs_api_key
282
+ f.adapter Faraday.default_adapter
283
+ f.options.timeout = 120
284
+ f.options.open_timeout = 30
285
+ end
286
+ end
287
+
288
+ def elevenlabs_api_key
289
+ key = RubyLLM::Agents.configuration.elevenlabs_api_key
290
+ unless key
291
+ raise ConfigurationError,
292
+ "ElevenLabs API key is required for text-to-speech. " \
293
+ "Set it via: RubyLLM::Agents.configure { |c| c.elevenlabs_api_key = 'xi-...' }"
294
+ end
295
+ key
296
+ end
297
+
298
+ def elevenlabs_api_base
299
+ base = RubyLLM::Agents.configuration.elevenlabs_api_base
300
+ (base && !base.empty?) ? base : "https://api.elevenlabs.io"
301
+ end
302
+
303
+ # ============================================================
304
+ # Shared error handling
305
+ # ============================================================
306
+
307
+ def handle_error_response!(response)
308
+ raise SpeechApiError.new(
309
+ "TTS API request failed (HTTP #{response.status}): #{error_message_from(response)}",
310
+ status: response.status,
311
+ response_body: response.body
312
+ )
313
+ end
314
+
315
+ def error_message_from(response)
316
+ parsed = JSON.parse(response.body)
317
+ if parsed.is_a?(Hash)
318
+ parsed.dig("error", "message") || parsed["detail"] || parsed["error"] || response.body
319
+ else
320
+ response.body
321
+ end
322
+ rescue JSON::ParserError
323
+ response.body.to_s[0, 200]
324
+ end
325
+ end
326
+ end
327
+ end
328
+ end