ruby_llm-agents 0.4.0 → 1.0.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. checksums.yaml +4 -4
  2. data/README.md +225 -34
  3. data/app/controllers/ruby_llm/agents/agents_controller.rb +136 -16
  4. data/app/controllers/ruby_llm/agents/api_configurations_controller.rb +214 -0
  5. data/app/controllers/ruby_llm/agents/dashboard_controller.rb +29 -9
  6. data/app/controllers/ruby_llm/agents/{settings_controller.rb → system_config_controller.rb} +3 -3
  7. data/app/controllers/ruby_llm/agents/tenants_controller.rb +109 -0
  8. data/app/controllers/ruby_llm/agents/workflows_controller.rb +355 -0
  9. data/app/helpers/ruby_llm/agents/application_helper.rb +25 -0
  10. data/app/models/ruby_llm/agents/api_configuration.rb +386 -0
  11. data/app/models/ruby_llm/agents/execution.rb +3 -0
  12. data/app/models/ruby_llm/agents/tenant_budget.rb +112 -14
  13. data/app/services/ruby_llm/agents/agent_registry.rb +51 -12
  14. data/app/views/layouts/ruby_llm/agents/application.html.erb +5 -30
  15. data/app/views/ruby_llm/agents/agents/_agent.html.erb +13 -1
  16. data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +235 -0
  17. data/app/views/ruby_llm/agents/agents/_config_embedder.html.erb +70 -0
  18. data/app/views/ruby_llm/agents/agents/_config_image_generator.html.erb +152 -0
  19. data/app/views/ruby_llm/agents/agents/_config_moderator.html.erb +63 -0
  20. data/app/views/ruby_llm/agents/agents/_config_speaker.html.erb +108 -0
  21. data/app/views/ruby_llm/agents/agents/_config_transcriber.html.erb +91 -0
  22. data/app/views/ruby_llm/agents/agents/_workflow.html.erb +1 -1
  23. data/app/views/ruby_llm/agents/agents/index.html.erb +74 -9
  24. data/app/views/ruby_llm/agents/agents/show.html.erb +18 -378
  25. data/app/views/ruby_llm/agents/api_configurations/_api_key_field.html.erb +34 -0
  26. data/app/views/ruby_llm/agents/api_configurations/_form.html.erb +288 -0
  27. data/app/views/ruby_llm/agents/api_configurations/edit.html.erb +95 -0
  28. data/app/views/ruby_llm/agents/api_configurations/edit_tenant.html.erb +97 -0
  29. data/app/views/ruby_llm/agents/api_configurations/show.html.erb +211 -0
  30. data/app/views/ruby_llm/agents/api_configurations/tenant.html.erb +179 -0
  31. data/app/views/ruby_llm/agents/dashboard/_action_center.html.erb +1 -1
  32. data/app/views/ruby_llm/agents/dashboard/_agent_comparison.html.erb +269 -15
  33. data/app/views/ruby_llm/agents/executions/show.html.erb +98 -0
  34. data/app/views/ruby_llm/agents/shared/_agent_type_badge.html.erb +93 -0
  35. data/app/views/ruby_llm/agents/{settings → system_config}/show.html.erb +1 -1
  36. data/app/views/ruby_llm/agents/tenants/_form.html.erb +150 -0
  37. data/app/views/ruby_llm/agents/tenants/edit.html.erb +13 -0
  38. data/app/views/ruby_llm/agents/tenants/index.html.erb +129 -0
  39. data/app/views/ruby_llm/agents/tenants/show.html.erb +374 -0
  40. data/app/views/ruby_llm/agents/workflows/_step_performance.html.erb +236 -0
  41. data/app/views/ruby_llm/agents/workflows/_structure_parallel.html.erb +76 -0
  42. data/app/views/ruby_llm/agents/workflows/_structure_pipeline.html.erb +74 -0
  43. data/app/views/ruby_llm/agents/workflows/_structure_router.html.erb +108 -0
  44. data/app/views/ruby_llm/agents/workflows/show.html.erb +442 -0
  45. data/config/routes.rb +13 -1
  46. data/lib/generators/ruby_llm_agents/agent_generator.rb +56 -7
  47. data/lib/generators/ruby_llm_agents/api_configuration_generator.rb +100 -0
  48. data/lib/generators/ruby_llm_agents/background_remover_generator.rb +110 -0
  49. data/lib/generators/ruby_llm_agents/embedder_generator.rb +107 -0
  50. data/lib/generators/ruby_llm_agents/image_analyzer_generator.rb +115 -0
  51. data/lib/generators/ruby_llm_agents/image_editor_generator.rb +108 -0
  52. data/lib/generators/ruby_llm_agents/image_generator_generator.rb +116 -0
  53. data/lib/generators/ruby_llm_agents/image_pipeline_generator.rb +178 -0
  54. data/lib/generators/ruby_llm_agents/image_transformer_generator.rb +109 -0
  55. data/lib/generators/ruby_llm_agents/image_upscaler_generator.rb +103 -0
  56. data/lib/generators/ruby_llm_agents/image_variator_generator.rb +102 -0
  57. data/lib/generators/ruby_llm_agents/install_generator.rb +76 -4
  58. data/lib/generators/ruby_llm_agents/restructure_generator.rb +292 -0
  59. data/lib/generators/ruby_llm_agents/speaker_generator.rb +121 -0
  60. data/lib/generators/ruby_llm_agents/templates/add_execution_type_migration.rb.tt +8 -0
  61. data/lib/generators/ruby_llm_agents/templates/agent.rb.tt +99 -84
  62. data/lib/generators/ruby_llm_agents/templates/application_agent.rb.tt +42 -40
  63. data/lib/generators/ruby_llm_agents/templates/application_background_remover.rb.tt +26 -0
  64. data/lib/generators/ruby_llm_agents/templates/application_embedder.rb.tt +50 -0
  65. data/lib/generators/ruby_llm_agents/templates/application_image_analyzer.rb.tt +26 -0
  66. data/lib/generators/ruby_llm_agents/templates/application_image_editor.rb.tt +20 -0
  67. data/lib/generators/ruby_llm_agents/templates/application_image_generator.rb.tt +38 -0
  68. data/lib/generators/ruby_llm_agents/templates/application_image_pipeline.rb.tt +139 -0
  69. data/lib/generators/ruby_llm_agents/templates/application_image_transformer.rb.tt +21 -0
  70. data/lib/generators/ruby_llm_agents/templates/application_image_upscaler.rb.tt +20 -0
  71. data/lib/generators/ruby_llm_agents/templates/application_image_variator.rb.tt +20 -0
  72. data/lib/generators/ruby_llm_agents/templates/application_speaker.rb.tt +49 -0
  73. data/lib/generators/ruby_llm_agents/templates/application_transcriber.rb.tt +53 -0
  74. data/lib/generators/ruby_llm_agents/templates/background_remover.rb.tt +44 -0
  75. data/lib/generators/ruby_llm_agents/templates/create_api_configurations_migration.rb.tt +90 -0
  76. data/lib/generators/ruby_llm_agents/templates/embedder.rb.tt +41 -0
  77. data/lib/generators/ruby_llm_agents/templates/image_analyzer.rb.tt +45 -0
  78. data/lib/generators/ruby_llm_agents/templates/image_editor.rb.tt +35 -0
  79. data/lib/generators/ruby_llm_agents/templates/image_generator.rb.tt +47 -0
  80. data/lib/generators/ruby_llm_agents/templates/image_pipeline.rb.tt +50 -0
  81. data/lib/generators/ruby_llm_agents/templates/image_transformer.rb.tt +44 -0
  82. data/lib/generators/ruby_llm_agents/templates/image_upscaler.rb.tt +38 -0
  83. data/lib/generators/ruby_llm_agents/templates/image_variator.rb.tt +33 -0
  84. data/lib/generators/ruby_llm_agents/templates/skills/AGENTS.md.tt +228 -0
  85. data/lib/generators/ruby_llm_agents/templates/skills/BACKGROUND_REMOVERS.md.tt +131 -0
  86. data/lib/generators/ruby_llm_agents/templates/skills/EMBEDDERS.md.tt +255 -0
  87. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_ANALYZERS.md.tt +120 -0
  88. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_EDITORS.md.tt +102 -0
  89. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_GENERATORS.md.tt +282 -0
  90. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_PIPELINES.md.tt +228 -0
  91. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_TRANSFORMERS.md.tt +120 -0
  92. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_UPSCALERS.md.tt +110 -0
  93. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_VARIATORS.md.tt +120 -0
  94. data/lib/generators/ruby_llm_agents/templates/skills/SPEAKERS.md.tt +212 -0
  95. data/lib/generators/ruby_llm_agents/templates/skills/TOOLS.md.tt +227 -0
  96. data/lib/generators/ruby_llm_agents/templates/skills/TRANSCRIBERS.md.tt +251 -0
  97. data/lib/generators/ruby_llm_agents/templates/skills/WORKFLOWS.md.tt +300 -0
  98. data/lib/generators/ruby_llm_agents/templates/speaker.rb.tt +56 -0
  99. data/lib/generators/ruby_llm_agents/templates/transcriber.rb.tt +51 -0
  100. data/lib/generators/ruby_llm_agents/transcriber_generator.rb +107 -0
  101. data/lib/generators/ruby_llm_agents/upgrade_generator.rb +152 -1
  102. data/lib/ruby_llm/agents/audio/speaker.rb +553 -0
  103. data/lib/ruby_llm/agents/audio/transcriber.rb +669 -0
  104. data/lib/ruby_llm/agents/base_agent.rb +675 -0
  105. data/lib/ruby_llm/agents/core/base/moderation_dsl.rb +181 -0
  106. data/lib/ruby_llm/agents/core/base/moderation_execution.rb +274 -0
  107. data/lib/ruby_llm/agents/core/base.rb +135 -0
  108. data/lib/ruby_llm/agents/core/configuration.rb +981 -0
  109. data/lib/ruby_llm/agents/core/errors.rb +150 -0
  110. data/lib/ruby_llm/agents/{instrumentation.rb → core/instrumentation.rb} +93 -4
  111. data/lib/ruby_llm/agents/core/llm_tenant.rb +358 -0
  112. data/lib/ruby_llm/agents/core/resolved_config.rb +348 -0
  113. data/lib/ruby_llm/agents/{version.rb → core/version.rb} +1 -1
  114. data/lib/ruby_llm/agents/dsl/base.rb +110 -0
  115. data/lib/ruby_llm/agents/dsl/caching.rb +142 -0
  116. data/lib/ruby_llm/agents/dsl/reliability.rb +307 -0
  117. data/lib/ruby_llm/agents/dsl.rb +41 -0
  118. data/lib/ruby_llm/agents/image/analyzer/dsl.rb +130 -0
  119. data/lib/ruby_llm/agents/image/analyzer/execution.rb +402 -0
  120. data/lib/ruby_llm/agents/image/analyzer.rb +90 -0
  121. data/lib/ruby_llm/agents/image/background_remover/dsl.rb +154 -0
  122. data/lib/ruby_llm/agents/image/background_remover/execution.rb +240 -0
  123. data/lib/ruby_llm/agents/image/background_remover.rb +89 -0
  124. data/lib/ruby_llm/agents/image/concerns/image_operation_dsl.rb +91 -0
  125. data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +165 -0
  126. data/lib/ruby_llm/agents/image/editor/dsl.rb +56 -0
  127. data/lib/ruby_llm/agents/image/editor/execution.rb +207 -0
  128. data/lib/ruby_llm/agents/image/editor.rb +92 -0
  129. data/lib/ruby_llm/agents/image/generator/active_storage_support.rb +127 -0
  130. data/lib/ruby_llm/agents/image/generator/content_policy.rb +95 -0
  131. data/lib/ruby_llm/agents/image/generator/pricing.rb +353 -0
  132. data/lib/ruby_llm/agents/image/generator/templates.rb +124 -0
  133. data/lib/ruby_llm/agents/image/generator.rb +455 -0
  134. data/lib/ruby_llm/agents/image/pipeline/dsl.rb +213 -0
  135. data/lib/ruby_llm/agents/image/pipeline/execution.rb +382 -0
  136. data/lib/ruby_llm/agents/image/pipeline.rb +97 -0
  137. data/lib/ruby_llm/agents/image/transformer/dsl.rb +148 -0
  138. data/lib/ruby_llm/agents/image/transformer/execution.rb +223 -0
  139. data/lib/ruby_llm/agents/image/transformer.rb +95 -0
  140. data/lib/ruby_llm/agents/image/upscaler/dsl.rb +83 -0
  141. data/lib/ruby_llm/agents/image/upscaler/execution.rb +219 -0
  142. data/lib/ruby_llm/agents/image/upscaler.rb +81 -0
  143. data/lib/ruby_llm/agents/image/variator/dsl.rb +62 -0
  144. data/lib/ruby_llm/agents/image/variator/execution.rb +189 -0
  145. data/lib/ruby_llm/agents/image/variator.rb +80 -0
  146. data/lib/ruby_llm/agents/{alert_manager.rb → infrastructure/alert_manager.rb} +17 -22
  147. data/lib/ruby_llm/agents/infrastructure/budget/budget_query.rb +145 -0
  148. data/lib/ruby_llm/agents/infrastructure/budget/config_resolver.rb +149 -0
  149. data/lib/ruby_llm/agents/infrastructure/budget/forecaster.rb +68 -0
  150. data/lib/ruby_llm/agents/infrastructure/budget/spend_recorder.rb +279 -0
  151. data/lib/ruby_llm/agents/infrastructure/budget_tracker.rb +275 -0
  152. data/lib/ruby_llm/agents/{execution_logger_job.rb → infrastructure/execution_logger_job.rb} +17 -1
  153. data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/executor.rb +2 -1
  154. data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/retry_strategy.rb +9 -3
  155. data/lib/ruby_llm/agents/{reliability.rb → infrastructure/reliability.rb} +11 -21
  156. data/lib/ruby_llm/agents/pipeline/builder.rb +215 -0
  157. data/lib/ruby_llm/agents/pipeline/context.rb +255 -0
  158. data/lib/ruby_llm/agents/pipeline/executor.rb +86 -0
  159. data/lib/ruby_llm/agents/pipeline/middleware/base.rb +124 -0
  160. data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +95 -0
  161. data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +171 -0
  162. data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +415 -0
  163. data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +276 -0
  164. data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +196 -0
  165. data/lib/ruby_llm/agents/pipeline.rb +68 -0
  166. data/lib/ruby_llm/agents/{engine.rb → rails/engine.rb} +79 -10
  167. data/lib/ruby_llm/agents/results/background_removal_result.rb +286 -0
  168. data/lib/ruby_llm/agents/{result.rb → results/base.rb} +73 -1
  169. data/lib/ruby_llm/agents/results/embedding_result.rb +243 -0
  170. data/lib/ruby_llm/agents/results/image_analysis_result.rb +314 -0
  171. data/lib/ruby_llm/agents/results/image_edit_result.rb +250 -0
  172. data/lib/ruby_llm/agents/results/image_generation_result.rb +346 -0
  173. data/lib/ruby_llm/agents/results/image_pipeline_result.rb +399 -0
  174. data/lib/ruby_llm/agents/results/image_transform_result.rb +251 -0
  175. data/lib/ruby_llm/agents/results/image_upscale_result.rb +255 -0
  176. data/lib/ruby_llm/agents/results/image_variation_result.rb +237 -0
  177. data/lib/ruby_llm/agents/results/moderation_result.rb +158 -0
  178. data/lib/ruby_llm/agents/results/speech_result.rb +338 -0
  179. data/lib/ruby_llm/agents/results/transcription_result.rb +408 -0
  180. data/lib/ruby_llm/agents/text/embedder.rb +444 -0
  181. data/lib/ruby_llm/agents/text/moderator.rb +237 -0
  182. data/lib/ruby_llm/agents/workflow/async.rb +220 -0
  183. data/lib/ruby_llm/agents/workflow/async_executor.rb +156 -0
  184. data/lib/ruby_llm/agents/{workflow.rb → workflow/orchestrator.rb} +6 -5
  185. data/lib/ruby_llm/agents/workflow/parallel.rb +34 -17
  186. data/lib/ruby_llm/agents/workflow/thread_pool.rb +185 -0
  187. data/lib/ruby_llm/agents.rb +86 -20
  188. metadata +189 -35
  189. data/lib/ruby_llm/agents/base/caching.rb +0 -40
  190. data/lib/ruby_llm/agents/base/cost_calculation.rb +0 -105
  191. data/lib/ruby_llm/agents/base/dsl.rb +0 -324
  192. data/lib/ruby_llm/agents/base/execution.rb +0 -283
  193. data/lib/ruby_llm/agents/base/reliability_dsl.rb +0 -82
  194. data/lib/ruby_llm/agents/base/reliability_execution.rb +0 -136
  195. data/lib/ruby_llm/agents/base/response_building.rb +0 -86
  196. data/lib/ruby_llm/agents/base/tool_tracking.rb +0 -57
  197. data/lib/ruby_llm/agents/base.rb +0 -209
  198. data/lib/ruby_llm/agents/budget_tracker.rb +0 -471
  199. data/lib/ruby_llm/agents/configuration.rb +0 -357
  200. /data/lib/ruby_llm/agents/{deprecations.rb → core/deprecations.rb} +0 -0
  201. /data/lib/ruby_llm/agents/{inflections.rb → core/inflections.rb} +0 -0
  202. /data/lib/ruby_llm/agents/{attempt_tracker.rb → infrastructure/attempt_tracker.rb} +0 -0
  203. /data/lib/ruby_llm/agents/{cache_helper.rb → infrastructure/cache_helper.rb} +0 -0
  204. /data/lib/ruby_llm/agents/{circuit_breaker.rb → infrastructure/circuit_breaker.rb} +0 -0
  205. /data/lib/ruby_llm/agents/{redactor.rb → infrastructure/redactor.rb} +0 -0
  206. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/breaker_manager.rb +0 -0
  207. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/execution_constraints.rb +0 -0
  208. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/fallback_routing.rb +0 -0
@@ -0,0 +1,553 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require_relative "../results/speech_result"
5
+
6
+ module RubyLLM
7
+ module Agents
8
+ # Base class for creating text-to-speech speakers using the middleware pipeline
9
+ #
10
+ # Speaker provides a DSL for configuring text-to-audio operations with
11
+ # built-in execution tracking, budget controls, and multi-tenancy support
12
+ # through the middleware pipeline.
13
+ #
14
+ # @example Basic usage
15
+ # class ArticleNarrator < RubyLLM::Agents::Speaker
16
+ # provider :openai
17
+ # model 'tts-1-hd'
18
+ # voice 'nova'
19
+ # end
20
+ #
21
+ # result = ArticleNarrator.call(text: "Hello world")
22
+ # result.audio # => Binary audio data
23
+ # result.save_to("output.mp3")
24
+ #
25
+ # @example With voice settings
26
+ # class PremiumNarrator < RubyLLM::Agents::Speaker
27
+ # provider :elevenlabs
28
+ # model 'eleven_multilingual_v2'
29
+ # voice 'Rachel'
30
+ #
31
+ # voice_settings do
32
+ # stability 0.5
33
+ # similarity_boost 0.75
34
+ # end
35
+ # end
36
+ #
37
+ # @api public
38
+ class Speaker < BaseAgent
39
+ class << self
# Identifies the agent family this class belongs to.
#
# The DSL readers on this class compare a parent's agent_type against
# +:audio+ to decide whether to inherit a setting from it.
#
# @return [Symbol] always +:audio+
def agent_type
  :audio
end
46
+
47
+ # @!group Speaker-specific DSL
48
+
# Reads or assigns the text-to-speech provider for this class.
#
# Lookup order: value set on this class, then the parent class when that
# parent is itself an audio agent, then the configured default provider.
#
# @param value [Symbol, nil] provider to store (e.g. :openai, :elevenlabs,
#   :google, :polly); omit to only read
# @return [Symbol] the effective provider
def provider(value = nil)
  @provider = value if value

  own = instance_variable_get(:@provider)
  return own if own

  parent = superclass
  audio_parent = parent.respond_to?(:agent_type) && parent.agent_type == :audio
  audio_parent ? parent.provider : default_tts_provider
end
63
+
# Reads or assigns the text-to-speech model for this class.
#
# Lookup order: value set on this class, then the parent class when that
# parent is itself an audio agent, then the configured default model.
#
# @param value [String, nil] model identifier to store; omit to only read
# @return [String] the effective model identifier
def model(value = nil)
  @model = value if value

  own = instance_variable_get(:@model)
  return own if own

  parent = superclass
  audio_parent = parent.respond_to?(:agent_type) && parent.agent_type == :audio
  audio_parent ? parent.model : default_tts_model
end
78
+
# Reads or assigns the named voice for this class.
#
# When nothing is set here the value comes from the parent class if it
# exposes +voice+, otherwise from the configured default voice.
#
# @param value [String, nil] voice name to store; omit to only read
# @return [String] the effective voice name
def voice(value = nil)
  @voice = value if value
  return @voice if @voice

  inherited_or_default(:voice, default_tts_voice)
end
87
+
# Reads or assigns an explicit voice ID (used for custom/cloned voices).
#
# Falls back to the parent class's +voice_id+ when available, else nil.
#
# @param value [String, nil] voice ID to store; omit to only read
# @return [String, nil] the effective voice ID, if any
def voice_id(value = nil)
  @voice_id = value if value
  return @voice_id if @voice_id

  inherited_or_default(:voice_id, nil)
end
96
+
# Reads or assigns the speech speed multiplier for this class.
#
# Falls back to the parent class's +speed+ when available, else 1.0.
#
# @param value [Float, nil] multiplier to store; omit to only read
# @return [Float] the effective speed
def speed(value = nil)
  @speed = value if value
  return @speed if @speed

  inherited_or_default(:speed, 1.0)
end
105
+
# Reads or assigns the audio output format for this class.
#
# Falls back to the parent class's +output_format+ when available,
# else +:mp3+.
#
# @param value [Symbol, nil] format to store (:mp3, :wav, :ogg, ...);
#   omit to only read
# @return [Symbol] the effective format
def output_format(value = nil)
  @output_format = value if value
  return @output_format if @output_format

  inherited_or_default(:output_format, :mp3)
end
114
+
# Reads or assigns the class-level streaming flag.
#
# Unlike the other DSL readers, an explicit +false+ is stored and
# returned as-is; only a never-assigned flag falls through to the parent
# class (or +false+ when the parent has no +streaming+).
#
# @param value [Boolean, nil] flag to store; omit (nil) to only read
# @return [Boolean] the effective streaming setting
def streaming(value = nil)
  @streaming = value unless value.nil?
  return @streaming if instance_variable_defined?(:@streaming)

  inherited_or_default(:streaming, false)
end
123
+
# Predicate alias for the class-level streaming flag.
#
# @return [Boolean] whatever {streaming} currently resolves to
def streaming?
  streaming
end
127
+
128
+ # @!endgroup
129
+
130
+ # @!group Voice Settings DSL
131
+
# Configures voice settings (only forwarded to the API for ElevenLabs).
#
# The settings object is created on first use for this class; a given
# block is evaluated in its context so the DSL setters can be called bare.
#
# @yield block evaluated against the VoiceSettings instance
# @return [VoiceSettings] this class's voice settings
def voice_settings(&block)
  settings = (@voice_settings ||= VoiceSettings.new)
  settings.instance_eval(&block) if block
  settings
end
141
+
# Returns the voice settings without creating them as a side effect.
#
# @return [VoiceSettings, nil] settings defined on this class, else the
#   parent's +voice_settings_config+ when available, else nil
def voice_settings_config
  return @voice_settings if @voice_settings

  inherited_or_default(:voice_settings_config, nil)
end
145
+
146
+ # @!endgroup
147
+
148
+ # @!group Lexicon DSL
149
+
# Configures the pronunciation lexicon for this class.
#
# The lexicon is created on first use for this class; a given block is
# evaluated in its context so +pronounce+ can be called bare.
#
# @yield block evaluated against the Lexicon instance
# @return [Lexicon] this class's lexicon
def lexicon(&block)
  entries = (@lexicon ||= Lexicon.new)
  entries.instance_eval(&block) if block
  entries
end
159
+
# Returns the lexicon without creating one as a side effect.
#
# @return [Lexicon, nil] lexicon defined on this class, else the parent's
#   +lexicon_config+ when available, else nil
def lexicon_config
  return @lexicon if @lexicon

  inherited_or_default(:lexicon_config, nil)
end
163
+
164
+ # @!endgroup
165
+
# Builds a speaker for the given text and runs it in one step.
#
# @param text [String] text to convert to speech
# @param options [Hash] additional options passed to the constructor
# @yield [audio_chunk] forwarded to the instance-level call
# @return [SpeechResult] the speech result
def call(text:, **options, &block)
  speaker = new(text: text, **options)
  speaker.call(&block)
end
175
+
# Runs the speaker with streaming forced on.
#
# Any +:streaming+ entry in +options+ is overridden with +true+.
#
# @param text [String] text to convert to speech
# @param options [Hash] additional options passed to the constructor
# @yield [audio_chunk] called for each audio chunk
# @return [SpeechResult] the speech result
# @raise [ArgumentError] when no block is given
def stream(text:, **options, &block)
  raise ArgumentError, "A block is required for streaming" if block.nil?

  streaming_options = options.merge(streaming: true)
  new(text: text, **streaming_options).call(&block)
end
188
+
189
+ private
190
+
# Resolves a DSL setting from the parent class, or uses a fallback.
#
# @param method [Symbol] name of the reader to call on the superclass
# @param default [Object] value returned when the superclass does not
#   respond to +method+
# @return [Object] the parent's value or +default+
def inherited_or_default(method, default)
  parent = superclass
  return default unless parent.respond_to?(method)

  parent.send(method)
end
194
+
# Provider used when neither this class nor an audio parent sets one.
#
# @return [Symbol] the configured default TTS provider, or +:openai+ when
#   reading the configuration raises a StandardError
def default_tts_provider
  config = RubyLLM::Agents.configuration
  config.default_tts_provider
rescue StandardError
  :openai
end
200
+
# Model used when neither this class nor an audio parent sets one.
#
# @return [String] the configured default TTS model, or "tts-1" when
#   reading the configuration raises a StandardError
def default_tts_model
  config = RubyLLM::Agents.configuration
  config.default_tts_model
rescue StandardError
  "tts-1"
end
206
+
# Voice used when neither this class nor a parent sets one.
#
# @return [String] the configured default TTS voice, or "nova" when
#   reading the configuration raises a StandardError
def default_tts_voice
  config = RubyLLM::Agents.configuration
  config.default_tts_voice
rescue StandardError
  "nova"
end
212
+ end
213
+
# Holds the tunable voice parameters configured through the
# +voice_settings+ DSL block.
#
# The short setters (+stability+, +style+, ...) exist so they can be
# called bare inside an instance_eval'd block; the +*_value+ accessors
# expose the stored numbers.
class VoiceSettings
  attr_accessor :stability_value, :similarity_boost_value, :style_value, :speaker_boost_value

  # Starts from fixed defaults: stability 0.5, similarity boost 0.75,
  # style 0.0, speaker boost enabled.
  def initialize
    self.stability_value = 0.5
    self.similarity_boost_value = 0.75
    self.style_value = 0.0
    self.speaker_boost_value = true
  end

  # @param value [Float] stability to store
  # @return [Float] the stored value
  def stability(value)
    self.stability_value = value
  end

  # @param value [Float] similarity boost to store
  # @return [Float] the stored value
  def similarity_boost(value)
    self.similarity_boost_value = value
  end

  # @param value [Float] style amount to store
  # @return [Float] the stored value
  def style(value)
    self.style_value = value
  end

  # @param value [Boolean] whether speaker boost is enabled
  # @return [Boolean] the stored value
  def speaker_boost(value)
    self.speaker_boost_value = value
  end

  # Serializes the settings; note speaker boost is emitted under the
  # +:use_speaker_boost+ key.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    {
      stability: stability_value,
      similarity_boost: similarity_boost_value,
      style: style_value,
      use_speaker_boost: speaker_boost_value
    }
  end
end
250
+
# Pronunciation lexicon: maps written words to the text that should be
# spoken in their place.
class Lexicon
  # @return [Hash] word => pronunciation, in insertion order
  attr_reader :pronunciations

  def initialize
    @pronunciations = {}
  end

  # Registers (or overwrites) the pronunciation for a word.
  #
  # @param word [String, Symbol] word to look for in the input text
  # @param pronunciation [String] text to speak instead
  # @return [String] the stored pronunciation
  def pronounce(word, pronunciation)
    @pronunciations[word] = pronunciation
  end

  # Returns a copy of +text+ with every registered word replaced.
  #
  # Matching is case-insensitive and bounded by +\b+ on both sides.
  # Entries are applied in insertion order, each one over the output of
  # the previous, and the caller's string is never mutated.
  #
  # @param text [String] text to rewrite
  # @return [String] a new string with pronunciations substituted
  def apply(text)
    pronunciations.each_with_object(text.dup) do |(word, pronunciation), spoken|
      pattern = /\b#{Regexp.escape(word)}\b/i
      # Block form inserts the pronunciation literally. The string form
      # would expand backslash sequences (\0, \1, \\) in it as
      # back-references and corrupt the output.
      spoken.gsub!(pattern) { pronunciation }
    end
  end

  # @return [Hash] a shallow copy of the registered pronunciations
  def to_h
    pronunciations.dup
  end
end
275
+
276
+ # @!attribute [r] text
277
+ # @return [String] Text to convert to speech
278
+ attr_reader :text
279
+
# Creates a new Speaker instance.
#
# The +:streaming+ option is consumed here and kept as a per-call flag;
# it is not forwarded to the parent constructor. When no +:model+ is
# given, the class-level TTS model is used.
#
# @param text [String] text to convert to speech
# @param options [Hash] configuration options forwarded to the parent
def initialize(text:, **options)
  @text = text
  @streaming_block = nil
  @runtime_streaming = options.delete(:streaming)

  options[:model] = self.class.model unless options[:model]

  super(**options)
end
294
+
# Runs the speech request through the middleware pipeline.
#
# The block, if any, is remembered so the streaming path can hand audio
# chunks to it.
#
# @yield [audio_chunk] called for each audio chunk when streaming
# @return [SpeechResult] the output stored on the resulting context
def call(&block)
  @streaming_block = block
  Pipeline::Executor.execute(build_context).output
end
305
+
# The input of this speech operation, used as the pipeline context's
# input.
#
# @return [String] the text being converted
def user_prompt
  text
end
312
+
# Core speech execution, invoked by Pipeline::Executor once middleware
# has run. Contains only the speech API logic.
#
# Validates the text, applies the lexicon, synthesizes audio, then
# records zero token counts, the cost and the final SpeechResult on the
# context. The result reports the original (pre-lexicon) text.
#
# @param context [Pipeline::Context] the execution context
# @return [void] sets context.output with the SpeechResult
def execute(context)
  started = Time.current

  validate_text_input!
  synthesis = execute_speech(apply_lexicon(text))

  finished = Time.current
  elapsed_ms = ((finished - started) * 1000).to_i

  # Speech synthesis is not token based, so token counts stay at zero.
  context.input_tokens = 0
  context.output_tokens = 0
  context.total_cost = calculate_cost(synthesis)

  context.output = build_result(
    synthesis,
    text,
    started_at: context.started_at || started,
    completed_at: finished,
    duration_ms: elapsed_ms,
    tenant_id: context.tenant_id
  )
end
347
+
# Builds the cache key for this speech request.
#
# The key joins, with "/", the class name and version, the resolved
# provider, model, voice, voice ID, speed and format, and a SHA-256 of
# the input text. Nil components (e.g. an unset voice ID) are dropped.
#
# @return [String] cache key
def agent_cache_key
  [
    "ruby_llm_agents",
    "speech",
    self.class.name,
    self.class.version,
    resolved_provider,
    resolved_model,
    resolved_voice,
    resolved_voice_id,
    resolved_speed,
    resolved_output_format,
    Digest::SHA256.hexdigest(text)
  ].compact.join("/")
end
368
+
369
+ private
370
+
# Assembles the pipeline context for this call.
#
# Caching is skipped when requested via +:skip_cache+ or whenever
# streaming is enabled; the stream block is only attached when streaming.
#
# @return [Pipeline::Context] the context object
def build_context
  streaming = streaming_enabled?

  Pipeline::Context.new(
    input: user_prompt,
    agent_class: self.class,
    agent_instance: self,
    model: resolved_model,
    tenant: @options[:tenant],
    skip_cache: @options[:skip_cache] || streaming,
    stream_block: streaming ? @streaming_block : nil
  )
end
385
+
# Ensures the text is a non-empty String.
#
# @return [nil]
# @raise [ArgumentError] when text is nil, not a String, or empty
def validate_text_input!
  case text
  when nil
    raise ArgumentError, "text is required"
  when String
    raise ArgumentError, "text cannot be empty" if text.empty?
  else
    raise ArgumentError, "text must be a String, got #{text.class}"
  end
end
392
+
# Rewrites the text using the class's pronunciation lexicon, if any.
#
# @param text [String] text to process
# @return [String] the lexicon's output, or +text+ itself when no
#   lexicon is configured
def apply_lexicon(text)
  configured = self.class.lexicon_config
  configured ? configured.apply(text) : text
end
400
+
# Dispatches to the streaming or standard synthesis path.
#
# Streaming is only used when it is enabled AND a block was supplied.
#
# @param processed_text [String] text after lexicon substitution
# @return [Hash] raw synthesis result
def execute_speech(processed_text)
  speak_options = build_speak_options
  use_streaming = streaming_enabled? && @streaming_block

  return execute_streaming_speech(processed_text, speak_options) if use_streaming

  execute_standard_speech(processed_text, speak_options)
end
411
+
# Performs a single, non-streaming synthesis call via RubyLLM.speak.
#
# @param text [String] text to synthesize
# @param options [Hash] keyword options for RubyLLM.speak
# @return [Hash] audio, duration (nil when the response has no
#   +duration+), resolved format/provider/model/voice, character count
#   and the raw response
def execute_standard_speech(text, options)
  response = RubyLLM.speak(text, **options)
  audio = response.audio
  duration = response.respond_to?(:duration) ? response.duration : nil

  {
    audio: audio,
    duration: duration,
    format: resolved_output_format,
    provider: resolved_provider,
    model: resolved_model,
    voice: resolved_voice,
    characters: text.length,
    raw_response: response
  }
end
427
+
# Performs a streaming synthesis call via RubyLLM.speak.
#
# Each chunk is passed to the caller's block; chunks that respond to
# +audio+ are also collected so the full audio can be returned.
#
# @param text [String] text to synthesize
# @param options [Hash] keyword options for RubyLLM.speak
# @return [Hash] joined audio, nil duration, resolved
#   format/provider/model/voice, character count and +streamed: true+
def execute_streaming_speech(text, options)
  collected = []

  RubyLLM.speak(text, **options.merge(stream: true)) do |chunk|
    collected.push(chunk.audio) if chunk.respond_to?(:audio)
    @streaming_block&.call(chunk)
  end

  {
    audio: collected.join,
    duration: nil,
    format: resolved_output_format,
    provider: resolved_provider,
    model: resolved_model,
    voice: resolved_voice,
    characters: text.length,
    streamed: true
  }
end
448
+
# Builds the keyword options passed to RubyLLM.speak.
#
# A voice ID takes precedence over the voice name. Speed is only sent
# when it differs from 1.0, and voice settings only for ElevenLabs.
#
# @return [Hash] options for RubyLLM.speak
def build_speak_options
  speak_options = {
    model: resolved_model,
    voice: resolved_voice_id || resolved_voice
  }

  rate = resolved_speed
  speak_options[:speed] = rate if rate && rate != 1.0
  speak_options[:response_format] = resolved_output_format.to_s

  settings = self.class.voice_settings_config if resolved_provider == :elevenlabs
  speak_options[:voice_settings] = settings.to_h if settings

  speak_options
end
467
+
# Wraps the raw synthesis hash in a SpeechResult.
#
# @param raw_result [Hash] hash produced by the synthesis methods
# @param original_text [String] caller's text before lexicon processing
# @param started_at [Time] when execution started
# @param completed_at [Time] when execution finished
# @param duration_ms [Integer] elapsed milliseconds
# @param tenant_id [Object] tenant identifier taken from the context
# @return [SpeechResult] result with status +:success+
def build_result(raw_result, original_text, started_at:, completed_at:, duration_ms:, tenant_id:)
  audio = raw_result[:audio]

  SpeechResult.new(
    audio: audio,
    duration: raw_result[:duration],
    format: raw_result[:format],
    file_size: audio&.bytesize,
    characters: raw_result[:characters],
    text_length: original_text.length,
    provider: raw_result[:provider],
    model_id: raw_result[:model],
    voice_id: resolved_voice_id,
    voice_name: raw_result[:voice],
    duration_ms: duration_ms,
    started_at: started_at,
    completed_at: completed_at,
    total_cost: calculate_cost(raw_result),
    status: :success,
    tenant_id: tenant_id
  )
end
489
+
# Estimates the cost of a synthesis call.
#
# A cost reported by the raw response wins. Otherwise the cost is the
# character count (per 1,000) times a built-in per-provider rate; OpenAI
# models whose name contains "hd" use the higher rate, and unknown
# providers use 0.015.
#
# @param raw_result [Hash] hash with :characters, :provider, :model and
#   optionally :raw_response
# @return [Numeric] the reported or estimated cost
def calculate_cost(raw_result)
  response = raw_result[:raw_response]
  return response.cost if response.respond_to?(:cost) && response.cost

  hd_model = raw_result[:model].to_s.include?("hd")
  rates_per_1k_chars = {
    openai: hd_model ? 0.030 : 0.015,
    elevenlabs: 0.30,
    google: 0.016,
    polly: 0.016
  }
  rate = rates_per_1k_chars.fetch(raw_result[:provider], 0.015)

  ((raw_result[:characters] || 0) / 1000.0) * rate
end
516
+
# Provider for this call: the +:provider+ option when given, otherwise
# the class-level provider.
#
# @return [Symbol]
def resolved_provider
  override = @options[:provider]
  override || self.class.provider
end
521
+
# Model for this call: the instance's +@model+ when set, otherwise the
# class-level model.
#
# @return [String]
def resolved_model
  return @model if @model

  self.class.model
end
526
+
# Voice name for this call: the +:voice+ option when given, otherwise
# the class-level voice.
#
# @return [String]
def resolved_voice
  override = @options[:voice]
  override || self.class.voice
end
531
+
# Voice ID for this call: the +:voice_id+ option when given, otherwise
# the class-level voice ID.
#
# @return [String, nil]
def resolved_voice_id
  override = @options[:voice_id]
  override || self.class.voice_id
end
536
+
# Speed for this call: the +:speed+ option when given, otherwise the
# class-level speed.
#
# @return [Float]
def resolved_speed
  override = @options[:speed]
  override || self.class.speed
end
541
+
# Output format for this call. Note the per-call option key is +:format+
# (not +:output_format+); without it the class-level output_format is
# used.
#
# @return [Symbol]
def resolved_output_format
  override = @options[:format]
  override || self.class.output_format
end
546
+
# Whether this call should stream.
#
# True when the per-call +:streaming+ flag is truthy or the class enables
# streaming; a falsy per-call flag does not turn class-level streaming
# off.
#
# @return [Boolean]
def streaming_enabled?
  return @runtime_streaming if @runtime_streaming

  self.class.streaming?
end
551
+ end
552
+ end
553
+ end