ruby_llm-agents 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +189 -31
  3. data/app/controllers/ruby_llm/agents/agents_controller.rb +136 -16
  4. data/app/controllers/ruby_llm/agents/dashboard_controller.rb +29 -9
  5. data/app/controllers/ruby_llm/agents/workflows_controller.rb +355 -0
  6. data/app/helpers/ruby_llm/agents/application_helper.rb +25 -0
  7. data/app/models/ruby_llm/agents/execution.rb +3 -0
  8. data/app/models/ruby_llm/agents/tenant_budget.rb +58 -15
  9. data/app/services/ruby_llm/agents/agent_registry.rb +51 -12
  10. data/app/views/layouts/ruby_llm/agents/application.html.erb +2 -29
  11. data/app/views/ruby_llm/agents/agents/_agent.html.erb +13 -1
  12. data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +235 -0
  13. data/app/views/ruby_llm/agents/agents/_config_embedder.html.erb +70 -0
  14. data/app/views/ruby_llm/agents/agents/_config_image_generator.html.erb +152 -0
  15. data/app/views/ruby_llm/agents/agents/_config_moderator.html.erb +63 -0
  16. data/app/views/ruby_llm/agents/agents/_config_speaker.html.erb +108 -0
  17. data/app/views/ruby_llm/agents/agents/_config_transcriber.html.erb +91 -0
  18. data/app/views/ruby_llm/agents/agents/_workflow.html.erb +1 -1
  19. data/app/views/ruby_llm/agents/agents/index.html.erb +74 -9
  20. data/app/views/ruby_llm/agents/agents/show.html.erb +18 -378
  21. data/app/views/ruby_llm/agents/dashboard/_agent_comparison.html.erb +269 -15
  22. data/app/views/ruby_llm/agents/executions/show.html.erb +16 -0
  23. data/app/views/ruby_llm/agents/shared/_agent_type_badge.html.erb +93 -0
  24. data/app/views/ruby_llm/agents/workflows/_step_performance.html.erb +236 -0
  25. data/app/views/ruby_llm/agents/workflows/_structure_parallel.html.erb +76 -0
  26. data/app/views/ruby_llm/agents/workflows/_structure_pipeline.html.erb +74 -0
  27. data/app/views/ruby_llm/agents/workflows/_structure_router.html.erb +108 -0
  28. data/app/views/ruby_llm/agents/workflows/show.html.erb +442 -0
  29. data/config/routes.rb +1 -0
  30. data/lib/generators/ruby_llm_agents/agent_generator.rb +56 -7
  31. data/lib/generators/ruby_llm_agents/background_remover_generator.rb +110 -0
  32. data/lib/generators/ruby_llm_agents/embedder_generator.rb +107 -0
  33. data/lib/generators/ruby_llm_agents/image_analyzer_generator.rb +115 -0
  34. data/lib/generators/ruby_llm_agents/image_editor_generator.rb +108 -0
  35. data/lib/generators/ruby_llm_agents/image_generator_generator.rb +116 -0
  36. data/lib/generators/ruby_llm_agents/image_pipeline_generator.rb +178 -0
  37. data/lib/generators/ruby_llm_agents/image_transformer_generator.rb +109 -0
  38. data/lib/generators/ruby_llm_agents/image_upscaler_generator.rb +103 -0
  39. data/lib/generators/ruby_llm_agents/image_variator_generator.rb +102 -0
  40. data/lib/generators/ruby_llm_agents/install_generator.rb +76 -4
  41. data/lib/generators/ruby_llm_agents/restructure_generator.rb +292 -0
  42. data/lib/generators/ruby_llm_agents/speaker_generator.rb +121 -0
  43. data/lib/generators/ruby_llm_agents/templates/add_execution_type_migration.rb.tt +8 -0
  44. data/lib/generators/ruby_llm_agents/templates/agent.rb.tt +99 -84
  45. data/lib/generators/ruby_llm_agents/templates/application_agent.rb.tt +42 -40
  46. data/lib/generators/ruby_llm_agents/templates/application_background_remover.rb.tt +26 -0
  47. data/lib/generators/ruby_llm_agents/templates/application_embedder.rb.tt +50 -0
  48. data/lib/generators/ruby_llm_agents/templates/application_image_analyzer.rb.tt +26 -0
  49. data/lib/generators/ruby_llm_agents/templates/application_image_editor.rb.tt +20 -0
  50. data/lib/generators/ruby_llm_agents/templates/application_image_generator.rb.tt +38 -0
  51. data/lib/generators/ruby_llm_agents/templates/application_image_pipeline.rb.tt +139 -0
  52. data/lib/generators/ruby_llm_agents/templates/application_image_transformer.rb.tt +21 -0
  53. data/lib/generators/ruby_llm_agents/templates/application_image_upscaler.rb.tt +20 -0
  54. data/lib/generators/ruby_llm_agents/templates/application_image_variator.rb.tt +20 -0
  55. data/lib/generators/ruby_llm_agents/templates/application_speaker.rb.tt +49 -0
  56. data/lib/generators/ruby_llm_agents/templates/application_transcriber.rb.tt +53 -0
  57. data/lib/generators/ruby_llm_agents/templates/background_remover.rb.tt +44 -0
  58. data/lib/generators/ruby_llm_agents/templates/embedder.rb.tt +41 -0
  59. data/lib/generators/ruby_llm_agents/templates/image_analyzer.rb.tt +45 -0
  60. data/lib/generators/ruby_llm_agents/templates/image_editor.rb.tt +35 -0
  61. data/lib/generators/ruby_llm_agents/templates/image_generator.rb.tt +47 -0
  62. data/lib/generators/ruby_llm_agents/templates/image_pipeline.rb.tt +50 -0
  63. data/lib/generators/ruby_llm_agents/templates/image_transformer.rb.tt +44 -0
  64. data/lib/generators/ruby_llm_agents/templates/image_upscaler.rb.tt +38 -0
  65. data/lib/generators/ruby_llm_agents/templates/image_variator.rb.tt +33 -0
  66. data/lib/generators/ruby_llm_agents/templates/skills/AGENTS.md.tt +228 -0
  67. data/lib/generators/ruby_llm_agents/templates/skills/BACKGROUND_REMOVERS.md.tt +131 -0
  68. data/lib/generators/ruby_llm_agents/templates/skills/EMBEDDERS.md.tt +255 -0
  69. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_ANALYZERS.md.tt +120 -0
  70. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_EDITORS.md.tt +102 -0
  71. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_GENERATORS.md.tt +282 -0
  72. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_PIPELINES.md.tt +228 -0
  73. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_TRANSFORMERS.md.tt +120 -0
  74. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_UPSCALERS.md.tt +110 -0
  75. data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_VARIATORS.md.tt +120 -0
  76. data/lib/generators/ruby_llm_agents/templates/skills/SPEAKERS.md.tt +212 -0
  77. data/lib/generators/ruby_llm_agents/templates/skills/TOOLS.md.tt +227 -0
  78. data/lib/generators/ruby_llm_agents/templates/skills/TRANSCRIBERS.md.tt +251 -0
  79. data/lib/generators/ruby_llm_agents/templates/skills/WORKFLOWS.md.tt +300 -0
  80. data/lib/generators/ruby_llm_agents/templates/speaker.rb.tt +56 -0
  81. data/lib/generators/ruby_llm_agents/templates/transcriber.rb.tt +51 -0
  82. data/lib/generators/ruby_llm_agents/transcriber_generator.rb +107 -0
  83. data/lib/generators/ruby_llm_agents/upgrade_generator.rb +152 -1
  84. data/lib/ruby_llm/agents/audio/speaker.rb +553 -0
  85. data/lib/ruby_llm/agents/audio/transcriber.rb +669 -0
  86. data/lib/ruby_llm/agents/base_agent.rb +675 -0
  87. data/lib/ruby_llm/agents/core/base/moderation_dsl.rb +181 -0
  88. data/lib/ruby_llm/agents/core/base/moderation_execution.rb +274 -0
  89. data/lib/ruby_llm/agents/core/base.rb +135 -0
  90. data/lib/ruby_llm/agents/core/configuration.rb +981 -0
  91. data/lib/ruby_llm/agents/core/errors.rb +150 -0
  92. data/lib/ruby_llm/agents/{instrumentation.rb → core/instrumentation.rb} +22 -1
  93. data/lib/ruby_llm/agents/core/llm_tenant.rb +358 -0
  94. data/lib/ruby_llm/agents/{version.rb → core/version.rb} +1 -1
  95. data/lib/ruby_llm/agents/dsl/base.rb +110 -0
  96. data/lib/ruby_llm/agents/dsl/caching.rb +142 -0
  97. data/lib/ruby_llm/agents/dsl/reliability.rb +307 -0
  98. data/lib/ruby_llm/agents/dsl.rb +41 -0
  99. data/lib/ruby_llm/agents/image/analyzer/dsl.rb +130 -0
  100. data/lib/ruby_llm/agents/image/analyzer/execution.rb +402 -0
  101. data/lib/ruby_llm/agents/image/analyzer.rb +90 -0
  102. data/lib/ruby_llm/agents/image/background_remover/dsl.rb +154 -0
  103. data/lib/ruby_llm/agents/image/background_remover/execution.rb +240 -0
  104. data/lib/ruby_llm/agents/image/background_remover.rb +89 -0
  105. data/lib/ruby_llm/agents/image/concerns/image_operation_dsl.rb +91 -0
  106. data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +165 -0
  107. data/lib/ruby_llm/agents/image/editor/dsl.rb +56 -0
  108. data/lib/ruby_llm/agents/image/editor/execution.rb +207 -0
  109. data/lib/ruby_llm/agents/image/editor.rb +92 -0
  110. data/lib/ruby_llm/agents/image/generator/active_storage_support.rb +127 -0
  111. data/lib/ruby_llm/agents/image/generator/content_policy.rb +95 -0
  112. data/lib/ruby_llm/agents/image/generator/pricing.rb +353 -0
  113. data/lib/ruby_llm/agents/image/generator/templates.rb +124 -0
  114. data/lib/ruby_llm/agents/image/generator.rb +455 -0
  115. data/lib/ruby_llm/agents/image/pipeline/dsl.rb +213 -0
  116. data/lib/ruby_llm/agents/image/pipeline/execution.rb +382 -0
  117. data/lib/ruby_llm/agents/image/pipeline.rb +97 -0
  118. data/lib/ruby_llm/agents/image/transformer/dsl.rb +148 -0
  119. data/lib/ruby_llm/agents/image/transformer/execution.rb +223 -0
  120. data/lib/ruby_llm/agents/image/transformer.rb +95 -0
  121. data/lib/ruby_llm/agents/image/upscaler/dsl.rb +83 -0
  122. data/lib/ruby_llm/agents/image/upscaler/execution.rb +219 -0
  123. data/lib/ruby_llm/agents/image/upscaler.rb +81 -0
  124. data/lib/ruby_llm/agents/image/variator/dsl.rb +62 -0
  125. data/lib/ruby_llm/agents/image/variator/execution.rb +189 -0
  126. data/lib/ruby_llm/agents/image/variator.rb +80 -0
  127. data/lib/ruby_llm/agents/{alert_manager.rb → infrastructure/alert_manager.rb} +17 -22
  128. data/lib/ruby_llm/agents/infrastructure/budget/budget_query.rb +145 -0
  129. data/lib/ruby_llm/agents/infrastructure/budget/config_resolver.rb +149 -0
  130. data/lib/ruby_llm/agents/infrastructure/budget/forecaster.rb +68 -0
  131. data/lib/ruby_llm/agents/infrastructure/budget/spend_recorder.rb +279 -0
  132. data/lib/ruby_llm/agents/infrastructure/budget_tracker.rb +275 -0
  133. data/lib/ruby_llm/agents/{execution_logger_job.rb → infrastructure/execution_logger_job.rb} +17 -1
  134. data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/executor.rb +2 -1
  135. data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/retry_strategy.rb +9 -3
  136. data/lib/ruby_llm/agents/{reliability.rb → infrastructure/reliability.rb} +11 -21
  137. data/lib/ruby_llm/agents/pipeline/builder.rb +215 -0
  138. data/lib/ruby_llm/agents/pipeline/context.rb +255 -0
  139. data/lib/ruby_llm/agents/pipeline/executor.rb +86 -0
  140. data/lib/ruby_llm/agents/pipeline/middleware/base.rb +124 -0
  141. data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +95 -0
  142. data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +171 -0
  143. data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +415 -0
  144. data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +276 -0
  145. data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +196 -0
  146. data/lib/ruby_llm/agents/pipeline.rb +68 -0
  147. data/lib/ruby_llm/agents/{engine.rb → rails/engine.rb} +79 -11
  148. data/lib/ruby_llm/agents/results/background_removal_result.rb +286 -0
  149. data/lib/ruby_llm/agents/{result.rb → results/base.rb} +73 -1
  150. data/lib/ruby_llm/agents/results/embedding_result.rb +243 -0
  151. data/lib/ruby_llm/agents/results/image_analysis_result.rb +314 -0
  152. data/lib/ruby_llm/agents/results/image_edit_result.rb +250 -0
  153. data/lib/ruby_llm/agents/results/image_generation_result.rb +346 -0
  154. data/lib/ruby_llm/agents/results/image_pipeline_result.rb +399 -0
  155. data/lib/ruby_llm/agents/results/image_transform_result.rb +251 -0
  156. data/lib/ruby_llm/agents/results/image_upscale_result.rb +255 -0
  157. data/lib/ruby_llm/agents/results/image_variation_result.rb +237 -0
  158. data/lib/ruby_llm/agents/results/moderation_result.rb +158 -0
  159. data/lib/ruby_llm/agents/results/speech_result.rb +338 -0
  160. data/lib/ruby_llm/agents/results/transcription_result.rb +408 -0
  161. data/lib/ruby_llm/agents/text/embedder.rb +444 -0
  162. data/lib/ruby_llm/agents/text/moderator.rb +237 -0
  163. data/lib/ruby_llm/agents/workflow/async.rb +220 -0
  164. data/lib/ruby_llm/agents/workflow/async_executor.rb +156 -0
  165. data/lib/ruby_llm/agents/{workflow.rb → workflow/orchestrator.rb} +6 -5
  166. data/lib/ruby_llm/agents/workflow/parallel.rb +34 -17
  167. data/lib/ruby_llm/agents/workflow/thread_pool.rb +185 -0
  168. data/lib/ruby_llm/agents.rb +86 -20
  169. metadata +172 -34
  170. data/lib/ruby_llm/agents/base/caching.rb +0 -40
  171. data/lib/ruby_llm/agents/base/cost_calculation.rb +0 -105
  172. data/lib/ruby_llm/agents/base/dsl.rb +0 -324
  173. data/lib/ruby_llm/agents/base/execution.rb +0 -366
  174. data/lib/ruby_llm/agents/base/reliability_dsl.rb +0 -82
  175. data/lib/ruby_llm/agents/base/reliability_execution.rb +0 -136
  176. data/lib/ruby_llm/agents/base/response_building.rb +0 -86
  177. data/lib/ruby_llm/agents/base/tool_tracking.rb +0 -57
  178. data/lib/ruby_llm/agents/base.rb +0 -210
  179. data/lib/ruby_llm/agents/budget_tracker.rb +0 -733
  180. data/lib/ruby_llm/agents/configuration.rb +0 -394
  181. /data/lib/ruby_llm/agents/{deprecations.rb → core/deprecations.rb} +0 -0
  182. /data/lib/ruby_llm/agents/{inflections.rb → core/inflections.rb} +0 -0
  183. /data/lib/ruby_llm/agents/{resolved_config.rb → core/resolved_config.rb} +0 -0
  184. /data/lib/ruby_llm/agents/{attempt_tracker.rb → infrastructure/attempt_tracker.rb} +0 -0
  185. /data/lib/ruby_llm/agents/{cache_helper.rb → infrastructure/cache_helper.rb} +0 -0
  186. /data/lib/ruby_llm/agents/{circuit_breaker.rb → infrastructure/circuit_breaker.rb} +0 -0
  187. /data/lib/ruby_llm/agents/{redactor.rb → infrastructure/redactor.rb} +0 -0
  188. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/breaker_manager.rb +0 -0
  189. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/execution_constraints.rb +0 -0
  190. /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/fallback_routing.rb +0 -0
@@ -0,0 +1,402 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "json"
5
+ require_relative "../concerns/image_operation_execution"
6
+
7
+ module RubyLLM
8
+ module Agents
9
+ class ImageAnalyzer
10
+ # Execution logic for image analyzers
11
+ #
12
+ # Handles image validation, budget tracking, caching,
13
+ # analysis execution, and result building.
14
+ #
15
+ module Execution
16
+ include Concerns::ImageOperationExecution
17
+
18
# Run the complete image analysis flow.
#
# Order of operations: resolve tenant context, check the budget (only when
# budget tracking is enabled), validate the image, consult the cache (only
# when caching is enabled), call the vision model, build the result, write
# it to the cache, and record the execution. Any StandardError raised on
# the way is turned into an error result instead of propagating.
#
# @return [ImageAnalysisResult] a fresh result, a cached result, or an
#   error result carrying the error class and message
def execute
  began_at = Time.current

  resolve_tenant_context!
  check_budget! if budget_tracking_enabled?
  validate_image!

  # A cache hit short-circuits everything below (no model call, no recording).
  if cache_enabled?
    cache_hit = check_cache(ImageAnalysisResult)
    return cache_hit if cache_hit
  end

  outcome = build_result(
    analysis: analyze_image,
    started_at: began_at,
    completed_at: Time.current
  )

  write_cache(outcome) if cache_enabled?
  record_execution(outcome) if execution_tracking_enabled?

  outcome
rescue StandardError => failure
  record_failed_execution(failure, began_at) if execution_tracking_enabled?
  build_error_result(failure, began_at)
end
53
+
54
+ private
55
+
56
# Label identifying this kind of operation.
#
# @return [String] always "image_analysis"
def execution_type
  "image_analysis"
end
59
+
60
# Validate the image input before any model call is made.
#
# Rules:
# - nil, empty, or whitespace-only strings are rejected as blank.
# - Strings with an explicit http:// or https:// scheme are treated as
#   URLs and are not checked on disk.
# - Any other string is treated as a local path and must exist. Matching
#   on the scheme (rather than the bare prefix "http") means local files
#   such as "http_logo.png" are still checked.
# - Non-string inputs (e.g. IO objects) are accepted as-is.
#
# @raise [ArgumentError] when the image is blank or the file is missing
# @return [void]
def validate_image!
  blank = image.nil? || (image.is_a?(String) && image.strip.empty?)
  raise ArgumentError, "Image cannot be blank" if blank

  return unless image.is_a?(String)
  return if image.match?(%r{\Ahttps?://}i)
  return if File.exist?(image)

  raise ArgumentError, "Image file does not exist: #{image}"
end
70
+
71
# Build the prompt, send it to the vision model, and parse the reply.
#
# @return [Hash] whatever parse_analysis_response returns for the reply
def analyze_image
  parse_analysis_response(call_vision_model(build_analysis_prompt))
end
81
+
82
# Assemble the text prompt sent to the vision model.
#
# A custom prompt (per-call option or class-level setting) is returned
# verbatim. Otherwise the prompt consists of: instructions for the
# resolved analysis type, optional "also ..." requests for colors,
# objects and text, and a JSON schema describing the expected reply.
# An analysis type with no matching branch contributes no instructions.
#
# @return [String] the prompt text
def build_analysis_prompt
  custom = resolve_custom_prompt
  return custom if custom

  kind = resolve_analysis_type

  lines =
    case kind
    when :caption
      ["Provide a brief, single-sentence caption for this image."]
    when :detailed
      [
        "Provide a detailed description of this image including:",
        "- A brief caption (1 sentence)",
        "- A detailed description (2-3 paragraphs)",
        "- Key tags/keywords (up to #{resolve_max_tags})"
      ]
    when :tags
      ["List up to #{resolve_max_tags} relevant tags/keywords for this image as a comma-separated list."]
    when :objects
      [
        "Identify all objects visible in this image. For each object provide:",
        "- Name of the object",
        "- Approximate location (top-left, center, bottom-right, etc.)",
        "- Confidence level (high, medium, low)"
      ]
    when :colors
      [
        "Identify the dominant colors in this image. For each color provide:",
        "- Hex color code",
        "- Color name",
        "- Approximate percentage of the image"
      ]
    when :all
      [
        "Analyze this image comprehensively. Provide:",
        "1. A brief caption (1 sentence)",
        "2. A detailed description (2-3 paragraphs)",
        "3. Key tags/keywords (up to #{resolve_max_tags})",
        "4. Objects detected with locations",
        "5. Dominant colors with hex codes and percentages"
      ]
    else
      []
    end

  # Extra requests are skipped when the analysis type already covers them.
  if resolve_extract_colors && !%i[colors all].include?(kind)
    lines << "\nAlso identify the dominant colors (hex, name, percentage)."
  end

  if resolve_detect_objects && !%i[objects all].include?(kind)
    lines << "\nAlso identify objects visible with their locations."
  end

  lines << "\nAlso extract any visible text (OCR)." if resolve_extract_text

  lines << "\nFormat your response as JSON with the following structure:"
  lines << build_json_schema

  lines.join("\n")
end
135
+
136
# Example JSON document shown to the model as the expected reply format.
#
# The values are placeholders describing each field, not real data.
#
# @return [String] pretty-printed JSON
def build_json_schema
  object_example = { name: "object name", location: "position", confidence: "high/medium/low" }
  color_example = { hex: "#RRGGBB", name: "color name", percentage: 25 }

  template = {
    caption: "Brief caption string",
    description: "Detailed description string (if applicable)",
    tags: %w[array of tag strings],
    objects: [object_example],
    colors: [color_example],
    text: "Extracted text if any"
  }

  JSON.pretty_generate(template)
end
148
+
149
# Send the prompt and the prepared image to the resolved model.
#
# @param prompt [String] the analysis prompt
# @return [Object] whatever RubyLLM's chat#ask returns; the caller reads
#   `.content` from it
def call_vision_model(prompt)
  model_id = resolve_model
  attachment = { image: prepare_image_content }

  RubyLLM.chat(model: model_id).ask(prompt, with: attachment)
end
159
+
160
# Convert the image input into the value handed to the vision model.
#
# - Strings with an http:// or https:// scheme are returned unchanged.
#   Matching on the scheme (not the bare prefix "http") keeps local files
#   such as "http_logo.png" from being mistaken for URLs.
# - Other strings are treated as file paths: the file is read in binary
#   mode and returned as a base64 data URI.
# - Objects responding to #read are read fully and returned as a base64
#   data URI. The MIME type is taken from the object's path when it
#   exposes one (#path or #to_path); otherwise it falls back to
#   "image/png". Rewindable objects are rewound before and after reading
#   so the whole stream is encoded and the caller can read it again.
# - Anything else is converted with #to_s.
#
# Base64 encoding uses Array#pack("m0"), which is the strict (no line
# feeds) encoding, so no extra library has to be required here.
#
# @return [String] a URL, a data URI, or the string form of the input
def prepare_image_content
  to_data_uri = ->(bytes, mime) { "data:#{mime};base64,#{[bytes.to_s].pack('m0')}" }

  if image.is_a?(String)
    return image if image.match?(%r{\Ahttps?://}i)

    return to_data_uri.call(File.binread(image), detect_mime_type(image))
  end

  return image.to_s unless image.respond_to?(:read)

  image.rewind if image.respond_to?(:rewind)
  bytes = image.read
  image.rewind if image.respond_to?(:rewind)

  source_path =
    if image.respond_to?(:path) then image.path
    elsif image.respond_to?(:to_path) then image.to_path
    end
  mime = source_path ? detect_mime_type(source_path.to_s) : "image/png"

  to_data_uri.call(bytes, mime)
end
183
+
184
# Map a file extension to an image MIME type.
#
# The comparison is case-insensitive. Unknown or missing extensions fall
# back to "image/png".
#
# @param path [String, #to_s] file path or name
# @return [String] the MIME type
def detect_mime_type(path)
  case File.extname(path.to_s).downcase
  when ".jpg", ".jpeg" then "image/jpeg"
  when ".png" then "image/png"
  when ".gif" then "image/gif"
  when ".webp" then "image/webp"
  when ".bmp" then "image/bmp"
  when ".tif", ".tiff" then "image/tiff"
  when ".svg" then "image/svg+xml"
  when ".heic" then "image/heic"
  when ".heif" then "image/heif"
  when ".avif" then "image/avif"
  else "image/png"
  end
end
195
+
196
# Turn the model reply into a structured analysis hash.
#
# The first "{" through the last "}" of the reply is treated as a JSON
# candidate. When it parses, the data is normalized; when there is no
# candidate or parsing fails, the reply is handled as plain text.
#
# @param response [#content] model reply
# @return [Hash] normalized analysis data
def parse_analysis_response(response)
  body = response.content
  candidate = body.match(/\{[\s\S]*\}/m)
  return parse_text_response(body) unless candidate

  begin
    normalize_analysis_data(JSON.parse(candidate[0], symbolize_names: true))
  rescue JSON::ParserError
    # Not valid JSON after all; fall back to text parsing.
    parse_text_response(body)
  end
end
213
+
214
# Coerce parsed JSON into the analysis hash shape used by build_result.
#
# caption, description and text become strings (nil stays nil); tags,
# objects and colors go through their normalizers; the parsed data is
# kept untouched under :raw_response.
#
# @param data [Hash] parsed JSON with symbol keys
# @return [Hash] normalized analysis data
def normalize_analysis_data(data)
  stringify = ->(value) { value.nil? ? nil : value.to_s }

  {
    caption: stringify.call(data[:caption]),
    description: stringify.call(data[:description]),
    tags: normalize_tags(data[:tags]),
    objects: normalize_objects(data[:objects]),
    colors: normalize_colors(data[:colors]),
    text: stringify.call(data[:text]),
    raw_response: data
  }
end
225
+
226
# Clean up the tag list returned by the model.
#
# Each entry is stringified and stripped, blank entries are dropped, and
# the list is capped at the resolved maximum tag count.
#
# @param tags [Array, Object] raw tags; anything but an Array yields []
# @return [Array<String>] cleaned tags
def normalize_tags(tags)
  return [] unless tags.is_a?(Array)

  cleaned = tags.map { |tag| tag.to_s.strip }
  cleaned.reject(&:empty?).first(resolve_max_tags)
end
231
+
232
# Clean up the detected-object list returned by the model.
#
# Non-Hash entries are skipped. For each Hash entry, name and location are
# stringified, confidence is stringified and downcased, and the bounding
# box is taken from :bbox or :bounding_box. Keys whose value is nil are
# removed.
#
# @param objects [Array, Object] raw objects; anything but an Array yields []
# @return [Array<Hash>] cleaned object descriptions
def normalize_objects(objects)
  return [] unless objects.is_a?(Array)

  objects.grep(Hash).map do |entry|
    cleaned = {
      name: entry[:name]&.to_s,
      location: entry[:location]&.to_s,
      confidence: entry[:confidence]&.to_s&.downcase,
      bbox: entry[:bbox] || entry[:bounding_box]
    }
    cleaned.compact
  end
end
245
+
246
# Clean up the color list returned by the model.
#
# Non-Hash entries are skipped. For each Hash entry, hex and name are
# stringified and percentage is converted with #to_f. Keys whose value is
# nil are removed.
#
# @param colors [Array, Object] raw colors; anything but an Array yields []
# @return [Array<Hash>] cleaned color descriptions
def normalize_colors(colors)
  return [] unless colors.is_a?(Array)

  colors.grep(Hash).map do |entry|
    cleaned = {
      hex: entry[:hex]&.to_s,
      name: entry[:name]&.to_s,
      percentage: entry[:percentage]&.to_f
    }
    cleaned.compact
  end
end
258
+
259
# Fallback used when the reply contains no usable JSON.
#
# The whole reply becomes the description; a caption and tags are
# extracted heuristically; objects, colors and text are left empty.
#
# @param content [String] raw reply text
# @return [Hash] analysis data in the same shape as the JSON path
def parse_text_response(content)
  heuristic_caption = extract_caption_from_text(content)
  heuristic_tags = extract_tags_from_text(content)

  {
    caption: heuristic_caption,
    description: content,
    tags: heuristic_tags,
    objects: [],
    colors: [],
    text: nil,
    raw_response: content
  }
end
271
+
272
# Use the first sentence of a free-text reply as the caption.
#
# A sentence ends at ".", "!" or "?" only when that character is followed
# by whitespace or the end of the text, so decimals such as "3.5" do not
# cut the caption short. The terminator itself is not included. The
# caption is capped at 200 characters.
#
# @param content [String, nil] raw reply text
# @return [String, nil] the caption, or nil when no text is available
def extract_caption_from_text(content)
  text = content.to_s.strip
  return nil if text.empty?

  sentence = text.split(/[.!?]+(?=\s|\z)/, 2).first.to_s.strip
  sentence.empty? ? nil : sentence.slice(0, 200)
end
277
+
278
# Heuristically pull tags out of a free-text reply.
#
# Two sources are combined, in this order:
# 1. Bullet items: lines starting with "-", "*" or "•" followed by one or
#    two words. The bullet must be at the start of a line, so hyphens
#    inside words ("well-known") are not mistaken for bullets, and a tag
#    never spans a line break.
# 2. The comma/semicolon separated list that follows the first "tags" or
#    "keywords" label.
#
# Tags are downcased, blank entries are dropped, duplicates are removed,
# and the list is capped at the resolved maximum tag count.
#
# @param content [String, nil] raw reply text
# @return [Array<String>] extracted tags
def extract_tags_from_text(content)
  text = content.to_s

  bullet_tags = text.scan(/^[ \t]*[-*•][ \t]+(\w+(?:[ \t]+\w+)?)/).map do |(item)|
    item.strip.downcase
  end

  labelled_list = text[/\b(?:tags|keywords)\b[:\s]*(.+?)(?:\n|$)/i, 1]
  listed_tags = labelled_list.to_s.split(/[,;]/).map { |item| item.strip.downcase }

  (bullet_tags + listed_tags).reject(&:empty?).uniq.first(resolve_max_tags)
end
296
+
297
# Wrap successful analysis data in an ImageAnalysisResult.
#
# @param analysis [Hash] analysis data keyed by :caption, :description,
#   :tags, :objects, :colors, :text and :raw_response
# @param started_at [Time] when execution began
# @param completed_at [Time] when execution finished
# @return [ImageAnalysisResult]
def build_result(analysis:, started_at:, completed_at:)
  analysis_fields = %i[caption description tags objects colors text raw_response]
  extracted = analysis_fields.each_with_object({}) do |field, acc|
    acc[field] = analysis[field]
  end

  ImageAnalysisResult.new(
    image: image,
    model_id: resolve_model,
    analysis_type: resolve_analysis_type,
    **extracted,
    started_at: started_at,
    completed_at: completed_at,
    tenant_id: @tenant_id,
    analyzer_class: self.class.name
  )
end
315
+
316
# Build the result returned when execution raised an error.
#
# All analysis fields are empty; the error's class name and message are
# attached, and the completion time is taken at the moment of the call.
#
# @param error [StandardError] the error that was raised
# @param started_at [Time, nil] when execution began
# @return [ImageAnalysisResult]
def build_error_result(error, started_at)
  empty_analysis = {
    caption: nil,
    description: nil,
    tags: [],
    objects: [],
    colors: [],
    text: nil,
    raw_response: nil
  }
  failure_details = { error_class: error.class.name, error_message: error.message }

  ImageAnalysisResult.new(
    image: image,
    model_id: resolve_model,
    analysis_type: resolve_analysis_type,
    **empty_analysis,
    started_at: started_at,
    completed_at: Time.current,
    tenant_id: @tenant_id,
    analyzer_class: self.class.name,
    **failure_details
  )
end
336
+
337
+ # Resolution methods
338
+
339
# Analysis type for this call: the :analysis_type option when truthy,
# otherwise the class-level setting.
def resolve_analysis_type
  requested = options[:analysis_type]
  requested || self.class.analysis_type
end

# Whether to request color extraction. An explicit option (including
# false) wins over the class-level setting.
def resolve_extract_colors
  class_default = self.class.extract_colors
  options.fetch(:extract_colors, class_default)
end

# Whether to request object detection. An explicit option (including
# false) wins over the class-level setting.
def resolve_detect_objects
  class_default = self.class.detect_objects
  options.fetch(:detect_objects, class_default)
end

# Whether to request text extraction. An explicit option (including
# false) wins over the class-level setting.
def resolve_extract_text
  class_default = self.class.extract_text
  options.fetch(:extract_text, class_default)
end

# Custom prompt for this call: the :custom_prompt option when truthy,
# otherwise the class-level setting.
def resolve_custom_prompt
  requested = options[:custom_prompt]
  requested || self.class.custom_prompt
end

# Maximum tag count for this call: the :max_tags option when truthy,
# otherwise the class-level setting.
def resolve_max_tags
  requested = options[:max_tags]
  requested || self.class.max_tags
end
362
+
363
# Components that together identify a cacheable analysis.
#
# Every setting that changes the prompt or the normalized result is part
# of the key: analyzer class and version, model, analysis type, the three
# extraction flags, the maximum tag count, the custom prompt, and the
# image content. The custom prompt and the image content are included as
# SHA-256 digests. Without the tag limit and custom prompt, two calls on
# the same image with different prompts would share one cache entry.
#
# @return [Array<String>] ordered key components
def cache_key_components
  [
    "image_analyzer",
    self.class.name,
    self.class.version,
    resolve_model,
    resolve_analysis_type.to_s,
    resolve_extract_colors.to_s,
    resolve_detect_objects.to_s,
    resolve_extract_text.to_s,
    resolve_max_tags.to_s,
    Digest::SHA256.hexdigest(resolve_custom_prompt.to_s),
    Digest::SHA256.hexdigest(image_digest)
  ]
end
377
+
378
# Raw content used to fingerprint the image for the cache key.
#
# - A string naming an existing file: the file's bytes, read in binary
#   mode so the content is never altered by text-mode translation.
# - An object responding to #read: its full content. Rewindable objects
#   are rewound before reading (so a partially consumed stream is still
#   fingerprinted in full) and again afterwards (so it can be read again).
# - Anything else (e.g. a URL string): its string form.
#
# @return [String] content to be hashed
def image_digest
  if image.is_a?(String) && File.exist?(image)
    File.binread(image)
  elsif image.respond_to?(:read)
    image.rewind if image.respond_to?(:rewind)
    bytes = image.read
    image.rewind if image.respond_to?(:rewind)
    bytes.to_s
  else
    image.to_s
  end
end
389
+
390
# Summary of a result for execution tracking.
#
# @param result [ImageAnalysisResult] the analysis result
# @return [Hash] analysis type, element counts for tags/objects/colors,
#   and whether any text was extracted
def build_execution_metadata(result)
  metadata = { analysis_type: result.analysis_type }

  %i[tags objects colors].each do |collection|
    metadata[:"#{collection}_count"] = result.public_send(collection).size
  end

  metadata[:has_text] = result.text.present?
  metadata
end
399
+ end
400
+ end
401
+ end
402
+ end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "analyzer/dsl"
4
+ require_relative "analyzer/execution"
5
+
6
+ module RubyLLM
7
+ module Agents
8
+ # Image analyzer for understanding and captioning images
9
+ #
10
+ # Analyzes images using vision models to extract captions, tags,
11
+ # descriptions, detected objects, and color information.
12
+ #
13
+ # @example Basic usage
14
+ # result = RubyLLM::Agents::ImageAnalyzer.call(image: "path/to/photo.jpg")
15
+ # result.caption # => "A sunset over mountains"
16
+ # result.tags # => [:nature, :sunset, :mountains]
17
+ # result.description # => "A detailed description..."
18
+ #
19
+ # @example Custom analyzer class
20
+ # class ProductAnalyzer < RubyLLM::Agents::ImageAnalyzer
21
+ # model "gpt-4o"
22
+ # analysis_type :detailed
23
+ # extract_colors true
24
+ # detect_objects true
25
+ #
26
+ # description "Analyzes product photos"
27
+ # end
28
+ #
29
+ # result = ProductAnalyzer.call(image: product_photo)
30
+ # result.objects # => [{name: "laptop", confidence: 0.98, bbox: [...]}]
31
+ # result.colors # => [{hex: "#C0C0C0", name: "silver", percentage: 45}]
32
+ #
33
class ImageAnalyzer
  extend DSL
  include Execution

  # Analyze an image in one call.
  #
  # @param image [String, IO] path, URL, or IO object of the image
  # @param options [Hash] per-call options (see #initialize)
  # @return [ImageAnalysisResult] the result containing analysis data
  def self.call(image:, **options)
    new(image: image, **options).call
  end

  # Copy the class-level settings onto a newly defined subclass so it
  # starts from the values its parent has at definition time.
  #
  # @param subclass [Class] the class being defined
  # @return [void]
  def self.inherited(subclass)
    super

    copied_settings = %i[
      @model @analysis_type @extract_colors @detect_objects @extract_text
      @custom_prompt @max_tags @version @description @cache_ttl
    ]
    copied_settings.each do |setting|
      subclass.instance_variable_set(setting, instance_variable_get(setting))
    end
  end

  attr_reader :image, :options, :tenant_id

  # Create an analyzer for a single image.
  #
  # @param image [String, IO] image to analyze (path, URL, or IO object)
  # @param options [Hash] per-call options
  # @option options [String] :model Model to use
  # @option options [Symbol] :analysis_type Type of analysis
  #   (:caption, :detailed, :tags, :objects, :colors, :all)
  # @option options [Boolean] :extract_colors Whether to extract color information
  # @option options [Boolean] :detect_objects Whether to detect objects
  # @option options [Boolean] :extract_text Whether to extract text (OCR)
  # @option options [String] :custom_prompt Custom analysis prompt
  # @option options [Integer] :max_tags Maximum number of tags to keep
  # @option options [Object] :tenant Tenant for multi-tenancy
  def initialize(image:, **options)
    @image = image
    @options = options
    # Starts out unset.
    @tenant_id = nil
  end

  # Run the analysis for this instance.
  #
  # @return [ImageAnalysisResult] the result containing analysis data
  def call
    execute
  end
end
89
+ end
90
+ end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../concerns/image_operation_dsl"
4
+
5
+ module RubyLLM
6
+ module Agents
7
+ class BackgroundRemover
8
+ # DSL for configuring background removers
9
+ #
10
+ # Provides class-level methods to configure model, output format,
11
+ # and edge refinement options.
12
+ #
13
+ # @example
14
+ # class ProductBackgroundRemover < RubyLLM::Agents::BackgroundRemover
15
+ # model "rembg"
16
+ # output_format :png
17
+ # refine_edges true
18
+ # alpha_matting true
19
+ # end
20
+ #
21
module DSL
  include Concerns::ImageOperationDSL

  VALID_OUTPUT_FORMATS = %i[png webp].freeze

  # Set or get the output format.
  #
  # @param value [Symbol, nil] output format (:png, :webp); omit to read
  # @return [Symbol] the output format (default :png)
  # @raise [ArgumentError] when the format is not supported
  def output_format(value = nil)
    return @output_format || inherited_or_default(:output_format, :png) unless value

    unless VALID_OUTPUT_FORMATS.include?(value)
      raise ArgumentError, "Output format must be one of: #{VALID_OUTPUT_FORMATS.join(', ')}"
    end

    @output_format = value
  end

  # Set or get whether to refine edges.
  #
  # When enabled, applies additional processing to smooth and refine the
  # edges of the extracted subject.
  #
  # @param value [Boolean, nil] enable edge refinement; omit to read
  # @return [Boolean] whether edge refinement is enabled (default false)
  def refine_edges(value = nil)
    return @refine_edges = value unless value.nil?
    return @refine_edges unless @refine_edges.nil?

    inherited_or_default(:refine_edges, false)
  end

  # Set or get whether to use alpha matting.
  #
  # Alpha matting produces better results for hair, fur, and
  # semi-transparent elements but is slower.
  #
  # @param value [Boolean, nil] enable alpha matting; omit to read
  # @return [Boolean] whether alpha matting is enabled (default false)
  def alpha_matting(value = nil)
    return @alpha_matting = value unless value.nil?
    return @alpha_matting unless @alpha_matting.nil?

    inherited_or_default(:alpha_matting, false)
  end

  # Set or get the foreground threshold.
  #
  # Pixels with confidence above this threshold are considered
  # foreground. Lower values include more pixels.
  #
  # @param value [Float, nil] threshold (0.0-1.0); omit to read
  # @return [Float] the foreground threshold (default 0.5)
  # @raise [ArgumentError] when the value is not a number in 0.0..1.0
  def foreground_threshold(value = nil)
    return @foreground_threshold || inherited_or_default(:foreground_threshold, 0.5) unless value

    in_range = value.is_a?(Numeric) && value.between?(0.0, 1.0)
    raise ArgumentError, "Foreground threshold must be between 0.0 and 1.0" unless in_range

    @foreground_threshold = value.to_f
  end

  # Set or get the background threshold.
  #
  # Pixels with confidence below this threshold are considered
  # background. Higher values exclude more pixels.
  #
  # @param value [Float, nil] threshold (0.0-1.0); omit to read
  # @return [Float] the background threshold (default 0.5)
  # @raise [ArgumentError] when the value is not a number in 0.0..1.0
  def background_threshold(value = nil)
    return @background_threshold || inherited_or_default(:background_threshold, 0.5) unless value

    in_range = value.is_a?(Numeric) && value.between?(0.0, 1.0)
    raise ArgumentError, "Background threshold must be between 0.0 and 1.0" unless in_range

    @background_threshold = value.to_f
  end

  # Set or get the erode size.
  #
  # Size of morphological erosion applied to shrink the mask slightly to
  # avoid edge artifacts.
  #
  # @param value [Integer, nil] erode size in pixels; omit to read
  # @return [Integer] the erode size (default 0)
  # @raise [ArgumentError] when the value is not a non-negative integer
  def erode_size(value = nil)
    return @erode_size || inherited_or_default(:erode_size, 0) unless value

    acceptable = value.is_a?(Integer) && value >= 0
    raise ArgumentError, "Erode size must be a non-negative integer" unless acceptable

    @erode_size = value
  end

  # Set or get whether to return the mask.
  #
  # When enabled, the result will include the segmentation mask in
  # addition to the extracted foreground.
  #
  # @param value [Boolean, nil] return the segmentation mask; omit to read
  # @return [Boolean] whether to return the mask (default false)
  def return_mask(value = nil)
    return @return_mask = value unless value.nil?
    return @return_mask unless @return_mask.nil?

    inherited_or_default(:return_mask, false)
  end

  private

  # Model used when none is configured on the class: the configured
  # default background remover model, or "rembg" when that is unset.
  #
  # @return [String] model identifier
  def default_model
    configured = config.default_background_remover_model
    configured || "rembg"
  end
end
152
+ end
153
+ end
154
+ end