ruby_llm-agents 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +189 -31
- data/app/controllers/ruby_llm/agents/agents_controller.rb +136 -16
- data/app/controllers/ruby_llm/agents/dashboard_controller.rb +29 -9
- data/app/controllers/ruby_llm/agents/workflows_controller.rb +355 -0
- data/app/helpers/ruby_llm/agents/application_helper.rb +25 -0
- data/app/models/ruby_llm/agents/execution.rb +3 -0
- data/app/models/ruby_llm/agents/tenant_budget.rb +58 -15
- data/app/services/ruby_llm/agents/agent_registry.rb +51 -12
- data/app/views/layouts/ruby_llm/agents/application.html.erb +2 -29
- data/app/views/ruby_llm/agents/agents/_agent.html.erb +13 -1
- data/app/views/ruby_llm/agents/agents/_config_agent.html.erb +235 -0
- data/app/views/ruby_llm/agents/agents/_config_embedder.html.erb +70 -0
- data/app/views/ruby_llm/agents/agents/_config_image_generator.html.erb +152 -0
- data/app/views/ruby_llm/agents/agents/_config_moderator.html.erb +63 -0
- data/app/views/ruby_llm/agents/agents/_config_speaker.html.erb +108 -0
- data/app/views/ruby_llm/agents/agents/_config_transcriber.html.erb +91 -0
- data/app/views/ruby_llm/agents/agents/_workflow.html.erb +1 -1
- data/app/views/ruby_llm/agents/agents/index.html.erb +74 -9
- data/app/views/ruby_llm/agents/agents/show.html.erb +18 -378
- data/app/views/ruby_llm/agents/dashboard/_agent_comparison.html.erb +269 -15
- data/app/views/ruby_llm/agents/executions/show.html.erb +16 -0
- data/app/views/ruby_llm/agents/shared/_agent_type_badge.html.erb +93 -0
- data/app/views/ruby_llm/agents/workflows/_step_performance.html.erb +236 -0
- data/app/views/ruby_llm/agents/workflows/_structure_parallel.html.erb +76 -0
- data/app/views/ruby_llm/agents/workflows/_structure_pipeline.html.erb +74 -0
- data/app/views/ruby_llm/agents/workflows/_structure_router.html.erb +108 -0
- data/app/views/ruby_llm/agents/workflows/show.html.erb +442 -0
- data/config/routes.rb +1 -0
- data/lib/generators/ruby_llm_agents/agent_generator.rb +56 -7
- data/lib/generators/ruby_llm_agents/background_remover_generator.rb +110 -0
- data/lib/generators/ruby_llm_agents/embedder_generator.rb +107 -0
- data/lib/generators/ruby_llm_agents/image_analyzer_generator.rb +115 -0
- data/lib/generators/ruby_llm_agents/image_editor_generator.rb +108 -0
- data/lib/generators/ruby_llm_agents/image_generator_generator.rb +116 -0
- data/lib/generators/ruby_llm_agents/image_pipeline_generator.rb +178 -0
- data/lib/generators/ruby_llm_agents/image_transformer_generator.rb +109 -0
- data/lib/generators/ruby_llm_agents/image_upscaler_generator.rb +103 -0
- data/lib/generators/ruby_llm_agents/image_variator_generator.rb +102 -0
- data/lib/generators/ruby_llm_agents/install_generator.rb +76 -4
- data/lib/generators/ruby_llm_agents/restructure_generator.rb +292 -0
- data/lib/generators/ruby_llm_agents/speaker_generator.rb +121 -0
- data/lib/generators/ruby_llm_agents/templates/add_execution_type_migration.rb.tt +8 -0
- data/lib/generators/ruby_llm_agents/templates/agent.rb.tt +99 -84
- data/lib/generators/ruby_llm_agents/templates/application_agent.rb.tt +42 -40
- data/lib/generators/ruby_llm_agents/templates/application_background_remover.rb.tt +26 -0
- data/lib/generators/ruby_llm_agents/templates/application_embedder.rb.tt +50 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_analyzer.rb.tt +26 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_editor.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_generator.rb.tt +38 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_pipeline.rb.tt +139 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_transformer.rb.tt +21 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_upscaler.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_image_variator.rb.tt +20 -0
- data/lib/generators/ruby_llm_agents/templates/application_speaker.rb.tt +49 -0
- data/lib/generators/ruby_llm_agents/templates/application_transcriber.rb.tt +53 -0
- data/lib/generators/ruby_llm_agents/templates/background_remover.rb.tt +44 -0
- data/lib/generators/ruby_llm_agents/templates/embedder.rb.tt +41 -0
- data/lib/generators/ruby_llm_agents/templates/image_analyzer.rb.tt +45 -0
- data/lib/generators/ruby_llm_agents/templates/image_editor.rb.tt +35 -0
- data/lib/generators/ruby_llm_agents/templates/image_generator.rb.tt +47 -0
- data/lib/generators/ruby_llm_agents/templates/image_pipeline.rb.tt +50 -0
- data/lib/generators/ruby_llm_agents/templates/image_transformer.rb.tt +44 -0
- data/lib/generators/ruby_llm_agents/templates/image_upscaler.rb.tt +38 -0
- data/lib/generators/ruby_llm_agents/templates/image_variator.rb.tt +33 -0
- data/lib/generators/ruby_llm_agents/templates/skills/AGENTS.md.tt +228 -0
- data/lib/generators/ruby_llm_agents/templates/skills/BACKGROUND_REMOVERS.md.tt +131 -0
- data/lib/generators/ruby_llm_agents/templates/skills/EMBEDDERS.md.tt +255 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_ANALYZERS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_EDITORS.md.tt +102 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_GENERATORS.md.tt +282 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_PIPELINES.md.tt +228 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_TRANSFORMERS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_UPSCALERS.md.tt +110 -0
- data/lib/generators/ruby_llm_agents/templates/skills/IMAGE_VARIATORS.md.tt +120 -0
- data/lib/generators/ruby_llm_agents/templates/skills/SPEAKERS.md.tt +212 -0
- data/lib/generators/ruby_llm_agents/templates/skills/TOOLS.md.tt +227 -0
- data/lib/generators/ruby_llm_agents/templates/skills/TRANSCRIBERS.md.tt +251 -0
- data/lib/generators/ruby_llm_agents/templates/skills/WORKFLOWS.md.tt +300 -0
- data/lib/generators/ruby_llm_agents/templates/speaker.rb.tt +56 -0
- data/lib/generators/ruby_llm_agents/templates/transcriber.rb.tt +51 -0
- data/lib/generators/ruby_llm_agents/transcriber_generator.rb +107 -0
- data/lib/generators/ruby_llm_agents/upgrade_generator.rb +152 -1
- data/lib/ruby_llm/agents/audio/speaker.rb +553 -0
- data/lib/ruby_llm/agents/audio/transcriber.rb +669 -0
- data/lib/ruby_llm/agents/base_agent.rb +675 -0
- data/lib/ruby_llm/agents/core/base/moderation_dsl.rb +181 -0
- data/lib/ruby_llm/agents/core/base/moderation_execution.rb +274 -0
- data/lib/ruby_llm/agents/core/base.rb +135 -0
- data/lib/ruby_llm/agents/core/configuration.rb +981 -0
- data/lib/ruby_llm/agents/core/errors.rb +150 -0
- data/lib/ruby_llm/agents/{instrumentation.rb → core/instrumentation.rb} +22 -1
- data/lib/ruby_llm/agents/core/llm_tenant.rb +358 -0
- data/lib/ruby_llm/agents/{version.rb → core/version.rb} +1 -1
- data/lib/ruby_llm/agents/dsl/base.rb +110 -0
- data/lib/ruby_llm/agents/dsl/caching.rb +142 -0
- data/lib/ruby_llm/agents/dsl/reliability.rb +307 -0
- data/lib/ruby_llm/agents/dsl.rb +41 -0
- data/lib/ruby_llm/agents/image/analyzer/dsl.rb +130 -0
- data/lib/ruby_llm/agents/image/analyzer/execution.rb +402 -0
- data/lib/ruby_llm/agents/image/analyzer.rb +90 -0
- data/lib/ruby_llm/agents/image/background_remover/dsl.rb +154 -0
- data/lib/ruby_llm/agents/image/background_remover/execution.rb +240 -0
- data/lib/ruby_llm/agents/image/background_remover.rb +89 -0
- data/lib/ruby_llm/agents/image/concerns/image_operation_dsl.rb +91 -0
- data/lib/ruby_llm/agents/image/concerns/image_operation_execution.rb +165 -0
- data/lib/ruby_llm/agents/image/editor/dsl.rb +56 -0
- data/lib/ruby_llm/agents/image/editor/execution.rb +207 -0
- data/lib/ruby_llm/agents/image/editor.rb +92 -0
- data/lib/ruby_llm/agents/image/generator/active_storage_support.rb +127 -0
- data/lib/ruby_llm/agents/image/generator/content_policy.rb +95 -0
- data/lib/ruby_llm/agents/image/generator/pricing.rb +353 -0
- data/lib/ruby_llm/agents/image/generator/templates.rb +124 -0
- data/lib/ruby_llm/agents/image/generator.rb +455 -0
- data/lib/ruby_llm/agents/image/pipeline/dsl.rb +213 -0
- data/lib/ruby_llm/agents/image/pipeline/execution.rb +382 -0
- data/lib/ruby_llm/agents/image/pipeline.rb +97 -0
- data/lib/ruby_llm/agents/image/transformer/dsl.rb +148 -0
- data/lib/ruby_llm/agents/image/transformer/execution.rb +223 -0
- data/lib/ruby_llm/agents/image/transformer.rb +95 -0
- data/lib/ruby_llm/agents/image/upscaler/dsl.rb +83 -0
- data/lib/ruby_llm/agents/image/upscaler/execution.rb +219 -0
- data/lib/ruby_llm/agents/image/upscaler.rb +81 -0
- data/lib/ruby_llm/agents/image/variator/dsl.rb +62 -0
- data/lib/ruby_llm/agents/image/variator/execution.rb +189 -0
- data/lib/ruby_llm/agents/image/variator.rb +80 -0
- data/lib/ruby_llm/agents/{alert_manager.rb → infrastructure/alert_manager.rb} +17 -22
- data/lib/ruby_llm/agents/infrastructure/budget/budget_query.rb +145 -0
- data/lib/ruby_llm/agents/infrastructure/budget/config_resolver.rb +149 -0
- data/lib/ruby_llm/agents/infrastructure/budget/forecaster.rb +68 -0
- data/lib/ruby_llm/agents/infrastructure/budget/spend_recorder.rb +279 -0
- data/lib/ruby_llm/agents/infrastructure/budget_tracker.rb +275 -0
- data/lib/ruby_llm/agents/{execution_logger_job.rb → infrastructure/execution_logger_job.rb} +17 -1
- data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/executor.rb +2 -1
- data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/retry_strategy.rb +9 -3
- data/lib/ruby_llm/agents/{reliability.rb → infrastructure/reliability.rb} +11 -21
- data/lib/ruby_llm/agents/pipeline/builder.rb +215 -0
- data/lib/ruby_llm/agents/pipeline/context.rb +255 -0
- data/lib/ruby_llm/agents/pipeline/executor.rb +86 -0
- data/lib/ruby_llm/agents/pipeline/middleware/base.rb +124 -0
- data/lib/ruby_llm/agents/pipeline/middleware/budget.rb +95 -0
- data/lib/ruby_llm/agents/pipeline/middleware/cache.rb +171 -0
- data/lib/ruby_llm/agents/pipeline/middleware/instrumentation.rb +415 -0
- data/lib/ruby_llm/agents/pipeline/middleware/reliability.rb +276 -0
- data/lib/ruby_llm/agents/pipeline/middleware/tenant.rb +196 -0
- data/lib/ruby_llm/agents/pipeline.rb +68 -0
- data/lib/ruby_llm/agents/{engine.rb → rails/engine.rb} +79 -11
- data/lib/ruby_llm/agents/results/background_removal_result.rb +286 -0
- data/lib/ruby_llm/agents/{result.rb → results/base.rb} +73 -1
- data/lib/ruby_llm/agents/results/embedding_result.rb +243 -0
- data/lib/ruby_llm/agents/results/image_analysis_result.rb +314 -0
- data/lib/ruby_llm/agents/results/image_edit_result.rb +250 -0
- data/lib/ruby_llm/agents/results/image_generation_result.rb +346 -0
- data/lib/ruby_llm/agents/results/image_pipeline_result.rb +399 -0
- data/lib/ruby_llm/agents/results/image_transform_result.rb +251 -0
- data/lib/ruby_llm/agents/results/image_upscale_result.rb +255 -0
- data/lib/ruby_llm/agents/results/image_variation_result.rb +237 -0
- data/lib/ruby_llm/agents/results/moderation_result.rb +158 -0
- data/lib/ruby_llm/agents/results/speech_result.rb +338 -0
- data/lib/ruby_llm/agents/results/transcription_result.rb +408 -0
- data/lib/ruby_llm/agents/text/embedder.rb +444 -0
- data/lib/ruby_llm/agents/text/moderator.rb +237 -0
- data/lib/ruby_llm/agents/workflow/async.rb +220 -0
- data/lib/ruby_llm/agents/workflow/async_executor.rb +156 -0
- data/lib/ruby_llm/agents/{workflow.rb → workflow/orchestrator.rb} +6 -5
- data/lib/ruby_llm/agents/workflow/parallel.rb +34 -17
- data/lib/ruby_llm/agents/workflow/thread_pool.rb +185 -0
- data/lib/ruby_llm/agents.rb +86 -20
- metadata +172 -34
- data/lib/ruby_llm/agents/base/caching.rb +0 -40
- data/lib/ruby_llm/agents/base/cost_calculation.rb +0 -105
- data/lib/ruby_llm/agents/base/dsl.rb +0 -324
- data/lib/ruby_llm/agents/base/execution.rb +0 -366
- data/lib/ruby_llm/agents/base/reliability_dsl.rb +0 -82
- data/lib/ruby_llm/agents/base/reliability_execution.rb +0 -136
- data/lib/ruby_llm/agents/base/response_building.rb +0 -86
- data/lib/ruby_llm/agents/base/tool_tracking.rb +0 -57
- data/lib/ruby_llm/agents/base.rb +0 -210
- data/lib/ruby_llm/agents/budget_tracker.rb +0 -733
- data/lib/ruby_llm/agents/configuration.rb +0 -394
- /data/lib/ruby_llm/agents/{deprecations.rb → core/deprecations.rb} +0 -0
- /data/lib/ruby_llm/agents/{inflections.rb → core/inflections.rb} +0 -0
- /data/lib/ruby_llm/agents/{resolved_config.rb → core/resolved_config.rb} +0 -0
- /data/lib/ruby_llm/agents/{attempt_tracker.rb → infrastructure/attempt_tracker.rb} +0 -0
- /data/lib/ruby_llm/agents/{cache_helper.rb → infrastructure/cache_helper.rb} +0 -0
- /data/lib/ruby_llm/agents/{circuit_breaker.rb → infrastructure/circuit_breaker.rb} +0 -0
- /data/lib/ruby_llm/agents/{redactor.rb → infrastructure/redactor.rb} +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/breaker_manager.rb +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/execution_constraints.rb +0 -0
- /data/lib/ruby_llm/agents/{reliability → infrastructure/reliability}/fallback_routing.rb +0 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "json"
|
|
5
|
+
require_relative "../concerns/image_operation_execution"
|
|
6
|
+
|
|
7
|
+
module RubyLLM
|
|
8
|
+
module Agents
|
|
9
|
+
class ImageAnalyzer
|
|
10
|
+
# Execution logic for image analyzers
|
|
11
|
+
#
|
|
12
|
+
# Handles image validation, budget tracking, caching,
|
|
13
|
+
# analysis execution, and result building.
|
|
14
|
+
#
|
|
15
|
+
module Execution
|
|
16
|
+
include Concerns::ImageOperationExecution
|
|
17
|
+
|
|
18
|
+
# Execute the image analysis pipeline
|
|
19
|
+
#
|
|
20
|
+
# @return [ImageAnalysisResult] The result containing analysis data
|
|
21
|
+
def execute
  # Captured first so that both success and failure paths can report it.
  started_at = Time.current

  resolve_tenant_context!
  check_budget! if budget_tracking_enabled?
  validate_image!

  # Short-circuit with the cached result when caching is on and there is a hit.
  if cache_enabled?
    cache_hit = check_cache(ImageAnalysisResult)
    return cache_hit if cache_hit
  end

  # Run the analysis and wrap it in a result object.
  outcome = build_result(
    analysis: analyze_image,
    started_at: started_at,
    completed_at: Time.current
  )

  write_cache(outcome) if cache_enabled?
  record_execution(outcome) if execution_tracking_enabled?

  outcome
rescue StandardError => error
  # Failures are not re-raised: they are recorded (when tracking is enabled)
  # and handed back to the caller as an error result.
  record_failed_execution(error, started_at) if execution_tracking_enabled?
  build_error_result(error, started_at)
end
|
|
53
|
+
|
|
54
|
+
private
|
|
55
|
+
|
|
56
|
+
# Identifier for this kind of execution.
#
# @return [String] always "image_analysis"
def execution_type
  'image_analysis'
end
|
|
59
|
+
|
|
60
|
+
# Validate the image input before any model call is made.
#
# Only String inputs are inspected further: an http(s):// URL is accepted
# as-is, anything else is treated as a local path that must exist.
# Non-String inputs (e.g. IO objects) only need to be non-nil.
#
# @raise [ArgumentError] when the image is nil, a blank string, or a
#   local path that does not exist
# @return [void]
def validate_image!
  blank = image.nil? || (image.is_a?(String) && image.strip.empty?)
  raise ArgumentError, "Image cannot be blank" if blank

  return unless image.is_a?(String)
  # Require a real scheme separator so a local file such as
  # "http_banner.png" is not mistaken for a URL and left unchecked.
  return if image.match?(%r{\Ahttps?://}i)
  return if File.exist?(image)

  raise ArgumentError, "Image file does not exist: #{image}"
end
|
|
70
|
+
|
|
71
|
+
# Run one analysis round-trip: build the prompt, send it to the vision
# model, and turn the model's reply into structured data.
#
# @return [Object] whatever parse_analysis_response returns for the reply
def analyze_image
  parse_analysis_response(call_vision_model(build_analysis_prompt))
end
|
|
81
|
+
|
|
82
|
+
# Assemble the text prompt sent to the vision model.
#
# A custom prompt, when present, is returned untouched. Otherwise the
# prompt is made of the lines for the resolved analysis type, optional
# "Also ..." requests for colors / objects / text, and a trailing request
# for JSON output followed by the example schema.
#
# @return [String] the prompt
def build_analysis_prompt
  custom = resolve_custom_prompt
  return custom if custom

  kind = resolve_analysis_type

  # Lines specific to the analysis type; an unrecognised type adds none.
  lines =
    case kind
    when :caption
      ["Provide a brief, single-sentence caption for this image."]
    when :detailed
      [
        "Provide a detailed description of this image including:",
        "- A brief caption (1 sentence)",
        "- A detailed description (2-3 paragraphs)",
        "- Key tags/keywords (up to #{resolve_max_tags})"
      ]
    when :tags
      ["List up to #{resolve_max_tags} relevant tags/keywords for this image as a comma-separated list."]
    when :objects
      [
        "Identify all objects visible in this image. For each object provide:",
        "- Name of the object",
        "- Approximate location (top-left, center, bottom-right, etc.)",
        "- Confidence level (high, medium, low)"
      ]
    when :colors
      [
        "Identify the dominant colors in this image. For each color provide:",
        "- Hex color code",
        "- Color name",
        "- Approximate percentage of the image"
      ]
    when :all
      [
        "Analyze this image comprehensively. Provide:",
        "1. A brief caption (1 sentence)",
        "2. A detailed description (2-3 paragraphs)",
        "3. Key tags/keywords (up to #{resolve_max_tags})",
        "4. Objects detected with locations",
        "5. Dominant colors with hex codes and percentages"
      ]
    else
      []
    end

  # Extra requests are skipped when the analysis type already covers them.
  if resolve_extract_colors && !%i[colors all].include?(kind)
    lines << "\nAlso identify the dominant colors (hex, name, percentage)."
  end
  if resolve_detect_objects && !%i[objects all].include?(kind)
    lines << "\nAlso identify objects visible with their locations."
  end
  lines << "\nAlso extract any visible text (OCR)." if resolve_extract_text

  lines << "\nFormat your response as JSON with the following structure:"
  lines << build_json_schema

  lines.join("\n")
end
|
|
135
|
+
|
|
136
|
+
# Example JSON document shown to the model to describe the expected
# response shape.
#
# @return [String] pretty-printed JSON
def build_json_schema
  example_object = { "name" => "object name", "location" => "position", "confidence" => "high/medium/low" }
  example_color = { "hex" => "#RRGGBB", "name" => "color name", "percentage" => 25 }

  JSON.pretty_generate(
    {
      "caption" => "Brief caption string",
      "description" => "Detailed description string (if applicable)",
      "tags" => %w[array of tag strings],
      "objects" => [example_object],
      "colors" => [example_color],
      "text" => "Extracted text if any"
    }
  )
end
|
|
148
|
+
|
|
149
|
+
# Send the prompt together with the prepared image to a RubyLLM chat.
#
# @param prompt [String] the analysis prompt
# @return [Object] the value returned by the chat's #ask call
def call_vision_model(prompt)
  model_id = resolve_model
  attachment = { image: prepare_image_content }

  RubyLLM.chat(model: model_id).ask(prompt, with: attachment)
end
|
|
159
|
+
|
|
160
|
+
# Convert the image input into the string handed to the vision model.
#
# * http(s):// URL strings are returned unchanged.
# * Any other String is treated as a file path: the file is read in
#   binary mode and returned as a base64 data URI whose MIME type comes
#   from the file extension.
# * Objects responding to #read are read (and rewound when possible) and
#   returned as a base64 data URI. The MIME type is taken from the
#   object's #path extension when it has one, otherwise "image/png".
# * Anything else is returned as its #to_s.
#
# @return [String] URL, data URI, or string form of the image
def prepare_image_content
  if image.is_a?(String)
    # Require a real scheme separator so a local file such as
    # "http_banner.png" is read from disk instead of being sent as a URL.
    return image if image.match?(%r{\Ahttps?://}i)

    return encode_image_data_uri(File.binread(image), detect_mime_type(image))
  end

  return image.to_s unless image.respond_to?(:read)

  bytes = image.read
  image.rewind if image.respond_to?(:rewind)

  # File-like objects expose the path they were opened from; use it so a
  # JPEG handle is not labelled as PNG.
  source_path = image.respond_to?(:path) ? image.path : nil
  mime_type = source_path.is_a?(String) ? detect_mime_type(source_path) : "image/png"

  encode_image_data_uri(bytes, mime_type)
end

# Build a base64 data URI. Uses Array#pack("m0") (strict base64, no line
# breaks) so the separate base64 library does not have to be loaded.
def encode_image_data_uri(bytes, mime_type)
  "data:#{mime_type};base64,#{[bytes.to_s].pack('m0')}"
end
|
|
183
|
+
|
|
184
|
+
# Map a file path's extension (case-insensitive) to an image MIME type.
#
# @param path [String] file path
# @return [String] MIME type; "image/png" for unrecognised extensions
def detect_mime_type(path)
  by_extension = {
    ".jpg" => "image/jpeg",
    ".jpeg" => "image/jpeg",
    ".png" => "image/png",
    ".gif" => "image/gif",
    ".webp" => "image/webp",
    ".bmp" => "image/bmp"
  }

  by_extension.fetch(File.extname(path).downcase, "image/png")
end
|
|
195
|
+
|
|
196
|
+
# Turn the model's reply into structured analysis data.
#
# The span from the first "{" to the last "}" in the reply is parsed as
# JSON and normalized. When there is no such span, or it is not valid
# JSON, the reply is handled by the plain-text parser instead.
#
# @param response [#content] model reply
# @return [Hash] analysis data
def parse_analysis_response(response)
  body = response.content
  found = body.match(/\{[\s\S]*\}/m)
  return parse_text_response(body) unless found

  begin
    normalize_analysis_data(JSON.parse(found[0], symbolize_names: true))
  rescue JSON::ParserError
    # Looked like JSON but was not; treat the whole reply as free text.
    parse_text_response(body)
  end
end
|
|
213
|
+
|
|
214
|
+
# Coerce a parsed JSON hash into the analysis data shape.
#
# Caption, description and text become strings (nil stays nil); tags,
# objects and colors go through their own normalizers; the parsed hash
# itself is kept under :raw_response.
#
# @param data [Hash] parsed model output with symbol keys
# @return [Hash]
def normalize_analysis_data(data)
  as_text = %i[caption description text].to_h { |field| [field, data[field]&.to_s] }

  {
    caption: as_text[:caption],
    description: as_text[:description],
    tags: normalize_tags(data[:tags]),
    objects: normalize_objects(data[:objects]),
    colors: normalize_colors(data[:colors]),
    text: as_text[:text],
    raw_response: data
  }
end
|
|
225
|
+
|
|
226
|
+
# Clean a list of tags: stringify, trim, drop blanks, and cap the list at
# the resolved maximum.
#
# @param tags [Array, Object] raw tags; anything but an Array yields []
# @return [Array<String>]
def normalize_tags(tags)
  return [] unless tags.is_a?(Array)

  cleaned = tags.each_with_object([]) do |tag, kept|
    label = tag.to_s.strip
    kept << label unless label.empty?
  end

  cleaned.first(resolve_max_tags)
end
|
|
231
|
+
|
|
232
|
+
# Clean a list of detected objects.
#
# Non-Hash entries are skipped. For each Hash, name and location are
# stringified, confidence is stringified and lower-cased, and the bounding
# box is taken from :bbox or, failing that, :bounding_box. Keys whose value
# is nil are left out.
#
# @param objects [Array, Object] raw objects; anything but an Array yields []
# @return [Array<Hash>]
def normalize_objects(objects)
  return [] unless objects.is_a?(Array)

  objects.each_with_object([]) do |entry, found|
    next unless entry.is_a?(Hash)

    box = entry[:bbox] || entry[:bounding_box]
    shaped = {
      name: entry[:name]&.to_s,
      location: entry[:location]&.to_s,
      confidence: entry[:confidence]&.to_s&.downcase,
      bbox: box
    }
    found << shaped.compact
  end
end
|
|
245
|
+
|
|
246
|
+
# Clean a list of colors.
#
# Non-Hash entries are skipped. For each Hash, hex and name are
# stringified and percentage is converted with #to_f. Keys whose value is
# nil are left out.
#
# @param colors [Array, Object] raw colors; anything but an Array yields []
# @return [Array<Hash>]
def normalize_colors(colors)
  return [] unless colors.is_a?(Array)

  colors.each_with_object([]) do |entry, found|
    next unless entry.is_a?(Hash)

    shaped = {
      hex: entry[:hex]&.to_s,
      name: entry[:name]&.to_s,
      percentage: entry[:percentage]&.to_f
    }
    found << shaped.compact
  end
end
|
|
258
|
+
|
|
259
|
+
# Fallback used when the reply cannot be read as JSON: derive a caption
# and tags from the free text and keep the text itself as both the
# description and the raw response. Objects, colors and text stay empty.
#
# @param content [String] model reply
# @return [Hash]
def parse_text_response(content)
  guessed_caption = extract_caption_from_text(content)
  guessed_tags = extract_tags_from_text(content)

  {
    caption: guessed_caption,
    description: content,
    tags: guessed_tags,
    objects: [],
    colors: [],
    text: nil,
    raw_response: content
  }
end
|
|
271
|
+
|
|
272
|
+
# Use the text before the first ".", "!" or "?" as a caption, trimmed and
# limited to 200 characters.
#
# @param content [String] free text
# @return [String, nil] nil when the text yields no first segment
def extract_caption_from_text(content)
  lead = content.split(/[.!?]/).first
  return nil if lead.nil?

  lead.strip[0, 200]
end
|
|
277
|
+
|
|
278
|
+
# Heuristically pull tags out of free text.
#
# Two sources are combined, in this order:
# 1. bullet items ("-", "*" or "•" followed by one or two words), and
# 2. the comma/semicolon separated list that follows "tags" or
#    "keywords" on the same line.
#
# Tags are lower-cased, blank entries are dropped, duplicates are removed,
# and the list is capped at the resolved maximum.
#
# @param content [String] free text
# @return [Array<String>]
def extract_tags_from_text(content)
  # [ \t] rather than \s keeps a bullet on its own line, so an item does
  # not absorb the first word of the following line.
  bullet_tags = content.scan(/[-*•][ \t]*(\w+(?:[ \t]+\w+)?)/).map { |(words)| words.strip.downcase }

  labelled = content[/(?:tags|keywords)[:\s]*(.+?)(?:\n|$)/i, 1]
  listed_tags = labelled.to_s.split(/[,;]/).map { |tag| tag.strip.downcase }

  # Doubled or dangling separators ("a,, b") would otherwise yield "" tags.
  (bullet_tags + listed_tags).reject(&:empty?).uniq.first(resolve_max_tags)
end
|
|
296
|
+
|
|
297
|
+
# Wrap successful analysis data in an ImageAnalysisResult.
#
# @param analysis [Hash] data with :caption, :description, :tags,
#   :objects, :colors, :text and :raw_response
# @param started_at [Object] when execution started
# @param completed_at [Object] when execution finished
# @return [ImageAnalysisResult]
def build_result(analysis:, started_at:, completed_at:)
  attributes = {
    image: image,
    model_id: resolve_model,
    analysis_type: resolve_analysis_type
  }

  # Copy the analysis fields across one by one; a missing key becomes nil.
  %i[caption description tags objects colors text raw_response].each do |field|
    attributes[field] = analysis[field]
  end

  attributes[:started_at] = started_at
  attributes[:completed_at] = completed_at
  attributes[:tenant_id] = @tenant_id
  attributes[:analyzer_class] = self.class.name

  ImageAnalysisResult.new(**attributes)
end
|
|
315
|
+
|
|
316
|
+
# Build the ImageAnalysisResult returned when execution fails: all
# analysis fields are empty and the error's class name and message are
# attached. The completion time is taken when this method runs.
#
# @param error [Exception] the failure
# @param started_at [Object] when execution started
# @return [ImageAnalysisResult]
def build_error_result(error, started_at)
  empty_analysis = {
    caption: nil,
    description: nil,
    tags: [],
    objects: [],
    colors: [],
    text: nil,
    raw_response: nil
  }

  attributes = {
    image: image,
    model_id: resolve_model,
    analysis_type: resolve_analysis_type
  }.merge(empty_analysis)

  attributes[:started_at] = started_at
  attributes[:completed_at] = Time.current
  attributes[:tenant_id] = @tenant_id
  attributes[:analyzer_class] = self.class.name
  attributes[:error_class] = error.class.name
  attributes[:error_message] = error.message

  ImageAnalysisResult.new(**attributes)
end
|
|
336
|
+
|
|
337
|
+
# Resolution methods
|
|
338
|
+
|
|
339
|
+
# Analysis type for this call: the :analysis_type option when truthy,
# otherwise the class-level setting.
def resolve_analysis_type
  per_call = options[:analysis_type]
  return per_call if per_call

  self.class.analysis_type
end
|
|
342
|
+
|
|
343
|
+
# Whether to extract colors: the :extract_colors option when that key is
# present (even if false or nil), otherwise the class-level setting.
def resolve_extract_colors
  class_setting = self.class.extract_colors
  options.key?(:extract_colors) ? options[:extract_colors] : class_setting
end
|
|
346
|
+
|
|
347
|
+
# Whether to detect objects: the :detect_objects option when that key is
# present (even if false or nil), otherwise the class-level setting.
def resolve_detect_objects
  class_setting = self.class.detect_objects
  options.key?(:detect_objects) ? options[:detect_objects] : class_setting
end
|
|
350
|
+
|
|
351
|
+
# Whether to extract text: the :extract_text option when that key is
# present (even if false or nil), otherwise the class-level setting.
def resolve_extract_text
  class_setting = self.class.extract_text
  options.key?(:extract_text) ? options[:extract_text] : class_setting
end
|
|
354
|
+
|
|
355
|
+
# Custom prompt for this call: the :custom_prompt option when truthy,
# otherwise the class-level setting.
def resolve_custom_prompt
  per_call = options[:custom_prompt]
  return per_call if per_call

  self.class.custom_prompt
end
|
|
358
|
+
|
|
359
|
+
# Maximum number of tags for this call: the :max_tags option when truthy,
# otherwise the class-level setting.
def resolve_max_tags
  per_call = options[:max_tags]
  return per_call if per_call

  self.class.max_tags
end
|
|
362
|
+
|
|
363
|
+
# Cache key components
|
|
364
|
+
# Values that together identify one cacheable analysis.
#
# Everything that changes the prompt or the normalized result has to be
# part of the key. The custom prompt replaces the generated prompt
# entirely and max_tags both appears in the prompt and truncates the tag
# list, so both are included; without them, calls that differ only in
# those settings would share a cache entry. The custom prompt is hashed
# to keep the key short.
#
# @return [Array<String>] key parts, in a stable order
def cache_key_components
  [
    "image_analyzer",
    self.class.name,
    self.class.version,
    resolve_model,
    resolve_analysis_type.to_s,
    resolve_extract_colors.to_s,
    resolve_detect_objects.to_s,
    resolve_extract_text.to_s,
    Digest::SHA256.hexdigest(image_digest.to_s),
    resolve_max_tags.to_s,
    Digest::SHA256.hexdigest(resolve_custom_prompt.to_s)
  ]
end
|
|
377
|
+
|
|
378
|
+
# Raw material for the image part of the cache key.
#
# * A String naming an existing file yields the file's bytes.
# * An object responding to #read yields what it reads (and is rewound
#   afterwards when it supports that).
# * Anything else, including URL strings, yields its #to_s.
#
# @return [String]
def image_digest
  if image.is_a?(String) && File.exist?(image)
    # Binary read: image data must not go through text-mode newline or
    # encoding handling.
    File.binread(image)
  elsif image.respond_to?(:read)
    bytes = image.read
    image.rewind if image.respond_to?(:rewind)
    bytes
  else
    image.to_s
  end
end
|
|
389
|
+
|
|
390
|
+
# Summarize a result for execution tracking: the analysis type, how many
# tags / objects / colors it holds, and whether its text is present.
#
# @param result [#analysis_type, #tags, #objects, #colors, #text]
# @return [Hash]
def build_execution_metadata(result)
  metadata = { analysis_type: result.analysis_type }

  %i[tags objects colors].each do |collection|
    metadata[:"#{collection}_count"] = result.public_send(collection).size
  end

  metadata[:has_text] = result.text.present?
  metadata
end
|
|
399
|
+
end
|
|
400
|
+
end
|
|
401
|
+
end
|
|
402
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "analyzer/dsl"
|
|
4
|
+
require_relative "analyzer/execution"
|
|
5
|
+
|
|
6
|
+
module RubyLLM
|
|
7
|
+
module Agents
|
|
8
|
+
# Image analyzer for understanding and captioning images
|
|
9
|
+
#
|
|
10
|
+
# Analyzes images using vision models to extract captions, tags,
|
|
11
|
+
# descriptions, detected objects, and color information.
|
|
12
|
+
#
|
|
13
|
+
# @example Basic usage
|
|
14
|
+
# result = RubyLLM::Agents::ImageAnalyzer.call(image: "path/to/photo.jpg")
|
|
15
|
+
# result.caption # => "A sunset over mountains"
|
|
16
|
+
# result.tags # => ["nature", "sunset", "mountains"]
|
|
17
|
+
# result.description # => "A detailed description..."
|
|
18
|
+
#
|
|
19
|
+
# @example Custom analyzer class
|
|
20
|
+
# class ProductAnalyzer < RubyLLM::Agents::ImageAnalyzer
|
|
21
|
+
# model "gpt-4o"
|
|
22
|
+
# analysis_type :detailed
|
|
23
|
+
# extract_colors true
|
|
24
|
+
# detect_objects true
|
|
25
|
+
#
|
|
26
|
+
# description "Analyzes product photos"
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
# result = ProductAnalyzer.call(image: product_photo)
|
|
30
|
+
# result.objects # => [{name: "laptop", location: "center", confidence: "high"}]
|
|
31
|
+
# result.colors # => [{hex: "#C0C0C0", name: "silver", percentage: 45}]
|
|
32
|
+
#
|
|
33
|
+
class ImageAnalyzer
|
|
34
|
+
extend DSL
|
|
35
|
+
include Execution
|
|
36
|
+
|
|
37
|
+
class << self
|
|
38
|
+
# Execute image analysis
|
|
39
|
+
#
|
|
40
|
+
# @param image [String, IO] Path, URL, or IO object of the image to analyze
|
|
41
|
+
# @param options [Hash] Additional options (model, analysis_type, etc.)
|
|
42
|
+
# @return [ImageAnalysisResult] The result containing analysis data
|
|
43
|
+
def call(image:, **options)
  # Build a one-off analyzer for this image and run it straight away.
  analyzer = new(image: image, **options)
  analyzer.call
end
|
|
46
|
+
|
|
47
|
+
# Ensure subclasses inherit DSL settings
|
|
48
|
+
def inherited(subclass)
  super

  # Seed the new subclass with the parent's current value for every DSL
  # setting (nil when the parent has not set it).
  %i[
    @model @analysis_type @extract_colors @detect_objects @extract_text
    @custom_prompt @max_tags @version @description @cache_ttl
  ].each do |setting|
    subclass.instance_variable_set(setting, instance_variable_get(setting))
  end
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
attr_reader :image, :options, :tenant_id
|
|
64
|
+
|
|
65
|
+
# Initialize a new image analyzer instance
|
|
66
|
+
#
|
|
67
|
+
# @param image [String, IO] Image to analyze (path, URL, or IO object)
|
|
68
|
+
# @param options [Hash] Additional options
|
|
69
|
+
# @option options [String] :model Model to use
|
|
70
|
+
# @option options [Symbol] :analysis_type Type of analysis (:caption, :detailed, :tags, :objects, :colors, :all)
|
|
71
|
+
# @option options [Boolean] :extract_colors Whether to extract color information
|
|
72
|
+
# @option options [Boolean] :detect_objects Whether to detect objects
|
|
73
|
+
# @option options [Boolean] :extract_text Whether to extract text (OCR)
|
|
74
|
+
# @option options [String] :custom_prompt Custom analysis prompt
|
|
75
|
+
# @option options [Object] :tenant Tenant for multi-tenancy
|
|
76
|
+
def initialize(image:, **options)
  # The tenant id starts out nil; this constructor does not derive it
  # from the options.
  @image, @options, @tenant_id = image, options, nil
end
|
|
81
|
+
|
|
82
|
+
# Execute the image analysis
|
|
83
|
+
#
|
|
84
|
+
# @return [ImageAnalysisResult] The result containing analysis data
|
|
85
|
+
def call
  # Instance-level entry point; all of the work happens in #execute.
  execute
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../concerns/image_operation_dsl"
|
|
4
|
+
|
|
5
|
+
module RubyLLM
|
|
6
|
+
module Agents
|
|
7
|
+
class BackgroundRemover
|
|
8
|
+
# DSL for configuring background removers
|
|
9
|
+
#
|
|
10
|
+
# Provides class-level methods to configure model, output format,
|
|
11
|
+
# and edge refinement options.
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# class ProductBackgroundRemover < RubyLLM::Agents::BackgroundRemover
|
|
15
|
+
# model "rembg"
|
|
16
|
+
# output_format :png
|
|
17
|
+
# refine_edges true
|
|
18
|
+
# alpha_matting true
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
module DSL
|
|
22
|
+
include Concerns::ImageOperationDSL
|
|
23
|
+
|
|
24
|
+
VALID_OUTPUT_FORMATS = %i[png webp].freeze
|
|
25
|
+
|
|
26
|
+
# Set or get the output format
|
|
27
|
+
#
|
|
28
|
+
# @param value [Symbol, nil] Output format (:png, :webp)
|
|
29
|
+
# @return [Symbol] The output format
|
|
30
|
+
def output_format(value = nil)
  # Reader mode (nil/false argument): prefer the value stored on this
  # object, otherwise ask inherited_or_default with :png as the fallback.
  return @output_format || inherited_or_default(:output_format, :png) unless value

  # Writer mode: only formats listed in VALID_OUTPUT_FORMATS are accepted.
  supported = VALID_OUTPUT_FORMATS.include?(value)
  raise ArgumentError, "Output format must be one of: #{VALID_OUTPUT_FORMATS.join(', ')}" unless supported

  @output_format = value
end
|
|
40
|
+
|
|
41
|
+
# Set or get whether to refine edges
|
|
42
|
+
#
|
|
43
|
+
# When enabled, applies additional processing to smooth
|
|
44
|
+
# and refine the edges of the extracted subject.
|
|
45
|
+
#
|
|
46
|
+
# @param value [Boolean, nil] Enable edge refinement
|
|
47
|
+
# @return [Boolean] Whether edge refinement is enabled
|
|
48
|
+
def refine_edges(value = nil)
  # Any non-nil argument (including false) is stored and returned.
  return @refine_edges = value unless value.nil?

  # Reader mode: nil means "not set here", so an explicit false is kept
  # and only a nil falls through to inherited_or_default.
  @refine_edges.nil? ? inherited_or_default(:refine_edges, false) : @refine_edges
end
|
|
57
|
+
|
|
58
|
+
# Set or get whether to use alpha matting
|
|
59
|
+
#
|
|
60
|
+
# Alpha matting produces better results for hair, fur,
|
|
61
|
+
# and semi-transparent elements but is slower.
|
|
62
|
+
#
|
|
63
|
+
# @param value [Boolean, nil] Enable alpha matting
|
|
64
|
+
# @return [Boolean] Whether alpha matting is enabled
|
|
65
|
+
def alpha_matting(value = nil)
  # Writer mode: a non-nil argument (true or false) overwrites the setting.
  unless value.nil?
    @alpha_matting = value
    return value
  end

  # Reader mode: a locally stored value wins, even when it is false;
  # otherwise defer to inherited_or_default with false as the fallback.
  stored = @alpha_matting
  stored.nil? ? inherited_or_default(:alpha_matting, false) : stored
end
|
|
74
|
+
|
|
75
|
+
# Set or get the foreground threshold
|
|
76
|
+
#
|
|
77
|
+
# Pixels with confidence above this threshold are considered
|
|
78
|
+
# foreground. Lower values include more pixels.
|
|
79
|
+
#
|
|
80
|
+
# @param value [Float, nil] Threshold (0.0-1.0)
|
|
81
|
+
# @return [Float] The foreground threshold
|
|
82
|
+
def foreground_threshold(value = nil)
  # Reader mode: stored value first, then inherited_or_default (fallback 0.5).
  return @foreground_threshold || inherited_or_default(:foreground_threshold, 0.5) unless value

  # Writer mode: must be a Numeric within the closed interval 0.0..1.0.
  within_unit_interval = value.is_a?(Numeric) && value.between?(0.0, 1.0)
  raise ArgumentError, "Foreground threshold must be between 0.0 and 1.0" unless within_unit_interval

  # Stored as a Float regardless of the numeric type passed in.
  @foreground_threshold = value.to_f
end
|
|
92
|
+
|
|
93
|
+
# Set or get the background threshold
|
|
94
|
+
#
|
|
95
|
+
# Pixels with confidence below this threshold are considered
|
|
96
|
+
# background. Higher values exclude more pixels.
|
|
97
|
+
#
|
|
98
|
+
# @param value [Float, nil] Threshold (0.0-1.0)
|
|
99
|
+
# @return [Float] The background threshold
|
|
100
|
+
def background_threshold(value = nil)
  if value
    # Writer mode: reject anything that is not a Numeric in 0.0..1.0.
    acceptable = value.is_a?(Numeric) && value.between?(0.0, 1.0)
    raise ArgumentError, "Background threshold must be between 0.0 and 1.0" unless acceptable

    # Normalised to Float before being stored.
    return @background_threshold = value.to_f
  end

  # Reader mode: stored value first, then inherited_or_default (fallback 0.5).
  @background_threshold || inherited_or_default(:background_threshold, 0.5)
end
|
|
110
|
+
|
|
111
|
+
# Set or get the erode size
|
|
112
|
+
#
|
|
113
|
+
# Size of morphological erosion applied to shrink the mask
|
|
114
|
+
# slightly to avoid edge artifacts.
|
|
115
|
+
#
|
|
116
|
+
# @param value [Integer, nil] Erode size in pixels
|
|
117
|
+
# @return [Integer] The erode size
|
|
118
|
+
def erode_size(value = nil)
  # Reader mode: stored value first, then inherited_or_default (fallback 0).
  return @erode_size || inherited_or_default(:erode_size, 0) unless value

  # Writer mode: only Integers >= 0 are accepted (Floats are rejected).
  non_negative_integer = value.is_a?(Integer) && value >= 0
  raise ArgumentError, "Erode size must be a non-negative integer" unless non_negative_integer

  @erode_size = value
end
|
|
128
|
+
|
|
129
|
+
# Set or get whether to return the mask
|
|
130
|
+
#
|
|
131
|
+
# When enabled, the result will include the segmentation mask
|
|
132
|
+
# in addition to the extracted foreground.
|
|
133
|
+
#
|
|
134
|
+
# @param value [Boolean, nil] Return segmentation mask
|
|
135
|
+
# @return [Boolean] Whether to return the mask
|
|
136
|
+
def return_mask(value = nil)
  # Writer mode: any non-nil argument, including false, is stored as-is.
  return @return_mask = value unless value.nil?

  # Reader mode: keep an explicit local false; only an unset (nil) value
  # defers to inherited_or_default with false as the fallback.
  current = @return_mask
  return current unless current.nil?

  inherited_or_default(:return_mask, false)
end
|
|
145
|
+
|
|
146
|
+
private
|
|
147
|
+
|
|
148
|
+
def default_model
  # Use the configured background-remover model when one is set,
  # otherwise fall back to the literal "rembg".
  configured = config.default_background_remover_model
  configured || "rembg"
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
end
|