ragdoll 0.1.1 → 0.1.8
This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +52 -1
- data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
- data/app/jobs/ragdoll/extract_text_job.rb +38 -0
- data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
- data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
- data/app/lib/ragdoll/metadata_schemas.rb +332 -0
- data/app/models/ragdoll/audio_content.rb +142 -0
- data/app/models/ragdoll/content.rb +95 -0
- data/app/models/ragdoll/document.rb +611 -0
- data/app/models/ragdoll/embedding.rb +176 -0
- data/app/models/ragdoll/image_content.rb +194 -0
- data/app/models/ragdoll/text_content.rb +137 -0
- data/app/services/ragdoll/configuration_service.rb +113 -0
- data/app/services/ragdoll/document_management.rb +108 -0
- data/app/services/ragdoll/document_processor.rb +342 -0
- data/app/services/ragdoll/embedding_service.rb +202 -0
- data/app/services/ragdoll/image_description_service.rb +230 -0
- data/app/services/ragdoll/metadata_generator.rb +329 -0
- data/app/services/ragdoll/model_resolver.rb +72 -0
- data/app/services/ragdoll/search_engine.rb +51 -0
- data/app/services/ragdoll/text_chunker.rb +208 -0
- data/app/services/ragdoll/text_generation_service.rb +355 -0
- data/lib/ragdoll/core/client.rb +32 -41
- data/lib/ragdoll/core/configuration.rb +140 -156
- data/lib/ragdoll/core/database.rb +1 -1
- data/lib/ragdoll/core/model.rb +45 -0
- data/lib/ragdoll/core/version.rb +1 -1
- data/lib/ragdoll/core.rb +35 -17
- data/lib/ragdoll.rb +1 -1
- data/lib/tasks/annotate.rake +1 -1
- data/lib/tasks/db.rake +2 -2
- metadata +24 -20
- data/lib/ragdoll/core/document_management.rb +0 -110
- data/lib/ragdoll/core/document_processor.rb +0 -344
- data/lib/ragdoll/core/embedding_service.rb +0 -183
- data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
- data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
- data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
- data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
- data/lib/ragdoll/core/metadata_schemas.rb +0 -334
- data/lib/ragdoll/core/models/audio_content.rb +0 -175
- data/lib/ragdoll/core/models/content.rb +0 -126
- data/lib/ragdoll/core/models/document.rb +0 -678
- data/lib/ragdoll/core/models/embedding.rb +0 -204
- data/lib/ragdoll/core/models/image_content.rb +0 -227
- data/lib/ragdoll/core/models/text_content.rb +0 -169
- data/lib/ragdoll/core/search_engine.rb +0 -50
- data/lib/ragdoll/core/services/image_description_service.rb +0 -230
- data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
- data/lib/ragdoll/core/text_chunker.rb +0 -210
- data/lib/ragdoll/core/text_generation_service.rb +0 -360
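
The pattern in this file list is a restructure: the job, model, and service files removed from `data/lib/ragdoll/core/...` reappear under a Rails-style `data/app/...` tree (`app/jobs`, `app/models`, `app/services`), and the two hunks below show the deleted service files in full. Under conventional Rails autoloading, `app/services/ragdoll/image_description_service.rb` would define a flatter constant than the old `lib/` path did. A minimal sketch of the likely rename; the new constant name is inferred from the file path and is not shown anywhere in this diff:

```ruby
# 0.1.1: defined in data/lib/ragdoll/core/services/image_description_service.rb
Ragdoll::Core::Services::ImageDescriptionService.new

# 0.1.8: data/app/services/ragdoll/image_description_service.rb would
# conventionally define this constant (an inference from the path, not
# something this diff confirms):
Ragdoll::ImageDescriptionService.new
```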
```diff
@@ -1,230 +0,0 @@
-# frozen_string_literal: true
-
-# Image description service using RubyLLM
-
-require "ruby_llm"
-require "base64"
-require "rmagick"
-
-module Ragdoll
-  module Core
-    module Services
-      class ImageDescriptionService
-        class DescriptionError < StandardError; end
-
-        DEFAULT_OPTIONS = {
-          model: "gemma3",
-          provider: :ollama,
-          assume_model_exists: true, # Bypass registry check
-          temperature: 0.4,
-          prompt: "Describe the image in detail."
-        }.freeze
-
-        DEFAULT_FALLBACK_OPTIONS = {
-          model: "smollm2",
-          provider: :ollama,
-          assume_model_exists: true, # Bypass LLM registry check
-          temperature: 0.6
-        }.freeze
-
-        def initialize(primary: DEFAULT_OPTIONS, fallback: DEFAULT_FALLBACK_OPTIONS)
-          puts "🚀 ImageDescriptionService: Initializing with primary: #{primary.inspect}"
-          puts "🚀 ImageDescriptionService: Initializing with fallback: #{fallback.inspect}"
-
-          # Configure RubyLLM using the same pattern as the working example
-          configure_ruby_llm_globally
-
-          primary_temp = primary.delete(:temperature) || DEFAULT_OPTIONS[:temperature]
-          @primary_prompt = primary.delete(:prompt) || DEFAULT_OPTIONS[:prompt]
-          fallback_temp = fallback.delete(:temperature) || DEFAULT_FALLBACK_OPTIONS[:temperature]
-
-          puts "🤖 ImageDescriptionService: Attempting to create primary model..."
-          begin
-            @primary = RubyLLM.chat(**primary).with_temperature(primary_temp)
-            puts "✅ ImageDescriptionService: Primary model created successfully: #{@primary.class}"
-          rescue StandardError => e
-            puts "❌ ImageDescriptionService: Primary model creation failed: #{e.message}"
-            @primary = nil
-          end
-
-          puts "🔄 ImageDescriptionService: Attempting to create fallback model..."
-          begin
-            @fallback = RubyLLM.chat(**fallback).with_temperature(fallback_temp)
-            puts "✅ ImageDescriptionService: Fallback model created successfully: #{@fallback.class}"
-          rescue StandardError => e
-            puts "❌ ImageDescriptionService: Fallback model creation failed: #{e.message}"
-            @fallback = nil
-          end
-
-          return unless @primary.nil? && @fallback.nil?
-
-          puts "⚠️ ImageDescriptionService: WARNING - No models available! Service will return placeholders only."
-        end
-
-        # Generate a description for a local image file.
-        # path (String) - absolute path to the image
-        def generate_description(path)
-          puts "🔍 ImageDescriptionService: Starting description generation for #{path}"
-          start_time = Time.now
-
-          @image_path = path
-          return "" unless @image_path && File.exist?(@image_path) && image_file?
-
-          # Attempt to read image and prepare data; on failure return placeholder
-          data = nil
-          begin
-            puts "📸 ImageDescriptionService: Reading image with Magick..."
-            @image = Magick::Image.read(@image_path).first
-            data = prepare_image_data
-            puts "✅ ImageDescriptionService: Image data prepared (#{data.length} chars base64)"
-          rescue StandardError => e
-            puts "❌ ImageDescriptionService: Failed to read image: #{e.message}"
-            return "[Image file: #{File.basename(@image_path)}]"
-          end
-          return "" unless data
-
-          # Attempt vision model call if client available
-          if @primary
-            puts "🤖 ImageDescriptionService: Attempting primary model (#{@primary.inspect})"
-            begin
-              @primary.add_message(
-                role: "user",
-                content: [
-                  { type: "text", text: @primary_prompt },
-                  { type: "image_url", image_url: { url: "data:#{@image.mime_type};base64,#{data}" } }
-                ]
-              )
-              puts "📤 ImageDescriptionService: Calling primary model complete()..."
-              response = @primary.complete
-              puts "📥 ImageDescriptionService: Primary model response received: #{response.inspect}"
-              desc = extract_description(response)
-              if desc && !desc.empty?
-                elapsed = Time.now - start_time
-                puts "✅ ImageDescriptionService: Primary model success! Description: '#{desc[0..100]}...' (#{elapsed.round(2)}s)"
-                return desc
-              end
-            rescue StandardError => e
-              puts "❌ ImageDescriptionService: Primary model failed: #{e.message}"
-            end
-          else
-            puts "⚠️ ImageDescriptionService: No primary model available"
-          end
-
-          # Attempt fallback if available
-          if @fallback
-            puts "🔄 ImageDescriptionService: Attempting fallback model (#{@fallback.inspect})"
-            begin
-              fallback_response = @fallback.ask(fallback_prompt).content
-              elapsed = Time.now - start_time
-              puts "✅ ImageDescriptionService: Fallback model success! Description: '#{fallback_response[0..100]}...' (#{elapsed.round(2)}s)"
-              return fallback_response
-            rescue StandardError => e
-              puts "❌ ImageDescriptionService: Fallback model failed: #{e.message}"
-            end
-          else
-            puts "⚠️ ImageDescriptionService: No fallback model available"
-          end
-
-          # Default placeholder when LLM unavailable
-          elapsed = Time.now - start_time
-          puts "🔚 ImageDescriptionService: Returning placeholder after #{elapsed.round(2)}s"
-          "[Image file: #{File.basename(@image_path)}]"
-        end
-
-        private
-
-        def configure_ruby_llm_globally
-          puts "⚙️ ImageDescriptionService: Configuring RubyLLM globally..."
-
-          # Get Ragdoll configuration or use defaults
-          ragdoll_config = begin
-            Ragdoll::Core.configuration
-          rescue StandardError
-            nil
-          end
-          # FIXME: ollama_url is not in current config structure, should use ruby_llm_config[:ollama][:endpoint]
-          ollama_endpoint = ragdoll_config&.ruby_llm_config&.dig(:ollama, :endpoint) || ENV["OLLAMA_API_BASE"] || ENV["OLLAMA_ENDPOINT"] || "http://localhost:11434"
-
-          puts "🔗 ImageDescriptionService: Using ollama endpoint: #{ollama_endpoint}"
-
-          # Follow the exact pattern from the working example
-          RubyLLM.configure do |config|
-            # Set all provider configs like the working example
-            config.openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
-            config.openai_organization_id = ENV.fetch("OPENAI_ORGANIZATION_ID", nil)
-            config.openai_project_id = ENV.fetch("OPENAI_PROJECT_ID", nil)
-            config.anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
-            config.gemini_api_key = ENV.fetch("GEMINI_API_KEY", nil)
-            config.deepseek_api_key = ENV.fetch("DEEPSEEK_API_KEY", nil)
-            config.openrouter_api_key = ENV.fetch("OPENROUTER_API_KEY", nil)
-            config.bedrock_api_key = ENV.fetch("BEDROCK_ACCESS_KEY_ID", nil)
-            config.bedrock_secret_key = ENV.fetch("BEDROCK_SECRET_ACCESS_KEY", nil)
-            config.bedrock_region = ENV.fetch("BEDROCK_REGION", nil)
-            config.bedrock_session_token = ENV.fetch("BEDROCK_SESSION_TOKEN", nil)
-
-            # Key: Use the exact same method name as the working example
-            config.ollama_api_base = ollama_endpoint
-            config.openai_api_base = ENV.fetch("OPENAI_API_BASE", nil)
-            config.log_level = :error
-          end
-
-          puts "✅ ImageDescriptionService: RubyLLM configured successfully with global settings"
-        rescue StandardError => e
-          puts "❌ ImageDescriptionService: Failed to configure RubyLLM: #{e.message}"
-        end
-
-        def image_file?
-          %w[.jpg .jpeg .png .gif .bmp .webp .svg .ico .tiff
-             .tif].include?(File.extname(@image_path).downcase)
-        end
-
-        def prepare_image_data
-          Base64.strict_encode64(File.binread(@image_path))
-        rescue StandardError
-          nil
-        end
-
-        def extract_description(response)
-          text = if response.respond_to?(:content)
-                   response.content
-                 elsif response.is_a?(Hash) && response.dig("choices", 0, "message", "content")
-                   response["choices"][0]["message"]["content"]
-                 end
-          clean_description(text)
-        end
-
-        def clean_description(description)
-          return unless description.is_a?(String)
-
-          cleaned = description
-                    .strip
-                    .sub(/\ADescription:?:?\s*/i, "")
-                    .gsub(/\s+/, " ")
-                    .gsub(@image_path, "")
-                    .strip
-          cleaned << "." unless cleaned =~ /[.!?]\z/
-          cleaned
-        end
-
-        def fallback_prompt
-          <<~PROMPT
-            You are a text-based AI tasked with generating a descriptive guess about an image based on its physical characteristics and the absolute pathname provided.
-
-            Please consider the following details:
-
-            1. **Absolute Pathname:** #{@image_path}
-            2. **Image Characteristics:**
-               - **Width:** #{@image.columns}
-               - **Height:** #{@image.rows}
-               - **MIME/Type:** #{@image.mime_type}
-               - **File Size:** #{@image.filesize} bytes
-               - **Number of Colors:** #{@image.number_colors}
-
-            Based on the above information, please make your best guess about what the image might depict. Consider common uses for the file format, the aspect ratio, and any hints from the pathname itself. Provide provide your best guess as a brief description that includes potential subjects, themes, or contexts of the image.

-          PROMPT
-        end
-      end
-    end
-  end
-end
```
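
For reference, a minimal sketch of driving the removed service through the public API shown above, assuming a local Ollama instance and a hypothetical image path. One quirk visible in the deleted code: `initialize` mutates its option hashes with `delete`, while the defaults `DEFAULT_OPTIONS` and `DEFAULT_FALLBACK_OPTIONS` are frozen, so calling `new` with no arguments would raise `FrozenError`; the sketch therefore passes fresh hashes.

```ruby
require "ragdoll" # gem entry point; assumed to load the service in 0.1.1

# Fresh, unfrozen hashes, because #initialize deletes :temperature/:prompt.
service = Ragdoll::Core::Services::ImageDescriptionService.new(
  primary:  { model: "gemma3", provider: :ollama, assume_model_exists: true,
              temperature: 0.4, prompt: "Describe the image in detail." },
  fallback: { model: "smollm2", provider: :ollama, assume_model_exists: true,
              temperature: 0.6 }
)

# Returns the vision model's description, the fallback model's guess from
# image metadata, or "[Image file: photo.png]" when neither model responds.
puts service.generate_description("/tmp/photo.png") # hypothetical path
```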
```diff
@@ -1,335 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../metadata_schemas"
-
-module Ragdoll
-  module Core
-    module Services
-      # Service for generating structured metadata using LLM providers
-      # Leverages structured output capabilities to ensure consistent metadata schemas
-      class MetadataGenerator
-        def initialize(llm_client: nil)
-          @llm_client = llm_client || default_llm_client
-        end
-
-        # Generate metadata for a document based on its content and type
-        def generate_for_document(document)
-          case document.document_type
-          when "text", "markdown", "html"
-            generate_text_metadata(document)
-          when "image"
-            generate_image_metadata(document)
-          when "audio"
-            generate_audio_metadata(document)
-          when "pdf", "docx"
-            generate_pdf_metadata(document)
-          when "mixed"
-            generate_mixed_metadata(document)
-          else
-            generate_text_metadata(document) # fallback
-          end
-        end
-
-        # Generate metadata for text content
-        def generate_text_metadata(document)
-          # Combine all text content from the document
-          text_content = document.text_contents.map(&:content).join("\n\n")
-          return {} if text_content.blank?
-
-          schema = MetadataSchemas::TEXT_SCHEMA
-          prompt = build_text_analysis_prompt(text_content)
-
-          generate_structured_metadata(prompt, schema)
-        end
-
-        # Generate metadata for image content
-        def generate_image_metadata(document)
-          # For images, we need to use vision-capable models
-          image_content = document.image_contents.first
-          return {} unless image_content&.image_attached?
-
-          schema = MetadataSchemas::IMAGE_SCHEMA
-          prompt = build_image_analysis_prompt(image_content)
-
-          # This would use a vision model like GPT-4V, Claude 3, etc.
-          generate_structured_metadata(prompt, schema, content_type: "image", image: image_content.image)
-        end
-
-        # Generate metadata for audio content
-        def generate_audio_metadata(document)
-          audio_content = document.audio_contents.first
-          return {} unless audio_content
-
-          schema = MetadataSchemas::AUDIO_SCHEMA
-
-          # Use transcript if available, otherwise analyze audio directly
-          prompt = if audio_content.transcript.present?
-                     build_audio_transcript_analysis_prompt(audio_content.transcript, audio_content.duration)
-                   else
-                     # This would require audio-capable models or speech-to-text preprocessing
-                     build_audio_analysis_prompt(audio_content)
-                   end
-
-          generate_structured_metadata(prompt, schema)
-        end
-
-        # Generate metadata for PDF content
-        def generate_pdf_metadata(document)
-          text_content = document.text_contents.map(&:content).join("\n\n")
-          return {} if text_content.blank?
-
-          schema = MetadataSchemas::PDF_SCHEMA
-          prompt = build_pdf_analysis_prompt(text_content, document.file_metadata)
-
-          generate_structured_metadata(prompt, schema)
-        end
-
-        # Generate metadata for mixed/multi-modal content
-        def generate_mixed_metadata(document)
-          schema = MetadataSchemas::MIXED_SCHEMA
-
-          # Combine analysis from all content types
-          content_summaries = []
-
-          document.text_contents.each do |text|
-            content_summaries << { type: "text", content: text.content[0..500] }
-          end
-
-          document.image_contents.each do |image|
-            content_summaries << { type: "image", description: image.description || "Image content" }
-          end
-
-          document.audio_contents.each do |audio|
-            content_summaries << { type: "audio", transcript: audio.transcript || "Audio content" }
-          end
-
-          prompt = build_mixed_analysis_prompt(content_summaries)
-          generate_structured_metadata(prompt, schema)
-        end
-
-        private
-
-        # Core method for generating structured metadata using LLM
-        def generate_structured_metadata(prompt, schema, content_type: "text", image: nil)
-          case @llm_client&.provider
-          when "openai"
-            generate_with_openai(prompt, schema, content_type, image)
-          when "anthropic"
-            generate_with_anthropic(prompt, schema, content_type, image)
-          when "ollama"
-            generate_with_ollama(prompt, schema)
-          else
-            # Fallback to basic LLM call without structured output
-            generate_with_fallback(prompt, schema)
-          end
-        rescue StandardError => e
-          Rails.logger.error "Metadata generation failed: #{e.message}" if defined?(Rails)
-          puts "Metadata generation failed: #{e.message}"
-          {}
-        end
-
-        # OpenAI structured output
-        def generate_with_openai(prompt, schema, content_type, image)
-          messages = build_messages(prompt, content_type, image)
-
-          response = @llm_client.chat(
-            model: "gpt-4o", # Use latest model with structured output
-            messages: messages,
-            response_format: {
-              type: "json_schema",
-              json_schema: {
-                name: "document_metadata",
-                schema: schema
-              }
-            },
-            temperature: 0.1
-          )
-
-          JSON.parse(response.dig("choices", 0, "message", "content") || "{}")
-        end
-
-        # Anthropic structured output (using XML format)
-        def generate_with_anthropic(prompt, schema, content_type, image)
-          # Anthropic doesn't have native JSON schema support yet
-          # Use XML format with clear instructions
-          structured_prompt = "#{prompt}\n\nPlease respond with a JSON object that follows this exact schema:\n#{schema.to_json}\n\nRespond only with valid JSON, no explanations."
-
-          messages = build_messages(structured_prompt, content_type, image)
-
-          response = @llm_client.chat(
-            model: "claude-3-5-sonnet-20241022",
-            messages: messages,
-            temperature: 0.1
-          )
-
-          # Extract JSON from response
-          content = response.dig("content", 0, "text") || "{}"
-          JSON.parse(content.match(/\{.*\}/m)&.to_s || "{}")
-        end
-
-        # Ollama structured output (using Llama 3.1 or similar)
-        def generate_with_ollama(prompt, schema)
-          structured_prompt = "#{prompt}\n\nRespond with valid JSON matching this schema:\n#{schema.to_json}"
-
-          response = @llm_client.generate(
-            model: "llama3.1:8b",
-            prompt: structured_prompt,
-            format: "json",
-            options: { temperature: 0.1 }
-          )
-
-          JSON.parse(response["response"] || "{}")
-        end
-
-        # Fallback for any LLM provider
-        def generate_with_fallback(prompt, schema)
-          structured_prompt = "#{prompt}\n\nPlease respond with a JSON object that includes these fields: #{schema[:required]&.join(', ')}\n\nRespond only with valid JSON."
-
-          response = @llm_client.generate(prompt: structured_prompt)
-          JSON.parse(response || "{}")
-        rescue JSON::ParserError
-          {}
-        end
-
-        # Build messages array for chat-based APIs
-        def build_messages(prompt, content_type, image)
-          messages = [
-            {
-              role: "system",
-              content: "You are an expert document analyzer. Generate structured metadata as valid JSON following the provided schema exactly."
-            }
-          ]
-
-          messages << if content_type == "image" && image
-                        {
-                          role: "user",
-                          content: [
-                            { type: "text", text: prompt },
-                            { type: "image_url", image_url: { url: image_url_for(image) } }
-                          ]
-                        }
-                      else
-                        { role: "user", content: prompt }
-                      end
-
-          messages
-        end
-
-        # Build analysis prompts for different content types
-        def build_text_analysis_prompt(text_content)
-          <<~PROMPT
-            Analyze the following text document and extract structured metadata:
-
-            #{text_content[0..2000]}#{text_content.length > 2000 ? '...' : ''}
-
-            Please analyze this text and provide comprehensive metadata including:
-            - A concise summary (2-3 sentences)
-            - Relevant keywords and topics
-            - Document classification and complexity level
-            - Sentiment analysis
-            - Estimated reading time
-            - Language detection
-          PROMPT
-        end
-
-        def build_image_analysis_prompt(image_content)
-          existing_description = image_content.description || image_content.alt_text
-          base_prompt = <<~PROMPT
-            Analyze this image and provide comprehensive metadata including:
-            - Detailed description of what's shown
-            - Objects, people, or elements visible
-            - Scene type and visual style
-            - Dominant colors and mood
-            - Any visible text content
-            - Relevant keywords for search
-          PROMPT
-
-          if existing_description.present?
-            "#{base_prompt}\n\nExisting description: #{existing_description}"
-          else
-            base_prompt
-          end
-        end
-
-        def build_audio_transcript_analysis_prompt(transcript, duration)
-          <<~PROMPT
-            Analyze the following audio transcript and provide metadata:
-
-            Duration: #{duration} seconds
-            Transcript: #{transcript[0..1500]}#{transcript.length > 1500 ? '...' : ''}
-
-            Please analyze this audio content and provide:
-            - Summary of the spoken content
-            - Content type (speech, music, podcast, etc.)
-            - Topics discussed
-            - Number of speakers
-            - Language and mood
-            - Key quotes or important phrases
-          PROMPT
-        end
-
-        def build_audio_analysis_prompt(audio_content)
-          <<~PROMPT
-            Analyze this audio file and provide metadata:
-
-            Duration: #{audio_content.duration} seconds
-            Sample Rate: #{audio_content.sample_rate} Hz
-
-            Please determine:
-            - Type of audio content (speech, music, sound effects, etc.)
-            - If music: genre, mood, instruments
-            - If speech: estimated number of speakers, formality level
-            - Overall audio characteristics
-          PROMPT
-        end
-
-        def build_pdf_analysis_prompt(text_content, file_metadata)
-          <<~PROMPT
-            Analyze this PDF document and provide structured metadata:
-
-            File info: #{file_metadata}
-            Content preview: #{text_content[0..2000]}#{text_content.length > 2000 ? '...' : ''}
-
-            Please analyze this PDF and provide:
-            - Document type and classification
-            - Summary of content
-            - Topics and keywords
-            - Document structure analysis
-            - Complexity level
-            - Estimated reading time
-          PROMPT
-        end
-
-        def build_mixed_analysis_prompt(content_summaries)
-          content_desc = content_summaries.map { |c| "#{c[:type]}: #{c.values[1]}" }.join("\n\n")
-
-          <<~PROMPT
-            Analyze this multi-modal document containing different types of content:
-
-            #{content_desc}
-
-            Please provide comprehensive metadata for this mixed-content document:
-            - Overall summary combining all content types
-            - How the different content types relate to each other
-            - Primary vs secondary content types
-            - Keywords spanning all content
-            - Classification for the complete document
-          PROMPT
-        end
-
-        # Utility methods
-        def default_llm_client
-          # This would integrate with your LLM client
-          # Could use ruby_llm or direct API clients
-          nil
-        end
-
-        def image_url_for(image)
-          # Convert Shrine attachment to URL for vision APIs
-          # This would need proper implementation based on your Shrine setup
-          image.url if image.respond_to?(:url)
-        end
-      end
-    end
-  end
-end
```
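
The deleted generator never builds a provider client itself: `default_llm_client` returns `nil`, `@llm_client&.provider` then falls through to `generate_with_fallback`, and the resulting `NoMethodError` on the nil client is rescued into `{}`, so without an injected `llm_client` every call returned empty metadata. A hedged sketch of an injectable client for the `"ollama"` branch follows; the `OllamaClient` class is hypothetical, written only to satisfy the `#provider` and `#generate(model:, prompt:, format:, options:)` calls the deleted code makes, against Ollama's real `/api/generate` endpoint.

```ruby
require "json"
require "net/http"

# Hypothetical duck-typed client: MetadataGenerator only needs #provider
# plus, for the "ollama" branch, a #generate matching the call site above.
class OllamaClient
  def provider = "ollama"

  def generate(model: "llama3.1:8b", prompt:, format: nil, options: {})
    uri  = URI("http://localhost:11434/api/generate")
    body = { model: model, prompt: prompt, format: format,
             options: options, stream: false }.to_json
    res  = Net::HTTP.post(uri, body, "Content-Type" => "application/json")
    JSON.parse(res.body) # a Hash with a "response" key, as the caller expects
  end
end

generator = Ragdoll::Core::Services::MetadataGenerator.new(llm_client: OllamaClient.new)
metadata  = generator.generate_for_document(document) # document: an existing Ragdoll document record
```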