ragdoll 0.1.1 → 0.1.8

This diff shows the content of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/Rakefile +52 -1
  4. data/app/jobs/ragdoll/extract_keywords_job.rb +28 -0
  5. data/app/jobs/ragdoll/extract_text_job.rb +38 -0
  6. data/app/jobs/ragdoll/generate_embeddings_job.rb +28 -0
  7. data/app/jobs/ragdoll/generate_summary_job.rb +25 -0
  8. data/app/lib/ragdoll/metadata_schemas.rb +332 -0
  9. data/app/models/ragdoll/audio_content.rb +142 -0
  10. data/app/models/ragdoll/content.rb +95 -0
  11. data/app/models/ragdoll/document.rb +611 -0
  12. data/app/models/ragdoll/embedding.rb +176 -0
  13. data/app/models/ragdoll/image_content.rb +194 -0
  14. data/app/models/ragdoll/text_content.rb +137 -0
  15. data/app/services/ragdoll/configuration_service.rb +113 -0
  16. data/app/services/ragdoll/document_management.rb +108 -0
  17. data/app/services/ragdoll/document_processor.rb +342 -0
  18. data/app/services/ragdoll/embedding_service.rb +202 -0
  19. data/app/services/ragdoll/image_description_service.rb +230 -0
  20. data/app/services/ragdoll/metadata_generator.rb +329 -0
  21. data/app/services/ragdoll/model_resolver.rb +72 -0
  22. data/app/services/ragdoll/search_engine.rb +51 -0
  23. data/app/services/ragdoll/text_chunker.rb +208 -0
  24. data/app/services/ragdoll/text_generation_service.rb +355 -0
  25. data/lib/ragdoll/core/client.rb +32 -41
  26. data/lib/ragdoll/core/configuration.rb +140 -156
  27. data/lib/ragdoll/core/database.rb +1 -1
  28. data/lib/ragdoll/core/model.rb +45 -0
  29. data/lib/ragdoll/core/version.rb +1 -1
  30. data/lib/ragdoll/core.rb +35 -17
  31. data/lib/ragdoll.rb +1 -1
  32. data/lib/tasks/annotate.rake +1 -1
  33. data/lib/tasks/db.rake +2 -2
  34. metadata +24 -20
  35. data/lib/ragdoll/core/document_management.rb +0 -110
  36. data/lib/ragdoll/core/document_processor.rb +0 -344
  37. data/lib/ragdoll/core/embedding_service.rb +0 -183
  38. data/lib/ragdoll/core/jobs/extract_keywords.rb +0 -32
  39. data/lib/ragdoll/core/jobs/extract_text.rb +0 -42
  40. data/lib/ragdoll/core/jobs/generate_embeddings.rb +0 -32
  41. data/lib/ragdoll/core/jobs/generate_summary.rb +0 -29
  42. data/lib/ragdoll/core/metadata_schemas.rb +0 -334
  43. data/lib/ragdoll/core/models/audio_content.rb +0 -175
  44. data/lib/ragdoll/core/models/content.rb +0 -126
  45. data/lib/ragdoll/core/models/document.rb +0 -678
  46. data/lib/ragdoll/core/models/embedding.rb +0 -204
  47. data/lib/ragdoll/core/models/image_content.rb +0 -227
  48. data/lib/ragdoll/core/models/text_content.rb +0 -169
  49. data/lib/ragdoll/core/search_engine.rb +0 -50
  50. data/lib/ragdoll/core/services/image_description_service.rb +0 -230
  51. data/lib/ragdoll/core/services/metadata_generator.rb +0 -335
  52. data/lib/ragdoll/core/text_chunker.rb +0 -210
  53. data/lib/ragdoll/core/text_generation_service.rb +0 -360
data/app/services/ragdoll/image_description_service.rb
@@ -0,0 +1,230 @@
+ # frozen_string_literal: true
+
+ # Image description service using RubyLLM
+
+ require "ruby_llm"
+ require "base64"
+ require "rmagick"
+
+ module Ragdoll
+   class ImageDescriptionService
+     class DescriptionError < StandardError; end
+
+     DEFAULT_OPTIONS = {
+       model: "gemma3",
+       provider: :ollama,
+       assume_model_exists: true, # Bypass registry check
+       temperature: 0.4,
+       prompt: "Describe the image in detail."
+     }.freeze
+
+     DEFAULT_FALLBACK_OPTIONS = {
+       model: "smollm2",
+       provider: :ollama,
+       assume_model_exists: true, # Bypass LLM registry check
+       temperature: 0.6
+     }.freeze
+
+     def initialize(primary: DEFAULT_OPTIONS, fallback: DEFAULT_FALLBACK_OPTIONS)
+       puts "🚀 ImageDescriptionService: Initializing with primary: #{primary.inspect}"
+       puts "🚀 ImageDescriptionService: Initializing with fallback: #{fallback.inspect}"
+
+       # Configure RubyLLM using the same pattern as the working example
+       configure_ruby_llm_globally
+
+       # Duplicate hashes to avoid modifying frozen constants
+       primary_opts = primary.dup
+       fallback_opts = fallback.dup
+
+       primary_temp = primary_opts.delete(:temperature) || DEFAULT_OPTIONS[:temperature]
+       @primary_prompt = primary_opts.delete(:prompt) || DEFAULT_OPTIONS[:prompt]
+       fallback_temp = fallback_opts.delete(:temperature) || DEFAULT_FALLBACK_OPTIONS[:temperature]
+
+       puts "🤖 ImageDescriptionService: Attempting to create primary model..."
+       begin
+         @primary = RubyLLM.chat(**primary_opts).with_temperature(primary_temp)
+         puts "✅ ImageDescriptionService: Primary model created successfully: #{@primary.class}"
+       rescue StandardError => e
+         puts "❌ ImageDescriptionService: Primary model creation failed: #{e.message}"
+         @primary = nil
+       end
+
+       puts "🔄 ImageDescriptionService: Attempting to create fallback model..."
+       begin
+         @fallback = RubyLLM.chat(**fallback_opts).with_temperature(fallback_temp)
+         puts "✅ ImageDescriptionService: Fallback model created successfully: #{@fallback.class}"
+       rescue StandardError => e
+         puts "❌ ImageDescriptionService: Fallback model creation failed: #{e.message}"
+         @fallback = nil
+       end
+
+       return unless @primary.nil? && @fallback.nil?
+
+       puts "⚠️ ImageDescriptionService: WARNING - No models available! Service will return placeholders only."
+     end
+
+     # Generate a description for a local image file.
+     # path (String) - absolute path to the image
+     def generate_description(path)
+       puts "🔍 ImageDescriptionService: Starting description generation for #{path}"
+       start_time = Time.now
+
+       @image_path = path
+       return "" unless @image_path && File.exist?(@image_path) && image_file?
+
+       # Attempt to read image and prepare data; on failure return placeholder
+       data = nil
+       begin
+         puts "📸 ImageDescriptionService: Reading image with Magick..."
+         @image = Magick::Image.read(@image_path).first
+         data = prepare_image_data
+         puts "✅ ImageDescriptionService: Image data prepared (#{data.length} chars base64)"
+       rescue StandardError => e
+         puts "❌ ImageDescriptionService: Failed to read image: #{e.message}"
+         return "[Image file: #{File.basename(@image_path)}]"
+       end
+       return "" unless data
+
+       # Attempt vision model call if client available
+       if @primary
+         puts "🤖 ImageDescriptionService: Attempting primary model (#{@primary.inspect})"
+         begin
+           @primary.add_message(
+             role: "user",
+             content: [
+               { type: "text", text: @primary_prompt },
+               { type: "image_url", image_url: { url: "data:#{@image.mime_type};base64,#{data}" } }
+             ]
+           )
+           puts "📤 ImageDescriptionService: Calling primary model complete()..."
+           response = @primary.complete
+           puts "📥 ImageDescriptionService: Primary model response received: #{response.inspect}"
+           desc = extract_description(response)
+           if desc && !desc.empty?
+             elapsed = Time.now - start_time
+             puts "✅ ImageDescriptionService: Primary model success! Description: '#{desc[0..100]}...' (#{elapsed.round(2)}s)"
+             return desc
+           end
+         rescue StandardError => e
+           puts "❌ ImageDescriptionService: Primary model failed: #{e.message}"
+         end
+       else
+         puts "⚠️ ImageDescriptionService: No primary model available"
+       end
+
+       # Attempt fallback if available
+       if @fallback
+         puts "🔄 ImageDescriptionService: Attempting fallback model (#{@fallback.inspect})"
+         begin
+           fallback_response = @fallback.ask(fallback_prompt).content
+           elapsed = Time.now - start_time
+           puts "✅ ImageDescriptionService: Fallback model success! Description: '#{fallback_response[0..100]}...' (#{elapsed.round(2)}s)"
+           return fallback_response
+         rescue StandardError => e
+           puts "❌ ImageDescriptionService: Fallback model failed: #{e.message}"
+         end
+       else
+         puts "⚠️ ImageDescriptionService: No fallback model available"
+       end
+
+       # Default placeholder when LLM unavailable
+       elapsed = Time.now - start_time
+       puts "🔚 ImageDescriptionService: Returning placeholder after #{elapsed.round(2)}s"
+       "[Image file: #{File.basename(@image_path)}]"
+     end
+
+     private
+
+     def configure_ruby_llm_globally
+       puts "⚙️ ImageDescriptionService: Configuring RubyLLM globally..."
+
+       # Get Ragdoll configuration or use defaults
+       ragdoll_config = begin
+         Ragdoll.configuration
+       rescue StandardError
+         nil
+       end
+       # Prefer the configured ruby_llm_config[:ollama][:endpoint], then env vars, then localhost
+       ollama_endpoint = ragdoll_config&.ruby_llm_config&.dig(:ollama, :endpoint) || ENV.fetch("OLLAMA_API_BASE", ENV.fetch("OLLAMA_ENDPOINT", "http://localhost:11434"))
+
+       puts "🔗 ImageDescriptionService: Using ollama endpoint: #{ollama_endpoint}"
+
+       # Follow the exact pattern from the working example
+       RubyLLM.configure do |config|
+         # Set all provider configs like the working example
+         config.openai_api_key = ENV.fetch("OPENAI_API_KEY", nil)
+         config.openai_organization_id = ENV.fetch("OPENAI_ORGANIZATION_ID", nil)
+         config.openai_project_id = ENV.fetch("OPENAI_PROJECT_ID", nil)
+         config.anthropic_api_key = ENV.fetch("ANTHROPIC_API_KEY", nil)
+         config.gemini_api_key = ENV.fetch("GEMINI_API_KEY", nil)
+         config.deepseek_api_key = ENV.fetch("DEEPSEEK_API_KEY", nil)
+         config.openrouter_api_key = ENV.fetch("OPENROUTER_API_KEY", nil)
+         config.bedrock_api_key = ENV.fetch("BEDROCK_ACCESS_KEY_ID", nil)
+         config.bedrock_secret_key = ENV.fetch("BEDROCK_SECRET_ACCESS_KEY", nil)
+         config.bedrock_region = ENV.fetch("BEDROCK_REGION", nil)
+         config.bedrock_session_token = ENV.fetch("BEDROCK_SESSION_TOKEN", nil)
+
+         # Key: Use the exact same method name as the working example
+         config.ollama_api_base = ollama_endpoint
+         config.openai_api_base = ENV.fetch("OPENAI_API_BASE", nil)
+         config.log_level = :error
+       end
+
+       puts "✅ ImageDescriptionService: RubyLLM configured successfully with global settings"
+     rescue StandardError => e
+       puts "❌ ImageDescriptionService: Failed to configure RubyLLM: #{e.message}"
+     end
+
+     def image_file?
+       %w[.jpg .jpeg .png .gif .bmp .webp .svg .ico .tiff
+          .tif].include?(File.extname(@image_path).downcase)
+     end
+
+     def prepare_image_data
+       Base64.strict_encode64(File.binread(@image_path))
+     rescue StandardError
+       nil
+     end
+
+     def extract_description(response)
+       text = if response.respond_to?(:content)
+                response.content
+              elsif response.is_a?(Hash) && response.dig("choices", 0, "message", "content")
+                response["choices"][0]["message"]["content"]
+              end
+       clean_description(text)
+     end
+
+     def clean_description(description)
+       return unless description.is_a?(String)
+
+       cleaned = description
+                 .strip
+                 .sub(/\ADescription:?:?\s*/i, "")
+                 .gsub(/\s+/, " ")
+                 .gsub(@image_path, "")
+                 .strip
+       cleaned << "." unless cleaned =~ /[.!?]\z/
+       cleaned
+     end
+
+     def fallback_prompt
+       <<~PROMPT
+         You are a text-based AI tasked with generating a descriptive guess about an image based on its physical characteristics and the absolute pathname provided.
+
+         Please consider the following details:
+
+         1. **Absolute Pathname:** #{@image_path}
+         2. **Image Characteristics:**
+            - **Width:** #{@image.columns}
+            - **Height:** #{@image.rows}
+            - **MIME/Type:** #{@image.mime_type}
+            - **File Size:** #{@image.filesize} bytes
+            - **Number of Colors:** #{@image.number_colors}
+
+         Based on the above information, please make your best guess about what the image might depict. Consider common uses for the file format, the aspect ratio, and any hints from the pathname itself. Provide your best guess as a brief description that includes potential subjects, themes, or contexts of the image.
+
+       PROMPT
+     end
+   end
+ end
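
Usage note: the service above can be exercised standalone. A minimal sketch, under stated assumptions (a local Ollama daemon serving the default gemma3 and smollm2 models, and a hypothetical image path):

    require "ragdoll"

    # "/tmp/photo.jpg" is a placeholder path; the default models come from
    # DEFAULT_OPTIONS and DEFAULT_FALLBACK_OPTIONS above.
    service = Ragdoll::ImageDescriptionService.new
    description = service.generate_description("/tmp/photo.jpg")

    # Returns vision-model text on success, "" for a missing or non-image path,
    # and the "[Image file: photo.jpg]" placeholder when no model responds.
    puts description
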
data/app/services/ragdoll/metadata_generator.rb
@@ -0,0 +1,329 @@
+ # frozen_string_literal: true
+
+ module Ragdoll
+   # Service for generating structured metadata using LLM providers
+   # Leverages structured output capabilities to ensure consistent metadata schemas
+   class MetadataGenerator
+     def initialize(llm_client: nil)
+       @llm_client = llm_client || default_llm_client
+     end
+
+     # Generate metadata for a document based on its content and type
+     def generate_for_document(document)
+       case document.document_type
+       when "text", "markdown", "html"
+         generate_text_metadata(document)
+       when "image"
+         generate_image_metadata(document)
+       when "audio"
+         generate_audio_metadata(document)
+       when "pdf", "docx"
+         generate_pdf_metadata(document)
+       when "mixed"
+         generate_mixed_metadata(document)
+       else
+         generate_text_metadata(document) # fallback
+       end
+     end
+
+     # Generate metadata for text content
+     def generate_text_metadata(document)
+       # Combine all text content from the document
+       text_content = document.text_contents.map(&:content).join("\n\n")
+       return {} if text_content.blank?
+
+       schema = Ragdoll::MetadataSchemas::TEXT_SCHEMA
+       prompt = build_text_analysis_prompt(text_content)
+
+       generate_structured_metadata(prompt, schema)
+     end
+
+     # Generate metadata for image content
+     def generate_image_metadata(document)
+       # For images, we need to use vision-capable models
+       image_content = document.image_contents.first
+       return {} unless image_content&.image_attached?
+
+       schema = Ragdoll::MetadataSchemas::IMAGE_SCHEMA
+       prompt = build_image_analysis_prompt(image_content)
+
+       # This would use a vision model like GPT-4V, Claude 3, etc.
+       generate_structured_metadata(prompt, schema, content_type: "image", image: image_content.image)
+     end
+
+     # Generate metadata for audio content
+     def generate_audio_metadata(document)
+       audio_content = document.audio_contents.first
+       return {} unless audio_content
+
+       schema = Ragdoll::MetadataSchemas::AUDIO_SCHEMA
+
+       # Use transcript if available, otherwise analyze audio directly
+       prompt = if audio_content.transcript.present?
+                  build_audio_transcript_analysis_prompt(audio_content.transcript, audio_content.duration)
+                else
+                  # This would require audio-capable models or speech-to-text preprocessing
+                  build_audio_analysis_prompt(audio_content)
+                end
+
+       generate_structured_metadata(prompt, schema)
+     end
+
+     # Generate metadata for PDF content
+     def generate_pdf_metadata(document)
+       text_content = document.text_contents.map(&:content).join("\n\n")
+       return {} if text_content.blank?
+
+       schema = Ragdoll::MetadataSchemas::PDF_SCHEMA
+       prompt = build_pdf_analysis_prompt(text_content, document.file_metadata)
+
+       generate_structured_metadata(prompt, schema)
+     end
+
+     # Generate metadata for mixed/multi-modal content
+     def generate_mixed_metadata(document)
+       schema = Ragdoll::MetadataSchemas::MIXED_SCHEMA
+
+       # Combine analysis from all content types
+       content_summaries = []
+
+       document.text_contents.each do |text|
+         content_summaries << { type: "text", content: text.content[0..500] }
+       end
+
+       document.image_contents.each do |image|
+         content_summaries << { type: "image", description: image.description || "Image content" }
+       end
+
+       document.audio_contents.each do |audio|
+         content_summaries << { type: "audio", transcript: audio.transcript || "Audio content" }
+       end
+
+       prompt = build_mixed_analysis_prompt(content_summaries)
+       generate_structured_metadata(prompt, schema)
+     end
+
+     private
+
+     # Core method for generating structured metadata using LLM
+     def generate_structured_metadata(prompt, schema, content_type: "text", image: nil)
+       case @llm_client&.provider
+       when "openai"
+         generate_with_openai(prompt, schema, content_type, image)
+       when "anthropic"
+         generate_with_anthropic(prompt, schema, content_type, image)
+       when "ollama"
+         generate_with_ollama(prompt, schema)
+       else
+         # Fallback to basic LLM call without structured output
+         generate_with_fallback(prompt, schema)
+       end
+     rescue StandardError => e
+       Rails.logger.error "Metadata generation failed: #{e.message}" if defined?(Rails)
+       puts "Metadata generation failed: #{e.message}"
+       {}
+     end
+
+     # OpenAI structured output
+     def generate_with_openai(prompt, schema, content_type, image)
+       messages = build_messages(prompt, content_type, image)
+
+       response = @llm_client.chat(
+         model: "gpt-4o", # Use latest model with structured output
+         messages: messages,
+         response_format: {
+           type: "json_schema",
+           json_schema: {
+             name: "document_metadata",
+             schema: schema
+           }
+         },
+         temperature: 0.1
+       )
+
+       JSON.parse(response.dig("choices", 0, "message", "content") || "{}")
+     end
+
+     # Anthropic structured output (via prompt instructions)
+     def generate_with_anthropic(prompt, schema, content_type, image)
+       # Anthropic doesn't have native JSON schema support yet,
+       # so instruct the model to return JSON matching the schema
+       structured_prompt = "#{prompt}\n\nPlease respond with a JSON object that follows this exact schema:\n#{schema.to_json}\n\nRespond only with valid JSON, no explanations."
+
+       messages = build_messages(structured_prompt, content_type, image)
+
+       response = @llm_client.chat(
+         model: "claude-3-5-sonnet-20241022",
+         messages: messages,
+         temperature: 0.1
+       )
+
+       # Extract JSON from response
+       content = response.dig("content", 0, "text") || "{}"
+       JSON.parse(content.match(/\{.*\}/m)&.to_s || "{}")
+     end
+
+     # Ollama structured output (using Llama 3.1 or similar)
+     def generate_with_ollama(prompt, schema)
+       structured_prompt = "#{prompt}\n\nRespond with valid JSON matching this schema:\n#{schema.to_json}"
+
+       response = @llm_client.generate(
+         model: "llama3.1:8b",
+         prompt: structured_prompt,
+         format: "json",
+         options: { temperature: 0.1 }
+       )
+
+       JSON.parse(response["response"] || "{}")
+     end
+
+     # Fallback for any LLM provider
+     def generate_with_fallback(prompt, schema)
+       structured_prompt = "#{prompt}\n\nPlease respond with a JSON object that includes these fields: #{schema[:required]&.join(', ')}\n\nRespond only with valid JSON."
+
+       response = @llm_client.generate(prompt: structured_prompt)
+       JSON.parse(response || "{}")
+     rescue JSON::ParserError
+       {}
+     end
+
+     # Build messages array for chat-based APIs
+     def build_messages(prompt, content_type, image)
+       messages = [
+         {
+           role: "system",
+           content: "You are an expert document analyzer. Generate structured metadata as valid JSON following the provided schema exactly."
+         }
+       ]
+
+       messages << if content_type == "image" && image
+                     {
+                       role: "user",
+                       content: [
+                         { type: "text", text: prompt },
+                         { type: "image_url", image_url: { url: image_url_for(image) } }
+                       ]
+                     }
+                   else
+                     { role: "user", content: prompt }
+                   end
+
+       messages
+     end
+
+     # Build analysis prompts for different content types
+     def build_text_analysis_prompt(text_content)
+       <<~PROMPT
+         Analyze the following text document and extract structured metadata:
+
+         #{text_content[0..2000]}#{text_content.length > 2000 ? '...' : ''}
+
+         Please analyze this text and provide comprehensive metadata including:
+         - A concise summary (2-3 sentences)
+         - Relevant keywords and topics
+         - Document classification and complexity level
+         - Sentiment analysis
+         - Estimated reading time
+         - Language detection
+       PROMPT
+     end
+
+     def build_image_analysis_prompt(image_content)
+       existing_description = image_content.description || image_content.alt_text
+       base_prompt = <<~PROMPT
+         Analyze this image and provide comprehensive metadata including:
+         - Detailed description of what's shown
+         - Objects, people, or elements visible
+         - Scene type and visual style
+         - Dominant colors and mood
+         - Any visible text content
+         - Relevant keywords for search
+       PROMPT
+
+       if existing_description.present?
+         "#{base_prompt}\n\nExisting description: #{existing_description}"
+       else
+         base_prompt
+       end
+     end
+
+     def build_audio_transcript_analysis_prompt(transcript, duration)
+       <<~PROMPT
+         Analyze the following audio transcript and provide metadata:
+
+         Duration: #{duration} seconds
+         Transcript: #{transcript[0..1500]}#{transcript.length > 1500 ? '...' : ''}
+
+         Please analyze this audio content and provide:
+         - Summary of the spoken content
+         - Content type (speech, music, podcast, etc.)
+         - Topics discussed
+         - Number of speakers
+         - Language and mood
+         - Key quotes or important phrases
+       PROMPT
+     end
+
+     def build_audio_analysis_prompt(audio_content)
+       <<~PROMPT
+         Analyze this audio file and provide metadata:
+
+         Duration: #{audio_content.duration} seconds
+         Sample Rate: #{audio_content.sample_rate} Hz
+
+         Please determine:
+         - Type of audio content (speech, music, sound effects, etc.)
+         - If music: genre, mood, instruments
+         - If speech: estimated number of speakers, formality level
+         - Overall audio characteristics
+       PROMPT
+     end
+
+     def build_pdf_analysis_prompt(text_content, file_metadata)
+       <<~PROMPT
+         Analyze this PDF document and provide structured metadata:
+
+         File info: #{file_metadata}
+         Content preview: #{text_content[0..2000]}#{text_content.length > 2000 ? '...' : ''}
+
+         Please analyze this PDF and provide:
+         - Document type and classification
+         - Summary of content
+         - Topics and keywords
+         - Document structure analysis
+         - Complexity level
+         - Estimated reading time
+       PROMPT
+     end
+
+     def build_mixed_analysis_prompt(content_summaries)
+       content_desc = content_summaries.map { |c| "#{c[:type]}: #{c.values[1]}" }.join("\n\n")
+
+       <<~PROMPT
+         Analyze this multi-modal document containing different types of content:
+
+         #{content_desc}
+
+         Please provide comprehensive metadata for this mixed-content document:
+         - Overall summary combining all content types
+         - How the different content types relate to each other
+         - Primary vs secondary content types
+         - Keywords spanning all content
+         - Classification for the complete document
+       PROMPT
+     end
+
+     # Utility methods
+     def default_llm_client
+       # This would integrate with your LLM client
+       # Could use ruby_llm or direct API clients
+       nil
+     end
+
+     def image_url_for(image)
+       # Convert Shrine attachment to URL for vision APIs
+       # This would need proper implementation based on your Shrine setup
+       image.url if image.respond_to?(:url)
+     end
+   end
+ end
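
Usage note: MetadataGenerator dispatches first on document.document_type, then on @llm_client&.provider; since default_llm_client returns nil in this version, a client must be injected or every call routes through generate_with_fallback and rescues to {}. A minimal sketch with a hypothetical stub client (only the duck-typed methods the service actually calls are required):

    # Hypothetical adapter; #provider and #chat are all the OpenAI path uses.
    class StubOpenAIClient
      def provider
        "openai"
      end

      def chat(model:, messages:, response_format:, temperature:)
        # Canned response in the chat-completions shape the service digs into.
        { "choices" => [{ "message" => { "content" => "{}" } }] }
      end
    end

    generator = Ragdoll::MetadataGenerator.new(llm_client: StubOpenAIClient.new)
    document  = Ragdoll::Document.find(42) # any persisted document
    metadata  = generator.generate_for_document(document)
    # => Hash matching the schema for the document's type, or {} on any error
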
data/app/services/ragdoll/model_resolver.rb
@@ -0,0 +1,72 @@
+ # frozen_string_literal: true
+
+ module Ragdoll
+   # Service for resolving models with provider/model parsing and inheritance
+   class ModelResolver
+     def initialize(config_service = nil)
+       @config_service = config_service || Ragdoll::ConfigurationService.new
+     end
+
+     # Resolve model for a task, returns Model object
+     def resolve_for_task(task_type, content_type = :text)
+       model_string = @config_service.resolve_model(task_type, content_type)
+
+       raise Ragdoll::Core::ConfigurationError, "No model configured for task '#{task_type}'" if model_string.nil?
+
+       Ragdoll::Core::Model.new(model_string)
+     end
+
+     # Resolve embedding model for content type, returns Model object with metadata
+     def resolve_embedding(content_type = :text)
+       embedding_config = @config_service.config.models[:embedding]
+       model_string = embedding_config[content_type]
+
+       raise Ragdoll::Core::ConfigurationError, "No embedding model configured for content type '#{content_type}'" if model_string.nil?
+
+       model = Ragdoll::Core::Model.new(model_string)
+
+       # Return object with model and embedding-specific metadata
+       OpenStruct.new(
+         model: model,
+         provider_type: embedding_config[:provider],
+         max_dimensions: embedding_config[:max_dimensions],
+         cache_embeddings: embedding_config[:cache_embeddings]
+       )
+     end
+
+     # Get provider credentials for a Model object
+     def provider_credentials_for_model(model)
+       provider = model.provider
+
+       if provider.nil?
+         # Use default provider if none specified
+         provider = @config_service.config.llm_providers[:default_provider]
+       end
+
+       @config_service.provider_credentials(provider)
+     end
+
+     # Resolve all models for debugging/introspection
+     def resolve_all_models
+       {
+         text_generation: {
+           default: resolve_for_task(:default),
+           summary: resolve_for_task(:summary),
+           keywords: resolve_for_task(:keywords)
+         },
+         embedding: {
+           text: resolve_embedding(:text),
+           image: resolve_embedding(:image),
+           audio: resolve_embedding(:audio)
+         }
+       }
+     rescue Ragdoll::Core::ConfigurationError => e
+       # Return partial results with error information
+       { error: e.message, partial: true }
+     end
+
+     private
+
+     attr_reader :config_service
+   end
+ end
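
Usage note: a short sketch of the resolver API, assuming the configuration's models and llm_providers sections are populated (and noting that resolve_embedding builds an OpenStruct, so ostruct must already be loaded elsewhere in the gem):

    resolver = Ragdoll::ModelResolver.new

    summary_model = resolver.resolve_for_task(:summary) # => Ragdoll::Core::Model
    embedding     = resolver.resolve_embedding(:text)   # => OpenStruct(model:, provider_type:, ...)
    puts embedding.max_dimensions

    # Credentials are looked up by the model's provider, falling back to
    # llm_providers[:default_provider] when the model string carries none.
    credentials = resolver.provider_credentials_for_model(summary_model)
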
data/app/services/ragdoll/search_engine.rb
@@ -0,0 +1,51 @@
+ # frozen_string_literal: true
+
+ # FIXME: This class needs rework; it does not focus on search.
+
+ module Ragdoll
+   class SearchEngine
+     def initialize(embedding_service, config_service: nil)
+       @embedding_service = embedding_service
+       @config_service = config_service || Ragdoll::ConfigurationService.new
+     end
+
+     def search_documents(query, options = {})
+       search_config = @config_service.search_config
+       limit = options[:limit] || search_config[:max_results]
+       threshold = options[:threshold] || search_config[:similarity_threshold]
+       filters = options[:filters] || {}
+
+       # Generate embedding for the query
+       query_embedding = @embedding_service.generate_embedding(query)
+       return [] if query_embedding.nil?
+
+       # Search using ActiveRecord models
+       Ragdoll::Embedding.search_similar(query_embedding,
+                                         limit: limit,
+                                         threshold: threshold,
+                                         filters: filters)
+     end
+
+     def search_similar_content(query_or_embedding, options = {})
+       search_config = @config_service.search_config
+       limit = options[:limit] || search_config[:max_results]
+       threshold = options[:threshold] || search_config[:similarity_threshold]
+       filters = options[:filters] || {}
+
+       if query_or_embedding.is_a?(Array)
+         # It's already an embedding
+         query_embedding = query_or_embedding
+       else
+         # It's a query string, generate embedding
+         query_embedding = @embedding_service.generate_embedding(query_or_embedding)
+         return [] if query_embedding.nil?
+       end
+
+       # Search using ActiveRecord models
+       Ragdoll::Embedding.search_similar(query_embedding,
+                                         limit: limit,
+                                         threshold: threshold,
+                                         filters: filters)
+     end
+   end
+ end
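
Usage note: both search methods share one flow: read limit/threshold from search_config unless overridden, embed the query when given a string, then delegate to Ragdoll::Embedding.search_similar. A minimal sketch, assuming Ragdoll::EmbeddingService.new takes no arguments here (any object responding to #generate_embedding would do):

    embedding_service = Ragdoll::EmbeddingService.new
    engine = Ragdoll::SearchEngine.new(embedding_service)

    results = engine.search_documents("vector similarity search",
                                      limit: 10, threshold: 0.75)

    # search_similar_content also accepts a precomputed embedding; the filters
    # hash is passed straight through to Embedding.search_similar (keys here
    # are hypothetical).
    vector  = embedding_service.generate_embedding("vector similarity search")
    similar = engine.search_similar_content(vector, filters: { document_type: "pdf" })
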