ragdoll 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ruby_llm"
4
+ require "base64"
5
+ require "rmagick"
6
+
7
+ module Ragdoll
8
+ class ImageToTextService
9
# Error type reserved for description failures.
class DescriptionError < StandardError
end

# Settings used for the primary chat when the caller supplies none.
DEFAULT_OPTIONS = { model: "gemma3", provider: :ollama, assume_model_exists: true,
                    temperature: 0.2, detail_level: :comprehensive }.freeze

# Settings used for the fallback chat when the caller supplies none.
DEFAULT_FALLBACK_OPTIONS = { model: "smollm2", provider: :ollama, assume_model_exists: true,
                             temperature: 0.4 }.freeze

# Base instruction sent to the model for each supported detail level.
DETAIL_LEVELS = {
  minimal:
    "Provide a brief, one-sentence description of the image.",
  standard:
    "Describe the main elements, objects, and overall composition of the image.",
  comprehensive:
    "Provide a detailed description including objects, people, settings, colors, mood, style, and any text visible in the image.",
  analytical:
    "Analyze the image thoroughly, describing composition, lighting, subjects, background, context, and any symbolic or artistic elements."
}.freeze
32
+
33
# Convenience entry point: builds a service from +options+ and converts one file.
#
# @param file_path [String] path of the image to describe
# @param options [Hash] keyword options forwarded to the constructor
# @return [String] whatever the instance-level #convert returns
def self.convert(file_path, **options)
  service = new(**options)
  service.convert(file_path)
end
36
+
37
# Configures RubyLLM and eagerly builds the primary and fallback chats.
#
# A chat that cannot be created is stored as nil (the failure is printed), so the
# service still works with whatever remains.
#
# @param primary [Hash] settings for the primary chat; :temperature is applied via
#   with_temperature and :detail_level is ignored here (see below)
# @param fallback [Hash] settings for the fallback chat, handled the same way
# @param options [Hash] extra options merged over DEFAULT_OPTIONS; :detail_level
#   selects the prompt built for the primary chat
def initialize(primary: DEFAULT_OPTIONS, fallback: DEFAULT_FALLBACK_OPTIONS, **options)
  @options = DEFAULT_OPTIONS.merge(options)
  @detail_level = @options[:detail_level] || :comprehensive

  configure_ruby_llm_globally

  # :detail_level is a prompt setting, not a chat keyword. DEFAULT_OPTIONS contains
  # it, so forwarding the hash untouched made RubyLLM.chat raise on an unknown
  # keyword and left @primary nil even with a healthy model.
  primary_opts = primary.reject { |key, _| key == :detail_level }
  primary_temp = primary_opts.delete(:temperature) || 0.2
  @primary_prompt = build_prompt(@detail_level)

  begin
    @primary = RubyLLM.chat(**primary_opts).with_temperature(primary_temp)
  rescue StandardError => e
    puts "❌ ImageToTextService: Primary model creation failed: #{e.message}"
    @primary = nil
  end

  fallback_opts = fallback.reject { |key, _| key == :detail_level }
  fallback_temp = fallback_opts.delete(:temperature) || 0.4

  begin
    @fallback = RubyLLM.chat(**fallback_opts).with_temperature(fallback_temp)
  rescue StandardError => e
    puts "❌ ImageToTextService: Fallback model creation failed: #{e.message}"
    @fallback = nil
  end

  return unless @primary.nil? && @fallback.nil?

  puts "⚠️ ImageToTextService: WARNING - No models available! Service will return metadata-based descriptions only."
end
70
+
71
# Produces a text description for the image at +file_path+.
#
# Tries, in order: the primary model on the encoded image, the fallback model on
# file metadata, and finally a description derived from the filename and image
# properties.
#
# @param file_path [String] path of the image file
# @return [String] the description; "" when the file does not exist or its
#   extension is not in #supported_formats
def convert(file_path)
  return "" unless File.exist?(file_path)
  return "" unless image_file?(file_path)

  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed = -> { (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).round(2) }

  @image_path = file_path
  # Reset per call: otherwise a failed read below would leave the image from a
  # previous #convert in place and its dimensions would be reported for this file.
  @image = nil

  begin
    @image = Magick::Image.read(@image_path).first
    image_data = prepare_image_data
    return generate_fallback_description unless image_data
  rescue StandardError => e
    puts "❌ ImageToTextService: Failed to read image: #{e.message}"
    return generate_fallback_description
  end

  if @primary
    description = attempt_vision_description(image_data)
    if description && !description.empty?
      puts "✅ ImageToTextService: Vision description generated (#{elapsed.call}s)"
      return description
    end
  end

  if @fallback
    description = attempt_fallback_description
    if description && !description.empty?
      puts "✅ ImageToTextService: Fallback description generated (#{elapsed.call}s)"
      return description
    end
  end

  puts "🔚 ImageToTextService: Using metadata-based description (#{elapsed.call}s)"
  generate_fallback_description
end
113
+
114
# File extensions (lowercase, with leading dot) this service will try to describe.
#
# @return [Array<String>]
def supported_formats
  %w[jpg jpeg png gif bmp webp svg ico tiff tif].map { |name| ".#{name}" }
end
117
+
118
+ private
119
+
120
# Applies provider credentials and endpoints to the global RubyLLM configuration.
#
# The Ollama endpoint comes from Ragdoll's configuration when one is readable,
# otherwise from OLLAMA_API_BASE, then OLLAMA_ENDPOINT, then localhost:11434.
# All other values are read from the environment (nil when unset). Any failure
# is printed and swallowed.
def configure_ruby_llm_globally
  ragdoll_config =
    begin
      Ragdoll.configuration
    rescue StandardError
      nil
    end

  configured_endpoint = ragdoll_config&.ruby_llm_config&.dig(:ollama, :endpoint)
  ollama_endpoint = configured_endpoint ||
                    ENV.fetch("OLLAMA_API_BASE", ENV.fetch("OLLAMA_ENDPOINT", "http://localhost:11434"))

  # Setter name => environment variable, applied in this order.
  env_backed_settings = {
    openai_api_key: "OPENAI_API_KEY",
    openai_organization_id: "OPENAI_ORGANIZATION_ID",
    openai_project_id: "OPENAI_PROJECT_ID",
    anthropic_api_key: "ANTHROPIC_API_KEY",
    gemini_api_key: "GEMINI_API_KEY",
    deepseek_api_key: "DEEPSEEK_API_KEY",
    openrouter_api_key: "OPENROUTER_API_KEY",
    bedrock_api_key: "BEDROCK_ACCESS_KEY_ID",
    bedrock_secret_key: "BEDROCK_SECRET_ACCESS_KEY",
    bedrock_region: "BEDROCK_REGION",
    bedrock_session_token: "BEDROCK_SESSION_TOKEN"
  }

  RubyLLM.configure do |config|
    env_backed_settings.each do |setting, variable|
      config.public_send(:"#{setting}=", ENV[variable])
    end
    config.ollama_api_base = ollama_endpoint
    config.openai_api_base = ENV["OPENAI_API_BASE"]
    config.log_level = :error
  end
rescue StandardError => e
  puts "❌ ImageToTextService: Failed to configure RubyLLM: #{e.message}"
end
150
+
151
# Builds the instruction text sent to the primary model.
#
# :analytical and :comprehensive get the base instruction plus a structured list
# and a closing sentence; every other level returns the base instruction alone
# (unknown levels use the :comprehensive base instruction).
#
# @param detail_level [Symbol]
# @return [String]
def build_prompt(detail_level)
  opening = DETAIL_LEVELS[detail_level] || DETAIL_LEVELS[:comprehensive]

  case detail_level
  when :analytical
    heading = "Please organize your analysis into these sections:"
    items = [
      "Visual Elements: Objects, people, animals, and their relationships",
      "Setting & Environment: Location, time of day, weather, atmosphere",
      "Technical Aspects: Lighting, composition, colors, perspective",
      "Text & Symbols: Any visible text, signs, logos, or symbolic elements",
      "Context & Meaning: Possible purpose, story, or message conveyed"
    ].each_with_index.map { |item, index| "#{index + 1}. #{item}" }
    closing = "Provide a thorough but concise analysis suitable for search and retrieval."
  when :comprehensive
    heading = "Include details about:"
    items = [
      "Main subjects and their actions or poses",
      "Setting, background, and environment",
      "Colors, lighting, and mood",
      "Any text, signs, or readable elements",
      "Style or artistic elements",
      "Objects and their relationships"
    ].map { |item| "- #{item}" }
    closing = "Write in a natural, descriptive style that would help someone understand the image content for search purposes."
  else
    return opening
  end

  "#{[opening, '', heading, *items, '', closing].join("\n")}\n"
end
186
+
187
# Asks the primary chat to describe the image.
#
# Sends one user message holding the prompt text and the image as a base64 data
# URL, then extracts and cleans the reply. Any error is printed and turns into nil.
#
# @param image_data [String] base64-encoded image bytes
# @return [String, nil] cleaned description, or nil on failure
def attempt_vision_description(image_data)
  data_url = "data:#{@image.mime_type};base64,#{image_data}"
  message_parts = [
    { type: "text", text: @primary_prompt },
    { type: "image_url", image_url: { url: data_url } }
  ]
  @primary.add_message(role: "user", content: message_parts)

  clean_description(extract_description(@primary.complete))
rescue StandardError => e
  puts "❌ ImageToTextService: Vision model failed: #{e.message}"
  nil
end
205
+
206
# Asks the fallback chat for a description based on file metadata only.
#
# @return [String, nil] cleaned reply text, or nil when anything raises
#   (the error message is printed)
def attempt_fallback_description
  reply = @fallback.ask(build_fallback_prompt)
  clean_description(reply.content)
rescue StandardError => e
  puts "❌ ImageToTextService: Fallback model failed: #{e.message}"
  nil
end
216
+
217
# Builds the text-only prompt for the fallback model from the file path and the
# properties of the loaded image (dimensions, MIME type, size, colour count).
#
# @return [String]
def build_fallback_prompt
  path = @image_path
  file_name = File.basename(@image_path)
  dimensions = "#{@image.columns}x#{@image.rows}"
  mime = @image.mime_type
  byte_size = @image.filesize
  color_count = @image.number_colors
  shape = aspect_ratio_description

  <<~PROMPT
    Based on the image file information below, generate a descriptive analysis of what this image likely contains:

    **File Information:**
    - Path: #{path}
    - Filename: #{file_name}
    - Dimensions: #{dimensions} pixels
    - Format: #{mime}
    - File Size: #{byte_size} bytes
    - Colors: #{color_count} unique colors

    **Analysis Request:**
    Consider the filename, aspect ratio (#{shape}), file format, and size to make educated guesses about:
    1. What type of image this might be (photo, diagram, artwork, screenshot, etc.)
    2. Possible subject matter based on filename and characteristics
    3. Likely content based on image properties

    Provide a thoughtful description that could be useful for search and categorization, even without seeing the actual image content.
  PROMPT
end
238
+
239
# Whether the path's extension (case-insensitive) is one of #supported_formats.
#
# @param file_path [String]
# @return [Boolean]
def image_file?(file_path)
  supported_formats.include?(File.extname(file_path).downcase)
end
243
+
244
# Reads the current image file as binary and encodes it as strict base64
# (no line breaks).
#
# @return [String, nil] the encoded bytes, or nil when reading/encoding raises
def prepare_image_data
  raw_bytes = File.binread(@image_path)
  # "m0" is the pack directive for base64 without line feeds, the same encoding
  # Base64.strict_encode64 produces.
  [raw_bytes].pack("m0")
rescue StandardError
  nil
end
249
+
250
# Pulls the text out of a model response.
#
# Accepts either an object exposing #content or a Hash shaped like
# {"choices" => [{"message" => {"content" => ...}}]}.
#
# @param response [Object]
# @return [Object, nil] the content, or nil when the response has neither shape
def extract_description(response)
  return response.content if response.respond_to?(:content)
  return nil unless response.is_a?(Hash)

  response.dig("choices", 0, "message", "content") || nil
end
259
+
260
# Normalizes raw model output into a single-line description.
#
# Strips a leading "Description:" / "Image:" label, collapses whitespace, replaces
# the full image path with its basename, and makes sure the text ends with
# punctuation.
#
# @param description [Object] raw model output
# @return [String, nil] cleaned text; "" when nothing but whitespace/labels remains
#   (so callers' emptiness checks reject it); nil when +description+ is not a String
def clean_description(description)
  return nil unless description.is_a?(String)

  # The colon is required: with an optional colon, ordinary sentences such as
  # "Image of a cat" lost their first word.
  cleaned = description
            .strip
            .sub(/\ADescription\s*:+\s*/i, "")
            .sub(/\AImage\s*:+\s*/i, "")
            .gsub(/\s+/, " ")

  # to_s: a Pathname would make gsub raise TypeError. The emptiness guard avoids
  # gsub("") matching between every character.
  path = @image_path.to_s
  cleaned = cleaned.gsub(path, File.basename(path)) unless path.empty?
  cleaned = cleaned.strip

  # Do not turn an empty answer into "." - that would pass as a real description.
  return "" if cleaned.empty?

  cleaned.match?(/[.!?]\z/) ? cleaned : "#{cleaned}."
end
275
+
276
# Builds a description without any model, from the filename and, when an image
# was loaded, its dimensions, aspect ratio and extension.
#
# The filename is split on dashes, underscores and lower/upper case boundaries;
# purely numeric words are dropped and the rest capitalized.
#
# @return [String]
def generate_fallback_description
  extension = File.extname(@image_path)
  stem = File.basename(@image_path, extension)

  words = stem.gsub(/[-_]+/, ' ').gsub(/([a-z])([A-Z])/, '\1 \2').split(' ')
  title_words = words.reject { |word| word.match?(/^\d+$/) }.map(&:capitalize)

  summary =
    if title_words.empty?
      "Image file: #{File.basename(@image_path)}"
    else
      "Image: #{title_words.join(' ')}"
    end
  return summary unless @image

  specs = [
    "#{@image.columns}x#{@image.rows}",
    aspect_ratio_description,
    "#{extension.upcase.sub('.', '')} format"
  ]
  "#{summary} (#{specs.join(', ')})"
end
305
+
306
# Names the shape of the loaded image from its width/height ratio.
#
# @return [String] "unknown aspect ratio" when no image is loaded, otherwise one of
#   "tall portrait", "portrait", "square", "landscape", "wide landscape",
#   "panoramic" or "unusual aspect ratio"
def aspect_ratio_description
  return "unknown aspect ratio" unless @image

  ratio = @image.columns.to_f / @image.rows.to_f

  # NaN (0/0) and negative values fit none of the named shapes.
  if ratio.nan? || ratio < 0 then "unusual aspect ratio"
  elsif ratio < 0.5 then "tall portrait"
  elsif ratio < 0.9 then "portrait"
  elsif ratio <= 1.1 then "square"
  elsif ratio <= 1.5 then "landscape"
  elsif ratio <= 2.0 then "wide landscape"
  else "panoramic"
  end
end
321
+ end
322
+ end
@@ -0,0 +1,340 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ragdoll
4
+ # Migration service to transition from multi-modal to unified text-based RAG system
5
+ class MigrationService
6
# Raised when migrating a single document fails; the message carries the
# document id and the underlying error message.
class MigrationError < StandardError
end
7
+
8
# Class-level shortcut: builds a service and migrates every document.
#
# @param options [Hash] forwarded unchanged to the instance method
# @return [Hash] the instance method's result
def self.migrate_all_documents(**options)
  service = new
  service.migrate_all_documents(**options)
end
11
+
12
# Class-level shortcut: builds a service and migrates one document.
#
# @param document_id [Object] id of the document to migrate
# @param options [Hash] forwarded unchanged to the instance method
# @return [Hash] the instance method's result
def self.migrate_document(document_id, **options)
  service = new
  service.migrate_document(document_id, **options)
end
15
+
16
# Creates the collaborators used during migration: a document converter and a
# unified document manager (constructed in that order).
def initialize
  @converter, @unified_management =
    Ragdoll::DocumentConverter.new, Ragdoll::UnifiedDocumentManagement.new
end
20
+
21
# Migrates every existing document to the unified text-based system.
#
# Documents are read in batches (options[:batch_size], default 50). A failure on
# one document is recorded in :errors and does not stop the run. Progress is
# printed after every tenth document.
#
# @param options [Hash] :batch_size plus anything the per-document migration accepts
# @return [Hash] statistics (:started_at, :total_documents, :migrated, :skipped,
#   :errors, :completed_at, :duration), or { error: ... } when the UnifiedDocument
#   model is not defined
def migrate_all_documents(**options)
  return { error: "UnifiedDocument model not available" } unless defined?(Ragdoll::UnifiedDocument)

  stats = { started_at: Time.current, total_documents: 0, migrated: 0, skipped: 0, errors: [] }
  tally = lambda do
    "#{stats[:migrated]} migrated, #{stats[:skipped]} skipped, #{stats[:errors].length} errors"
  end

  puts "🚀 Starting migration from multi-modal to unified text-based system..."

  batch_size = options[:batch_size] || 50
  Ragdoll::Document.find_each(batch_size: batch_size) do |document|
    stats[:total_documents] += 1

    begin
      outcome = migrate_single_document(document, **options)
      bucket = outcome[:status] == :migrated ? :migrated : :skipped
      stats[bucket] += 1
    rescue StandardError => e
      stats[:errors] << { document_id: document.id, title: document.title, error: e.message }
      puts "❌ Error migrating document #{document.id}: #{e.message}"
    end

    puts "📊 Progress: #{tally.call}" if (stats[:total_documents] % 10).zero?
  end

  finished_at = Time.current
  stats[:completed_at] = finished_at
  stats[:duration] = finished_at - stats[:started_at]

  puts "✅ Migration completed!"
  puts "📊 Final stats: #{tally.call}"
  puts "⏱️ Duration: #{stats[:duration].round(2)} seconds"

  stats
end
70
+
71
# Migrates the single document with the given id.
#
# @param document_id [Object] id passed to Ragdoll::Document.find
# @param options [Hash] forwarded to the per-document migration
# @return [Hash] result of the per-document migration
def migrate_document(document_id, **options)
  migrate_single_document(Ragdoll::Document.find(document_id), **options)
end
76
+
77
# Builds a report comparing the old multi-modal tables with the unified ones.
#
# @return [Hash] :migration_summary (old vs new counts), :benefits (static text)
#   and :recommendations; or { error: ... } when the UnifiedDocument model is
#   not defined
def create_comparison_report
  return { error: "UnifiedDocument model not available" } unless defined?(Ragdoll::UnifiedDocument)

  legacy = Ragdoll::Document.stats
  unified = Ragdoll::UnifiedDocument.stats
  contents = Ragdoll::UnifiedContent.stats

  old_system = {
    total_documents: legacy[:total_documents],
    text_contents: legacy[:total_text_contents],
    image_contents: legacy[:total_image_contents],
    audio_contents: legacy[:total_audio_contents],
    total_embeddings: legacy[:total_embeddings]
  }
  new_system = {
    total_documents: unified[:total_documents],
    unified_contents: contents[:total_contents],
    total_embeddings: unified[:total_embeddings],
    by_media_type: contents[:by_media_type]
  }
  benefits = {
    simplified_architecture: "Single content model instead of STI",
    unified_search: "All content searchable through text",
    cross_modal_retrieval: "Images and audio searchable via descriptions/transcripts",
    reduced_complexity: "One embedding pipeline instead of multiple"
  }

  {
    migration_summary: { old_system: old_system, new_system: new_system },
    benefits: benefits,
    recommendations: generate_migration_recommendations
  }
end
110
+
111
# Runs three integrity checks on the migrated data and prints each outcome:
#   1. old and unified document counts match
#   2. no unified document is without content
#   3. at least 50% of unified content is rated high quality
#
# When no quality figures are available (empty table or model not defined) the
# third check is recorded as failed instead of raising.
#
# @return [Hash] :total_checks, :passed, :failed, :issues, :quality_report;
#   or { error: ... } when the UnifiedDocument model is not defined
def validate_migration
  return { error: "UnifiedDocument model not available" } unless defined?(Ragdoll::UnifiedDocument)

  results = { total_checks: 0, passed: 0, failed: 0, issues: [] }

  # Records one check: bumps the counters, stores the issue text on failure and
  # prints the matching line.
  record = lambda do |ok, pass_line, issue, fail_line|
    results[:total_checks] += 1
    if ok
      results[:passed] += 1
      puts pass_line
    else
      results[:failed] += 1
      results[:issues] << issue
      puts fail_line
    end
  end

  puts "🔍 Validating migration integrity..."

  # Check 1: all documents have corresponding unified documents
  old_count = Ragdoll::Document.count
  new_count = Ragdoll::UnifiedDocument.count
  mismatch = "Document count mismatch: #{old_count} old vs #{new_count} new"
  record.call(old_count == new_count,
              "✅ Document count matches: #{old_count} = #{new_count}",
              mismatch, "❌ #{mismatch}")

  # Check 2: all unified documents have content
  documents_without_content = Ragdoll::UnifiedDocument.without_content.count
  missing = "#{documents_without_content} documents without content"
  record.call(documents_without_content == 0,
              "✅ All unified documents have content",
              missing, "❌ #{missing}")

  # Check 3: content quality assessment
  quality_stats = content_quality_report
  high_pct = quality_stats[:high_quality_percentage]
  if high_pct.nil?
    # The report has no percentages when there is nothing to analyze; comparing
    # nil with 50 would raise NoMethodError.
    reason = quality_stats[:error] || "no quality data available"
    unassessed = "Content quality could not be assessed: #{reason}"
    record.call(false, nil, unassessed, "⚠️ #{unassessed}")
  else
    record.call(high_pct >= 50,
                "✅ Content quality acceptable: #{high_pct}% high quality",
                "Low content quality: only #{high_pct}% high quality",
                "⚠️ Content quality concern: only #{high_pct}% high quality")
  end

  results[:quality_report] = quality_stats
  results
end
167
+
168
+ private
169
+
170
# Migrates one multi-modal document into a UnifiedDocument with a single
# unified content row.
#
# @param document [Object] source document (reads id, title, location,
#   document_type, file_modified_at)
# @param options [Hash] :process_embeddings - when truthy the new document is
#   processed via process_document!, otherwise it is just marked "processed"
# @return [Hash] { status: :migrated, unified_document: ... } or
#   { status: :skipped, reason: "already_migrated" | "no_content" }
# @raise [MigrationError] wrapping any StandardError raised along the way
def migrate_single_document(document, **options)
  # Skip if already migrated (matched by location)
  if defined?(Ragdoll::UnifiedDocument) &&
     Ragdoll::UnifiedDocument.exists?(location: document.location)
    return { status: :skipped, reason: "already_migrated" }
  end

  unified_text = extract_unified_text_from_document(document)
  return { status: :skipped, reason: "no_content" } if unified_text.blank?

  # Document and content are created atomically. Without the transaction, a
  # failed content insert left a content-less document behind that every later
  # run skipped as "already_migrated".
  unified_doc = Ragdoll::UnifiedDocument.transaction do
    created = Ragdoll::UnifiedDocument.create!(
      location: document.location,
      title: document.title,
      document_type: document.document_type,
      status: "pending",
      file_modified_at: document.file_modified_at,
      metadata: merge_document_metadata(document)
    )

    created.unified_contents.create!(
      content: unified_text,
      original_media_type: determine_primary_media_type(document),
      embedding_model: "text-embedding-3-large",
      metadata: {
        "migrated_at" => Time.current,
        "migration_source" => "multi_modal_document",
        "original_document_id" => document.id,
        "conversion_method" => "migration_consolidation"
      }
    )

    created
  end

  # Processing stays outside the transaction so it does not hold it open.
  if options[:process_embeddings]
    unified_doc.process_document!
  else
    unified_doc.update!(status: "processed")
  end

  puts "✅ Migrated: #{document.title}"
  { status: :migrated, unified_document: unified_doc }
rescue StandardError => e
  puts "❌ Failed to migrate #{document.title}: #{e.message}"
  raise MigrationError, "Migration failed for document #{document.id}: #{e.message}"
end
220
+
221
# Collects all text a document carries into one string.
#
# Gathers text contents, image descriptions and audio transcripts (in that
# order, each only if the document exposes the association), falls back to the
# document's own content field, and as a last resort re-converts the source file
# when it still exists on disk. Parts are joined with a blank line.
#
# @param document [Object]
# @return [String] possibly empty
def extract_unified_text_from_document(document)
  # association => attribute holding that record's text
  sources = { text_contents: :content, image_contents: :description, audio_contents: :transcript }

  text_parts = sources.each_with_object([]) do |(association, attribute), parts|
    next unless document.respond_to?(association)

    document.public_send(association).each do |record|
      value = record.public_send(attribute)
      parts << value if value.present?
    end
  end

  text_parts << document.content if text_parts.empty? && document.content.present?

  unified_text = text_parts.compact.reject(&:empty?).join("\n\n")
  return unified_text unless unified_text.blank? && File.exist?(document.location)

  begin
    unified_text = @converter.convert_to_text(document.location, document.document_type)
  rescue StandardError => e
    puts "Warning: Could not regenerate content for #{document.location}: #{e.message}"
  end

  unified_text
end
262
+
263
# Picks the media type recorded on the unified content row.
#
# Uses the first entry of the document's content_types when it exposes a
# non-empty list; otherwise maps document_type to "image", "audio" or, for
# everything else, "text".
#
# @param document [Object]
# @return [Object] the first declared content type, or one of "text"/"image"/"audio"
def determine_primary_media_type(document)
  if document.respond_to?(:content_types)
    declared = document.content_types
    return declared.first if declared.any?
  end

  kind = document.document_type
  %w[image audio].find { |media| media === kind } || "text"
end
282
+
283
# Returns the document's metadata (or an empty hash) extended with migration
# tracking keys. The document's own metadata hash is not modified.
#
# @param document [Object] responds to #metadata
# @return [Hash]
def merge_document_metadata(document)
  existing = document.metadata || {}
  tracking = {
    "migrated_from_multi_modal" => true,
    "migration_timestamp" => Time.current,
    "original_system" => "multi_modal_sti"
  }

  existing.merge(tracking)
end
293
+
294
# Buckets unified content by text length: high (> 100 characters), medium
# (50..100) and low (< 50), with each bucket's share as a percentage rounded to
# one decimal.
#
# @return [Hash] counts and percentages; {} when the UnifiedContent model is not
#   defined; { error: ... } when there is no content
def content_quality_report
  return {} unless defined?(Ragdoll::UnifiedContent)

  total = Ragdoll::UnifiedContent.count
  return { error: "No content to analyze" } if total == 0

  counts = {
    high_quality: Ragdoll::UnifiedContent.where("LENGTH(content) > 100").count,
    medium_quality: Ragdoll::UnifiedContent.where("LENGTH(content) BETWEEN 50 AND 100").count,
    low_quality: Ragdoll::UnifiedContent.where("LENGTH(content) < 50").count
  }
  percentages = counts.map do |bucket, count|
    [:"#{bucket}_percentage", (count.to_f / total * 100).round(1)]
  end.to_h

  { total_contents: total }.merge(counts).merge(percentages)
end
314
+
315
# Produces the list of follow-up recommendations shown in the comparison report.
#
# Quality-based advice is added only when the UnifiedDocument model is defined;
# otherwise the caller is told to enable the unified models. Three general
# recommendations are always appended.
#
# @return [Array<String>]
def generate_migration_recommendations
  advice = []

  if defined?(Ragdoll::UnifiedDocument)
    report = content_quality_report
    low_share = report[:low_quality_percentage]
    content_total = report[:total_contents]

    if low_share && low_share > 20
      advice << "Consider reprocessing low-quality content with enhanced conversion settings"
    end
    if content_total && content_total > 0
      advice << "Review content quality scores and adjust conversion parameters as needed"
    end
  else
    advice << "Enable UnifiedDocument and UnifiedContent models to start migration"
  end

  advice.push(
    "Test search functionality with unified text-based approach",
    "Monitor embedding generation performance with single model",
    "Consider archiving old multi-modal content tables after validation"
  )
end
339
+ end
340
+ end