ragdoll 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@
3
3
  require "pdf-reader"
4
4
  require "docx"
5
5
  require "rmagick"
6
+ require "yaml"
7
+ require "date"
6
8
  # Image description service is auto-loaded from app/services
7
9
 
8
10
  module Ragdoll
@@ -137,6 +139,11 @@ module Ragdoll
137
139
  raise ParseError, "Unsupported PDF feature: #{e.message}"
138
140
  end
139
141
 
142
+ # Add filepath-based title as fallback if no title was found
143
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
144
+ metadata[:title] = extract_title_from_filepath
145
+ end
146
+
140
147
  {
141
148
  content: content.strip,
142
149
  metadata: metadata,
@@ -192,6 +199,11 @@ module Ragdoll
192
199
  raise ParseError, "#{__LINE__} Failed to parse DOCX: #{e.message}"
193
200
  end
194
201
 
202
+ # Add filepath-based title as fallback if no title was found
203
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
204
+ metadata[:title] = extract_title_from_filepath
205
+ end
206
+
195
207
  {
196
208
  content: content.strip,
197
209
  metadata: metadata,
@@ -212,6 +224,20 @@ module Ragdoll
212
224
  else "text"
213
225
  end
214
226
 
227
+ # Parse YAML front matter for markdown files
228
+ if document_type == "markdown" && content.start_with?("---\n")
229
+ front_matter, body_content = parse_yaml_front_matter(content)
230
+ if front_matter
231
+ metadata.merge!(front_matter)
232
+ content = body_content
233
+ end
234
+ end
235
+
236
+ # Add filepath-based title as fallback if no title was found
237
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
238
+ metadata[:title] = extract_title_from_filepath
239
+ end
240
+
215
241
  {
216
242
  content: content,
217
243
  metadata: metadata,
@@ -225,16 +251,41 @@ module Ragdoll
225
251
  encoding: "ISO-8859-1"
226
252
  }
227
253
 
254
+ # Try to parse front matter with different encoding too
255
+ if document_type == "markdown" && content.start_with?("---\n")
256
+ front_matter, body_content = parse_yaml_front_matter(content)
257
+ if front_matter
258
+ metadata.merge!(front_matter)
259
+ content = body_content
260
+ end
261
+ end
262
+
263
+ # Add filepath-based title as fallback if no title was found
264
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
265
+ metadata[:title] = extract_title_from_filepath
266
+ end
267
+
228
268
  {
229
269
  content: content,
230
270
  metadata: metadata,
231
- document_type: "text"
271
+ document_type: document_type.nil? ? "text" : document_type
232
272
  }
233
273
  end
234
274
 
235
275
  def parse_html
236
276
  content = File.read(@file_path, encoding: "UTF-8")
237
277
 
278
+ # Extract title from H1 tag if present
279
+ h1_match = content.match(%r{<h1[^>]*>(.*?)</h1>}mi)
280
+ title = nil
281
+ if h1_match
282
+ # Clean up the H1 content by removing any HTML tags and normalizing whitespace
283
+ title = h1_match[1]
284
+ .gsub(/<[^>]+>/, " ") # Remove any nested HTML tags
285
+ .gsub(/\s+/, " ") # Normalize whitespace
286
+ .strip
287
+ end
288
+
238
289
  # Basic HTML tag stripping (for more advanced parsing, consider using Nokogiri)
239
290
  clean_content = content
240
291
  .gsub(%r{<script[^>]*>.*?</script>}mi, "") # Remove script tags
@@ -248,6 +299,13 @@ module Ragdoll
248
299
  original_format: "html"
249
300
  }
250
301
 
302
+ # Add title to metadata if found, otherwise use filepath fallback
303
+ if title && !title.empty?
304
+ metadata[:title] = title
305
+ else
306
+ metadata[:title] = extract_title_from_filepath
307
+ end
308
+
251
309
  {
252
310
  content: clean_content,
253
311
  metadata: metadata,
@@ -286,6 +344,9 @@ module Ragdoll
286
344
  # Use AI-generated description or fallback placeholder
287
345
  content = desc && !desc.empty? ? desc : "Image file: #{File.basename(@file_path)}"
288
346
 
347
+ # Add filepath-based title as fallback
348
+ metadata[:title] = extract_title_from_filepath
349
+
289
350
  puts "✅ DocumentProcessor: Image parsing complete. Content: '#{content[0..100]}...'"
290
351
 
291
352
  {
@@ -338,5 +399,67 @@ module Ragdoll
338
399
  else "application/octet-stream"
339
400
  end
340
401
  end
402
+
403
+ private
404
+
405
# Build a human-readable title from a file's name, for use when a document
# supplies no title of its own.
#
# The extension is dropped, runs of hyphens/underscores become single spaces,
# a space is inserted at each lowercase-to-uppercase boundary, and every
# resulting word goes through String#capitalize (first letter upper-cased,
# remaining letters lower-cased).
#
# @param file_path [String] path to derive the title from (defaults to @file_path)
# @return [String] the title derived from the file name
def extract_title_from_filepath(file_path = @file_path)
  extension = File.extname(file_path)
  stem = File.basename(file_path, extension)

  # Separators first, then camelCase boundaries, then whitespace clean-up.
  spaced = stem.gsub(/[-_]+/, ' ')
  spaced = spaced.gsub(/([a-z])([A-Z])/, '\1 \2')
  normalized = spaced.gsub(/\s+/, ' ').strip

  words = normalized.split(' ')
  words.map { |word| word.capitalize }.join(' ')
end
421
+
422
# Parse YAML front matter from the start of markdown content.
#
# Front matter is the text between an opening "---" line (which must be the
# very first line) and the next line consisting only of "---". It is only
# accepted when it parses to a YAML mapping; anything else (a bare scalar, a
# list, an empty block, invalid YAML, disallowed classes or aliases) is treated
# as "no front matter" and the original content is handed back untouched, so
# callers can safely merge the returned hash into their metadata.
#
# @param content [String] the full content of the markdown file
# @return [Array] [front_matter_hash, body_content] when a mapping was parsed
#   (keys converted to symbols, body stripped of surrounding whitespace),
#   or [nil, original_content] otherwise
def parse_yaml_front_matter(content)
  # Accept both LF and CRLF after the opening delimiter; the closing delimiter
  # below is matched with #strip and is therefore already line-ending agnostic.
  return [nil, content] unless content.match?(/\A---\r?\n/)

  lines = content.lines

  # Index 0 is the opening delimiter, so the closing one is the next bare "---".
  closing_index = lines.each_index.find { |index| index.positive? && lines[index].strip == "---" }
  return [nil, content] unless closing_index

  yaml_content = lines[1...closing_index].join
  body_content = lines[(closing_index + 1)..-1].join

  begin
    # Allow Time/Date objects for date fields in YAML front matter.
    front_matter = YAML.safe_load(yaml_content, permitted_classes: [Time, Date])
  rescue Psych::Exception => e
    # Psych::Exception is the common parent of SyntaxError, DisallowedClass and
    # BadAlias, so unsupported YAML never aborts document parsing.
    Rails.logger.warn "Warning: Failed to parse YAML front matter: #{e.message}" if defined?(Rails)
    return [nil, content]
  end

  # A scalar, list or empty block is not metadata; report "no front matter".
  return [nil, content] unless front_matter.is_a?(Hash)

  # YAML keys may be integers, booleans, etc.; go through #to_s so every key
  # can be symbolized.
  [front_matter.transform_keys { |key| key.to_s.to_sym }, body_content.strip]
end
341
464
  end
342
465
  end
@@ -38,6 +38,11 @@ module Ragdoll
38
38
  embedding_config = @model_resolver.resolve_embedding(:text)
39
39
  # Use just the model name for RubyLLM
40
40
  model = embedding_config.model.model
41
+
42
+ # If model is nil or empty, use fallback
43
+ if model.nil? || model.empty?
44
+ return generate_fallback_embedding
45
+ end
41
46
 
42
47
  begin
43
48
  response = RubyLLM.embed(cleaned_text, model: model)
@@ -93,6 +98,11 @@ module Ragdoll
93
98
  embedding_config = @model_resolver.resolve_embedding(:text)
94
99
  # Use just the model name for RubyLLM
95
100
  model = embedding_config.model.model
101
+
102
+ # If model is nil or empty, use fallback
103
+ if model.nil? || model.empty?
104
+ return cleaned_texts.map { generate_fallback_embedding }
105
+ end
96
106
 
97
107
  cleaned_texts.map do |text|
98
108
  response = RubyLLM.embed(text, model: model)
@@ -27,25 +27,94 @@ module Ragdoll
27
27
  end
28
28
 
29
29
# Run a similarity search over stored embeddings and optionally record it.
#
# @param query_or_embedding [String, Array] a query string (an embedding is
#   generated for it via @embedding_service) or an already-computed embedding
# @param options [Hash] optional settings:
#   :limit, :threshold  - override the values from the search config
#   :filters            - filter hash handed to the embedding search (not mutated)
#   :keywords           - keyword(s); blank entries are dropped, the rest are
#                         added to the filters under :keywords
#   :query              - original query text when an embedding is passed in
#   :session_id, :user_id - forwarded to search tracking
#   :track_search       - set to false to skip recording (default true)
# @return [Hash] { results:, statistics:, execution_time_ms: }; statistics is
#   nil when the fallback search was used or no embedding could be generated
def search_similar_content(query_or_embedding, options = {})
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed_ms = lambda do
    ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round
  end

  search_config = @config_service.search_config
  limit = options[:limit] || search_config[:max_results]
  threshold = options[:threshold] || search_config[:similarity_threshold]
  # Work on a copy so adding :keywords below never alters the caller's hash.
  filters = (options[:filters] || {}).dup

  # Extract keywords option and normalize
  keywords = Array(options[:keywords] || []).map(&:to_s).reject(&:empty?)

  # Extract tracking options
  session_id = options[:session_id]
  user_id = options[:user_id]
  track_search = options.fetch(:track_search, true)

  if query_or_embedding.is_a?(Array)
    # It's already an embedding
    query_embedding = query_or_embedding
    query_string = options[:query] # Should be provided when passing embedding directly
  else
    # It's a query string, generate embedding
    query_string = query_or_embedding
    query_embedding = @embedding_service.generate_embedding(query_string)
    # Keep the return shape identical to the normal path so callers can always
    # read [:results] from the response.
    return { results: [], statistics: nil, execution_time_ms: elapsed_ms.call } if query_embedding.nil?
  end

  # Add keywords to filters if provided
  filters[:keywords] = keywords if keywords.any?

  # Search using ActiveRecord models with statistics.
  # Try enhanced search first, fall back to original if it fails.
  # NOTE(review): database errors raised through ActiveRecord usually surface as
  # ActiveRecord::StatementInvalid rather than PG::SyntaxError - confirm that
  # this rescue list actually catches the failures it is meant to.
  begin
    search_response = Ragdoll::Embedding.search_similar_with_stats(query_embedding,
                                                                   limit: limit,
                                                                   threshold: threshold,
                                                                   filters: filters)
    results = search_response[:results]
    statistics = search_response[:statistics]
  rescue NoMethodError, PG::SyntaxError => e
    # Fall back to original search method if enhanced version fails
    puts "Warning: Enhanced search failed (#{e.message}), using fallback" if ENV["RAGDOLL_DEBUG"]
    results = Ragdoll::Embedding.search_similar(query_embedding,
                                                limit: limit,
                                                threshold: threshold,
                                                filters: filters)
    statistics = nil
  end

  execution_time = elapsed_ms.call

  # Record search if tracking enabled and we have a query string
  if track_search && query_string && !query_string.empty?
    begin
      # Format results for search recording
      search_results = results.map do |result|
        {
          embedding_id: result[:embedding_id] || result[:id],
          similarity: result[:similarity] || result[:similarity_score] || 0.0
        }
      end

      search_type = keywords.any? ? "semantic_with_keywords" : "semantic"

      Ragdoll::Search.record_search(
        query: query_string,
        query_embedding: query_embedding,
        results: search_results,
        search_type: search_type,
        filters: filters,
        options: { limit: limit, threshold: threshold, keywords: keywords },
        execution_time_ms: execution_time,
        session_id: session_id,
        user_id: user_id
      )
    rescue => e
      # Tracking is best-effort: log the error but don't fail the search
      puts "Warning: Search tracking failed: #{e.message}" if ENV["RAGDOLL_DEBUG"]
    end
  end

  # Return results with statistics for better user feedback
  {
    results: results,
    statistics: statistics,
    execution_time_ms: execution_time
  }
end
50
119
  end
51
120
  end
@@ -1,8 +1,5 @@
1
1
  class EnablePostgresqlExtensions < ActiveRecord::Migration[7.0]
2
2
  def up
3
- # This migration is now handled by the db:create rake task
4
- # Just ensure required extensions are available
5
-
6
3
  # Vector similarity search (required for embeddings)
7
4
  execute "CREATE EXTENSION IF NOT EXISTS vector"
8
5
 
@@ -15,9 +12,11 @@ class EnablePostgresqlExtensions < ActiveRecord::Migration[7.0]
15
12
  end
16
13
 
17
14
  def down
18
- execute <<-SQL
19
- DROP DATABASE IF EXISTS ragdoll_development;
20
- DROP ROLE IF EXISTS ragdoll;
21
- SQL
15
+ # Extensions are typically not dropped because other objects in this database may still depend on them
16
+ # If you really need to drop them, uncomment the following:
17
+ # execute "DROP EXTENSION IF EXISTS vector"
18
+ # execute "DROP EXTENSION IF EXISTS unaccent"
19
+ # execute "DROP EXTENSION IF EXISTS pg_trgm"
20
+ # execute "DROP EXTENSION IF EXISTS \"uuid-ossp\""
22
21
  end
23
- end
22
+ end
@@ -0,0 +1,117 @@
1
# Creates the ragdoll_documents table together with its lookup indexes,
# full-text search indexes and the trigger that maintains search_vector.
class CreateRagdollDocuments < ActiveRecord::Migration[7.0]
  # For concurrent index creation (PostgreSQL)
  disable_ddl_transaction!

  # Builds the table, then the indexes, then the search_vector trigger.
  def up
    create_documents_table
    add_column_indexes
    add_fulltext_expression_index
    add_metadata_indexes
    add_gin_indexes
    install_search_vector_trigger
  end

  # Removes the trigger and its function before dropping the table.
  def down
    execute <<-SQL
      DROP TRIGGER IF EXISTS ragdoll_search_vector_update ON ragdoll_documents;
      DROP FUNCTION IF EXISTS ragdoll_documents_vector_update();
    SQL

    drop_table :ragdoll_documents
  end

  private

  # Table definition: one row per document.
  def create_documents_table
    create_table :ragdoll_documents,
                 comment: "Core documents table with LLM-generated structured metadata" do |t|
      t.string :location, null: false,
                          comment: "Source location of document (file path, URL, or identifier)"

      t.string :title, null: false,
                       comment: "Human-readable document title for display and search"

      t.text :summary, null: false, default: "",
                       comment: "LLM-generated summary of document content"

      t.string :document_type, null: false, default: "text",
                               comment: "Document format type"

      t.string :status, null: false, default: "pending",
                        comment: "Document processing status"

      t.json :metadata, default: {},
                        comment: "LLM-generated structured metadata about the file"

      t.timestamp :file_modified_at, null: false, default: -> { "CURRENT_TIMESTAMP" },
                                     comment: "Timestamp when the source file was last modified"

      t.timestamps null: false,
                   comment: "Standard creation and update timestamps"

      # tsvector column for full-text search (filled in by the trigger)
      t.tsvector :search_vector

      # keywords stored as a text array
      t.text :keywords, array: true, default: []
    end
  end

  # Plain b-tree indexes on individual columns and one composite index.
  def add_column_indexes
    add_index :ragdoll_documents, :location, unique: true,
                                             comment: "Unique index for document source lookup"

    add_index :ragdoll_documents, :title,
              comment: "Index for title-based search"

    add_index :ragdoll_documents, :document_type,
              comment: "Index for filtering by document type"

    add_index :ragdoll_documents, :status,
              comment: "Index for filtering by processing status"

    add_index :ragdoll_documents, :created_at,
              comment: "Index for chronological sorting"

    add_index :ragdoll_documents, %i[document_type status],
              comment: "Composite index for type+status filtering"
  end

  # Expression GIN index over title plus selected metadata fields.
  def add_fulltext_expression_index
    execute <<-SQL
      CREATE INDEX CONCURRENTLY index_ragdoll_documents_on_fulltext_search
      ON ragdoll_documents
      USING gin(to_tsvector('english',
        COALESCE(title, '') || ' ' ||
        COALESCE(metadata->>'summary', '') || ' ' ||
        COALESCE(metadata->>'keywords', '') || ' ' ||
        COALESCE(metadata->>'description', '')
      ))
    SQL
  end

  # Expression indexes on values extracted from the metadata JSON.
  def add_metadata_indexes
    add_index :ragdoll_documents, "(metadata->>'document_type')",
              name: "index_ragdoll_documents_on_metadata_type",
              comment: "Index for filtering by document type"

    add_index :ragdoll_documents, "(metadata->>'classification')",
              name: "index_ragdoll_documents_on_metadata_classification",
              comment: "Index for filtering by document classification"
  end

  # GIN indexes on the search_vector column and the keywords array.
  def add_gin_indexes
    add_index :ragdoll_documents, :search_vector, using: :gin, algorithm: :concurrently

    add_index :ragdoll_documents, :keywords, using: :gin,
                                             name: 'index_ragdoll_documents_on_keywords_gin'
  end

  # Trigger that recomputes search_vector on every INSERT/UPDATE.
  def install_search_vector_trigger
    execute <<-SQL
      CREATE FUNCTION ragdoll_documents_vector_update() RETURNS trigger AS $$
      BEGIN
        NEW.search_vector := to_tsvector('english',
          COALESCE(NEW.title, '') || ' ' ||
          COALESCE(NEW.metadata->>'summary', '') || ' ' ||
          COALESCE(NEW.metadata->>'keywords', '') || ' ' ||
          COALESCE(NEW.metadata->>'description', '')
        );
        RETURN NEW;
      END
      $$ LANGUAGE plpgsql;

      CREATE TRIGGER ragdoll_search_vector_update
      BEFORE INSERT OR UPDATE ON ragdoll_documents
      FOR EACH ROW EXECUTE FUNCTION ragdoll_documents_vector_update();
    SQL
  end
end
@@ -3,7 +3,7 @@ class CreateRagdollEmbeddings < ActiveRecord::Migration[7.0]
3
3
  create_table :ragdoll_embeddings,
4
4
  comment: "Polymorphic vector embeddings storage for semantic similarity search" do |t|
5
5
 
6
- t.references :embeddable, polymorphic: true, null: false,
6
+ t.references :embeddable, polymorphic: true, null: false,
7
7
  comment: "Polymorphic reference to embeddable content"
8
8
 
9
9
  t.text :content, null: false, default: "",
@@ -26,16 +26,19 @@ class CreateRagdollEmbeddings < ActiveRecord::Migration[7.0]
26
26
 
27
27
  t.timestamps null: false,
28
28
  comment: "Standard creation and update timestamps"
29
+ end
29
30
 
30
- ###########
31
- # Indexes #
32
- ###########
31
+ ###########
32
+ # Indexes #
33
+ ###########
33
34
 
34
- t.index %i[embeddable_type embeddable_id],
35
- comment: "Index for finding embeddings by embeddable content"
35
+ add_index :ragdoll_embeddings, [:embeddable_type, :embeddable_id],
36
+ comment: "Index for finding embeddings by embeddable content"
36
37
 
37
- t.index :embedding_vector, using: :ivfflat, opclass: :vector_cosine_ops, name: "index_ragdoll_embeddings_on_embedding_vector_cosine",
38
- comment: "IVFFlat index for fast cosine similarity search"
39
- end
38
+ add_index :ragdoll_embeddings, :embedding_vector,
39
+ using: :ivfflat,
40
+ opclass: :vector_cosine_ops,
41
+ name: "index_ragdoll_embeddings_on_embedding_vector_cosine",
42
+ comment: "IVFFlat index for fast cosine similarity search"
40
43
  end
41
- end
44
+ end
@@ -29,19 +29,22 @@ class CreateRagdollContents < ActiveRecord::Migration[7.0]
29
29
 
30
30
  t.timestamps null: false,
31
31
  comment: "Standard creation and update timestamps"
32
+ end
32
33
 
33
- ###########
34
- # Indexes #
35
- ###########
34
+ ###########
35
+ # Indexes #
36
+ ###########
36
37
 
37
- t.index :embedding_model,
38
- comment: "Index for filtering by embedding model"
38
+ add_index :ragdoll_contents, :embedding_model,
39
+ comment: "Index for filtering by embedding model"
39
40
 
40
- t.index :type,
41
- comment: "Index for filtering by content type"
41
+ add_index :ragdoll_contents, :type,
42
+ comment: "Index for filtering by content type"
42
43
 
43
- t.index "to_tsvector('english', COALESCE(content, ''))", using: :gin, name: "index_ragdoll_contents_on_fulltext_search",
44
- comment: "Full-text search index for text content"
45
- end
44
+ execute <<-SQL
45
+ CREATE INDEX index_ragdoll_contents_on_fulltext_search
46
+ ON ragdoll_contents
47
+ USING gin(to_tsvector('english', COALESCE(content, '')))
48
+ SQL
46
49
  end
47
- end
50
+ end
@@ -0,0 +1,77 @@
1
# Creates the ragdoll_searches table, which stores each search query, its
# embedding and summary figures about the results, plus supporting indexes.
class CreateRagdollSearches < ActiveRecord::Migration[7.0]
  def change
    create_table :ragdoll_searches,
                 comment: "Search queries and results tracking with vector similarity support" do |t|
      t.text :query, null: false,
                     comment: "Original search query text"

      t.vector :query_embedding, limit: 1536, null: false,
                                 comment: "Vector embedding of the search query for similarity matching"

      t.string :search_type, null: false, default: "semantic",
                             comment: "Type of search performed (semantic, hybrid, fulltext)"

      t.integer :results_count, null: false, default: 0,
                                comment: "Number of results returned for this search"

      t.float :max_similarity_score,
              comment: "Highest similarity score from results"

      t.float :min_similarity_score,
              comment: "Lowest similarity score from results"

      t.float :avg_similarity_score,
              comment: "Average similarity score of results"

      t.json :search_filters, default: {},
                              comment: "Filters applied during search (document_type, date_range, etc.)"

      t.json :search_options, default: {},
                              comment: "Search configuration options (threshold, limit, etc.)"

      t.integer :execution_time_ms,
                comment: "Search execution time in milliseconds"

      t.string :session_id,
               comment: "User session identifier for grouping related searches"

      t.string :user_id,
               comment: "User identifier if authentication is available"

      t.timestamps null: false,
                   comment: "Standard creation and update timestamps"
    end

    ###########
    # Indexes #
    ###########

    add_index :ragdoll_searches, :query_embedding,
              using: :ivfflat,
              opclass: :vector_cosine_ops,
              name: "index_ragdoll_searches_on_query_embedding_cosine",
              comment: "IVFFlat index for finding similar search queries"

    add_index :ragdoll_searches, :search_type,
              comment: "Index for filtering by search type"

    add_index :ragdoll_searches, :session_id,
              comment: "Index for grouping searches by session"

    add_index :ragdoll_searches, :user_id,
              comment: "Index for filtering searches by user"

    add_index :ragdoll_searches, :created_at,
              comment: "Index for chronological search history"

    add_index :ragdoll_searches, :results_count,
              comment: "Index for analyzing search effectiveness"

    # Raw SQL cannot be reversed automatically: a bare `execute` inside `change`
    # makes rollback raise ActiveRecord::IrreversibleMigration. Spell out both
    # directions so the migration can be rolled back.
    reversible do |dir|
      dir.up do
        execute <<-SQL
          CREATE INDEX index_ragdoll_searches_on_fulltext_query
          ON ragdoll_searches
          USING gin(to_tsvector('english', query))
        SQL
      end

      dir.down do
        execute "DROP INDEX IF EXISTS index_ragdoll_searches_on_fulltext_query"
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
# Creates ragdoll_search_results, the junction table that records which
# embeddings were returned for each search, with score, rank and click data.
class CreateRagdollSearchResults < ActiveRecord::Migration[7.0]
  # [columns, index name, index comment] for every index added after the table.
  INDEX_SPECS = [
    [%i[search_id result_rank], "idx_search_results_search_rank",
     "Index for retrieving results in ranked order"],
    [%i[embedding_id similarity_score], "idx_search_results_embedding_score",
     "Index for analyzing embedding performance"],
    [:similarity_score, "idx_search_results_similarity",
     "Index for similarity score analysis"],
    [%i[clicked clicked_at], "idx_search_results_clicks",
     "Index for click-through analysis"]
  ].freeze

  def change
    create_table :ragdoll_search_results,
                 comment: "Junction table linking searches to their returned embeddings" do |t|
      t.references :search, null: false,
                            foreign_key: { to_table: :ragdoll_searches },
                            comment: "Reference to the search query"

      t.references :embedding, null: false,
                               foreign_key: { to_table: :ragdoll_embeddings },
                               comment: "Reference to the returned embedding"

      t.float :similarity_score, null: false,
                                 comment: "Similarity score for this result"

      t.integer :result_rank, null: false,
                              comment: "Ranking position of this result (1-based)"

      t.boolean :clicked, default: false,
                          comment: "Whether user interacted with this result"

      t.datetime :clicked_at,
                 comment: "Timestamp when result was clicked/selected"

      t.timestamps null: false,
                   comment: "Standard creation and update timestamps"
    end

    # Indexes are created in the order they are listed in INDEX_SPECS.
    INDEX_SPECS.each do |columns, index_name, index_comment|
      add_index :ragdoll_search_results, columns,
                name: index_name,
                comment: index_comment
    end
  end
end