ragdoll 0.1.3 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_record"
4
+ require "neighbor"
5
+
6
module Ragdoll
  # One recorded search: the query text, its embedding, the options/filters
  # used, timing information and aggregate similarity statistics over the
  # associated Ragdoll::SearchResult rows.
  class Search < ActiveRecord::Base
    self.table_name = "ragdoll_searches"

    # Use pgvector (via the neighbor gem) for similarity search on query embeddings
    has_neighbors :query_embedding

    has_many :search_results, class_name: "Ragdoll::SearchResult", foreign_key: "search_id", dependent: :destroy
    has_many :embeddings, through: :search_results

    validates :query, presence: true
    validates :query_embedding, presence: true
    validates :search_type, presence: true, inclusion: { in: %w[semantic hybrid fulltext] }
    validates :results_count, presence: true, numericality: { greater_than_or_equal_to: 0 }

    scope :by_type, ->(type) { where(search_type: type) }
    scope :by_session, ->(session_id) { where(session_id: session_id) }
    scope :by_user, ->(user_id) { where(user_id: user_id) }
    scope :recent, -> { order(created_at: :desc) }
    scope :with_results, -> { where("results_count > 0") }
    scope :popular, -> { where("results_count > 0").order(results_count: :desc) }
    scope :slow_searches, ->(threshold_ms = 1000) { where("execution_time_ms > ?", threshold_ms) }

    # Find previously recorded searches whose query embedding is close to the
    # given one.
    #
    # @param query_embedding [Array<Float>] embedding to compare against
    # @param limit [Integer] maximum number of searches returned
    # @param threshold [Float] minimum cosine similarity (1 - cosine distance)
    # @return [Array<Ragdoll::Search>] best match first; each record responds
    #   to +similarity_score+
    def self.find_similar(query_embedding, limit: 10, threshold: 0.8)
      # Over-fetch (2x) because some neighbours are dropped by the threshold.
      candidates = nearest_neighbors(:query_embedding, query_embedding, distance: "cosine").limit(limit * 2)

      matches = candidates.filter_map do |search|
        similarity = 1.0 - search.neighbor_distance
        next if similarity < threshold

        search.define_singleton_method(:similarity_score) { similarity }
        search
      end

      matches.sort_by { |search| -search.similarity_score }.first(limit)
    end

    # Recompute and persist max/min/avg similarity over this search's results.
    # Does nothing (returns nil) when the search has no scored results.
    #
    # @raise [ActiveRecord::RecordInvalid] if the update fails validation
    def calculate_similarity_stats!
      scores = search_results.pluck(:similarity_score).compact
      return if scores.empty?

      update!(
        max_similarity_score: scores.max,
        min_similarity_score: scores.min,
        avg_similarity_score: scores.sum.to_f / scores.length
      )
    end

    # Search results ordered by rank, with their embeddings preloaded.
    def ranked_results
      search_results.includes(:embedding).order(:result_rank)
    end

    # Results that were clicked, oldest click first.
    def clicked_results
      search_results.where(clicked: true).order(:clicked_at)
    end

    # Fraction (0.0..1.0, not a percentage) of this search's results that
    # were clicked. Returns 0.0 when there are no results.
    def click_through_rate
      total = results_count.to_i
      return 0.0 if total.zero?

      search_results.where(clicked: true).count.to_f / total
    end

    # Record a search together with its ranked results.
    #
    # The search row, its result rows and the similarity statistics are
    # written in one transaction, so a failure on any result row does not
    # leave a half-recorded search behind.
    #
    # @param query [String] original query text
    # @param query_embedding [Array<Float>] embedding of the query
    # @param results [Array<Hash>] hashes with +:embedding_id+ and
    #   +:similarity+, already in rank order
    # @return [Ragdoll::Search] the persisted search
    # @raise [ActiveRecord::RecordInvalid] if the search or a result is invalid
    def self.record_search(query:, query_embedding:, results:, search_type: "semantic",
                           filters: {}, options: {}, execution_time_ms: nil,
                           session_id: nil, user_id: nil)
      transaction do
        search = create!(
          query: query,
          query_embedding: query_embedding,
          search_type: search_type,
          results_count: results.length,
          search_filters: filters,
          search_options: options,
          execution_time_ms: execution_time_ms,
          session_id: session_id,
          user_id: user_id
        )

        # Ranks are 1-based and follow the order of +results+.
        results.each_with_index do |result, index|
          search.search_results.create!(
            embedding_id: result[:embedding_id],
            similarity_score: result[:similarity],
            result_rank: index + 1
          )
        end

        search.calculate_similarity_stats!
        search
      end
    end

    # Aggregate statistics over searches created in the last +days+ days.
    #
    # @return [Hash] counts, averages, per-type breakdown and the average
    #   click-through rate (a percentage, see .calculate_avg_ctr)
    def self.search_analytics(days: 30)
      start_date = days.days.ago
      searches = where(created_at: start_date..)

      {
        total_searches: searches.count,
        unique_queries: searches.distinct.count(:query),
        avg_results_per_search: searches.average(:results_count)&.round(2),
        avg_execution_time: searches.average(:execution_time_ms)&.round(2),
        search_types: searches.group(:search_type).count,
        searches_with_results: searches.where("results_count > 0").count,
        avg_click_through_rate: calculate_avg_ctr(searches)
      }
    end

    # Destroy searches that have no remaining search results.
    #
    # @return [Integer] number of searches destroyed
    def self.cleanup_orphaned_searches
      # LEFT JOIN ... WHERE result.id IS NULL, done in the database instead of
      # loading every referenced search id into memory.
      orphaned_searches = where.missing(:search_results)
      orphaned_count = orphaned_searches.count

      if orphaned_count > 0
        orphaned_searches.destroy_all
        Rails.logger.info "Cleaned up #{orphaned_count} orphaned search records" if defined?(Rails)
      end

      orphaned_count
    end

    # Destroy searches older than +days+ days on which no result was ever
    # clicked (searches without any results qualify too).
    #
    # @return [Integer] number of searches destroyed
    def self.cleanup_old_unused_searches(days: 30)
      cutoff_date = days.days.ago
      # Exclude every search that owns at least one clicked result. Filtering
      # joined rows on "clicked is nil/false" would also match searches that
      # merely have *some* unclicked result, and would count one row per result.
      clicked_search_ids = SearchResult.where(clicked: true).where.not(search_id: nil).select(:search_id)
      unused_searches = where(created_at: ...cutoff_date).where.not(id: clicked_search_ids)

      unused_count = unused_searches.count

      if unused_count > 0
        unused_searches.destroy_all
        Rails.logger.info "Cleaned up #{unused_count} old unused search records" if defined?(Rails)
      end

      unused_count
    end

    # Percentage (0.0..100.0, rounded to 2 decimals) of results clicked across
    # the given searches.
    #
    # NOTE: a bare `private` does not apply to `def self.` methods, so this
    # helper has always been publicly callable; it is kept that way for
    # compatibility.
    #
    # @param searches [ActiveRecord::Relation] searches to aggregate over
    # @return [Float]
    def self.calculate_avg_ctr(searches)
      search_ids = searches.pluck(:id)
      return 0.0 if search_ids.empty?

      results = SearchResult.where(search_id: search_ids)
      total_results = results.count
      return 0.0 if total_results == 0

      clicked_results = results.where(clicked: true).count
      (clicked_results.to_f / total_results * 100).round(2)
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_record"
4
+
5
module Ragdoll
  # One ranked hit of a Ragdoll::Search: links the search to an embedding and
  # stores the similarity score, the rank and click tracking data.
  class SearchResult < ActiveRecord::Base
    self.table_name = "ragdoll_search_results"

    belongs_to :search, class_name: "Ragdoll::Search"
    belongs_to :embedding, class_name: "Ragdoll::Embedding"

    validates :similarity_score, presence: true, numericality: { in: 0.0..1.0 }
    validates :result_rank, presence: true, numericality: { greater_than: 0 }
    validates :result_rank, uniqueness: { scope: :search_id }

    scope :by_rank, -> { order(:result_rank) }
    scope :clicked, -> { where(clicked: true) }
    # A clicked flag that was never set (NULL) counts as unclicked, matching
    # how rank_click_analysis treats nil.
    scope :unclicked, -> { where(clicked: [false, nil]) }
    scope :high_similarity, ->(threshold = 0.8) { where("similarity_score >= ?", threshold) }
    scope :recent_clicks, -> { where(clicked: true).order(clicked_at: :desc) }

    # Cleanup callback to remove searches when they have no results left
    after_destroy :cleanup_empty_search

    # Mark this result as clicked, stamping the click time.
    #
    # @raise [ActiveRecord::RecordInvalid] if the update fails validation
    def mark_as_clicked!
      update!(clicked: true, clicked_at: Time.current)
    end

    # Content of the linked embedding, or nil when there is none.
    def content
      embedding&.content
    end

    # Document reached through the embedding's embeddable, or nil.
    def document
      embedding&.embeddable&.document
    end

    # Title of the linked document, or nil.
    def document_title
      document&.title
    end

    # Location of the linked document, or nil.
    def document_location
      document&.location
    end

    # Aggregate statistics over results created in the last +days+ days.
    #
    # @return [Hash] counts, click-through rate (percentage), average
    #   similarity and a per-rank click breakdown
    def self.analytics(days: 30)
      start_date = days.days.ago
      results = where(created_at: start_date..)

      {
        total_results: results.count,
        clicked_results: results.where(clicked: true).count,
        click_through_rate: calculate_ctr(results),
        avg_similarity_score: results.average(:similarity_score)&.round(4),
        high_similarity_results: results.where("similarity_score >= 0.8").count,
        low_similarity_results: results.where("similarity_score < 0.5").count,
        rank_performance: rank_click_analysis(results)
      }
    end

    # Click performance per result rank.
    #
    # @param results [ActiveRecord::Relation, nil] results to analyse
    #   (defaults to all)
    # @return [Hash{Integer => Hash}] rank => { total:, clicked:, ctr: } where
    #   ctr is a percentage rounded to 2 decimals
    def self.rank_click_analysis(results = nil)
      results ||= all

      per_rank = Hash.new { |hash, rank| hash[rank] = { total: 0, clicked: 0 } }

      # Grouping on two columns yields keys of the form [rank, clicked].
      results.group(:result_rank).group("clicked").count.each do |(rank, clicked), count|
        per_rank[rank][:total] += count
        per_rank[rank][:clicked] += count if clicked
      end

      per_rank.to_h do |rank, stats|
        ctr = stats[:total] > 0 ? (stats[:clicked].to_f / stats[:total] * 100).round(2) : 0.0
        [rank, stats.merge(ctr: ctr)]
      end
    end

    # Embeddings that appear in more than one search, best average
    # similarity first (ties broken by click-through rate).
    #
    # @param limit [Integer] maximum number of rows
    # @return [ActiveRecord::Relation] rows exposing embedding_id,
    #   appearance_count, avg_similarity, click_count and ctr
    def self.top_performing_embeddings(limit: 20)
      joins(:embedding)
        .group(:embedding_id)
        .select(
          "embedding_id",
          "COUNT(*) as appearance_count",
          "AVG(similarity_score) as avg_similarity",
          "COUNT(CASE WHEN clicked THEN 1 END) as click_count",
          "ROUND(COUNT(CASE WHEN clicked THEN 1 END) * 100.0 / COUNT(*), 2) as ctr"
        )
        .having("COUNT(*) > 1")
        .order("avg_similarity DESC, ctr DESC")
        .limit(limit)
    end

    # Percentage (rounded to 2 decimals) of the given results that were
    # clicked; 0.0 for an empty set.
    #
    # NOTE: a bare `private` does not apply to `def self.` methods, so this
    # helper has always been publicly callable; it is kept that way for
    # compatibility.
    def self.calculate_ctr(results)
      total = results.count
      return 0.0 if total == 0

      clicked = results.where(clicked: true).count
      (clicked.to_f / total * 100).round(2)
    end

    private

    # after_destroy hook: remove the parent search once its last result is gone.
    def cleanup_empty_search
      # When the parent search itself is destroying its results
      # (dependent: :destroy), it is already on its way out; destroying it
      # again from here would only trigger a redundant second destroy.
      return if destroyed_by_association&.foreign_key.to_s == "search_id"

      parent = search
      return unless parent

      # Existence check instead of COUNT(*): we only need to know if any remain.
      parent.destroy unless parent.search_results.exists?
    end
  end
end
@@ -20,10 +20,10 @@ module Ragdoll
20
20
  @config.embedding_model(content_type)
21
21
  when :summary, :keywords
22
22
  # Check for task-specific model, fall back to default
23
- task_model = @config.models.text_generation[task_type]
24
- task_model || @config.models.text_generation[:default]
23
+ task_model = @config.models[:text_generation][task_type]
24
+ task_model || @config.models[:text_generation][:default]
25
25
  else
26
- @config.models.text_generation[:default]
26
+ @config.models[:text_generation][:default]
27
27
  end
28
28
  end
29
29
 
@@ -3,6 +3,8 @@
3
3
  require "pdf-reader"
4
4
  require "docx"
5
5
  require "rmagick"
6
+ require "yaml"
7
+ require "date"
6
8
  # Image description service is auto-loaded from app/services
7
9
 
8
10
  module Ragdoll
@@ -137,6 +139,11 @@ module Ragdoll
137
139
  raise ParseError, "Unsupported PDF feature: #{e.message}"
138
140
  end
139
141
 
142
+ # Add filepath-based title as fallback if no title was found
143
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
144
+ metadata[:title] = extract_title_from_filepath
145
+ end
146
+
140
147
  {
141
148
  content: content.strip,
142
149
  metadata: metadata,
@@ -192,6 +199,11 @@ module Ragdoll
192
199
  raise ParseError, "#{__LINE__} Failed to parse DOCX: #{e.message}"
193
200
  end
194
201
 
202
+ # Add filepath-based title as fallback if no title was found
203
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
204
+ metadata[:title] = extract_title_from_filepath
205
+ end
206
+
195
207
  {
196
208
  content: content.strip,
197
209
  metadata: metadata,
@@ -212,6 +224,20 @@ module Ragdoll
212
224
  else "text"
213
225
  end
214
226
 
227
+ # Parse YAML front matter for markdown files
228
+ if document_type == "markdown" && content.start_with?("---\n")
229
+ front_matter, body_content = parse_yaml_front_matter(content)
230
+ if front_matter
231
+ metadata.merge!(front_matter)
232
+ content = body_content
233
+ end
234
+ end
235
+
236
+ # Add filepath-based title as fallback if no title was found
237
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
238
+ metadata[:title] = extract_title_from_filepath
239
+ end
240
+
215
241
  {
216
242
  content: content,
217
243
  metadata: metadata,
@@ -225,16 +251,41 @@ module Ragdoll
225
251
  encoding: "ISO-8859-1"
226
252
  }
227
253
 
254
+ # Try to parse front matter with different encoding too
255
+ if document_type == "markdown" && content.start_with?("---\n")
256
+ front_matter, body_content = parse_yaml_front_matter(content)
257
+ if front_matter
258
+ metadata.merge!(front_matter)
259
+ content = body_content
260
+ end
261
+ end
262
+
263
+ # Add filepath-based title as fallback if no title was found
264
+ if metadata[:title].nil? || (metadata[:title].is_a?(String) && metadata[:title].strip.empty?)
265
+ metadata[:title] = extract_title_from_filepath
266
+ end
267
+
228
268
  {
229
269
  content: content,
230
270
  metadata: metadata,
231
- document_type: "text"
271
+ document_type: document_type.nil? ? "text" : document_type
232
272
  }
233
273
  end
234
274
 
235
275
  def parse_html
236
276
  content = File.read(@file_path, encoding: "UTF-8")
237
277
 
278
+ # Extract title from H1 tag if present
279
+ h1_match = content.match(%r{<h1[^>]*>(.*?)</h1>}mi)
280
+ title = nil
281
+ if h1_match
282
+ # Clean up the H1 content by removing any HTML tags and normalizing whitespace
283
+ title = h1_match[1]
284
+ .gsub(/<[^>]+>/, " ") # Remove any nested HTML tags
285
+ .gsub(/\s+/, " ") # Normalize whitespace
286
+ .strip
287
+ end
288
+
238
289
  # Basic HTML tag stripping (for more advanced parsing, consider using Nokogiri)
239
290
  clean_content = content
240
291
  .gsub(%r{<script[^>]*>.*?</script>}mi, "") # Remove script tags
@@ -248,6 +299,13 @@ module Ragdoll
248
299
  original_format: "html"
249
300
  }
250
301
 
302
+ # Add title to metadata if found, otherwise use filepath fallback
303
+ if title && !title.empty?
304
+ metadata[:title] = title
305
+ else
306
+ metadata[:title] = extract_title_from_filepath
307
+ end
308
+
251
309
  {
252
310
  content: clean_content,
253
311
  metadata: metadata,
@@ -286,6 +344,9 @@ module Ragdoll
286
344
  # Use AI-generated description or fallback placeholder
287
345
  content = desc && !desc.empty? ? desc : "Image file: #{File.basename(@file_path)}"
288
346
 
347
+ # Add filepath-based title as fallback
348
+ metadata[:title] = extract_title_from_filepath
349
+
289
350
  puts "✅ DocumentProcessor: Image parsing complete. Content: '#{content[0..100]}...'"
290
351
 
291
352
  {
@@ -338,5 +399,67 @@ module Ragdoll
338
399
  else "application/octet-stream"
339
400
  end
340
401
  end
402
+
403
+ private
404
+
405
# Extract a meaningful title from the file path as a fallback.
#
# The extension is dropped, runs of hyphens/underscores become spaces,
# camelCase boundaries are split, and every word is capitalized
# ("my-great_file.md" => "My Great File").
#
# @param file_path [String] the full file path (defaults to @file_path)
# @return [String] a cleaned title derived from the filename; when cleaning
#   leaves nothing (the name consists only of separators, e.g. "___.md") the
#   raw filename is returned so the fallback title is never blank
def extract_title_from_filepath(file_path = @file_path)
  filename = File.basename(file_path, File.extname(file_path))

  words = filename
          .gsub(/[-_]+/, " ")               # hyphens and underscores -> spaces
          .gsub(/([a-z])([A-Z])/, '\1 \2')  # split camelCase boundaries
          .split                            # also trims and collapses whitespace

  # Callers use this as the title of last resort, so never hand back "".
  return filename if words.empty?

  words.map(&:capitalize).join(" ")
end
421
+
422
# Parse YAML front matter from markdown content.
#
# Front matter is the block between an opening "---" on the first line and
# the next line consisting solely of "---". LF and CRLF line endings are
# both accepted.
#
# @param content [String] the full content of the markdown file
# @return [Array] [front_matter_hash, body_content] on success, where the
#   hash has its top-level keys symbolized and the body is stripped; an
#   empty front matter block yields [{}, body_content].
#   [nil, original_content] when there is no front matter, no closing
#   delimiter, the YAML is not a mapping, or the YAML cannot be parsed safely.
def parse_yaml_front_matter(content)
  return [nil, content] unless content.start_with?("---\n", "---\r\n")

  lines = content.lines
  # Index 0 is the opening delimiter; look for the next bare "---".
  closing_index = lines.each_index.find { |index| index.positive? && lines[index].strip == "---" }
  return [nil, content] unless closing_index

  yaml_content = lines[1...closing_index].join
  body_content = (lines[(closing_index + 1)..] || []).join.strip

  # "---\n---\n": valid but empty front matter.
  return [{}, body_content] if yaml_content.strip.empty?

  # Only Time and Date are allowed beyond plain scalars/collections, so that
  # date fields work without opening the door to arbitrary objects.
  parsed = YAML.safe_load(yaml_content, permitted_classes: [Time, Date])

  # Callers merge the result into a metadata Hash; a list or scalar there
  # would raise, so treat it as "no usable front matter".
  return [nil, content] unless parsed.is_a?(Hash)

  # Keys may be non-strings in YAML (e.g. `1: one`), hence to_s first.
  [parsed.transform_keys { |key| key.to_s.to_sym }, body_content]
rescue Psych::Exception => e
  # Covers syntax errors, disallowed classes and aliases/anchors.
  Rails.logger.warn "Warning: Failed to parse YAML front matter: #{e.message}" if defined?(Rails)
  [nil, content]
end
341
464
  end
342
465
  end
@@ -38,6 +38,11 @@ module Ragdoll
38
38
  embedding_config = @model_resolver.resolve_embedding(:text)
39
39
  # Use just the model name for RubyLLM
40
40
  model = embedding_config.model.model
41
+
42
+ # If model is nil or empty, use fallback
43
+ if model.nil? || model.empty?
44
+ return generate_fallback_embedding
45
+ end
41
46
 
42
47
  begin
43
48
  response = RubyLLM.embed(cleaned_text, model: model)
@@ -93,6 +98,11 @@ module Ragdoll
93
98
  embedding_config = @model_resolver.resolve_embedding(:text)
94
99
  # Use just the model name for RubyLLM
95
100
  model = embedding_config.model.model
101
+
102
+ # If model is nil or empty, use fallback
103
+ if model.nil? || model.empty?
104
+ return cleaned_texts.map { generate_fallback_embedding }
105
+ end
96
106
 
97
107
  cleaned_texts.map do |text|
98
108
  response = RubyLLM.embed(text, model: model)
@@ -27,25 +27,83 @@ module Ragdoll
27
27
  end
28
28
 
29
29
# Run a semantic similarity search and (optionally) record it.
#
# @param query_or_embedding [String, Array] a query string (an embedding is
#   generated for it) or a ready-made embedding vector
# @param options [Hash] optional settings:
#   :limit, :threshold (default to the configured search settings),
#   :filters, :session_id, :user_id,
#   :track_search (default true) and :query (query text to record when an
#   embedding is passed directly)
# @return [Hash] always { results:, statistics:, execution_time_ms: };
#   results is empty and statistics nil when no embedding could be generated
def search_similar_content(query_or_embedding, options = {})
  # Monotonic clock: immune to wall-clock adjustments while timing.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  elapsed_ms = -> { ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at) * 1000).round }

  search_config = @config_service.search_config
  limit = options[:limit] || search_config[:max_results]
  threshold = options[:threshold] || search_config[:similarity_threshold]
  filters = options[:filters] || {}

  # Extract tracking options
  session_id = options[:session_id]
  user_id = options[:user_id]
  track_search = options.fetch(:track_search, true)

  if query_or_embedding.is_a?(Array)
    # It's already an embedding
    query_embedding = query_or_embedding
    query_string = options[:query] # Should be provided when passing embedding directly
  else
    # It's a query string, generate embedding
    query_string = query_or_embedding
    query_embedding = @embedding_service.generate_embedding(query_string)
    # Keep the return shape identical to the success path so callers can
    # always read [:results].
    return { results: [], statistics: nil, execution_time_ms: elapsed_ms.call } if query_embedding.nil?
  end

  # Search using ActiveRecord models with statistics
  # Try enhanced search first, fall back to original if it fails
  begin
    search_response = Ragdoll::Embedding.search_similar_with_stats(query_embedding,
                                                                   limit: limit,
                                                                   threshold: threshold,
                                                                   filters: filters)
    results = search_response[:results]
    statistics = search_response[:statistics]
  rescue NoMethodError, PG::SyntaxError => e
    # Fall back to original search method if enhanced version fails
    puts "Warning: Enhanced search failed (#{e.message}), using fallback" if ENV["RAGDOLL_DEBUG"]
    results = Ragdoll::Embedding.search_similar(query_embedding,
                                                limit: limit,
                                                threshold: threshold,
                                                filters: filters)
    statistics = nil
  end

  execution_time = elapsed_ms.call

  # Record search if tracking enabled and we have a query string
  if track_search && query_string && !query_string.empty?
    begin
      # Format results for search recording
      search_results = results.map do |result|
        {
          embedding_id: result[:embedding_id] || result[:id],
          similarity: result[:similarity] || result[:similarity_score] || 0.0
        }
      end

      Ragdoll::Search.record_search(
        query: query_string,
        query_embedding: query_embedding,
        results: search_results,
        search_type: "semantic",
        filters: filters,
        options: { limit: limit, threshold: threshold },
        execution_time_ms: execution_time,
        session_id: session_id,
        user_id: user_id
      )
    rescue => e
      # Tracking is best-effort: log but don't fail the search
      puts "Warning: Search tracking failed: #{e.message}" if ENV["RAGDOLL_DEBUG"]
    end
  end

  # Return results with statistics for better user feedback
  {
    results: results,
    statistics: statistics,
    execution_time_ms: execution_time
  }
end
50
108
  end
51
109
  end
@@ -0,0 +1,73 @@
1
# Creates the ragdoll_searches table, which stores each search query, its
# vector embedding, the options used and summary statistics, together with
# the indexes used to look searches up.
class CreateRagdollSearches < ActiveRecord::Migration[7.0]
  def change
    table_comment = "Search queries and results tracking with vector similarity support"

    create_table :ragdoll_searches, comment: table_comment do |table|
      # --- Query and its embedding -------------------------------------
      table.text :query, null: false, comment: "Original search query text"
      table.vector :query_embedding, limit: 1536, null: false,
                                     comment: "Vector embedding of the search query for similarity matching"
      table.string :search_type, null: false, default: "semantic",
                                 comment: "Type of search performed (semantic, hybrid, fulltext)"

      # --- Result statistics -------------------------------------------
      table.integer :results_count, null: false, default: 0,
                                    comment: "Number of results returned for this search"
      table.float :max_similarity_score, comment: "Highest similarity score from results"
      table.float :min_similarity_score, comment: "Lowest similarity score from results"
      table.float :avg_similarity_score, comment: "Average similarity score of results"

      # --- Search configuration and timing -----------------------------
      table.json :search_filters, default: {},
                                  comment: "Filters applied during search (document_type, date_range, etc.)"
      table.json :search_options, default: {},
                                  comment: "Search configuration options (threshold, limit, etc.)"
      table.integer :execution_time_ms, comment: "Search execution time in milliseconds"

      # --- Who searched --------------------------------------------------
      table.string :session_id, comment: "User session identifier for grouping related searches"
      table.string :user_id, comment: "User identifier if authentication is available"

      table.timestamps null: false, comment: "Standard creation and update timestamps"

      # --- Indexes -------------------------------------------------------
      table.index :query_embedding, using: :ivfflat, opclass: :vector_cosine_ops,
                                    name: "index_ragdoll_searches_on_query_embedding_cosine",
                                    comment: "IVFFlat index for finding similar search queries"
      table.index :search_type, comment: "Index for filtering by search type"
      table.index :session_id, comment: "Index for grouping searches by session"
      table.index :user_id, comment: "Index for filtering searches by user"
      table.index :created_at, comment: "Index for chronological search history"
      table.index :results_count, comment: "Index for analyzing search effectiveness"
      table.index "to_tsvector('english', query)", using: :gin,
                                                   name: "index_ragdoll_searches_on_fulltext_query",
                                                   comment: "Full-text search index for finding searches by query text"
    end
  end
end