activerecord-mysql-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +22 -0
  4. data/Appraisals +18 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +21 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +235 -0
  9. data/Rakefile +16 -0
  10. data/activerecord-mysql-search.gemspec +39 -0
  11. data/gemfiles/.bundle/config +2 -0
  12. data/gemfiles/rails_7.0.gemfile +20 -0
  13. data/gemfiles/rails_7.1.gemfile +19 -0
  14. data/gemfiles/rails_7.2.gemfile +19 -0
  15. data/gemfiles/rails_8.0.gemfile +19 -0
  16. data/lib/activerecord-mysql-search.rb +3 -0
  17. data/lib/generators/mysql/search/create_trigger_generator.rb +28 -0
  18. data/lib/generators/mysql/search/install_generator.rb +25 -0
  19. data/lib/generators/mysql/search/templates/app/models/search_index.rb +6 -0
  20. data/lib/generators/mysql/search/templates/config/initializers/active_record_ext.rb +37 -0
  21. data/lib/generators/mysql/search/templates/config/initializers/mysql_search.rb +22 -0
  22. data/lib/generators/mysql/search/templates/db/migrate/create_search_indices.rb +11 -0
  23. data/lib/generators/mysql/search/templates/db/migrate/enable_auto_update_of_updated_at.rb +22 -0
  24. data/lib/mysql/search/callbacks.rb +75 -0
  25. data/lib/mysql/search/grabber.rb +51 -0
  26. data/lib/mysql/search/jobs/scheduled_updater_job.rb +43 -0
  27. data/lib/mysql/search/jobs/updater_job.rb +24 -0
  28. data/lib/mysql/search/jobs.rb +12 -0
  29. data/lib/mysql/search/queries/full_text_search_query.rb +71 -0
  30. data/lib/mysql/search/queries/updated_sources_query.rb +50 -0
  31. data/lib/mysql/search/railtie.rb +19 -0
  32. data/lib/mysql/search/searchable.rb +24 -0
  33. data/lib/mysql/search/source.rb +69 -0
  34. data/lib/mysql/search/updater.rb +46 -0
  35. data/lib/mysql/search/utils/duration_parser.rb +20 -0
  36. data/lib/mysql/search/utils/formatter.rb +42 -0
  37. data/lib/mysql/search/utils/text_normalizer.rb +16 -0
  38. data/lib/mysql/search/utils.rb +13 -0
  39. data/lib/mysql/search.rb +47 -0
  40. data/lib/tasks/actualize.rake +24 -0
  41. data/lib/tasks/reindex.rake +30 -0
  42. metadata +86 -0
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Manages callbacks for updating search indices.
#
# Given a source model class and its search-index schema, this class works out
# which attributes on which associated models feed the index and registers an
# +after_save+ callback on each of those model classes.
class Callbacks
  attr_reader :source_model_class, :callbacks_config, :assigned

  # Runs or enqueues an UpdaterJob for a saved record.
  #
  # @param source_model_class_name [String] name of the full-text searchable model
  # @param associated [Object] the record that was just saved
  # @param association_path [Array<Symbol>] associations leading from the source
  #   model to +associated+ (empty when +associated+ is the source model itself)
  # @param on_attributes_change [Array] attributes whose change triggers an update
  # @return [Object, nil] the result of the job call, or nil when nothing is triggered
  def self.callback(source_model_class_name, associated, association_path, on_attributes_change)
    return unless enqueue_update?(associated, on_attributes_change)

    # The configuration flag selects between a queued and an inline run.
    perform = ::MySQL::Search.update_asyncronously ? :perform_later : :perform_now

    ::MySQL::Search::Jobs::UpdaterJob.set(wait: 10.seconds).public_send(
      perform,
      source_model_class_name,
      associated.class.name,
      associated.id,
      association_path
    )
  end

  # @return [Boolean] true when automatic updates are enabled and the last save
  #   changed at least one of the watched attributes
  def self.enqueue_update?(associated, on_attributes_change)
    return false unless ::MySQL::Search.automatic_update

    on_attributes_change.any? { |attribute| associated.saved_change_to_attribute?(attribute) }
  end

  # @param source_model_class [Class] the full-text searchable model class
  # @param source_config [Hash] schema keyed by search-index column; each value is
  #   a (possibly nested) hash of attributes and associations
  def initialize(source_model_class, source_config)
    @source_model_class = source_model_class
    @callbacks_config = transform_source_config(source_config)
    @assigned = false
  end

  # Registers the +after_save+ callbacks. Calling it again on the same instance
  # is a no-op.
  def assign
    return if @assigned

    callbacks_config.each do |association_path, on_attributes_change|
      # Captured as a local because +self+ inside the callback block is the saved record.
      source_model_class_name = source_model_class.name
      associated_class(association_path).after_save do
        ::MySQL::Search::Callbacks.callback(source_model_class_name, self, association_path, on_attributes_change)
      end
    end

    @assigned = true
  end

  private

  # Merges the per-column schemas into one tree and flattens it into
  # +{ association_path => [attributes] }+.
  def transform_source_config(source_config)
    # +reduce+ returns nil for an empty schema; fall back to an empty tree so an
    # empty configuration simply yields no callbacks instead of raising.
    combined_config = source_config.values.reduce(&:deep_merge) || {}

    extract_association_paths(config: combined_config)
  end

  # Walks the schema tree. Hash values are treated as associations and descended
  # into; every other value marks its key as an attribute of the current path.
  def extract_association_paths(config:, association_config: {}, association_path: [])
    config.each do |attribute_or_relation, attribute_or_relation_config|
      if attribute_or_relation_config.is_a?(Hash)
        extract_association_paths(association_config: association_config,
                                  association_path: [*association_path, attribute_or_relation],
                                  config: attribute_or_relation_config)
      else
        association_config[association_path] = [*association_config[association_path], attribute_or_relation]
      end
    end

    association_config
  end

  # Resolves the model class at the end of +association_path+ by following
  # association reflections from the source model class.
  def associated_class(association_path)
    association_path.inject(source_model_class) do |memo, association|
      memo.reflect_on_association(association).klass
    end
  end
end
74
+ end
75
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Extracts and formats data for search indexing.
#
# Walks a schema hash against a record. Symbol/String/Array/Proc values are
# formatters applied to the attribute named by the key; Hash values describe an
# association whose records are grabbed recursively.
class Grabber
  attr_reader :model, :config

  # @param model [Object, nil] record to read from (nil when an association is missing)
  # @param config [Hash] attribute/association schema for this record
  def initialize(model, config)
    @model = model
    @config = config
  end

  # @return [Array] formatted values collected from the record and its associations
  # @raise [ArgumentError] when a config value is of an unsupported type or the
  #   record does not respond to a configured attribute
  def grab
    config.flat_map do |attr_or_relation, format_or_config|
      case format_or_config
      when Hash then forward(attr_or_relation, format_or_config)
      when Symbol, String, Array, Proc then format(attr_or_relation, format_or_config)
      else
        raise(ArgumentError, "unknown config value: '#{format_or_config.inspect}'")
      end
    end
  end

  private

  # Descends into an association and grabs from each associated record.
  def forward(relation_name, grab_config)
    # A missing parent record has nothing to traverse. Without this guard a
    # nested association under a nil record raises NoMethodError on nil.
    return [] if model.nil?

    related = model.public_send(relation_name)

    if related.is_a?(::ActiveRecord::Relation)
      related.flat_map { |record| Grabber.new(record, grab_config).grab }
    else
      Grabber.new(related, grab_config).grab
    end
  end

  # Reads one attribute and runs it through each configured formatter.
  def format(attr_or_relation, formatters)
    return [] if model.nil?

    unless model.respond_to?(attr_or_relation)
      raise ArgumentError,
            "Missing attribute or relation `#{attr_or_relation}` on the model `#{model.class}`"
    end

    value = model.public_send(attr_or_relation)

    Array.wrap(formatters).map { |formatter| Utils::Formatter.new(value, formatter).format }
  end
end
50
+ end
51
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_job'
4
+
5
+ module MySQL
6
+ module Search
7
+ module Jobs
8
# Periodically updates search indices.
#
# For every searchable model, selects the records that fall into the requested
# period and rebuilds their search index in batches.
class ScheduledUpdaterJob < ::ActiveJob::Base
  # Maps a period name to a lambda that narrows a searchable model to the
  # records to refresh. Keys may be given as symbols or strings.
  PERIODS = {
    daily: ->(searchable) { searchable.where(updated_at: 1.day.ago..) },
    weekly: ->(searchable) { searchable.where(updated_at: 1.week.ago..) },
    monthly: ->(searchable) { searchable.where(updated_at: 1.month.ago..) },
    all: ->(searchable) { searchable.all }
  }.with_indifferent_access

  # @param period [Symbol, String] one of the keys of PERIODS
  # @raise [ArgumentError] when the period is not a key of PERIODS
  def perform(period)
    searchable_classes.each do |searchable_class|
      searchables(searchable_class, period).find_in_batches do |batch|
        # One transaction per batch.
        searchable_class.transaction do
          batch.each { |searchable| update(searchable_class, searchable) }
        end
      end
    end
  end

  private

  # Model classes of all discovered search sources.
  def searchable_classes
    ::MySQL::Search.source_classes.map(&:model)
  end

  # Applies the period scope to the model class, failing with a descriptive
  # error on an unknown period instead of a NoMethodError on nil.
  def searchables(searchable_class, period)
    scope = PERIODS.fetch(period) do
      raise ArgumentError, "Unknown period: #{period.inspect} (expected one of: #{PERIODS.keys.join(', ')})"
    end

    scope.call(searchable_class)
  end

  # Rebuilds the search index of a single record.
  def update(searchable_class, searchable)
    ::MySQL::Search::Updater.new(full_text_searchable: searchable_class, associated_model: searchable).update
  end
end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_job'
4
+
5
+ module MySQL
6
+ module Search
7
+ module Jobs
8
# Refreshes the search index of a full-text searchable model after one of its
# associated records changed.
class UpdaterJob < ::ActiveJob::Base
  # @param full_text_searchable_name [String] class name of the searchable model
  # @param associated_name [String] class name of the record that changed
  # @param associated_id [Object] primary key of the record that changed
  # @param association_path [Array] associations leading from the searchable
  #   model to the changed record
  def perform(full_text_searchable_name, associated_name, associated_id, association_path)
    changed_record = associated_name.constantize.find_by(id: associated_id)

    # Nothing to do when the record cannot be found any more.
    return unless changed_record

    updater = ::MySQL::Search::Updater.new(
      full_text_searchable: full_text_searchable_name.constantize,
      associated_model: changed_record,
      association_path: association_path
    )
    updater.update
  end
end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'jobs/scheduled_updater_job'
4
+ require_relative 'jobs/updater_job'
5
+
6
+ module MySQL
7
+ module Search
8
+ # Contains job classes for managing search updates.
9
+ module Jobs
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'arel'
4
+
5
# Arel extensions that add support for MySQL's `MATCH (...) AGAINST (...)`.
module Arel
  module Visitors
    # Teaches the MySQL visitor how to render an `Against` node.
    class MySQL
      # Emits `<left> AGAINST (<right>)` into the collector.
      def visit_Arel_Nodes_Against(node, collector) # rubocop:disable Naming/MethodName
        match_sql = visit(node.left, collector)
        match_sql << ' AGAINST ('

        term_sql = visit(node.right, collector)
        term_sql << ')'
      end
    end
  end

  module Nodes
    # Node for the `AGAINST` clause of a MySQL full-text search.
    class Against < Arel::Nodes::Matches
    end
  end

  # Adds an `against` predication to everything that mixes in `Arel::Predications`.
  module Predications
    # Builds an `Against` node with the receiver on the left and the quoted
    # search term on the right.
    def against(other)
      quoted_term = quoted_node(other)
      Arel::Nodes::Against.new(self, quoted_term)
    end
  end
end
33
+
34
+ module MySQL
35
+ module Search
36
+ module Queries
37
# FullTextSearchQuery is responsible for building full-text search queries:
# it joins the search index of a relation and filters and orders the relation
# by a `MATCH (...) AGAINST (...)` expression.
class FullTextSearchQuery
  attr_reader :source_relation

  # @param source_relation [ActiveRecord::Relation] relation to search within
  def initialize(source_relation)
    @source_relation = source_relation
  end

  # @param search_term [String] raw search input; normalized before use
  # @param search_column [Symbol, Array<Symbol>] search-index column(s) to match
  # @return [ActiveRecord::Relation] the relation joined with its search index,
  #   filtered by the match expression and ordered by it, descending
  def call(search_term, search_column: :content)
    relevancy = search_expression(search_term, search_column)

    source_relation
      .joins(:search_index)
      .where(relevancy)
      .order(relevancy.desc)
  end

  private

  # Builds the `MATCH (<columns>) AGAINST (<term>)` Arel expression.
  def search_expression(search_term, search_column)
    search_term = ::MySQL::Search::Utils::TextNormalizer.normalize(search_term)
    search_indices = ::MySQL::Search.search_index_class_name.constantize.arel_table
    search_columns = Array.wrap(search_column).map { |col| search_indices[col] }

    Arel::Nodes::NamedFunction.new('MATCH', search_columns).against(search_term)
  end
end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
+ module Queries
6
# Finds source records that need re-indexing: records whose own `updated_at`,
# or the `updated_at` of any joined association, is at or after a given time.
class UpdatedSourcesQuery
  attr_reader :source_relation, :source_class_name

  # @param source_relation [Class, ActiveRecord::Relation] searchable model or relation
  def initialize(source_relation)
    scope = source_relation.all

    @source_relation = scope
    @source_class_name = "#{scope.klass.name}Source"
  end

  # @param time_ago [Object] lower bound for `updated_at` (inclusive, open-ended)
  # @return [Object] the left-joined relation with one OR condition per
  #   association that has an `updated_at` column
  def call(time_ago)
    joins_args = source_class_name.constantize.joins_args
    own_updates = source_relation.left_joins(joins_args).where(updated_at: time_ago..)

    append_conditions(own_updates, source_relation, joins_args, time_ago)
  end

  private

  # Recursively walks the joins structure (arrays, hashes, single association
  # names) and ORs in an `updated_at` condition for every association visited.
  def append_conditions(relation, root_class, config, time_ago)
    if config.is_a?(Array)
      config.inject(relation) do |scope, entry|
        append_conditions(scope, root_class, entry, time_ago)
      end
    elsif config.is_a?(Hash)
      # The hash keys are associations of the current root; handle them first,
      # then descend into each nested configuration from the associated class.
      with_keys = append_conditions(relation, root_class, config.keys, time_ago)

      config.inject(with_keys) do |scope, (association, nested)|
        append_conditions(scope, association_class(root_class, association), nested, time_ago)
      end
    else
      append_or(relation, association_class(root_class, config), time_ago)
    end
  end

  # Class on the other end of `association`, looked up via reflection.
  def association_class(root_class, association)
    reflection = root_class.reflect_on_association(association)
    reflection.klass
  end

  # ORs in the `updated_at` condition, skipping models without that column.
  def append_or(relation, model, time_ago)
    if model.column_names.include?('updated_at')
      relation.or(model.where(updated_at: time_ago..))
    else
      relation
    end
  end
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Railtie for integrating MySQL::Search with Rails applications.
# Registers the gem's rake tasks and generators.
class Railtie < Rails::Railtie
  railtie_name :mysql_search

  rake_tasks do
    # This file lives in lib/mysql/search, while the gem ships its rake files in
    # lib/tasks, so the tasks directory is two levels up. Globbing relative to
    # this file's own directory finds no rake files.
    tasks_dir = File.expand_path('../../tasks', __dir__)
    Dir.glob(File.join(tasks_dir, '**', '*.rake')).each { |task_file| load task_file }
  end

  generators do
    require 'generators/mysql/search/install_generator'
  end
end
18
+ end
19
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Concern that wires a model into the search index: it declares the
# `search_index` association and the two search-related scopes.
module Searchable
  extend ActiveSupport::Concern

  included do
    # Polymorphic index row owned by the record; destroyed together with it.
    has_one :search_index,
            class_name: ::MySQL::Search.search_index_class_name.to_s,
            as: :searchable,
            dependent: :destroy

    # Records whose indexed sources were updated at or after `time_ago`.
    scope :full_text_search_sources_updated, ->(time_ago) {
      query = ::MySQL::Search::Queries::UpdatedSourcesQuery.new(self)
      query.call(time_ago)
    }

    # Records matching `search_term` in the given search-index column(s).
    scope :full_text_search, ->(search_term, search_column: :content) {
      query = ::MySQL::Search::Queries::FullTextSearchQuery.new(self)
      query.call(search_term, search_column: search_column)
    }
  end
end
23
+ end
24
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Represents a source for search indexing.
#
# Subclasses declare, via +schema+, which attributes and associations of a model
# feed which search-index column. An instance wraps one record and extracts the
# indexable text for it.
class Source
  class_attribute :_model
  class_attribute :_config, default: {}
  class_attribute :_callbacks

  attr_reader :model

  # Declares (or extends) the schema and registers the update callbacks.
  #
  # NOTE(review): every call builds a fresh Callbacks object from the merged
  # configuration, and the Callbacks "already assigned" guard is per instance,
  # so calling +schema+ more than once on the same class looks like it would
  # register the earlier callbacks again. Confirm before relying on repeated calls.
  #
  # @param config [Hash] schema keyed by search-index column
  def self.schema(config)
    self._model = model
    self._config = _config.merge(config)
    self._callbacks = ::MySQL::Search::Callbacks.new(_model, _config)

    _callbacks.assign
  end

  # @return [Class] the explicitly stored model, or the class named like this
  #   source without its +Source+ suffix
  def self.model
    _model || name.delete_suffix('Source').constantize
  end

  # @return [Array] association names/hashes derived from the schema
  def self.joins_args
    # +reduce+ returns nil when no schema has been declared; treat that as an
    # empty schema so callers get an empty join list instead of a NoMethodError.
    combined_config = _config.values.reduce(&:deep_merge) || {}
    extract_joins_args(combined_config)
  end

  # @param model [Object] the record to extract indexable data from
  def initialize(model)
    @model = model
  end

  # @return [Hash] search-index column => whitespace-squished text
  # @raise [ArgumentError] when a schema key is not a column of the search index
  def extract
    _config.each_with_object({}) do |(search_index_attribute, grabber_config), extracted|
      validate_search_index_attribute!(search_index_attribute)

      grabbed_data = Grabber.new(model, grabber_config).grab
      extracted[search_index_attribute] = grabbed_data.compact.join(' ').squish
    end
  end

  # Keeps only the association entries (Hash values) of the schema. An
  # association that itself contains nested associations becomes a
  # +{ name => [...] }+ hash; otherwise it is the bare name.
  def self.extract_joins_args(config)
    config.each_with_object([]) do |(attr_or_relation, format_or_config), extracted|
      next unless format_or_config.is_a?(Hash)

      extracted << if format_or_config.values.any?(Hash)
                     { attr_or_relation => extract_joins_args(format_or_config) }
                   else
                     attr_or_relation
                   end
    end
  end

  private_class_method :extract_joins_args

  private

  def validate_search_index_attribute!(search_index_attribute)
    search_index_class = ::MySQL::Search.search_index_class
    return if search_index_class.column_names.include?(search_index_attribute.to_s)

    raise(
      ArgumentError,
      "Unknown attribute '#{search_index_attribute}' for #{search_index_class.name}"
    )
  end
end
68
+ end
69
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
# Updates search indices based on associated models.
#
# Given a changed record and the association path leading to it from the
# full-text searchable model, finds every searchable record joined to the
# changed one and rewrites its search index.
class Updater
  attr_reader :full_text_searchable,
              :associated_model,
              :joins_args,
              :source_class

  # @param full_text_searchable [Class] the searchable model class
  # @param associated_model [Object] the record that changed
  # @param association_path [Array] associations from the searchable model to
  #   the changed record; empty when the changed record is a searchable itself
  def initialize(full_text_searchable:, associated_model:, association_path: [])
    @full_text_searchable = full_text_searchable
    @associated_model = associated_model
    @joins_args = translate_to_joins_args(association_path)
    @source_class = "#{full_text_searchable.name}Source".constantize
  end

  # Rebuilds the search index of every affected searchable record inside one
  # transaction.
  def update
    full_text_searchable.transaction do
      full_text_searchables.find_each do |model|
        search_index = model.search_index || model.build_search_index

        search_index.update(source_class.new(model).extract)
      end
    end
  end

  private

  # Turns a flat path such as +[:a, :b, :c]+ into the nested form
  # +{ a: { b: :c } }+. Paths with zero or one element are returned as given.
  def translate_to_joins_args(association_path)
    return association_path if association_path.empty? || association_path.one?

    # Destructure instead of popping: the array belongs to the caller. When the
    # job runs inline it is the same array object the save callback keeps
    # reusing, so mutating it would shorten the path for every later update.
    *parent_associations, target_association = association_path

    parent_associations.reverse.inject(target_association) { |memo, association| { association => memo } }
  end

  # Searchable records joined (through +joins_args+) to the changed record.
  def full_text_searchables
    associated_model_relation = associated_model.class.where(id: associated_model.id)

    full_text_searchable.joins(joins_args).preload(:search_index).merge(associated_model_relation)
  end
end
45
+ end
46
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
+ module Utils
6
# Turns strings such as "2.hours" or "3 days" into a duration by calling the
# unit method (e.g. `hour`, `day`) on the parsed integer amount.
class DurationParser
  REGEXP = /(\d+)[.\s](year|month|week|day|hour|minute|second)s?/

  # @param duration_string [#to_s] text containing "<amount>.<unit>" or "<amount> <unit>"
  # @param default [Object] value returned when the text contains no duration
  # @return [Object] result of `<amount>.<unit>`, or `default` when nothing matches
  def self.parse(duration_string, default = nil)
    matched = REGEXP.match(duration_string.to_s)
    return default unless matched

    amount, unit = matched.captures
    amount.to_i.public_send(unit)
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
+ module Utils
6
# Formats values for search indexing.
#
# A formatter is either a Proc, applied to the value immediately, or the name
# of one of this class's formatter methods (+text+, +calendar_week+, +date+).
class Formatter
  attr_reader :value, :formatter

  # @param value [Object] raw attribute value
  # @param formatter [Proc, Symbol, String] how to format the value
  # @raise [ArgumentError] when +formatter+ is neither a Proc nor a known
  #   formatter name
  def initialize(value, formatter)
    if formatter.instance_of?(Proc)
      @value = formatter.call(value)
      @formatter = nil
    elsif formatter_name?(formatter)
      @value = value
      @formatter = formatter
    else
      raise(ArgumentError, "Unknown formatter name: '#{formatter.inspect}'")
    end
  end

  # @return [Object] the formatted value (the Proc result when a Proc was given)
  def format
    formatter ? send(formatter) : value
  end

  private

  # A usable formatter name is a Symbol or String naming a method this object
  # responds to (private ones included) that is NOT a method every Object
  # already has. Without the last condition names such as +:exit+, +:sleep+ or
  # +:format+ would pass the check and later be invoked through +send+.
  def formatter_name?(name)
    return false unless name.is_a?(Symbol) || name.is_a?(String)
    return false if Object.method_defined?(name) || Object.private_method_defined?(name)

    respond_to?(name, true)
  end

  def text
    TextNormalizer.normalize(value.to_s)
  end

  def calendar_week
    value&.strftime(::MySQL::Search.calendar_week_format)
  end

  def date
    value&.strftime(::MySQL::Search.date_format)
  end
end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MySQL
4
+ module Search
5
+ module Utils
6
# Normalizes text for indexing and searching. Keeps alphanumeric characters,
# whitespace and the characters + - * ~ < > ( ) " @ . and drops everything
# else; the remaining whitespace is then squished.
class TextNormalizer
  # Matches a single character that survives normalization. The whitespace
  # class is [:space:] rather than [:blank:] so that line breaks are kept and
  # squished into a single space; with [:blank:] they are removed outright and
  # the words on either side of a line break are glued together.
  REGEXP = /[[:alnum:][:space:]+\-*~<>()"@\.]/

  # @param value [#to_s] text to normalize (nil becomes an empty string)
  # @return [String] the kept characters with whitespace squished
  def self.normalize(value)
    value.to_s.scan(REGEXP).join.squish
  end
end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'utils/formatter'
4
+ require_relative 'utils/text_normalizer'
5
+ require_relative 'utils/duration_parser'
6
+
7
+ module MySQL
8
+ module Search
9
+ # Contains utility classes and methods for search indexing.
10
+ module Utils
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'search/callbacks'
4
+ require_relative 'search/grabber'
5
+ require_relative 'search/jobs'
6
+ require_relative 'search/searchable'
7
+ require_relative 'search/source'
8
+ require_relative 'search/queries/updated_sources_query'
9
+ require_relative 'search/queries/full_text_search_query'
10
+ require_relative 'search/updater'
11
+ require_relative 'search/utils'
12
+
13
+ module MySQL
14
# Provides a namespace for MySQL search functionality and holds its
# configuration.
module Search
  module_function

  # Runtime configuration
  # When false, save callbacks never trigger an index update.
  mattr_accessor :automatic_update, default: true
  # When true, callback-triggered updates are enqueued instead of run inline.
  mattr_accessor :update_asyncronously, default: false

  # Search Index & Sources
  mattr_accessor :search_index_class_name, default: 'SearchIndex'
  mattr_accessor :sources_path, default: 'app/search_sources'

  # Formatters
  mattr_accessor :calendar_week_format, default: 'week %V'
  mattr_accessor :date_format, default: '%d.%m.%Y'

  # @return [Class] the search index model, resolved once and memoized
  def search_index_class
    @search_index_class ||= search_index_class_name.constantize
  end

  # Source classes found under +sources_path+. Each Ruby file is mapped to the
  # constant its path implies; files without a matching constant are skipped.
  # The result is memoized.
  #
  # @return [Array<Class>]
  def source_classes
    @source_classes ||= Dir.glob("#{sources_path}/**/*.rb").filter_map do |file|
      # Strip exactly the leading directory and the trailing extension; an
      # unanchored substitution would also hit a ".rb" elsewhere in the path.
      file.delete_prefix("#{sources_path}/").delete_suffix('.rb').camelize.safe_constantize
    end
  end

  # Yields the module itself so settings can be assigned in a block.
  def configure
    yield self
  end
end
44
+ end
45
+
46
+ # Load Railtie if Rails is defined
47
+ require_relative 'search/railtie' if defined?(Rails::Railtie)
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
namespace :mysql do
  namespace :search do
    # Re-indexes every searchable record whose sources were updated within the
    # given time window (defaults to the last hour).
    desc 'Actualizes full text search since date. Usage: rails mysql:search:actualize[1.hour]'
    task :actualize, [:time_ago] => :environment do |_task, arg|
      time_ago = MySQL::Search::Utils::DurationParser.parse(arg[:time_ago], 1.hour).ago
      searchable_classes = MySQL::Search.source_classes.map(&:model)

      searchable_classes.each do |searchable_class|
        # Total number of records of the model, shown as the progress denominator.
        count = searchable_class.count
        print "\n#{searchable_class.name}"

        searchable_class.full_text_search_sources_updated(time_ago).find_each.with_index do |record, index|
          puts(" (#{index} / #{count})") if (index % 100).zero?
          MySQL::Search::Updater.new(full_text_searchable: record.class, associated_model: record).update
          putc '.'
        end

        puts '✅'
      end
    end
  end
end