searchkick 2.3.2 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -1,3 +1,5 @@
1
+ # thread-local (technically fiber-local) indexer
2
+ # used to aggregate bulk callbacks across models
1
3
  module Searchkick
2
4
  class Indexer
3
5
  attr_reader :queued_items
@@ -14,15 +16,20 @@ module Searchkick
14
16
  def perform
15
17
  items = @queued_items
16
18
  @queued_items = []
17
- if items.any?
18
- response = Searchkick.client.bulk(body: items)
19
- if response["errors"]
20
- first_with_error = response["items"].map do |item|
21
- (item["index"] || item["delete"] || item["update"])
22
- end.find { |item| item["error"] }
23
- raise Searchkick::ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
24
- end
19
+
20
+ return if items.empty?
21
+
22
+ response = Searchkick.client.bulk(body: items)
23
+ if response["errors"]
24
+ # note: delete does not set error when item not found
25
+ first_with_error = response["items"].map do |item|
26
+ (item["index"] || item["delete"] || item["update"])
27
+ end.find { |item| item["error"] }
28
+ raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
25
29
  end
30
+
31
+ # maybe return response in future
32
+ nil
26
33
  end
27
34
  end
28
35
  end
@@ -0,0 +1,57 @@
1
+ # based on https://gist.github.com/mnutt/566725
2
+ module Searchkick
3
+ class LogSubscriber < ActiveSupport::LogSubscriber
4
+ def self.runtime=(value)
5
+ Thread.current[:searchkick_runtime] = value
6
+ end
7
+
8
+ def self.runtime
9
+ Thread.current[:searchkick_runtime] ||= 0
10
+ end
11
+
12
+ def self.reset_runtime
13
+ rt = runtime
14
+ self.runtime = 0
15
+ rt
16
+ end
17
+
18
+ def search(event)
19
+ self.class.runtime += event.duration
20
+ return unless logger.debug?
21
+
22
+ payload = event.payload
23
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
24
+
25
+ index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
26
+ type = payload[:query][:type]
27
+ request_params = payload[:query].except(:index, :type, :body)
28
+
29
+ params = []
30
+ request_params.each do |k, v|
31
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
+ end
33
+
34
+ debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
+ end
36
+
37
+ def request(event)
38
+ self.class.runtime += event.duration
39
+ return unless logger.debug?
40
+
41
+ payload = event.payload
42
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
+
44
+ debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
45
+ end
46
+
47
+ def multi_search(event)
48
+ self.class.runtime += event.duration
49
+ return unless logger.debug?
50
+
51
+ payload = event.payload
52
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
+
54
+ debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
55
+ end
56
+ end
57
+ end
@@ -1,10 +1,17 @@
1
- require "faraday/middleware"
1
+ require "faraday"
2
2
 
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware
5
5
  def call(env)
6
- if env[:method] == :get && env[:url].path.to_s.end_with?("/_search")
6
+ path = env[:url].path.to_s
7
+ if path.end_with?("/_search")
7
8
  env[:request][:timeout] = Searchkick.search_timeout
9
+ elsif path.end_with?("/_msearch")
10
+ # assume no concurrent searches for timeout for now
11
+ searches = env[:request_body].count("\n") / 2
12
+ # do not allow timeout to exceed Searchkick.timeout
13
+ timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min
14
+ env[:request][:timeout] = timeout
8
15
  end
9
16
  @app.call(env)
10
17
  end
@@ -1,154 +1,108 @@
1
1
  module Searchkick
2
2
  module Model
3
3
  def searchkick(**options)
4
- unknown_keywords = options.keys - [:_all, :batch_size, :callbacks, :conversions, :default_fields,
5
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :language,
6
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity,
7
- :special_characters, :stem_conversions, :suggest, :synonyms, :text_end,
8
- :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
4
+ options = Searchkick.model_options.merge(options)
5
+
6
+ unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
+ :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
+ :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
+ :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
9
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
10
12
 
11
13
  raise "Only call searchkick once per model" if respond_to?(:searchkick_index)
12
14
 
13
15
  Searchkick.models << self
14
16
 
15
- class_eval do
16
- cattr_reader :searchkick_options, :searchkick_klass
17
+ options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
18
+ options[:class_name] = model_name.name
19
+
20
+ callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
21
+ unless [:inline, true, false, :async, :queue].include?(callbacks)
22
+ raise ArgumentError, "Invalid value for callbacks"
23
+ end
24
+
25
+ base = self
26
+
27
+ mod = Module.new
28
+ include(mod)
29
+ mod.module_eval do
30
+ def reindex(method_name = nil, mode: nil, refresh: false)
31
+ self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, single: true)
32
+ end unless base.method_defined?(:reindex)
33
+
34
+ def similar(**options)
35
+ self.class.searchkick_index.similar_record(self, **options)
36
+ end unless base.method_defined?(:similar)
37
+
38
+ def search_data
39
+ data = respond_to?(:to_hash) ? to_hash : serializable_hash
40
+ data.delete("id")
41
+ data.delete("_id")
42
+ data.delete("_type")
43
+ data
44
+ end unless base.method_defined?(:search_data)
45
+
46
+ def should_index?
47
+ true
48
+ end unless base.method_defined?(:should_index?)
49
+ end
17
50
 
18
- callbacks = options.key?(:callbacks) ? options[:callbacks] : true
51
+ class_eval do
52
+ cattr_reader :searchkick_options, :searchkick_klass, instance_reader: false
19
53
 
20
54
  class_variable_set :@@searchkick_options, options.dup
21
55
  class_variable_set :@@searchkick_klass, self
22
- class_variable_set :@@searchkick_callbacks, callbacks
23
- class_variable_set :@@searchkick_index, options[:index_name] ||
24
- (options[:index_prefix].respond_to?(:call) && proc { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }) ||
25
- [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
56
+ class_variable_set :@@searchkick_index_cache, Searchkick::IndexCache.new
26
57
 
27
58
  class << self
28
59
  def searchkick_search(term = "*", **options, &block)
29
- Searchkick.search(term, {model: self}.merge(options), &block)
60
+ if Searchkick.relation?(self)
61
+ raise Searchkick::Error, "search must be called on model, not relation"
62
+ end
63
+
64
+ Searchkick.search(term, model: self, **options, &block)
30
65
  end
31
66
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
32
67
 
33
- def searchkick_index
34
- index = class_variable_get :@@searchkick_index
35
- index = index.call if index.respond_to? :call
36
- Searchkick::Index.new(index, searchkick_options)
68
+ def searchkick_index(name: nil)
69
+ index_name = name || searchkick_klass.searchkick_index_name
70
+ index_name = index_name.call if index_name.respond_to?(:call)
71
+ index_cache = class_variable_get(:@@searchkick_index_cache)
72
+ index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
37
73
  end
38
74
  alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
39
75
 
40
- def enable_search_callbacks
41
- class_variable_set :@@searchkick_callbacks, true
42
- end
43
-
44
- def disable_search_callbacks
45
- class_variable_set :@@searchkick_callbacks, false
46
- end
47
-
48
- def search_callbacks?
49
- class_variable_get(:@@searchkick_callbacks) && Searchkick.callbacks?
50
- end
51
-
52
- def searchkick_reindex(method_name = nil, full: false, **options)
53
- scoped = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
54
- (respond_to?(:queryable) && queryable != unscoped.with_default_scope)
55
-
56
- refresh = options.fetch(:refresh, !scoped)
57
-
58
- if method_name
59
- # update
60
- searchkick_index.import_scope(searchkick_klass, method_name: method_name)
61
- searchkick_index.refresh if refresh
62
- true
63
- elsif scoped && !full
64
- # reindex association
65
- searchkick_index.import_scope(searchkick_klass)
66
- searchkick_index.refresh if refresh
67
- true
68
- else
69
- # full reindex
70
- searchkick_index.reindex_scope(searchkick_klass, options)
71
- end
76
+ def searchkick_reindex(method_name = nil, **options)
77
+ searchkick_index.reindex(self, method_name: method_name, **options)
72
78
  end
73
79
  alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
74
80
 
75
81
  def searchkick_index_options
76
82
  searchkick_index.index_options
77
83
  end
78
- end
79
-
80
- callback_name = callbacks == :async ? :reindex_async : :reindex
81
- if respond_to?(:after_commit)
82
- after_commit callback_name, if: proc { self.class.search_callbacks? }
83
- elsif respond_to?(:after_save)
84
- after_save callback_name, if: proc { self.class.search_callbacks? }
85
- after_destroy callback_name, if: proc { self.class.search_callbacks? }
86
- end
87
84
 
88
- def reindex(method_name = nil, refresh: false, async: false, mode: nil)
89
- klass_options = self.class.searchkick_index.options
90
-
91
- if mode.nil?
92
- mode =
93
- if async
94
- :async
95
- elsif Searchkick.callbacks_value
96
- Searchkick.callbacks_value
97
- elsif klass_options.key?(:callbacks) && klass_options[:callbacks] != :async
98
- # TODO remove 2nd condition in next major version
99
- klass_options[:callbacks]
85
+ def searchkick_index_name
86
+ @searchkick_index_name ||= begin
87
+ options = class_variable_get(:@@searchkick_options)
88
+ if options[:index_name]
89
+ options[:index_name]
90
+ elsif options[:index_prefix].respond_to?(:call)
91
+ -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
92
+ else
93
+ [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
100
94
  end
101
- end
102
-
103
- case mode
104
- when :queue
105
- if method_name
106
- raise Searchkick::Error, "Partial reindex not supported with queue option"
107
- else
108
- self.class.searchkick_index.reindex_queue.push(id.to_s)
109
- end
110
- when :async
111
- if method_name
112
- # TODO support Mongoid and NoBrainer and non-id primary keys
113
- Searchkick::BulkReindexJob.perform_later(
114
- class_name: self.class.name,
115
- record_ids: [id.to_s],
116
- method_name: method_name ? method_name.to_s : nil
117
- )
118
- else
119
- self.class.searchkick_index.reindex_record_async(self)
120
95
  end
121
- else
122
- if method_name
123
- self.class.searchkick_index.update_record(self, method_name)
124
- else
125
- self.class.searchkick_index.reindex_record(self)
126
- end
127
- self.class.searchkick_index.refresh if refresh
128
96
  end
129
- end unless method_defined?(:reindex)
130
-
131
- # TODO remove this method in next major version
132
- def reindex_async
133
- reindex(async: true)
134
- end unless method_defined?(:reindex_async)
135
-
136
- def similar(options = {})
137
- self.class.searchkick_index.similar_record(self, options)
138
- end unless method_defined?(:similar)
139
-
140
- def search_data
141
- respond_to?(:to_hash) ? to_hash : serializable_hash
142
- end unless method_defined?(:search_data)
143
-
144
- def should_index?
145
- true
146
- end unless method_defined?(:should_index?)
97
+ end
147
98
 
148
- if defined?(Cequel) && self < Cequel::Record && !method_defined?(:destroyed?)
149
- def destroyed?
150
- transient?
151
- end
99
+ # always add callbacks, even when callbacks is false
100
+ # so Model.callbacks block can be used
101
+ if respond_to?(:after_commit)
102
+ after_commit :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
103
+ elsif respond_to?(:after_save)
104
+ after_save :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
105
+ after_destroy :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
152
106
  end
153
107
  end
154
108
  end
@@ -2,25 +2,24 @@ module Searchkick
2
2
  class MultiSearch
3
3
  attr_reader :queries
4
4
 
5
- def initialize(queries, retry_misspellings: false)
5
+ def initialize(queries)
6
6
  @queries = queries
7
- @retry_misspellings = retry_misspellings
8
7
  end
9
8
 
10
9
  def perform
11
10
  if queries.any?
12
- perform_search(queries, retry_misspellings: @retry_misspellings)
11
+ perform_search(queries)
13
12
  end
14
13
  end
15
14
 
16
15
  private
17
16
 
18
- def perform_search(queries, retry_misspellings: true)
19
- responses = client.msearch(body: queries.flat_map { |q| [q.params.except(:body), q.body] })["responses"]
17
+ def perform_search(search_queries, perform_retry: true)
18
+ responses = client.msearch(body: search_queries.flat_map { |q| [q.params.except(:body), q.body] })["responses"]
20
19
 
21
20
  retry_queries = []
22
- queries.each_with_index do |query, i|
23
- if retry_misspellings && query.retry_misspellings?(responses[i])
21
+ search_queries.each_with_index do |query, i|
22
+ if perform_retry && query.retry_misspellings?(responses[i])
24
23
  query.send(:prepare) # okay, since we don't want to expose this method outside Searchkick
25
24
  retry_queries << query
26
25
  else
@@ -28,11 +27,11 @@ module Searchkick
28
27
  end
29
28
  end
30
29
 
31
- if retry_misspellings && retry_queries.any?
32
- perform_search(retry_queries, retry_misspellings: false)
30
+ if retry_queries.any?
31
+ perform_search(retry_queries, perform_retry: false)
33
32
  end
34
33
 
35
- queries
34
+ search_queries
36
35
  end
37
36
 
38
37
  def client
@@ -1,23 +1,20 @@
1
1
  module Searchkick
2
2
  class ProcessBatchJob < ActiveJob::Base
3
- queue_as :searchkick
3
+ queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:, record_ids:)
6
- klass = class_name.constantize
7
- scope = Searchkick.load_records(klass, record_ids)
8
- scope = scope.search_import if scope.respond_to?(:search_import)
9
- records = scope.select(&:should_index?)
5
+ def perform(class_name:, record_ids:, index_name: nil)
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
10
8
 
11
- # determine which records to delete
12
- delete_ids = record_ids - records.map { |r| r.id.to_s }
13
- delete_records = delete_ids.map { |id| m = klass.new; m.id = id; m }
9
+ items =
10
+ record_ids.map do |r|
11
+ parts = r.split(/(?<!\|)\|(?!\|)/, 2)
12
+ .map { |v| v.gsub("||", "|") }
13
+ {id: parts[0], routing: parts[1]}
14
+ end
14
15
 
15
- # bulk reindex
16
- index = klass.searchkick_index
17
- Searchkick.callbacks(:bulk) do
18
- index.bulk_index(records) if records.any?
19
- index.bulk_delete(delete_records) if delete_records.any?
20
- end
16
+ relation = Searchkick.scope(model)
17
+ RecordIndexer.new(index).reindex_items(relation, items, method_name: nil)
21
18
  end
22
19
  end
23
20
  end
@@ -1,22 +1,31 @@
1
1
  module Searchkick
2
2
  class ProcessQueueJob < ActiveJob::Base
3
- queue_as :searchkick
3
+ queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:)
6
- model = class_name.constantize
5
+ def perform(class_name:, index_name: nil, inline: false)
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
8
+ limit = model.searchkick_options[:batch_size] || 1000
7
9
 
8
- limit = model.searchkick_index.options[:batch_size] || 1000
9
- record_ids = model.searchkick_index.reindex_queue.reserve(limit: limit)
10
- if record_ids.any?
11
- Searchkick::ProcessBatchJob.perform_later(
12
- class_name: model.name,
13
- record_ids: record_ids
14
- )
15
- # TODO when moving to reliable queuing, mark as complete
10
+ loop do
11
+ record_ids = index.reindex_queue.reserve(limit: limit)
12
+ if record_ids.any?
13
+ batch_options = {
14
+ class_name: class_name,
15
+ record_ids: record_ids.uniq,
16
+ index_name: index_name
17
+ }
16
18
 
17
- if record_ids.size == limit
18
- Searchkick::ProcessQueueJob.perform_later(class_name: class_name)
19
+ if inline
20
+ # use new.perform to avoid excessive logging
21
+ Searchkick::ProcessBatchJob.new.perform(**batch_options)
22
+ else
23
+ Searchkick::ProcessBatchJob.perform_later(**batch_options)
24
+ end
25
+
26
+ # TODO when moving to reliable queuing, mark as complete
19
27
  end
28
+ break unless record_ids.size == limit
20
29
  end
21
30
  end
22
31
  end