searchkick 2.3.2 → 5.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -1,3 +1,5 @@
1
+ # thread-local (technically fiber-local) indexer
2
+ # used to aggregate bulk callbacks across models
1
3
  module Searchkick
2
4
  class Indexer
3
5
  attr_reader :queued_items
@@ -14,15 +16,20 @@ module Searchkick
14
16
  def perform
15
17
  items = @queued_items
16
18
  @queued_items = []
17
- if items.any?
18
- response = Searchkick.client.bulk(body: items)
19
- if response["errors"]
20
- first_with_error = response["items"].map do |item|
21
- (item["index"] || item["delete"] || item["update"])
22
- end.find { |item| item["error"] }
23
- raise Searchkick::ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
24
- end
19
+
20
+ return if items.empty?
21
+
22
+ response = Searchkick.client.bulk(body: items)
23
+ if response["errors"]
24
+ # note: delete does not set error when item not found
25
+ first_with_error = response["items"].map do |item|
26
+ (item["index"] || item["delete"] || item["update"])
27
+ end.find { |item| item["error"] }
28
+ raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
25
29
  end
30
+
31
+ # maybe return response in future
32
+ nil
26
33
  end
27
34
  end
28
35
  end
@@ -0,0 +1,57 @@
1
+ # based on https://gist.github.com/mnutt/566725
2
+ module Searchkick
3
+ class LogSubscriber < ActiveSupport::LogSubscriber
4
+ def self.runtime=(value)
5
+ Thread.current[:searchkick_runtime] = value
6
+ end
7
+
8
+ def self.runtime
9
+ Thread.current[:searchkick_runtime] ||= 0
10
+ end
11
+
12
+ def self.reset_runtime
13
+ rt = runtime
14
+ self.runtime = 0
15
+ rt
16
+ end
17
+
18
+ def search(event)
19
+ self.class.runtime += event.duration
20
+ return unless logger.debug?
21
+
22
+ payload = event.payload
23
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
24
+
25
+ index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
26
+ type = payload[:query][:type]
27
+ request_params = payload[:query].except(:index, :type, :body)
28
+
29
+ params = []
30
+ request_params.each do |k, v|
31
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
+ end
33
+
34
+ debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
+ end
36
+
37
+ def request(event)
38
+ self.class.runtime += event.duration
39
+ return unless logger.debug?
40
+
41
+ payload = event.payload
42
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
+
44
+ debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
45
+ end
46
+
47
+ def multi_search(event)
48
+ self.class.runtime += event.duration
49
+ return unless logger.debug?
50
+
51
+ payload = event.payload
52
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
+
54
+ debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
55
+ end
56
+ end
57
+ end
@@ -1,10 +1,17 @@
1
- require "faraday/middleware"
1
+ require "faraday"
2
2
 
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware
5
5
  def call(env)
6
- if env[:method] == :get && env[:url].path.to_s.end_with?("/_search")
6
+ path = env[:url].path.to_s
7
+ if path.end_with?("/_search")
7
8
  env[:request][:timeout] = Searchkick.search_timeout
9
+ elsif path.end_with?("/_msearch")
10
+ # assume no concurrent searches for timeout for now
11
+ searches = env[:request_body].count("\n") / 2
12
+ # do not allow timeout to exceed Searchkick.timeout
13
+ timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min
14
+ env[:request][:timeout] = timeout
8
15
  end
9
16
  @app.call(env)
10
17
  end
@@ -1,154 +1,108 @@
1
1
  module Searchkick
2
2
  module Model
3
3
  def searchkick(**options)
4
- unknown_keywords = options.keys - [:_all, :batch_size, :callbacks, :conversions, :default_fields,
5
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :language,
6
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity,
7
- :special_characters, :stem_conversions, :suggest, :synonyms, :text_end,
8
- :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
4
+ options = Searchkick.model_options.merge(options)
5
+
6
+ unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
+ :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
+ :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
+ :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
9
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
10
12
 
11
13
  raise "Only call searchkick once per model" if respond_to?(:searchkick_index)
12
14
 
13
15
  Searchkick.models << self
14
16
 
15
- class_eval do
16
- cattr_reader :searchkick_options, :searchkick_klass
17
+ options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
18
+ options[:class_name] = model_name.name
19
+
20
+ callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
21
+ unless [:inline, true, false, :async, :queue].include?(callbacks)
22
+ raise ArgumentError, "Invalid value for callbacks"
23
+ end
24
+
25
+ base = self
26
+
27
+ mod = Module.new
28
+ include(mod)
29
+ mod.module_eval do
30
+ def reindex(method_name = nil, mode: nil, refresh: false)
31
+ self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, single: true)
32
+ end unless base.method_defined?(:reindex)
33
+
34
+ def similar(**options)
35
+ self.class.searchkick_index.similar_record(self, **options)
36
+ end unless base.method_defined?(:similar)
37
+
38
+ def search_data
39
+ data = respond_to?(:to_hash) ? to_hash : serializable_hash
40
+ data.delete("id")
41
+ data.delete("_id")
42
+ data.delete("_type")
43
+ data
44
+ end unless base.method_defined?(:search_data)
45
+
46
+ def should_index?
47
+ true
48
+ end unless base.method_defined?(:should_index?)
49
+ end
17
50
 
18
- callbacks = options.key?(:callbacks) ? options[:callbacks] : true
51
+ class_eval do
52
+ cattr_reader :searchkick_options, :searchkick_klass, instance_reader: false
19
53
 
20
54
  class_variable_set :@@searchkick_options, options.dup
21
55
  class_variable_set :@@searchkick_klass, self
22
- class_variable_set :@@searchkick_callbacks, callbacks
23
- class_variable_set :@@searchkick_index, options[:index_name] ||
24
- (options[:index_prefix].respond_to?(:call) && proc { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }) ||
25
- [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
56
+ class_variable_set :@@searchkick_index_cache, Searchkick::IndexCache.new
26
57
 
27
58
  class << self
28
59
  def searchkick_search(term = "*", **options, &block)
29
- Searchkick.search(term, {model: self}.merge(options), &block)
60
+ if Searchkick.relation?(self)
61
+ raise Searchkick::Error, "search must be called on model, not relation"
62
+ end
63
+
64
+ Searchkick.search(term, model: self, **options, &block)
30
65
  end
31
66
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
32
67
 
33
- def searchkick_index
34
- index = class_variable_get :@@searchkick_index
35
- index = index.call if index.respond_to? :call
36
- Searchkick::Index.new(index, searchkick_options)
68
+ def searchkick_index(name: nil)
69
+ index_name = name || searchkick_klass.searchkick_index_name
70
+ index_name = index_name.call if index_name.respond_to?(:call)
71
+ index_cache = class_variable_get(:@@searchkick_index_cache)
72
+ index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
37
73
  end
38
74
  alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
39
75
 
40
- def enable_search_callbacks
41
- class_variable_set :@@searchkick_callbacks, true
42
- end
43
-
44
- def disable_search_callbacks
45
- class_variable_set :@@searchkick_callbacks, false
46
- end
47
-
48
- def search_callbacks?
49
- class_variable_get(:@@searchkick_callbacks) && Searchkick.callbacks?
50
- end
51
-
52
- def searchkick_reindex(method_name = nil, full: false, **options)
53
- scoped = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
54
- (respond_to?(:queryable) && queryable != unscoped.with_default_scope)
55
-
56
- refresh = options.fetch(:refresh, !scoped)
57
-
58
- if method_name
59
- # update
60
- searchkick_index.import_scope(searchkick_klass, method_name: method_name)
61
- searchkick_index.refresh if refresh
62
- true
63
- elsif scoped && !full
64
- # reindex association
65
- searchkick_index.import_scope(searchkick_klass)
66
- searchkick_index.refresh if refresh
67
- true
68
- else
69
- # full reindex
70
- searchkick_index.reindex_scope(searchkick_klass, options)
71
- end
76
+ def searchkick_reindex(method_name = nil, **options)
77
+ searchkick_index.reindex(self, method_name: method_name, **options)
72
78
  end
73
79
  alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
74
80
 
75
81
  def searchkick_index_options
76
82
  searchkick_index.index_options
77
83
  end
78
- end
79
-
80
- callback_name = callbacks == :async ? :reindex_async : :reindex
81
- if respond_to?(:after_commit)
82
- after_commit callback_name, if: proc { self.class.search_callbacks? }
83
- elsif respond_to?(:after_save)
84
- after_save callback_name, if: proc { self.class.search_callbacks? }
85
- after_destroy callback_name, if: proc { self.class.search_callbacks? }
86
- end
87
84
 
88
- def reindex(method_name = nil, refresh: false, async: false, mode: nil)
89
- klass_options = self.class.searchkick_index.options
90
-
91
- if mode.nil?
92
- mode =
93
- if async
94
- :async
95
- elsif Searchkick.callbacks_value
96
- Searchkick.callbacks_value
97
- elsif klass_options.key?(:callbacks) && klass_options[:callbacks] != :async
98
- # TODO remove 2nd condition in next major version
99
- klass_options[:callbacks]
85
+ def searchkick_index_name
86
+ @searchkick_index_name ||= begin
87
+ options = class_variable_get(:@@searchkick_options)
88
+ if options[:index_name]
89
+ options[:index_name]
90
+ elsif options[:index_prefix].respond_to?(:call)
91
+ -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
92
+ else
93
+ [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
100
94
  end
101
- end
102
-
103
- case mode
104
- when :queue
105
- if method_name
106
- raise Searchkick::Error, "Partial reindex not supported with queue option"
107
- else
108
- self.class.searchkick_index.reindex_queue.push(id.to_s)
109
- end
110
- when :async
111
- if method_name
112
- # TODO support Mongoid and NoBrainer and non-id primary keys
113
- Searchkick::BulkReindexJob.perform_later(
114
- class_name: self.class.name,
115
- record_ids: [id.to_s],
116
- method_name: method_name ? method_name.to_s : nil
117
- )
118
- else
119
- self.class.searchkick_index.reindex_record_async(self)
120
95
  end
121
- else
122
- if method_name
123
- self.class.searchkick_index.update_record(self, method_name)
124
- else
125
- self.class.searchkick_index.reindex_record(self)
126
- end
127
- self.class.searchkick_index.refresh if refresh
128
96
  end
129
- end unless method_defined?(:reindex)
130
-
131
- # TODO remove this method in next major version
132
- def reindex_async
133
- reindex(async: true)
134
- end unless method_defined?(:reindex_async)
135
-
136
- def similar(options = {})
137
- self.class.searchkick_index.similar_record(self, options)
138
- end unless method_defined?(:similar)
139
-
140
- def search_data
141
- respond_to?(:to_hash) ? to_hash : serializable_hash
142
- end unless method_defined?(:search_data)
143
-
144
- def should_index?
145
- true
146
- end unless method_defined?(:should_index?)
97
+ end
147
98
 
148
- if defined?(Cequel) && self < Cequel::Record && !method_defined?(:destroyed?)
149
- def destroyed?
150
- transient?
151
- end
99
+ # always add callbacks, even when callbacks is false
100
+ # so Model.callbacks block can be used
101
+ if respond_to?(:after_commit)
102
+ after_commit :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
103
+ elsif respond_to?(:after_save)
104
+ after_save :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
105
+ after_destroy :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
152
106
  end
153
107
  end
154
108
  end
@@ -2,25 +2,24 @@ module Searchkick
2
2
  class MultiSearch
3
3
  attr_reader :queries
4
4
 
5
- def initialize(queries, retry_misspellings: false)
5
+ def initialize(queries)
6
6
  @queries = queries
7
- @retry_misspellings = retry_misspellings
8
7
  end
9
8
 
10
9
  def perform
11
10
  if queries.any?
12
- perform_search(queries, retry_misspellings: @retry_misspellings)
11
+ perform_search(queries)
13
12
  end
14
13
  end
15
14
 
16
15
  private
17
16
 
18
- def perform_search(queries, retry_misspellings: true)
19
- responses = client.msearch(body: queries.flat_map { |q| [q.params.except(:body), q.body] })["responses"]
17
+ def perform_search(search_queries, perform_retry: true)
18
+ responses = client.msearch(body: search_queries.flat_map { |q| [q.params.except(:body), q.body] })["responses"]
20
19
 
21
20
  retry_queries = []
22
- queries.each_with_index do |query, i|
23
- if retry_misspellings && query.retry_misspellings?(responses[i])
21
+ search_queries.each_with_index do |query, i|
22
+ if perform_retry && query.retry_misspellings?(responses[i])
24
23
  query.send(:prepare) # okay, since we don't want to expose this method outside Searchkick
25
24
  retry_queries << query
26
25
  else
@@ -28,11 +27,11 @@ module Searchkick
28
27
  end
29
28
  end
30
29
 
31
- if retry_misspellings && retry_queries.any?
32
- perform_search(retry_queries, retry_misspellings: false)
30
+ if retry_queries.any?
31
+ perform_search(retry_queries, perform_retry: false)
33
32
  end
34
33
 
35
- queries
34
+ search_queries
36
35
  end
37
36
 
38
37
  def client
@@ -1,23 +1,20 @@
1
1
  module Searchkick
2
2
  class ProcessBatchJob < ActiveJob::Base
3
- queue_as :searchkick
3
+ queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:, record_ids:)
6
- klass = class_name.constantize
7
- scope = Searchkick.load_records(klass, record_ids)
8
- scope = scope.search_import if scope.respond_to?(:search_import)
9
- records = scope.select(&:should_index?)
5
+ def perform(class_name:, record_ids:, index_name: nil)
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
10
8
 
11
- # determine which records to delete
12
- delete_ids = record_ids - records.map { |r| r.id.to_s }
13
- delete_records = delete_ids.map { |id| m = klass.new; m.id = id; m }
9
+ items =
10
+ record_ids.map do |r|
11
+ parts = r.split(/(?<!\|)\|(?!\|)/, 2)
12
+ .map { |v| v.gsub("||", "|") }
13
+ {id: parts[0], routing: parts[1]}
14
+ end
14
15
 
15
- # bulk reindex
16
- index = klass.searchkick_index
17
- Searchkick.callbacks(:bulk) do
18
- index.bulk_index(records) if records.any?
19
- index.bulk_delete(delete_records) if delete_records.any?
20
- end
16
+ relation = Searchkick.scope(model)
17
+ RecordIndexer.new(index).reindex_items(relation, items, method_name: nil)
21
18
  end
22
19
  end
23
20
  end
@@ -1,22 +1,31 @@
1
1
  module Searchkick
2
2
  class ProcessQueueJob < ActiveJob::Base
3
- queue_as :searchkick
3
+ queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:)
6
- model = class_name.constantize
5
+ def perform(class_name:, index_name: nil, inline: false)
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
8
+ limit = model.searchkick_options[:batch_size] || 1000
7
9
 
8
- limit = model.searchkick_index.options[:batch_size] || 1000
9
- record_ids = model.searchkick_index.reindex_queue.reserve(limit: limit)
10
- if record_ids.any?
11
- Searchkick::ProcessBatchJob.perform_later(
12
- class_name: model.name,
13
- record_ids: record_ids
14
- )
15
- # TODO when moving to reliable queuing, mark as complete
10
+ loop do
11
+ record_ids = index.reindex_queue.reserve(limit: limit)
12
+ if record_ids.any?
13
+ batch_options = {
14
+ class_name: class_name,
15
+ record_ids: record_ids.uniq,
16
+ index_name: index_name
17
+ }
16
18
 
17
- if record_ids.size == limit
18
- Searchkick::ProcessQueueJob.perform_later(class_name: class_name)
19
+ if inline
20
+ # use new.perform to avoid excessive logging
21
+ Searchkick::ProcessBatchJob.new.perform(**batch_options)
22
+ else
23
+ Searchkick::ProcessBatchJob.perform_later(**batch_options)
24
+ end
25
+
26
+ # TODO when moving to reliable queuing, mark as complete
19
27
  end
28
+ break unless record_ids.size == limit
20
29
  end
21
30
  end
22
31
  end