searchkick 2.3.2 → 5.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -0,0 +1,155 @@
1
module Searchkick
  # Reindexes the records of a relation into a single index, one batch at a
  # time, either directly through RecordIndexer or by enqueuing
  # BulkReindexJob jobs tracked in a Redis set.
  class RelationIndexer
    attr_reader :index

    # index - index object to write to; this class reads its `name`,
    #         `options[:batch_size]` and `total_docs`
    def initialize(index)
      @index = index
    end

    # Reindexes every record of `relation` in batches.
    #
    # relation    - source of records; takes the Active Record path when it
    #               responds to `find_in_batches`, otherwise the Mongoid path
    # mode        - forwarded to RecordIndexer#reindex; :async and :queue also
    #               strip eager loading and (usually) select only the id
    # method_name - forwarded to RecordIndexer#reindex
    # full        - forwarded to RecordIndexer#reindex; combined with
    #               mode: :async the batches are enqueued as jobs instead
    # resume      - when true, restricts the relation via resume_relation
    # scope       - name of a method to call on `relation` first; when nil,
    #               `search_import` is applied if the relation responds to it
    def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
      # apply scopes
      if scope
        relation = relation.send(scope)
      elsif relation.respond_to?(:search_import)
        relation = relation.search_import
      end

      # remove unneeded loading for async and queue
      if mode == :async || mode == :queue
        if relation.respond_to?(:primary_key)
          relation = relation.except(:includes, :preload)
          # queue mode keeps the full select when the model defines search_routing
          unless mode == :queue && relation.klass.method_defined?(:search_routing)
            relation = relation.except(:select).select(relation.primary_key)
          end
        elsif relation.respond_to?(:only)
          unless mode == :queue && relation.klass.method_defined?(:search_routing)
            relation = relation.only(:_id)
          end
        end
      end

      return full_reindex_async(relation) if mode == :async && full

      relation = resume_relation(relation) if resume

      reindex_options = {
        mode: mode,
        method_name: method_name,
        full: full
      }
      record_indexer = RecordIndexer.new(index)

      in_batches(relation) do |items|
        record_indexer.reindex(items, **reindex_options)
      end
    end

    # Size of the Redis set at batches_key (SCARD).
    def batches_left
      Searchkick.with_redis { |r| r.call("SCARD", batches_key) }
    end

    # Removes batch_id from the Redis set at batches_key (SREM).
    def batch_completed(batch_id)
      Searchkick.with_redis { |r| r.call("SREM", batches_key, [batch_id]) }
    end

    private

    # Restricts `relation` to rows whose primary key is greater than
    # index.total_docs. Raises Error when the relation has no `primary_key`.
    def resume_relation(relation)
      raise Error, "Resume not supported for Mongoid" unless relation.respond_to?(:primary_key)

      # use total docs instead of max id since there's not a great way
      # to get the max _id without scripting since it's a string
      threshold = relation.arel_table[relation.primary_key].gt(index.total_docs)
      relation.where(threshold)
    end

    # Yields arrays of up to batch_size records. While the caller's block runs,
    # the model's current scope is cleared, and it is restored afterwards.
    def in_batches(relation)
      if relation.respond_to?(:find_in_batches)
        klass = relation.klass
        # remove order to prevent possible warnings
        relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # Active Record runs relation calls in scoping block
          # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
          # note: we could probably just call klass.current_scope = nil
          # anywhere in reindex method (after initial all call),
          # but this is more cautious
          previous_scope = klass.current_scope(true)
          if previous_scope
            begin
              klass.current_scope = nil
              yield batch
            ensure
              klass.current_scope = previous_scope
            end
          else
            yield batch
          end
        end
      else
        klass = relation.klass
        each_batch(relation, batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # note: Model.with_scope doesn't always restore scope, so use custom logic
          previous_scope = Mongoid::Threaded.current_scope(klass)
          if previous_scope
            begin
              Mongoid::Threaded.set_current_scope(nil, klass)
              yield batch
            ensure
              Mongoid::Threaded.set_current_scope(previous_scope, klass)
            end
          else
            yield batch
          end
        end
      end
    end

    # Iterates `relation.all` and yields arrays of exactly batch_size items,
    # followed by one shorter array for any remainder.
    def each_batch(relation, batch_size:)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      items = []
      relation.all.each do |item|
        items << item
        if items.length == batch_size
          yield items
          items = []
        end
      end
      yield items if items.any?
    end

    # Batch size from index.options[:batch_size], defaulting to 1000 (memoized).
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end

    # Enqueues one job per batch, numbering batches from 1.
    def full_reindex_async(relation)
      batch_id = 1
      class_name = relation.searchkick_options[:class_name]

      in_batches(relation) do |items|
        batch_job(class_name, batch_id, items.map(&:id))
        batch_id += 1
      end
    end

    # Adds batch_id to the Redis set at batches_key, then enqueues a
    # BulkReindexJob for the given record ids.
    def batch_job(class_name, batch_id, record_ids)
      Searchkick.with_redis { |r| r.call("SADD", batches_key, [batch_id]) }
      Searchkick::BulkReindexJob.perform_later(
        class_name: class_name,
        index_name: index.name,
        batch_id: batch_id,
        # Integer ids are passed as-is; every other id is converted with to_s
        record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
      )
    end

    # Redis key of the set holding the pending batch ids for this index.
    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end
  end
end
@@ -1,10 +1,9 @@
1
- require "forwardable"
2
-
3
1
  module Searchkick
4
2
  class Results
5
3
  include Enumerable
6
4
  extend Forwardable
7
5
 
6
+ # TODO remove klass and options in 6.0
8
7
  attr_reader :klass, :response, :options
9
8
 
10
9
  def_delegators :results, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary
@@ -15,87 +14,33 @@ module Searchkick
15
14
  @options = options
16
15
  end
17
16
 
18
- # experimental: may not make next release
19
- def records
20
- @records ||= results_query(klass, hits)
21
- end
22
-
17
+ # TODO make private in 6.0
23
18
  def results
24
- @results ||= begin
25
- if options[:load]
26
- # results can have different types
27
- results = {}
28
-
29
- hits.group_by { |hit, _| hit["_type"] }.each do |type, grouped_hits|
30
- results[type] = results_query(type.camelize.constantize, grouped_hits).to_a.index_by { |r| r.id.to_s }
31
- end
32
-
33
- # sort
34
- hits.map do |hit|
35
- result = results[hit["_type"]][hit["_id"].to_s]
36
- if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
37
- unless result.respond_to?(:search_hit)
38
- result.define_singleton_method(:search_hit) do
39
- hit
40
- end
41
- end
42
-
43
- if hit["highlight"] && !result.respond_to?(:search_highlights)
44
- highlights = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
45
- result.define_singleton_method(:search_highlights) do
46
- highlights
47
- end
48
- end
49
- end
50
- result
51
- end.compact
52
- else
53
- hits.map do |hit|
54
- result =
55
- if hit["_source"]
56
- hit.except("_source").merge(hit["_source"])
57
- elsif hit["fields"]
58
- hit.except("fields").merge(hit["fields"])
59
- else
60
- hit
61
- end
19
+ @results ||= with_hit.map(&:first)
20
+ end
62
21
 
63
- if hit["highlight"]
64
- highlight = Hash[hit["highlight"].map { |k, v| [base_field(k), v.first] }]
65
- options[:highlighted_fields].map { |k| base_field(k) }.each do |k|
66
- result["highlighted_#{k}"] ||= (highlight[k] || result[k])
67
- end
68
- end
22
+ def with_hit
23
+ return enum_for(:with_hit) unless block_given?
69
24
 
70
- result["id"] ||= result["_id"] # needed for legacy reasons
71
- Hashie::Mash.new(result)
72
- end
73
- end
25
+ build_hits.each do |result|
26
+ yield result
74
27
  end
75
28
  end
76
29
 
30
+ def missing_records
31
+ @missing_records ||= with_hit_and_missing_records[1]
32
+ end
33
+
77
34
  def suggestions
78
35
  if response["suggest"]
79
36
  response["suggest"].values.flat_map { |v| v.first["options"] }.sort_by { |o| -o["score"] }.map { |o| o["text"] }.uniq
37
+ elsif options[:suggest] || options[:term] == "*" # TODO remove 2nd term
38
+ []
80
39
  else
81
40
  raise "Pass `suggest: true` to the search method for suggestions"
82
41
  end
83
42
  end
84
43
 
85
- def each_with_hit(&block)
86
- results.zip(hits).each(&block)
87
- end
88
-
89
- def with_details
90
- each_with_hit.map do |model, hit|
91
- details = {}
92
- if hit["highlight"]
93
- details[:highlight] = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
94
- end
95
- [model, details]
96
- end
97
- end
98
-
99
44
  def aggregations
100
45
  response["aggregations"]
101
46
  end
@@ -124,7 +69,11 @@ module Searchkick
124
69
  end
125
70
 
126
71
  def model_name
127
- klass.model_name
72
+ if klass.nil?
73
+ ActiveModel::Name.new(self.class, nil, 'Result')
74
+ else
75
+ klass.model_name
76
+ end
128
77
  end
129
78
 
130
79
  def entry_name(options = {})
@@ -138,7 +87,13 @@ module Searchkick
138
87
  end
139
88
 
140
89
  def total_count
141
- response["hits"]["total"]
90
+ if options[:total_entries]
91
+ options[:total_entries]
92
+ elsif response["hits"]["total"].is_a?(Hash)
93
+ response["hits"]["total"]["value"]
94
+ else
95
+ response["hits"]["total"]
96
+ end
142
97
  end
143
98
  alias_method :total_entries, :total_count
144
99
 
@@ -187,32 +142,188 @@ module Searchkick
187
142
  end
188
143
 
189
144
  def hits
190
- @response["hits"]["hits"]
145
+ if error
146
+ raise Error, "Query error - use the error method to view it"
147
+ else
148
+ @response["hits"]["hits"]
149
+ end
150
+ end
151
+
152
+ def highlights(multiple: false)
153
+ hits.map do |hit|
154
+ hit_highlights(hit, multiple: multiple)
155
+ end
156
+ end
157
+
158
+ def with_highlights(multiple: false)
159
+ return enum_for(:with_highlights, multiple: multiple) unless block_given?
160
+
161
+ with_hit.each do |result, hit|
162
+ yield result, hit_highlights(hit, multiple: multiple)
163
+ end
164
+ end
165
+
166
+ def with_score
167
+ return enum_for(:with_score) unless block_given?
168
+
169
+ with_hit.each do |result, hit|
170
+ yield result, hit["_score"]
171
+ end
191
172
  end
192
173
 
193
174
  def misspellings?
194
175
  @options[:misspellings]
195
176
  end
196
177
 
178
+ def scroll_id
179
+ @response["_scroll_id"]
180
+ end
181
+
182
+ def scroll
183
+ raise Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id
184
+
185
+ if block_given?
186
+ records = self
187
+ while records.any?
188
+ yield records
189
+ records = records.scroll
190
+ end
191
+
192
+ records.clear_scroll
193
+ else
194
+ begin
195
+ # TODO Active Support notifications for this scroll call
196
+ Results.new(@klass, Searchkick.client.scroll(scroll: options[:scroll], body: {scroll_id: scroll_id}), @options)
197
+ rescue => e
198
+ if Searchkick.not_found_error?(e) && e.message =~ /search_context_missing_exception/i
199
+ raise Error, "Scroll id has expired"
200
+ else
201
+ raise e
202
+ end
203
+ end
204
+ end
205
+ end
206
+
207
+ def clear_scroll
208
+ begin
209
+ # try to clear scroll
210
+ # not required as scroll will expire
211
+ # but there is a cost to open scrolls
212
+ Searchkick.client.clear_scroll(scroll_id: scroll_id)
213
+ rescue => e
214
+ raise e unless Searchkick.transport_error?(e)
215
+ end
216
+ end
217
+
197
218
  private
198
219
 
220
+ def with_hit_and_missing_records
221
+ @with_hit_and_missing_records ||= begin
222
+ missing_records = []
223
+
224
+ if options[:load]
225
+ grouped_hits = hits.group_by { |hit, _| hit["_index"] }
226
+
227
+ # determine models
228
+ index_models = {}
229
+ grouped_hits.each do |index, _|
230
+ models =
231
+ if @klass
232
+ [@klass]
233
+ else
234
+ index_alias = index.split("_")[0..-2].join("_")
235
+ Array((options[:index_mapping] || {})[index_alias])
236
+ end
237
+ raise Error, "Unknown model for index: #{index}. Pass the `models` option to the search method." unless models.any?
238
+ index_models[index] = models
239
+ end
240
+
241
+ # fetch results
242
+ results = {}
243
+ grouped_hits.each do |index, index_hits|
244
+ results[index] = {}
245
+ index_models[index].each do |model|
246
+ results[index].merge!(results_query(model, index_hits).to_a.index_by { |r| r.id.to_s })
247
+ end
248
+ end
249
+
250
+ # sort
251
+ results =
252
+ hits.map do |hit|
253
+ result = results[hit["_index"]][hit["_id"].to_s]
254
+ if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
255
+ if (hit["highlight"] || options[:highlight]) && !result.respond_to?(:search_highlights)
256
+ highlights = hit_highlights(hit)
257
+ result.define_singleton_method(:search_highlights) do
258
+ highlights
259
+ end
260
+ end
261
+ end
262
+ [result, hit]
263
+ end.select do |result, hit|
264
+ unless result
265
+ models = index_models[hit["_index"]]
266
+ missing_records << {
267
+ id: hit["_id"],
268
+ # may be multiple models for inheritance with child models
269
+ # not ideal to return different types
270
+ # but this situation shouldn't be common
271
+ model: models.size == 1 ? models.first : models
272
+ }
273
+ end
274
+ result
275
+ end
276
+ else
277
+ results =
278
+ hits.map do |hit|
279
+ result =
280
+ if hit["_source"]
281
+ hit.except("_source").merge(hit["_source"])
282
+ elsif hit["fields"]
283
+ hit.except("fields").merge(hit["fields"])
284
+ else
285
+ hit
286
+ end
287
+
288
+ if hit["highlight"] || options[:highlight]
289
+ highlight = hit["highlight"].to_a.to_h { |k, v| [base_field(k), v.first] }
290
+ options[:highlighted_fields].map { |k| base_field(k) }.each do |k|
291
+ result["highlighted_#{k}"] ||= (highlight[k] || result[k])
292
+ end
293
+ end
294
+
295
+ result["id"] ||= result["_id"] # needed for legacy reasons
296
+ [HashWrapper.new(result), hit]
297
+ end
298
+ end
299
+
300
+ [results, missing_records]
301
+ end
302
+ end
303
+
304
# Returns the memoized list of [result, hit] pairs
# (with_hit_and_missing_records[0]). On the first call, emits one
# Searchkick.warn listing every hit whose record was not found.
def build_hits
  @build_hits ||= begin
    if missing_records.any?
      descriptions = missing_records.map do |record|
        # :model is a single model, or an Array of models when the index maps
        # to several; calling model_name on the Array itself would raise
        models = record[:model].is_a?(Array) ? record[:model] : [record[:model]]
        "#{models.map(&:model_name).join("/")} #{record[:id]}"
      end
      Searchkick.warn("Records in search index do not exist in database: #{descriptions.join(", ")}")
    end
    with_hit_and_missing_records[0]
  end
end
312
+
199
313
  def results_query(records, hits)
314
+ records = Searchkick.scope(records)
315
+
200
316
  ids = hits.map { |hit| hit["_id"] }
201
317
  if options[:includes] || options[:model_includes]
202
318
  included_relations = []
203
319
  combine_includes(included_relations, options[:includes])
204
320
  combine_includes(included_relations, options[:model_includes][records]) if options[:model_includes]
205
321
 
206
- records =
207
- if defined?(NoBrainer::Document) && records < NoBrainer::Document
208
- if Gem.loaded_specs["nobrainer"].version >= Gem::Version.new("0.21")
209
- records.eager_load(included_relations)
210
- else
211
- records.preload(included_relations)
212
- end
213
- else
214
- records.includes(included_relations)
215
- end
322
+ records = records.includes(included_relations)
323
+ end
324
+
325
+ if options[:scope_results]
326
+ records = options[:scope_results].call(records)
216
327
  end
217
328
 
218
329
  Searchkick.load_records(records, ids)
@@ -231,5 +342,13 @@ module Searchkick
231
342
  def base_field(k)
232
343
  k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
233
344
  end
345
+
346
+ def hit_highlights(hit, multiple: false)
347
+ if hit["highlight"]
348
+ hit["highlight"].to_h { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, multiple ? v : v.first] }
349
+ else
350
+ {}
351
+ end
352
+ end
234
353
  end
235
354
  end
@@ -1,3 +1,3 @@
1
1
  module Searchkick
2
- VERSION = "2.3.2"
2
+ VERSION = "5.2.1"
3
3
  end
@@ -0,0 +1,11 @@
1
module Searchkick
  # Wraps a relation and exposes `not`, which adds a negated condition to it.
  class Where
    # relation - object that responds to `where`
    def initialize(relation)
      @relation = relation
    end

    # Returns the result of calling `where(_not: value)` on the wrapped relation.
    def not(value)
      @relation.where(_not: value)
    end
  end
end