searchkick 2.3.2 → 5.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -1,5 +1,6 @@
1
1
  module Searchkick
2
2
  class Query
3
+ include Enumerable
3
4
  extend Forwardable
4
5
 
5
6
  @@metric_aggs = [:avg, :cardinality, :max, :min, :sum]
@@ -12,20 +13,21 @@ module Searchkick
12
13
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
14
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
15
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
16
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll, :highlights, :with_highlights,
17
+ :with_score, :misspellings?, :scroll_id, :clear_scroll, :missing_records, :with_hit
16
18
 
17
19
  def initialize(klass, term = "*", **options)
18
- unknown_keywords = options.keys - [:aggs, :body, :body_options, :boost,
19
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
+ unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
21
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
20
22
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
- :match, :misspellings, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where]
23
+ :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
24
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
25
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
26
 
25
27
  term = term.to_s
26
28
 
27
29
  if options[:emoji]
28
- term = EmojiParser.parse_unicode(term) { |e| " #{e.name} " }.strip
30
+ term = EmojiParser.parse_unicode(term) { |e| " #{e.name.tr('_', ' ')} " }.strip
29
31
  end
30
32
 
31
33
  @klass = klass
@@ -39,6 +41,7 @@ module Searchkick
39
41
  @misspellings = false
40
42
  @misspellings_below = nil
41
43
  @highlighted_fields = nil
44
+ @index_mapping = nil
42
45
 
43
46
  prepare
44
47
  end
@@ -56,13 +59,24 @@ module Searchkick
56
59
  end
57
60
 
58
61
  def params
62
+ if options[:models]
63
+ @index_mapping = {}
64
+ Array(options[:models]).each do |model|
65
+ # there can be multiple models per index name due to inheritance - see #1259
66
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
67
+ end
68
+ end
69
+
59
70
  index =
60
71
  if options[:index_name]
61
72
  Array(options[:index_name]).map { |v| v.respond_to?(:searchkick_index) ? v.searchkick_index.name : v }.join(",")
73
+ elsif options[:models]
74
+ @index_mapping.keys.join(",")
62
75
  elsif searchkick_index
63
76
  searchkick_index.name
64
77
  else
65
- "_all"
78
+ # fixes warning about accessing system indices
79
+ "*,-.*"
66
80
  end
67
81
 
68
82
  params = {
@@ -71,6 +85,7 @@ module Searchkick
71
85
  }
72
86
  params[:type] = @type if @type
73
87
  params[:routing] = @routing if @routing
88
+ params[:scroll] = @scroll if @scroll
74
89
  params.merge!(options[:request_params]) if options[:request_params]
75
90
  params
76
91
  end
@@ -94,11 +109,21 @@ module Searchkick
94
109
  query = params
95
110
  type = query[:type]
96
111
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
112
+ request_params = query.except(:index, :type, :body)
97
113
 
98
114
  # no easy way to tell which host the client will use
99
- host = Searchkick.client.transport.hosts.first
115
+ host =
116
+ if Searchkick.client.transport.respond_to?(:transport)
117
+ Searchkick.client.transport.transport.hosts.first
118
+ else
119
+ Searchkick.client.transport.hosts.first
120
+ end
100
121
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
101
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
122
+ params = ["pretty"]
123
+ request_params.each do |k, v|
124
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
125
+ end
126
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
102
127
  end
103
128
 
104
129
  def handle_response(response)
@@ -111,13 +136,18 @@ module Searchkick
111
136
  model_includes: options[:model_includes],
112
137
  json: !@json.nil?,
113
138
  match_suffix: @match_suffix,
139
+ highlight: options[:highlight],
114
140
  highlighted_fields: @highlighted_fields || [],
115
- misspellings: @misspellings
141
+ misspellings: @misspellings,
142
+ term: term,
143
+ scope_results: options[:scope_results],
144
+ total_entries: options[:total_entries],
145
+ index_mapping: @index_mapping,
146
+ suggest: options[:suggest],
147
+ scroll: options[:scroll]
116
148
  }
117
149
 
118
150
  if options[:debug]
119
- require "pp"
120
-
121
151
  puts "Searchkick Version: #{Searchkick::VERSION}"
122
152
  puts "Elasticsearch Version: #{Searchkick.server_version}"
123
153
  puts
@@ -133,7 +163,7 @@ module Searchkick
133
163
  if searchkick_index
134
164
  puts "Model Search Data"
135
165
  begin
136
- pp klass.first(3).map { |r| {index: searchkick_index.record_data(r).merge(data: searchkick_index.send(:search_data, r))}}
166
+ pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
137
167
  rescue => e
138
168
  puts "#{e.class.name}: #{e.message}"
139
169
  end
@@ -157,11 +187,11 @@ module Searchkick
157
187
  end
158
188
 
159
189
  # set execute for multi search
160
- @execute = Searchkick::Results.new(searchkick_klass, response, opts)
190
+ @execute = Results.new(searchkick_klass, response, opts)
161
191
  end
162
192
 
163
193
  def retry_misspellings?(response)
164
- @misspellings_below && response["hits"]["total"] < @misspellings_below
194
+ @misspellings_below && Results.new(searchkick_klass, response).total_count < @misspellings_below
165
195
  end
166
196
 
167
197
  private
@@ -169,7 +199,11 @@ module Searchkick
169
199
  def handle_error(e)
170
200
  status_code = e.message[1..3].to_i
171
201
  if status_code == 404
172
- raise MissingIndexError, "Index missing - run #{reindex_command}"
202
+ if e.message.include?("No search context found for id")
203
+ raise MissingIndexError, "No search context found for id"
204
+ else
205
+ raise MissingIndexError, "Index missing - run #{reindex_command}"
206
+ end
173
207
  elsif status_code == 500 && (
174
208
  e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
175
209
  e.message.include?("No query registered for [multi_match]") ||
@@ -177,15 +211,15 @@ module Searchkick
177
211
  e.message.include?("No query registered for [function_score]")
178
212
  )
179
213
 
180
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
214
+ raise UnsupportedVersionError
181
215
  elsif status_code == 400
182
216
  if (
183
217
  e.message.include?("bool query does not support [filter]") ||
184
218
  e.message.include?("[bool] filter does not support [filter]")
185
219
  )
186
220
 
187
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
188
- elsif e.message.include?("[multi_match] analyzer [searchkick_search] not found")
221
+ raise UnsupportedVersionError
222
+ elsif e.message =~ /analyzer \[searchkick_.+\] not found/
189
223
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
190
224
  else
191
225
  raise InvalidQueryError, e.message
@@ -200,7 +234,14 @@ module Searchkick
200
234
  end
201
235
 
202
236
  def execute_search
203
- Searchkick.client.search(params)
237
+ name = searchkick_klass ? "#{searchkick_klass.name} Search" : "Search"
238
+ event = {
239
+ name: name,
240
+ query: params
241
+ }
242
+ ActiveSupport::Notifications.instrument("search.searchkick", event) do
243
+ Searchkick.client.search(params)
244
+ end
204
245
  end
205
246
 
206
247
  def prepare
@@ -210,40 +251,54 @@ module Searchkick
210
251
 
211
252
  # pagination
212
253
  page = [options[:page].to_i, 1].max
213
- per_page = (options[:limit] || options[:per_page] || 1_000).to_i
254
+ # maybe use index.max_result_window in the future
255
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
256
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
214
257
  padding = [options[:padding].to_i, 0].max
215
- offset = options[:offset] || (page - 1) * per_page + padding
258
+ offset = (options[:offset] || (page - 1) * per_page + padding).to_i
259
+ scroll = options[:scroll]
260
+
261
+ max_result_window = searchkick_options[:max_result_window]
262
+ if max_result_window
263
+ offset = max_result_window if offset > max_result_window
264
+ per_page = max_result_window - offset if offset + per_page > max_result_window
265
+ end
216
266
 
217
267
  # model and eager loading
218
268
  load = options[:load].nil? ? true : options[:load]
219
269
 
220
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
221
-
222
270
  all = term == "*"
223
271
 
224
272
  @json = options[:body]
225
273
  if @json
226
274
  ignored_options = options.keys & [:aggs, :boost,
227
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :exclude, :explain,
228
- :fields, :highlight, :indices_boost, :limit, :match, :misspellings, :offset, :operator, :order,
229
- :padding, :page, :per_page, :select, :smart_aggs, :suggest, :where]
230
- warn "The body option replaces the entire body, so the following options are ignored: #{ignored_options.join(", ")}" if ignored_options.any?
275
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain,
276
+ :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order,
277
+ :profile, :select, :smart_aggs, :suggest, :where]
278
+ raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
231
279
  payload = @json
232
280
  else
281
+ must_not = []
282
+ should = []
283
+
233
284
  if options[:similar]
234
- payload = {
285
+ like = options[:similar] == true ? term : options[:similar]
286
+ query = {
235
287
  more_like_this: {
236
- like_text: term,
288
+ like: like,
237
289
  min_doc_freq: 1,
238
290
  min_term_freq: 1,
239
291
  analyzer: "searchkick_search2"
240
292
  }
241
293
  }
294
+ if fields.all? { |f| f.start_with?("*.") }
295
+ raise ArgumentError, "Must specify fields to search"
296
+ end
242
297
  if fields != ["_all"]
243
- payload[:more_like_this][:fields] = fields
298
+ query[:more_like_this][:fields] = fields
244
299
  end
245
- elsif all
246
- payload = {
300
+ elsif all && !options[:exclude]
301
+ query = {
247
302
  match_all: {}
248
303
  }
249
304
  else
@@ -272,6 +327,15 @@ module Searchkick
272
327
  prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
273
328
  default_max_expansions = @misspellings_below ? 20 : 3
274
329
  max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
330
+ misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)
331
+
332
+ if misspellings_fields
333
+ missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
334
+ if missing_fields.any?
335
+ raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
336
+ end
337
+ end
338
+
275
339
  @misspellings = true
276
340
  else
277
341
  @misspellings = false
@@ -306,12 +370,16 @@ module Searchkick
306
370
  exclude_analyzer = nil
307
371
  exclude_field = field
308
372
 
373
+ field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
374
+
309
375
  if field == "_all" || field.end_with?(".analyzed")
310
- shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
311
- qs.concat [
312
- shared_options.merge(analyzer: "searchkick_search"),
313
- shared_options.merge(analyzer: "searchkick_search2")
314
- ]
376
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
377
+ qs << shared_options.merge(analyzer: "searchkick_search")
378
+
379
+ # searchkick_search and searchkick_search2 are the same for some languages
380
+ unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
381
+ qs << shared_options.merge(analyzer: "searchkick_search2")
382
+ end
315
383
  exclude_analyzer = "searchkick_search2"
316
384
  elsif field.end_with?(".exact")
317
385
  f = field.split(".")[0..-2].join(".")
@@ -324,11 +392,15 @@ module Searchkick
324
392
  exclude_analyzer = analyzer
325
393
  end
326
394
 
327
- if misspellings != false && match_type == :match
328
- qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
395
+ if field_misspellings != false && match_type == :match
396
+ qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
329
397
  end
330
398
 
331
- q2 = qs.map { |q| {match_type => {field => q}} }
399
+ if field.start_with?("*.")
400
+ q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
401
+ else
402
+ q2 = qs.map { |q| {match_type => {field => q}} }
403
+ end
332
404
 
333
405
  # boost exact matches more
334
406
  if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
@@ -346,100 +418,81 @@ module Searchkick
346
418
  queries_to_add.concat(q2)
347
419
  end
348
420
 
349
- if options[:exclude]
350
- must_not =
351
- Array(options[:exclude]).map do |phrase|
352
- {
353
- match_phrase: {
354
- exclude_field => {
355
- query: phrase,
356
- analyzer: exclude_analyzer
357
- }
358
- }
359
- }
360
- end
421
+ queries << queries_to_add
361
422
 
362
- queries_to_add = [{
363
- bool: {
364
- should: queries_to_add,
365
- must_not: must_not
366
- }
367
- }]
423
+ if options[:exclude]
424
+ must_not.concat(set_exclude(exclude_field, exclude_analyzer))
368
425
  end
369
-
370
- queries.concat(queries_to_add)
371
426
  end
372
427
 
373
- payload = {
374
- dis_max: {
375
- queries: queries
428
+ # all + exclude option
429
+ if all
430
+ query = {
431
+ match_all: {}
376
432
  }
377
- }
378
-
379
- if conversions_fields.present? && options[:conversions] != false
380
- shoulds = []
381
- conversions_fields.each do |conversions_field|
382
- # wrap payload in a bool query
383
- script_score = {field_value_factor: {field: "#{conversions_field}.count"}}
384
433
 
385
- shoulds << {
386
- nested: {
387
- path: conversions_field,
388
- score_mode: "sum",
389
- query: {
390
- function_score: {
391
- boost_mode: "replace",
392
- query: {
393
- match: {
394
- "#{conversions_field}.query" => options[:conversions_term] || term
395
- }
396
- }
397
- }.merge(script_score)
398
- }
399
- }
400
- }
401
- end
434
+ should = []
435
+ else
436
+ # higher score for matching more fields
402
437
  payload = {
403
438
  bool: {
404
- must: payload,
405
- should: shoulds
439
+ should: queries.map { |qs| {dis_max: {queries: qs}} }
406
440
  }
407
441
  }
442
+
443
+ should.concat(set_conversions)
408
444
  end
445
+
446
+ query = payload
447
+ end
448
+
449
+ payload = {}
450
+
451
+ # type when inheritance
452
+ where = ensure_permitted(options[:where] || {}).dup
453
+ if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
454
+ where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
409
455
  end
410
456
 
457
+ models = Array(options[:models])
458
+ if models.any? { |m| m != m.searchkick_klass }
459
+ # aliases are not supported with _index in ES below 7.5
460
+ # see https://github.com/elastic/elasticsearch/pull/46640
461
+ if below75?
462
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
463
+ else
464
+ index_type_or =
465
+ models.map do |m|
466
+ v = {_index: m.searchkick_index.name}
467
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
468
+ v
469
+ end
470
+
471
+ where[:or] = Array(where[:or]) + [index_type_or]
472
+ end
473
+ end
474
+
475
+ # start everything as efficient filters
476
+ # move to post_filters as aggs demand
477
+ filters = where_filters(where)
478
+ post_filters = []
479
+
480
+ # aggregations
481
+ set_aggregations(payload, filters, post_filters) if options[:aggs]
482
+
483
+ # post filters
484
+ set_post_filters(payload, post_filters) if post_filters.any?
485
+
411
486
  custom_filters = []
412
487
  multiply_filters = []
413
488
 
414
489
  set_boost_by(multiply_filters, custom_filters)
415
490
  set_boost_where(custom_filters)
416
491
  set_boost_by_distance(custom_filters) if options[:boost_by_distance]
492
+ set_boost_by_recency(custom_filters) if options[:boost_by_recency]
417
493
 
418
- if custom_filters.any?
419
- payload = {
420
- function_score: {
421
- functions: custom_filters,
422
- query: payload,
423
- score_mode: "sum"
424
- }
425
- }
426
- end
427
-
428
- if multiply_filters.any?
429
- payload = {
430
- function_score: {
431
- functions: multiply_filters,
432
- query: payload,
433
- score_mode: "multiply"
434
- }
435
- }
436
- end
494
+ payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)
437
495
 
438
- payload = {
439
- query: payload,
440
- size: per_page,
441
- from: offset
442
- }
443
496
  payload[:explain] = options[:explain] if options[:explain]
444
497
  payload[:profile] = options[:profile] if options[:profile]
445
498
 
@@ -449,13 +502,6 @@ module Searchkick
449
502
  # indices_boost
450
503
  set_boost_by_indices(payload)
451
504
 
452
- # filters
453
- filters = where_filters(options[:where])
454
- set_filters(payload, filters) if filters.any?
455
-
456
- # aggregations
457
- set_aggregations(payload) if options[:aggs]
458
-
459
505
  # suggestions
460
506
  set_suggestions(payload, options[:suggest]) if options[:suggest]
461
507
 
@@ -463,11 +509,10 @@ module Searchkick
463
509
  set_highlights(payload, fields) if options[:highlight]
464
510
 
465
511
  # timeout shortly after client times out
466
- payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
512
+ payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"
467
513
 
468
514
  # An empty array will cause only the _id and _type for each hit to be returned
469
- # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
470
- # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
515
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
471
516
  if options[:select]
472
517
  if options[:select] == []
473
518
  # intuitively [] makes sense to return no fields, but ES by default returns all fields
@@ -480,28 +525,49 @@ module Searchkick
480
525
  end
481
526
  end
482
527
 
528
+ # pagination
529
+ pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
530
+ if !options[:body] || pagination_options
531
+ payload[:size] = per_page
532
+ payload[:from] = offset if offset > 0
533
+ end
534
+
483
535
  # type
484
- if options[:type] || (klass != searchkick_klass && searchkick_index)
536
+ if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
485
537
  @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
486
538
  end
487
539
 
488
540
  # routing
489
541
  @routing = options[:routing] if options[:routing]
490
542
 
543
+ if track_total_hits?
544
+ payload[:track_total_hits] = true
545
+ end
546
+
491
547
  # merge more body options
492
548
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
493
549
 
550
+ # run block
551
+ options[:block].call(payload) if options[:block]
552
+
553
+ # scroll optimization when iterating over all docs
554
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
555
+ if options[:scroll] && payload[:query] == {match_all: {}}
556
+ payload[:sort] ||= ["_doc"]
557
+ end
558
+
494
559
  @body = payload
495
560
  @page = page
496
561
  @per_page = per_page
497
562
  @padding = padding
498
563
  @load = load
564
+ @scroll = scroll
499
565
  end
500
566
 
501
567
  def set_fields
502
568
  boost_fields = {}
503
569
  fields = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
504
- all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : below60?
570
+ all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : false
505
571
  default_match = options[:match] || searchkick_options[:match] || :word
506
572
  fields =
507
573
  if fields
@@ -516,12 +582,89 @@ module Searchkick
516
582
  ["_all"]
517
583
  elsif all && default_match == :phrase
518
584
  ["_all.phrase"]
519
- else
585
+ elsif term != "*" && default_match == :exact
520
586
  raise ArgumentError, "Must specify fields to search"
587
+ else
588
+ [default_match == :word ? "*.analyzed" : "*.#{default_match}"]
521
589
  end
522
590
  [boost_fields, fields]
523
591
  end
524
592
 
593
+ def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
594
+ if filters.any? || must_not.any? || should.any?
595
+ bool = {}
596
+ bool[:must] = query if query
597
+ bool[:filter] = filters if filters.any? # where
598
+ bool[:must_not] = must_not if must_not.any? # exclude
599
+ bool[:should] = should if should.any? # conversions
600
+ query = {bool: bool}
601
+ end
602
+
603
+ if custom_filters.any?
604
+ query = {
605
+ function_score: {
606
+ functions: custom_filters,
607
+ query: query,
608
+ score_mode: "sum"
609
+ }
610
+ }
611
+ end
612
+
613
+ if multiply_filters.any?
614
+ query = {
615
+ function_score: {
616
+ functions: multiply_filters,
617
+ query: query,
618
+ score_mode: "multiply"
619
+ }
620
+ }
621
+ end
622
+
623
+ query
624
+ end
625
+
626
+ def set_conversions
627
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
628
+ if conversions_fields.present? && options[:conversions] != false
629
+ conversions_fields.map do |conversions_field|
630
+ {
631
+ nested: {
632
+ path: conversions_field,
633
+ score_mode: "sum",
634
+ query: {
635
+ function_score: {
636
+ boost_mode: "replace",
637
+ query: {
638
+ match: {
639
+ "#{conversions_field}.query" => options[:conversions_term] || term
640
+ }
641
+ },
642
+ field_value_factor: {
643
+ field: "#{conversions_field}.count"
644
+ }
645
+ }
646
+ }
647
+ }
648
+ }
649
+ end
650
+ else
651
+ []
652
+ end
653
+ end
654
+
655
+ def set_exclude(field, analyzer)
656
+ Array(options[:exclude]).map do |phrase|
657
+ {
658
+ multi_match: {
659
+ fields: [field],
660
+ query: phrase,
661
+ analyzer: analyzer,
662
+ type: "phrase"
663
+ }
664
+ }
665
+ end
666
+ end
667
+
525
668
  def set_boost_by_distance(custom_filters)
526
669
  boost_by_distance = options[:boost_by_distance] || {}
527
670
 
@@ -535,9 +678,11 @@ module Searchkick
535
678
  unless attributes[:origin]
536
679
  raise ArgumentError, "boost_by_distance requires :origin"
537
680
  end
538
- function_params = attributes.select { |k, _| [:origin, :scale, :offset, :decay].include?(k) }
681
+
682
+ function_params = attributes.except(:factor, :function)
539
683
  function_params[:origin] = location_value(function_params[:origin])
540
684
  custom_filters << {
685
+ weight: attributes[:factor] || 1,
541
686
  attributes[:function] => {
542
687
  field => function_params
543
688
  }
@@ -545,16 +690,29 @@ module Searchkick
545
690
  end
546
691
  end
547
692
 
693
+ def set_boost_by_recency(custom_filters)
694
+ options[:boost_by_recency].each do |field, attributes|
695
+ attributes = {function: :gauss, origin: Time.now}.merge(attributes)
696
+
697
+ custom_filters << {
698
+ weight: attributes[:factor] || 1,
699
+ attributes[:function] => {
700
+ field => attributes.except(:factor, :function)
701
+ }
702
+ }
703
+ end
704
+ end
705
+
548
706
  def set_boost_by(multiply_filters, custom_filters)
549
707
  boost_by = options[:boost_by] || {}
550
708
  if boost_by.is_a?(Array)
551
- boost_by = Hash[boost_by.map { |f| [f, {factor: 1}] }]
709
+ boost_by = boost_by.to_h { |f| [f, {factor: 1}] }
552
710
  elsif boost_by.is_a?(Hash)
553
- multiply_by, boost_by = boost_by.partition { |_, v| v[:boost_mode] == "multiply" }.map { |i| Hash[i] }
711
+ multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
554
712
  end
555
713
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
556
714
 
557
- custom_filters.concat boost_filters(boost_by, log: true)
715
+ custom_filters.concat boost_filters(boost_by, modifier: "ln2p")
558
716
  multiply_filters.concat boost_filters(multiply_by || {})
559
717
  end
560
718
 
@@ -577,11 +735,9 @@ module Searchkick
577
735
  def set_boost_by_indices(payload)
578
736
  return unless options[:indices_boost]
579
737
 
580
- indices_boost = options[:indices_boost].each_with_object({}) do |(key, boost), memo|
738
+ indices_boost = options[:indices_boost].map do |key, boost|
581
739
  index = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
582
- # try to use index explicitly instead of alias: https://github.com/elasticsearch/elasticsearch/issues/4756
583
- index_by_alias = Searchkick.client.indices.get_alias(index: index).keys.first
584
- memo[index_by_alias || index] = boost
740
+ {index => boost}
585
741
  end
586
742
 
587
743
  payload[:indices_boost] = indices_boost
@@ -617,7 +773,8 @@ module Searchkick
617
773
 
618
774
  def set_highlights(payload, fields)
619
775
  payload[:highlight] = {
620
- fields: Hash[fields.map { |f| [f, {}] }]
776
+ fields: fields.to_h { |f| [f, {}] },
777
+ fragment_size: 0
621
778
  }
622
779
 
623
780
  if options[:highlight].is_a?(Hash)
@@ -646,15 +803,14 @@ module Searchkick
646
803
  @highlighted_fields = payload[:highlight][:fields].keys
647
804
  end
648
805
 
649
- def set_aggregations(payload)
806
+ def set_aggregations(payload, filters, post_filters)
650
807
  aggs = options[:aggs]
651
808
  payload[:aggs] = {}
652
809
 
653
- aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax
654
-
810
+ aggs = aggs.to_h { |f| [f, {}] } if aggs.is_a?(Array) # convert to more advanced syntax
655
811
  aggs.each do |field, agg_options|
656
812
  size = agg_options[:limit] ? agg_options[:limit] : 1_000
657
- shared_agg_options = agg_options.slice(:order, :min_doc_count)
813
+ shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
658
814
 
659
815
  if agg_options[:ranges]
660
816
  payload[:aggs][field] = {
@@ -670,20 +826,16 @@ module Searchkick
670
826
  ranges: agg_options[:date_ranges]
671
827
  }.merge(shared_agg_options)
672
828
  }
673
- elsif histogram = agg_options[:date_histogram]
674
- interval = histogram[:interval]
829
+ elsif (histogram = agg_options[:date_histogram])
675
830
  payload[:aggs][field] = {
676
- date_histogram: {
677
- field: histogram[:field],
678
- interval: interval
679
- }
680
- }
681
- elsif metric = @@metric_aggs.find { |k| agg_options.has_key?(k) }
831
+ date_histogram: histogram
832
+ }.merge(shared_agg_options)
833
+ elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
682
834
  payload[:aggs][field] = {
683
835
  metric => {
684
836
  field: agg_options[metric][:field] || field
685
837
  }
686
- }
838
+ }.merge(shared_agg_options)
687
839
  else
688
840
  payload[:aggs][field] = {
689
841
  terms: {
@@ -694,8 +846,20 @@ module Searchkick
694
846
  end
695
847
 
696
848
  where = {}
697
- where = (options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
698
- agg_filters = where_filters(where.merge(agg_options[:where] || {}))
849
+ where = ensure_permitted(options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
850
+ agg_where = ensure_permitted(agg_options[:where] || {})
851
+ agg_filters = where_filters(where.merge(agg_where))
852
+
853
+ # only do one level comparison for simplicity
854
+ filters.select! do |filter|
855
+ if agg_filters.include?(filter)
856
+ true
857
+ else
858
+ post_filters << filter
859
+ false
860
+ end
861
+ end
862
+
699
863
  if agg_filters.any?
700
864
  payload[:aggs][field] = {
701
865
  filter: {
@@ -711,29 +875,22 @@ module Searchkick
711
875
  end
712
876
  end
713
877
 
714
- def set_filters(payload, filters)
715
- if options[:aggs]
716
- payload[:post_filter] = {
717
- bool: {
718
- filter: filters
719
- }
720
- }
721
- else
722
- # more efficient query if no aggs
723
- payload[:query] = {
724
- bool: {
725
- must: payload[:query],
726
- filter: filters
727
- }
878
+ def set_post_filters(payload, post_filters)
879
+ payload[:post_filter] = {
880
+ bool: {
881
+ filter: post_filters
728
882
  }
729
- end
883
+ }
730
884
  end
731
885
 
732
- # TODO id transformation for arrays
733
886
  def set_order(payload)
734
- order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
735
- id_field = below50? ? :_id : :_uid
736
- payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }]
887
+ payload[:sort] = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
888
+ end
889
+
890
+ # provides *very* basic protection from unfiltered parameters
891
+ # this is not meant to be comprehensive and may be expanded in the future
892
+ def ensure_permitted(obj)
893
+ obj.to_h
737
894
  end
738
895
 
739
896
  def where_filters(where)
@@ -751,10 +908,12 @@ module Searchkick
751
908
  filters << {bool: {must_not: where_filters(value)}}
752
909
  elsif field == :_and
753
910
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
911
+ # elsif field == :_script
912
+ # filters << {script: {script: {source: value, lang: "painless"}}}
754
913
  else
755
914
  # expand ranges
756
915
  if value.is_a?(Range)
757
- value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
916
+ value = expand_range(value)
758
917
  end
759
918
 
760
919
  value = {in: value} if value.is_a?(Array)
@@ -762,7 +921,7 @@ module Searchkick
762
921
  if value.is_a?(Hash)
763
922
  value.each do |op, op_value|
764
923
  case op
765
- when :within, :bottom_right
924
+ when :within, :bottom_right, :bottom_left
766
925
  # do nothing
767
926
  when :near
768
927
  filters << {
@@ -797,9 +956,44 @@ module Searchkick
797
956
  }
798
957
  }
799
958
  }
959
+ when :top_right
960
+ filters << {
961
+ geo_bounding_box: {
962
+ field => {
963
+ top_right: location_value(op_value),
964
+ bottom_left: location_value(value[:bottom_left])
965
+ }
966
+ }
967
+ }
968
+ when :like, :ilike
969
+ # based on Postgres
970
+ # https://www.postgresql.org/docs/current/functions-matching.html
971
+ # % matches zero or more characters
972
+ # _ matches one character
973
+ # \ is escape character
974
+ # escape Lucene reserved characters
975
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
976
+ reserved = %w(\\ . ? + * | { } [ ] ( ) ")
977
+ regex = op_value.dup
978
+ reserved.each do |v|
979
+ regex.gsub!(v, "\\\\" + v)
980
+ end
981
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
982
+
983
+ if op == :ilike
984
+ if below710?
985
+ raise ArgumentError, "ilike requires Elasticsearch 7.10+"
986
+ else
987
+ filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
988
+ end
989
+ else
990
+ filters << {regexp: {field => {value: regex, flags: "NONE"}}}
991
+ end
992
+ when :prefix
993
+ filters << {prefix: {field => {value: op_value}}}
800
994
  when :regexp # support for regexp queries without using a regexp ruby object
801
995
  filters << {regexp: {field => {value: op_value}}}
802
- when :not # not equal
996
+ when :not, :_not # not equal
803
997
  filters << {bool: {must_not: term_filters(field, op_value)}}
804
998
  when :all
805
999
  op_value.each do |val|
@@ -807,6 +1001,8 @@ module Searchkick
807
1001
  end
808
1002
  when :in
809
1003
  filters << term_filters(field, op_value)
1004
+ when :exists
1005
+ filters << {exists: {field: field}}
810
1006
  else
811
1007
  range_query =
812
1008
  case op
@@ -819,7 +1015,7 @@ module Searchkick
819
1015
  when :lte
820
1016
  {to: op_value, include_upper: true}
821
1017
  else
822
- raise "Unknown where operator: #{op.inspect}"
1018
+ raise ArgumentError, "Unknown where operator: #{op.inspect}"
823
1019
  end
824
1020
  # issue 132
825
1021
  if (existing = filters.find { |f| f[:range] && f[:range][field] })
@@ -847,57 +1043,75 @@ module Searchkick
847
1043
  elsif value.nil?
848
1044
  {bool: {must_not: {exists: {field: field}}}}
849
1045
  elsif value.is_a?(Regexp)
850
- {regexp: {field => {value: value.source}}}
1046
+ source = value.source
1047
+
1048
+ # TODO handle other regexp options
1049
+
1050
+ # TODO handle other anchor characters, like ^, $, \Z
1051
+ if source.start_with?("\\A")
1052
+ source = source[2..-1]
1053
+ else
1054
+ source = ".*#{source}"
1055
+ end
1056
+
1057
+ if source.end_with?("\\z")
1058
+ source = source[0..-3]
1059
+ else
1060
+ source = "#{source}.*"
1061
+ end
1062
+
1063
+ if below710?
1064
+ if value.casefold?
1065
+ raise ArgumentError, "Case-insensitive flag does not work with Elasticsearch < 7.10"
1066
+ end
1067
+ {regexp: {field => {value: source, flags: "NONE"}}}
1068
+ else
1069
+ {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
1070
+ end
851
1071
  else
852
- {term: {field => value}}
1072
+ # TODO add this for other values
1073
+ if value.as_json.is_a?(Enumerable)
1074
+ # query will fail, but this is better
1075
+ # same message as Active Record
1076
+ raise TypeError, "can't cast #{value.class.name}"
1077
+ end
1078
+
1079
+ {term: {field => {value: value}}}
853
1080
  end
854
1081
  end
855
1082
 
856
1083
  def custom_filter(field, value, factor)
857
- if below50?
858
- {
859
- filter: {
860
- bool: {
861
- must: where_filters(field => value)
862
- }
863
- },
864
- boost_factor: factor
1084
+ {
1085
+ filter: where_filters(field => value),
1086
+ weight: factor
1087
+ }
1088
+ end
1089
+
1090
+ def boost_filter(field, factor: 1, modifier: nil, missing: nil)
1091
+ script_score = {
1092
+ field_value_factor: {
1093
+ field: field,
1094
+ factor: factor.to_f,
1095
+ modifier: modifier
865
1096
  }
1097
+ }
1098
+
1099
+ if missing
1100
+ script_score[:field_value_factor][:missing] = missing.to_f
866
1101
  else
867
- {
868
- filter: where_filters(field => value),
869
- weight: factor
1102
+ script_score[:filter] = {
1103
+ exists: {
1104
+ field: field
1105
+ }
870
1106
  }
871
1107
  end
1108
+
1109
+ script_score
872
1110
  end
873
1111
 
874
- def boost_filters(boost_by, options = {})
1112
+ def boost_filters(boost_by, modifier: nil)
875
1113
  boost_by.map do |field, value|
876
- log = value.key?(:log) ? value[:log] : options[:log]
877
- value[:factor] ||= 1
878
- script_score = {
879
- field_value_factor: {
880
- field: field,
881
- factor: value[:factor].to_f,
882
- modifier: log ? "ln2p" : nil
883
- }
884
- }
885
-
886
- if value[:missing]
887
- if below50?
888
- raise ArgumentError, "The missing option for boost_by is not supported in Elasticsearch < 5"
889
- else
890
- script_score[:field_value_factor][:missing] = value[:missing].to_f
891
- end
892
- else
893
- script_score[:filter] = {
894
- exists: {
895
- field: field
896
- }
897
- }
898
- end
899
-
900
- script_score
1114
+ boost_filter(field, modifier: modifier, **value)
901
1115
  end
902
1116
  end
903
1117
 
@@ -922,12 +1136,39 @@ module Searchkick
922
1136
  end
923
1137
  end
924
1138
 
925
- def below50?
926
- Searchkick.server_below?("5.0.0-alpha1")
1139
+ def expand_range(range)
1140
+ expanded = {}
1141
+ expanded[:gte] = range.begin if range.begin
1142
+
1143
+ if range.end && !(range.end.respond_to?(:infinite?) && range.end.infinite?)
1144
+ expanded[range.exclude_end? ? :lt : :lte] = range.end
1145
+ end
1146
+
1147
+ expanded
1148
+ end
1149
+
1150
+ def base_field(k)
1151
+ k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1152
+ end
1153
+
1154
+ def track_total_hits?
1155
+ searchkick_options[:deep_paging] || body_options[:track_total_hits]
1156
+ end
1157
+
1158
+ def body_options
1159
+ options[:body_options] || {}
1160
+ end
1161
+
1162
+ def below73?
1163
+ Searchkick.server_below?("7.3.0")
1164
+ end
1165
+
1166
+ def below75?
1167
+ Searchkick.server_below?("7.5.0")
927
1168
  end
928
1169
 
929
- def below60?
930
- Searchkick.server_below?("6.0.0-alpha1")
1170
+ def below710?
1171
+ Searchkick.server_below?("7.10.0")
931
1172
  end
932
1173
  end
933
1174
  end