searchkick 2.3.2 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -1,5 +1,6 @@
1
1
  module Searchkick
2
2
  class Query
3
+ include Enumerable
3
4
  extend Forwardable
4
5
 
5
6
  @@metric_aggs = [:avg, :cardinality, :max, :min, :sum]
@@ -12,20 +13,21 @@ module Searchkick
12
13
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
14
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
15
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
16
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll, :highlights, :with_highlights,
17
+ :with_score, :misspellings?, :scroll_id, :clear_scroll, :missing_records, :with_hit
16
18
 
17
19
  def initialize(klass, term = "*", **options)
18
- unknown_keywords = options.keys - [:aggs, :body, :body_options, :boost,
19
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
+ unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
21
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
20
22
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
- :match, :misspellings, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where]
23
+ :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
24
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
25
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
26
 
25
27
  term = term.to_s
26
28
 
27
29
  if options[:emoji]
28
- term = EmojiParser.parse_unicode(term) { |e| " #{e.name} " }.strip
30
+ term = EmojiParser.parse_unicode(term) { |e| " #{e.name.tr('_', ' ')} " }.strip
29
31
  end
30
32
 
31
33
  @klass = klass
@@ -39,6 +41,7 @@ module Searchkick
39
41
  @misspellings = false
40
42
  @misspellings_below = nil
41
43
  @highlighted_fields = nil
44
+ @index_mapping = nil
42
45
 
43
46
  prepare
44
47
  end
@@ -56,13 +59,24 @@ module Searchkick
56
59
  end
57
60
 
58
61
  def params
62
+ if options[:models]
63
+ @index_mapping = {}
64
+ Array(options[:models]).each do |model|
65
+ # there can be multiple models per index name due to inheritance - see #1259
66
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
67
+ end
68
+ end
69
+
59
70
  index =
60
71
  if options[:index_name]
61
72
  Array(options[:index_name]).map { |v| v.respond_to?(:searchkick_index) ? v.searchkick_index.name : v }.join(",")
73
+ elsif options[:models]
74
+ @index_mapping.keys.join(",")
62
75
  elsif searchkick_index
63
76
  searchkick_index.name
64
77
  else
65
- "_all"
78
+ # fixes warning about accessing system indices
79
+ "*,-.*"
66
80
  end
67
81
 
68
82
  params = {
@@ -71,6 +85,7 @@ module Searchkick
71
85
  }
72
86
  params[:type] = @type if @type
73
87
  params[:routing] = @routing if @routing
88
+ params[:scroll] = @scroll if @scroll
74
89
  params.merge!(options[:request_params]) if options[:request_params]
75
90
  params
76
91
  end
@@ -94,11 +109,21 @@ module Searchkick
94
109
  query = params
95
110
  type = query[:type]
96
111
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
112
+ request_params = query.except(:index, :type, :body)
97
113
 
98
114
  # no easy way to tell which host the client will use
99
- host = Searchkick.client.transport.hosts.first
115
+ host =
116
+ if Searchkick.client.transport.respond_to?(:transport)
117
+ Searchkick.client.transport.transport.hosts.first
118
+ else
119
+ Searchkick.client.transport.hosts.first
120
+ end
100
121
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
101
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
122
+ params = ["pretty"]
123
+ request_params.each do |k, v|
124
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
125
+ end
126
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
102
127
  end
103
128
 
104
129
  def handle_response(response)
@@ -111,13 +136,18 @@ module Searchkick
111
136
  model_includes: options[:model_includes],
112
137
  json: !@json.nil?,
113
138
  match_suffix: @match_suffix,
139
+ highlight: options[:highlight],
114
140
  highlighted_fields: @highlighted_fields || [],
115
- misspellings: @misspellings
141
+ misspellings: @misspellings,
142
+ term: term,
143
+ scope_results: options[:scope_results],
144
+ total_entries: options[:total_entries],
145
+ index_mapping: @index_mapping,
146
+ suggest: options[:suggest],
147
+ scroll: options[:scroll]
116
148
  }
117
149
 
118
150
  if options[:debug]
119
- require "pp"
120
-
121
151
  puts "Searchkick Version: #{Searchkick::VERSION}"
122
152
  puts "Elasticsearch Version: #{Searchkick.server_version}"
123
153
  puts
@@ -133,7 +163,7 @@ module Searchkick
133
163
  if searchkick_index
134
164
  puts "Model Search Data"
135
165
  begin
136
- pp klass.first(3).map { |r| {index: searchkick_index.record_data(r).merge(data: searchkick_index.send(:search_data, r))}}
166
+ pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
137
167
  rescue => e
138
168
  puts "#{e.class.name}: #{e.message}"
139
169
  end
@@ -157,11 +187,11 @@ module Searchkick
157
187
  end
158
188
 
159
189
  # set execute for multi search
160
- @execute = Searchkick::Results.new(searchkick_klass, response, opts)
190
+ @execute = Results.new(searchkick_klass, response, opts)
161
191
  end
162
192
 
163
193
  def retry_misspellings?(response)
164
- @misspellings_below && response["hits"]["total"] < @misspellings_below
194
+ @misspellings_below && Results.new(searchkick_klass, response).total_count < @misspellings_below
165
195
  end
166
196
 
167
197
  private
@@ -169,7 +199,11 @@ module Searchkick
169
199
  def handle_error(e)
170
200
  status_code = e.message[1..3].to_i
171
201
  if status_code == 404
172
- raise MissingIndexError, "Index missing - run #{reindex_command}"
202
+ if e.message.include?("No search context found for id")
203
+ raise MissingIndexError, "No search context found for id"
204
+ else
205
+ raise MissingIndexError, "Index missing - run #{reindex_command}"
206
+ end
173
207
  elsif status_code == 500 && (
174
208
  e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
175
209
  e.message.include?("No query registered for [multi_match]") ||
@@ -177,15 +211,15 @@ module Searchkick
177
211
  e.message.include?("No query registered for [function_score]")
178
212
  )
179
213
 
180
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
214
+ raise UnsupportedVersionError
181
215
  elsif status_code == 400
182
216
  if (
183
217
  e.message.include?("bool query does not support [filter]") ||
184
218
  e.message.include?("[bool] filter does not support [filter]")
185
219
  )
186
220
 
187
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
188
- elsif e.message.include?("[multi_match] analyzer [searchkick_search] not found")
221
+ raise UnsupportedVersionError
222
+ elsif e.message =~ /analyzer \[searchkick_.+\] not found/
189
223
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
190
224
  else
191
225
  raise InvalidQueryError, e.message
@@ -200,7 +234,14 @@ module Searchkick
200
234
  end
201
235
 
202
236
  def execute_search
203
- Searchkick.client.search(params)
237
+ name = searchkick_klass ? "#{searchkick_klass.name} Search" : "Search"
238
+ event = {
239
+ name: name,
240
+ query: params
241
+ }
242
+ ActiveSupport::Notifications.instrument("search.searchkick", event) do
243
+ Searchkick.client.search(params)
244
+ end
204
245
  end
205
246
 
206
247
  def prepare
@@ -210,40 +251,54 @@ module Searchkick
210
251
 
211
252
  # pagination
212
253
  page = [options[:page].to_i, 1].max
213
- per_page = (options[:limit] || options[:per_page] || 1_000).to_i
254
+ # maybe use index.max_result_window in the future
255
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
256
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
214
257
  padding = [options[:padding].to_i, 0].max
215
- offset = options[:offset] || (page - 1) * per_page + padding
258
+ offset = (options[:offset] || (page - 1) * per_page + padding).to_i
259
+ scroll = options[:scroll]
260
+
261
+ max_result_window = searchkick_options[:max_result_window]
262
+ if max_result_window
263
+ offset = max_result_window if offset > max_result_window
264
+ per_page = max_result_window - offset if offset + per_page > max_result_window
265
+ end
216
266
 
217
267
  # model and eager loading
218
268
  load = options[:load].nil? ? true : options[:load]
219
269
 
220
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
221
-
222
270
  all = term == "*"
223
271
 
224
272
  @json = options[:body]
225
273
  if @json
226
274
  ignored_options = options.keys & [:aggs, :boost,
227
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :exclude, :explain,
228
- :fields, :highlight, :indices_boost, :limit, :match, :misspellings, :offset, :operator, :order,
229
- :padding, :page, :per_page, :select, :smart_aggs, :suggest, :where]
230
- warn "The body option replaces the entire body, so the following options are ignored: #{ignored_options.join(", ")}" if ignored_options.any?
275
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain,
276
+ :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order,
277
+ :profile, :select, :smart_aggs, :suggest, :where]
278
+ raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
231
279
  payload = @json
232
280
  else
281
+ must_not = []
282
+ should = []
283
+
233
284
  if options[:similar]
234
- payload = {
285
+ like = options[:similar] == true ? term : options[:similar]
286
+ query = {
235
287
  more_like_this: {
236
- like_text: term,
288
+ like: like,
237
289
  min_doc_freq: 1,
238
290
  min_term_freq: 1,
239
291
  analyzer: "searchkick_search2"
240
292
  }
241
293
  }
294
+ if fields.all? { |f| f.start_with?("*.") }
295
+ raise ArgumentError, "Must specify fields to search"
296
+ end
242
297
  if fields != ["_all"]
243
- payload[:more_like_this][:fields] = fields
298
+ query[:more_like_this][:fields] = fields
244
299
  end
245
- elsif all
246
- payload = {
300
+ elsif all && !options[:exclude]
301
+ query = {
247
302
  match_all: {}
248
303
  }
249
304
  else
@@ -272,6 +327,15 @@ module Searchkick
272
327
  prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
273
328
  default_max_expansions = @misspellings_below ? 20 : 3
274
329
  max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
330
+ misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)
331
+
332
+ if misspellings_fields
333
+ missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
334
+ if missing_fields.any?
335
+ raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
336
+ end
337
+ end
338
+
275
339
  @misspellings = true
276
340
  else
277
341
  @misspellings = false
@@ -306,12 +370,16 @@ module Searchkick
306
370
  exclude_analyzer = nil
307
371
  exclude_field = field
308
372
 
373
+ field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
374
+
309
375
  if field == "_all" || field.end_with?(".analyzed")
310
- shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
311
- qs.concat [
312
- shared_options.merge(analyzer: "searchkick_search"),
313
- shared_options.merge(analyzer: "searchkick_search2")
314
- ]
376
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
377
+ qs << shared_options.merge(analyzer: "searchkick_search")
378
+
379
+ # searchkick_search and searchkick_search2 are the same for some languages
380
+ unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
381
+ qs << shared_options.merge(analyzer: "searchkick_search2")
382
+ end
315
383
  exclude_analyzer = "searchkick_search2"
316
384
  elsif field.end_with?(".exact")
317
385
  f = field.split(".")[0..-2].join(".")
@@ -324,11 +392,15 @@ module Searchkick
324
392
  exclude_analyzer = analyzer
325
393
  end
326
394
 
327
- if misspellings != false && match_type == :match
328
- qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
395
+ if field_misspellings != false && match_type == :match
396
+ qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
329
397
  end
330
398
 
331
- q2 = qs.map { |q| {match_type => {field => q}} }
399
+ if field.start_with?("*.")
400
+ q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
401
+ else
402
+ q2 = qs.map { |q| {match_type => {field => q}} }
403
+ end
332
404
 
333
405
  # boost exact matches more
334
406
  if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
@@ -346,100 +418,81 @@ module Searchkick
346
418
  queries_to_add.concat(q2)
347
419
  end
348
420
 
349
- if options[:exclude]
350
- must_not =
351
- Array(options[:exclude]).map do |phrase|
352
- {
353
- match_phrase: {
354
- exclude_field => {
355
- query: phrase,
356
- analyzer: exclude_analyzer
357
- }
358
- }
359
- }
360
- end
421
+ queries << queries_to_add
361
422
 
362
- queries_to_add = [{
363
- bool: {
364
- should: queries_to_add,
365
- must_not: must_not
366
- }
367
- }]
423
+ if options[:exclude]
424
+ must_not.concat(set_exclude(exclude_field, exclude_analyzer))
368
425
  end
369
-
370
- queries.concat(queries_to_add)
371
426
  end
372
427
 
373
- payload = {
374
- dis_max: {
375
- queries: queries
428
+ # all + exclude option
429
+ if all
430
+ query = {
431
+ match_all: {}
376
432
  }
377
- }
378
-
379
- if conversions_fields.present? && options[:conversions] != false
380
- shoulds = []
381
- conversions_fields.each do |conversions_field|
382
- # wrap payload in a bool query
383
- script_score = {field_value_factor: {field: "#{conversions_field}.count"}}
384
433
 
385
- shoulds << {
386
- nested: {
387
- path: conversions_field,
388
- score_mode: "sum",
389
- query: {
390
- function_score: {
391
- boost_mode: "replace",
392
- query: {
393
- match: {
394
- "#{conversions_field}.query" => options[:conversions_term] || term
395
- }
396
- }
397
- }.merge(script_score)
398
- }
399
- }
400
- }
401
- end
434
+ should = []
435
+ else
436
+ # higher score for matching more fields
402
437
  payload = {
403
438
  bool: {
404
- must: payload,
405
- should: shoulds
439
+ should: queries.map { |qs| {dis_max: {queries: qs}} }
406
440
  }
407
441
  }
442
+
443
+ should.concat(set_conversions)
408
444
  end
445
+
446
+ query = payload
447
+ end
448
+
449
+ payload = {}
450
+
451
+ # type when inheritance
452
+ where = ensure_permitted(options[:where] || {}).dup
453
+ if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
454
+ where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
409
455
  end
410
456
 
457
+ models = Array(options[:models])
458
+ if models.any? { |m| m != m.searchkick_klass }
459
+ # aliases are not supported with _index in ES below 7.5
460
+ # see https://github.com/elastic/elasticsearch/pull/46640
461
+ if below75?
462
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
463
+ else
464
+ index_type_or =
465
+ models.map do |m|
466
+ v = {_index: m.searchkick_index.name}
467
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
468
+ v
469
+ end
470
+
471
+ where[:or] = Array(where[:or]) + [index_type_or]
472
+ end
473
+ end
474
+
475
+ # start everything as efficient filters
476
+ # move to post_filters as aggs demand
477
+ filters = where_filters(where)
478
+ post_filters = []
479
+
480
+ # aggregations
481
+ set_aggregations(payload, filters, post_filters) if options[:aggs]
482
+
483
+ # post filters
484
+ set_post_filters(payload, post_filters) if post_filters.any?
485
+
411
486
  custom_filters = []
412
487
  multiply_filters = []
413
488
 
414
489
  set_boost_by(multiply_filters, custom_filters)
415
490
  set_boost_where(custom_filters)
416
491
  set_boost_by_distance(custom_filters) if options[:boost_by_distance]
492
+ set_boost_by_recency(custom_filters) if options[:boost_by_recency]
417
493
 
418
- if custom_filters.any?
419
- payload = {
420
- function_score: {
421
- functions: custom_filters,
422
- query: payload,
423
- score_mode: "sum"
424
- }
425
- }
426
- end
427
-
428
- if multiply_filters.any?
429
- payload = {
430
- function_score: {
431
- functions: multiply_filters,
432
- query: payload,
433
- score_mode: "multiply"
434
- }
435
- }
436
- end
494
+ payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)
437
495
 
438
- payload = {
439
- query: payload,
440
- size: per_page,
441
- from: offset
442
- }
443
496
  payload[:explain] = options[:explain] if options[:explain]
444
497
  payload[:profile] = options[:profile] if options[:profile]
445
498
 
@@ -449,13 +502,6 @@ module Searchkick
449
502
  # indices_boost
450
503
  set_boost_by_indices(payload)
451
504
 
452
- # filters
453
- filters = where_filters(options[:where])
454
- set_filters(payload, filters) if filters.any?
455
-
456
- # aggregations
457
- set_aggregations(payload) if options[:aggs]
458
-
459
505
  # suggestions
460
506
  set_suggestions(payload, options[:suggest]) if options[:suggest]
461
507
 
@@ -463,11 +509,10 @@ module Searchkick
463
509
  set_highlights(payload, fields) if options[:highlight]
464
510
 
465
511
  # timeout shortly after client times out
466
- payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
512
+ payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"
467
513
 
468
514
  # An empty array will cause only the _id and _type for each hit to be returned
469
- # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
470
- # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
515
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
471
516
  if options[:select]
472
517
  if options[:select] == []
473
518
  # intuitively [] makes sense to return no fields, but ES by default returns all fields
@@ -480,28 +525,49 @@ module Searchkick
480
525
  end
481
526
  end
482
527
 
528
+ # pagination
529
+ pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
530
+ if !options[:body] || pagination_options
531
+ payload[:size] = per_page
532
+ payload[:from] = offset if offset > 0
533
+ end
534
+
483
535
  # type
484
- if options[:type] || (klass != searchkick_klass && searchkick_index)
536
+ if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
485
537
  @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
486
538
  end
487
539
 
488
540
  # routing
489
541
  @routing = options[:routing] if options[:routing]
490
542
 
543
+ if track_total_hits?
544
+ payload[:track_total_hits] = true
545
+ end
546
+
491
547
  # merge more body options
492
548
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
493
549
 
550
+ # run block
551
+ options[:block].call(payload) if options[:block]
552
+
553
+ # scroll optimization when interating over all docs
554
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
555
+ if options[:scroll] && payload[:query] == {match_all: {}}
556
+ payload[:sort] ||= ["_doc"]
557
+ end
558
+
494
559
  @body = payload
495
560
  @page = page
496
561
  @per_page = per_page
497
562
  @padding = padding
498
563
  @load = load
564
+ @scroll = scroll
499
565
  end
500
566
 
501
567
  def set_fields
502
568
  boost_fields = {}
503
569
  fields = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
504
- all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : below60?
570
+ all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : false
505
571
  default_match = options[:match] || searchkick_options[:match] || :word
506
572
  fields =
507
573
  if fields
@@ -516,12 +582,89 @@ module Searchkick
516
582
  ["_all"]
517
583
  elsif all && default_match == :phrase
518
584
  ["_all.phrase"]
519
- else
585
+ elsif term != "*" && default_match == :exact
520
586
  raise ArgumentError, "Must specify fields to search"
587
+ else
588
+ [default_match == :word ? "*.analyzed" : "*.#{default_match}"]
521
589
  end
522
590
  [boost_fields, fields]
523
591
  end
524
592
 
593
+ def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
594
+ if filters.any? || must_not.any? || should.any?
595
+ bool = {}
596
+ bool[:must] = query if query
597
+ bool[:filter] = filters if filters.any? # where
598
+ bool[:must_not] = must_not if must_not.any? # exclude
599
+ bool[:should] = should if should.any? # conversions
600
+ query = {bool: bool}
601
+ end
602
+
603
+ if custom_filters.any?
604
+ query = {
605
+ function_score: {
606
+ functions: custom_filters,
607
+ query: query,
608
+ score_mode: "sum"
609
+ }
610
+ }
611
+ end
612
+
613
+ if multiply_filters.any?
614
+ query = {
615
+ function_score: {
616
+ functions: multiply_filters,
617
+ query: query,
618
+ score_mode: "multiply"
619
+ }
620
+ }
621
+ end
622
+
623
+ query
624
+ end
625
+
626
+ def set_conversions
627
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
628
+ if conversions_fields.present? && options[:conversions] != false
629
+ conversions_fields.map do |conversions_field|
630
+ {
631
+ nested: {
632
+ path: conversions_field,
633
+ score_mode: "sum",
634
+ query: {
635
+ function_score: {
636
+ boost_mode: "replace",
637
+ query: {
638
+ match: {
639
+ "#{conversions_field}.query" => options[:conversions_term] || term
640
+ }
641
+ },
642
+ field_value_factor: {
643
+ field: "#{conversions_field}.count"
644
+ }
645
+ }
646
+ }
647
+ }
648
+ }
649
+ end
650
+ else
651
+ []
652
+ end
653
+ end
654
+
655
+ def set_exclude(field, analyzer)
656
+ Array(options[:exclude]).map do |phrase|
657
+ {
658
+ multi_match: {
659
+ fields: [field],
660
+ query: phrase,
661
+ analyzer: analyzer,
662
+ type: "phrase"
663
+ }
664
+ }
665
+ end
666
+ end
667
+
525
668
  def set_boost_by_distance(custom_filters)
526
669
  boost_by_distance = options[:boost_by_distance] || {}
527
670
 
@@ -535,9 +678,11 @@ module Searchkick
535
678
  unless attributes[:origin]
536
679
  raise ArgumentError, "boost_by_distance requires :origin"
537
680
  end
538
- function_params = attributes.select { |k, _| [:origin, :scale, :offset, :decay].include?(k) }
681
+
682
+ function_params = attributes.except(:factor, :function)
539
683
  function_params[:origin] = location_value(function_params[:origin])
540
684
  custom_filters << {
685
+ weight: attributes[:factor] || 1,
541
686
  attributes[:function] => {
542
687
  field => function_params
543
688
  }
@@ -545,16 +690,29 @@ module Searchkick
545
690
  end
546
691
  end
547
692
 
693
+ def set_boost_by_recency(custom_filters)
694
+ options[:boost_by_recency].each do |field, attributes|
695
+ attributes = {function: :gauss, origin: Time.now}.merge(attributes)
696
+
697
+ custom_filters << {
698
+ weight: attributes[:factor] || 1,
699
+ attributes[:function] => {
700
+ field => attributes.except(:factor, :function)
701
+ }
702
+ }
703
+ end
704
+ end
705
+
548
706
  def set_boost_by(multiply_filters, custom_filters)
549
707
  boost_by = options[:boost_by] || {}
550
708
  if boost_by.is_a?(Array)
551
- boost_by = Hash[boost_by.map { |f| [f, {factor: 1}] }]
709
+ boost_by = boost_by.to_h { |f| [f, {factor: 1}] }
552
710
  elsif boost_by.is_a?(Hash)
553
- multiply_by, boost_by = boost_by.partition { |_, v| v[:boost_mode] == "multiply" }.map { |i| Hash[i] }
711
+ multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
554
712
  end
555
713
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
556
714
 
557
- custom_filters.concat boost_filters(boost_by, log: true)
715
+ custom_filters.concat boost_filters(boost_by, modifier: "ln2p")
558
716
  multiply_filters.concat boost_filters(multiply_by || {})
559
717
  end
560
718
 
@@ -577,11 +735,9 @@ module Searchkick
577
735
  def set_boost_by_indices(payload)
578
736
  return unless options[:indices_boost]
579
737
 
580
- indices_boost = options[:indices_boost].each_with_object({}) do |(key, boost), memo|
738
+ indices_boost = options[:indices_boost].map do |key, boost|
581
739
  index = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
582
- # try to use index explicitly instead of alias: https://github.com/elasticsearch/elasticsearch/issues/4756
583
- index_by_alias = Searchkick.client.indices.get_alias(index: index).keys.first
584
- memo[index_by_alias || index] = boost
740
+ {index => boost}
585
741
  end
586
742
 
587
743
  payload[:indices_boost] = indices_boost
@@ -617,7 +773,8 @@ module Searchkick
617
773
 
618
774
  def set_highlights(payload, fields)
619
775
  payload[:highlight] = {
620
- fields: Hash[fields.map { |f| [f, {}] }]
776
+ fields: fields.to_h { |f| [f, {}] },
777
+ fragment_size: 0
621
778
  }
622
779
 
623
780
  if options[:highlight].is_a?(Hash)
@@ -646,15 +803,14 @@ module Searchkick
646
803
  @highlighted_fields = payload[:highlight][:fields].keys
647
804
  end
648
805
 
649
- def set_aggregations(payload)
806
+ def set_aggregations(payload, filters, post_filters)
650
807
  aggs = options[:aggs]
651
808
  payload[:aggs] = {}
652
809
 
653
- aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax
654
-
810
+ aggs = aggs.to_h { |f| [f, {}] } if aggs.is_a?(Array) # convert to more advanced syntax
655
811
  aggs.each do |field, agg_options|
656
812
  size = agg_options[:limit] ? agg_options[:limit] : 1_000
657
- shared_agg_options = agg_options.slice(:order, :min_doc_count)
813
+ shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
658
814
 
659
815
  if agg_options[:ranges]
660
816
  payload[:aggs][field] = {
@@ -670,20 +826,16 @@ module Searchkick
670
826
  ranges: agg_options[:date_ranges]
671
827
  }.merge(shared_agg_options)
672
828
  }
673
- elsif histogram = agg_options[:date_histogram]
674
- interval = histogram[:interval]
829
+ elsif (histogram = agg_options[:date_histogram])
675
830
  payload[:aggs][field] = {
676
- date_histogram: {
677
- field: histogram[:field],
678
- interval: interval
679
- }
680
- }
681
- elsif metric = @@metric_aggs.find { |k| agg_options.has_key?(k) }
831
+ date_histogram: histogram
832
+ }.merge(shared_agg_options)
833
+ elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
682
834
  payload[:aggs][field] = {
683
835
  metric => {
684
836
  field: agg_options[metric][:field] || field
685
837
  }
686
- }
838
+ }.merge(shared_agg_options)
687
839
  else
688
840
  payload[:aggs][field] = {
689
841
  terms: {
@@ -694,8 +846,20 @@ module Searchkick
694
846
  end
695
847
 
696
848
  where = {}
697
- where = (options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
698
- agg_filters = where_filters(where.merge(agg_options[:where] || {}))
849
+ where = ensure_permitted(options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
850
+ agg_where = ensure_permitted(agg_options[:where] || {})
851
+ agg_filters = where_filters(where.merge(agg_where))
852
+
853
+ # only do one level comparison for simplicity
854
+ filters.select! do |filter|
855
+ if agg_filters.include?(filter)
856
+ true
857
+ else
858
+ post_filters << filter
859
+ false
860
+ end
861
+ end
862
+
699
863
  if agg_filters.any?
700
864
  payload[:aggs][field] = {
701
865
  filter: {
@@ -711,29 +875,22 @@ module Searchkick
711
875
  end
712
876
  end
713
877
 
714
- def set_filters(payload, filters)
715
- if options[:aggs]
716
- payload[:post_filter] = {
717
- bool: {
718
- filter: filters
719
- }
720
- }
721
- else
722
- # more efficient query if no aggs
723
- payload[:query] = {
724
- bool: {
725
- must: payload[:query],
726
- filter: filters
727
- }
878
+ def set_post_filters(payload, post_filters)
879
+ payload[:post_filter] = {
880
+ bool: {
881
+ filter: post_filters
728
882
  }
729
- end
883
+ }
730
884
  end
731
885
 
732
- # TODO id transformation for arrays
733
886
  def set_order(payload)
734
- order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
735
- id_field = below50? ? :_id : :_uid
736
- payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }]
887
+ payload[:sort] = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
888
+ end
889
+
890
+ # provides *very* basic protection from unfiltered parameters
891
+ # this is not meant to be comprehensive and may be expanded in the future
892
+ def ensure_permitted(obj)
893
+ obj.to_h
737
894
  end
738
895
 
739
896
  def where_filters(where)
@@ -751,10 +908,12 @@ module Searchkick
751
908
  filters << {bool: {must_not: where_filters(value)}}
752
909
  elsif field == :_and
753
910
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
911
+ # elsif field == :_script
912
+ # filters << {script: {script: {source: value, lang: "painless"}}}
754
913
  else
755
914
  # expand ranges
756
915
  if value.is_a?(Range)
757
- value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
916
+ value = expand_range(value)
758
917
  end
759
918
 
760
919
  value = {in: value} if value.is_a?(Array)
@@ -762,7 +921,7 @@ module Searchkick
762
921
  if value.is_a?(Hash)
763
922
  value.each do |op, op_value|
764
923
  case op
765
- when :within, :bottom_right
924
+ when :within, :bottom_right, :bottom_left
766
925
  # do nothing
767
926
  when :near
768
927
  filters << {
@@ -797,9 +956,44 @@ module Searchkick
797
956
  }
798
957
  }
799
958
  }
959
+ when :top_right
960
+ filters << {
961
+ geo_bounding_box: {
962
+ field => {
963
+ top_right: location_value(op_value),
964
+ bottom_left: location_value(value[:bottom_left])
965
+ }
966
+ }
967
+ }
968
+ when :like, :ilike
969
+ # based on Postgres
970
+ # https://www.postgresql.org/docs/current/functions-matching.html
971
+ # % matches zero or more characters
972
+ # _ matches one character
973
+ # \ is escape character
974
+ # escape Lucene reserved characters
975
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
976
+ reserved = %w(\\ . ? + * | { } [ ] ( ) ")
977
+ regex = op_value.dup
978
+ reserved.each do |v|
979
+ regex.gsub!(v, "\\\\" + v)
980
+ end
981
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
982
+
983
+ if op == :ilike
984
+ if below710?
985
+ raise ArgumentError, "ilike requires Elasticsearch 7.10+"
986
+ else
987
+ filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
988
+ end
989
+ else
990
+ filters << {regexp: {field => {value: regex, flags: "NONE"}}}
991
+ end
992
+ when :prefix
993
+ filters << {prefix: {field => {value: op_value}}}
800
994
  when :regexp # support for regexp queries without using a regexp ruby object
801
995
  filters << {regexp: {field => {value: op_value}}}
802
- when :not # not equal
996
+ when :not, :_not # not equal
803
997
  filters << {bool: {must_not: term_filters(field, op_value)}}
804
998
  when :all
805
999
  op_value.each do |val|
@@ -807,6 +1001,8 @@ module Searchkick
807
1001
  end
808
1002
  when :in
809
1003
  filters << term_filters(field, op_value)
1004
+ when :exists
1005
+ filters << {exists: {field: field}}
810
1006
  else
811
1007
  range_query =
812
1008
  case op
@@ -819,7 +1015,7 @@ module Searchkick
819
1015
  when :lte
820
1016
  {to: op_value, include_upper: true}
821
1017
  else
822
- raise "Unknown where operator: #{op.inspect}"
1018
+ raise ArgumentError, "Unknown where operator: #{op.inspect}"
823
1019
  end
824
1020
  # issue 132
825
1021
  if (existing = filters.find { |f| f[:range] && f[:range][field] })
@@ -847,57 +1043,75 @@ module Searchkick
847
1043
  elsif value.nil?
848
1044
  {bool: {must_not: {exists: {field: field}}}}
849
1045
  elsif value.is_a?(Regexp)
850
- {regexp: {field => {value: value.source}}}
1046
+ source = value.source
1047
+
1048
+ # TODO handle other regexp options
1049
+
1050
+ # TODO handle other anchor characters, like ^, $, \Z
1051
+ if source.start_with?("\\A")
1052
+ source = source[2..-1]
1053
+ else
1054
+ source = ".*#{source}"
1055
+ end
1056
+
1057
+ if source.end_with?("\\z")
1058
+ source = source[0..-3]
1059
+ else
1060
+ source = "#{source}.*"
1061
+ end
1062
+
1063
+ if below710?
1064
+ if value.casefold?
1065
+ raise ArgumentError, "Case-insensitive flag does not work with Elasticsearch < 7.10"
1066
+ end
1067
+ {regexp: {field => {value: source, flags: "NONE"}}}
1068
+ else
1069
+ {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
1070
+ end
851
1071
  else
852
- {term: {field => value}}
1072
+ # TODO add this for other values
1073
+ if value.as_json.is_a?(Enumerable)
1074
+ # query will fail, but this is better
1075
+ # same message as Active Record
1076
+ raise TypeError, "can't cast #{value.class.name}"
1077
+ end
1078
+
1079
+ {term: {field => {value: value}}}
853
1080
  end
854
1081
  end
855
1082
 
856
1083
  def custom_filter(field, value, factor)
857
- if below50?
858
- {
859
- filter: {
860
- bool: {
861
- must: where_filters(field => value)
862
- }
863
- },
864
- boost_factor: factor
1084
+ {
1085
+ filter: where_filters(field => value),
1086
+ weight: factor
1087
+ }
1088
+ end
1089
+
1090
+ def boost_filter(field, factor: 1, modifier: nil, missing: nil)
1091
+ script_score = {
1092
+ field_value_factor: {
1093
+ field: field,
1094
+ factor: factor.to_f,
1095
+ modifier: modifier
865
1096
  }
1097
+ }
1098
+
1099
+ if missing
1100
+ script_score[:field_value_factor][:missing] = missing.to_f
866
1101
  else
867
- {
868
- filter: where_filters(field => value),
869
- weight: factor
1102
+ script_score[:filter] = {
1103
+ exists: {
1104
+ field: field
1105
+ }
870
1106
  }
871
1107
  end
1108
+
1109
+ script_score
872
1110
  end
873
1111
 
874
- def boost_filters(boost_by, options = {})
1112
+ def boost_filters(boost_by, modifier: nil)
875
1113
  boost_by.map do |field, value|
876
- log = value.key?(:log) ? value[:log] : options[:log]
877
- value[:factor] ||= 1
878
- script_score = {
879
- field_value_factor: {
880
- field: field,
881
- factor: value[:factor].to_f,
882
- modifier: log ? "ln2p" : nil
883
- }
884
- }
885
-
886
- if value[:missing]
887
- if below50?
888
- raise ArgumentError, "The missing option for boost_by is not supported in Elasticsearch < 5"
889
- else
890
- script_score[:field_value_factor][:missing] = value[:missing].to_f
891
- end
892
- else
893
- script_score[:filter] = {
894
- exists: {
895
- field: field
896
- }
897
- }
898
- end
899
-
900
- script_score
1114
+ boost_filter(field, modifier: modifier, **value)
901
1115
  end
902
1116
  end
903
1117
 
@@ -922,12 +1136,39 @@ module Searchkick
922
1136
  end
923
1137
  end
924
1138
 
925
- def below50?
926
- Searchkick.server_below?("5.0.0-alpha1")
1139
+ def expand_range(range)
1140
+ expanded = {}
1141
+ expanded[:gte] = range.begin if range.begin
1142
+
1143
+ if range.end && !(range.end.respond_to?(:infinite?) && range.end.infinite?)
1144
+ expanded[range.exclude_end? ? :lt : :lte] = range.end
1145
+ end
1146
+
1147
+ expanded
1148
+ end
1149
+
1150
+ def base_field(k)
1151
+ k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1152
+ end
1153
+
1154
+ def track_total_hits?
1155
+ searchkick_options[:deep_paging] || body_options[:track_total_hits]
1156
+ end
1157
+
1158
+ def body_options
1159
+ options[:body_options] || {}
1160
+ end
1161
+
1162
+ def below73?
1163
+ Searchkick.server_below?("7.3.0")
1164
+ end
1165
+
1166
+ def below75?
1167
+ Searchkick.server_below?("7.5.0")
927
1168
  end
928
1169
 
929
- def below60?
930
- Searchkick.server_below?("6.0.0-alpha1")
1170
+ def below710?
1171
+ Searchkick.server_below?("7.10.0")
931
1172
  end
932
1173
  end
933
1174
  end