searchkick 2.3.2 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -12,14 +12,14 @@ module Searchkick
12
12
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
13
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
14
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
15
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll
16
16
 
17
17
  def initialize(klass, term = "*", **options)
18
- unknown_keywords = options.keys - [:aggs, :body, :body_options, :boost,
19
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
18
+ unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
19
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
20
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
- :match, :misspellings, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where]
21
+ :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
23
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
24
 
25
25
  term = term.to_s
@@ -39,6 +39,7 @@ module Searchkick
39
39
  @misspellings = false
40
40
  @misspellings_below = nil
41
41
  @highlighted_fields = nil
42
+ @index_mapping = nil
42
43
 
43
44
  prepare
44
45
  end
@@ -56,9 +57,19 @@ module Searchkick
56
57
  end
57
58
 
58
59
  def params
60
+ if options[:models]
61
+ @index_mapping = {}
62
+ Array(options[:models]).each do |model|
63
+ # there can be multiple models per index name due to inheritance - see #1259
64
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
65
+ end
66
+ end
67
+
59
68
  index =
60
69
  if options[:index_name]
61
70
  Array(options[:index_name]).map { |v| v.respond_to?(:searchkick_index) ? v.searchkick_index.name : v }.join(",")
71
+ elsif options[:models]
72
+ @index_mapping.keys.join(",")
62
73
  elsif searchkick_index
63
74
  searchkick_index.name
64
75
  else
@@ -71,6 +82,7 @@ module Searchkick
71
82
  }
72
83
  params[:type] = @type if @type
73
84
  params[:routing] = @routing if @routing
85
+ params[:scroll] = @scroll if @scroll
74
86
  params.merge!(options[:request_params]) if options[:request_params]
75
87
  params
76
88
  end
@@ -94,11 +106,16 @@ module Searchkick
94
106
  query = params
95
107
  type = query[:type]
96
108
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
109
+ request_params = query.except(:index, :type, :body)
97
110
 
98
111
  # no easy way to tell which host the client will use
99
112
  host = Searchkick.client.transport.hosts.first
100
113
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
101
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
114
+ params = ["pretty"]
115
+ request_params.each do |k, v|
116
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
117
+ end
118
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
102
119
  end
103
120
 
104
121
  def handle_response(response)
@@ -111,11 +128,19 @@ module Searchkick
111
128
  model_includes: options[:model_includes],
112
129
  json: !@json.nil?,
113
130
  match_suffix: @match_suffix,
131
+ highlight: options[:highlight],
114
132
  highlighted_fields: @highlighted_fields || [],
115
- misspellings: @misspellings
133
+ misspellings: @misspellings,
134
+ term: term,
135
+ scope_results: options[:scope_results],
136
+ total_entries: options[:total_entries],
137
+ index_mapping: @index_mapping,
138
+ suggest: options[:suggest],
139
+ scroll: options[:scroll]
116
140
  }
117
141
 
118
142
  if options[:debug]
143
+ # can remove when minimum Ruby version is 2.5
119
144
  require "pp"
120
145
 
121
146
  puts "Searchkick Version: #{Searchkick::VERSION}"
@@ -133,7 +158,7 @@ module Searchkick
133
158
  if searchkick_index
134
159
  puts "Model Search Data"
135
160
  begin
136
- pp klass.first(3).map { |r| {index: searchkick_index.record_data(r).merge(data: searchkick_index.send(:search_data, r))}}
161
+ pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
137
162
  rescue => e
138
163
  puts "#{e.class.name}: #{e.message}"
139
164
  end
@@ -161,7 +186,7 @@ module Searchkick
161
186
  end
162
187
 
163
188
  def retry_misspellings?(response)
164
- @misspellings_below && response["hits"]["total"] < @misspellings_below
189
+ @misspellings_below && Searchkick::Results.new(searchkick_klass, response).total_count < @misspellings_below
165
190
  end
166
191
 
167
192
  private
@@ -177,15 +202,15 @@ module Searchkick
177
202
  e.message.include?("No query registered for [function_score]")
178
203
  )
179
204
 
180
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
205
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
181
206
  elsif status_code == 400
182
207
  if (
183
208
  e.message.include?("bool query does not support [filter]") ||
184
209
  e.message.include?("[bool] filter does not support [filter]")
185
210
  )
186
211
 
187
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
188
- elsif e.message.include?("[multi_match] analyzer [searchkick_search] not found")
212
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
213
+ elsif e.message =~ /analyzer \[searchkick_.+\] not found/
189
214
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
190
215
  else
191
216
  raise InvalidQueryError, e.message
@@ -210,40 +235,47 @@ module Searchkick
210
235
 
211
236
  # pagination
212
237
  page = [options[:page].to_i, 1].max
213
- per_page = (options[:limit] || options[:per_page] || 1_000).to_i
238
+ # maybe use index.max_result_window in the future
239
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
240
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
214
241
  padding = [options[:padding].to_i, 0].max
215
242
  offset = options[:offset] || (page - 1) * per_page + padding
243
+ scroll = options[:scroll]
216
244
 
217
245
  # model and eager loading
218
246
  load = options[:load].nil? ? true : options[:load]
219
247
 
220
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
221
-
222
248
  all = term == "*"
223
249
 
224
250
  @json = options[:body]
225
251
  if @json
226
252
  ignored_options = options.keys & [:aggs, :boost,
227
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :exclude, :explain,
228
- :fields, :highlight, :indices_boost, :limit, :match, :misspellings, :offset, :operator, :order,
229
- :padding, :page, :per_page, :select, :smart_aggs, :suggest, :where]
230
- warn "The body option replaces the entire body, so the following options are ignored: #{ignored_options.join(", ")}" if ignored_options.any?
253
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain,
254
+ :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order,
255
+ :profile, :select, :smart_aggs, :suggest, :where]
256
+ raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
231
257
  payload = @json
232
258
  else
259
+ must_not = []
260
+ should = []
261
+
233
262
  if options[:similar]
234
- payload = {
263
+ query = {
235
264
  more_like_this: {
236
- like_text: term,
265
+ like: term,
237
266
  min_doc_freq: 1,
238
267
  min_term_freq: 1,
239
268
  analyzer: "searchkick_search2"
240
269
  }
241
270
  }
271
+ if fields.all? { |f| f.start_with?("*.") }
272
+ raise ArgumentError, "Must specify fields to search"
273
+ end
242
274
  if fields != ["_all"]
243
- payload[:more_like_this][:fields] = fields
275
+ query[:more_like_this][:fields] = fields
244
276
  end
245
- elsif all
246
- payload = {
277
+ elsif all && !options[:exclude]
278
+ query = {
247
279
  match_all: {}
248
280
  }
249
281
  else
@@ -272,6 +304,15 @@ module Searchkick
272
304
  prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
273
305
  default_max_expansions = @misspellings_below ? 20 : 3
274
306
  max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
307
+ misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)
308
+
309
+ if misspellings_fields
310
+ missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
311
+ if missing_fields.any?
312
+ raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
313
+ end
314
+ end
315
+
275
316
  @misspellings = true
276
317
  else
277
318
  @misspellings = false
@@ -306,12 +347,16 @@ module Searchkick
306
347
  exclude_analyzer = nil
307
348
  exclude_field = field
308
349
 
350
+ field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
351
+
309
352
  if field == "_all" || field.end_with?(".analyzed")
310
- shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
311
- qs.concat [
312
- shared_options.merge(analyzer: "searchkick_search"),
313
- shared_options.merge(analyzer: "searchkick_search2")
314
- ]
353
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
354
+ qs << shared_options.merge(analyzer: "searchkick_search")
355
+
356
+ # searchkick_search and searchkick_search2 are the same for ukrainian
357
+ unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language])
358
+ qs << shared_options.merge(analyzer: "searchkick_search2")
359
+ end
315
360
  exclude_analyzer = "searchkick_search2"
316
361
  elsif field.end_with?(".exact")
317
362
  f = field.split(".")[0..-2].join(".")
@@ -324,11 +369,20 @@ module Searchkick
324
369
  exclude_analyzer = analyzer
325
370
  end
326
371
 
327
- if misspellings != false && match_type == :match
328
- qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
372
+ if field_misspellings != false && match_type == :match
373
+ qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
329
374
  end
330
375
 
331
- q2 = qs.map { |q| {match_type => {field => q}} }
376
+ if field.start_with?("*.")
377
+ q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
378
+ if below61?
379
+ q2.each do |q|
380
+ q[:multi_match].delete(:fuzzy_transpositions)
381
+ end
382
+ end
383
+ else
384
+ q2 = qs.map { |q| {match_type => {field => q}} }
385
+ end
332
386
 
333
387
  # boost exact matches more
334
388
  if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
@@ -346,100 +400,81 @@ module Searchkick
346
400
  queries_to_add.concat(q2)
347
401
  end
348
402
 
349
- if options[:exclude]
350
- must_not =
351
- Array(options[:exclude]).map do |phrase|
352
- {
353
- match_phrase: {
354
- exclude_field => {
355
- query: phrase,
356
- analyzer: exclude_analyzer
357
- }
358
- }
359
- }
360
- end
403
+ queries << queries_to_add
361
404
 
362
- queries_to_add = [{
363
- bool: {
364
- should: queries_to_add,
365
- must_not: must_not
366
- }
367
- }]
405
+ if options[:exclude]
406
+ must_not.concat(set_exclude(exclude_field, exclude_analyzer))
368
407
  end
369
-
370
- queries.concat(queries_to_add)
371
408
  end
372
409
 
373
- payload = {
374
- dis_max: {
375
- queries: queries
410
+ # all + exclude option
411
+ if all
412
+ query = {
413
+ match_all: {}
376
414
  }
377
- }
378
-
379
- if conversions_fields.present? && options[:conversions] != false
380
- shoulds = []
381
- conversions_fields.each do |conversions_field|
382
- # wrap payload in a bool query
383
- script_score = {field_value_factor: {field: "#{conversions_field}.count"}}
384
415
 
385
- shoulds << {
386
- nested: {
387
- path: conversions_field,
388
- score_mode: "sum",
389
- query: {
390
- function_score: {
391
- boost_mode: "replace",
392
- query: {
393
- match: {
394
- "#{conversions_field}.query" => options[:conversions_term] || term
395
- }
396
- }
397
- }.merge(script_score)
398
- }
399
- }
400
- }
401
- end
416
+ should = []
417
+ else
418
+ # higher score for matching more fields
402
419
  payload = {
403
420
  bool: {
404
- must: payload,
405
- should: shoulds
421
+ should: queries.map { |qs| {dis_max: {queries: qs}} }
406
422
  }
407
423
  }
424
+
425
+ should.concat(set_conversions)
426
+ end
427
+
428
+ query = payload
429
+ end
430
+
431
+ payload = {}
432
+
433
+ # type when inheritance
434
+ where = (options[:where] || {}).dup
435
+ if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
436
+ where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
437
+ end
438
+
439
+ models = Array(options[:models])
440
+ if models.any? { |m| m != m.searchkick_klass }
441
+ # aliases are not supported with _index in ES below 7.5
442
+ # see https://github.com/elastic/elasticsearch/pull/46640
443
+ if below75?
444
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
445
+ else
446
+ index_type_or =
447
+ models.map do |m|
448
+ v = {_index: m.searchkick_index.name}
449
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
450
+ v
451
+ end
452
+
453
+ where[:or] = Array(where[:or]) + [index_type_or]
408
454
  end
409
455
  end
410
456
 
457
+ # start everything as efficient filters
458
+ # move to post_filters as aggs demand
459
+ filters = where_filters(where)
460
+ post_filters = []
461
+
462
+ # aggregations
463
+ set_aggregations(payload, filters, post_filters) if options[:aggs]
464
+
465
+ # post filters
466
+ set_post_filters(payload, post_filters) if post_filters.any?
467
+
411
468
  custom_filters = []
412
469
  multiply_filters = []
413
470
 
414
471
  set_boost_by(multiply_filters, custom_filters)
415
472
  set_boost_where(custom_filters)
416
473
  set_boost_by_distance(custom_filters) if options[:boost_by_distance]
474
+ set_boost_by_recency(custom_filters) if options[:boost_by_recency]
417
475
 
418
- if custom_filters.any?
419
- payload = {
420
- function_score: {
421
- functions: custom_filters,
422
- query: payload,
423
- score_mode: "sum"
424
- }
425
- }
426
- end
427
-
428
- if multiply_filters.any?
429
- payload = {
430
- function_score: {
431
- functions: multiply_filters,
432
- query: payload,
433
- score_mode: "multiply"
434
- }
435
- }
436
- end
476
+ payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)
437
477
 
438
- payload = {
439
- query: payload,
440
- size: per_page,
441
- from: offset
442
- }
443
478
  payload[:explain] = options[:explain] if options[:explain]
444
479
  payload[:profile] = options[:profile] if options[:profile]
445
480
 
@@ -449,13 +484,6 @@ module Searchkick
449
484
  # indices_boost
450
485
  set_boost_by_indices(payload)
451
486
 
452
- # filters
453
- filters = where_filters(options[:where])
454
- set_filters(payload, filters) if filters.any?
455
-
456
- # aggregations
457
- set_aggregations(payload) if options[:aggs]
458
-
459
487
  # suggestions
460
488
  set_suggestions(payload, options[:suggest]) if options[:suggest]
461
489
 
@@ -466,8 +494,7 @@ module Searchkick
466
494
  payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
467
495
 
468
496
  # An empty array will cause only the _id and _type for each hit to be returned
469
- # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
470
- # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
497
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
471
498
  if options[:select]
472
499
  if options[:select] == []
473
500
  # intuitively [] makes sense to return no fields, but ES by default returns all fields
@@ -480,28 +507,49 @@ module Searchkick
480
507
  end
481
508
  end
482
509
 
510
+ # pagination
511
+ pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
512
+ if !options[:body] || pagination_options
513
+ payload[:size] = per_page
514
+ payload[:from] = offset if offset > 0
515
+ end
516
+
483
517
  # type
484
- if options[:type] || (klass != searchkick_klass && searchkick_index)
518
+ if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
485
519
  @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
486
520
  end
487
521
 
488
522
  # routing
489
523
  @routing = options[:routing] if options[:routing]
490
524
 
525
+ if track_total_hits?
526
+ payload[:track_total_hits] = true
527
+ end
528
+
491
529
  # merge more body options
492
530
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
493
531
 
532
+ # run block
533
+ options[:block].call(payload) if options[:block]
534
+
535
+ # scroll optimization when iterating over all docs
536
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
537
+ if options[:scroll] && payload[:query] == {match_all: {}}
538
+ payload[:sort] ||= ["_doc"]
539
+ end
540
+
494
541
  @body = payload
495
542
  @page = page
496
543
  @per_page = per_page
497
544
  @padding = padding
498
545
  @load = load
546
+ @scroll = scroll
499
547
  end
500
548
 
501
549
  def set_fields
502
550
  boost_fields = {}
503
551
  fields = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
504
- all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : below60?
552
+ all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : false
505
553
  default_match = options[:match] || searchkick_options[:match] || :word
506
554
  fields =
507
555
  if fields
@@ -516,12 +564,89 @@ module Searchkick
516
564
  ["_all"]
517
565
  elsif all && default_match == :phrase
518
566
  ["_all.phrase"]
519
- else
567
+ elsif term != "*" && default_match == :exact
520
568
  raise ArgumentError, "Must specify fields to search"
569
+ else
570
+ [default_match == :word ? "*.analyzed" : "*.#{default_match}"]
521
571
  end
522
572
  [boost_fields, fields]
523
573
  end
524
574
 
575
+ def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
576
+ if filters.any? || must_not.any? || should.any?
577
+ bool = {}
578
+ bool[:must] = query if query
579
+ bool[:filter] = filters if filters.any? # where
580
+ bool[:must_not] = must_not if must_not.any? # exclude
581
+ bool[:should] = should if should.any? # conversions
582
+ query = {bool: bool}
583
+ end
584
+
585
+ if custom_filters.any?
586
+ query = {
587
+ function_score: {
588
+ functions: custom_filters,
589
+ query: query,
590
+ score_mode: "sum"
591
+ }
592
+ }
593
+ end
594
+
595
+ if multiply_filters.any?
596
+ query = {
597
+ function_score: {
598
+ functions: multiply_filters,
599
+ query: query,
600
+ score_mode: "multiply"
601
+ }
602
+ }
603
+ end
604
+
605
+ query
606
+ end
607
+
608
+ def set_conversions
609
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
610
+ if conversions_fields.present? && options[:conversions] != false
611
+ conversions_fields.map do |conversions_field|
612
+ {
613
+ nested: {
614
+ path: conversions_field,
615
+ score_mode: "sum",
616
+ query: {
617
+ function_score: {
618
+ boost_mode: "replace",
619
+ query: {
620
+ match: {
621
+ "#{conversions_field}.query" => options[:conversions_term] || term
622
+ }
623
+ },
624
+ field_value_factor: {
625
+ field: "#{conversions_field}.count"
626
+ }
627
+ }
628
+ }
629
+ }
630
+ }
631
+ end
632
+ else
633
+ []
634
+ end
635
+ end
636
+
637
+ def set_exclude(field, analyzer)
638
+ Array(options[:exclude]).map do |phrase|
639
+ {
640
+ multi_match: {
641
+ fields: [field],
642
+ query: phrase,
643
+ analyzer: analyzer,
644
+ type: "phrase"
645
+ }
646
+ }
647
+ end
648
+ end
649
+
525
650
  def set_boost_by_distance(custom_filters)
526
651
  boost_by_distance = options[:boost_by_distance] || {}
527
652
 
@@ -535,9 +660,11 @@ module Searchkick
535
660
  unless attributes[:origin]
536
661
  raise ArgumentError, "boost_by_distance requires :origin"
537
662
  end
538
- function_params = attributes.select { |k, _| [:origin, :scale, :offset, :decay].include?(k) }
663
+
664
+ function_params = attributes.except(:factor, :function)
539
665
  function_params[:origin] = location_value(function_params[:origin])
540
666
  custom_filters << {
667
+ weight: attributes[:factor] || 1,
541
668
  attributes[:function] => {
542
669
  field => function_params
543
670
  }
@@ -545,16 +672,29 @@ module Searchkick
545
672
  end
546
673
  end
547
674
 
675
+ def set_boost_by_recency(custom_filters)
676
+ options[:boost_by_recency].each do |field, attributes|
677
+ attributes = {function: :gauss, origin: Time.now}.merge(attributes)
678
+
679
+ custom_filters << {
680
+ weight: attributes[:factor] || 1,
681
+ attributes[:function] => {
682
+ field => attributes.except(:factor, :function)
683
+ }
684
+ }
685
+ end
686
+ end
687
+
548
688
  def set_boost_by(multiply_filters, custom_filters)
549
689
  boost_by = options[:boost_by] || {}
550
690
  if boost_by.is_a?(Array)
551
691
  boost_by = Hash[boost_by.map { |f| [f, {factor: 1}] }]
552
692
  elsif boost_by.is_a?(Hash)
553
- multiply_by, boost_by = boost_by.partition { |_, v| v[:boost_mode] == "multiply" }.map { |i| Hash[i] }
693
+ multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map { |i| Hash[i] }
554
694
  end
555
695
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
556
696
 
557
- custom_filters.concat boost_filters(boost_by, log: true)
697
+ custom_filters.concat boost_filters(boost_by, modifier: "ln2p")
558
698
  multiply_filters.concat boost_filters(multiply_by || {})
559
699
  end
560
700
 
@@ -577,11 +717,9 @@ module Searchkick
577
717
  def set_boost_by_indices(payload)
578
718
  return unless options[:indices_boost]
579
719
 
580
- indices_boost = options[:indices_boost].each_with_object({}) do |(key, boost), memo|
720
+ indices_boost = options[:indices_boost].map do |key, boost|
581
721
  index = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
582
- # try to use index explicitly instead of alias: https://github.com/elasticsearch/elasticsearch/issues/4756
583
- index_by_alias = Searchkick.client.indices.get_alias(index: index).keys.first
584
- memo[index_by_alias || index] = boost
722
+ {index => boost}
585
723
  end
586
724
 
587
725
  payload[:indices_boost] = indices_boost
@@ -617,7 +755,8 @@ module Searchkick
617
755
 
618
756
  def set_highlights(payload, fields)
619
757
  payload[:highlight] = {
620
- fields: Hash[fields.map { |f| [f, {}] }]
758
+ fields: Hash[fields.map { |f| [f, {}] }],
759
+ fragment_size: 0
621
760
  }
622
761
 
623
762
  if options[:highlight].is_a?(Hash)
@@ -646,15 +785,14 @@ module Searchkick
646
785
  @highlighted_fields = payload[:highlight][:fields].keys
647
786
  end
648
787
 
649
- def set_aggregations(payload)
788
+ def set_aggregations(payload, filters, post_filters)
650
789
  aggs = options[:aggs]
651
790
  payload[:aggs] = {}
652
791
 
653
792
  aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax
654
-
655
793
  aggs.each do |field, agg_options|
656
794
  size = agg_options[:limit] ? agg_options[:limit] : 1_000
657
- shared_agg_options = agg_options.slice(:order, :min_doc_count)
795
+ shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
658
796
 
659
797
  if agg_options[:ranges]
660
798
  payload[:aggs][field] = {
@@ -670,20 +808,16 @@ module Searchkick
670
808
  ranges: agg_options[:date_ranges]
671
809
  }.merge(shared_agg_options)
672
810
  }
673
- elsif histogram = agg_options[:date_histogram]
674
- interval = histogram[:interval]
811
+ elsif (histogram = agg_options[:date_histogram])
675
812
  payload[:aggs][field] = {
676
- date_histogram: {
677
- field: histogram[:field],
678
- interval: interval
679
- }
680
- }
681
- elsif metric = @@metric_aggs.find { |k| agg_options.has_key?(k) }
813
+ date_histogram: histogram
814
+ }.merge(shared_agg_options)
815
+ elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
682
816
  payload[:aggs][field] = {
683
817
  metric => {
684
818
  field: agg_options[metric][:field] || field
685
819
  }
686
- }
820
+ }.merge(shared_agg_options)
687
821
  else
688
822
  payload[:aggs][field] = {
689
823
  terms: {
@@ -696,6 +830,17 @@ module Searchkick
696
830
  where = {}
697
831
  where = (options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
698
832
  agg_filters = where_filters(where.merge(agg_options[:where] || {}))
833
+
834
+ # only do one level comparison for simplicity
835
+ filters.select! do |filter|
836
+ if agg_filters.include?(filter)
837
+ true
838
+ else
839
+ post_filters << filter
840
+ false
841
+ end
842
+ end
843
+
699
844
  if agg_filters.any?
700
845
  payload[:aggs][field] = {
701
846
  filter: {
@@ -711,32 +856,27 @@ module Searchkick
711
856
  end
712
857
  end
713
858
 
714
- def set_filters(payload, filters)
715
- if options[:aggs]
716
- payload[:post_filter] = {
717
- bool: {
718
- filter: filters
719
- }
720
- }
721
- else
722
- # more efficient query if no aggs
723
- payload[:query] = {
724
- bool: {
725
- must: payload[:query],
726
- filter: filters
727
- }
859
+ def set_post_filters(payload, post_filters)
860
+ payload[:post_filter] = {
861
+ bool: {
862
+ filter: post_filters
728
863
  }
729
- end
864
+ }
730
865
  end
731
866
 
732
867
  # TODO id transformation for arrays
733
868
  def set_order(payload)
734
869
  order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
735
- id_field = below50? ? :_id : :_uid
870
+ id_field = :_id
736
871
  payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }]
737
872
  end
738
873
 
739
874
  def where_filters(where)
875
+ # if where.respond_to?(:permitted?) && !where.permitted?
876
+ # # TODO check in more places
877
+ # Searchkick.warn("Passing unpermitted parameters will raise an exception in Searchkick 5")
878
+ # end
879
+
740
880
  filters = []
741
881
  (where || {}).each do |field, value|
742
882
  field = :_id if field.to_s == "id"
@@ -751,10 +891,17 @@ module Searchkick
751
891
  filters << {bool: {must_not: where_filters(value)}}
752
892
  elsif field == :_and
753
893
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
894
+ # elsif field == :_script
895
+ # filters << {script: {script: {source: value, lang: "painless"}}}
754
896
  else
755
897
  # expand ranges
756
898
  if value.is_a?(Range)
757
- value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
899
+ # infinite? added in Ruby 2.4
900
+ if value.end.nil? || (value.end.respond_to?(:infinite?) && value.end.infinite?)
901
+ value = {gte: value.first}
902
+ else
903
+ value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
904
+ end
758
905
  end
759
906
 
760
907
  value = {in: value} if value.is_a?(Array)
@@ -762,7 +909,7 @@ module Searchkick
762
909
  if value.is_a?(Hash)
763
910
  value.each do |op, op_value|
764
911
  case op
765
- when :within, :bottom_right
912
+ when :within, :bottom_right, :bottom_left
766
913
  # do nothing
767
914
  when :near
768
915
  filters << {
@@ -797,9 +944,35 @@ module Searchkick
797
944
  }
798
945
  }
799
946
  }
947
+ when :top_right
948
+ filters << {
949
+ geo_bounding_box: {
950
+ field => {
951
+ top_right: location_value(op_value),
952
+ bottom_left: location_value(value[:bottom_left])
953
+ }
954
+ }
955
+ }
956
+ when :like
957
+ # based on Postgres
958
+ # https://www.postgresql.org/docs/current/functions-matching.html
959
+ # % matches zero or more characters
960
+ # _ matches one character
961
+ # \ is escape character
962
+ # escape Lucene reserved characters
963
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
964
+ reserved = %w(. ? + * | { } [ ] ( ) " \\)
965
+ regex = op_value.dup
966
+ reserved.each do |v|
967
+ regex.gsub!(v, "\\" + v)
968
+ end
969
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
970
+ filters << {regexp: {field => {value: regex}}}
971
+ when :prefix
972
+ filters << {prefix: {field => {value: op_value}}}
800
973
  when :regexp # support for regexp queries without using a regexp ruby object
801
974
  filters << {regexp: {field => {value: op_value}}}
802
- when :not # not equal
975
+ when :not, :_not # not equal
803
976
  filters << {bool: {must_not: term_filters(field, op_value)}}
804
977
  when :all
805
978
  op_value.each do |val|
@@ -807,6 +980,8 @@ module Searchkick
807
980
  end
808
981
  when :in
809
982
  filters << term_filters(field, op_value)
983
+ when :exists
984
+ filters << {exists: {field: field}}
810
985
  else
811
986
  range_query =
812
987
  case op
@@ -847,57 +1022,78 @@ module Searchkick
847
1022
  elsif value.nil?
848
1023
  {bool: {must_not: {exists: {field: field}}}}
849
1024
  elsif value.is_a?(Regexp)
850
- {regexp: {field => {value: value.source}}}
1025
+ if value.casefold?
1026
+ Searchkick.warn("Case-insensitive flag does not work with Elasticsearch")
1027
+ end
1028
+
1029
+ source = value.source
1030
+ unless source.start_with?("\\A") && source.end_with?("\\z")
1031
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-regexp-query.html
1032
+ Searchkick.warn("Regular expressions are always anchored in Elasticsearch")
1033
+ end
1034
+
1035
+ # TODO handle other anchor characters, like ^, $, \Z
1036
+ if source.start_with?("\\A")
1037
+ source = source[2..-1]
1038
+ else
1039
+ # TODO uncomment in Searchkick 5
1040
+ # source = ".*#{source}"
1041
+ end
1042
+
1043
+ if source.end_with?("\\z")
1044
+ source = source[0..-3]
1045
+ else
1046
+ # TODO uncomment in Searchkick 5
1047
+ # source = "#{source}.*"
1048
+ end
1049
+
1050
+ {regexp: {field => {value: source, flags: "NONE"}}}
851
1051
  else
852
- {term: {field => value}}
1052
+ # TODO add this for other values
1053
+ if value.as_json.is_a?(Enumerable)
1054
+ # query will fail, but this is better
1055
+ # same message as Active Record
1056
+ # TODO make TypeError
1057
+ # raise InvalidQueryError for backward compatibility
1058
+ raise Searchkick::InvalidQueryError, "can't cast #{value.class.name}"
1059
+ end
1060
+
1061
+ {term: {field => {value: value}}}
853
1062
  end
854
1063
  end
855
1064
 
856
1065
  def custom_filter(field, value, factor)
857
- if below50?
858
- {
859
- filter: {
860
- bool: {
861
- must: where_filters(field => value)
862
- }
863
- },
864
- boost_factor: factor
1066
+ {
1067
+ filter: where_filters(field => value),
1068
+ weight: factor
1069
+ }
1070
+ end
1071
+
1072
+ def boost_filter(field, factor: 1, modifier: nil, missing: nil)
1073
+ script_score = {
1074
+ field_value_factor: {
1075
+ field: field,
1076
+ factor: factor.to_f,
1077
+ modifier: modifier
865
1078
  }
1079
+ }
1080
+
1081
+ if missing
1082
+ script_score[:field_value_factor][:missing] = missing.to_f
866
1083
  else
867
- {
868
- filter: where_filters(field => value),
869
- weight: factor
1084
+ script_score[:filter] = {
1085
+ exists: {
1086
+ field: field
1087
+ }
870
1088
  }
871
1089
  end
1090
+
1091
+ script_score
872
1092
  end
873
1093
 
874
- def boost_filters(boost_by, options = {})
1094
+ def boost_filters(boost_by, modifier: nil)
875
1095
  boost_by.map do |field, value|
876
- log = value.key?(:log) ? value[:log] : options[:log]
877
- value[:factor] ||= 1
878
- script_score = {
879
- field_value_factor: {
880
- field: field,
881
- factor: value[:factor].to_f,
882
- modifier: log ? "ln2p" : nil
883
- }
884
- }
885
-
886
- if value[:missing]
887
- if below50?
888
- raise ArgumentError, "The missing option for boost_by is not supported in Elasticsearch < 5"
889
- else
890
- script_score[:field_value_factor][:missing] = value[:missing].to_f
891
- end
892
- else
893
- script_score[:filter] = {
894
- exists: {
895
- field: field
896
- }
897
- }
898
- end
899
-
900
- script_score
1096
+ boost_filter(field, modifier: modifier, **value)
901
1097
  end
902
1098
  end
903
1099
 
@@ -922,12 +1118,32 @@ module Searchkick
922
1118
  end
923
1119
  end
924
1120
 
925
- def below50?
926
- Searchkick.server_below?("5.0.0-alpha1")
1121
+ def base_field(k)
1122
+ k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1123
+ end
1124
+
1125
+ def track_total_hits?
1126
+ (searchkick_options[:deep_paging] && !below70?) || body_options[:track_total_hits]
1127
+ end
1128
+
1129
+ def body_options
1130
+ options[:body_options] || {}
1131
+ end
1132
+
1133
+ def below61?
1134
+ Searchkick.server_below?("6.1.0")
1135
+ end
1136
+
1137
+ def below70?
1138
+ Searchkick.server_below?("7.0.0")
1139
+ end
1140
+
1141
+ def below73?
1142
+ Searchkick.server_below?("7.3.0")
927
1143
  end
928
1144
 
929
- def below60?
930
- Searchkick.server_below?("6.0.0-alpha1")
1145
+ def below75?
1146
+ Searchkick.server_below?("7.5.0")
931
1147
  end
932
1148
  end
933
1149
  end