searchkick 2.3.2 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -12,14 +12,14 @@ module Searchkick
12
12
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
13
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
14
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
15
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll
16
16
 
17
17
  def initialize(klass, term = "*", **options)
18
- unknown_keywords = options.keys - [:aggs, :body, :body_options, :boost,
19
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
18
+ unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
19
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
20
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
- :match, :misspellings, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :select, :similar, :smart_aggs, :suggest, :track, :type, :where]
21
+ :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
23
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
24
 
25
25
  term = term.to_s
@@ -39,6 +39,7 @@ module Searchkick
39
39
  @misspellings = false
40
40
  @misspellings_below = nil
41
41
  @highlighted_fields = nil
42
+ @index_mapping = nil
42
43
 
43
44
  prepare
44
45
  end
@@ -56,9 +57,19 @@ module Searchkick
56
57
  end
57
58
 
58
59
  def params
60
+ if options[:models]
61
+ @index_mapping = {}
62
+ Array(options[:models]).each do |model|
63
+ # there can be multiple models per index name due to inheritance - see #1259
64
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
65
+ end
66
+ end
67
+
59
68
  index =
60
69
  if options[:index_name]
61
70
  Array(options[:index_name]).map { |v| v.respond_to?(:searchkick_index) ? v.searchkick_index.name : v }.join(",")
71
+ elsif options[:models]
72
+ @index_mapping.keys.join(",")
62
73
  elsif searchkick_index
63
74
  searchkick_index.name
64
75
  else
@@ -71,6 +82,7 @@ module Searchkick
71
82
  }
72
83
  params[:type] = @type if @type
73
84
  params[:routing] = @routing if @routing
85
+ params[:scroll] = @scroll if @scroll
74
86
  params.merge!(options[:request_params]) if options[:request_params]
75
87
  params
76
88
  end
@@ -94,11 +106,16 @@ module Searchkick
94
106
  query = params
95
107
  type = query[:type]
96
108
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
109
+ request_params = query.except(:index, :type, :body)
97
110
 
98
111
  # no easy way to tell which host the client will use
99
112
  host = Searchkick.client.transport.hosts.first
100
113
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
101
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
114
+ params = ["pretty"]
115
+ request_params.each do |k, v|
116
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
117
+ end
118
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
102
119
  end
103
120
 
104
121
  def handle_response(response)
@@ -111,11 +128,19 @@ module Searchkick
111
128
  model_includes: options[:model_includes],
112
129
  json: !@json.nil?,
113
130
  match_suffix: @match_suffix,
131
+ highlight: options[:highlight],
114
132
  highlighted_fields: @highlighted_fields || [],
115
- misspellings: @misspellings
133
+ misspellings: @misspellings,
134
+ term: term,
135
+ scope_results: options[:scope_results],
136
+ total_entries: options[:total_entries],
137
+ index_mapping: @index_mapping,
138
+ suggest: options[:suggest],
139
+ scroll: options[:scroll]
116
140
  }
117
141
 
118
142
  if options[:debug]
143
+ # can remove when minimum Ruby version is 2.5
119
144
  require "pp"
120
145
 
121
146
  puts "Searchkick Version: #{Searchkick::VERSION}"
@@ -133,7 +158,7 @@ module Searchkick
133
158
  if searchkick_index
134
159
  puts "Model Search Data"
135
160
  begin
136
- pp klass.first(3).map { |r| {index: searchkick_index.record_data(r).merge(data: searchkick_index.send(:search_data, r))}}
161
+ pp klass.limit(3).map { |r| RecordData.new(searchkick_index, r).index_data }
137
162
  rescue => e
138
163
  puts "#{e.class.name}: #{e.message}"
139
164
  end
@@ -161,7 +186,7 @@ module Searchkick
161
186
  end
162
187
 
163
188
  def retry_misspellings?(response)
164
- @misspellings_below && response["hits"]["total"] < @misspellings_below
189
+ @misspellings_below && Searchkick::Results.new(searchkick_klass, response).total_count < @misspellings_below
165
190
  end
166
191
 
167
192
  private
@@ -177,15 +202,15 @@ module Searchkick
177
202
  e.message.include?("No query registered for [function_score]")
178
203
  )
179
204
 
180
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
205
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
181
206
  elsif status_code == 400
182
207
  if (
183
208
  e.message.include?("bool query does not support [filter]") ||
184
209
  e.message.include?("[bool] filter does not support [filter]")
185
210
  )
186
211
 
187
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 2 or greater"
188
- elsif e.message.include?("[multi_match] analyzer [searchkick_search] not found")
212
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
213
+ elsif e.message =~ /analyzer \[searchkick_.+\] not found/
189
214
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
190
215
  else
191
216
  raise InvalidQueryError, e.message
@@ -210,40 +235,47 @@ module Searchkick
210
235
 
211
236
  # pagination
212
237
  page = [options[:page].to_i, 1].max
213
- per_page = (options[:limit] || options[:per_page] || 1_000).to_i
238
+ # maybe use index.max_result_window in the future
239
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
240
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
214
241
  padding = [options[:padding].to_i, 0].max
215
242
  offset = options[:offset] || (page - 1) * per_page + padding
243
+ scroll = options[:scroll]
216
244
 
217
245
  # model and eager loading
218
246
  load = options[:load].nil? ? true : options[:load]
219
247
 
220
- conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
221
-
222
248
  all = term == "*"
223
249
 
224
250
  @json = options[:body]
225
251
  if @json
226
252
  ignored_options = options.keys & [:aggs, :boost,
227
- :boost_by, :boost_by_distance, :boost_where, :conversions, :conversions_term, :exclude, :explain,
228
- :fields, :highlight, :indices_boost, :limit, :match, :misspellings, :offset, :operator, :order,
229
- :padding, :page, :per_page, :select, :smart_aggs, :suggest, :where]
230
- warn "The body option replaces the entire body, so the following options are ignored: #{ignored_options.join(", ")}" if ignored_options.any?
253
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :exclude, :explain,
254
+ :fields, :highlight, :indices_boost, :match, :misspellings, :operator, :order,
255
+ :profile, :select, :smart_aggs, :suggest, :where]
256
+ raise ArgumentError, "Options incompatible with body option: #{ignored_options.join(", ")}" if ignored_options.any?
231
257
  payload = @json
232
258
  else
259
+ must_not = []
260
+ should = []
261
+
233
262
  if options[:similar]
234
- payload = {
263
+ query = {
235
264
  more_like_this: {
236
- like_text: term,
265
+ like: term,
237
266
  min_doc_freq: 1,
238
267
  min_term_freq: 1,
239
268
  analyzer: "searchkick_search2"
240
269
  }
241
270
  }
271
+ if fields.all? { |f| f.start_with?("*.") }
272
+ raise ArgumentError, "Must specify fields to search"
273
+ end
242
274
  if fields != ["_all"]
243
- payload[:more_like_this][:fields] = fields
275
+ query[:more_like_this][:fields] = fields
244
276
  end
245
- elsif all
246
- payload = {
277
+ elsif all && !options[:exclude]
278
+ query = {
247
279
  match_all: {}
248
280
  }
249
281
  else
@@ -272,6 +304,15 @@ module Searchkick
272
304
  prefix_length = (misspellings.is_a?(Hash) && misspellings[:prefix_length]) || 0
273
305
  default_max_expansions = @misspellings_below ? 20 : 3
274
306
  max_expansions = (misspellings.is_a?(Hash) && misspellings[:max_expansions]) || default_max_expansions
307
+ misspellings_fields = misspellings.is_a?(Hash) && misspellings.key?(:fields) && misspellings[:fields].map(&:to_s)
308
+
309
+ if misspellings_fields
310
+ missing_fields = misspellings_fields - fields.map { |f| base_field(f) }
311
+ if missing_fields.any?
312
+ raise ArgumentError, "All fields in per-field misspellings must also be specified in fields option"
313
+ end
314
+ end
315
+
275
316
  @misspellings = true
276
317
  else
277
318
  @misspellings = false
@@ -306,12 +347,16 @@ module Searchkick
306
347
  exclude_analyzer = nil
307
348
  exclude_field = field
308
349
 
350
+ field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
351
+
309
352
  if field == "_all" || field.end_with?(".analyzed")
310
- shared_options[:cutoff_frequency] = 0.001 unless operator == "and" || misspellings == false
311
- qs.concat [
312
- shared_options.merge(analyzer: "searchkick_search"),
313
- shared_options.merge(analyzer: "searchkick_search2")
314
- ]
353
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
354
+ qs << shared_options.merge(analyzer: "searchkick_search")
355
+
356
+ # searchkick_search and searchkick_search2 are the same for ukrainian
357
+ unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language])
358
+ qs << shared_options.merge(analyzer: "searchkick_search2")
359
+ end
315
360
  exclude_analyzer = "searchkick_search2"
316
361
  elsif field.end_with?(".exact")
317
362
  f = field.split(".")[0..-2].join(".")
@@ -324,11 +369,20 @@ module Searchkick
324
369
  exclude_analyzer = analyzer
325
370
  end
326
371
 
327
- if misspellings != false && match_type == :match
328
- qs.concat qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) }
372
+ if field_misspellings != false && match_type == :match
373
+ qs.concat(qs.map { |q| q.except(:cutoff_frequency).merge(fuzziness: edit_distance, prefix_length: prefix_length, max_expansions: max_expansions, boost: factor).merge(transpositions) })
329
374
  end
330
375
 
331
- q2 = qs.map { |q| {match_type => {field => q}} }
376
+ if field.start_with?("*.")
377
+ q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
378
+ if below61?
379
+ q2.each do |q|
380
+ q[:multi_match].delete(:fuzzy_transpositions)
381
+ end
382
+ end
383
+ else
384
+ q2 = qs.map { |q| {match_type => {field => q}} }
385
+ end
332
386
 
333
387
  # boost exact matches more
334
388
  if field =~ /\.word_(start|middle|end)\z/ && searchkick_options[:word] != false
@@ -346,100 +400,81 @@ module Searchkick
346
400
  queries_to_add.concat(q2)
347
401
  end
348
402
 
349
- if options[:exclude]
350
- must_not =
351
- Array(options[:exclude]).map do |phrase|
352
- {
353
- match_phrase: {
354
- exclude_field => {
355
- query: phrase,
356
- analyzer: exclude_analyzer
357
- }
358
- }
359
- }
360
- end
403
+ queries << queries_to_add
361
404
 
362
- queries_to_add = [{
363
- bool: {
364
- should: queries_to_add,
365
- must_not: must_not
366
- }
367
- }]
405
+ if options[:exclude]
406
+ must_not.concat(set_exclude(exclude_field, exclude_analyzer))
368
407
  end
369
-
370
- queries.concat(queries_to_add)
371
408
  end
372
409
 
373
- payload = {
374
- dis_max: {
375
- queries: queries
410
+ # all + exclude option
411
+ if all
412
+ query = {
413
+ match_all: {}
376
414
  }
377
- }
378
-
379
- if conversions_fields.present? && options[:conversions] != false
380
- shoulds = []
381
- conversions_fields.each do |conversions_field|
382
- # wrap payload in a bool query
383
- script_score = {field_value_factor: {field: "#{conversions_field}.count"}}
384
415
 
385
- shoulds << {
386
- nested: {
387
- path: conversions_field,
388
- score_mode: "sum",
389
- query: {
390
- function_score: {
391
- boost_mode: "replace",
392
- query: {
393
- match: {
394
- "#{conversions_field}.query" => options[:conversions_term] || term
395
- }
396
- }
397
- }.merge(script_score)
398
- }
399
- }
400
- }
401
- end
416
+ should = []
417
+ else
418
+ # higher score for matching more fields
402
419
  payload = {
403
420
  bool: {
404
- must: payload,
405
- should: shoulds
421
+ should: queries.map { |qs| {dis_max: {queries: qs}} }
406
422
  }
407
423
  }
424
+
425
+ should.concat(set_conversions)
426
+ end
427
+
428
+ query = payload
429
+ end
430
+
431
+ payload = {}
432
+
433
+ # type when inheritance
434
+ where = (options[:where] || {}).dup
435
+ if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
436
+ where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
437
+ end
438
+
439
+ models = Array(options[:models])
440
+ if models.any? { |m| m != m.searchkick_klass }
441
+ # aliases are not supported with _index in ES below 7.5
442
+ # see https://github.com/elastic/elasticsearch/pull/46640
443
+ if below75?
444
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
445
+ else
446
+ index_type_or =
447
+ models.map do |m|
448
+ v = {_index: m.searchkick_index.name}
449
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
450
+ v
451
+ end
452
+
453
+ where[:or] = Array(where[:or]) + [index_type_or]
408
454
  end
409
455
  end
410
456
 
457
+ # start everything as efficient filters
458
+ # move to post_filters as aggs demand
459
+ filters = where_filters(where)
460
+ post_filters = []
461
+
462
+ # aggregations
463
+ set_aggregations(payload, filters, post_filters) if options[:aggs]
464
+
465
+ # post filters
466
+ set_post_filters(payload, post_filters) if post_filters.any?
467
+
411
468
  custom_filters = []
412
469
  multiply_filters = []
413
470
 
414
471
  set_boost_by(multiply_filters, custom_filters)
415
472
  set_boost_where(custom_filters)
416
473
  set_boost_by_distance(custom_filters) if options[:boost_by_distance]
474
+ set_boost_by_recency(custom_filters) if options[:boost_by_recency]
417
475
 
418
- if custom_filters.any?
419
- payload = {
420
- function_score: {
421
- functions: custom_filters,
422
- query: payload,
423
- score_mode: "sum"
424
- }
425
- }
426
- end
427
-
428
- if multiply_filters.any?
429
- payload = {
430
- function_score: {
431
- functions: multiply_filters,
432
- query: payload,
433
- score_mode: "multiply"
434
- }
435
- }
436
- end
476
+ payload[:query] = build_query(query, filters, should, must_not, custom_filters, multiply_filters)
437
477
 
438
- payload = {
439
- query: payload,
440
- size: per_page,
441
- from: offset
442
- }
443
478
  payload[:explain] = options[:explain] if options[:explain]
444
479
  payload[:profile] = options[:profile] if options[:profile]
445
480
 
@@ -449,13 +484,6 @@ module Searchkick
449
484
  # indices_boost
450
485
  set_boost_by_indices(payload)
451
486
 
452
- # filters
453
- filters = where_filters(options[:where])
454
- set_filters(payload, filters) if filters.any?
455
-
456
- # aggregations
457
- set_aggregations(payload) if options[:aggs]
458
-
459
487
  # suggestions
460
488
  set_suggestions(payload, options[:suggest]) if options[:suggest]
461
489
 
@@ -466,8 +494,7 @@ module Searchkick
466
494
  payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
467
495
 
468
496
  # An empty array will cause only the _id and _type for each hit to be returned
469
- # doc for :select - http://www.elasticsearch.org/guide/reference/api/search/fields/
470
- # doc for :select_v2 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
497
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
471
498
  if options[:select]
472
499
  if options[:select] == []
473
500
  # intuitively [] makes sense to return no fields, but ES by default returns all fields
@@ -480,28 +507,49 @@ module Searchkick
480
507
  end
481
508
  end
482
509
 
510
+ # pagination
511
+ pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
512
+ if !options[:body] || pagination_options
513
+ payload[:size] = per_page
514
+ payload[:from] = offset if offset > 0
515
+ end
516
+
483
517
  # type
484
- if options[:type] || (klass != searchkick_klass && searchkick_index)
518
+ if !searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
485
519
  @type = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v) }
486
520
  end
487
521
 
488
522
  # routing
489
523
  @routing = options[:routing] if options[:routing]
490
524
 
525
+ if track_total_hits?
526
+ payload[:track_total_hits] = true
527
+ end
528
+
491
529
  # merge more body options
492
530
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
493
531
 
532
+ # run block
533
+ options[:block].call(payload) if options[:block]
534
+
535
+ # scroll optimization when interating over all docs
536
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
537
+ if options[:scroll] && payload[:query] == {match_all: {}}
538
+ payload[:sort] ||= ["_doc"]
539
+ end
540
+
494
541
  @body = payload
495
542
  @page = page
496
543
  @per_page = per_page
497
544
  @padding = padding
498
545
  @load = load
546
+ @scroll = scroll
499
547
  end
500
548
 
501
549
  def set_fields
502
550
  boost_fields = {}
503
551
  fields = options[:fields] || searchkick_options[:default_fields] || searchkick_options[:searchable]
504
- all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : below60?
552
+ all = searchkick_options.key?(:_all) ? searchkick_options[:_all] : false
505
553
  default_match = options[:match] || searchkick_options[:match] || :word
506
554
  fields =
507
555
  if fields
@@ -516,12 +564,89 @@ module Searchkick
516
564
  ["_all"]
517
565
  elsif all && default_match == :phrase
518
566
  ["_all.phrase"]
519
- else
567
+ elsif term != "*" && default_match == :exact
520
568
  raise ArgumentError, "Must specify fields to search"
569
+ else
570
+ [default_match == :word ? "*.analyzed" : "*.#{default_match}"]
521
571
  end
522
572
  [boost_fields, fields]
523
573
  end
524
574
 
575
+ def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
576
+ if filters.any? || must_not.any? || should.any?
577
+ bool = {}
578
+ bool[:must] = query if query
579
+ bool[:filter] = filters if filters.any? # where
580
+ bool[:must_not] = must_not if must_not.any? # exclude
581
+ bool[:should] = should if should.any? # conversions
582
+ query = {bool: bool}
583
+ end
584
+
585
+ if custom_filters.any?
586
+ query = {
587
+ function_score: {
588
+ functions: custom_filters,
589
+ query: query,
590
+ score_mode: "sum"
591
+ }
592
+ }
593
+ end
594
+
595
+ if multiply_filters.any?
596
+ query = {
597
+ function_score: {
598
+ functions: multiply_filters,
599
+ query: query,
600
+ score_mode: "multiply"
601
+ }
602
+ }
603
+ end
604
+
605
+ query
606
+ end
607
+
608
+ def set_conversions
609
+ conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
610
+ if conversions_fields.present? && options[:conversions] != false
611
+ conversions_fields.map do |conversions_field|
612
+ {
613
+ nested: {
614
+ path: conversions_field,
615
+ score_mode: "sum",
616
+ query: {
617
+ function_score: {
618
+ boost_mode: "replace",
619
+ query: {
620
+ match: {
621
+ "#{conversions_field}.query" => options[:conversions_term] || term
622
+ }
623
+ },
624
+ field_value_factor: {
625
+ field: "#{conversions_field}.count"
626
+ }
627
+ }
628
+ }
629
+ }
630
+ }
631
+ end
632
+ else
633
+ []
634
+ end
635
+ end
636
+
637
+ def set_exclude(field, analyzer)
638
+ Array(options[:exclude]).map do |phrase|
639
+ {
640
+ multi_match: {
641
+ fields: [field],
642
+ query: phrase,
643
+ analyzer: analyzer,
644
+ type: "phrase"
645
+ }
646
+ }
647
+ end
648
+ end
649
+
525
650
  def set_boost_by_distance(custom_filters)
526
651
  boost_by_distance = options[:boost_by_distance] || {}
527
652
 
@@ -535,9 +660,11 @@ module Searchkick
535
660
  unless attributes[:origin]
536
661
  raise ArgumentError, "boost_by_distance requires :origin"
537
662
  end
538
- function_params = attributes.select { |k, _| [:origin, :scale, :offset, :decay].include?(k) }
663
+
664
+ function_params = attributes.except(:factor, :function)
539
665
  function_params[:origin] = location_value(function_params[:origin])
540
666
  custom_filters << {
667
+ weight: attributes[:factor] || 1,
541
668
  attributes[:function] => {
542
669
  field => function_params
543
670
  }
@@ -545,16 +672,29 @@ module Searchkick
545
672
  end
546
673
  end
547
674
 
675
+ def set_boost_by_recency(custom_filters)
676
+ options[:boost_by_recency].each do |field, attributes|
677
+ attributes = {function: :gauss, origin: Time.now}.merge(attributes)
678
+
679
+ custom_filters << {
680
+ weight: attributes[:factor] || 1,
681
+ attributes[:function] => {
682
+ field => attributes.except(:factor, :function)
683
+ }
684
+ }
685
+ end
686
+ end
687
+
548
688
  def set_boost_by(multiply_filters, custom_filters)
549
689
  boost_by = options[:boost_by] || {}
550
690
  if boost_by.is_a?(Array)
551
691
  boost_by = Hash[boost_by.map { |f| [f, {factor: 1}] }]
552
692
  elsif boost_by.is_a?(Hash)
553
- multiply_by, boost_by = boost_by.partition { |_, v| v[:boost_mode] == "multiply" }.map { |i| Hash[i] }
693
+ multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map { |i| Hash[i] }
554
694
  end
555
695
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
556
696
 
557
- custom_filters.concat boost_filters(boost_by, log: true)
697
+ custom_filters.concat boost_filters(boost_by, modifier: "ln2p")
558
698
  multiply_filters.concat boost_filters(multiply_by || {})
559
699
  end
560
700
 
@@ -577,11 +717,9 @@ module Searchkick
577
717
  def set_boost_by_indices(payload)
578
718
  return unless options[:indices_boost]
579
719
 
580
- indices_boost = options[:indices_boost].each_with_object({}) do |(key, boost), memo|
720
+ indices_boost = options[:indices_boost].map do |key, boost|
581
721
  index = key.respond_to?(:searchkick_index) ? key.searchkick_index.name : key
582
- # try to use index explicitly instead of alias: https://github.com/elasticsearch/elasticsearch/issues/4756
583
- index_by_alias = Searchkick.client.indices.get_alias(index: index).keys.first
584
- memo[index_by_alias || index] = boost
722
+ {index => boost}
585
723
  end
586
724
 
587
725
  payload[:indices_boost] = indices_boost
@@ -617,7 +755,8 @@ module Searchkick
617
755
 
618
756
  def set_highlights(payload, fields)
619
757
  payload[:highlight] = {
620
- fields: Hash[fields.map { |f| [f, {}] }]
758
+ fields: Hash[fields.map { |f| [f, {}] }],
759
+ fragment_size: 0
621
760
  }
622
761
 
623
762
  if options[:highlight].is_a?(Hash)
@@ -646,15 +785,14 @@ module Searchkick
646
785
  @highlighted_fields = payload[:highlight][:fields].keys
647
786
  end
648
787
 
649
- def set_aggregations(payload)
788
+ def set_aggregations(payload, filters, post_filters)
650
789
  aggs = options[:aggs]
651
790
  payload[:aggs] = {}
652
791
 
653
792
  aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax
654
-
655
793
  aggs.each do |field, agg_options|
656
794
  size = agg_options[:limit] ? agg_options[:limit] : 1_000
657
- shared_agg_options = agg_options.slice(:order, :min_doc_count)
795
+ shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
658
796
 
659
797
  if agg_options[:ranges]
660
798
  payload[:aggs][field] = {
@@ -670,20 +808,16 @@ module Searchkick
670
808
  ranges: agg_options[:date_ranges]
671
809
  }.merge(shared_agg_options)
672
810
  }
673
- elsif histogram = agg_options[:date_histogram]
674
- interval = histogram[:interval]
811
+ elsif (histogram = agg_options[:date_histogram])
675
812
  payload[:aggs][field] = {
676
- date_histogram: {
677
- field: histogram[:field],
678
- interval: interval
679
- }
680
- }
681
- elsif metric = @@metric_aggs.find { |k| agg_options.has_key?(k) }
813
+ date_histogram: histogram
814
+ }.merge(shared_agg_options)
815
+ elsif (metric = @@metric_aggs.find { |k| agg_options.has_key?(k) })
682
816
  payload[:aggs][field] = {
683
817
  metric => {
684
818
  field: agg_options[metric][:field] || field
685
819
  }
686
- }
820
+ }.merge(shared_agg_options)
687
821
  else
688
822
  payload[:aggs][field] = {
689
823
  terms: {
@@ -696,6 +830,17 @@ module Searchkick
696
830
  where = {}
697
831
  where = (options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
698
832
  agg_filters = where_filters(where.merge(agg_options[:where] || {}))
833
+
834
+ # only do one level comparison for simplicity
835
+ filters.select! do |filter|
836
+ if agg_filters.include?(filter)
837
+ true
838
+ else
839
+ post_filters << filter
840
+ false
841
+ end
842
+ end
843
+
699
844
  if agg_filters.any?
700
845
  payload[:aggs][field] = {
701
846
  filter: {
@@ -711,32 +856,27 @@ module Searchkick
711
856
  end
712
857
  end
713
858
 
714
- def set_filters(payload, filters)
715
- if options[:aggs]
716
- payload[:post_filter] = {
717
- bool: {
718
- filter: filters
719
- }
720
- }
721
- else
722
- # more efficient query if no aggs
723
- payload[:query] = {
724
- bool: {
725
- must: payload[:query],
726
- filter: filters
727
- }
859
+ def set_post_filters(payload, post_filters)
860
+ payload[:post_filter] = {
861
+ bool: {
862
+ filter: post_filters
728
863
  }
729
- end
864
+ }
730
865
  end
731
866
 
732
867
  # TODO id transformation for arrays
733
868
  def set_order(payload)
734
869
  order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
735
- id_field = below50? ? :_id : :_uid
870
+ id_field = :_id
736
871
  payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }]
737
872
  end
738
873
 
739
874
  def where_filters(where)
875
+ # if where.respond_to?(:permitted?) && !where.permitted?
876
+ # # TODO check in more places
877
+ # Searchkick.warn("Passing unpermitted parameters will raise an exception in Searchkick 5")
878
+ # end
879
+
740
880
  filters = []
741
881
  (where || {}).each do |field, value|
742
882
  field = :_id if field.to_s == "id"
@@ -751,10 +891,17 @@ module Searchkick
751
891
  filters << {bool: {must_not: where_filters(value)}}
752
892
  elsif field == :_and
753
893
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
894
+ # elsif field == :_script
895
+ # filters << {script: {script: {source: value, lang: "painless"}}}
754
896
  else
755
897
  # expand ranges
756
898
  if value.is_a?(Range)
757
- value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
899
+ # infinite? added in Ruby 2.4
900
+ if value.end.nil? || (value.end.respond_to?(:infinite?) && value.end.infinite?)
901
+ value = {gte: value.first}
902
+ else
903
+ value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
904
+ end
758
905
  end
759
906
 
760
907
  value = {in: value} if value.is_a?(Array)
@@ -762,7 +909,7 @@ module Searchkick
762
909
  if value.is_a?(Hash)
763
910
  value.each do |op, op_value|
764
911
  case op
765
- when :within, :bottom_right
912
+ when :within, :bottom_right, :bottom_left
766
913
  # do nothing
767
914
  when :near
768
915
  filters << {
@@ -797,9 +944,35 @@ module Searchkick
797
944
  }
798
945
  }
799
946
  }
947
+ when :top_right
948
+ filters << {
949
+ geo_bounding_box: {
950
+ field => {
951
+ top_right: location_value(op_value),
952
+ bottom_left: location_value(value[:bottom_left])
953
+ }
954
+ }
955
+ }
956
+ when :like
957
+ # based on Postgres
958
+ # https://www.postgresql.org/docs/current/functions-matching.html
959
+ # % matches zero or more characters
960
+ # _ matches one character
961
+ # \ is escape character
962
+ # escape Lucene reserved characters
963
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
964
+ reserved = %w(. ? + * | { } [ ] ( ) " \\)
965
+ regex = op_value.dup
966
+ reserved.each do |v|
967
+ regex.gsub!(v, "\\" + v)
968
+ end
969
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
970
+ filters << {regexp: {field => {value: regex}}}
971
+ when :prefix
972
+ filters << {prefix: {field => {value: op_value}}}
800
973
  when :regexp # support for regexp queries without using a regexp ruby object
801
974
  filters << {regexp: {field => {value: op_value}}}
802
- when :not # not equal
975
+ when :not, :_not # not equal
803
976
  filters << {bool: {must_not: term_filters(field, op_value)}}
804
977
  when :all
805
978
  op_value.each do |val|
@@ -807,6 +980,8 @@ module Searchkick
807
980
  end
808
981
  when :in
809
982
  filters << term_filters(field, op_value)
983
+ when :exists
984
+ filters << {exists: {field: field}}
810
985
  else
811
986
  range_query =
812
987
  case op
@@ -847,57 +1022,78 @@ module Searchkick
847
1022
  elsif value.nil?
848
1023
  {bool: {must_not: {exists: {field: field}}}}
849
1024
  elsif value.is_a?(Regexp)
850
- {regexp: {field => {value: value.source}}}
1025
+ if value.casefold?
1026
+ Searchkick.warn("Case-insensitive flag does not work with Elasticsearch")
1027
+ end
1028
+
1029
+ source = value.source
1030
+ unless source.start_with?("\\A") && source.end_with?("\\z")
1031
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-regexp-query.html
1032
+ Searchkick.warn("Regular expressions are always anchored in Elasticsearch")
1033
+ end
1034
+
1035
+ # TODO handle other anchor characters, like ^, $, \Z
1036
+ if source.start_with?("\\A")
1037
+ source = source[2..-1]
1038
+ else
1039
+ # TODO uncomment in Searchkick 5
1040
+ # source = ".*#{source}"
1041
+ end
1042
+
1043
+ if source.end_with?("\\z")
1044
+ source = source[0..-3]
1045
+ else
1046
+ # TODO uncomment in Searchkick 5
1047
+ # source = "#{source}.*"
1048
+ end
1049
+
1050
+ {regexp: {field => {value: source, flags: "NONE"}}}
851
1051
  else
852
- {term: {field => value}}
1052
+ # TODO add this for other values
1053
+ if value.as_json.is_a?(Enumerable)
1054
+ # query will fail, but this is better
1055
+ # same message as Active Record
1056
+ # TODO make TypeError
1057
+ # raise InvalidQueryError for backward compatibility
1058
+ raise Searchkick::InvalidQueryError, "can't cast #{value.class.name}"
1059
+ end
1060
+
1061
+ {term: {field => {value: value}}}
853
1062
  end
854
1063
  end
855
1064
 
856
1065
  def custom_filter(field, value, factor)
857
- if below50?
858
- {
859
- filter: {
860
- bool: {
861
- must: where_filters(field => value)
862
- }
863
- },
864
- boost_factor: factor
1066
+ {
1067
+ filter: where_filters(field => value),
1068
+ weight: factor
1069
+ }
1070
+ end
1071
+
1072
+ def boost_filter(field, factor: 1, modifier: nil, missing: nil)
1073
+ script_score = {
1074
+ field_value_factor: {
1075
+ field: field,
1076
+ factor: factor.to_f,
1077
+ modifier: modifier
865
1078
  }
1079
+ }
1080
+
1081
+ if missing
1082
+ script_score[:field_value_factor][:missing] = missing.to_f
866
1083
  else
867
- {
868
- filter: where_filters(field => value),
869
- weight: factor
1084
+ script_score[:filter] = {
1085
+ exists: {
1086
+ field: field
1087
+ }
870
1088
  }
871
1089
  end
1090
+
1091
+ script_score
872
1092
  end
873
1093
 
874
- def boost_filters(boost_by, options = {})
1094
+ def boost_filters(boost_by, modifier: nil)
875
1095
  boost_by.map do |field, value|
876
- log = value.key?(:log) ? value[:log] : options[:log]
877
- value[:factor] ||= 1
878
- script_score = {
879
- field_value_factor: {
880
- field: field,
881
- factor: value[:factor].to_f,
882
- modifier: log ? "ln2p" : nil
883
- }
884
- }
885
-
886
- if value[:missing]
887
- if below50?
888
- raise ArgumentError, "The missing option for boost_by is not supported in Elasticsearch < 5"
889
- else
890
- script_score[:field_value_factor][:missing] = value[:missing].to_f
891
- end
892
- else
893
- script_score[:filter] = {
894
- exists: {
895
- field: field
896
- }
897
- }
898
- end
899
-
900
- script_score
1096
+ boost_filter(field, modifier: modifier, **value)
901
1097
  end
902
1098
  end
903
1099
 
@@ -922,12 +1118,32 @@ module Searchkick
922
1118
  end
923
1119
  end
924
1120
 
925
- def below50?
926
- Searchkick.server_below?("5.0.0-alpha1")
1121
+ def base_field(k)
1122
+ k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1123
+ end
1124
+
1125
+ def track_total_hits?
1126
+ (searchkick_options[:deep_paging] && !below70?) || body_options[:track_total_hits]
1127
+ end
1128
+
1129
+ def body_options
1130
+ options[:body_options] || {}
1131
+ end
1132
+
1133
+ def below61?
1134
+ Searchkick.server_below?("6.1.0")
1135
+ end
1136
+
1137
+ def below70?
1138
+ Searchkick.server_below?("7.0.0")
1139
+ end
1140
+
1141
+ def below73?
1142
+ Searchkick.server_below?("7.3.0")
927
1143
  end
928
1144
 
929
- def below60?
930
- Searchkick.server_below?("6.0.0-alpha1")
1145
+ def below75?
1146
+ Searchkick.server_below?("7.5.0")
931
1147
  end
932
1148
  end
933
1149
  end