searchkick 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  module Searchkick
2
2
  class Query
3
+ include Enumerable
3
4
  extend Forwardable
4
5
 
5
6
  @@metric_aggs = [:avg, :cardinality, :max, :min, :sum]
@@ -12,20 +13,21 @@ module Searchkick
12
13
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
14
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
15
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
16
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll, :highlights, :with_highlights,
17
+ :with_score, :misspellings?, :scroll_id, :clear_scroll, :missing_records, :with_hit
16
18
 
17
19
  def initialize(klass, term = "*", **options)
18
20
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
19
- :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
21
+ :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
20
22
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
23
  :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :scope_results, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
24
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
25
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
26
 
25
27
  term = term.to_s
26
28
 
27
29
  if options[:emoji]
28
- term = EmojiParser.parse_unicode(term) { |e| " #{e.name} " }.strip
30
+ term = EmojiParser.parse_unicode(term) { |e| " #{e.name.tr('_', ' ')} " }.strip
29
31
  end
30
32
 
31
33
  @klass = klass
@@ -60,7 +62,8 @@ module Searchkick
60
62
  if options[:models]
61
63
  @index_mapping = {}
62
64
  Array(options[:models]).each do |model|
63
- @index_mapping[model.searchkick_index.name] = model
65
+ # there can be multiple models per index name due to inheritance - see #1259
66
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
64
67
  end
65
68
  end
66
69
 
@@ -72,7 +75,8 @@ module Searchkick
72
75
  elsif searchkick_index
73
76
  searchkick_index.name
74
77
  else
75
- "_all"
78
+ # fixes warning about accessing system indices
79
+ "*,-.*"
76
80
  end
77
81
 
78
82
  params = {
@@ -81,6 +85,7 @@ module Searchkick
81
85
  }
82
86
  params[:type] = @type if @type
83
87
  params[:routing] = @routing if @routing
88
+ params[:scroll] = @scroll if @scroll
84
89
  params.merge!(options[:request_params]) if options[:request_params]
85
90
  params
86
91
  end
@@ -104,11 +109,21 @@ module Searchkick
104
109
  query = params
105
110
  type = query[:type]
106
111
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
112
+ request_params = query.except(:index, :type, :body)
107
113
 
108
114
  # no easy way to tell which host the client will use
109
- host = Searchkick.client.transport.hosts.first
115
+ host =
116
+ if Searchkick.client.transport.respond_to?(:transport)
117
+ Searchkick.client.transport.transport.hosts.first
118
+ else
119
+ Searchkick.client.transport.hosts.first
120
+ end
110
121
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
111
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
122
+ params = ["pretty"]
123
+ request_params.each do |k, v|
124
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
125
+ end
126
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
112
127
  end
113
128
 
114
129
  def handle_response(response)
@@ -127,12 +142,12 @@ module Searchkick
127
142
  term: term,
128
143
  scope_results: options[:scope_results],
129
144
  total_entries: options[:total_entries],
130
- index_mapping: @index_mapping
145
+ index_mapping: @index_mapping,
146
+ suggest: options[:suggest],
147
+ scroll: options[:scroll]
131
148
  }
132
149
 
133
150
  if options[:debug]
134
- require "pp"
135
-
136
151
  puts "Searchkick Version: #{Searchkick::VERSION}"
137
152
  puts "Elasticsearch Version: #{Searchkick.server_version}"
138
153
  puts
@@ -192,14 +207,14 @@ module Searchkick
192
207
  e.message.include?("No query registered for [function_score]")
193
208
  )
194
209
 
195
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
210
+ raise UnsupportedVersionError
196
211
  elsif status_code == 400
197
212
  if (
198
213
  e.message.include?("bool query does not support [filter]") ||
199
214
  e.message.include?("[bool] filter does not support [filter]")
200
215
  )
201
216
 
202
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
217
+ raise UnsupportedVersionError
203
218
  elsif e.message =~ /analyzer \[searchkick_.+\] not found/
204
219
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
205
220
  else
@@ -215,7 +230,14 @@ module Searchkick
215
230
  end
216
231
 
217
232
  def execute_search
218
- Searchkick.client.search(params)
233
+ name = searchkick_klass ? "#{searchkick_klass.name} Search" : "Search"
234
+ event = {
235
+ name: name,
236
+ query: params
237
+ }
238
+ ActiveSupport::Notifications.instrument("search.searchkick", event) do
239
+ Searchkick.client.search(params)
240
+ end
219
241
  end
220
242
 
221
243
  def prepare
@@ -225,9 +247,12 @@ module Searchkick
225
247
 
226
248
  # pagination
227
249
  page = [options[:page].to_i, 1].max
228
- per_page = (options[:limit] || options[:per_page] || 10_000).to_i
250
+ # maybe use index.max_result_window in the future
251
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
252
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
229
253
  padding = [options[:padding].to_i, 0].max
230
254
  offset = options[:offset] || (page - 1) * per_page + padding
255
+ scroll = options[:scroll]
231
256
 
232
257
  # model and eager loading
233
258
  load = options[:load].nil? ? true : options[:load]
@@ -247,9 +272,10 @@ module Searchkick
247
272
  should = []
248
273
 
249
274
  if options[:similar]
275
+ like = options[:similar] == true ? term : options[:similar]
250
276
  query = {
251
277
  more_like_this: {
252
- like: term,
278
+ like: like,
253
279
  min_doc_freq: 1,
254
280
  min_term_freq: 1,
255
281
  analyzer: "searchkick_search2"
@@ -337,11 +363,11 @@ module Searchkick
337
363
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
338
364
 
339
365
  if field == "_all" || field.end_with?(".analyzed")
340
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false
366
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
341
367
  qs << shared_options.merge(analyzer: "searchkick_search")
342
368
 
343
- # searchkick_search and searchkick_search2 are the same for ukrainian
344
- unless %w(japanese korean polish ukrainian vietnamese).include?(searchkick_options[:language])
369
+ # searchkick_search and searchkick_search2 are the same for some languages
370
+ unless %w(japanese japanese2 korean polish ukrainian vietnamese).include?(searchkick_options[:language])
345
371
  qs << shared_options.merge(analyzer: "searchkick_search2")
346
372
  end
347
373
  exclude_analyzer = "searchkick_search2"
@@ -362,11 +388,6 @@ module Searchkick
362
388
 
363
389
  if field.start_with?("*.")
364
390
  q2 = qs.map { |q| {multi_match: q.merge(fields: [field], type: match_type == :match_phrase ? "phrase" : "best_fields")} }
365
- if below61?
366
- q2.each do |q|
367
- q[:multi_match].delete(:fuzzy_transpositions)
368
- end
369
- end
370
391
  else
371
392
  q2 = qs.map { |q| {match_type => {field => q}} }
372
393
  end
@@ -418,11 +439,29 @@ module Searchkick
418
439
  payload = {}
419
440
 
420
441
  # type when inheritance
421
- where = (options[:where] || {}).dup
442
+ where = ensure_permitted(options[:where] || {}).dup
422
443
  if searchkick_options[:inheritance] && (options[:type] || (klass != searchkick_klass && searchkick_index))
423
444
  where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
424
445
  end
425
446
 
447
+ models = Array(options[:models])
448
+ if models.any? { |m| m != m.searchkick_klass }
449
+ # aliases are not supported with _index in ES below 7.5
450
+ # see https://github.com/elastic/elasticsearch/pull/46640
451
+ if below75?
452
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
453
+ else
454
+ index_type_or =
455
+ models.map do |m|
456
+ v = {_index: m.searchkick_index.name}
457
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
458
+ v
459
+ end
460
+
461
+ where[:or] = Array(where[:or]) + [index_type_or]
462
+ end
463
+ end
464
+
426
465
  # start everything as efficient filters
427
466
  # move to post_filters as aggs demand
428
467
  filters = where_filters(where)
@@ -480,7 +519,7 @@ module Searchkick
480
519
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
481
520
  if !options[:body] || pagination_options
482
521
  payload[:size] = per_page
483
- payload[:from] = offset
522
+ payload[:from] = offset if offset > 0
484
523
  end
485
524
 
486
525
  # type
@@ -491,17 +530,28 @@ module Searchkick
491
530
  # routing
492
531
  @routing = options[:routing] if options[:routing]
493
532
 
533
+ if track_total_hits?
534
+ payload[:track_total_hits] = true
535
+ end
536
+
494
537
  # merge more body options
495
538
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
496
539
 
497
540
  # run block
498
541
  options[:block].call(payload) if options[:block]
499
542
 
543
+ # scroll optimization when iterating over all docs
544
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
545
+ if options[:scroll] && payload[:query] == {match_all: {}}
546
+ payload[:sort] ||= ["_doc"]
547
+ end
548
+
500
549
  @body = payload
501
550
  @page = page
502
551
  @per_page = per_page
503
552
  @padding = padding
504
553
  @load = load
554
+ @scroll = scroll
505
555
  end
506
556
 
507
557
  def set_fields
@@ -532,7 +582,8 @@ module Searchkick
532
582
 
533
583
  def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
534
584
  if filters.any? || must_not.any? || should.any?
535
- bool = {must: query}
585
+ bool = {}
586
+ bool[:must] = query if query
536
587
  bool[:filter] = filters if filters.any? # where
537
588
  bool[:must_not] = must_not if must_not.any? # exclude
538
589
  bool[:should] = should if should.any? # conversions
@@ -645,9 +696,9 @@ module Searchkick
645
696
  def set_boost_by(multiply_filters, custom_filters)
646
697
  boost_by = options[:boost_by] || {}
647
698
  if boost_by.is_a?(Array)
648
- boost_by = Hash[boost_by.map { |f| [f, {factor: 1}] }]
699
+ boost_by = boost_by.to_h { |f| [f, {factor: 1}] }
649
700
  elsif boost_by.is_a?(Hash)
650
- multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map { |i| Hash[i] }
701
+ multiply_by, boost_by = boost_by.partition { |_, v| v.delete(:boost_mode) == "multiply" }.map(&:to_h)
651
702
  end
652
703
  boost_by[options[:boost]] = {factor: 1} if options[:boost]
653
704
 
@@ -712,7 +763,7 @@ module Searchkick
712
763
 
713
764
  def set_highlights(payload, fields)
714
765
  payload[:highlight] = {
715
- fields: Hash[fields.map { |f| [f, {}] }],
766
+ fields: fields.to_h { |f| [f, {}] },
716
767
  fragment_size: 0
717
768
  }
718
769
 
@@ -746,7 +797,7 @@ module Searchkick
746
797
  aggs = options[:aggs]
747
798
  payload[:aggs] = {}
748
799
 
749
- aggs = Hash[aggs.map { |f| [f, {}] }] if aggs.is_a?(Array) # convert to more advanced syntax
800
+ aggs = aggs.to_h { |f| [f, {}] } if aggs.is_a?(Array) # convert to more advanced syntax
750
801
  aggs.each do |field, agg_options|
751
802
  size = agg_options[:limit] ? agg_options[:limit] : 1_000
752
803
  shared_agg_options = agg_options.except(:limit, :field, :ranges, :date_ranges, :where)
@@ -785,8 +836,9 @@ module Searchkick
785
836
  end
786
837
 
787
838
  where = {}
788
- where = (options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
789
- agg_filters = where_filters(where.merge(agg_options[:where] || {}))
839
+ where = ensure_permitted(options[:where] || {}).reject { |k| k == field } unless options[:smart_aggs] == false
840
+ agg_where = ensure_permitted(agg_options[:where] || {})
841
+ agg_filters = where_filters(where.merge(agg_where))
790
842
 
791
843
  # only do one level comparison for simplicity
792
844
  filters.select! do |filter|
@@ -821,11 +873,14 @@ module Searchkick
821
873
  }
822
874
  end
823
875
 
824
- # TODO id transformation for arrays
825
876
  def set_order(payload)
826
- order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
827
- id_field = :_id
828
- payload[:sort] = order.is_a?(Array) ? order : Hash[order.map { |k, v| [k.to_s == "id" ? id_field : k, v] }]
877
+ payload[:sort] = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
878
+ end
879
+
880
+ # provides *very* basic protection from unfiltered parameters
881
+ # this is not meant to be comprehensive and may be expanded in the future
882
+ def ensure_permitted(obj)
883
+ obj.to_h
829
884
  end
830
885
 
831
886
  def where_filters(where)
@@ -843,15 +898,12 @@ module Searchkick
843
898
  filters << {bool: {must_not: where_filters(value)}}
844
899
  elsif field == :_and
845
900
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
901
+ # elsif field == :_script
902
+ # filters << {script: {script: {source: value, lang: "painless"}}}
846
903
  else
847
904
  # expand ranges
848
905
  if value.is_a?(Range)
849
- # infinite? added in Ruby 2.4
850
- if value.end.nil? || (value.end.respond_to?(:infinite?) && value.end.infinite?)
851
- value = {gte: value.first}
852
- else
853
- value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
854
- end
906
+ value = expand_range(value)
855
907
  end
856
908
 
857
909
  value = {in: value} if value.is_a?(Array)
@@ -903,8 +955,32 @@ module Searchkick
903
955
  }
904
956
  }
905
957
  }
958
+ when :like, :ilike
959
+ # based on Postgres
960
+ # https://www.postgresql.org/docs/current/functions-matching.html
961
+ # % matches zero or more characters
962
+ # _ matches one character
963
+ # \ is escape character
964
+ # escape Lucene reserved characters
965
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
966
+ reserved = %w(\\ . ? + * | { } [ ] ( ) ")
967
+ regex = op_value.dup
968
+ reserved.each do |v|
969
+ regex.gsub!(v, "\\\\" + v)
970
+ end
971
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
972
+
973
+ if op == :ilike
974
+ if below710?
975
+ raise ArgumentError, "ilike requires Elasticsearch 7.10+"
976
+ else
977
+ filters << {regexp: {field => {value: regex, flags: "NONE", case_insensitive: true}}}
978
+ end
979
+ else
980
+ filters << {regexp: {field => {value: regex, flags: "NONE"}}}
981
+ end
906
982
  when :prefix
907
- filters << {prefix: {field => op_value}}
983
+ filters << {prefix: {field => {value: op_value}}}
908
984
  when :regexp # support for regexp queries without using a regexp ruby object
909
985
  filters << {regexp: {field => {value: op_value}}}
910
986
  when :not, :_not # not equal
@@ -915,6 +991,8 @@ module Searchkick
915
991
  end
916
992
  when :in
917
993
  filters << term_filters(field, op_value)
994
+ when :exists
995
+ filters << {exists: {field: field}}
918
996
  else
919
997
  range_query =
920
998
  case op
@@ -927,7 +1005,7 @@ module Searchkick
927
1005
  when :lte
928
1006
  {to: op_value, include_upper: true}
929
1007
  else
930
- raise "Unknown where operator: #{op.inspect}"
1008
+ raise ArgumentError, "Unknown where operator: #{op.inspect}"
931
1009
  end
932
1010
  # issue 132
933
1011
  if (existing = filters.find { |f| f[:range] && f[:range][field] })
@@ -955,9 +1033,40 @@ module Searchkick
955
1033
  elsif value.nil?
956
1034
  {bool: {must_not: {exists: {field: field}}}}
957
1035
  elsif value.is_a?(Regexp)
958
- {regexp: {field => {value: value.source, flags: "NONE"}}}
1036
+ source = value.source
1037
+
1038
+ # TODO handle other regexp options
1039
+
1040
+ # TODO handle other anchor characters, like ^, $, \Z
1041
+ if source.start_with?("\\A")
1042
+ source = source[2..-1]
1043
+ else
1044
+ source = ".*#{source}"
1045
+ end
1046
+
1047
+ if source.end_with?("\\z")
1048
+ source = source[0..-3]
1049
+ else
1050
+ source = "#{source}.*"
1051
+ end
1052
+
1053
+ if below710?
1054
+ if value.casefold?
1055
+ raise ArgumentError, "Case-insensitive flag does not work with Elasticsearch < 7.10"
1056
+ end
1057
+ {regexp: {field => {value: source, flags: "NONE"}}}
1058
+ else
1059
+ {regexp: {field => {value: source, flags: "NONE", case_insensitive: value.casefold?}}}
1060
+ end
959
1061
  else
960
- {term: {field => value}}
1062
+ # TODO add this for other values
1063
+ if value.as_json.is_a?(Enumerable)
1064
+ # query will fail, but this is better
1065
+ # same message as Active Record
1066
+ raise TypeError, "can't cast #{value.class.name}"
1067
+ end
1068
+
1069
+ {term: {field => {value: value}}}
961
1070
  end
962
1071
  end
963
1072
 
@@ -1017,16 +1126,39 @@ module Searchkick
1017
1126
  end
1018
1127
  end
1019
1128
 
1129
+ def expand_range(range)
1130
+ expanded = {}
1131
+ expanded[:gte] = range.begin if range.begin
1132
+
1133
+ if range.end && !(range.end.respond_to?(:infinite?) && range.end.infinite?)
1134
+ expanded[range.exclude_end? ? :lt : :lte] = range.end
1135
+ end
1136
+
1137
+ expanded
1138
+ end
1139
+
1020
1140
  def base_field(k)
1021
1141
  k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1022
1142
  end
1023
1143
 
1024
- def below61?
1025
- Searchkick.server_below?("6.1.0")
1144
+ def track_total_hits?
1145
+ searchkick_options[:deep_paging] || body_options[:track_total_hits]
1146
+ end
1147
+
1148
+ def body_options
1149
+ options[:body_options] || {}
1150
+ end
1151
+
1152
+ def below73?
1153
+ Searchkick.server_below?("7.3.0")
1154
+ end
1155
+
1156
+ def below75?
1157
+ Searchkick.server_below?("7.5.0")
1026
1158
  end
1027
1159
 
1028
- def below70?
1029
- Searchkick.server_below?("7.0.0")
1160
+ def below710?
1161
+ Searchkick.server_below?("7.10.0")
1030
1162
  end
1031
1163
  end
1032
1164
  end
@@ -39,7 +39,6 @@ module Searchkick
39
39
  _index: index.name,
40
40
  _id: search_id
41
41
  }
42
- data[:_type] = document_type if Searchkick.server_below7?
43
42
  data[:routing] = record.search_routing if record.respond_to?(:search_routing)
44
43
  data
45
44
  end