searchkick 4.0.0 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -132,7 +132,7 @@ module Searchkick
132
132
  def multi_search(searches)
133
133
  event = {
134
134
  name: "Multi Search",
135
- body: searches.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join
135
+ body: searches.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join,
136
136
  }
137
137
  ActiveSupport::Notifications.instrument("multi_search.searchkick", event) do
138
138
  super
@@ -162,12 +162,17 @@ module Searchkick
162
162
 
163
163
  payload = event.payload
164
164
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
165
- type = payload[:query][:type]
165
+
166
166
  index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
167
+ type = payload[:query][:type]
168
+ request_params = payload[:query].except(:index, :type, :body)
169
+
170
+ params = []
171
+ request_params.each do |k, v|
172
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
173
+ end
167
174
 
168
- # no easy way to tell which host the client will use
169
- host = Searchkick.client.transport.hosts.first
170
- debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'"
175
+ debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
171
176
  end
172
177
 
173
178
  def request(event)
@@ -187,9 +192,7 @@ module Searchkick
187
192
  payload = event.payload
188
193
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
189
194
 
190
- # no easy way to tell which host the client will use
191
- host = Searchkick.client.transport.hosts.first
192
- debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/_msearch?pretty -H 'Content-Type: application/json' -d '#{payload[:body]}'"
195
+ debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
193
196
  end
194
197
  end
195
198
 
@@ -3,10 +3,10 @@ module Searchkick
3
3
  def searchkick(**options)
4
4
  options = Searchkick.model_options.merge(options)
5
5
 
6
- unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :default_fields,
6
+ unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
7
  :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity,
9
- :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end,
8
+ :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
+ :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
10
  :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
12
12
 
@@ -15,6 +15,7 @@ module Searchkick
15
15
  Searchkick.models << self
16
16
 
17
17
  options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
18
+ options[:class_name] = model_name.name
18
19
 
19
20
  callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
20
21
  unless [:inline, true, false, :async, :queue].include?(callbacks)
@@ -40,12 +41,15 @@ module Searchkick
40
41
 
41
42
  class << self
42
43
  def searchkick_search(term = "*", **options, &block)
43
- Searchkick.search(term, {model: self}.merge(options), &block)
44
+ # TODO throw error in next major version
45
+ Searchkick.warn("calling search on a relation is deprecated") if Searchkick.relation?(self)
46
+
47
+ Searchkick.search(term, model: self, **options, &block)
44
48
  end
45
49
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
46
50
 
47
- def searchkick_index
48
- index = class_variable_get(:@@searchkick_index)
51
+ def searchkick_index(name: nil)
52
+ index = name || class_variable_get(:@@searchkick_index)
49
53
  index = index.call if index.respond_to?(:call)
50
54
  index_cache = class_variable_get(:@@searchkick_index_cache)
51
55
  index_cache[index] ||= Searchkick::Index.new(index, searchkick_options)
@@ -53,10 +57,11 @@ module Searchkick
53
57
  alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
54
58
 
55
59
  def searchkick_reindex(method_name = nil, **options)
56
- scoped = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
60
+ # TODO relation = Searchkick.relation?(self)
61
+ relation = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
57
62
  (respond_to?(:queryable) && queryable != unscoped.with_default_scope)
58
63
 
59
- searchkick_index.reindex(searchkick_klass, method_name, scoped: scoped, **options)
64
+ searchkick_index.reindex(searchkick_klass, method_name, scoped: relation, **options)
60
65
  end
61
66
  alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
62
67
 
@@ -78,8 +83,9 @@ module Searchkick
78
83
  RecordIndexer.new(self).reindex(method_name, **options)
79
84
  end unless method_defined?(:reindex)
80
85
 
86
+ # TODO switch to keyword arguments
81
87
  def similar(options = {})
82
- self.class.searchkick_index.similar_record(self, options)
88
+ self.class.searchkick_index.similar_record(self, **options)
83
89
  end unless method_defined?(:similar)
84
90
 
85
91
  def search_data
@@ -2,7 +2,7 @@ module Searchkick
2
2
  class ProcessBatchJob < ActiveJob::Base
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:, record_ids:)
5
+ def perform(class_name:, record_ids:, index_name: nil)
6
6
  # separate routing from id
7
7
  routing = Hash[record_ids.map { |r| r.split(/(?<!\|)\|(?!\|)/, 2).map { |v| v.gsub("||", "|") } }]
8
8
  record_ids = routing.keys
@@ -26,7 +26,7 @@ module Searchkick
26
26
  end
27
27
 
28
28
  # bulk reindex
29
- index = klass.searchkick_index
29
+ index = klass.searchkick_index(name: index_name)
30
30
  Searchkick.callbacks(:bulk) do
31
31
  index.bulk_index(records) if records.any?
32
32
  index.bulk_delete(delete_records) if delete_records.any?
@@ -2,21 +2,29 @@ module Searchkick
2
2
  class ProcessQueueJob < ActiveJob::Base
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:)
5
+ def perform(class_name:, index_name: nil, inline: false)
6
6
  model = class_name.constantize
7
+ limit = model.searchkick_options[:batch_size] || 1000
7
8
 
8
- limit = model.searchkick_index.options[:batch_size] || 1000
9
- record_ids = model.searchkick_index.reindex_queue.reserve(limit: limit)
10
- if record_ids.any?
11
- Searchkick::ProcessBatchJob.perform_later(
12
- class_name: model.name,
13
- record_ids: record_ids
14
- )
15
- # TODO when moving to reliable queuing, mark as complete
9
+ loop do
10
+ record_ids = model.searchkick_index(name: index_name).reindex_queue.reserve(limit: limit)
11
+ if record_ids.any?
12
+ batch_options = {
13
+ class_name: class_name,
14
+ record_ids: record_ids,
15
+ index_name: index_name
16
+ }
16
17
 
17
- if record_ids.size == limit
18
- Searchkick::ProcessQueueJob.perform_later(class_name: class_name)
18
+ if inline
19
+ # use new.perform to avoid excessive logging
20
+ Searchkick::ProcessBatchJob.new.perform(**batch_options)
21
+ else
22
+ Searchkick::ProcessBatchJob.perform_later(**batch_options)
23
+ end
24
+
25
+ # TODO when moving to reliable queuing, mark as complete
19
26
  end
27
+ break unless record_ids.size == limit
20
28
  end
21
29
  end
22
30
  end
@@ -12,14 +12,14 @@ module Searchkick
12
12
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
13
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
14
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
15
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll
16
16
 
17
17
  def initialize(klass, term = "*", **options)
18
18
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
19
19
  :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
20
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
21
  :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :scope_results, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
22
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
23
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
24
 
25
25
  term = term.to_s
@@ -60,7 +60,8 @@ module Searchkick
60
60
  if options[:models]
61
61
  @index_mapping = {}
62
62
  Array(options[:models]).each do |model|
63
- @index_mapping[model.searchkick_index.name] = model
63
+ # there can be multiple models per index name due to inheritance - see #1259
64
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
64
65
  end
65
66
  end
66
67
 
@@ -81,6 +82,7 @@ module Searchkick
81
82
  }
82
83
  params[:type] = @type if @type
83
84
  params[:routing] = @routing if @routing
85
+ params[:scroll] = @scroll if @scroll
84
86
  params.merge!(options[:request_params]) if options[:request_params]
85
87
  params
86
88
  end
@@ -104,11 +106,16 @@ module Searchkick
104
106
  query = params
105
107
  type = query[:type]
106
108
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
109
+ request_params = query.except(:index, :type, :body)
107
110
 
108
111
  # no easy way to tell which host the client will use
109
112
  host = Searchkick.client.transport.hosts.first
110
113
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
111
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
114
+ params = ["pretty"]
115
+ request_params.each do |k, v|
116
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
117
+ end
118
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
112
119
  end
113
120
 
114
121
  def handle_response(response)
@@ -127,10 +134,13 @@ module Searchkick
127
134
  term: term,
128
135
  scope_results: options[:scope_results],
129
136
  total_entries: options[:total_entries],
130
- index_mapping: @index_mapping
137
+ index_mapping: @index_mapping,
138
+ suggest: options[:suggest],
139
+ scroll: options[:scroll]
131
140
  }
132
141
 
133
142
  if options[:debug]
143
+ # can remove when minimum Ruby version is 2.5
134
144
  require "pp"
135
145
 
136
146
  puts "Searchkick Version: #{Searchkick::VERSION}"
@@ -225,9 +235,12 @@ module Searchkick
225
235
 
226
236
  # pagination
227
237
  page = [options[:page].to_i, 1].max
228
- per_page = (options[:limit] || options[:per_page] || 10_000).to_i
238
+ # maybe use index.max_result_window in the future
239
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
240
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
229
241
  padding = [options[:padding].to_i, 0].max
230
242
  offset = options[:offset] || (page - 1) * per_page + padding
243
+ scroll = options[:scroll]
231
244
 
232
245
  # model and eager loading
233
246
  load = options[:load].nil? ? true : options[:load]
@@ -337,7 +350,7 @@ module Searchkick
337
350
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
338
351
 
339
352
  if field == "_all" || field.end_with?(".analyzed")
340
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false
353
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
341
354
  qs << shared_options.merge(analyzer: "searchkick_search")
342
355
 
343
356
  # searchkick_search and searchkick_search2 are the same for ukrainian
@@ -423,6 +436,24 @@ module Searchkick
423
436
  where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
424
437
  end
425
438
 
439
+ models = Array(options[:models])
440
+ if models.any? { |m| m != m.searchkick_klass }
441
+ # aliases are not supported with _index in ES below 7.5
442
+ # see https://github.com/elastic/elasticsearch/pull/46640
443
+ if below75?
444
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
445
+ else
446
+ index_type_or =
447
+ models.map do |m|
448
+ v = {_index: m.searchkick_index.name}
449
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
450
+ v
451
+ end
452
+
453
+ where[:or] = Array(where[:or]) + [index_type_or]
454
+ end
455
+ end
456
+
426
457
  # start everything as efficient filters
427
458
  # move to post_filters as aggs demand
428
459
  filters = where_filters(where)
@@ -480,7 +511,7 @@ module Searchkick
480
511
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
481
512
  if !options[:body] || pagination_options
482
513
  payload[:size] = per_page
483
- payload[:from] = offset
514
+ payload[:from] = offset if offset > 0
484
515
  end
485
516
 
486
517
  # type
@@ -491,17 +522,28 @@ module Searchkick
491
522
  # routing
492
523
  @routing = options[:routing] if options[:routing]
493
524
 
525
+ if track_total_hits?
526
+ payload[:track_total_hits] = true
527
+ end
528
+
494
529
  # merge more body options
495
530
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
496
531
 
497
532
  # run block
498
533
  options[:block].call(payload) if options[:block]
499
534
 
535
+ # scroll optimization when iterating over all docs
536
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
537
+ if options[:scroll] && payload[:query] == {match_all: {}}
538
+ payload[:sort] ||= ["_doc"]
539
+ end
540
+
500
541
  @body = payload
501
542
  @page = page
502
543
  @per_page = per_page
503
544
  @padding = padding
504
545
  @load = load
546
+ @scroll = scroll
505
547
  end
506
548
 
507
549
  def set_fields
@@ -532,7 +574,8 @@ module Searchkick
532
574
 
533
575
  def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
534
576
  if filters.any? || must_not.any? || should.any?
535
- bool = {must: query}
577
+ bool = {}
578
+ bool[:must] = query if query
536
579
  bool[:filter] = filters if filters.any? # where
537
580
  bool[:must_not] = must_not if must_not.any? # exclude
538
581
  bool[:should] = should if should.any? # conversions
@@ -829,6 +872,11 @@ module Searchkick
829
872
  end
830
873
 
831
874
  def where_filters(where)
875
+ # if where.respond_to?(:permitted?) && !where.permitted?
876
+ # # TODO check in more places
877
+ # Searchkick.warn("Passing unpermitted parameters will raise an exception in Searchkick 5")
878
+ # end
879
+
832
880
  filters = []
833
881
  (where || {}).each do |field, value|
834
882
  field = :_id if field.to_s == "id"
@@ -843,6 +891,8 @@ module Searchkick
843
891
  filters << {bool: {must_not: where_filters(value)}}
844
892
  elsif field == :_and
845
893
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
894
+ # elsif field == :_script
895
+ # filters << {script: {script: {source: value, lang: "painless"}}}
846
896
  else
847
897
  # expand ranges
848
898
  if value.is_a?(Range)
@@ -903,8 +953,23 @@ module Searchkick
903
953
  }
904
954
  }
905
955
  }
956
+ when :like
957
+ # based on Postgres
958
+ # https://www.postgresql.org/docs/current/functions-matching.html
959
+ # % matches zero or more characters
960
+ # _ matches one character
961
+ # \ is escape character
962
+ # escape Lucene reserved characters
963
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
964
+ reserved = %w(. ? + * | { } [ ] ( ) " \\)
965
+ regex = op_value.dup
966
+ reserved.each do |v|
967
+ regex.gsub!(v, "\\" + v)
968
+ end
969
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
970
+ filters << {regexp: {field => {value: regex}}}
906
971
  when :prefix
907
- filters << {prefix: {field => op_value}}
972
+ filters << {prefix: {field => {value: op_value}}}
908
973
  when :regexp # support for regexp queries without using a regexp ruby object
909
974
  filters << {regexp: {field => {value: op_value}}}
910
975
  when :not, :_not # not equal
@@ -915,6 +980,8 @@ module Searchkick
915
980
  end
916
981
  when :in
917
982
  filters << term_filters(field, op_value)
983
+ when :exists
984
+ filters << {exists: {field: field}}
918
985
  else
919
986
  range_query =
920
987
  case op
@@ -955,9 +1022,43 @@ module Searchkick
955
1022
  elsif value.nil?
956
1023
  {bool: {must_not: {exists: {field: field}}}}
957
1024
  elsif value.is_a?(Regexp)
958
- {regexp: {field => {value: value.source, flags: "NONE"}}}
1025
+ if value.casefold?
1026
+ Searchkick.warn("Case-insensitive flag does not work with Elasticsearch")
1027
+ end
1028
+
1029
+ source = value.source
1030
+ unless source.start_with?("\\A") && source.end_with?("\\z")
1031
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-regexp-query.html
1032
+ Searchkick.warn("Regular expressions are always anchored in Elasticsearch")
1033
+ end
1034
+
1035
+ # TODO handle other anchor characters, like ^, $, \Z
1036
+ if source.start_with?("\\A")
1037
+ source = source[2..-1]
1038
+ else
1039
+ # TODO uncomment in Searchkick 5
1040
+ # source = ".*#{source}"
1041
+ end
1042
+
1043
+ if source.end_with?("\\z")
1044
+ source = source[0..-3]
1045
+ else
1046
+ # TODO uncomment in Searchkick 5
1047
+ # source = "#{source}.*"
1048
+ end
1049
+
1050
+ {regexp: {field => {value: source, flags: "NONE"}}}
959
1051
  else
960
- {term: {field => value}}
1052
+ # TODO add this for other values
1053
+ if value.as_json.is_a?(Enumerable)
1054
+ # query will fail, but this is better
1055
+ # same message as Active Record
1056
+ # TODO make TypeError
1057
+ # raise InvalidQueryError for backward compatibility
1058
+ raise Searchkick::InvalidQueryError, "can't cast #{value.class.name}"
1059
+ end
1060
+
1061
+ {term: {field => {value: value}}}
961
1062
  end
962
1063
  end
963
1064
 
@@ -1021,6 +1122,14 @@ module Searchkick
1021
1122
  k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1022
1123
  end
1023
1124
 
1125
+ def track_total_hits?
1126
+ (searchkick_options[:deep_paging] && !below70?) || body_options[:track_total_hits]
1127
+ end
1128
+
1129
+ def body_options
1130
+ options[:body_options] || {}
1131
+ end
1132
+
1024
1133
  def below61?
1025
1134
  Searchkick.server_below?("6.1.0")
1026
1135
  end
@@ -1028,5 +1137,13 @@ module Searchkick
1028
1137
  def below70?
1029
1138
  Searchkick.server_below?("7.0.0")
1030
1139
  end
1140
+
1141
+ def below73?
1142
+ Searchkick.server_below?("7.3.0")
1143
+ end
1144
+
1145
+ def below75?
1146
+ Searchkick.server_below?("7.5.0")
1147
+ end
1031
1148
  end
1032
1149
  end