searchkick 4.0.0 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -132,7 +132,7 @@ module Searchkick
132
132
  def multi_search(searches)
133
133
  event = {
134
134
  name: "Multi Search",
135
- body: searches.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join
135
+ body: searches.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join,
136
136
  }
137
137
  ActiveSupport::Notifications.instrument("multi_search.searchkick", event) do
138
138
  super
@@ -162,12 +162,17 @@ module Searchkick
162
162
 
163
163
  payload = event.payload
164
164
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
165
- type = payload[:query][:type]
165
+
166
166
  index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
167
+ type = payload[:query][:type]
168
+ request_params = payload[:query].except(:index, :type, :body)
169
+
170
+ params = []
171
+ request_params.each do |k, v|
172
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
173
+ end
167
174
 
168
- # no easy way to tell which host the client will use
169
- host = Searchkick.client.transport.hosts.first
170
- debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'"
175
+ debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
171
176
  end
172
177
 
173
178
  def request(event)
@@ -187,9 +192,7 @@ module Searchkick
187
192
  payload = event.payload
188
193
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
189
194
 
190
- # no easy way to tell which host the client will use
191
- host = Searchkick.client.transport.hosts.first
192
- debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/_msearch?pretty -H 'Content-Type: application/json' -d '#{payload[:body]}'"
195
+ debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
193
196
  end
194
197
  end
195
198
 
@@ -3,10 +3,10 @@ module Searchkick
3
3
  def searchkick(**options)
4
4
  options = Searchkick.model_options.merge(options)
5
5
 
6
- unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :default_fields,
6
+ unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
7
  :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :settings, :similarity,
9
- :special_characters, :stem, :stem_conversions, :suggest, :synonyms, :text_end,
8
+ :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
+ :special_characters, :stem, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
10
  :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
11
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
12
12
 
@@ -15,6 +15,7 @@ module Searchkick
15
15
  Searchkick.models << self
16
16
 
17
17
  options[:_type] ||= -> { searchkick_index.klass_document_type(self, true) }
18
+ options[:class_name] = model_name.name
18
19
 
19
20
  callbacks = options.key?(:callbacks) ? options[:callbacks] : :inline
20
21
  unless [:inline, true, false, :async, :queue].include?(callbacks)
@@ -40,12 +41,15 @@ module Searchkick
40
41
 
41
42
  class << self
42
43
  def searchkick_search(term = "*", **options, &block)
43
- Searchkick.search(term, {model: self}.merge(options), &block)
44
+ # TODO throw error in next major version
45
+ Searchkick.warn("calling search on a relation is deprecated") if Searchkick.relation?(self)
46
+
47
+ Searchkick.search(term, model: self, **options, &block)
44
48
  end
45
49
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
46
50
 
47
- def searchkick_index
48
- index = class_variable_get(:@@searchkick_index)
51
+ def searchkick_index(name: nil)
52
+ index = name || class_variable_get(:@@searchkick_index)
49
53
  index = index.call if index.respond_to?(:call)
50
54
  index_cache = class_variable_get(:@@searchkick_index_cache)
51
55
  index_cache[index] ||= Searchkick::Index.new(index, searchkick_options)
@@ -53,10 +57,11 @@ module Searchkick
53
57
  alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
54
58
 
55
59
  def searchkick_reindex(method_name = nil, **options)
56
- scoped = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
60
+ # TODO relation = Searchkick.relation?(self)
61
+ relation = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
57
62
  (respond_to?(:queryable) && queryable != unscoped.with_default_scope)
58
63
 
59
- searchkick_index.reindex(searchkick_klass, method_name, scoped: scoped, **options)
64
+ searchkick_index.reindex(searchkick_klass, method_name, scoped: relation, **options)
60
65
  end
61
66
  alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
62
67
 
@@ -78,8 +83,9 @@ module Searchkick
78
83
  RecordIndexer.new(self).reindex(method_name, **options)
79
84
  end unless method_defined?(:reindex)
80
85
 
86
+ # TODO switch to keyword arguments
81
87
  def similar(options = {})
82
- self.class.searchkick_index.similar_record(self, options)
88
+ self.class.searchkick_index.similar_record(self, **options)
83
89
  end unless method_defined?(:similar)
84
90
 
85
91
  def search_data
@@ -2,7 +2,7 @@ module Searchkick
2
2
  class ProcessBatchJob < ActiveJob::Base
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:, record_ids:)
5
+ def perform(class_name:, record_ids:, index_name: nil)
6
6
  # separate routing from id
7
7
  routing = Hash[record_ids.map { |r| r.split(/(?<!\|)\|(?!\|)/, 2).map { |v| v.gsub("||", "|") } }]
8
8
  record_ids = routing.keys
@@ -26,7 +26,7 @@ module Searchkick
26
26
  end
27
27
 
28
28
  # bulk reindex
29
- index = klass.searchkick_index
29
+ index = klass.searchkick_index(name: index_name)
30
30
  Searchkick.callbacks(:bulk) do
31
31
  index.bulk_index(records) if records.any?
32
32
  index.bulk_delete(delete_records) if delete_records.any?
@@ -2,21 +2,29 @@ module Searchkick
2
2
  class ProcessQueueJob < ActiveJob::Base
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:)
5
+ def perform(class_name:, index_name: nil, inline: false)
6
6
  model = class_name.constantize
7
+ limit = model.searchkick_options[:batch_size] || 1000
7
8
 
8
- limit = model.searchkick_index.options[:batch_size] || 1000
9
- record_ids = model.searchkick_index.reindex_queue.reserve(limit: limit)
10
- if record_ids.any?
11
- Searchkick::ProcessBatchJob.perform_later(
12
- class_name: model.name,
13
- record_ids: record_ids
14
- )
15
- # TODO when moving to reliable queuing, mark as complete
9
+ loop do
10
+ record_ids = model.searchkick_index(name: index_name).reindex_queue.reserve(limit: limit)
11
+ if record_ids.any?
12
+ batch_options = {
13
+ class_name: class_name,
14
+ record_ids: record_ids,
15
+ index_name: index_name
16
+ }
16
17
 
17
- if record_ids.size == limit
18
- Searchkick::ProcessQueueJob.perform_later(class_name: class_name)
18
+ if inline
19
+ # use new.perform to avoid excessive logging
20
+ Searchkick::ProcessBatchJob.new.perform(**batch_options)
21
+ else
22
+ Searchkick::ProcessBatchJob.perform_later(**batch_options)
23
+ end
24
+
25
+ # TODO when moving to reliable queuing, mark as complete
19
26
  end
27
+ break unless record_ids.size == limit
20
28
  end
21
29
  end
22
30
  end
@@ -12,14 +12,14 @@ module Searchkick
12
12
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
13
13
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
14
14
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
15
- :out_of_range?, :hits, :response, :to_a, :first
15
+ :out_of_range?, :hits, :response, :to_a, :first, :scroll
16
16
 
17
17
  def initialize(klass, term = "*", **options)
18
18
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
19
19
  :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :execute, :explain,
20
20
  :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
21
21
  :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
22
- :request_params, :routing, :scope_results, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
22
+ :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
23
23
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
24
24
 
25
25
  term = term.to_s
@@ -60,7 +60,8 @@ module Searchkick
60
60
  if options[:models]
61
61
  @index_mapping = {}
62
62
  Array(options[:models]).each do |model|
63
- @index_mapping[model.searchkick_index.name] = model
63
+ # there can be multiple models per index name due to inheritance - see #1259
64
+ (@index_mapping[model.searchkick_index.name] ||= []) << model
64
65
  end
65
66
  end
66
67
 
@@ -81,6 +82,7 @@ module Searchkick
81
82
  }
82
83
  params[:type] = @type if @type
83
84
  params[:routing] = @routing if @routing
85
+ params[:scroll] = @scroll if @scroll
84
86
  params.merge!(options[:request_params]) if options[:request_params]
85
87
  params
86
88
  end
@@ -104,11 +106,16 @@ module Searchkick
104
106
  query = params
105
107
  type = query[:type]
106
108
  index = query[:index].is_a?(Array) ? query[:index].join(",") : query[:index]
109
+ request_params = query.except(:index, :type, :body)
107
110
 
108
111
  # no easy way to tell which host the client will use
109
112
  host = Searchkick.client.transport.hosts.first
110
113
  credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
111
- "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
114
+ params = ["pretty"]
115
+ request_params.each do |k, v|
116
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
117
+ end
118
+ "curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?#{params.join('&')} -H 'Content-Type: application/json' -d '#{query[:body].to_json}'"
112
119
  end
113
120
 
114
121
  def handle_response(response)
@@ -127,10 +134,13 @@ module Searchkick
127
134
  term: term,
128
135
  scope_results: options[:scope_results],
129
136
  total_entries: options[:total_entries],
130
- index_mapping: @index_mapping
137
+ index_mapping: @index_mapping,
138
+ suggest: options[:suggest],
139
+ scroll: options[:scroll]
131
140
  }
132
141
 
133
142
  if options[:debug]
143
+ # can remove when minimum Ruby version is 2.5
134
144
  require "pp"
135
145
 
136
146
  puts "Searchkick Version: #{Searchkick::VERSION}"
@@ -225,9 +235,12 @@ module Searchkick
225
235
 
226
236
  # pagination
227
237
  page = [options[:page].to_i, 1].max
228
- per_page = (options[:limit] || options[:per_page] || 10_000).to_i
238
+ # maybe use index.max_result_window in the future
239
+ default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
240
+ per_page = (options[:limit] || options[:per_page] || default_limit).to_i
229
241
  padding = [options[:padding].to_i, 0].max
230
242
  offset = options[:offset] || (page - 1) * per_page + padding
243
+ scroll = options[:scroll]
231
244
 
232
245
  # model and eager loading
233
246
  load = options[:load].nil? ? true : options[:load]
@@ -337,7 +350,7 @@ module Searchkick
337
350
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
338
351
 
339
352
  if field == "_all" || field.end_with?(".analyzed")
340
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false
353
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
341
354
  qs << shared_options.merge(analyzer: "searchkick_search")
342
355
 
343
356
  # searchkick_search and searchkick_search2 are the same for ukrainian
@@ -423,6 +436,24 @@ module Searchkick
423
436
  where[:type] = [options[:type] || klass].flatten.map { |v| searchkick_index.klass_document_type(v, true) }
424
437
  end
425
438
 
439
+ models = Array(options[:models])
440
+ if models.any? { |m| m != m.searchkick_klass }
441
+ # aliases are not supported with _index in ES below 7.5
442
+ # see https://github.com/elastic/elasticsearch/pull/46640
443
+ if below75?
444
+ Searchkick.warn("Passing child models to models option throws off hits and pagination - use type option instead")
445
+ else
446
+ index_type_or =
447
+ models.map do |m|
448
+ v = {_index: m.searchkick_index.name}
449
+ v[:type] = m.searchkick_index.klass_document_type(m, true) if m != m.searchkick_klass
450
+ v
451
+ end
452
+
453
+ where[:or] = Array(where[:or]) + [index_type_or]
454
+ end
455
+ end
456
+
426
457
  # start everything as efficient filters
427
458
  # move to post_filters as aggs demand
428
459
  filters = where_filters(where)
@@ -480,7 +511,7 @@ module Searchkick
480
511
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
481
512
  if !options[:body] || pagination_options
482
513
  payload[:size] = per_page
483
- payload[:from] = offset
514
+ payload[:from] = offset if offset > 0
484
515
  end
485
516
 
486
517
  # type
@@ -491,17 +522,28 @@ module Searchkick
491
522
  # routing
492
523
  @routing = options[:routing] if options[:routing]
493
524
 
525
+ if track_total_hits?
526
+ payload[:track_total_hits] = true
527
+ end
528
+
494
529
  # merge more body options
495
530
  payload = payload.deep_merge(options[:body_options]) if options[:body_options]
496
531
 
497
532
  # run block
498
533
  options[:block].call(payload) if options[:block]
499
534
 
535
+ # scroll optimization when iterating over all docs
536
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html
537
+ if options[:scroll] && payload[:query] == {match_all: {}}
538
+ payload[:sort] ||= ["_doc"]
539
+ end
540
+
500
541
  @body = payload
501
542
  @page = page
502
543
  @per_page = per_page
503
544
  @padding = padding
504
545
  @load = load
546
+ @scroll = scroll
505
547
  end
506
548
 
507
549
  def set_fields
@@ -532,7 +574,8 @@ module Searchkick
532
574
 
533
575
  def build_query(query, filters, should, must_not, custom_filters, multiply_filters)
534
576
  if filters.any? || must_not.any? || should.any?
535
- bool = {must: query}
577
+ bool = {}
578
+ bool[:must] = query if query
536
579
  bool[:filter] = filters if filters.any? # where
537
580
  bool[:must_not] = must_not if must_not.any? # exclude
538
581
  bool[:should] = should if should.any? # conversions
@@ -829,6 +872,11 @@ module Searchkick
829
872
  end
830
873
 
831
874
  def where_filters(where)
875
+ # if where.respond_to?(:permitted?) && !where.permitted?
876
+ # # TODO check in more places
877
+ # Searchkick.warn("Passing unpermitted parameters will raise an exception in Searchkick 5")
878
+ # end
879
+
832
880
  filters = []
833
881
  (where || {}).each do |field, value|
834
882
  field = :_id if field.to_s == "id"
@@ -843,6 +891,8 @@ module Searchkick
843
891
  filters << {bool: {must_not: where_filters(value)}}
844
892
  elsif field == :_and
845
893
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
894
+ # elsif field == :_script
895
+ # filters << {script: {script: {source: value, lang: "painless"}}}
846
896
  else
847
897
  # expand ranges
848
898
  if value.is_a?(Range)
@@ -903,8 +953,23 @@ module Searchkick
903
953
  }
904
954
  }
905
955
  }
956
+ when :like
957
+ # based on Postgres
958
+ # https://www.postgresql.org/docs/current/functions-matching.html
959
+ # % matches zero or more characters
960
+ # _ matches one character
961
+ # \ is escape character
962
+ # escape Lucene reserved characters
963
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html#regexp-optional-operators
964
+ reserved = %w(. ? + * | { } [ ] ( ) " \\)
965
+ regex = op_value.dup
966
+ reserved.each do |v|
967
+ regex.gsub!(v, "\\" + v)
968
+ end
969
+ regex = regex.gsub(/(?<!\\)%/, ".*").gsub(/(?<!\\)_/, ".").gsub("\\%", "%").gsub("\\_", "_")
970
+ filters << {regexp: {field => {value: regex}}}
906
971
  when :prefix
907
- filters << {prefix: {field => op_value}}
972
+ filters << {prefix: {field => {value: op_value}}}
908
973
  when :regexp # support for regexp queries without using a regexp ruby object
909
974
  filters << {regexp: {field => {value: op_value}}}
910
975
  when :not, :_not # not equal
@@ -915,6 +980,8 @@ module Searchkick
915
980
  end
916
981
  when :in
917
982
  filters << term_filters(field, op_value)
983
+ when :exists
984
+ filters << {exists: {field: field}}
918
985
  else
919
986
  range_query =
920
987
  case op
@@ -955,9 +1022,43 @@ module Searchkick
955
1022
  elsif value.nil?
956
1023
  {bool: {must_not: {exists: {field: field}}}}
957
1024
  elsif value.is_a?(Regexp)
958
- {regexp: {field => {value: value.source, flags: "NONE"}}}
1025
+ if value.casefold?
1026
+ Searchkick.warn("Case-insensitive flag does not work with Elasticsearch")
1027
+ end
1028
+
1029
+ source = value.source
1030
+ unless source.start_with?("\\A") && source.end_with?("\\z")
1031
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-regexp-query.html
1032
+ Searchkick.warn("Regular expressions are always anchored in Elasticsearch")
1033
+ end
1034
+
1035
+ # TODO handle other anchor characters, like ^, $, \Z
1036
+ if source.start_with?("\\A")
1037
+ source = source[2..-1]
1038
+ else
1039
+ # TODO uncomment in Searchkick 5
1040
+ # source = ".*#{source}"
1041
+ end
1042
+
1043
+ if source.end_with?("\\z")
1044
+ source = source[0..-3]
1045
+ else
1046
+ # TODO uncomment in Searchkick 5
1047
+ # source = "#{source}.*"
1048
+ end
1049
+
1050
+ {regexp: {field => {value: source, flags: "NONE"}}}
959
1051
  else
960
- {term: {field => value}}
1052
+ # TODO add this for other values
1053
+ if value.as_json.is_a?(Enumerable)
1054
+ # query will fail, but this is better
1055
+ # same message as Active Record
1056
+ # TODO make TypeError
1057
+ # raise InvalidQueryError for backward compatibility
1058
+ raise Searchkick::InvalidQueryError, "can't cast #{value.class.name}"
1059
+ end
1060
+
1061
+ {term: {field => {value: value}}}
961
1062
  end
962
1063
  end
963
1064
 
@@ -1021,6 +1122,14 @@ module Searchkick
1021
1122
  k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
1022
1123
  end
1023
1124
 
1125
+ def track_total_hits?
1126
+ (searchkick_options[:deep_paging] && !below70?) || body_options[:track_total_hits]
1127
+ end
1128
+
1129
+ def body_options
1130
+ options[:body_options] || {}
1131
+ end
1132
+
1024
1133
  def below61?
1025
1134
  Searchkick.server_below?("6.1.0")
1026
1135
  end
@@ -1028,5 +1137,13 @@ module Searchkick
1028
1137
  def below70?
1029
1138
  Searchkick.server_below?("7.0.0")
1030
1139
  end
1140
+
1141
+ def below73?
1142
+ Searchkick.server_below?("7.3.0")
1143
+ end
1144
+
1145
+ def below75?
1146
+ Searchkick.server_below?("7.5.0")
1147
+ end
1031
1148
  end
1032
1149
  end