searchkick 5.0.2 → 5.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,7 +19,7 @@ module Searchkick
19
19
  mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
20
  end
21
21
 
22
- set_deep_paging(settings) if options[:deep_paging]
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
23
23
 
24
24
  {
25
25
  settings: settings,
@@ -169,6 +169,21 @@ module Searchkick
169
169
  max_shingle_diff: 4
170
170
  }
171
171
 
172
+ if options[:knn]
173
+ unless Searchkick.knn_support?
174
+ if Searchkick.opensearch?
175
+ raise Error, "knn requires OpenSearch 2.4+"
176
+ else
177
+ raise Error, "knn requires Elasticsearch 8.6+"
178
+ end
179
+ end
180
+
181
+ if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? }
182
+ # only enable if doing approximate search
183
+ settings[:index][:knn] = true
184
+ end
185
+ end
186
+
172
187
  if options[:case_sensitive]
173
188
  settings[:analysis][:analyzer].each do |_, analyzer|
174
189
  analyzer[:filter].delete("lowercase")
@@ -406,6 +421,66 @@ module Searchkick
406
421
  mapping[field] = shape_options.merge(type: "geo_shape")
407
422
  end
408
423
 
424
+ (options[:knn] || []).each do |field, knn_options|
425
+ distance = knn_options[:distance]
426
+
427
+ if Searchkick.opensearch?
428
+ if distance.nil?
429
+ # avoid server crash if method not specified
430
+ raise ArgumentError, "Must specify a distance for OpenSearch"
431
+ end
432
+
433
+ vector_options = {
434
+ type: "knn_vector",
435
+ dimension: knn_options[:dimensions]
436
+ }
437
+
438
+ if !distance.nil?
439
+ space_type =
440
+ case distance
441
+ when "cosine"
442
+ "cosinesimil"
443
+ when "euclidean"
444
+ "l2"
445
+ when "inner_product"
446
+ "innerproduct"
447
+ else
448
+ raise ArgumentError, "Unknown distance: #{distance}"
449
+ end
450
+
451
+ vector_options[:method] = {
452
+ name: "hnsw",
453
+ space_type: space_type,
454
+ engine: "lucene"
455
+ }
456
+ end
457
+
458
+ mapping[field.to_s] = vector_options
459
+ else
460
+ vector_options = {
461
+ type: "dense_vector",
462
+ dims: knn_options[:dimensions],
463
+ index: !distance.nil?
464
+ }
465
+
466
+ if !distance.nil?
467
+ vector_options[:similarity] =
468
+ case distance
469
+ when "cosine"
470
+ "cosine"
471
+ when "euclidean"
472
+ "l2_norm"
473
+ when "inner_product"
474
+ "max_inner_product"
475
+ else
476
+ raise ArgumentError, "Unknown distance: #{distance}"
477
+ end
478
+ end
479
+
480
+ mapping[field.to_s] = vector_options
481
+ end
482
+ end
483
+
409
484
  if options[:inheritance]
410
485
  mapping[:type] = keyword_mapping
411
486
  end
@@ -525,7 +600,7 @@ module Searchkick
525
600
  def set_deep_paging(settings)
526
601
  if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
602
  settings[:index] ||= {}
528
- settings[:index][:max_result_window] = 1_000_000_000
603
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
529
604
  end
530
605
  end
531
606
 
@@ -31,7 +31,7 @@ module Searchkick
31
31
  params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
32
  end
33
33
 
34
- debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
34
+ debug " #{color(name, YELLOW, bold: true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
35
  end
36
36
 
37
37
  def request(event)
@@ -41,7 +41,7 @@ module Searchkick
41
41
  payload = event.payload
42
42
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
43
 
44
- debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
44
+ debug " #{color(name, YELLOW, bold: true)} #{payload.except(:name).to_json}"
45
45
  end
46
46
 
47
47
  def multi_search(event)
@@ -51,7 +51,7 @@ module Searchkick
51
51
  payload = event.payload
52
52
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
53
 
54
- debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
54
+ debug " #{color(name, YELLOW, bold: true)} _msearch #{payload[:body]}"
55
55
  end
56
56
  end
57
57
  end
@@ -3,8 +3,15 @@ require "faraday"
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware
5
5
  def call(env)
6
- if env[:method] == :get && env[:url].path.to_s.end_with?("/_search")
6
+ path = env[:url].path.to_s
7
+ if path.end_with?("/_search")
7
8
  env[:request][:timeout] = Searchkick.search_timeout
9
+ elsif path.end_with?("/_msearch")
10
+ # assume no concurrent searches for timeout for now
11
+ searches = env[:request_body].count("\n") / 2
12
+ # do not allow timeout to exceed Searchkick.timeout
13
+ timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min
14
+ env[:request][:timeout] = timeout
8
15
  end
9
16
  @app.call(env)
10
17
  end
@@ -4,8 +4,8 @@ module Searchkick
4
4
  options = Searchkick.model_options.merge(options)
5
5
 
6
6
  unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
7
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
8
+ :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
9
  :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
10
  :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
11
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
@@ -66,7 +66,7 @@ module Searchkick
66
66
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
67
67
 
68
68
  def searchkick_index(name: nil)
69
- index_name = name || searchkick_index_name
69
+ index_name = name || searchkick_klass.searchkick_index_name
70
70
  index_name = index_name.call if index_name.respond_to?(:call)
71
71
  index_cache = class_variable_get(:@@searchkick_index_cache)
72
72
  index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
@@ -88,9 +88,9 @@ module Searchkick
88
88
  if options[:index_name]
89
89
  options[:index_name]
90
90
  elsif options[:index_prefix].respond_to?(:call)
91
- -> { [options[:index_prefix].call, searchkick_klass.model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
91
+ -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
92
92
  else
93
- [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, searchkick_klass.model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
93
+ [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
94
94
  end
95
95
  end
96
96
  end
@@ -9,7 +9,7 @@ module Searchkick
9
9
  attr_accessor :body
10
10
 
11
11
  def_delegators :execute, :map, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary,
12
- :records, :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
12
+ :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
13
13
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
14
14
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
15
15
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
@@ -19,7 +19,7 @@ module Searchkick
19
19
  def initialize(klass, term = "*", **options)
20
20
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
21
21
  :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
22
- :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
22
+ :fields, :highlight, :includes, :index_name, :indices_boost, :knn, :limit, :load,
23
23
  :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
24
24
  :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
25
25
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
@@ -191,7 +191,7 @@ module Searchkick
191
191
  end
192
192
 
193
193
  def retry_misspellings?(response)
194
- @misspellings_below && Results.new(searchkick_klass, response).total_count < @misspellings_below
194
+ @misspellings_below && response["error"].nil? && Results.new(searchkick_klass, response).total_count < @misspellings_below
195
195
  end
196
196
 
197
197
  private
@@ -199,7 +199,11 @@ module Searchkick
199
199
  def handle_error(e)
200
200
  status_code = e.message[1..3].to_i
201
201
  if status_code == 404
202
- raise MissingIndexError, "Index missing - run #{reindex_command}"
202
+ if e.message.include?("No search context found for id")
203
+ raise MissingIndexError, "No search context found for id"
204
+ else
205
+ raise MissingIndexError, "Index missing - run #{reindex_command}"
206
+ end
203
207
  elsif status_code == 500 && (
204
208
  e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
205
209
  e.message.include?("No query registered for [multi_match]") ||
@@ -215,7 +219,7 @@ module Searchkick
215
219
  )
216
220
 
217
221
  raise UnsupportedVersionError
218
- elsif e.message =~ /analyzer \[searchkick_.+\] not found/
222
+ elsif e.message.match?(/analyzer \[searchkick_.+\] not found/)
219
223
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
220
224
  else
221
225
  raise InvalidQueryError, e.message
@@ -251,9 +255,16 @@ module Searchkick
251
255
  default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
252
256
  per_page = (options[:limit] || options[:per_page] || default_limit).to_i
253
257
  padding = [options[:padding].to_i, 0].max
254
- offset = options[:offset] || (page - 1) * per_page + padding
258
+ offset = (options[:offset] || (page - 1) * per_page + padding).to_i
255
259
  scroll = options[:scroll]
256
260
 
261
+ max_result_window = searchkick_options[:max_result_window]
262
+ original_per_page = per_page
263
+ if max_result_window
264
+ offset = max_result_window if offset > max_result_window
265
+ per_page = max_result_window - offset if offset + per_page > max_result_window
266
+ end
267
+
257
268
  # model and eager loading
258
269
  load = options[:load].nil? ? true : options[:load]
259
270
 
@@ -363,7 +374,7 @@ module Searchkick
363
374
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
364
375
 
365
376
  if field == "_all" || field.end_with?(".analyzed")
366
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
377
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?) || match_type == :match_phrase || !below80? || Searchkick.opensearch?
367
378
  qs << shared_options.merge(analyzer: "searchkick_search")
368
379
 
369
380
  # searchkick_search and searchkick_search2 are the same for some languages
@@ -377,7 +388,7 @@ module Searchkick
377
388
  exclude_field = f
378
389
  exclude_analyzer = "keyword"
379
390
  else
380
- analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
391
+ analyzer = field.match?(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
381
392
  qs << shared_options.merge(analyzer: analyzer)
382
393
  exclude_analyzer = analyzer
383
394
  end
@@ -499,7 +510,7 @@ module Searchkick
499
510
  set_highlights(payload, fields) if options[:highlight]
500
511
 
501
512
  # timeout shortly after client times out
502
- payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
513
+ payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"
503
514
 
504
515
  # An empty array will cause only the _id and _type for each hit to be returned
505
516
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
@@ -515,6 +526,9 @@ module Searchkick
515
526
  end
516
527
  end
517
528
 
529
+ # knn
530
+ set_knn(payload, options[:knn], per_page, offset) if options[:knn]
531
+
518
532
  # pagination
519
533
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
520
534
  if !options[:body] || pagination_options
@@ -548,7 +562,7 @@ module Searchkick
548
562
 
549
563
  @body = payload
550
564
  @page = page
551
- @per_page = per_page
565
+ @per_page = original_per_page
552
566
  @padding = padding
553
567
  @load = load
554
568
  @scroll = scroll
@@ -865,6 +879,119 @@ module Searchkick
865
879
  end
866
880
  end
867
881
 
882
+ def set_knn(payload, knn, per_page, offset)
883
+ if term != "*"
884
+ raise ArgumentError, "Use Searchkick.multi_search for hybrid search"
885
+ end
886
+
887
+ field = knn[:field]
888
+ field_options = searchkick_options.dig(:knn, field.to_sym) || searchkick_options.dig(:knn, field.to_s) || {}
889
+ vector = knn[:vector]
890
+ distance = knn[:distance] || field_options[:distance]
891
+ exact = knn[:exact]
892
+ exact = field_options[:distance].nil? || distance != field_options[:distance] if exact.nil?
893
+ k = per_page + offset
894
+ filter = payload.delete(:query)
895
+
896
+ if distance.nil?
897
+ raise ArgumentError, "distance required"
898
+ elsif !exact && distance != field_options[:distance]
899
+ raise ArgumentError, "distance must match searchkick options for approximate search"
900
+ end
901
+
902
+ if Searchkick.opensearch?
903
+ if exact
904
+ # https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/#spaces
905
+ space_type =
906
+ case distance
907
+ when "cosine"
908
+ "cosinesimil"
909
+ when "euclidean"
910
+ "l2"
911
+ when "taxicab"
912
+ "l1"
913
+ when "inner_product"
914
+ "innerproduct"
915
+ when "chebyshev"
916
+ "linf"
917
+ else
918
+ raise ArgumentError, "Unknown distance: #{distance}"
919
+ end
920
+
921
+ payload[:query] = {
922
+ script_score: {
923
+ query: {
924
+ bool: {
925
+ must: [filter, {exists: {field: field}}]
926
+ }
927
+ },
928
+ script: {
929
+ source: "knn_score",
930
+ lang: "knn",
931
+ params: {
932
+ field: field,
933
+ query_value: vector,
934
+ space_type: space_type
935
+ }
936
+ },
937
+ boost: distance == "cosine" ? 0.5 : 1.0
938
+ }
939
+ }
940
+ else
941
+ payload[:query] = {
942
+ knn: {
943
+ field.to_sym => {
944
+ vector: vector,
945
+ k: k,
946
+ filter: filter
947
+ }
948
+ }
949
+ }
950
+ end
951
+ else
952
+ if exact
953
+ # https://github.com/elastic/elasticsearch/blob/main/docs/reference/vectors/vector-functions.asciidoc
954
+ source =
955
+ case distance
956
+ when "cosine"
957
+ "(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5"
958
+ when "euclidean"
959
+ "double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)"
960
+ when "taxicab"
961
+ "1 / (1 + l1norm(params.query_vector, params.field))"
962
+ when "inner_product"
963
+ "double dot = dotProduct(params.query_vector, params.field); dot > 0 ? dot + 1 : 1 / (1 - dot)"
964
+ else
965
+ raise ArgumentError, "Unknown distance: #{distance}"
966
+ end
967
+
968
+ payload[:query] = {
969
+ script_score: {
970
+ query: {
971
+ bool: {
972
+ must: [filter, {exists: {field: field}}]
973
+ }
974
+ },
975
+ script: {
976
+ source: source,
977
+ params: {
978
+ field: field,
979
+ query_vector: vector
980
+ }
981
+ }
982
+ }
983
+ }
984
+ else
985
+ payload[:knn] = {
986
+ field: field,
987
+ query_vector: vector,
988
+ k: k,
989
+ filter: filter
990
+ }
991
+ end
992
+ end
993
+ end
994
+
868
995
  def set_post_filters(payload, post_filters)
869
996
  payload[:post_filter] = {
870
997
  bool: {
@@ -874,7 +1001,8 @@ module Searchkick
874
1001
  end
875
1002
 
876
1003
  def set_order(payload)
877
- payload[:sort] = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
1004
+ value = options[:order]
1005
+ payload[:sort] = value.is_a?(Enumerable) ? value : {value => :asc}
878
1006
  end
879
1007
 
880
1008
  # provides *very* basic protection from unfiltered parameters
@@ -898,8 +1026,12 @@ module Searchkick
898
1026
  filters << {bool: {must_not: where_filters(value)}}
899
1027
  elsif field == :_and
900
1028
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
901
- # elsif field == :_script
902
- # filters << {script: {script: {source: value, lang: "painless"}}}
1029
+ elsif field == :_script
1030
+ unless value.is_a?(Script)
1031
+ raise TypeError, "expected Searchkick::Script"
1032
+ end
1033
+
1034
+ filters << {script: {script: {source: value.source, lang: value.lang, params: value.params}}}
903
1035
  else
904
1036
  # expand ranges
905
1037
  if value.is_a?(Range)
@@ -992,6 +1124,11 @@ module Searchkick
992
1124
  when :in
993
1125
  filters << term_filters(field, op_value)
994
1126
  when :exists
1127
+ # TODO add support for false in Searchkick 6
1128
+ if op_value != true
1129
+ # TODO raise error in Searchkick 6
1130
+ Searchkick.warn("Passing a value other than true to exists is not supported")
1131
+ end
995
1132
  filters << {exists: {field: field}}
996
1133
  else
997
1134
  range_query =
@@ -1160,5 +1297,9 @@ module Searchkick
1160
1297
  def below710?
1161
1298
  Searchkick.server_below?("7.10.0")
1162
1299
  end
1300
+
1301
+ def below80?
1302
+ Searchkick.server_below?("8.0.0")
1303
+ end
1163
1304
  end
1164
1305
  end
@@ -10,7 +10,7 @@ module Searchkick
10
10
 
11
11
  # supports single and multiple ids
12
12
  def push(record_ids)
13
- Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
13
+ Searchkick.with_redis { |r| r.call("LPUSH", redis_key, record_ids) }
14
14
  end
15
15
 
16
16
  def push_records(records)
@@ -34,11 +34,11 @@ module Searchkick
34
34
  # TODO use reliable queuing
35
35
  def reserve(limit: 1000)
36
36
  if supports_rpop_with_count?
37
- Searchkick.with_redis { |r| r.call("rpop", redis_key, limit) }.to_a
37
+ Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
38
38
  else
39
39
  record_ids = []
40
40
  Searchkick.with_redis do |r|
41
- while record_ids.size < limit && (record_id = r.rpop(redis_key))
41
+ while record_ids.size < limit && (record_id = r.call("RPOP", redis_key))
42
42
  record_ids << record_id
43
43
  end
44
44
  end
@@ -47,11 +47,11 @@ module Searchkick
47
47
  end
48
48
 
49
49
  def clear
50
- Searchkick.with_redis { |r| r.del(redis_key) }
50
+ Searchkick.with_redis { |r| r.call("DEL", redis_key) }
51
51
  end
52
52
 
53
53
  def length
54
- Searchkick.with_redis { |r| r.llen(redis_key) }
54
+ Searchkick.with_redis { |r| r.call("LLEN", redis_key) }
55
55
  end
56
56
 
57
57
  private
@@ -65,11 +65,16 @@ module Searchkick
65
65
  end
66
66
 
67
67
  def redis_version
68
- @redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
68
+ @redis_version ||=
69
+ Searchkick.with_redis do |r|
70
+ info = r.call("INFO")
71
+ matches = /redis_version:(\S+)/.match(info)
72
+ Gem::Version.new(matches[1])
73
+ end
69
74
  end
70
75
 
71
76
  def escape(value)
72
- value.gsub("|", "||")
77
+ value.to_s.gsub("|", "||")
73
78
  end
74
79
  end
75
80
  end