searchkick 5.0.2 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,7 @@ module Searchkick
19
19
  mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
20
  end
21
21
 
22
- set_deep_paging(settings) if options[:deep_paging]
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
23
23
 
24
24
  {
25
25
  settings: settings,
@@ -169,6 +169,21 @@ module Searchkick
169
169
  max_shingle_diff: 4
170
170
  }
171
171
 
172
+ if options[:knn]
173
+ unless Searchkick.knn_support?
174
+ if Searchkick.opensearch?
175
+ raise Error, "knn requires OpenSearch 2.4+"
176
+ else
177
+ raise Error, "knn requires Elasticsearch 8.6+"
178
+ end
179
+ end
180
+
181
+ if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? }
182
+ # only enable if doing approximate search
183
+ settings[:index][:knn] = true
184
+ end
185
+ end
186
+
172
187
  if options[:case_sensitive]
173
188
  settings[:analysis][:analyzer].each do |_, analyzer|
174
189
  analyzer[:filter].delete("lowercase")
@@ -406,6 +421,66 @@ module Searchkick
406
421
  mapping[field] = shape_options.merge(type: "geo_shape")
407
422
  end
408
423
 
424
+ (options[:knn] || []).each do |field, knn_options|
425
+ distance = knn_options[:distance]
426
+
427
+ if Searchkick.opensearch?
428
+ if distance.nil?
429
+ # avoid server crash if method not specified
430
+ raise ArgumentError, "Must specify a distance for OpenSearch"
431
+ end
432
+
433
+ vector_options = {
434
+ type: "knn_vector",
435
+ dimension: knn_options[:dimensions]
436
+ }
437
+
438
+ if !distance.nil?
439
+ space_type =
440
+ case distance
441
+ when "cosine"
442
+ "cosinesimil"
443
+ when "euclidean"
444
+ "l2"
445
+ when "inner_product"
446
+ "innerproduct"
447
+ else
448
+ raise ArgumentError, "Unknown distance: #{distance}"
449
+ end
450
+
451
+ vector_options[:method] = {
452
+ name: "hnsw",
453
+ space_type: space_type,
454
+ engine: "lucene"
455
+ }
456
+ end
457
+
458
+ mapping[field.to_s] = vector_options
459
+ else
460
+ vector_options = {
461
+ type: "dense_vector",
462
+ dims: knn_options[:dimensions],
463
+ index: !distance.nil?
464
+ }
465
+
466
+ if !distance.nil?
467
+ vector_options[:similarity] =
468
+ case distance
469
+ when "cosine"
470
+ "cosine"
471
+ when "euclidean"
472
+ "l2_norm"
473
+ when "inner_product"
474
+ "max_inner_product"
475
+ else
476
+ raise ArgumentError, "Unknown distance: #{distance}"
477
+ end
478
+ end
479
+
480
+ mapping[field.to_s] = vector_options
481
+ end
482
+ end
483
+
409
484
  if options[:inheritance]
410
485
  mapping[:type] = keyword_mapping
411
486
  end
@@ -525,7 +600,7 @@ module Searchkick
525
600
  def set_deep_paging(settings)
526
601
  if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
602
  settings[:index] ||= {}
528
- settings[:index][:max_result_window] = 1_000_000_000
603
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
529
604
  end
530
605
  end
531
606
 
@@ -31,7 +31,7 @@ module Searchkick
31
31
  params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
32
  end
33
33
 
34
- debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
34
+ debug " #{color(name, YELLOW, bold: true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
35
  end
36
36
 
37
37
  def request(event)
@@ -41,7 +41,7 @@ module Searchkick
41
41
  payload = event.payload
42
42
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
43
 
44
- debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
44
+ debug " #{color(name, YELLOW, bold: true)} #{payload.except(:name).to_json}"
45
45
  end
46
46
 
47
47
  def multi_search(event)
@@ -51,7 +51,7 @@ module Searchkick
51
51
  payload = event.payload
52
52
  name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
53
 
54
- debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
54
+ debug " #{color(name, YELLOW, bold: true)} _msearch #{payload[:body]}"
55
55
  end
56
56
  end
57
57
  end
@@ -3,8 +3,15 @@ require "faraday"
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware
5
5
  def call(env)
6
- if env[:method] == :get && env[:url].path.to_s.end_with?("/_search")
6
+ path = env[:url].path.to_s
7
+ if path.end_with?("/_search")
7
8
  env[:request][:timeout] = Searchkick.search_timeout
9
+ elsif path.end_with?("/_msearch")
10
+ # assume no concurrent searches for timeout for now
11
+ searches = env[:request_body].count("\n") / 2
12
+ # do not allow timeout to exceed Searchkick.timeout
13
+ timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min
14
+ env[:request][:timeout] = timeout
8
15
  end
9
16
  @app.call(env)
10
17
  end
@@ -4,8 +4,8 @@ module Searchkick
4
4
  options = Searchkick.model_options.merge(options)
5
5
 
6
6
  unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
7
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
8
+ :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
9
  :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
10
  :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
11
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
@@ -66,7 +66,7 @@ module Searchkick
66
66
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
67
67
 
68
68
  def searchkick_index(name: nil)
69
- index_name = name || searchkick_index_name
69
+ index_name = name || searchkick_klass.searchkick_index_name
70
70
  index_name = index_name.call if index_name.respond_to?(:call)
71
71
  index_cache = class_variable_get(:@@searchkick_index_cache)
72
72
  index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
@@ -88,9 +88,9 @@ module Searchkick
88
88
  if options[:index_name]
89
89
  options[:index_name]
90
90
  elsif options[:index_prefix].respond_to?(:call)
91
- -> { [options[:index_prefix].call, searchkick_klass.model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
91
+ -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
92
92
  else
93
- [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, searchkick_klass.model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
93
+ [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
94
94
  end
95
95
  end
96
96
  end
@@ -9,7 +9,7 @@ module Searchkick
9
9
  attr_accessor :body
10
10
 
11
11
  def_delegators :execute, :map, :each, :any?, :empty?, :size, :length, :slice, :[], :to_ary,
12
- :records, :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
12
+ :results, :suggestions, :each_with_hit, :with_details, :aggregations, :aggs,
13
13
  :took, :error, :model_name, :entry_name, :total_count, :total_entries,
14
14
  :current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
15
15
  :offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
@@ -19,7 +19,7 @@ module Searchkick
19
19
  def initialize(klass, term = "*", **options)
20
20
  unknown_keywords = options.keys - [:aggs, :block, :body, :body_options, :boost,
21
21
  :boost_by, :boost_by_distance, :boost_by_recency, :boost_where, :conversions, :conversions_term, :debug, :emoji, :exclude, :explain,
22
- :fields, :highlight, :includes, :index_name, :indices_boost, :limit, :load,
22
+ :fields, :highlight, :includes, :index_name, :indices_boost, :knn, :limit, :load,
23
23
  :match, :misspellings, :models, :model_includes, :offset, :operator, :order, :padding, :page, :per_page, :profile,
24
24
  :request_params, :routing, :scope_results, :scroll, :select, :similar, :smart_aggs, :suggest, :total_entries, :track, :type, :where]
25
25
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
@@ -191,7 +191,7 @@ module Searchkick
191
191
  end
192
192
 
193
193
  def retry_misspellings?(response)
194
- @misspellings_below && Results.new(searchkick_klass, response).total_count < @misspellings_below
194
+ @misspellings_below && response["error"].nil? && Results.new(searchkick_klass, response).total_count < @misspellings_below
195
195
  end
196
196
 
197
197
  private
@@ -199,7 +199,11 @@ module Searchkick
199
199
  def handle_error(e)
200
200
  status_code = e.message[1..3].to_i
201
201
  if status_code == 404
202
- raise MissingIndexError, "Index missing - run #{reindex_command}"
202
+ if e.message.include?("No search context found for id")
203
+ raise MissingIndexError, "No search context found for id"
204
+ else
205
+ raise MissingIndexError, "Index missing - run #{reindex_command}"
206
+ end
203
207
  elsif status_code == 500 && (
204
208
  e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") ||
205
209
  e.message.include?("No query registered for [multi_match]") ||
@@ -215,7 +219,7 @@ module Searchkick
215
219
  )
216
220
 
217
221
  raise UnsupportedVersionError
218
- elsif e.message =~ /analyzer \[searchkick_.+\] not found/
222
+ elsif e.message.match?(/analyzer \[searchkick_.+\] not found/)
219
223
  raise InvalidQueryError, "Bad mapping - run #{reindex_command}"
220
224
  else
221
225
  raise InvalidQueryError, e.message
@@ -251,9 +255,16 @@ module Searchkick
251
255
  default_limit = searchkick_options[:deep_paging] ? 1_000_000_000 : 10_000
252
256
  per_page = (options[:limit] || options[:per_page] || default_limit).to_i
253
257
  padding = [options[:padding].to_i, 0].max
254
- offset = options[:offset] || (page - 1) * per_page + padding
258
+ offset = (options[:offset] || (page - 1) * per_page + padding).to_i
255
259
  scroll = options[:scroll]
256
260
 
261
+ max_result_window = searchkick_options[:max_result_window]
262
+ original_per_page = per_page
263
+ if max_result_window
264
+ offset = max_result_window if offset > max_result_window
265
+ per_page = max_result_window - offset if offset + per_page > max_result_window
266
+ end
267
+
257
268
  # model and eager loading
258
269
  load = options[:load].nil? ? true : options[:load]
259
270
 
@@ -363,7 +374,7 @@ module Searchkick
363
374
  field_misspellings = misspellings && (!misspellings_fields || misspellings_fields.include?(base_field(field)))
364
375
 
365
376
  if field == "_all" || field.end_with?(".analyzed")
366
- shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?)
377
+ shared_options[:cutoff_frequency] = 0.001 unless operator.to_s == "and" || field_misspellings == false || (!below73? && !track_total_hits?) || match_type == :match_phrase || !below80? || Searchkick.opensearch?
367
378
  qs << shared_options.merge(analyzer: "searchkick_search")
368
379
 
369
380
  # searchkick_search and searchkick_search2 are the same for some languages
@@ -377,7 +388,7 @@ module Searchkick
377
388
  exclude_field = f
378
389
  exclude_analyzer = "keyword"
379
390
  else
380
- analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
391
+ analyzer = field.match?(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
381
392
  qs << shared_options.merge(analyzer: analyzer)
382
393
  exclude_analyzer = analyzer
383
394
  end
@@ -499,7 +510,7 @@ module Searchkick
499
510
  set_highlights(payload, fields) if options[:highlight]
500
511
 
501
512
  # timeout shortly after client times out
502
- payload[:timeout] ||= "#{Searchkick.search_timeout + 1}s"
513
+ payload[:timeout] ||= "#{((Searchkick.search_timeout + 1) * 1000).round}ms"
503
514
 
504
515
  # An empty array will cause only the _id and _type for each hit to be returned
505
516
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html
@@ -515,6 +526,9 @@ module Searchkick
515
526
  end
516
527
  end
517
528
 
529
+ # knn
530
+ set_knn(payload, options[:knn], per_page, offset) if options[:knn]
531
+
518
532
  # pagination
519
533
  pagination_options = options[:page] || options[:limit] || options[:per_page] || options[:offset] || options[:padding]
520
534
  if !options[:body] || pagination_options
@@ -548,7 +562,7 @@ module Searchkick
548
562
 
549
563
  @body = payload
550
564
  @page = page
551
- @per_page = per_page
565
+ @per_page = original_per_page
552
566
  @padding = padding
553
567
  @load = load
554
568
  @scroll = scroll
@@ -865,6 +879,119 @@ module Searchkick
865
879
  end
866
880
  end
867
881
 
882
+ def set_knn(payload, knn, per_page, offset)
883
+ if term != "*"
884
+ raise ArgumentError, "Use Searchkick.multi_search for hybrid search"
885
+ end
886
+
887
+ field = knn[:field]
888
+ field_options = searchkick_options.dig(:knn, field.to_sym) || searchkick_options.dig(:knn, field.to_s) || {}
889
+ vector = knn[:vector]
890
+ distance = knn[:distance] || field_options[:distance]
891
+ exact = knn[:exact]
892
+ exact = field_options[:distance].nil? || distance != field_options[:distance] if exact.nil?
893
+ k = per_page + offset
894
+ filter = payload.delete(:query)
895
+
896
+ if distance.nil?
897
+ raise ArgumentError, "distance required"
898
+ elsif !exact && distance != field_options[:distance]
899
+ raise ArgumentError, "distance must match searchkick options for approximate search"
900
+ end
901
+
902
+ if Searchkick.opensearch?
903
+ if exact
904
+ # https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/#spaces
905
+ space_type =
906
+ case distance
907
+ when "cosine"
908
+ "cosinesimil"
909
+ when "euclidean"
910
+ "l2"
911
+ when "taxicab"
912
+ "l1"
913
+ when "inner_product"
914
+ "innerproduct"
915
+ when "chebyshev"
916
+ "linf"
917
+ else
918
+ raise ArgumentError, "Unknown distance: #{distance}"
919
+ end
920
+
921
+ payload[:query] = {
922
+ script_score: {
923
+ query: {
924
+ bool: {
925
+ must: [filter, {exists: {field: field}}]
926
+ }
927
+ },
928
+ script: {
929
+ source: "knn_score",
930
+ lang: "knn",
931
+ params: {
932
+ field: field,
933
+ query_value: vector,
934
+ space_type: space_type
935
+ }
936
+ },
937
+ boost: distance == "cosine" ? 0.5 : 1.0
938
+ }
939
+ }
940
+ else
941
+ payload[:query] = {
942
+ knn: {
943
+ field.to_sym => {
944
+ vector: vector,
945
+ k: k,
946
+ filter: filter
947
+ }
948
+ }
949
+ }
950
+ end
951
+ else
952
+ if exact
953
+ # https://github.com/elastic/elasticsearch/blob/main/docs/reference/vectors/vector-functions.asciidoc
954
+ source =
955
+ case distance
956
+ when "cosine"
957
+ "(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5"
958
+ when "euclidean"
959
+ "double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)"
960
+ when "taxicab"
961
+ "1 / (1 + l1norm(params.query_vector, params.field))"
962
+ when "inner_product"
963
+ "double dot = dotProduct(params.query_vector, params.field); dot > 0 ? dot + 1 : 1 / (1 - dot)"
964
+ else
965
+ raise ArgumentError, "Unknown distance: #{distance}"
966
+ end
967
+
968
+ payload[:query] = {
969
+ script_score: {
970
+ query: {
971
+ bool: {
972
+ must: [filter, {exists: {field: field}}]
973
+ }
974
+ },
975
+ script: {
976
+ source: source,
977
+ params: {
978
+ field: field,
979
+ query_vector: vector
980
+ }
981
+ }
982
+ }
983
+ }
984
+ else
985
+ payload[:knn] = {
986
+ field: field,
987
+ query_vector: vector,
988
+ k: k,
989
+ filter: filter
990
+ }
991
+ end
992
+ end
993
+ end
994
+
868
995
  def set_post_filters(payload, post_filters)
869
996
  payload[:post_filter] = {
870
997
  bool: {
@@ -874,7 +1001,8 @@ module Searchkick
874
1001
  end
875
1002
 
876
1003
  def set_order(payload)
877
- payload[:sort] = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
1004
+ value = options[:order]
1005
+ payload[:sort] = value.is_a?(Enumerable) ? value : {value => :asc}
878
1006
  end
879
1007
 
880
1008
  # provides *very* basic protection from unfiltered parameters
@@ -898,8 +1026,12 @@ module Searchkick
898
1026
  filters << {bool: {must_not: where_filters(value)}}
899
1027
  elsif field == :_and
900
1028
  filters << {bool: {must: value.map { |or_statement| {bool: {filter: where_filters(or_statement)}} }}}
901
- # elsif field == :_script
902
- # filters << {script: {script: {source: value, lang: "painless"}}}
1029
+ elsif field == :_script
1030
+ unless value.is_a?(Script)
1031
+ raise TypeError, "expected Searchkick::Script"
1032
+ end
1033
+
1034
+ filters << {script: {script: {source: value.source, lang: value.lang, params: value.params}}}
903
1035
  else
904
1036
  # expand ranges
905
1037
  if value.is_a?(Range)
@@ -992,6 +1124,11 @@ module Searchkick
992
1124
  when :in
993
1125
  filters << term_filters(field, op_value)
994
1126
  when :exists
1127
+ # TODO add support for false in Searchkick 6
1128
+ if op_value != true
1129
+ # TODO raise error in Searchkick 6
1130
+ Searchkick.warn("Passing a value other than true to exists is not supported")
1131
+ end
995
1132
  filters << {exists: {field: field}}
996
1133
  else
997
1134
  range_query =
@@ -1160,5 +1297,9 @@ module Searchkick
1160
1297
  def below710?
1161
1298
  Searchkick.server_below?("7.10.0")
1162
1299
  end
1300
+
1301
+ def below80?
1302
+ Searchkick.server_below?("8.0.0")
1303
+ end
1163
1304
  end
1164
1305
  end
@@ -10,7 +10,7 @@ module Searchkick
10
10
 
11
11
  # supports single and multiple ids
12
12
  def push(record_ids)
13
- Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
13
+ Searchkick.with_redis { |r| r.call("LPUSH", redis_key, record_ids) }
14
14
  end
15
15
 
16
16
  def push_records(records)
@@ -34,11 +34,11 @@ module Searchkick
34
34
  # TODO use reliable queuing
35
35
  def reserve(limit: 1000)
36
36
  if supports_rpop_with_count?
37
- Searchkick.with_redis { |r| r.call("rpop", redis_key, limit) }.to_a
37
+ Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
38
38
  else
39
39
  record_ids = []
40
40
  Searchkick.with_redis do |r|
41
- while record_ids.size < limit && (record_id = r.rpop(redis_key))
41
+ while record_ids.size < limit && (record_id = r.call("RPOP", redis_key))
42
42
  record_ids << record_id
43
43
  end
44
44
  end
@@ -47,11 +47,11 @@ module Searchkick
47
47
  end
48
48
 
49
49
  def clear
50
- Searchkick.with_redis { |r| r.del(redis_key) }
50
+ Searchkick.with_redis { |r| r.call("DEL", redis_key) }
51
51
  end
52
52
 
53
53
  def length
54
- Searchkick.with_redis { |r| r.llen(redis_key) }
54
+ Searchkick.with_redis { |r| r.call("LLEN", redis_key) }
55
55
  end
56
56
 
57
57
  private
@@ -65,11 +65,16 @@ module Searchkick
65
65
  end
66
66
 
67
67
  def redis_version
68
- @redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
68
+ @redis_version ||=
69
+ Searchkick.with_redis do |r|
70
+ info = r.call("INFO")
71
+ matches = /redis_version:(\S+)/.match(info)
72
+ Gem::Version.new(matches[1])
73
+ end
69
74
  end
70
75
 
71
76
  def escape(value)
72
- value.gsub("|", "||")
77
+ value.to_s.gsub("|", "||")
73
78
  end
74
79
  end
75
80
  end