searchkick 4.6.3 → 5.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,21 +7,19 @@ module Searchkick
7
7
  end
8
8
 
9
9
  def index_options
10
- custom_mapping = options[:mappings] || {}
11
- if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
- # add type
13
- custom_mapping = {index_type => custom_mapping}
14
- end
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
15
13
 
16
14
  if options[:mappings] && !options[:merge_mappings]
17
- settings = options[:settings] || {}
18
- mappings = custom_mapping
15
+ settings = custom_settings
16
+ mappings = custom_mappings
19
17
  else
20
- settings = generate_settings
21
- mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
22
20
  end
23
21
 
24
- set_deep_paging(settings) if options[:deep_paging]
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
25
23
 
26
24
  {
27
25
  settings: settings,
@@ -162,16 +160,28 @@ module Searchkick
162
160
  settings[:number_of_replicas] = 0
163
161
  end
164
162
 
165
- # TODO remove in Searchkick 5 (classic no longer supported)
166
163
  if options[:similarity]
167
164
  settings[:similarity] = {default: {type: options[:similarity]}}
168
165
  end
169
166
 
170
- unless below62?
171
- settings[:index] = {
172
- max_ngram_diff: 49,
173
- max_shingle_diff: 4
174
- }
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
171
+
172
+ if options[:knn]
173
+ unless Searchkick.knn_support?
174
+ if Searchkick.opensearch?
175
+ raise Error, "knn requires OpenSearch 2.4+"
176
+ else
177
+ raise Error, "knn requires Elasticsearch 8.6+"
178
+ end
179
+ end
180
+
181
+ if Searchkick.opensearch? && options[:knn].any? { |_, v| !v[:distance].nil? }
182
+ # only enable if doing approximate search
183
+ settings[:index][:knn] = true
184
+ end
175
185
  end
176
186
 
177
187
  if options[:case_sensitive]
@@ -180,13 +190,8 @@ module Searchkick
180
190
  end
181
191
  end
182
192
 
183
- # TODO do this last in Searchkick 5
184
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
185
-
186
193
  add_synonyms(settings)
187
194
  add_search_synonyms(settings)
188
- # TODO remove in Searchkick 5
189
- add_wordnet(settings) if options[:wordnet]
190
195
 
191
196
  if options[:special_characters] == false
192
197
  settings[:analysis][:analyzer].each_value do |analyzer_settings|
@@ -223,19 +228,7 @@ module Searchkick
223
228
  type: "smartcn"
224
229
  }
225
230
  )
226
- when "japanese"
227
- settings[:analysis][:analyzer].merge!(
228
- default_analyzer => {
229
- type: "kuromoji"
230
- },
231
- searchkick_search: {
232
- type: "kuromoji"
233
- },
234
- searchkick_search2: {
235
- type: "kuromoji"
236
- }
237
- )
238
- when "japanese2"
231
+ when "japanese", "japanese2"
239
232
  analyzer = {
240
233
  type: "custom",
241
234
  tokenizer: "kuromoji_tokenizer",
@@ -379,16 +372,15 @@ module Searchkick
379
372
  }
380
373
  end
381
374
 
382
- mapping_options = Hash[
375
+ mapping_options =
383
376
  [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
384
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
385
- ]
377
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
386
378
 
387
379
  word = options[:word] != false && (!options[:match] || options[:match] == :word)
388
380
 
389
381
  mapping_options[:searchable].delete("_all")
390
382
 
391
- analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
383
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
392
384
 
393
385
  mapping_options.values.flatten.uniq.each do |field|
394
386
  fields = {}
@@ -429,6 +421,75 @@ module Searchkick
429
421
  mapping[field] = shape_options.merge(type: "geo_shape")
430
422
  end
431
423
 
424
+ (options[:knn] || []).each do |field, knn_options|
425
+ distance = knn_options[:distance]
426
+
427
+ if Searchkick.opensearch?
428
+ if distance.nil?
429
+ # avoid server crash if method not specified
430
+ raise ArgumentError, "Must specify a distance for OpenSearch"
431
+ end
432
+
433
+ vector_options = {
434
+ type: "knn_vector",
435
+ dimension: knn_options[:dimensions]
436
+ }
437
+
438
+ if !distance.nil?
439
+ space_type =
440
+ case distance
441
+ when "cosine"
442
+ "cosinesimil"
443
+ when "euclidean"
444
+ "l2"
445
+ when "inner_product"
446
+ "innerproduct"
447
+ else
448
+ raise ArgumentError, "Unknown distance: #{distance}"
449
+ end
450
+
451
+ vector_options[:method] = {
452
+ name: "hnsw",
453
+ space_type: space_type,
454
+ engine: "lucene",
455
+ parameters: knn_options.slice(:m, :ef_construction)
456
+ }
457
+ end
458
+
459
+ mapping[field.to_s] = vector_options
460
+ else
461
+ vector_options = {
462
+ type: "dense_vector",
463
+ dims: knn_options[:dimensions],
464
+ index: !distance.nil?
465
+ }
466
+
467
+ if !distance.nil?
468
+ vector_options[:similarity] =
469
+ case distance
470
+ when "cosine"
471
+ "cosine"
472
+ when "euclidean"
473
+ "l2_norm"
474
+ when "inner_product"
475
+ "max_inner_product"
476
+ else
477
+ raise ArgumentError, "Unknown distance: #{distance}"
478
+ end
479
+
480
+ vector_index_options = knn_options.slice(:m, :ef_construction)
481
+ if vector_index_options.any?
482
+ # TODO no quantization by default in Searchkick 6
483
+ # int8_hnsw was made the default in Elasticsearch 8.14.0
484
+ type = Searchkick.server_below?("8.14.0") ? "hnsw" : "int8_hnsw"
485
+ vector_options[:index_options] = {type: type}.merge(vector_index_options)
486
+ end
487
+ end
488
+
489
+ mapping[field.to_s] = vector_options
490
+ end
491
+ end
492
+
432
493
  if options[:inheritance]
433
494
  mapping[:type] = keyword_mapping
434
495
  end
@@ -481,10 +542,6 @@ module Searchkick
481
542
  ]
482
543
  }
483
544
 
484
- if below70?
485
- mappings = {index_type => mappings}
486
- end
487
-
488
545
  mappings
489
546
  end
490
547
 
@@ -533,14 +590,14 @@ module Searchkick
533
590
  end
534
591
  settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
535
592
 
536
- if options[:language] == "japanese2"
593
+ if ["japanese", "japanese2"].include?(options[:language])
537
594
  [:searchkick_search, :searchkick_search2].each do |analyzer|
538
595
  settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
539
596
  end
540
597
  else
541
598
  [:searchkick_search2, :searchkick_word_search].each do |analyzer|
542
599
  unless settings[:analysis][:analyzer][analyzer].key?(:filter)
543
- raise Searchkick::Error, "Search synonyms are not supported yet for language"
600
+ raise Error, "Search synonyms are not supported yet for language"
544
601
  end
545
602
 
546
603
  settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
@@ -549,25 +606,10 @@ module Searchkick
549
606
  end
550
607
  end
551
608
 
552
- def add_wordnet(settings)
553
- settings[:analysis][:filter][:searchkick_wordnet] = {
554
- type: "synonym",
555
- format: "wordnet",
556
- synonyms_path: Searchkick.wordnet_path
557
- }
558
-
559
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
560
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
561
-
562
- %w(word_start word_middle word_end).each do |type|
563
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
564
- end
565
- end
566
-
567
609
  def set_deep_paging(settings)
568
610
  if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
569
611
  settings[:index] ||= {}
570
- settings[:index][:max_result_window] = 1_000_000_000
612
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
571
613
  end
572
614
  end
573
615
 
@@ -587,14 +629,6 @@ module Searchkick
587
629
  :searchkick_index
588
630
  end
589
631
 
590
- def below62?
591
- Searchkick.server_below?("6.2.0")
592
- end
593
-
594
- def below70?
595
- Searchkick.server_below?("7.0.0")
596
- end
597
-
598
632
  def below73?
599
633
  Searchkick.server_below?("7.3.0")
600
634
  end
@@ -1,3 +1,5 @@
1
+ # thread-local (technically fiber-local) indexer
2
+ # used to aggregate bulk callbacks across models
1
3
  module Searchkick
2
4
  class Indexer
3
5
  attr_reader :queued_items
@@ -14,15 +16,20 @@ module Searchkick
14
16
  def perform
15
17
  items = @queued_items
16
18
  @queued_items = []
17
- if items.any?
18
- response = Searchkick.client.bulk(body: items)
19
- if response["errors"]
20
- first_with_error = response["items"].map do |item|
21
- (item["index"] || item["delete"] || item["update"])
22
- end.find { |item| item["error"] }
23
- raise Searchkick::ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
24
- end
19
+
20
+ return if items.empty?
21
+
22
+ response = Searchkick.client.bulk(body: items)
23
+ if response["errors"]
24
+ # note: delete does not set error when item not found
25
+ first_with_error = response["items"].map do |item|
26
+ (item["index"] || item["delete"] || item["update"])
27
+ end.find { |item| item["error"] }
28
+ raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
25
29
  end
30
+
31
+ # maybe return response in future
32
+ nil
26
33
  end
27
34
  end
28
35
  end
@@ -0,0 +1,57 @@
1
+ # based on https://gist.github.com/mnutt/566725
2
+ module Searchkick
3
+ class LogSubscriber < ActiveSupport::LogSubscriber
4
+ def self.runtime=(value)
5
+ Thread.current[:searchkick_runtime] = value
6
+ end
7
+
8
+ def self.runtime
9
+ Thread.current[:searchkick_runtime] ||= 0
10
+ end
11
+
12
+ def self.reset_runtime
13
+ rt = runtime
14
+ self.runtime = 0
15
+ rt
16
+ end
17
+
18
+ def search(event)
19
+ self.class.runtime += event.duration
20
+ return unless logger.debug?
21
+
22
+ payload = event.payload
23
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
24
+
25
+ index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
26
+ type = payload[:query][:type]
27
+ request_params = payload[:query].except(:index, :type, :body)
28
+
29
+ params = []
30
+ request_params.each do |k, v|
31
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
+ end
33
+
34
+ debug " #{color(name, YELLOW, bold: true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
+ end
36
+
37
+ def request(event)
38
+ self.class.runtime += event.duration
39
+ return unless logger.debug?
40
+
41
+ payload = event.payload
42
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
+
44
+ debug " #{color(name, YELLOW, bold: true)} #{payload.except(:name).to_json}"
45
+ end
46
+
47
+ def multi_search(event)
48
+ self.class.runtime += event.duration
49
+ return unless logger.debug?
50
+
51
+ payload = event.payload
52
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
+
54
+ debug " #{color(name, YELLOW, bold: true)} _msearch #{payload[:body]}"
55
+ end
56
+ end
57
+ end
@@ -1,10 +1,17 @@
1
- require "faraday/middleware"
1
+ require "faraday"
2
2
 
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware
5
5
  def call(env)
6
- if env[:method] == :get && env[:url].path.to_s.end_with?("/_search")
6
+ path = env[:url].path.to_s
7
+ if path.end_with?("/_search")
7
8
  env[:request][:timeout] = Searchkick.search_timeout
9
+ elsif path.end_with?("/_msearch")
10
+ # assume no concurrent searches for timeout for now
11
+ searches = env[:request_body].count("\n") / 2
12
+ # do not allow timeout to exceed Searchkick.timeout
13
+ timeout = [Searchkick.search_timeout * searches, Searchkick.timeout].min
14
+ env[:request][:timeout] = timeout
8
15
  end
9
16
  @app.call(env)
10
17
  end
@@ -4,10 +4,10 @@ module Searchkick
4
4
  options = Searchkick.model_options.merge(options)
5
5
 
6
6
  unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :language,
8
- :locations, :mappings, :match, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
7
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
8
+ :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
9
  :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
- :text_middle, :text_start, :word, :wordnet, :word_end, :word_middle, :word_start]
10
+ :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
11
11
  raise ArgumentError, "unknown keywords: #{unknown_keywords.join(", ")}" if unknown_keywords.any?
12
12
 
13
13
  raise "Only call searchkick once per model" if respond_to?(:searchkick_index)
@@ -22,52 +22,78 @@ module Searchkick
22
22
  raise ArgumentError, "Invalid value for callbacks"
23
23
  end
24
24
 
25
- index_name =
26
- if options[:index_name]
27
- options[:index_name]
28
- elsif options[:index_prefix].respond_to?(:call)
29
- -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
30
- else
31
- [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
32
- end
25
+ base = self
26
+
27
+ mod = Module.new
28
+ include(mod)
29
+ mod.module_eval do
30
+ def reindex(method_name = nil, mode: nil, refresh: false)
31
+ self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, single: true)
32
+ end unless base.method_defined?(:reindex)
33
+
34
+ def similar(**options)
35
+ self.class.searchkick_index.similar_record(self, **options)
36
+ end unless base.method_defined?(:similar)
37
+
38
+ def search_data
39
+ data = respond_to?(:to_hash) ? to_hash : serializable_hash
40
+ data.delete("id")
41
+ data.delete("_id")
42
+ data.delete("_type")
43
+ data
44
+ end unless base.method_defined?(:search_data)
45
+
46
+ def should_index?
47
+ true
48
+ end unless base.method_defined?(:should_index?)
49
+ end
33
50
 
34
51
  class_eval do
35
- cattr_reader :searchkick_options, :searchkick_klass
52
+ cattr_reader :searchkick_options, :searchkick_klass, instance_reader: false
36
53
 
37
54
  class_variable_set :@@searchkick_options, options.dup
38
55
  class_variable_set :@@searchkick_klass, self
39
- class_variable_set :@@searchkick_index, index_name
40
- class_variable_set :@@searchkick_index_cache, {}
56
+ class_variable_set :@@searchkick_index_cache, Searchkick::IndexCache.new
41
57
 
42
58
  class << self
43
59
  def searchkick_search(term = "*", **options, &block)
44
- # TODO throw error in next major version
45
- Searchkick.warn("calling search on a relation is deprecated") if Searchkick.relation?(self)
60
+ if Searchkick.relation?(self)
61
+ raise Searchkick::Error, "search must be called on model, not relation"
62
+ end
46
63
 
47
64
  Searchkick.search(term, model: self, **options, &block)
48
65
  end
49
66
  alias_method Searchkick.search_method_name, :searchkick_search if Searchkick.search_method_name
50
67
 
51
68
  def searchkick_index(name: nil)
52
- index = name || class_variable_get(:@@searchkick_index)
53
- index = index.call if index.respond_to?(:call)
69
+ index_name = name || searchkick_klass.searchkick_index_name
70
+ index_name = index_name.call if index_name.respond_to?(:call)
54
71
  index_cache = class_variable_get(:@@searchkick_index_cache)
55
- index_cache[index] ||= Searchkick::Index.new(index, searchkick_options)
72
+ index_cache.fetch(index_name) { Searchkick::Index.new(index_name, searchkick_options) }
56
73
  end
57
74
  alias_method :search_index, :searchkick_index unless method_defined?(:search_index)
58
75
 
59
76
  def searchkick_reindex(method_name = nil, **options)
60
- # TODO relation = Searchkick.relation?(self)
61
- relation = (respond_to?(:current_scope) && respond_to?(:default_scoped) && current_scope && current_scope.to_sql != default_scoped.to_sql) ||
62
- (respond_to?(:queryable) && queryable != unscoped.with_default_scope)
63
-
64
- searchkick_index.reindex(searchkick_klass, method_name, scoped: relation, **options)
77
+ searchkick_index.reindex(self, method_name: method_name, **options)
65
78
  end
66
79
  alias_method :reindex, :searchkick_reindex unless method_defined?(:reindex)
67
80
 
68
81
  def searchkick_index_options
69
82
  searchkick_index.index_options
70
83
  end
84
+
85
+ def searchkick_index_name
86
+ @searchkick_index_name ||= begin
87
+ options = class_variable_get(:@@searchkick_options)
88
+ if options[:index_name]
89
+ options[:index_name]
90
+ elsif options[:index_prefix].respond_to?(:call)
91
+ -> { [options[:index_prefix].call, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_") }
92
+ else
93
+ [options.key?(:index_prefix) ? options[:index_prefix] : Searchkick.index_prefix, model_name.plural, Searchkick.env, Searchkick.index_suffix].compact.join("_")
94
+ end
95
+ end
96
+ end
71
97
  end
72
98
 
73
99
  # always add callbacks, even when callbacks is false
@@ -78,33 +104,6 @@ module Searchkick
78
104
  after_save :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
79
105
  after_destroy :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
80
106
  end
81
-
82
- def reindex(method_name = nil, **options)
83
- RecordIndexer.new(self).reindex(method_name, **options)
84
- end unless method_defined?(:reindex)
85
-
86
- # TODO switch to keyword arguments
87
- def similar(options = {})
88
- self.class.searchkick_index.similar_record(self, **options)
89
- end unless method_defined?(:similar)
90
-
91
- def search_data
92
- data = respond_to?(:to_hash) ? to_hash : serializable_hash
93
- data.delete("id")
94
- data.delete("_id")
95
- data.delete("_type")
96
- data
97
- end unless method_defined?(:search_data)
98
-
99
- def should_index?
100
- true
101
- end unless method_defined?(:should_index?)
102
-
103
- if defined?(Cequel) && self < Cequel::Record && !method_defined?(:destroyed?)
104
- def destroyed?
105
- transient?
106
- end
107
- end
108
107
  end
109
108
  end
110
109
  end
@@ -3,34 +3,18 @@ module Searchkick
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
5
  def perform(class_name:, record_ids:, index_name: nil)
6
- # separate routing from id
7
- routing = Hash[record_ids.map { |r| r.split(/(?<!\|)\|(?!\|)/, 2).map { |v| v.gsub("||", "|") } }]
8
- record_ids = routing.keys
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
9
8
 
10
- klass = class_name.constantize
11
- scope = Searchkick.load_records(klass, record_ids)
12
- scope = scope.search_import if scope.respond_to?(:search_import)
13
- records = scope.select(&:should_index?)
14
-
15
- # determine which records to delete
16
- delete_ids = record_ids - records.map { |r| r.id.to_s }
17
- delete_records = delete_ids.map do |id|
18
- m = klass.new
19
- m.id = id
20
- if routing[id]
21
- m.define_singleton_method(:search_routing) do
22
- routing[id]
23
- end
9
+ items =
10
+ record_ids.map do |r|
11
+ parts = r.split(/(?<!\|)\|(?!\|)/, 2)
12
+ .map { |v| v.gsub("||", "|") }
13
+ {id: parts[0], routing: parts[1]}
24
14
  end
25
- m
26
- end
27
15
 
28
- # bulk reindex
29
- index = klass.searchkick_index(name: index_name)
30
- Searchkick.callbacks(:bulk) do
31
- index.bulk_index(records) if records.any?
32
- index.bulk_delete(delete_records) if delete_records.any?
33
- end
16
+ relation = Searchkick.scope(model)
17
+ RecordIndexer.new(index).reindex_items(relation, items, method_name: nil)
34
18
  end
35
19
  end
36
20
  end
@@ -3,11 +3,12 @@ module Searchkick
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
5
  def perform(class_name:, index_name: nil, inline: false)
6
- model = class_name.constantize
6
+ model = Searchkick.load_model(class_name)
7
+ index = model.searchkick_index(name: index_name)
7
8
  limit = model.searchkick_options[:batch_size] || 1000
8
9
 
9
10
  loop do
10
- record_ids = model.searchkick_index(name: index_name).reindex_queue.reserve(limit: limit)
11
+ record_ids = index.reindex_queue.reserve(limit: limit)
11
12
  if record_ids.any?
12
13
  batch_options = {
13
14
  class_name: class_name,