searchkick 5.5.2 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,18 @@
1
1
  module Searchkick
2
- class BulkReindexJob < ActiveJob::Base
2
+ class BulkReindexJob < Searchkick.parent_job.constantize
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- # TODO remove min_id and max_id in Searchkick 6
6
- def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
5
+ def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil, ignore_missing: nil)
7
6
  model = Searchkick.load_model(class_name)
8
7
  index = model.searchkick_index(name: index_name)
9
8
 
10
- # legacy
11
9
  record_ids ||= min_id..max_id
12
10
 
13
11
  relation = Searchkick.scope(model)
14
12
  relation = Searchkick.load_records(relation, record_ids)
15
13
  relation = relation.search_import if relation.respond_to?(:search_import)
16
14
 
17
- RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, full: false)
15
+ RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, ignore_missing: ignore_missing, full: false)
18
16
  RelationIndexer.new(index).batch_completed(batch_id) if batch_id
19
17
  end
20
18
  end
@@ -1,12 +1,33 @@
1
1
  module Searchkick
2
- # Subclass of `Hashie::Mash` to wrap Hash-like structures
3
- # (responses from Elasticsearch)
4
- #
5
- # The primary goal of the subclass is to disable the
6
- # warning being printed by Hashie for re-defined
7
- # methods, such as `sort`.
8
- #
9
- class HashWrapper < ::Hashie::Mash
10
- disable_warnings if respond_to?(:disable_warnings)
2
+ class HashWrapper
3
+ def initialize(attributes)
4
+ @attributes = attributes
5
+ end
6
+
7
+ def [](name)
8
+ @attributes[name.to_s]
9
+ end
10
+
11
+ def to_h
12
+ @attributes
13
+ end
14
+
15
+ def method_missing(name, ...)
16
+ if @attributes.key?(name.to_s)
17
+ self[name]
18
+ else
19
+ super
20
+ end
21
+ end
22
+
23
+ def respond_to_missing?(name, ...)
24
+ @attributes.key?(name.to_s) || super
25
+ end
26
+
27
+ def inspect
28
+ attributes = @attributes.reject { |k, v| k[0] == "_" }.map { |k, v| "#{k}: #{v.inspect}" }
29
+ attributes.unshift(attributes.pop) # move id to start
30
+ "#<#{self.class.name} #{attributes.join(", ")}>"
31
+ end
11
32
  end
12
33
  end
@@ -164,11 +164,11 @@ module Searchkick
164
164
  end
165
165
  alias_method :import, :bulk_index
166
166
 
167
- def bulk_update(records, method_name)
167
+ def bulk_update(records, method_name, ignore_missing: nil)
168
168
  return if records.empty?
169
169
 
170
170
  notify_bulk(records, "Update") do
171
- queue_update(records, method_name)
171
+ queue_update(records, method_name, ignore_missing: ignore_missing)
172
172
  end
173
173
  end
174
174
 
@@ -192,7 +192,6 @@ module Searchkick
192
192
  if Searchkick.opensearch?
193
193
  client.transport.perform_request "POST", "_plugins/_refresh_search_analyzers/#{CGI.escape(name)}"
194
194
  else
195
- raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
196
195
  begin
197
196
  client.transport.perform_request("GET", "#{CGI.escape(name)}/_reload_search_analyzers")
198
197
  rescue => e
@@ -212,10 +211,14 @@ module Searchkick
212
211
 
213
212
  # note: this is designed to be used internally
214
213
  # so it does not check object matches index class
215
- def reindex(object, method_name: nil, full: false, **options)
214
+ def reindex(object, method_name: nil, ignore_missing: nil, full: false, **options)
215
+ if @options[:job_options]
216
+ options[:job_options] = (@options[:job_options] || {}).merge(options[:job_options] || {})
217
+ end
218
+
216
219
  if object.is_a?(Array)
217
220
  # note: purposefully skip full
218
- return reindex_records(object, method_name: method_name, **options)
221
+ return reindex_records(object, method_name: method_name, ignore_missing: ignore_missing, **options)
219
222
  end
220
223
 
221
224
  if !object.respond_to?(:searchkick_klass)
@@ -232,22 +235,21 @@ module Searchkick
232
235
  if method_name || (scoped && !full)
233
236
  mode = options.delete(:mode) || :inline
234
237
  scope = options.delete(:scope)
238
+ job_options = options.delete(:job_options)
235
239
  raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
236
240
 
237
241
  # import only
238
- import_scope(relation, method_name: method_name, mode: mode, scope: scope)
242
+ import_scope(relation, method_name: method_name, mode: mode, scope: scope, ignore_missing: ignore_missing, job_options: job_options)
239
243
  self.refresh if refresh
240
244
  true
241
245
  else
242
246
  async = options.delete(:async)
243
247
  if async
244
248
  if async.is_a?(Hash) && async[:wait]
245
- # TODO warn in 5.1
246
- # Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
249
+ Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
247
250
  options[:wait] = true unless options.key?(:wait)
248
251
  else
249
- # TODO warn in 5.1
250
- # Searchkick.warn "async option is deprecated - use mode: :async instead"
252
+ Searchkick.warn "async option is deprecated - use mode: :async instead"
251
253
  end
252
254
  options[:mode] ||= :async
253
255
  end
@@ -292,6 +294,11 @@ module Searchkick
292
294
  end
293
295
  end
294
296
 
297
+ # private
298
+ def conversions_v2_fields
299
+ @conversions_v2_fields ||= Array(options[:conversions_v2]).map(&:to_s)
300
+ end
301
+
295
302
  # private
296
303
  def suggest_fields
297
304
  @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
@@ -324,8 +331,10 @@ module Searchkick
324
331
  Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(self, r).delete_data })
325
332
  end
326
333
 
327
- def queue_update(records, method_name)
328
- Searchkick.indexer.queue(records.map { |r| RecordData.new(self, r).update_data(method_name) })
334
+ def queue_update(records, method_name, ignore_missing:)
335
+ items = records.map { |r| RecordData.new(self, r).update_data(method_name) }
336
+ items.each { |i| i.instance_variable_set(:@ignore_missing, true) } if ignore_missing
337
+ Searchkick.indexer.queue(items)
329
338
  end
330
339
 
331
340
  def relation_indexer
@@ -351,10 +360,9 @@ module Searchkick
351
360
 
352
361
  # https://gist.github.com/jarosan/3124884
353
362
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
354
- def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil)
363
+ def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil, job_options: nil)
355
364
  raise ArgumentError, "wait only available in :async mode" if !wait.nil? && mode != :async
356
- # TODO raise ArgumentError in Searchkick 6
357
- Searchkick.warn("Full reindex does not support :queue mode - use :async mode instead") if mode == :queue
365
+ raise ArgumentError, "Full reindex does not support :queue mode - use :async mode instead" if mode == :queue
358
366
 
359
367
  if resume
360
368
  index_name = all_indices.sort.last
@@ -372,7 +380,8 @@ module Searchkick
372
380
  mode: (mode || :inline),
373
381
  full: true,
374
382
  resume: resume,
375
- scope: scope
383
+ scope: scope,
384
+ job_options: job_options
376
385
  }
377
386
 
378
387
  uuid = index.uuid
@@ -184,12 +184,6 @@ module Searchkick
184
184
  end
185
185
  end
186
186
 
187
- if options[:case_sensitive]
188
- settings[:analysis][:analyzer].each do |_, analyzer|
189
- analyzer[:filter].delete("lowercase")
190
- end
191
- end
192
-
193
187
  add_synonyms(settings)
194
188
  add_search_synonyms(settings)
195
189
 
@@ -199,6 +193,12 @@ module Searchkick
199
193
  end
200
194
  end
201
195
 
196
+ if options[:case_sensitive]
197
+ settings[:analysis][:analyzer].each do |_, analyzer|
198
+ analyzer[:filter].delete("lowercase")
199
+ end
200
+ end
201
+
202
202
  settings
203
203
  end
204
204
 
@@ -372,6 +372,16 @@ module Searchkick
372
372
  }
373
373
  end
374
374
 
375
+ Array(options[:conversions_v2]).each do |conversions_field|
376
+ mapping[conversions_field] = {
377
+ type: "rank_features"
378
+ }
379
+ end
380
+
381
+ if (Array(options[:conversions_v2]).map(&:to_s) & Array(options[:conversions]).map(&:to_s)).any?
382
+ raise ArgumentError, "Must have separate conversions fields"
383
+ end
384
+
375
385
  mapping_options =
376
386
  [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
377
387
  .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
@@ -423,6 +433,7 @@ module Searchkick
423
433
 
424
434
  (options[:knn] || []).each do |field, knn_options|
425
435
  distance = knn_options[:distance]
436
+ quantization = knn_options[:quantization]
426
437
 
427
438
  if Searchkick.opensearch?
428
439
  if distance.nil?
@@ -448,6 +459,10 @@ module Searchkick
448
459
  raise ArgumentError, "Unknown distance: #{distance}"
449
460
  end
450
461
 
462
+ if !quantization.nil?
463
+ raise ArgumentError, "Quantization not supported yet for OpenSearch"
464
+ end
465
+
451
466
  vector_options[:method] = {
452
467
  name: "hnsw",
453
468
  space_type: space_type,
@@ -477,13 +492,18 @@ module Searchkick
477
492
  raise ArgumentError, "Unknown distance: #{distance}"
478
493
  end
479
494
 
495
+ type =
496
+ case quantization
497
+ when "int8", "int4", "bbq"
498
+ "#{quantization}_hnsw"
499
+ when nil
500
+ "hnsw"
501
+ else
502
+ raise ArgumentError, "Unknown quantization: #{quantization}"
503
+ end
504
+
480
505
  vector_index_options = knn_options.slice(:m, :ef_construction)
481
- if vector_index_options.any?
482
- # TODO no quantization by default in Searchkick 6
483
- # int8_hnsw was made the default in Elasticsearch 8.14.0
484
- type = Searchkick.server_below?("8.14.0") ? "hnsw" : "int8_hnsw"
485
- vector_options[:index_options] = {type: type}.merge(vector_index_options)
486
- end
506
+ vector_options[:index_options] = {type: type}.merge(vector_index_options)
487
507
  end
488
508
 
489
509
  mapping[field.to_s] = vector_options
@@ -578,9 +598,9 @@ module Searchkick
578
598
  if search_synonyms.is_a?(String)
579
599
  synonym_graph = {
580
600
  type: "synonym_graph",
581
- synonyms_path: search_synonyms
601
+ synonyms_path: search_synonyms,
602
+ updateable: true
582
603
  }
583
- synonym_graph[:updateable] = true unless below73?
584
604
  else
585
605
  synonym_graph = {
586
606
  type: "synonym_graph",
@@ -628,9 +648,5 @@ module Searchkick
628
648
  def default_analyzer
629
649
  :searchkick_index
630
650
  end
631
-
632
- def below73?
633
- Searchkick.server_below?("7.3.0")
634
- end
635
651
  end
636
652
  end
@@ -24,12 +24,20 @@ module Searchkick
24
24
  # note: delete does not set error when item not found
25
25
  first_with_error = response["items"].map do |item|
26
26
  (item["index"] || item["delete"] || item["update"])
27
- end.find { |item| item["error"] }
28
- raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
27
+ end.find.with_index { |item, i| item["error"] && !ignore_missing?(items[i], item["error"]) }
28
+ if first_with_error
29
+ raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
30
+ end
29
31
  end
30
32
 
31
33
  # maybe return response in future
32
34
  nil
33
35
  end
36
+
37
+ private
38
+
39
+ def ignore_missing?(item, error)
40
+ error["type"] == "document_missing_exception" && item.instance_variable_defined?(:@ignore_missing)
41
+ end
34
42
  end
35
43
  end
@@ -24,7 +24,7 @@ module Searchkick
24
24
 
25
25
  index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
26
26
  type = payload[:query][:type]
27
- request_params = payload[:query].except(:index, :type, :body)
27
+ request_params = payload[:query].except(:index, :type, :body, :opaque_id)
28
28
 
29
29
  params = []
30
30
  request_params.each do |k, v|
@@ -1,10 +1,18 @@
1
1
  module Searchkick
2
2
  module Model
3
3
  def searchkick(**options)
4
- options = Searchkick.model_options.merge(options)
4
+ options = Searchkick.model_options.deep_merge(options)
5
5
 
6
- unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :case_sensitive, :conversions, :deep_paging, :default_fields,
7
- :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :knn, :language,
6
+ if options[:conversions]
7
+ Searchkick.warn("The `conversions` option is deprecated in favor of `conversions_v2`, which provides much better search performance. Upgrade to `conversions_v2` or rename `conversions` to `conversions_v1`")
8
+ end
9
+
10
+ if options.key?(:conversions_v1)
11
+ options[:conversions] = options.delete(:conversions_v1)
12
+ end
13
+
14
+ unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :callback_options, :case_sensitive, :conversions, :conversions_v2, :deep_paging, :default_fields,
15
+ :filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :job_options, :knn, :language,
8
16
  :locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
9
17
  :special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
10
18
  :text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
@@ -21,14 +29,16 @@ module Searchkick
21
29
  unless [:inline, true, false, :async, :queue].include?(callbacks)
22
30
  raise ArgumentError, "Invalid value for callbacks"
23
31
  end
32
+ callback_options = (options[:callback_options] || {}).dup
33
+ callback_options[:if] = [-> { Searchkick.callbacks?(default: callbacks) }, callback_options[:if]].compact.flatten(1)
24
34
 
25
35
  base = self
26
36
 
27
37
  mod = Module.new
28
38
  include(mod)
29
39
  mod.module_eval do
30
- def reindex(method_name = nil, mode: nil, refresh: false)
31
- self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, single: true)
40
+ def reindex(method_name = nil, mode: nil, refresh: false, ignore_missing: nil, job_options: nil)
41
+ self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, ignore_missing: ignore_missing, job_options: job_options, single: true)
32
42
  end unless base.method_defined?(:reindex)
33
43
 
34
44
  def similar(**options)
@@ -99,10 +109,10 @@ module Searchkick
99
109
  # always add callbacks, even when callbacks is false
100
110
  # so Model.callbacks block can be used
101
111
  if respond_to?(:after_commit)
102
- after_commit :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
112
+ after_commit :reindex, **callback_options
103
113
  elsif respond_to?(:after_save)
104
- after_save :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
105
- after_destroy :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
114
+ after_save :reindex, **callback_options
115
+ after_destroy :reindex, **callback_options
106
116
  end
107
117
  end
108
118
  end
@@ -2,8 +2,9 @@ module Searchkick
2
2
  class MultiSearch
3
3
  attr_reader :queries
4
4
 
5
- def initialize(queries)
5
+ def initialize(queries, opaque_id: nil)
6
6
  @queries = queries
7
+ @opaque_id = opaque_id
7
8
  end
8
9
 
9
10
  def perform
@@ -15,7 +16,11 @@ module Searchkick
15
16
  private
16
17
 
17
18
  def perform_search(search_queries, perform_retry: true)
18
- responses = client.msearch(body: search_queries.flat_map { |q| [q.params.except(:body), q.body] })["responses"]
19
+ params = {
20
+ body: search_queries.flat_map { |q| [q.params.except(:body), q.body] }
21
+ }
22
+ params[:opaque_id] = @opaque_id if @opaque_id
23
+ responses = client.msearch(params)["responses"]
19
24
 
20
25
  retry_queries = []
21
26
  search_queries.each_with_index do |query, i|
@@ -1,5 +1,5 @@
1
1
  module Searchkick
2
- class ProcessBatchJob < ActiveJob::Base
2
+ class ProcessBatchJob < Searchkick.parent_job.constantize
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
5
  def perform(class_name:, record_ids:, index_name: nil)
@@ -14,7 +14,7 @@ module Searchkick
14
14
  end
15
15
 
16
16
  relation = Searchkick.scope(model)
17
- RecordIndexer.new(index).reindex_items(relation, items, method_name: nil)
17
+ RecordIndexer.new(index).reindex_items(relation, items, method_name: nil, ignore_missing: nil)
18
18
  end
19
19
  end
20
20
  end
@@ -1,11 +1,12 @@
1
1
  module Searchkick
2
- class ProcessQueueJob < ActiveJob::Base
2
+ class ProcessQueueJob < Searchkick.parent_job.constantize
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
- def perform(class_name:, index_name: nil, inline: false)
5
+ def perform(class_name:, index_name: nil, inline: false, job_options: nil)
6
6
  model = Searchkick.load_model(class_name)
7
7
  index = model.searchkick_index(name: index_name)
8
8
  limit = model.searchkick_options[:batch_size] || 1000
9
+ job_options = (model.searchkick_options[:job_options] || {}).merge(job_options || {})
9
10
 
10
11
  loop do
11
12
  record_ids = index.reindex_queue.reserve(limit: limit)
@@ -20,7 +21,7 @@ module Searchkick
20
21
  # use new.perform to avoid excessive logging
21
22
  Searchkick::ProcessBatchJob.new.perform(**batch_options)
22
23
  else
23
- Searchkick::ProcessBatchJob.perform_later(**batch_options)
24
+ Searchkick::ProcessBatchJob.set(job_options).perform_later(**batch_options)
24
25
  end
25
26
 
26
27
  # TODO when moving to reliable queuing, mark as complete