searchkick 5.5.2 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +249 -209
- data/lib/searchkick/bulk_reindex_job.rb +3 -5
- data/lib/searchkick/hash_wrapper.rb +30 -9
- data/lib/searchkick/index.rb +25 -16
- data/lib/searchkick/index_options.rb +34 -18
- data/lib/searchkick/indexer.rb +10 -2
- data/lib/searchkick/log_subscriber.rb +1 -1
- data/lib/searchkick/model.rb +18 -8
- data/lib/searchkick/multi_search.rb +7 -2
- data/lib/searchkick/process_batch_job.rb +2 -2
- data/lib/searchkick/process_queue_job.rb +4 -3
- data/lib/searchkick/query.rb +90 -100
- data/lib/searchkick/record_data.rb +19 -0
- data/lib/searchkick/record_indexer.rb +20 -10
- data/lib/searchkick/reindex_queue.rb +1 -24
- data/lib/searchkick/reindex_v2_job.rb +3 -3
- data/lib/searchkick/relation.rb +504 -72
- data/lib/searchkick/relation_indexer.rb +39 -10
- data/lib/searchkick/results.rb +14 -9
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +12 -31
- metadata +4 -18
data/lib/searchkick/bulk_reindex_job.rb
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
module Searchkick
|
2
|
-
class BulkReindexJob < ActiveJob::Base
|
2
|
+
class BulkReindexJob < Searchkick.parent_job.constantize
|
3
3
|
queue_as { Searchkick.queue_name }
|
4
4
|
|
5
|
-
|
6
|
-
def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
|
5
|
+
def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil, ignore_missing: nil)
|
7
6
|
model = Searchkick.load_model(class_name)
|
8
7
|
index = model.searchkick_index(name: index_name)
|
9
8
|
|
10
|
-
# legacy
|
11
9
|
record_ids ||= min_id..max_id
|
12
10
|
|
13
11
|
relation = Searchkick.scope(model)
|
14
12
|
relation = Searchkick.load_records(relation, record_ids)
|
15
13
|
relation = relation.search_import if relation.respond_to?(:search_import)
|
16
14
|
|
17
|
-
RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, full: false)
|
15
|
+
RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, ignore_missing: ignore_missing, full: false)
|
18
16
|
RelationIndexer.new(index).batch_completed(batch_id) if batch_id
|
19
17
|
end
|
20
18
|
end
|
data/lib/searchkick/hash_wrapper.rb
CHANGED
@@ -1,12 +1,33 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
2
|
+
class HashWrapper
|
3
|
+
def initialize(attributes)
|
4
|
+
@attributes = attributes
|
5
|
+
end
|
6
|
+
|
7
|
+
def [](name)
|
8
|
+
@attributes[name.to_s]
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_h
|
12
|
+
@attributes
|
13
|
+
end
|
14
|
+
|
15
|
+
def method_missing(name, ...)
|
16
|
+
if @attributes.key?(name.to_s)
|
17
|
+
self[name]
|
18
|
+
else
|
19
|
+
super
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def respond_to_missing?(name, ...)
|
24
|
+
@attributes.key?(name.to_s) || super
|
25
|
+
end
|
26
|
+
|
27
|
+
def inspect
|
28
|
+
attributes = @attributes.reject { |k, v| k[0] == "_" }.map { |k, v| "#{k}: #{v.inspect}" }
|
29
|
+
attributes.unshift(attributes.pop) # move id to start
|
30
|
+
"#<#{self.class.name} #{attributes.join(", ")}>"
|
31
|
+
end
|
11
32
|
end
|
12
33
|
end
|
data/lib/searchkick/index.rb
CHANGED
@@ -164,11 +164,11 @@ module Searchkick
|
|
164
164
|
end
|
165
165
|
alias_method :import, :bulk_index
|
166
166
|
|
167
|
-
def bulk_update(records, method_name)
|
167
|
+
def bulk_update(records, method_name, ignore_missing: nil)
|
168
168
|
return if records.empty?
|
169
169
|
|
170
170
|
notify_bulk(records, "Update") do
|
171
|
-
queue_update(records, method_name)
|
171
|
+
queue_update(records, method_name, ignore_missing: ignore_missing)
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
@@ -192,7 +192,6 @@ module Searchkick
|
|
192
192
|
if Searchkick.opensearch?
|
193
193
|
client.transport.perform_request "POST", "_plugins/_refresh_search_analyzers/#{CGI.escape(name)}"
|
194
194
|
else
|
195
|
-
raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
|
196
195
|
begin
|
197
196
|
client.transport.perform_request("GET", "#{CGI.escape(name)}/_reload_search_analyzers")
|
198
197
|
rescue => e
|
@@ -212,10 +211,14 @@ module Searchkick
|
|
212
211
|
|
213
212
|
# note: this is designed to be used internally
|
214
213
|
# so it does not check object matches index class
|
215
|
-
def reindex(object, method_name: nil, full: false, **options)
|
214
|
+
def reindex(object, method_name: nil, ignore_missing: nil, full: false, **options)
|
215
|
+
if @options[:job_options]
|
216
|
+
options[:job_options] = (@options[:job_options] || {}).merge(options[:job_options] || {})
|
217
|
+
end
|
218
|
+
|
216
219
|
if object.is_a?(Array)
|
217
220
|
# note: purposefully skip full
|
218
|
-
return reindex_records(object, method_name: method_name, **options)
|
221
|
+
return reindex_records(object, method_name: method_name, ignore_missing: ignore_missing, **options)
|
219
222
|
end
|
220
223
|
|
221
224
|
if !object.respond_to?(:searchkick_klass)
|
@@ -232,22 +235,21 @@ module Searchkick
|
|
232
235
|
if method_name || (scoped && !full)
|
233
236
|
mode = options.delete(:mode) || :inline
|
234
237
|
scope = options.delete(:scope)
|
238
|
+
job_options = options.delete(:job_options)
|
235
239
|
raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
|
236
240
|
|
237
241
|
# import only
|
238
|
-
import_scope(relation, method_name: method_name, mode: mode, scope: scope)
|
242
|
+
import_scope(relation, method_name: method_name, mode: mode, scope: scope, ignore_missing: ignore_missing, job_options: job_options)
|
239
243
|
self.refresh if refresh
|
240
244
|
true
|
241
245
|
else
|
242
246
|
async = options.delete(:async)
|
243
247
|
if async
|
244
248
|
if async.is_a?(Hash) && async[:wait]
|
245
|
-
|
246
|
-
# Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
|
249
|
+
Searchkick.warn "async option is deprecated - use mode: :async, wait: true instead"
|
247
250
|
options[:wait] = true unless options.key?(:wait)
|
248
251
|
else
|
249
|
-
|
250
|
-
# Searchkick.warn "async option is deprecated - use mode: :async instead"
|
252
|
+
Searchkick.warn "async option is deprecated - use mode: :async instead"
|
251
253
|
end
|
252
254
|
options[:mode] ||= :async
|
253
255
|
end
|
@@ -292,6 +294,11 @@ module Searchkick
|
|
292
294
|
end
|
293
295
|
end
|
294
296
|
|
297
|
+
# private
|
298
|
+
def conversions_v2_fields
|
299
|
+
@conversions_v2_fields ||= Array(options[:conversions_v2]).map(&:to_s)
|
300
|
+
end
|
301
|
+
|
295
302
|
# private
|
296
303
|
def suggest_fields
|
297
304
|
@suggest_fields ||= Array(options[:suggest]).map(&:to_s)
|
@@ -324,8 +331,10 @@ module Searchkick
|
|
324
331
|
Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(self, r).delete_data })
|
325
332
|
end
|
326
333
|
|
327
|
-
def queue_update(records, method_name)
|
328
|
-
|
334
|
+
def queue_update(records, method_name, ignore_missing:)
|
335
|
+
items = records.map { |r| RecordData.new(self, r).update_data(method_name) }
|
336
|
+
items.each { |i| i.instance_variable_set(:@ignore_missing, true) } if ignore_missing
|
337
|
+
Searchkick.indexer.queue(items)
|
329
338
|
end
|
330
339
|
|
331
340
|
def relation_indexer
|
@@ -351,10 +360,9 @@ module Searchkick
|
|
351
360
|
|
352
361
|
# https://gist.github.com/jarosan/3124884
|
353
362
|
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
354
|
-
def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil)
|
363
|
+
def full_reindex(relation, import: true, resume: false, retain: false, mode: nil, refresh_interval: nil, scope: nil, wait: nil, job_options: nil)
|
355
364
|
raise ArgumentError, "wait only available in :async mode" if !wait.nil? && mode != :async
|
356
|
-
|
357
|
-
Searchkick.warn("Full reindex does not support :queue mode - use :async mode instead") if mode == :queue
|
365
|
+
raise ArgumentError, "Full reindex does not support :queue mode - use :async mode instead" if mode == :queue
|
358
366
|
|
359
367
|
if resume
|
360
368
|
index_name = all_indices.sort.last
|
@@ -372,7 +380,8 @@ module Searchkick
|
|
372
380
|
mode: (mode || :inline),
|
373
381
|
full: true,
|
374
382
|
resume: resume,
|
375
|
-
scope: scope
|
383
|
+
scope: scope,
|
384
|
+
job_options: job_options
|
376
385
|
}
|
377
386
|
|
378
387
|
uuid = index.uuid
|
data/lib/searchkick/index_options.rb
CHANGED
@@ -184,12 +184,6 @@ module Searchkick
|
|
184
184
|
end
|
185
185
|
end
|
186
186
|
|
187
|
-
if options[:case_sensitive]
|
188
|
-
settings[:analysis][:analyzer].each do |_, analyzer|
|
189
|
-
analyzer[:filter].delete("lowercase")
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
187
|
add_synonyms(settings)
|
194
188
|
add_search_synonyms(settings)
|
195
189
|
|
@@ -199,6 +193,12 @@ module Searchkick
|
|
199
193
|
end
|
200
194
|
end
|
201
195
|
|
196
|
+
if options[:case_sensitive]
|
197
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
198
|
+
analyzer[:filter].delete("lowercase")
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
202
|
settings
|
203
203
|
end
|
204
204
|
|
@@ -372,6 +372,16 @@ module Searchkick
|
|
372
372
|
}
|
373
373
|
end
|
374
374
|
|
375
|
+
Array(options[:conversions_v2]).each do |conversions_field|
|
376
|
+
mapping[conversions_field] = {
|
377
|
+
type: "rank_features"
|
378
|
+
}
|
379
|
+
end
|
380
|
+
|
381
|
+
if (Array(options[:conversions_v2]).map(&:to_s) & Array(options[:conversions]).map(&:to_s)).any?
|
382
|
+
raise ArgumentError, "Must have separate conversions fields"
|
383
|
+
end
|
384
|
+
|
375
385
|
mapping_options =
|
376
386
|
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
377
387
|
.to_h { |type| [type, (options[type] || []).map(&:to_s)] }
|
@@ -423,6 +433,7 @@ module Searchkick
|
|
423
433
|
|
424
434
|
(options[:knn] || []).each do |field, knn_options|
|
425
435
|
distance = knn_options[:distance]
|
436
|
+
quantization = knn_options[:quantization]
|
426
437
|
|
427
438
|
if Searchkick.opensearch?
|
428
439
|
if distance.nil?
|
@@ -448,6 +459,10 @@ module Searchkick
|
|
448
459
|
raise ArgumentError, "Unknown distance: #{distance}"
|
449
460
|
end
|
450
461
|
|
462
|
+
if !quantization.nil?
|
463
|
+
raise ArgumentError, "Quantization not supported yet for OpenSearch"
|
464
|
+
end
|
465
|
+
|
451
466
|
vector_options[:method] = {
|
452
467
|
name: "hnsw",
|
453
468
|
space_type: space_type,
|
@@ -477,13 +492,18 @@ module Searchkick
|
|
477
492
|
raise ArgumentError, "Unknown distance: #{distance}"
|
478
493
|
end
|
479
494
|
|
495
|
+
type =
|
496
|
+
case quantization
|
497
|
+
when "int8", "int4", "bbq"
|
498
|
+
"#{quantization}_hnsw"
|
499
|
+
when nil
|
500
|
+
"hnsw"
|
501
|
+
else
|
502
|
+
raise ArgumentError, "Unknown quantization: #{quantization}"
|
503
|
+
end
|
504
|
+
|
480
505
|
vector_index_options = knn_options.slice(:m, :ef_construction)
|
481
|
-
|
482
|
-
# TODO no quantization by default in Searchkick 6
|
483
|
-
# int8_hnsw was made the default in Elasticsearch 8.14.0
|
484
|
-
type = Searchkick.server_below?("8.14.0") ? "hnsw" : "int8_hnsw"
|
485
|
-
vector_options[:index_options] = {type: type}.merge(vector_index_options)
|
486
|
-
end
|
506
|
+
vector_options[:index_options] = {type: type}.merge(vector_index_options)
|
487
507
|
end
|
488
508
|
|
489
509
|
mapping[field.to_s] = vector_options
|
@@ -578,9 +598,9 @@ module Searchkick
|
|
578
598
|
if search_synonyms.is_a?(String)
|
579
599
|
synonym_graph = {
|
580
600
|
type: "synonym_graph",
|
581
|
-
synonyms_path: search_synonyms
|
601
|
+
synonyms_path: search_synonyms,
|
602
|
+
updateable: true
|
582
603
|
}
|
583
|
-
synonym_graph[:updateable] = true unless below73?
|
584
604
|
else
|
585
605
|
synonym_graph = {
|
586
606
|
type: "synonym_graph",
|
@@ -628,9 +648,5 @@ module Searchkick
|
|
628
648
|
def default_analyzer
|
629
649
|
:searchkick_index
|
630
650
|
end
|
631
|
-
|
632
|
-
def below73?
|
633
|
-
Searchkick.server_below?("7.3.0")
|
634
|
-
end
|
635
651
|
end
|
636
652
|
end
|
data/lib/searchkick/indexer.rb
CHANGED
@@ -24,12 +24,20 @@ module Searchkick
|
|
24
24
|
# note: delete does not set error when item not found
|
25
25
|
first_with_error = response["items"].map do |item|
|
26
26
|
(item["index"] || item["delete"] || item["update"])
|
27
|
-
end.find { |item| item["error"] }
|
28
|
-
|
27
|
+
end.find.with_index { |item, i| item["error"] && !ignore_missing?(items[i], item["error"]) }
|
28
|
+
if first_with_error
|
29
|
+
raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
|
30
|
+
end
|
29
31
|
end
|
30
32
|
|
31
33
|
# maybe return response in future
|
32
34
|
nil
|
33
35
|
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def ignore_missing?(item, error)
|
40
|
+
error["type"] == "document_missing_exception" && item.instance_variable_defined?(:@ignore_missing)
|
41
|
+
end
|
34
42
|
end
|
35
43
|
end
|
data/lib/searchkick/log_subscriber.rb
CHANGED
@@ -24,7 +24,7 @@ module Searchkick
|
|
24
24
|
|
25
25
|
index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
|
26
26
|
type = payload[:query][:type]
|
27
|
-
request_params = payload[:query].except(:index, :type, :body)
|
27
|
+
request_params = payload[:query].except(:index, :type, :body, :opaque_id)
|
28
28
|
|
29
29
|
params = []
|
30
30
|
request_params.each do |k, v|
|
data/lib/searchkick/model.rb
CHANGED
@@ -1,10 +1,18 @@
|
|
1
1
|
module Searchkick
|
2
2
|
module Model
|
3
3
|
def searchkick(**options)
|
4
|
-
options = Searchkick.model_options.merge(options)
|
4
|
+
options = Searchkick.model_options.deep_merge(options)
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
if options[:conversions]
|
7
|
+
Searchkick.warn("The `conversions` option is deprecated in favor of `conversions_v2`, which provides much better search performance. Upgrade to `conversions_v2` or rename `conversions` to `conversions_v1`")
|
8
|
+
end
|
9
|
+
|
10
|
+
if options.key?(:conversions_v1)
|
11
|
+
options[:conversions] = options.delete(:conversions_v1)
|
12
|
+
end
|
13
|
+
|
14
|
+
unknown_keywords = options.keys - [:_all, :_type, :batch_size, :callbacks, :callback_options, :case_sensitive, :conversions, :conversions_v2, :deep_paging, :default_fields,
|
15
|
+
:filterable, :geo_shape, :highlight, :ignore_above, :index_name, :index_prefix, :inheritance, :job_options, :knn, :language,
|
8
16
|
:locations, :mappings, :match, :max_result_window, :merge_mappings, :routing, :searchable, :search_synonyms, :settings, :similarity,
|
9
17
|
:special_characters, :stem, :stemmer, :stem_conversions, :stem_exclusion, :stemmer_override, :suggest, :synonyms, :text_end,
|
10
18
|
:text_middle, :text_start, :unscope, :word, :word_end, :word_middle, :word_start]
|
@@ -21,14 +29,16 @@ module Searchkick
|
|
21
29
|
unless [:inline, true, false, :async, :queue].include?(callbacks)
|
22
30
|
raise ArgumentError, "Invalid value for callbacks"
|
23
31
|
end
|
32
|
+
callback_options = (options[:callback_options] || {}).dup
|
33
|
+
callback_options[:if] = [-> { Searchkick.callbacks?(default: callbacks) }, callback_options[:if]].compact.flatten(1)
|
24
34
|
|
25
35
|
base = self
|
26
36
|
|
27
37
|
mod = Module.new
|
28
38
|
include(mod)
|
29
39
|
mod.module_eval do
|
30
|
-
def reindex(method_name = nil, mode: nil, refresh: false)
|
31
|
-
self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, single: true)
|
40
|
+
def reindex(method_name = nil, mode: nil, refresh: false, ignore_missing: nil, job_options: nil)
|
41
|
+
self.class.searchkick_index.reindex([self], method_name: method_name, mode: mode, refresh: refresh, ignore_missing: ignore_missing, job_options: job_options, single: true)
|
32
42
|
end unless base.method_defined?(:reindex)
|
33
43
|
|
34
44
|
def similar(**options)
|
@@ -99,10 +109,10 @@ module Searchkick
|
|
99
109
|
# always add callbacks, even when callbacks is false
|
100
110
|
# so Model.callbacks block can be used
|
101
111
|
if respond_to?(:after_commit)
|
102
|
-
after_commit :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
|
112
|
+
after_commit :reindex, **callback_options
|
103
113
|
elsif respond_to?(:after_save)
|
104
|
-
after_save :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
|
105
|
-
after_destroy :reindex, if: -> { Searchkick.callbacks?(default: callbacks) }
|
114
|
+
after_save :reindex, **callback_options
|
115
|
+
after_destroy :reindex, **callback_options
|
106
116
|
end
|
107
117
|
end
|
108
118
|
end
|
data/lib/searchkick/multi_search.rb
CHANGED
@@ -2,8 +2,9 @@ module Searchkick
|
|
2
2
|
class MultiSearch
|
3
3
|
attr_reader :queries
|
4
4
|
|
5
|
-
def initialize(queries)
|
5
|
+
def initialize(queries, opaque_id: nil)
|
6
6
|
@queries = queries
|
7
|
+
@opaque_id = opaque_id
|
7
8
|
end
|
8
9
|
|
9
10
|
def perform
|
@@ -15,7 +16,11 @@ module Searchkick
|
|
15
16
|
private
|
16
17
|
|
17
18
|
def perform_search(search_queries, perform_retry: true)
|
18
|
-
|
19
|
+
params = {
|
20
|
+
body: search_queries.flat_map { |q| [q.params.except(:body), q.body] }
|
21
|
+
}
|
22
|
+
params[:opaque_id] = @opaque_id if @opaque_id
|
23
|
+
responses = client.msearch(params)["responses"]
|
19
24
|
|
20
25
|
retry_queries = []
|
21
26
|
search_queries.each_with_index do |query, i|
|
data/lib/searchkick/process_batch_job.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Searchkick
|
2
|
-
class ProcessBatchJob < ActiveJob::Base
|
2
|
+
class ProcessBatchJob < Searchkick.parent_job.constantize
|
3
3
|
queue_as { Searchkick.queue_name }
|
4
4
|
|
5
5
|
def perform(class_name:, record_ids:, index_name: nil)
|
@@ -14,7 +14,7 @@ module Searchkick
|
|
14
14
|
end
|
15
15
|
|
16
16
|
relation = Searchkick.scope(model)
|
17
|
-
RecordIndexer.new(index).reindex_items(relation, items, method_name: nil)
|
17
|
+
RecordIndexer.new(index).reindex_items(relation, items, method_name: nil, ignore_missing: nil)
|
18
18
|
end
|
19
19
|
end
|
20
20
|
end
|
data/lib/searchkick/process_queue_job.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
module Searchkick
|
2
|
-
class ProcessQueueJob < ActiveJob::Base
|
2
|
+
class ProcessQueueJob < Searchkick.parent_job.constantize
|
3
3
|
queue_as { Searchkick.queue_name }
|
4
4
|
|
5
|
-
def perform(class_name:, index_name: nil, inline: false)
|
5
|
+
def perform(class_name:, index_name: nil, inline: false, job_options: nil)
|
6
6
|
model = Searchkick.load_model(class_name)
|
7
7
|
index = model.searchkick_index(name: index_name)
|
8
8
|
limit = model.searchkick_options[:batch_size] || 1000
|
9
|
+
job_options = (model.searchkick_options[:job_options] || {}).merge(job_options || {})
|
9
10
|
|
10
11
|
loop do
|
11
12
|
record_ids = index.reindex_queue.reserve(limit: limit)
|
@@ -20,7 +21,7 @@ module Searchkick
|
|
20
21
|
# use new.perform to avoid excessive logging
|
21
22
|
Searchkick::ProcessBatchJob.new.perform(**batch_options)
|
22
23
|
else
|
23
|
-
Searchkick::ProcessBatchJob.perform_later(**batch_options)
|
24
|
+
Searchkick::ProcessBatchJob.set(job_options).perform_later(**batch_options)
|
24
25
|
end
|
25
26
|
|
26
27
|
# TODO when moving to reliable queuing, mark as complete
|