searchkick 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,16 +2,20 @@ module Searchkick
2
2
  class BulkReindexJob < ActiveJob::Base
3
3
  queue_as { Searchkick.queue_name }
4
4
 
5
+ # TODO remove min_id and max_id in Searchkick 6
5
6
  def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
6
- klass = class_name.constantize
7
- index = index_name ? Searchkick::Index.new(index_name, **klass.searchkick_options) : klass.searchkick_index
7
+ model = Searchkick.load_model(class_name)
8
+ index = model.searchkick_index(name: index_name)
9
+
10
+ # legacy
8
11
  record_ids ||= min_id..max_id
9
- index.import_scope(
10
- Searchkick.load_records(klass, record_ids),
11
- method_name: method_name,
12
- batch: true,
13
- batch_id: batch_id
14
- )
12
+
13
+ relation = Searchkick.scope(model)
14
+ relation = Searchkick.load_records(relation, record_ids)
15
+ relation = relation.search_import if relation.respond_to?(:search_import)
16
+
17
+ RecordIndexer.new(index).reindex(relation, mode: :inline, method_name: method_name, full: false)
18
+ RelationIndexer.new(index).batch_completed(batch_id) if batch_id
15
19
  end
16
20
  end
17
21
  end
@@ -0,0 +1,40 @@
1
+ # based on https://gist.github.com/mnutt/566725
2
+ module Searchkick
3
+ module ControllerRuntime
4
+ extend ActiveSupport::Concern
5
+
6
+ protected
7
+
8
+ attr_internal :searchkick_runtime
9
+
10
+ def process_action(action, *args)
11
+ # We also need to reset the runtime before each action
12
+ # because of queries in middleware or in cases we are streaming
13
+ # and it won't be cleaned up by the method below.
14
+ Searchkick::LogSubscriber.reset_runtime
15
+ super
16
+ end
17
+
18
+ def cleanup_view_runtime
19
+ searchkick_rt_before_render = Searchkick::LogSubscriber.reset_runtime
20
+ runtime = super
21
+ searchkick_rt_after_render = Searchkick::LogSubscriber.reset_runtime
22
+ self.searchkick_runtime = searchkick_rt_before_render + searchkick_rt_after_render
23
+ runtime - searchkick_rt_after_render
24
+ end
25
+
26
+ def append_info_to_payload(payload)
27
+ super
28
+ payload[:searchkick_runtime] = (searchkick_runtime || 0) + Searchkick::LogSubscriber.reset_runtime
29
+ end
30
+
31
+ module ClassMethods
32
+ def log_process_action(payload)
33
+ messages = super
34
+ runtime = payload[:searchkick_runtime]
35
+ messages << ("Searchkick: %.1fms" % runtime.to_f) if runtime.to_f > 0
36
+ messages
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,9 +1,5 @@
1
- require "searchkick/index_options"
2
-
3
1
  module Searchkick
4
2
  class Index
5
- include IndexOptions
6
-
7
3
  attr_reader :name, :options
8
4
 
9
5
  def initialize(name, options = {})
@@ -12,6 +8,10 @@ module Searchkick
12
8
  @klass_document_type = {} # cache
13
9
  end
14
10
 
11
+ def index_options
12
+ IndexOptions.new(self).index_options
13
+ end
14
+
15
15
  def create(body = {})
16
16
  client.indices.create index: name, body: body
17
17
  end
@@ -38,16 +38,19 @@ module Searchkick
38
38
  client.indices.exists_alias name: name
39
39
  end
40
40
 
41
+ # call to_h for consistent results between elasticsearch gem 7 and 8
42
+ # could do this for all API calls, but for now only do it where the return value is the focus
41
43
  def mapping
42
- client.indices.get_mapping index: name
44
+ client.indices.get_mapping(index: name).to_h
43
45
  end
44
46
 
47
+ # call to_h for consistent results between elasticsearch gem 7 and 8
45
48
  def settings
46
- client.indices.get_settings index: name
49
+ client.indices.get_settings(index: name).to_h
47
50
  end
48
51
 
49
52
  def refresh_interval
50
- settings.values.first["settings"]["index"]["refresh_interval"]
53
+ index_settings["refresh_interval"]
51
54
  end
52
55
 
53
56
  def update_settings(settings)
@@ -82,7 +85,8 @@ module Searchkick
82
85
  old_indices =
83
86
  begin
84
87
  client.indices.get_alias(name: name).keys
85
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
88
+ rescue => e
89
+ raise e unless Searchkick.not_found_error?(e)
86
90
  {}
87
91
  end
88
92
  actions = old_indices.map { |old_name| {remove: {index: old_name, alias: name}} } + [{add: {index: new_name, alias: name}}]
@@ -94,7 +98,7 @@ module Searchkick
94
98
  record_data = RecordData.new(self, record).record_data
95
99
 
96
100
  # remove underscore
97
- get_options = Hash[record_data.map { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }]
101
+ get_options = record_data.to_h { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }
98
102
 
99
103
  client.get(get_options)["_source"]
100
104
  end
@@ -103,11 +107,12 @@ module Searchkick
103
107
  indices =
104
108
  begin
105
109
  if client.indices.respond_to?(:get_alias)
106
- client.indices.get_alias
110
+ client.indices.get_alias(index: "#{name}*")
107
111
  else
108
112
  client.indices.get_aliases
109
113
  end
110
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
114
+ rescue => e
115
+ raise e unless Searchkick.not_found_error?(e)
111
116
  {}
112
117
  end
113
118
  indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
@@ -123,32 +128,47 @@ module Searchkick
123
128
  indices
124
129
  end
125
130
 
126
- # record based
127
- # use helpers for notifications
128
-
129
131
  def store(record)
130
- bulk_indexer.bulk_index([record])
132
+ notify(record, "Store") do
133
+ queue_index([record])
134
+ end
131
135
  end
132
136
 
133
137
  def remove(record)
134
- bulk_indexer.bulk_delete([record])
138
+ notify(record, "Remove") do
139
+ queue_delete([record])
140
+ end
135
141
  end
136
142
 
137
143
  def update_record(record, method_name)
138
- bulk_indexer.bulk_update([record], method_name)
144
+ notify(record, "Update") do
145
+ queue_update([record], method_name)
146
+ end
139
147
  end
140
148
 
141
149
  def bulk_delete(records)
142
- bulk_indexer.bulk_delete(records)
150
+ return if records.empty?
151
+
152
+ notify_bulk(records, "Delete") do
153
+ queue_delete(records)
154
+ end
143
155
  end
144
156
 
145
157
  def bulk_index(records)
146
- bulk_indexer.bulk_index(records)
158
+ return if records.empty?
159
+
160
+ notify_bulk(records, "Import") do
161
+ queue_index(records)
162
+ end
147
163
  end
148
164
  alias_method :import, :bulk_index
149
165
 
150
166
  def bulk_update(records, method_name)
151
- bulk_indexer.bulk_update(records, method_name)
167
+ return if records.empty?
168
+
169
+ notify_bulk(records, "Update") do
170
+ queue_update(records, method_name)
171
+ end
152
172
  end
153
173
 
154
174
  def search_id(record)
@@ -160,18 +180,25 @@ module Searchkick
160
180
  end
161
181
 
162
182
  def similar_record(record, **options)
163
- like_text = retrieve(record).to_hash
164
- .keep_if { |k, _| !options[:fields] || options[:fields].map(&:to_s).include?(k) }
165
- .values.compact.join(" ")
166
-
167
- options[:where] ||= {}
168
- options[:where][:_id] ||= {}
169
- options[:where][:_id][:not] = Array(options[:where][:_id][:not]) + [record.id.to_s]
170
183
  options[:per_page] ||= 10
171
- options[:similar] = true
184
+ options[:similar] = [RecordData.new(self, record).record_data]
185
+ options[:models] ||= [record.class] unless options.key?(:model)
172
186
 
173
- # TODO use index class instead of record class
174
- Searchkick.search(like_text, model: record.class, **options)
187
+ Searchkick.search("*", **options)
188
+ end
189
+
190
+ def reload_synonyms
191
+ if Searchkick.opensearch?
192
+ client.transport.perform_request "POST", "_plugins/_refresh_search_analyzers/#{CGI.escape(name)}"
193
+ else
194
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
195
+ begin
196
+ client.transport.perform_request("GET", "#{CGI.escape(name)}/_reload_search_analyzers")
197
+ rescue => e
198
+ raise Error, "Requires non-OSS version of Elasticsearch" if Searchkick.not_allowed_error?(e)
199
+ raise e
200
+ end
201
+ end
175
202
  end
176
203
 
177
204
  # queue
@@ -182,22 +209,35 @@ module Searchkick
182
209
 
183
210
  # reindex
184
211
 
185
- def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
212
+ # note: this is designed to be used internally
213
+ # so it does not check object matches index class
214
+ def reindex(object, method_name: nil, full: false, **options)
215
+ if object.is_a?(Array)
216
+ # note: purposefully skip full
217
+ return reindex_records(object, method_name: method_name, **options)
218
+ end
219
+
220
+ if !object.respond_to?(:searchkick_klass)
221
+ raise Error, "Cannot reindex object"
222
+ end
223
+
224
+ scoped = Searchkick.relation?(object)
225
+ # call searchkick_klass for inheritance
226
+ relation = scoped ? object.all : Searchkick.scope(object.searchkick_klass).all
227
+
186
228
  refresh = options.fetch(:refresh, !scoped)
229
+ options.delete(:refresh)
187
230
 
188
- if method_name
189
- # update
190
- import_scope(relation, method_name: method_name, scope: scope)
191
- self.refresh if refresh
192
- true
193
- elsif scoped && !full
194
- # reindex association
195
- import_scope(relation, scope: scope)
231
+ if method_name || (scoped && !full)
232
+ mode = options.delete(:mode) || :inline
233
+ raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
234
+
235
+ # import only
236
+ import_scope(relation, method_name: method_name, mode: mode)
196
237
  self.refresh if refresh
197
238
  true
198
239
  else
199
- # full reindex
200
- reindex_scope(relation, scope: scope, **options)
240
+ full_reindex(relation, **options)
201
241
  end
202
242
  end
203
243
 
@@ -209,15 +249,14 @@ module Searchkick
209
249
  end
210
250
 
211
251
  def import_scope(relation, **options)
212
- bulk_indexer.import_scope(relation, **options)
252
+ relation_indexer.reindex(relation, **options)
213
253
  end
214
254
 
215
255
  def batches_left
216
- bulk_indexer.batches_left
256
+ relation_indexer.batches_left
217
257
  end
218
258
 
219
- # other
220
-
259
+ # private
221
260
  def klass_document_type(klass, ignore_type = false)
222
261
  @klass_document_type[[klass, ignore_type]] ||= begin
223
262
  if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
@@ -230,7 +269,7 @@ module Searchkick
230
269
  end
231
270
  end
232
271
 
233
- # should not be public
272
+ # private
234
273
  def conversions_fields
235
274
  @conversions_fields ||= begin
236
275
  conversions = Array(options[:conversions])
@@ -238,10 +277,12 @@ module Searchkick
238
277
  end
239
278
  end
240
279
 
280
+ # private
241
281
  def suggest_fields
242
282
  @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
243
283
  end
244
284
 
285
+ # private
245
286
  def locations_fields
246
287
  @locations_fields ||= begin
247
288
  locations = Array(options[:locations])
@@ -249,19 +290,54 @@ module Searchkick
249
290
  end
250
291
  end
251
292
 
293
+ # private
294
+ def uuid
295
+ index_settings["uuid"]
296
+ end
297
+
252
298
  protected
253
299
 
254
300
  def client
255
301
  Searchkick.client
256
302
  end
257
303
 
258
- def bulk_indexer
259
- @bulk_indexer ||= BulkIndexer.new(self)
304
+ def queue_index(records)
305
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(self, r).index_data })
306
+ end
307
+
308
+ def queue_delete(records)
309
+ Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(self, r).delete_data })
310
+ end
311
+
312
+ def queue_update(records, method_name)
313
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(self, r).update_data(method_name) })
314
+ end
315
+
316
+ def relation_indexer
317
+ @relation_indexer ||= RelationIndexer.new(self)
318
+ end
319
+
320
+ def index_settings
321
+ settings.values.first["settings"]["index"]
322
+ end
323
+
324
+ def import_before_promotion(index, relation, **import_options)
325
+ index.import_scope(relation, **import_options)
326
+ end
327
+
328
+ def reindex_records(object, mode: nil, refresh: false, **options)
329
+ mode ||= Searchkick.callbacks_value || @options[:callbacks] || true
330
+ mode = :inline if mode == :bulk
331
+
332
+ result = RecordIndexer.new(self).reindex(object, mode: mode, full: false, **options)
333
+ self.refresh if refresh
334
+ result
260
335
  end
261
336
 
262
337
  # https://gist.github.com/jarosan/3124884
263
338
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
264
- def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
339
+ # TODO deprecate async in favor of mode: :async, wait: true/false
340
+ def full_reindex(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
265
341
  if resume
266
342
  index_name = all_indices.sort.last
267
343
  raise Searchkick::Error, "No index to resume" unless index_name
@@ -275,20 +351,22 @@ module Searchkick
275
351
  end
276
352
 
277
353
  import_options = {
278
- resume: resume,
279
- async: async,
354
+ mode: (async ? :async : :inline),
280
355
  full: true,
356
+ resume: resume,
281
357
  scope: scope
282
358
  }
283
359
 
360
+ uuid = index.uuid
361
+
284
362
  # check if alias exists
285
363
  alias_exists = alias_exists?
286
364
  if alias_exists
287
- # import before promotion
288
- index.import_scope(relation, **import_options) if import
365
+ import_before_promotion(index, relation, **import_options) if import
289
366
 
290
367
  # get existing indices to remove
291
368
  unless async
369
+ check_uuid(uuid, index.uuid)
292
370
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
293
371
  clean_indices unless retain
294
372
  end
@@ -313,6 +391,7 @@ module Searchkick
313
391
  # already promoted if alias didn't exist
314
392
  if alias_exists
315
393
  puts "Jobs complete. Promoting..."
394
+ check_uuid(uuid, index.uuid)
316
395
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
317
396
  end
318
397
  clean_indices unless retain
@@ -324,12 +403,51 @@ module Searchkick
324
403
  index.refresh
325
404
  true
326
405
  end
327
- rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
328
- if e.message.include?("No handler for type [text]")
329
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
406
+ rescue => e
407
+ if Searchkick.transport_error?(e) && e.message.include?("No handler for type [text]")
408
+ raise UnsupportedVersionError
330
409
  end
331
410
 
332
411
  raise e
333
412
  end
413
+
414
+ # safety check
415
+ # still a chance for race condition since it's called before promotion
416
+ # ideal is for user to disable automatic index creation
417
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
418
+ def check_uuid(old_uuid, new_uuid)
419
+ if old_uuid != new_uuid
420
+ raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
421
+ end
422
+ end
423
+
424
+ def notify(record, name)
425
+ if Searchkick.callbacks_value == :bulk
426
+ yield
427
+ else
428
+ name = "#{record.class.searchkick_klass.name} #{name}" if record && record.class.searchkick_klass
429
+ event = {
430
+ name: name,
431
+ id: search_id(record)
432
+ }
433
+ ActiveSupport::Notifications.instrument("request.searchkick", event) do
434
+ yield
435
+ end
436
+ end
437
+ end
438
+
439
+ def notify_bulk(records, name)
440
+ if Searchkick.callbacks_value == :bulk
441
+ yield
442
+ else
443
+ event = {
444
+ name: "#{records.first.class.searchkick_klass.name} #{name}",
445
+ count: records.size
446
+ }
447
+ ActiveSupport::Notifications.instrument("request.searchkick", event) do
448
+ yield
449
+ end
450
+ end
451
+ end
334
452
  end
335
453
  end
@@ -0,0 +1,30 @@
1
+ module Searchkick
2
+ class IndexCache
3
+ def initialize(max_size: 20)
4
+ @data = {}
5
+ @mutex = Mutex.new
6
+ @max_size = max_size
7
+ end
8
+
9
+ # probably a better pattern for this
10
+ # but keep it simple
11
+ def fetch(name)
12
+ # thread-safe in MRI without mutex
13
+ # due to how context switching works
14
+ @mutex.synchronize do
15
+ if @data.key?(name)
16
+ @data[name]
17
+ else
18
+ @data.clear if @data.size >= @max_size
19
+ @data[name] = yield
20
+ end
21
+ end
22
+ end
23
+
24
+ def clear
25
+ @mutex.synchronize do
26
+ @data.clear
27
+ end
28
+ end
29
+ end
30
+ end