searchkick 4.6.3 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,3 @@
1
- require "searchkick/index_options"
2
-
3
1
  module Searchkick
4
2
  class Index
5
3
  attr_reader :name, :options
@@ -40,12 +38,15 @@ module Searchkick
40
38
  client.indices.exists_alias name: name
41
39
  end
42
40
 
41
+ # call to_h for consistent results between elasticsearch gem 7 and 8
42
+ # could do for all API calls, but just do for ones where return value is focus for now
43
43
  def mapping
44
- client.indices.get_mapping index: name
44
+ client.indices.get_mapping(index: name).to_h
45
45
  end
46
46
 
47
+ # call to_h for consistent results between elasticsearch gem 7 and 8
47
48
  def settings
48
- client.indices.get_settings index: name
49
+ client.indices.get_settings(index: name).to_h
49
50
  end
50
51
 
51
52
  def refresh_interval
@@ -97,7 +98,7 @@ module Searchkick
97
98
  record_data = RecordData.new(self, record).record_data
98
99
 
99
100
  # remove underscore
100
- get_options = Hash[record_data.map { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }]
101
+ get_options = record_data.to_h { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }
101
102
 
102
103
  client.get(get_options)["_source"]
103
104
  end
@@ -127,32 +128,47 @@ module Searchkick
127
128
  indices
128
129
  end
129
130
 
130
- # record based
131
- # use helpers for notifications
132
-
133
131
  def store(record)
134
- bulk_indexer.bulk_index([record])
132
+ notify(record, "Store") do
133
+ queue_index([record])
134
+ end
135
135
  end
136
136
 
137
137
  def remove(record)
138
- bulk_indexer.bulk_delete([record])
138
+ notify(record, "Remove") do
139
+ queue_delete([record])
140
+ end
139
141
  end
140
142
 
141
143
  def update_record(record, method_name)
142
- bulk_indexer.bulk_update([record], method_name)
144
+ notify(record, "Update") do
145
+ queue_update([record], method_name)
146
+ end
143
147
  end
144
148
 
145
149
  def bulk_delete(records)
146
- bulk_indexer.bulk_delete(records)
150
+ return if records.empty?
151
+
152
+ notify_bulk(records, "Delete") do
153
+ queue_delete(records)
154
+ end
147
155
  end
148
156
 
149
157
  def bulk_index(records)
150
- bulk_indexer.bulk_index(records)
158
+ return if records.empty?
159
+
160
+ notify_bulk(records, "Import") do
161
+ queue_index(records)
162
+ end
151
163
  end
152
164
  alias_method :import, :bulk_index
153
165
 
154
166
  def bulk_update(records, method_name)
155
- bulk_indexer.bulk_update(records, method_name)
167
+ return if records.empty?
168
+
169
+ notify_bulk(records, "Update") do
170
+ queue_update(records, method_name)
171
+ end
156
172
  end
157
173
 
158
174
  def search_id(record)
@@ -163,20 +179,12 @@ module Searchkick
163
179
  RecordData.new(self, record).document_type
164
180
  end
165
181
 
166
- # TODO use like: [{_index: ..., _id: ...}] in Searchkick 5
167
182
  def similar_record(record, **options)
168
- like_text = retrieve(record).to_hash
169
- .keep_if { |k, _| !options[:fields] || options[:fields].map(&:to_s).include?(k) }
170
- .values.compact.join(" ")
171
-
172
- options[:where] ||= {}
173
- options[:where][:_id] ||= {}
174
- options[:where][:_id][:not] = Array(options[:where][:_id][:not]) + [record.id.to_s]
175
183
  options[:per_page] ||= 10
176
- options[:similar] = true
184
+ options[:similar] = [RecordData.new(self, record).record_data]
185
+ options[:models] ||= [record.class] unless options.key?(:model)
177
186
 
178
- # TODO use index class instead of record class
179
- Searchkick.search(like_text, model: record.class, **options)
187
+ Searchkick.search("*", **options)
180
188
  end
181
189
 
182
190
  def reload_synonyms
@@ -186,8 +194,9 @@ module Searchkick
186
194
  raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
187
195
  begin
188
196
  client.transport.perform_request("GET", "#{CGI.escape(name)}/_reload_search_analyzers")
189
- rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
190
- raise Error, "Requires non-OSS version of Elasticsearch"
197
+ rescue => e
198
+ raise Error, "Requires non-OSS version of Elasticsearch" if Searchkick.not_allowed_error?(e)
199
+ raise e
191
200
  end
192
201
  end
193
202
  end
@@ -200,29 +209,35 @@ module Searchkick
200
209
 
201
210
  # reindex
202
211
 
203
- def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
212
+ # note: this is designed to be used internally
213
+ # so it does not check object matches index class
214
+ def reindex(object, method_name: nil, full: false, **options)
215
+ if object.is_a?(Array)
216
+ # note: purposefully skip full
217
+ return reindex_records(object, method_name: method_name, **options)
218
+ end
219
+
220
+ if !object.respond_to?(:searchkick_klass)
221
+ raise Error, "Cannot reindex object"
222
+ end
223
+
224
+ scoped = Searchkick.relation?(object)
225
+ # call searchkick_klass for inheritance
226
+ relation = scoped ? object.all : Searchkick.scope(object.searchkick_klass).all
227
+
204
228
  refresh = options.fetch(:refresh, !scoped)
205
229
  options.delete(:refresh)
206
230
 
207
- if method_name
208
- # TODO throw ArgumentError
209
- Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
231
+ if method_name || (scoped && !full)
232
+ mode = options.delete(:mode) || :inline
233
+ raise ArgumentError, "unsupported keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
210
234
 
211
- # update
212
- import_scope(relation, method_name: method_name, scope: scope)
213
- self.refresh if refresh
214
- true
215
- elsif scoped && !full
216
- # TODO throw ArgumentError
217
- Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
218
-
219
- # reindex association
220
- import_scope(relation, scope: scope)
235
+ # import only
236
+ import_scope(relation, method_name: method_name, mode: mode)
221
237
  self.refresh if refresh
222
238
  true
223
239
  else
224
- # full reindex
225
- reindex_scope(relation, scope: scope, **options)
240
+ full_reindex(relation, **options)
226
241
  end
227
242
  end
228
243
 
@@ -234,15 +249,14 @@ module Searchkick
234
249
  end
235
250
 
236
251
  def import_scope(relation, **options)
237
- bulk_indexer.import_scope(relation, **options)
252
+ relation_indexer.reindex(relation, **options)
238
253
  end
239
254
 
240
255
  def batches_left
241
- bulk_indexer.batches_left
256
+ relation_indexer.batches_left
242
257
  end
243
258
 
244
- # other
245
-
259
+ # private
246
260
  def klass_document_type(klass, ignore_type = false)
247
261
  @klass_document_type[[klass, ignore_type]] ||= begin
248
262
  if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
@@ -255,7 +269,7 @@ module Searchkick
255
269
  end
256
270
  end
257
271
 
258
- # should not be public
272
+ # private
259
273
  def conversions_fields
260
274
  @conversions_fields ||= begin
261
275
  conversions = Array(options[:conversions])
@@ -263,10 +277,12 @@ module Searchkick
263
277
  end
264
278
  end
265
279
 
280
+ # private
266
281
  def suggest_fields
267
282
  @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
268
283
  end
269
284
 
285
+ # private
270
286
  def locations_fields
271
287
  @locations_fields ||= begin
272
288
  locations = Array(options[:locations])
@@ -285,8 +301,20 @@ module Searchkick
285
301
  Searchkick.client
286
302
  end
287
303
 
288
- def bulk_indexer
289
- @bulk_indexer ||= BulkIndexer.new(self)
304
+ def queue_index(records)
305
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(self, r).index_data })
306
+ end
307
+
308
+ def queue_delete(records)
309
+ Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(self, r).delete_data })
310
+ end
311
+
312
+ def queue_update(records, method_name)
313
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(self, r).update_data(method_name) })
314
+ end
315
+
316
+ def relation_indexer
317
+ @relation_indexer ||= RelationIndexer.new(self)
290
318
  end
291
319
 
292
320
  def index_settings
@@ -297,9 +325,19 @@ module Searchkick
297
325
  index.import_scope(relation, **import_options)
298
326
  end
299
327
 
328
+ def reindex_records(object, mode: nil, refresh: false, **options)
329
+ mode ||= Searchkick.callbacks_value || @options[:callbacks] || true
330
+ mode = :inline if mode == :bulk
331
+
332
+ result = RecordIndexer.new(self).reindex(object, mode: mode, full: false, **options)
333
+ self.refresh if refresh
334
+ result
335
+ end
336
+
300
337
  # https://gist.github.com/jarosan/3124884
301
338
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
302
- def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
339
+ # TODO deprecate async in favor of mode: :async, wait: true/false
340
+ def full_reindex(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
303
341
  if resume
304
342
  index_name = all_indices.sort.last
305
343
  raise Searchkick::Error, "No index to resume" unless index_name
@@ -313,9 +351,9 @@ module Searchkick
313
351
  end
314
352
 
315
353
  import_options = {
316
- resume: resume,
317
- async: async,
354
+ mode: (async ? :async : :inline),
318
355
  full: true,
356
+ resume: resume,
319
357
  scope: scope
320
358
  }
321
359
 
@@ -367,7 +405,7 @@ module Searchkick
367
405
  end
368
406
  rescue => e
369
407
  if Searchkick.transport_error?(e) && e.message.include?("No handler for type [text]")
370
- raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 6 or greater"
408
+ raise UnsupportedVersionError
371
409
  end
372
410
 
373
411
  raise e
@@ -382,5 +420,34 @@ module Searchkick
382
420
  raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
383
421
  end
384
422
  end
423
+
424
+ def notify(record, name)
425
+ if Searchkick.callbacks_value == :bulk
426
+ yield
427
+ else
428
+ name = "#{record.class.searchkick_klass.name} #{name}" if record && record.class.searchkick_klass
429
+ event = {
430
+ name: name,
431
+ id: search_id(record)
432
+ }
433
+ ActiveSupport::Notifications.instrument("request.searchkick", event) do
434
+ yield
435
+ end
436
+ end
437
+ end
438
+
439
+ def notify_bulk(records, name)
440
+ if Searchkick.callbacks_value == :bulk
441
+ yield
442
+ else
443
+ event = {
444
+ name: "#{records.first.class.searchkick_klass.name} #{name}",
445
+ count: records.size
446
+ }
447
+ ActiveSupport::Notifications.instrument("request.searchkick", event) do
448
+ yield
449
+ end
450
+ end
451
+ end
385
452
  end
386
453
  end
@@ -0,0 +1,30 @@
1
+ module Searchkick
2
+ class IndexCache
3
+ def initialize(max_size: 20)
4
+ @data = {}
5
+ @mutex = Mutex.new
6
+ @max_size = max_size
7
+ end
8
+
9
+ # probably a better pattern for this
10
+ # but keep it simple
11
+ def fetch(name)
12
+ # thread-safe in MRI without mutex
13
+ # due to how context switching works
14
+ @mutex.synchronize do
15
+ if @data.key?(name)
16
+ @data[name]
17
+ else
18
+ @data.clear if @data.size >= @max_size
19
+ @data[name] = yield
20
+ end
21
+ end
22
+ end
23
+
24
+ def clear
25
+ @mutex.synchronize do
26
+ @data.clear
27
+ end
28
+ end
29
+ end
30
+ end
@@ -7,18 +7,16 @@ module Searchkick
7
7
  end
8
8
 
9
9
  def index_options
10
- custom_mapping = options[:mappings] || {}
11
- if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
- # add type
13
- custom_mapping = {index_type => custom_mapping}
14
- end
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
15
13
 
16
14
  if options[:mappings] && !options[:merge_mappings]
17
- settings = options[:settings] || {}
18
- mappings = custom_mapping
15
+ settings = custom_settings
16
+ mappings = custom_mappings
19
17
  else
20
- settings = generate_settings
21
- mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
22
20
  end
23
21
 
24
22
  set_deep_paging(settings) if options[:deep_paging]
@@ -162,17 +160,14 @@ module Searchkick
162
160
  settings[:number_of_replicas] = 0
163
161
  end
164
162
 
165
- # TODO remove in Searchkick 5 (classic no longer supported)
166
163
  if options[:similarity]
167
164
  settings[:similarity] = {default: {type: options[:similarity]}}
168
165
  end
169
166
 
170
- unless below62?
171
- settings[:index] = {
172
- max_ngram_diff: 49,
173
- max_shingle_diff: 4
174
- }
175
- end
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
176
171
 
177
172
  if options[:case_sensitive]
178
173
  settings[:analysis][:analyzer].each do |_, analyzer|
@@ -180,13 +175,8 @@ module Searchkick
180
175
  end
181
176
  end
182
177
 
183
- # TODO do this last in Searchkick 5
184
- settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
185
-
186
178
  add_synonyms(settings)
187
179
  add_search_synonyms(settings)
188
- # TODO remove in Searchkick 5
189
- add_wordnet(settings) if options[:wordnet]
190
180
 
191
181
  if options[:special_characters] == false
192
182
  settings[:analysis][:analyzer].each_value do |analyzer_settings|
@@ -223,19 +213,7 @@ module Searchkick
223
213
  type: "smartcn"
224
214
  }
225
215
  )
226
- when "japanese"
227
- settings[:analysis][:analyzer].merge!(
228
- default_analyzer => {
229
- type: "kuromoji"
230
- },
231
- searchkick_search: {
232
- type: "kuromoji"
233
- },
234
- searchkick_search2: {
235
- type: "kuromoji"
236
- }
237
- )
238
- when "japanese2"
216
+ when "japanese", "japanese2"
239
217
  analyzer = {
240
218
  type: "custom",
241
219
  tokenizer: "kuromoji_tokenizer",
@@ -379,16 +357,15 @@ module Searchkick
379
357
  }
380
358
  end
381
359
 
382
- mapping_options = Hash[
360
+ mapping_options =
383
361
  [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
384
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
385
- ]
362
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
386
363
 
387
364
  word = options[:word] != false && (!options[:match] || options[:match] == :word)
388
365
 
389
366
  mapping_options[:searchable].delete("_all")
390
367
 
391
- analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
368
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
392
369
 
393
370
  mapping_options.values.flatten.uniq.each do |field|
394
371
  fields = {}
@@ -481,10 +458,6 @@ module Searchkick
481
458
  ]
482
459
  }
483
460
 
484
- if below70?
485
- mappings = {index_type => mappings}
486
- end
487
-
488
461
  mappings
489
462
  end
490
463
 
@@ -533,7 +506,7 @@ module Searchkick
533
506
  end
534
507
  settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
535
508
 
536
- if options[:language] == "japanese2"
509
+ if ["japanese", "japanese2"].include?(options[:language])
537
510
  [:searchkick_search, :searchkick_search2].each do |analyzer|
538
511
  settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
539
512
  end
@@ -549,21 +522,6 @@ module Searchkick
549
522
  end
550
523
  end
551
524
 
552
- def add_wordnet(settings)
553
- settings[:analysis][:filter][:searchkick_wordnet] = {
554
- type: "synonym",
555
- format: "wordnet",
556
- synonyms_path: Searchkick.wordnet_path
557
- }
558
-
559
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
560
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
561
-
562
- %w(word_start word_middle word_end).each do |type|
563
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
564
- end
565
- end
566
-
567
525
  def set_deep_paging(settings)
568
526
  if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
569
527
  settings[:index] ||= {}
@@ -587,14 +545,6 @@ module Searchkick
587
545
  :searchkick_index
588
546
  end
589
547
 
590
- def below62?
591
- Searchkick.server_below?("6.2.0")
592
- end
593
-
594
- def below70?
595
- Searchkick.server_below?("7.0.0")
596
- end
597
-
598
548
  def below73?
599
549
  Searchkick.server_below?("7.3.0")
600
550
  end
@@ -1,3 +1,5 @@
1
+ # thread-local (technically fiber-local) indexer
2
+ # used to aggregate bulk callbacks across models
1
3
  module Searchkick
2
4
  class Indexer
3
5
  attr_reader :queued_items
@@ -14,15 +16,20 @@ module Searchkick
14
16
  def perform
15
17
  items = @queued_items
16
18
  @queued_items = []
17
- if items.any?
18
- response = Searchkick.client.bulk(body: items)
19
- if response["errors"]
20
- first_with_error = response["items"].map do |item|
21
- (item["index"] || item["delete"] || item["update"])
22
- end.find { |item| item["error"] }
23
- raise Searchkick::ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
24
- end
19
+
20
+ return if items.empty?
21
+
22
+ response = Searchkick.client.bulk(body: items)
23
+ if response["errors"]
24
+ # note: delete does not set error when item not found
25
+ first_with_error = response["items"].map do |item|
26
+ (item["index"] || item["delete"] || item["update"])
27
+ end.find { |item| item["error"] }
28
+ raise ImportError, "#{first_with_error["error"]} on item with id '#{first_with_error["_id"]}'"
25
29
  end
30
+
31
+ # maybe return response in future
32
+ nil
26
33
  end
27
34
  end
28
35
  end
@@ -0,0 +1,57 @@
1
+ # based on https://gist.github.com/mnutt/566725
2
+ module Searchkick
3
+ class LogSubscriber < ActiveSupport::LogSubscriber
4
+ def self.runtime=(value)
5
+ Thread.current[:searchkick_runtime] = value
6
+ end
7
+
8
+ def self.runtime
9
+ Thread.current[:searchkick_runtime] ||= 0
10
+ end
11
+
12
+ def self.reset_runtime
13
+ rt = runtime
14
+ self.runtime = 0
15
+ rt
16
+ end
17
+
18
+ def search(event)
19
+ self.class.runtime += event.duration
20
+ return unless logger.debug?
21
+
22
+ payload = event.payload
23
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
24
+
25
+ index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
26
+ type = payload[:query][:type]
27
+ request_params = payload[:query].except(:index, :type, :body)
28
+
29
+ params = []
30
+ request_params.each do |k, v|
31
+ params << "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
32
+ end
33
+
34
+ debug " #{color(name, YELLOW, true)} #{index}#{type ? "/#{type.join(',')}" : ''}/_search#{params.any? ? '?' + params.join('&') : nil} #{payload[:query][:body].to_json}"
35
+ end
36
+
37
+ def request(event)
38
+ self.class.runtime += event.duration
39
+ return unless logger.debug?
40
+
41
+ payload = event.payload
42
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
43
+
44
+ debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
45
+ end
46
+
47
+ def multi_search(event)
48
+ self.class.runtime += event.duration
49
+ return unless logger.debug?
50
+
51
+ payload = event.payload
52
+ name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
53
+
54
+ debug " #{color(name, YELLOW, true)} _msearch #{payload[:body]}"
55
+ end
56
+ end
57
+ end
@@ -1,4 +1,4 @@
1
- require "faraday/middleware"
1
+ require "faraday"
2
2
 
3
3
  module Searchkick
4
4
  class Middleware < Faraday::Middleware