searchkick 2.5.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +7 -0
  3. data/.travis.yml +2 -11
  4. data/CHANGELOG.md +22 -0
  5. data/CONTRIBUTING.md +1 -1
  6. data/Gemfile +3 -3
  7. data/LICENSE.txt +1 -1
  8. data/README.md +68 -141
  9. data/Rakefile +0 -4
  10. data/benchmark/Gemfile +3 -2
  11. data/benchmark/{benchmark.rb → index.rb} +33 -31
  12. data/benchmark/search.rb +48 -0
  13. data/docs/Searchkick-3-Upgrade.md +57 -0
  14. data/lib/searchkick.rb +50 -27
  15. data/lib/searchkick/bulk_indexer.rb +168 -0
  16. data/lib/searchkick/bulk_reindex_job.rb +1 -1
  17. data/lib/searchkick/index.rb +122 -348
  18. data/lib/searchkick/index_options.rb +29 -26
  19. data/lib/searchkick/logging.rb +8 -7
  20. data/lib/searchkick/model.rb +37 -90
  21. data/lib/searchkick/multi_search.rb +6 -7
  22. data/lib/searchkick/query.rb +169 -166
  23. data/lib/searchkick/record_data.rb +133 -0
  24. data/lib/searchkick/record_indexer.rb +55 -0
  25. data/lib/searchkick/reindex_queue.rb +1 -1
  26. data/lib/searchkick/reindex_v2_job.rb +10 -13
  27. data/lib/searchkick/results.rb +14 -25
  28. data/lib/searchkick/tasks.rb +0 -4
  29. data/lib/searchkick/version.rb +1 -1
  30. data/searchkick.gemspec +3 -3
  31. data/test/boost_test.rb +3 -9
  32. data/test/geo_shape_test.rb +0 -4
  33. data/test/highlight_test.rb +28 -12
  34. data/test/index_test.rb +9 -10
  35. data/test/language_test.rb +16 -0
  36. data/test/marshal_test.rb +6 -1
  37. data/test/match_test.rb +9 -4
  38. data/test/model_test.rb +3 -5
  39. data/test/multi_search_test.rb +0 -7
  40. data/test/order_test.rb +1 -7
  41. data/test/pagination_test.rb +1 -1
  42. data/test/reindex_v2_job_test.rb +6 -11
  43. data/test/routing_test.rb +1 -1
  44. data/test/similar_test.rb +2 -2
  45. data/test/sql_test.rb +0 -31
  46. data/test/test_helper.rb +37 -23
  47. metadata +19 -26
  48. data/test/gemfiles/activerecord31.gemfile +0 -7
  49. data/test/gemfiles/activerecord32.gemfile +0 -7
  50. data/test/gemfiles/activerecord40.gemfile +0 -8
  51. data/test/gemfiles/activerecord41.gemfile +0 -8
  52. data/test/gemfiles/mongoid2.gemfile +0 -7
  53. data/test/gemfiles/mongoid3.gemfile +0 -6
  54. data/test/gemfiles/mongoid4.gemfile +0 -7
  55. data/test/records_test.rb +0 -10
@@ -4,7 +4,7 @@ module Searchkick
4
4
 
5
5
  def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
6
6
  klass = class_name.constantize
7
- index = index_name ? Searchkick::Index.new(index_name) : klass.searchkick_index
7
+ index = index_name ? Searchkick::Index.new(index_name, **klass.searchkick_options) : klass.searchkick_index
8
8
  record_ids ||= min_id..max_id
9
9
  index.import_scope(
10
10
  Searchkick.load_records(klass, record_ids),
@@ -1,3 +1,5 @@
1
+ require "searchkick/index_options"
2
+
1
3
  module Searchkick
2
4
  class Index
3
5
  include IndexOptions
@@ -52,9 +54,26 @@ module Searchkick
52
54
  client.indices.put_settings index: name, body: settings
53
55
  end
54
56
 
57
+ def tokens(text, options = {})
58
+ client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
59
+ end
60
+
61
+ def total_docs
62
+ response =
63
+ client.search(
64
+ index: name,
65
+ body: {
66
+ query: {match_all: {}},
67
+ size: 0
68
+ }
69
+ )
70
+
71
+ response["hits"]["total"]
72
+ end
73
+
55
74
  def promote(new_name, update_refresh_interval: false)
56
75
  if update_refresh_interval
57
- new_index = Searchkick::Index.new(new_name)
76
+ new_index = Searchkick::Index.new(new_name, @options)
58
77
  settings = options[:settings] || {}
59
78
  refresh_interval = (settings[:index] && settings[:index][:refresh_interval]) || "1s"
60
79
  new_index.update_settings(index: {refresh_interval: refresh_interval})
@@ -71,74 +90,68 @@ module Searchkick
71
90
  end
72
91
  alias_method :swap, :promote
73
92
 
93
+ def retrieve(record)
94
+ client.get(
95
+ index: name,
96
+ type: document_type(record),
97
+ id: search_id(record)
98
+ )["_source"]
99
+ end
100
+
101
+ def all_indices(unaliased: false)
102
+ indices =
103
+ begin
104
+ client.indices.get_aliases
105
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
106
+ {}
107
+ end
108
+ indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
109
+ indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
110
+ end
111
+
112
+ # remove old indices that start w/ index_name
113
+ def clean_indices
114
+ indices = all_indices(unaliased: true)
115
+ indices.each do |index|
116
+ Searchkick::Index.new(index).delete
117
+ end
118
+ indices
119
+ end
120
+
74
121
  # record based
75
122
  # use helpers for notifications
76
123
 
77
124
  def store(record)
78
- bulk_index_helper([record])
125
+ bulk_indexer.bulk_index([record])
79
126
  end
80
127
 
81
128
  def remove(record)
82
- bulk_delete_helper([record])
129
+ bulk_indexer.bulk_delete([record])
83
130
  end
84
131
 
85
132
  def update_record(record, method_name)
86
- bulk_update_helper([record], method_name)
133
+ bulk_indexer.bulk_update([record], method_name)
87
134
  end
88
135
 
89
136
  def bulk_delete(records)
90
- bulk_delete_helper(records)
137
+ bulk_indexer.bulk_delete(records)
91
138
  end
92
139
 
93
140
  def bulk_index(records)
94
- bulk_index_helper(records)
141
+ bulk_indexer.bulk_index(records)
95
142
  end
96
143
  alias_method :import, :bulk_index
97
144
 
98
145
  def bulk_update(records, method_name)
99
- bulk_update_helper(records, method_name)
100
- end
101
-
102
- def record_data(r)
103
- data = {
104
- _index: name,
105
- _id: search_id(r),
106
- _type: document_type(r)
107
- }
108
- data[:_routing] = r.search_routing if r.respond_to?(:search_routing)
109
- data
110
- end
111
-
112
- def retrieve(record)
113
- client.get(
114
- index: name,
115
- type: document_type(record),
116
- id: search_id(record)
117
- )["_source"]
146
+ bulk_indexer.bulk_update(records, method_name)
118
147
  end
119
148
 
120
- def reindex_record(record)
121
- if record.destroyed? || !record.should_index?
122
- begin
123
- remove(record)
124
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
125
- # do nothing
126
- end
127
- else
128
- store(record)
129
- end
149
+ def search_id(record)
150
+ RecordData.new(self, record).search_id
130
151
  end
131
152
 
132
- def reindex_record_async(record)
133
- if Searchkick.callbacks_value.nil?
134
- if defined?(Searchkick::ReindexV2Job)
135
- Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
136
- else
137
- raise Searchkick::Error, "Active Job not found"
138
- end
139
- else
140
- reindex_record(record)
141
- end
153
+ def document_type(record)
154
+ RecordData.new(self, record).document_type
142
155
  end
143
156
 
144
157
  def similar_record(record, **options)
@@ -154,7 +167,7 @@ module Searchkick
154
167
  options[:similar] = true
155
168
 
156
169
  # TODO use index class instead of record class
157
- search_model(record.class, like_text, options)
170
+ Searchkick.search(like_text, model: record.class, **options)
158
171
  end
159
172
 
160
173
  # queue
@@ -163,21 +176,27 @@ module Searchkick
163
176
  Searchkick::ReindexQueue.new(name)
164
177
  end
165
178
 
166
- # search
179
+ # reindex
167
180
 
168
- # TODO remove in next major version
169
- def search_model(searchkick_klass, term = "*", **options, &block)
170
- query = Searchkick::Query.new(searchkick_klass, term, options)
171
- yield(query.body) if block
172
- if options[:execute] == false
173
- query
181
+ def reindex(scope, method_name, scoped:, full: false, **options)
182
+ refresh = options.fetch(:refresh, !scoped)
183
+
184
+ if method_name
185
+ # update
186
+ import_scope(scope, method_name: method_name)
187
+ self.refresh if refresh
188
+ true
189
+ elsif scoped && !full
190
+ # reindex association
191
+ import_scope(scope)
192
+ self.refresh if refresh
193
+ true
174
194
  else
175
- query.execute
195
+ # full reindex
196
+ reindex_scope(scope, options)
176
197
  end
177
198
  end
178
199
 
179
- # reindex
180
-
181
200
  def create_index(index_options: nil)
182
201
  index_options ||= self.index_options
183
202
  index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
@@ -185,37 +204,55 @@ module Searchkick
185
204
  index
186
205
  end
187
206
 
188
- def all_indices(unaliased: false)
189
- indices =
190
- begin
191
- client.indices.get_aliases
192
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
193
- {}
207
+ def import_scope(scope, **options)
208
+ bulk_indexer.import_scope(scope, **options)
209
+ end
210
+
211
+ def batches_left
212
+ bulk_indexer.batches_left
213
+ end
214
+
215
+ # other
216
+
217
+ def klass_document_type(klass, ignore_type = false)
218
+ @klass_document_type[[klass, ignore_type]] ||= begin
219
+ if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
220
+ type = klass.searchkick_klass.searchkick_options[:_type]
221
+ type = type.call if type.respond_to?(:call)
222
+ type
223
+ else
224
+ klass.model_name.to_s.underscore
194
225
  end
195
- indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
196
- indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
226
+ end
197
227
  end
198
228
 
199
- # remove old indices that start w/ index_name
200
- def clean_indices
201
- indices = all_indices(unaliased: true)
202
- indices.each do |index|
203
- Searchkick::Index.new(index).delete
229
+ # should not be public
230
+ def conversions_fields
231
+ @conversions_fields ||= begin
232
+ conversions = Array(options[:conversions])
233
+ conversions.map(&:to_s) + conversions.map(&:to_sym)
204
234
  end
205
- indices
206
235
  end
207
236
 
208
- def total_docs
209
- response =
210
- client.search(
211
- index: name,
212
- body: {
213
- query: {match_all: {}},
214
- size: 0
215
- }
216
- )
237
+ def suggest_fields
238
+ @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
239
+ end
217
240
 
218
- response["hits"]["total"]
241
+ def locations_fields
242
+ @locations_fields ||= begin
243
+ locations = Array(options[:locations])
244
+ locations.map(&:to_s) + locations.map(&:to_sym)
245
+ end
246
+ end
247
+
248
+ protected
249
+
250
+ def client
251
+ Searchkick.client
252
+ end
253
+
254
+ def bulk_indexer
255
+ @bulk_indexer ||= BulkIndexer.new(self)
219
256
  end
220
257
 
221
258
  # https://gist.github.com/jarosan/3124884
@@ -224,7 +261,7 @@ module Searchkick
224
261
  if resume
225
262
  index_name = all_indices.sort.last
226
263
  raise Searchkick::Error, "No index to resume" unless index_name
227
- index = Searchkick::Index.new(index_name)
264
+ index = Searchkick::Index.new(index_name, @options)
228
265
  else
229
266
  clean_indices unless retain
230
267
 
@@ -276,275 +313,12 @@ module Searchkick
276
313
  index.refresh
277
314
  true
278
315
  end
279
- end
280
-
281
- def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
282
- # use scope for import
283
- scope = scope.search_import if scope.respond_to?(:search_import)
284
-
285
- if batch
286
- import_or_update scope.to_a, method_name, async
287
- Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
288
- elsif full && async
289
- full_reindex_async(scope)
290
- elsif scope.respond_to?(:find_in_batches)
291
- if resume
292
- # use total docs instead of max id since there's not a great way
293
- # to get the max _id without scripting since it's a string
294
-
295
- # TODO use primary key and prefix with table name
296
- scope = scope.where("id > ?", total_docs)
297
- end
298
-
299
- scope = scope.select("id").except(:includes, :preload) if async
300
-
301
- scope.find_in_batches batch_size: batch_size do |items|
302
- import_or_update items, method_name, async
303
- end
304
- else
305
- each_batch(scope) do |items|
306
- import_or_update items, method_name, async
307
- end
308
- end
309
- end
310
-
311
- def batches_left
312
- Searchkick.with_redis { |r| r.scard(batches_key) }
313
- end
314
-
315
- # other
316
-
317
- def tokens(text, options = {})
318
- client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
319
- end
320
-
321
- def klass_document_type(klass, ignore_type = false)
322
- @klass_document_type[[klass, ignore_type]] ||= begin
323
- if klass.respond_to?(:document_type)
324
- klass.document_type
325
- elsif !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
326
- type = klass.searchkick_klass.searchkick_options[:_type]
327
- type = type.call if type.respond_to?(:call)
328
- type
329
- else
330
- klass.model_name.to_s.underscore
331
- end
332
- end
333
- end
334
-
335
- protected
336
-
337
- def client
338
- Searchkick.client
339
- end
340
-
341
- def document_type(record, ignore_type = false)
342
- if record.respond_to?(:search_document_type)
343
- record.search_document_type
344
- else
345
- klass_document_type(record.class, ignore_type)
346
- end
347
- end
348
-
349
- def search_id(record)
350
- id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
351
- id.is_a?(Numeric) ? id : id.to_s
352
- end
353
-
354
- EXCLUDED_ATTRIBUTES = ["_id", "_type"]
355
-
356
- def search_data(record, method_name = nil)
357
- partial_reindex = !method_name.nil?
358
- options = record.class.searchkick_options
359
-
360
- # remove _id since search_id is used instead
361
- source = record.send(method_name || :search_data).each_with_object({}) { |(k, v), memo| memo[k.to_s] = v; memo }.except(*EXCLUDED_ATTRIBUTES)
362
-
363
- # conversions
364
- if options[:conversions]
365
- Array(options[:conversions]).map(&:to_s).each do |conversions_field|
366
- if source[conversions_field]
367
- source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
368
- end
369
- end
370
- end
371
-
372
- # hack to prevent generator field doesn't exist error
373
- if options[:suggest]
374
- options[:suggest].map(&:to_s).each do |field|
375
- source[field] = nil if !source[field] && !partial_reindex
376
- end
377
- end
378
-
379
- # locations
380
- if options[:locations]
381
- options[:locations].map(&:to_s).each do |field|
382
- if source[field]
383
- if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
384
- # multiple locations
385
- source[field] = source[field].map { |a| location_value(a) }
386
- else
387
- source[field] = location_value(source[field])
388
- end
389
- end
390
- end
391
- end
392
-
393
- if !source.key?("type") && record.class.searchkick_klass.searchkick_options[:inheritance]
394
- source["type"] = document_type(record, true)
395
- end
396
-
397
- cast_big_decimal(source)
398
-
399
- source
400
- end
401
-
402
- def location_value(value)
403
- if value.is_a?(Array)
404
- value.map(&:to_f).reverse
405
- elsif value.is_a?(Hash)
406
- {lat: value[:lat].to_f, lon: value[:lon].to_f}
407
- else
408
- value
409
- end
410
- end
411
-
412
- # change all BigDecimal values to floats due to
413
- # https://github.com/rails/rails/issues/6033
414
- # possible loss of precision :/
415
- def cast_big_decimal(obj)
416
- case obj
417
- when BigDecimal
418
- obj.to_f
419
- when Hash
420
- obj.each do |k, v|
421
- obj[k] = cast_big_decimal(v)
422
- end
423
- when Enumerable
424
- obj.map do |v|
425
- cast_big_decimal(v)
426
- end
427
- else
428
- obj
429
- end
430
- end
431
-
432
- def import_or_update(records, method_name, async)
433
- if records.any?
434
- if async
435
- Searchkick::BulkReindexJob.perform_later(
436
- class_name: records.first.class.name,
437
- record_ids: records.map(&:id),
438
- index_name: name,
439
- method_name: method_name ? method_name.to_s : nil
440
- )
441
- else
442
- records = records.select(&:should_index?)
443
- if records.any?
444
- with_retries do
445
- method_name ? bulk_update(records, method_name) : import(records)
446
- end
447
- end
448
- end
449
- end
450
- end
451
-
452
- def full_reindex_async(scope)
453
- if scope.respond_to?(:primary_key)
454
- # TODO expire Redis key
455
- primary_key = scope.primary_key
456
-
457
- starting_id =
458
- begin
459
- scope.minimum(primary_key)
460
- rescue ActiveRecord::StatementInvalid
461
- false
462
- end
463
-
464
- if starting_id.nil?
465
- # no records, do nothing
466
- elsif starting_id.is_a?(Numeric)
467
- max_id = scope.maximum(primary_key)
468
- batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
469
-
470
- batches_count.times do |i|
471
- batch_id = i + 1
472
- min_id = starting_id + (i * batch_size)
473
- bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
474
- end
475
- else
476
- scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
477
- batch_id = i + 1
478
-
479
- bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
480
- end
481
- end
482
- else
483
- batch_id = 1
484
- # TODO remove any eager loading
485
- scope = scope.only(:_id) if scope.respond_to?(:only)
486
- each_batch(scope) do |items|
487
- bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
488
- batch_id += 1
489
- end
490
- end
491
- end
492
-
493
- def each_batch(scope)
494
- # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
495
- # use cursor for Mongoid
496
- items = []
497
- scope.all.each do |item|
498
- items << item
499
- if items.length == batch_size
500
- yield items
501
- items = []
502
- end
316
+ rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
317
+ if e.message.include?("No handler for type [text]")
318
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
503
319
  end
504
- yield items if items.any?
505
- end
506
-
507
- def bulk_reindex_job(scope, batch_id, options)
508
- Searchkick::BulkReindexJob.perform_later({
509
- class_name: scope.model_name.name,
510
- index_name: name,
511
- batch_id: batch_id
512
- }.merge(options))
513
- Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
514
- end
515
-
516
- def batch_size
517
- @batch_size ||= @options[:batch_size] || 1000
518
- end
519
-
520
- def with_retries
521
- retries = 0
522
-
523
- begin
524
- yield
525
- rescue Faraday::ClientError => e
526
- if retries < 1
527
- retries += 1
528
- retry
529
- end
530
- raise e
531
- end
532
- end
533
-
534
- def bulk_index_helper(records)
535
- Searchkick.indexer.queue(records.map { |r| {index: record_data(r).merge(data: search_data(r))} })
536
- end
537
-
538
- def bulk_delete_helper(records)
539
- Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| {delete: record_data(r)} })
540
- end
541
-
542
- def bulk_update_helper(records, method_name)
543
- Searchkick.indexer.queue(records.map { |r| {update: record_data(r).merge(data: {doc: search_data(r, method_name)})} })
544
- end
545
320
 
546
- def batches_key
547
- "searchkick:reindex:#{name}:batches"
321
+ raise e
548
322
  end
549
323
  end
550
324
  end