searchkick 2.5.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +7 -0
  3. data/.travis.yml +2 -11
  4. data/CHANGELOG.md +22 -0
  5. data/CONTRIBUTING.md +1 -1
  6. data/Gemfile +3 -3
  7. data/LICENSE.txt +1 -1
  8. data/README.md +68 -141
  9. data/Rakefile +0 -4
  10. data/benchmark/Gemfile +3 -2
  11. data/benchmark/{benchmark.rb → index.rb} +33 -31
  12. data/benchmark/search.rb +48 -0
  13. data/docs/Searchkick-3-Upgrade.md +57 -0
  14. data/lib/searchkick.rb +50 -27
  15. data/lib/searchkick/bulk_indexer.rb +168 -0
  16. data/lib/searchkick/bulk_reindex_job.rb +1 -1
  17. data/lib/searchkick/index.rb +122 -348
  18. data/lib/searchkick/index_options.rb +29 -26
  19. data/lib/searchkick/logging.rb +8 -7
  20. data/lib/searchkick/model.rb +37 -90
  21. data/lib/searchkick/multi_search.rb +6 -7
  22. data/lib/searchkick/query.rb +169 -166
  23. data/lib/searchkick/record_data.rb +133 -0
  24. data/lib/searchkick/record_indexer.rb +55 -0
  25. data/lib/searchkick/reindex_queue.rb +1 -1
  26. data/lib/searchkick/reindex_v2_job.rb +10 -13
  27. data/lib/searchkick/results.rb +14 -25
  28. data/lib/searchkick/tasks.rb +0 -4
  29. data/lib/searchkick/version.rb +1 -1
  30. data/searchkick.gemspec +3 -3
  31. data/test/boost_test.rb +3 -9
  32. data/test/geo_shape_test.rb +0 -4
  33. data/test/highlight_test.rb +28 -12
  34. data/test/index_test.rb +9 -10
  35. data/test/language_test.rb +16 -0
  36. data/test/marshal_test.rb +6 -1
  37. data/test/match_test.rb +9 -4
  38. data/test/model_test.rb +3 -5
  39. data/test/multi_search_test.rb +0 -7
  40. data/test/order_test.rb +1 -7
  41. data/test/pagination_test.rb +1 -1
  42. data/test/reindex_v2_job_test.rb +6 -11
  43. data/test/routing_test.rb +1 -1
  44. data/test/similar_test.rb +2 -2
  45. data/test/sql_test.rb +0 -31
  46. data/test/test_helper.rb +37 -23
  47. metadata +19 -26
  48. data/test/gemfiles/activerecord31.gemfile +0 -7
  49. data/test/gemfiles/activerecord32.gemfile +0 -7
  50. data/test/gemfiles/activerecord40.gemfile +0 -8
  51. data/test/gemfiles/activerecord41.gemfile +0 -8
  52. data/test/gemfiles/mongoid2.gemfile +0 -7
  53. data/test/gemfiles/mongoid3.gemfile +0 -6
  54. data/test/gemfiles/mongoid4.gemfile +0 -7
  55. data/test/records_test.rb +0 -10
@@ -4,7 +4,7 @@ module Searchkick
4
4
 
5
5
  def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
6
6
  klass = class_name.constantize
7
- index = index_name ? Searchkick::Index.new(index_name) : klass.searchkick_index
7
+ index = index_name ? Searchkick::Index.new(index_name, **klass.searchkick_options) : klass.searchkick_index
8
8
  record_ids ||= min_id..max_id
9
9
  index.import_scope(
10
10
  Searchkick.load_records(klass, record_ids),
@@ -1,3 +1,5 @@
1
+ require "searchkick/index_options"
2
+
1
3
  module Searchkick
2
4
  class Index
3
5
  include IndexOptions
@@ -52,9 +54,26 @@ module Searchkick
52
54
  client.indices.put_settings index: name, body: settings
53
55
  end
54
56
 
57
+ def tokens(text, options = {})
58
+ client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
59
+ end
60
+
61
+ def total_docs
62
+ response =
63
+ client.search(
64
+ index: name,
65
+ body: {
66
+ query: {match_all: {}},
67
+ size: 0
68
+ }
69
+ )
70
+
71
+ response["hits"]["total"]
72
+ end
73
+
55
74
  def promote(new_name, update_refresh_interval: false)
56
75
  if update_refresh_interval
57
- new_index = Searchkick::Index.new(new_name)
76
+ new_index = Searchkick::Index.new(new_name, @options)
58
77
  settings = options[:settings] || {}
59
78
  refresh_interval = (settings[:index] && settings[:index][:refresh_interval]) || "1s"
60
79
  new_index.update_settings(index: {refresh_interval: refresh_interval})
@@ -71,74 +90,68 @@ module Searchkick
71
90
  end
72
91
  alias_method :swap, :promote
73
92
 
93
+ def retrieve(record)
94
+ client.get(
95
+ index: name,
96
+ type: document_type(record),
97
+ id: search_id(record)
98
+ )["_source"]
99
+ end
100
+
101
+ def all_indices(unaliased: false)
102
+ indices =
103
+ begin
104
+ client.indices.get_aliases
105
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
106
+ {}
107
+ end
108
+ indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
109
+ indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
110
+ end
111
+
112
+ # remove old indices that start w/ index_name
113
+ def clean_indices
114
+ indices = all_indices(unaliased: true)
115
+ indices.each do |index|
116
+ Searchkick::Index.new(index).delete
117
+ end
118
+ indices
119
+ end
120
+
74
121
  # record based
75
122
  # use helpers for notifications
76
123
 
77
124
  def store(record)
78
- bulk_index_helper([record])
125
+ bulk_indexer.bulk_index([record])
79
126
  end
80
127
 
81
128
  def remove(record)
82
- bulk_delete_helper([record])
129
+ bulk_indexer.bulk_delete([record])
83
130
  end
84
131
 
85
132
  def update_record(record, method_name)
86
- bulk_update_helper([record], method_name)
133
+ bulk_indexer.bulk_update([record], method_name)
87
134
  end
88
135
 
89
136
  def bulk_delete(records)
90
- bulk_delete_helper(records)
137
+ bulk_indexer.bulk_delete(records)
91
138
  end
92
139
 
93
140
  def bulk_index(records)
94
- bulk_index_helper(records)
141
+ bulk_indexer.bulk_index(records)
95
142
  end
96
143
  alias_method :import, :bulk_index
97
144
 
98
145
  def bulk_update(records, method_name)
99
- bulk_update_helper(records, method_name)
100
- end
101
-
102
- def record_data(r)
103
- data = {
104
- _index: name,
105
- _id: search_id(r),
106
- _type: document_type(r)
107
- }
108
- data[:_routing] = r.search_routing if r.respond_to?(:search_routing)
109
- data
110
- end
111
-
112
- def retrieve(record)
113
- client.get(
114
- index: name,
115
- type: document_type(record),
116
- id: search_id(record)
117
- )["_source"]
146
+ bulk_indexer.bulk_update(records, method_name)
118
147
  end
119
148
 
120
- def reindex_record(record)
121
- if record.destroyed? || !record.should_index?
122
- begin
123
- remove(record)
124
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
125
- # do nothing
126
- end
127
- else
128
- store(record)
129
- end
149
+ def search_id(record)
150
+ RecordData.new(self, record).search_id
130
151
  end
131
152
 
132
- def reindex_record_async(record)
133
- if Searchkick.callbacks_value.nil?
134
- if defined?(Searchkick::ReindexV2Job)
135
- Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
136
- else
137
- raise Searchkick::Error, "Active Job not found"
138
- end
139
- else
140
- reindex_record(record)
141
- end
153
+ def document_type(record)
154
+ RecordData.new(self, record).document_type
142
155
  end
143
156
 
144
157
  def similar_record(record, **options)
@@ -154,7 +167,7 @@ module Searchkick
154
167
  options[:similar] = true
155
168
 
156
169
  # TODO use index class instead of record class
157
- search_model(record.class, like_text, options)
170
+ Searchkick.search(like_text, model: record.class, **options)
158
171
  end
159
172
 
160
173
  # queue
@@ -163,21 +176,27 @@ module Searchkick
163
176
  Searchkick::ReindexQueue.new(name)
164
177
  end
165
178
 
166
- # search
179
+ # reindex
167
180
 
168
- # TODO remove in next major version
169
- def search_model(searchkick_klass, term = "*", **options, &block)
170
- query = Searchkick::Query.new(searchkick_klass, term, options)
171
- yield(query.body) if block
172
- if options[:execute] == false
173
- query
181
+ def reindex(scope, method_name, scoped:, full: false, **options)
182
+ refresh = options.fetch(:refresh, !scoped)
183
+
184
+ if method_name
185
+ # update
186
+ import_scope(scope, method_name: method_name)
187
+ self.refresh if refresh
188
+ true
189
+ elsif scoped && !full
190
+ # reindex association
191
+ import_scope(scope)
192
+ self.refresh if refresh
193
+ true
174
194
  else
175
- query.execute
195
+ # full reindex
196
+ reindex_scope(scope, options)
176
197
  end
177
198
  end
178
199
 
179
- # reindex
180
-
181
200
  def create_index(index_options: nil)
182
201
  index_options ||= self.index_options
183
202
  index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
@@ -185,37 +204,55 @@ module Searchkick
185
204
  index
186
205
  end
187
206
 
188
- def all_indices(unaliased: false)
189
- indices =
190
- begin
191
- client.indices.get_aliases
192
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
193
- {}
207
+ def import_scope(scope, **options)
208
+ bulk_indexer.import_scope(scope, **options)
209
+ end
210
+
211
+ def batches_left
212
+ bulk_indexer.batches_left
213
+ end
214
+
215
+ # other
216
+
217
+ def klass_document_type(klass, ignore_type = false)
218
+ @klass_document_type[[klass, ignore_type]] ||= begin
219
+ if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
220
+ type = klass.searchkick_klass.searchkick_options[:_type]
221
+ type = type.call if type.respond_to?(:call)
222
+ type
223
+ else
224
+ klass.model_name.to_s.underscore
194
225
  end
195
- indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
196
- indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
226
+ end
197
227
  end
198
228
 
199
- # remove old indices that start w/ index_name
200
- def clean_indices
201
- indices = all_indices(unaliased: true)
202
- indices.each do |index|
203
- Searchkick::Index.new(index).delete
229
+ # should not be public
230
+ def conversions_fields
231
+ @conversions_fields ||= begin
232
+ conversions = Array(options[:conversions])
233
+ conversions.map(&:to_s) + conversions.map(&:to_sym)
204
234
  end
205
- indices
206
235
  end
207
236
 
208
- def total_docs
209
- response =
210
- client.search(
211
- index: name,
212
- body: {
213
- query: {match_all: {}},
214
- size: 0
215
- }
216
- )
237
+ def suggest_fields
238
+ @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
239
+ end
217
240
 
218
- response["hits"]["total"]
241
+ def locations_fields
242
+ @locations_fields ||= begin
243
+ locations = Array(options[:locations])
244
+ locations.map(&:to_s) + locations.map(&:to_sym)
245
+ end
246
+ end
247
+
248
+ protected
249
+
250
+ def client
251
+ Searchkick.client
252
+ end
253
+
254
+ def bulk_indexer
255
+ @bulk_indexer ||= BulkIndexer.new(self)
219
256
  end
220
257
 
221
258
  # https://gist.github.com/jarosan/3124884
@@ -224,7 +261,7 @@ module Searchkick
224
261
  if resume
225
262
  index_name = all_indices.sort.last
226
263
  raise Searchkick::Error, "No index to resume" unless index_name
227
- index = Searchkick::Index.new(index_name)
264
+ index = Searchkick::Index.new(index_name, @options)
228
265
  else
229
266
  clean_indices unless retain
230
267
 
@@ -276,275 +313,12 @@ module Searchkick
276
313
  index.refresh
277
314
  true
278
315
  end
279
- end
280
-
281
- def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
282
- # use scope for import
283
- scope = scope.search_import if scope.respond_to?(:search_import)
284
-
285
- if batch
286
- import_or_update scope.to_a, method_name, async
287
- Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
288
- elsif full && async
289
- full_reindex_async(scope)
290
- elsif scope.respond_to?(:find_in_batches)
291
- if resume
292
- # use total docs instead of max id since there's not a great way
293
- # to get the max _id without scripting since it's a string
294
-
295
- # TODO use primary key and prefix with table name
296
- scope = scope.where("id > ?", total_docs)
297
- end
298
-
299
- scope = scope.select("id").except(:includes, :preload) if async
300
-
301
- scope.find_in_batches batch_size: batch_size do |items|
302
- import_or_update items, method_name, async
303
- end
304
- else
305
- each_batch(scope) do |items|
306
- import_or_update items, method_name, async
307
- end
308
- end
309
- end
310
-
311
- def batches_left
312
- Searchkick.with_redis { |r| r.scard(batches_key) }
313
- end
314
-
315
- # other
316
-
317
- def tokens(text, options = {})
318
- client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
319
- end
320
-
321
- def klass_document_type(klass, ignore_type = false)
322
- @klass_document_type[[klass, ignore_type]] ||= begin
323
- if klass.respond_to?(:document_type)
324
- klass.document_type
325
- elsif !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
326
- type = klass.searchkick_klass.searchkick_options[:_type]
327
- type = type.call if type.respond_to?(:call)
328
- type
329
- else
330
- klass.model_name.to_s.underscore
331
- end
332
- end
333
- end
334
-
335
- protected
336
-
337
- def client
338
- Searchkick.client
339
- end
340
-
341
- def document_type(record, ignore_type = false)
342
- if record.respond_to?(:search_document_type)
343
- record.search_document_type
344
- else
345
- klass_document_type(record.class, ignore_type)
346
- end
347
- end
348
-
349
- def search_id(record)
350
- id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
351
- id.is_a?(Numeric) ? id : id.to_s
352
- end
353
-
354
- EXCLUDED_ATTRIBUTES = ["_id", "_type"]
355
-
356
- def search_data(record, method_name = nil)
357
- partial_reindex = !method_name.nil?
358
- options = record.class.searchkick_options
359
-
360
- # remove _id since search_id is used instead
361
- source = record.send(method_name || :search_data).each_with_object({}) { |(k, v), memo| memo[k.to_s] = v; memo }.except(*EXCLUDED_ATTRIBUTES)
362
-
363
- # conversions
364
- if options[:conversions]
365
- Array(options[:conversions]).map(&:to_s).each do |conversions_field|
366
- if source[conversions_field]
367
- source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
368
- end
369
- end
370
- end
371
-
372
- # hack to prevent generator field doesn't exist error
373
- if options[:suggest]
374
- options[:suggest].map(&:to_s).each do |field|
375
- source[field] = nil if !source[field] && !partial_reindex
376
- end
377
- end
378
-
379
- # locations
380
- if options[:locations]
381
- options[:locations].map(&:to_s).each do |field|
382
- if source[field]
383
- if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
384
- # multiple locations
385
- source[field] = source[field].map { |a| location_value(a) }
386
- else
387
- source[field] = location_value(source[field])
388
- end
389
- end
390
- end
391
- end
392
-
393
- if !source.key?("type") && record.class.searchkick_klass.searchkick_options[:inheritance]
394
- source["type"] = document_type(record, true)
395
- end
396
-
397
- cast_big_decimal(source)
398
-
399
- source
400
- end
401
-
402
- def location_value(value)
403
- if value.is_a?(Array)
404
- value.map(&:to_f).reverse
405
- elsif value.is_a?(Hash)
406
- {lat: value[:lat].to_f, lon: value[:lon].to_f}
407
- else
408
- value
409
- end
410
- end
411
-
412
- # change all BigDecimal values to floats due to
413
- # https://github.com/rails/rails/issues/6033
414
- # possible loss of precision :/
415
- def cast_big_decimal(obj)
416
- case obj
417
- when BigDecimal
418
- obj.to_f
419
- when Hash
420
- obj.each do |k, v|
421
- obj[k] = cast_big_decimal(v)
422
- end
423
- when Enumerable
424
- obj.map do |v|
425
- cast_big_decimal(v)
426
- end
427
- else
428
- obj
429
- end
430
- end
431
-
432
- def import_or_update(records, method_name, async)
433
- if records.any?
434
- if async
435
- Searchkick::BulkReindexJob.perform_later(
436
- class_name: records.first.class.name,
437
- record_ids: records.map(&:id),
438
- index_name: name,
439
- method_name: method_name ? method_name.to_s : nil
440
- )
441
- else
442
- records = records.select(&:should_index?)
443
- if records.any?
444
- with_retries do
445
- method_name ? bulk_update(records, method_name) : import(records)
446
- end
447
- end
448
- end
449
- end
450
- end
451
-
452
- def full_reindex_async(scope)
453
- if scope.respond_to?(:primary_key)
454
- # TODO expire Redis key
455
- primary_key = scope.primary_key
456
-
457
- starting_id =
458
- begin
459
- scope.minimum(primary_key)
460
- rescue ActiveRecord::StatementInvalid
461
- false
462
- end
463
-
464
- if starting_id.nil?
465
- # no records, do nothing
466
- elsif starting_id.is_a?(Numeric)
467
- max_id = scope.maximum(primary_key)
468
- batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
469
-
470
- batches_count.times do |i|
471
- batch_id = i + 1
472
- min_id = starting_id + (i * batch_size)
473
- bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
474
- end
475
- else
476
- scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
477
- batch_id = i + 1
478
-
479
- bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
480
- end
481
- end
482
- else
483
- batch_id = 1
484
- # TODO remove any eager loading
485
- scope = scope.only(:_id) if scope.respond_to?(:only)
486
- each_batch(scope) do |items|
487
- bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
488
- batch_id += 1
489
- end
490
- end
491
- end
492
-
493
- def each_batch(scope)
494
- # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
495
- # use cursor for Mongoid
496
- items = []
497
- scope.all.each do |item|
498
- items << item
499
- if items.length == batch_size
500
- yield items
501
- items = []
502
- end
316
+ rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
317
+ if e.message.include?("No handler for type [text]")
318
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
503
319
  end
504
- yield items if items.any?
505
- end
506
-
507
- def bulk_reindex_job(scope, batch_id, options)
508
- Searchkick::BulkReindexJob.perform_later({
509
- class_name: scope.model_name.name,
510
- index_name: name,
511
- batch_id: batch_id
512
- }.merge(options))
513
- Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
514
- end
515
-
516
- def batch_size
517
- @batch_size ||= @options[:batch_size] || 1000
518
- end
519
-
520
- def with_retries
521
- retries = 0
522
-
523
- begin
524
- yield
525
- rescue Faraday::ClientError => e
526
- if retries < 1
527
- retries += 1
528
- retry
529
- end
530
- raise e
531
- end
532
- end
533
-
534
- def bulk_index_helper(records)
535
- Searchkick.indexer.queue(records.map { |r| {index: record_data(r).merge(data: search_data(r))} })
536
- end
537
-
538
- def bulk_delete_helper(records)
539
- Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| {delete: record_data(r)} })
540
- end
541
-
542
- def bulk_update_helper(records, method_name)
543
- Searchkick.indexer.queue(records.map { |r| {update: record_data(r).merge(data: {doc: search_data(r, method_name)})} })
544
- end
545
320
 
546
- def batches_key
547
- "searchkick:reindex:#{name}:batches"
321
+ raise e
548
322
  end
549
323
  end
550
324
  end