searchkick 2.3.2 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -1,7 +1,7 @@
1
+ require "searchkick/index_options"
2
+
1
3
  module Searchkick
2
4
  class Index
3
- include IndexOptions
4
-
5
5
  attr_reader :name, :options
6
6
 
7
7
  def initialize(name, options = {})
@@ -10,12 +10,16 @@ module Searchkick
10
10
  @klass_document_type = {} # cache
11
11
  end
12
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
13
17
  def create(body = {})
14
18
  client.indices.create index: name, body: body
15
19
  end
16
20
 
17
21
  def delete
18
- if !Searchkick.server_below?("6.0.0-alpha1") && alias_exists?
22
+ if alias_exists?
19
23
  # can't call delete directly on aliases in ES 6
20
24
  indices = client.indices.get_alias(name: name).keys
21
25
  client.indices.delete index: indices
@@ -45,16 +49,33 @@ module Searchkick
45
49
  end
46
50
 
47
51
  def refresh_interval
48
- settings.values.first["settings"]["index"]["refresh_interval"]
52
+ index_settings["refresh_interval"]
49
53
  end
50
54
 
51
55
  def update_settings(settings)
52
56
  client.indices.put_settings index: name, body: settings
53
57
  end
54
58
 
59
+ def tokens(text, options = {})
60
+ client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
61
+ end
62
+
63
+ def total_docs
64
+ response =
65
+ client.search(
66
+ index: name,
67
+ body: {
68
+ query: {match_all: {}},
69
+ size: 0
70
+ }
71
+ )
72
+
73
+ Searchkick::Results.new(nil, response).total_count
74
+ end
75
+
55
76
  def promote(new_name, update_refresh_interval: false)
56
77
  if update_refresh_interval
57
- new_index = Searchkick::Index.new(new_name)
78
+ new_index = Searchkick::Index.new(new_name, @options)
58
79
  settings = options[:settings] || {}
59
80
  refresh_interval = (settings[:index] && settings[:index][:refresh_interval]) || "1s"
60
81
  new_index.update_settings(index: {refresh_interval: refresh_interval})
@@ -71,74 +92,73 @@ module Searchkick
71
92
  end
72
93
  alias_method :swap, :promote
73
94
 
95
+ def retrieve(record)
96
+ record_data = RecordData.new(self, record).record_data
97
+
98
+ # remove underscore
99
+ get_options = Hash[record_data.map { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }]
100
+
101
+ client.get(get_options)["_source"]
102
+ end
103
+
104
+ def all_indices(unaliased: false)
105
+ indices =
106
+ begin
107
+ if client.indices.respond_to?(:get_alias)
108
+ client.indices.get_alias
109
+ else
110
+ client.indices.get_aliases
111
+ end
112
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
113
+ {}
114
+ end
115
+ indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
116
+ indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
117
+ end
118
+
119
+ # remove old indices that start w/ index_name
120
+ def clean_indices
121
+ indices = all_indices(unaliased: true)
122
+ indices.each do |index|
123
+ Searchkick::Index.new(index).delete
124
+ end
125
+ indices
126
+ end
127
+
74
128
  # record based
75
129
  # use helpers for notifications
76
130
 
77
131
  def store(record)
78
- bulk_index_helper([record])
132
+ bulk_indexer.bulk_index([record])
79
133
  end
80
134
 
81
135
  def remove(record)
82
- bulk_delete_helper([record])
136
+ bulk_indexer.bulk_delete([record])
83
137
  end
84
138
 
85
139
  def update_record(record, method_name)
86
- bulk_update_helper([record], method_name)
140
+ bulk_indexer.bulk_update([record], method_name)
87
141
  end
88
142
 
89
143
  def bulk_delete(records)
90
- bulk_delete_helper(records)
144
+ bulk_indexer.bulk_delete(records)
91
145
  end
92
146
 
93
147
  def bulk_index(records)
94
- bulk_index_helper(records)
148
+ bulk_indexer.bulk_index(records)
95
149
  end
96
150
  alias_method :import, :bulk_index
97
151
 
98
152
  def bulk_update(records, method_name)
99
- bulk_update_helper(records, method_name)
100
- end
101
-
102
- def record_data(r)
103
- data = {
104
- _index: name,
105
- _id: search_id(r),
106
- _type: document_type(r)
107
- }
108
- data[:_routing] = r.search_routing if r.respond_to?(:search_routing)
109
- data
110
- end
111
-
112
- def retrieve(record)
113
- client.get(
114
- index: name,
115
- type: document_type(record),
116
- id: search_id(record)
117
- )["_source"]
153
+ bulk_indexer.bulk_update(records, method_name)
118
154
  end
119
155
 
120
- def reindex_record(record)
121
- if record.destroyed? || !record.should_index?
122
- begin
123
- remove(record)
124
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
125
- # do nothing
126
- end
127
- else
128
- store(record)
129
- end
156
+ def search_id(record)
157
+ RecordData.new(self, record).search_id
130
158
  end
131
159
 
132
- def reindex_record_async(record)
133
- if Searchkick.callbacks_value.nil?
134
- if defined?(Searchkick::ReindexV2Job)
135
- Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
136
- else
137
- raise Searchkick::Error, "Active Job not found"
138
- end
139
- else
140
- reindex_record(record)
141
- end
160
+ def document_type(record)
161
+ RecordData.new(self, record).document_type
142
162
  end
143
163
 
144
164
  def similar_record(record, **options)
@@ -146,15 +166,25 @@ module Searchkick
146
166
  .keep_if { |k, _| !options[:fields] || options[:fields].map(&:to_s).include?(k) }
147
167
  .values.compact.join(" ")
148
168
 
149
- # TODO deep merge method
150
169
  options[:where] ||= {}
151
170
  options[:where][:_id] ||= {}
152
- options[:where][:_id][:not] = record.id.to_s
171
+ options[:where][:_id][:not] = Array(options[:where][:_id][:not]) + [record.id.to_s]
153
172
  options[:per_page] ||= 10
154
173
  options[:similar] = true
155
174
 
156
175
  # TODO use index class instead of record class
157
- search_model(record.class, like_text, options)
176
+ Searchkick.search(like_text, model: record.class, **options)
177
+ end
178
+
179
+ def reload_synonyms
180
+ require "elasticsearch/xpack"
181
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
182
+ raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
158
188
  end
159
189
 
160
190
  # queue
@@ -163,21 +193,34 @@ module Searchkick
163
193
  Searchkick::ReindexQueue.new(name)
164
194
  end
165
195
 
166
- # search
196
+ # reindex
197
+
198
+ def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
199
+ refresh = options.fetch(:refresh, !scoped)
200
+ options.delete(:refresh)
167
201
 
168
- # TODO remove in next major version
169
- def search_model(searchkick_klass, term = "*", **options, &block)
170
- query = Searchkick::Query.new(searchkick_klass, term, options)
171
- yield(query.body) if block
172
- if options[:execute] == false
173
- query
202
+ if method_name
203
+ # TODO throw ArgumentError
204
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
205
+
206
+ # update
207
+ import_scope(relation, method_name: method_name, scope: scope)
208
+ self.refresh if refresh
209
+ true
210
+ elsif scoped && !full
211
+ # TODO throw ArgumentError
212
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
213
+
214
+ # reindex association
215
+ import_scope(relation, scope: scope)
216
+ self.refresh if refresh
217
+ true
174
218
  else
175
- query.execute
219
+ # full reindex
220
+ reindex_scope(relation, scope: scope, **options)
176
221
  end
177
222
  end
178
223
 
179
- # reindex
180
-
181
224
  def create_index(index_options: nil)
182
225
  index_options ||= self.index_options
183
226
  index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
@@ -185,62 +228,102 @@ module Searchkick
185
228
  index
186
229
  end
187
230
 
188
- def all_indices(unaliased: false)
189
- indices =
190
- begin
191
- client.indices.get_aliases
192
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
193
- {}
231
+ def import_scope(relation, **options)
232
+ bulk_indexer.import_scope(relation, **options)
233
+ end
234
+
235
+ def batches_left
236
+ bulk_indexer.batches_left
237
+ end
238
+
239
+ # other
240
+
241
+ def klass_document_type(klass, ignore_type = false)
242
+ @klass_document_type[[klass, ignore_type]] ||= begin
243
+ if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
244
+ type = klass.searchkick_klass.searchkick_options[:_type]
245
+ type = type.call if type.respond_to?(:call)
246
+ type
247
+ else
248
+ klass.model_name.to_s.underscore
194
249
  end
195
- indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
196
- indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
250
+ end
197
251
  end
198
252
 
199
- # remove old indices that start w/ index_name
200
- def clean_indices
201
- indices = all_indices(unaliased: true)
202
- indices.each do |index|
203
- Searchkick::Index.new(index).delete
253
+ # should not be public
254
+ def conversions_fields
255
+ @conversions_fields ||= begin
256
+ conversions = Array(options[:conversions])
257
+ conversions.map(&:to_s) + conversions.map(&:to_sym)
204
258
  end
205
- indices
206
259
  end
207
260
 
208
- def total_docs
209
- response =
210
- client.search(
211
- index: name,
212
- body: {
213
- query: {match_all: {}},
214
- size: 0
215
- }
216
- )
261
+ def suggest_fields
262
+ @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
263
+ end
264
+
265
+ def locations_fields
266
+ @locations_fields ||= begin
267
+ locations = Array(options[:locations])
268
+ locations.map(&:to_s) + locations.map(&:to_sym)
269
+ end
270
+ end
217
271
 
218
- response["hits"]["total"]
272
+ # private
273
+ def uuid
274
+ index_settings["uuid"]
275
+ end
276
+
277
+ protected
278
+
279
+ def client
280
+ Searchkick.client
281
+ end
282
+
283
+ def bulk_indexer
284
+ @bulk_indexer ||= BulkIndexer.new(self)
285
+ end
286
+
287
+ def index_settings
288
+ settings.values.first["settings"]["index"]
289
+ end
290
+
291
+ def import_before_promotion(index, relation, **import_options)
292
+ index.import_scope(relation, **import_options)
219
293
  end
220
294
 
221
295
  # https://gist.github.com/jarosan/3124884
222
296
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
223
- def reindex_scope(scope, import: true, resume: false, retain: false, async: false, refresh_interval: nil)
297
+ def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
224
298
  if resume
225
299
  index_name = all_indices.sort.last
226
300
  raise Searchkick::Error, "No index to resume" unless index_name
227
- index = Searchkick::Index.new(index_name)
301
+ index = Searchkick::Index.new(index_name, @options)
228
302
  else
229
303
  clean_indices unless retain
230
304
 
231
- index_options = scope.searchkick_index_options
305
+ index_options = relation.searchkick_index_options
232
306
  index_options.deep_merge!(settings: {index: {refresh_interval: refresh_interval}}) if refresh_interval
233
307
  index = create_index(index_options: index_options)
234
308
  end
235
309
 
310
+ import_options = {
311
+ resume: resume,
312
+ async: async,
313
+ full: true,
314
+ scope: scope
315
+ }
316
+
317
+ uuid = index.uuid
318
+
236
319
  # check if alias exists
237
320
  alias_exists = alias_exists?
238
321
  if alias_exists
239
- # import before promotion
240
- index.import_scope(scope, resume: resume, async: async, full: true) if import
322
+ import_before_promotion(index, relation, **import_options) if import
241
323
 
242
324
  # get existing indices to remove
243
325
  unless async
326
+ check_uuid(uuid, index.uuid)
244
327
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
245
328
  clean_indices unless retain
246
329
  end
@@ -249,7 +332,7 @@ module Searchkick
249
332
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
250
333
 
251
334
  # import after promotion
252
- index.import_scope(scope, resume: resume, async: async, full: true) if import
335
+ index.import_scope(relation, **import_options) if import
253
336
  end
254
337
 
255
338
  if async
@@ -265,6 +348,7 @@ module Searchkick
265
348
  # already promoted if alias didn't exist
266
349
  if alias_exists
267
350
  puts "Jobs complete. Promoting..."
351
+ check_uuid(uuid, index.uuid)
268
352
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
269
353
  end
270
354
  clean_indices unless retain
@@ -276,267 +360,22 @@ module Searchkick
276
360
  index.refresh
277
361
  true
278
362
  end
279
- end
280
-
281
- def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
282
- # use scope for import
283
- scope = scope.search_import if scope.respond_to?(:search_import)
284
-
285
- if batch
286
- import_or_update scope.to_a, method_name, async
287
- Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
288
- elsif full && async
289
- full_reindex_async(scope)
290
- elsif scope.respond_to?(:find_in_batches)
291
- if resume
292
- # use total docs instead of max id since there's not a great way
293
- # to get the max _id without scripting since it's a string
294
-
295
- # TODO use primary key and prefix with table name
296
- scope = scope.where("id > ?", total_docs)
297
- end
298
-
299
- scope = scope.select("id").except(:includes, :preload) if async
300
-
301
- scope.find_in_batches batch_size: batch_size do |items|
302
- import_or_update items, method_name, async
303
- end
304
- else
305
- each_batch(scope) do |items|
306
- import_or_update items, method_name, async
307
- end
308
- end
309
- end
310
-
311
- def batches_left
312
- Searchkick.with_redis { |r| r.scard(batches_key) }
313
- end
314
-
315
- # other
316
-
317
- def tokens(text, options = {})
318
- client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
319
- end
320
-
321
- def klass_document_type(klass)
322
- @klass_document_type[klass] ||= begin
323
- if klass.respond_to?(:document_type)
324
- klass.document_type
325
- else
326
- klass.model_name.to_s.underscore
327
- end
328
- end
329
- end
330
-
331
- protected
332
-
333
- def client
334
- Searchkick.client
335
- end
336
-
337
- def document_type(record)
338
- if record.respond_to?(:search_document_type)
339
- record.search_document_type
340
- else
341
- klass_document_type(record.class)
342
- end
343
- end
344
-
345
- def search_id(record)
346
- id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
347
- id.is_a?(Numeric) ? id : id.to_s
348
- end
349
-
350
- EXCLUDED_ATTRIBUTES = ["_id", "_type"]
351
-
352
- def search_data(record, method_name = nil)
353
- partial_reindex = !method_name.nil?
354
- options = record.class.searchkick_options
355
-
356
- # remove _id since search_id is used instead
357
- source = record.send(method_name || :search_data).each_with_object({}) { |(k, v), memo| memo[k.to_s] = v; memo }.except(*EXCLUDED_ATTRIBUTES)
358
-
359
- # conversions
360
- if options[:conversions]
361
- Array(options[:conversions]).map(&:to_s).each do |conversions_field|
362
- if source[conversions_field]
363
- source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
364
- end
365
- end
366
- end
367
-
368
- # hack to prevent generator field doesn't exist error
369
- if options[:suggest]
370
- options[:suggest].map(&:to_s).each do |field|
371
- source[field] = nil if !source[field] && !partial_reindex
372
- end
373
- end
374
-
375
- # locations
376
- if options[:locations]
377
- options[:locations].map(&:to_s).each do |field|
378
- if source[field]
379
- if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
380
- # multiple locations
381
- source[field] = source[field].map { |a| location_value(a) }
382
- else
383
- source[field] = location_value(source[field])
384
- end
385
- end
386
- end
363
+ rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
364
+ if e.message.include?("No handler for type [text]")
365
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
387
366
  end
388
367
 
389
- cast_big_decimal(source)
390
-
391
- source
368
+ raise e
392
369
  end
393
370
 
394
- def location_value(value)
395
- if value.is_a?(Array)
396
- value.map(&:to_f).reverse
397
- elsif value.is_a?(Hash)
398
- {lat: value[:lat].to_f, lon: value[:lon].to_f}
399
- else
400
- value
401
- end
402
- end
403
-
404
- # change all BigDecimal values to floats due to
405
- # https://github.com/rails/rails/issues/6033
406
- # possible loss of precision :/
407
- def cast_big_decimal(obj)
408
- case obj
409
- when BigDecimal
410
- obj.to_f
411
- when Hash
412
- obj.each do |k, v|
413
- obj[k] = cast_big_decimal(v)
414
- end
415
- when Enumerable
416
- obj.map do |v|
417
- cast_big_decimal(v)
418
- end
419
- else
420
- obj
371
+ # safety check
372
+ # still a chance for race condition since its called before promotion
373
+ # ideal is for user to disable automatic index creation
374
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
375
+ def check_uuid(old_uuid, new_uuid)
376
+ if old_uuid != new_uuid
377
+ raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
421
378
  end
422
379
  end
423
-
424
- def import_or_update(records, method_name, async)
425
- if records.any?
426
- if async
427
- Searchkick::BulkReindexJob.perform_later(
428
- class_name: records.first.class.name,
429
- record_ids: records.map(&:id),
430
- index_name: name,
431
- method_name: method_name ? method_name.to_s : nil
432
- )
433
- else
434
- records = records.select(&:should_index?)
435
- if records.any?
436
- with_retries do
437
- method_name ? bulk_update(records, method_name) : import(records)
438
- end
439
- end
440
- end
441
- end
442
- end
443
-
444
- def full_reindex_async(scope)
445
- if scope.respond_to?(:primary_key)
446
- # TODO expire Redis key
447
- primary_key = scope.primary_key
448
-
449
- starting_id =
450
- begin
451
- scope.minimum(primary_key)
452
- rescue ActiveRecord::StatementInvalid
453
- false
454
- end
455
-
456
- if starting_id.nil?
457
- # no records, do nothing
458
- elsif starting_id.is_a?(Numeric)
459
- max_id = scope.maximum(primary_key)
460
- batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
461
-
462
- batches_count.times do |i|
463
- batch_id = i + 1
464
- min_id = starting_id + (i * batch_size)
465
- bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
466
- end
467
- else
468
- scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
469
- batch_id = i + 1
470
-
471
- bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
472
- end
473
- end
474
- else
475
- batch_id = 1
476
- # TODO remove any eager loading
477
- scope = scope.only(:_id) if scope.respond_to?(:only)
478
- each_batch(scope) do |items|
479
- bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
480
- batch_id += 1
481
- end
482
- end
483
- end
484
-
485
- def each_batch(scope)
486
- # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
487
- # use cursor for Mongoid
488
- items = []
489
- scope.all.each do |item|
490
- items << item
491
- if items.length == batch_size
492
- yield items
493
- items = []
494
- end
495
- end
496
- yield items if items.any?
497
- end
498
-
499
- def bulk_reindex_job(scope, batch_id, options)
500
- Searchkick::BulkReindexJob.perform_later({
501
- class_name: scope.model_name.name,
502
- index_name: name,
503
- batch_id: batch_id
504
- }.merge(options))
505
- Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
506
- end
507
-
508
- def batch_size
509
- @batch_size ||= @options[:batch_size] || 1000
510
- end
511
-
512
- def with_retries
513
- retries = 0
514
-
515
- begin
516
- yield
517
- rescue Faraday::ClientError => e
518
- if retries < 1
519
- retries += 1
520
- retry
521
- end
522
- raise e
523
- end
524
- end
525
-
526
- def bulk_index_helper(records)
527
- Searchkick.indexer.queue(records.map { |r| {index: record_data(r).merge(data: search_data(r))} })
528
- end
529
-
530
- def bulk_delete_helper(records)
531
- Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| {delete: record_data(r)} })
532
- end
533
-
534
- def bulk_update_helper(records, method_name)
535
- Searchkick.indexer.queue(records.map { |r| {update: record_data(r).merge(data: {doc: search_data(r, method_name)})} })
536
- end
537
-
538
- def batches_key
539
- "searchkick:reindex:#{name}:batches"
540
- end
541
380
  end
542
381
  end