searchkick 2.3.2 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -1,7 +1,7 @@
1
+ require "searchkick/index_options"
2
+
1
3
  module Searchkick
2
4
  class Index
3
- include IndexOptions
4
-
5
5
  attr_reader :name, :options
6
6
 
7
7
  def initialize(name, options = {})
@@ -10,12 +10,16 @@ module Searchkick
10
10
  @klass_document_type = {} # cache
11
11
  end
12
12
 
13
+ def index_options
14
+ IndexOptions.new(self).index_options
15
+ end
16
+
13
17
  def create(body = {})
14
18
  client.indices.create index: name, body: body
15
19
  end
16
20
 
17
21
  def delete
18
- if !Searchkick.server_below?("6.0.0-alpha1") && alias_exists?
22
+ if alias_exists?
19
23
  # can't call delete directly on aliases in ES 6
20
24
  indices = client.indices.get_alias(name: name).keys
21
25
  client.indices.delete index: indices
@@ -45,16 +49,33 @@ module Searchkick
45
49
  end
46
50
 
47
51
  def refresh_interval
48
- settings.values.first["settings"]["index"]["refresh_interval"]
52
+ index_settings["refresh_interval"]
49
53
  end
50
54
 
51
55
  def update_settings(settings)
52
56
  client.indices.put_settings index: name, body: settings
53
57
  end
54
58
 
59
+ def tokens(text, options = {})
60
+ client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
61
+ end
62
+
63
+ def total_docs
64
+ response =
65
+ client.search(
66
+ index: name,
67
+ body: {
68
+ query: {match_all: {}},
69
+ size: 0
70
+ }
71
+ )
72
+
73
+ Searchkick::Results.new(nil, response).total_count
74
+ end
75
+
55
76
  def promote(new_name, update_refresh_interval: false)
56
77
  if update_refresh_interval
57
- new_index = Searchkick::Index.new(new_name)
78
+ new_index = Searchkick::Index.new(new_name, @options)
58
79
  settings = options[:settings] || {}
59
80
  refresh_interval = (settings[:index] && settings[:index][:refresh_interval]) || "1s"
60
81
  new_index.update_settings(index: {refresh_interval: refresh_interval})
@@ -71,74 +92,73 @@ module Searchkick
71
92
  end
72
93
  alias_method :swap, :promote
73
94
 
95
+ def retrieve(record)
96
+ record_data = RecordData.new(self, record).record_data
97
+
98
+ # remove underscore
99
+ get_options = Hash[record_data.map { |k, v| [k.to_s.sub(/\A_/, "").to_sym, v] }]
100
+
101
+ client.get(get_options)["_source"]
102
+ end
103
+
104
+ def all_indices(unaliased: false)
105
+ indices =
106
+ begin
107
+ if client.indices.respond_to?(:get_alias)
108
+ client.indices.get_alias
109
+ else
110
+ client.indices.get_aliases
111
+ end
112
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
113
+ {}
114
+ end
115
+ indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
116
+ indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
117
+ end
118
+
119
+ # remove old indices that start w/ index_name
120
+ def clean_indices
121
+ indices = all_indices(unaliased: true)
122
+ indices.each do |index|
123
+ Searchkick::Index.new(index).delete
124
+ end
125
+ indices
126
+ end
127
+
74
128
  # record based
75
129
  # use helpers for notifications
76
130
 
77
131
  def store(record)
78
- bulk_index_helper([record])
132
+ bulk_indexer.bulk_index([record])
79
133
  end
80
134
 
81
135
  def remove(record)
82
- bulk_delete_helper([record])
136
+ bulk_indexer.bulk_delete([record])
83
137
  end
84
138
 
85
139
  def update_record(record, method_name)
86
- bulk_update_helper([record], method_name)
140
+ bulk_indexer.bulk_update([record], method_name)
87
141
  end
88
142
 
89
143
  def bulk_delete(records)
90
- bulk_delete_helper(records)
144
+ bulk_indexer.bulk_delete(records)
91
145
  end
92
146
 
93
147
  def bulk_index(records)
94
- bulk_index_helper(records)
148
+ bulk_indexer.bulk_index(records)
95
149
  end
96
150
  alias_method :import, :bulk_index
97
151
 
98
152
  def bulk_update(records, method_name)
99
- bulk_update_helper(records, method_name)
100
- end
101
-
102
- def record_data(r)
103
- data = {
104
- _index: name,
105
- _id: search_id(r),
106
- _type: document_type(r)
107
- }
108
- data[:_routing] = r.search_routing if r.respond_to?(:search_routing)
109
- data
110
- end
111
-
112
- def retrieve(record)
113
- client.get(
114
- index: name,
115
- type: document_type(record),
116
- id: search_id(record)
117
- )["_source"]
153
+ bulk_indexer.bulk_update(records, method_name)
118
154
  end
119
155
 
120
- def reindex_record(record)
121
- if record.destroyed? || !record.should_index?
122
- begin
123
- remove(record)
124
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
125
- # do nothing
126
- end
127
- else
128
- store(record)
129
- end
156
+ def search_id(record)
157
+ RecordData.new(self, record).search_id
130
158
  end
131
159
 
132
- def reindex_record_async(record)
133
- if Searchkick.callbacks_value.nil?
134
- if defined?(Searchkick::ReindexV2Job)
135
- Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
136
- else
137
- raise Searchkick::Error, "Active Job not found"
138
- end
139
- else
140
- reindex_record(record)
141
- end
160
+ def document_type(record)
161
+ RecordData.new(self, record).document_type
142
162
  end
143
163
 
144
164
  def similar_record(record, **options)
@@ -146,15 +166,25 @@ module Searchkick
146
166
  .keep_if { |k, _| !options[:fields] || options[:fields].map(&:to_s).include?(k) }
147
167
  .values.compact.join(" ")
148
168
 
149
- # TODO deep merge method
150
169
  options[:where] ||= {}
151
170
  options[:where][:_id] ||= {}
152
- options[:where][:_id][:not] = record.id.to_s
171
+ options[:where][:_id][:not] = Array(options[:where][:_id][:not]) + [record.id.to_s]
153
172
  options[:per_page] ||= 10
154
173
  options[:similar] = true
155
174
 
156
175
  # TODO use index class instead of record class
157
- search_model(record.class, like_text, options)
176
+ Searchkick.search(like_text, model: record.class, **options)
177
+ end
178
+
179
+ def reload_synonyms
180
+ require "elasticsearch/xpack"
181
+ raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
182
+ raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
183
+ begin
184
+ client.xpack.indices.reload_search_analyzers(index: name)
185
+ rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
186
+ raise Error, "Requires non-OSS version of Elasticsearch"
187
+ end
158
188
  end
159
189
 
160
190
  # queue
@@ -163,21 +193,34 @@ module Searchkick
163
193
  Searchkick::ReindexQueue.new(name)
164
194
  end
165
195
 
166
- # search
196
+ # reindex
197
+
198
+ def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
199
+ refresh = options.fetch(:refresh, !scoped)
200
+ options.delete(:refresh)
167
201
 
168
- # TODO remove in next major version
169
- def search_model(searchkick_klass, term = "*", **options, &block)
170
- query = Searchkick::Query.new(searchkick_klass, term, options)
171
- yield(query.body) if block
172
- if options[:execute] == false
173
- query
202
+ if method_name
203
+ # TODO throw ArgumentError
204
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
205
+
206
+ # update
207
+ import_scope(relation, method_name: method_name, scope: scope)
208
+ self.refresh if refresh
209
+ true
210
+ elsif scoped && !full
211
+ # TODO throw ArgumentError
212
+ Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
213
+
214
+ # reindex association
215
+ import_scope(relation, scope: scope)
216
+ self.refresh if refresh
217
+ true
174
218
  else
175
- query.execute
219
+ # full reindex
220
+ reindex_scope(relation, scope: scope, **options)
176
221
  end
177
222
  end
178
223
 
179
- # reindex
180
-
181
224
  def create_index(index_options: nil)
182
225
  index_options ||= self.index_options
183
226
  index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
@@ -185,62 +228,102 @@ module Searchkick
185
228
  index
186
229
  end
187
230
 
188
- def all_indices(unaliased: false)
189
- indices =
190
- begin
191
- client.indices.get_aliases
192
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
193
- {}
231
+ def import_scope(relation, **options)
232
+ bulk_indexer.import_scope(relation, **options)
233
+ end
234
+
235
+ def batches_left
236
+ bulk_indexer.batches_left
237
+ end
238
+
239
+ # other
240
+
241
+ def klass_document_type(klass, ignore_type = false)
242
+ @klass_document_type[[klass, ignore_type]] ||= begin
243
+ if !ignore_type && klass.searchkick_klass.searchkick_options[:_type]
244
+ type = klass.searchkick_klass.searchkick_options[:_type]
245
+ type = type.call if type.respond_to?(:call)
246
+ type
247
+ else
248
+ klass.model_name.to_s.underscore
194
249
  end
195
- indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
196
- indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
250
+ end
197
251
  end
198
252
 
199
- # remove old indices that start w/ index_name
200
- def clean_indices
201
- indices = all_indices(unaliased: true)
202
- indices.each do |index|
203
- Searchkick::Index.new(index).delete
253
+ # should not be public
254
+ def conversions_fields
255
+ @conversions_fields ||= begin
256
+ conversions = Array(options[:conversions])
257
+ conversions.map(&:to_s) + conversions.map(&:to_sym)
204
258
  end
205
- indices
206
259
  end
207
260
 
208
- def total_docs
209
- response =
210
- client.search(
211
- index: name,
212
- body: {
213
- query: {match_all: {}},
214
- size: 0
215
- }
216
- )
261
+ def suggest_fields
262
+ @suggest_fields ||= Array(options[:suggest]).map(&:to_s)
263
+ end
264
+
265
+ def locations_fields
266
+ @locations_fields ||= begin
267
+ locations = Array(options[:locations])
268
+ locations.map(&:to_s) + locations.map(&:to_sym)
269
+ end
270
+ end
217
271
 
218
- response["hits"]["total"]
272
+ # private
273
+ def uuid
274
+ index_settings["uuid"]
275
+ end
276
+
277
+ protected
278
+
279
+ def client
280
+ Searchkick.client
281
+ end
282
+
283
+ def bulk_indexer
284
+ @bulk_indexer ||= BulkIndexer.new(self)
285
+ end
286
+
287
+ def index_settings
288
+ settings.values.first["settings"]["index"]
289
+ end
290
+
291
+ def import_before_promotion(index, relation, **import_options)
292
+ index.import_scope(relation, **import_options)
219
293
  end
220
294
 
221
295
  # https://gist.github.com/jarosan/3124884
222
296
  # http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
223
- def reindex_scope(scope, import: true, resume: false, retain: false, async: false, refresh_interval: nil)
297
+ def reindex_scope(relation, import: true, resume: false, retain: false, async: false, refresh_interval: nil, scope: nil)
224
298
  if resume
225
299
  index_name = all_indices.sort.last
226
300
  raise Searchkick::Error, "No index to resume" unless index_name
227
- index = Searchkick::Index.new(index_name)
301
+ index = Searchkick::Index.new(index_name, @options)
228
302
  else
229
303
  clean_indices unless retain
230
304
 
231
- index_options = scope.searchkick_index_options
305
+ index_options = relation.searchkick_index_options
232
306
  index_options.deep_merge!(settings: {index: {refresh_interval: refresh_interval}}) if refresh_interval
233
307
  index = create_index(index_options: index_options)
234
308
  end
235
309
 
310
+ import_options = {
311
+ resume: resume,
312
+ async: async,
313
+ full: true,
314
+ scope: scope
315
+ }
316
+
317
+ uuid = index.uuid
318
+
236
319
  # check if alias exists
237
320
  alias_exists = alias_exists?
238
321
  if alias_exists
239
- # import before promotion
240
- index.import_scope(scope, resume: resume, async: async, full: true) if import
322
+ import_before_promotion(index, relation, **import_options) if import
241
323
 
242
324
  # get existing indices to remove
243
325
  unless async
326
+ check_uuid(uuid, index.uuid)
244
327
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
245
328
  clean_indices unless retain
246
329
  end
@@ -249,7 +332,7 @@ module Searchkick
249
332
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
250
333
 
251
334
  # import after promotion
252
- index.import_scope(scope, resume: resume, async: async, full: true) if import
335
+ index.import_scope(relation, **import_options) if import
253
336
  end
254
337
 
255
338
  if async
@@ -265,6 +348,7 @@ module Searchkick
265
348
  # already promoted if alias didn't exist
266
349
  if alias_exists
267
350
  puts "Jobs complete. Promoting..."
351
+ check_uuid(uuid, index.uuid)
268
352
  promote(index.name, update_refresh_interval: !refresh_interval.nil?)
269
353
  end
270
354
  clean_indices unless retain
@@ -276,267 +360,22 @@ module Searchkick
276
360
  index.refresh
277
361
  true
278
362
  end
279
- end
280
-
281
- def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
282
- # use scope for import
283
- scope = scope.search_import if scope.respond_to?(:search_import)
284
-
285
- if batch
286
- import_or_update scope.to_a, method_name, async
287
- Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
288
- elsif full && async
289
- full_reindex_async(scope)
290
- elsif scope.respond_to?(:find_in_batches)
291
- if resume
292
- # use total docs instead of max id since there's not a great way
293
- # to get the max _id without scripting since it's a string
294
-
295
- # TODO use primary key and prefix with table name
296
- scope = scope.where("id > ?", total_docs)
297
- end
298
-
299
- scope = scope.select("id").except(:includes, :preload) if async
300
-
301
- scope.find_in_batches batch_size: batch_size do |items|
302
- import_or_update items, method_name, async
303
- end
304
- else
305
- each_batch(scope) do |items|
306
- import_or_update items, method_name, async
307
- end
308
- end
309
- end
310
-
311
- def batches_left
312
- Searchkick.with_redis { |r| r.scard(batches_key) }
313
- end
314
-
315
- # other
316
-
317
- def tokens(text, options = {})
318
- client.indices.analyze(body: {text: text}.merge(options), index: name)["tokens"].map { |t| t["token"] }
319
- end
320
-
321
- def klass_document_type(klass)
322
- @klass_document_type[klass] ||= begin
323
- if klass.respond_to?(:document_type)
324
- klass.document_type
325
- else
326
- klass.model_name.to_s.underscore
327
- end
328
- end
329
- end
330
-
331
- protected
332
-
333
- def client
334
- Searchkick.client
335
- end
336
-
337
- def document_type(record)
338
- if record.respond_to?(:search_document_type)
339
- record.search_document_type
340
- else
341
- klass_document_type(record.class)
342
- end
343
- end
344
-
345
- def search_id(record)
346
- id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
347
- id.is_a?(Numeric) ? id : id.to_s
348
- end
349
-
350
- EXCLUDED_ATTRIBUTES = ["_id", "_type"]
351
-
352
- def search_data(record, method_name = nil)
353
- partial_reindex = !method_name.nil?
354
- options = record.class.searchkick_options
355
-
356
- # remove _id since search_id is used instead
357
- source = record.send(method_name || :search_data).each_with_object({}) { |(k, v), memo| memo[k.to_s] = v; memo }.except(*EXCLUDED_ATTRIBUTES)
358
-
359
- # conversions
360
- if options[:conversions]
361
- Array(options[:conversions]).map(&:to_s).each do |conversions_field|
362
- if source[conversions_field]
363
- source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
364
- end
365
- end
366
- end
367
-
368
- # hack to prevent generator field doesn't exist error
369
- if options[:suggest]
370
- options[:suggest].map(&:to_s).each do |field|
371
- source[field] = nil if !source[field] && !partial_reindex
372
- end
373
- end
374
-
375
- # locations
376
- if options[:locations]
377
- options[:locations].map(&:to_s).each do |field|
378
- if source[field]
379
- if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
380
- # multiple locations
381
- source[field] = source[field].map { |a| location_value(a) }
382
- else
383
- source[field] = location_value(source[field])
384
- end
385
- end
386
- end
363
+ rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
364
+ if e.message.include?("No handler for type [text]")
365
+ raise UnsupportedVersionError, "This version of Searchkick requires Elasticsearch 5 or greater"
387
366
  end
388
367
 
389
- cast_big_decimal(source)
390
-
391
- source
368
+ raise e
392
369
  end
393
370
 
394
- def location_value(value)
395
- if value.is_a?(Array)
396
- value.map(&:to_f).reverse
397
- elsif value.is_a?(Hash)
398
- {lat: value[:lat].to_f, lon: value[:lon].to_f}
399
- else
400
- value
401
- end
402
- end
403
-
404
- # change all BigDecimal values to floats due to
405
- # https://github.com/rails/rails/issues/6033
406
- # possible loss of precision :/
407
- def cast_big_decimal(obj)
408
- case obj
409
- when BigDecimal
410
- obj.to_f
411
- when Hash
412
- obj.each do |k, v|
413
- obj[k] = cast_big_decimal(v)
414
- end
415
- when Enumerable
416
- obj.map do |v|
417
- cast_big_decimal(v)
418
- end
419
- else
420
- obj
371
+ # safety check
372
+ # still a chance for race condition since its called before promotion
373
+ # ideal is for user to disable automatic index creation
374
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html#index-creation
375
+ def check_uuid(old_uuid, new_uuid)
376
+ if old_uuid != new_uuid
377
+ raise Searchkick::Error, "Safety check failed - only run one Model.reindex per model at a time"
421
378
  end
422
379
  end
423
-
424
- def import_or_update(records, method_name, async)
425
- if records.any?
426
- if async
427
- Searchkick::BulkReindexJob.perform_later(
428
- class_name: records.first.class.name,
429
- record_ids: records.map(&:id),
430
- index_name: name,
431
- method_name: method_name ? method_name.to_s : nil
432
- )
433
- else
434
- records = records.select(&:should_index?)
435
- if records.any?
436
- with_retries do
437
- method_name ? bulk_update(records, method_name) : import(records)
438
- end
439
- end
440
- end
441
- end
442
- end
443
-
444
- def full_reindex_async(scope)
445
- if scope.respond_to?(:primary_key)
446
- # TODO expire Redis key
447
- primary_key = scope.primary_key
448
-
449
- starting_id =
450
- begin
451
- scope.minimum(primary_key)
452
- rescue ActiveRecord::StatementInvalid
453
- false
454
- end
455
-
456
- if starting_id.nil?
457
- # no records, do nothing
458
- elsif starting_id.is_a?(Numeric)
459
- max_id = scope.maximum(primary_key)
460
- batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
461
-
462
- batches_count.times do |i|
463
- batch_id = i + 1
464
- min_id = starting_id + (i * batch_size)
465
- bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
466
- end
467
- else
468
- scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
469
- batch_id = i + 1
470
-
471
- bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
472
- end
473
- end
474
- else
475
- batch_id = 1
476
- # TODO remove any eager loading
477
- scope = scope.only(:_id) if scope.respond_to?(:only)
478
- each_batch(scope) do |items|
479
- bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
480
- batch_id += 1
481
- end
482
- end
483
- end
484
-
485
- def each_batch(scope)
486
- # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
487
- # use cursor for Mongoid
488
- items = []
489
- scope.all.each do |item|
490
- items << item
491
- if items.length == batch_size
492
- yield items
493
- items = []
494
- end
495
- end
496
- yield items if items.any?
497
- end
498
-
499
- def bulk_reindex_job(scope, batch_id, options)
500
- Searchkick::BulkReindexJob.perform_later({
501
- class_name: scope.model_name.name,
502
- index_name: name,
503
- batch_id: batch_id
504
- }.merge(options))
505
- Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
506
- end
507
-
508
- def batch_size
509
- @batch_size ||= @options[:batch_size] || 1000
510
- end
511
-
512
- def with_retries
513
- retries = 0
514
-
515
- begin
516
- yield
517
- rescue Faraday::ClientError => e
518
- if retries < 1
519
- retries += 1
520
- retry
521
- end
522
- raise e
523
- end
524
- end
525
-
526
- def bulk_index_helper(records)
527
- Searchkick.indexer.queue(records.map { |r| {index: record_data(r).merge(data: search_data(r))} })
528
- end
529
-
530
- def bulk_delete_helper(records)
531
- Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| {delete: record_data(r)} })
532
- end
533
-
534
- def bulk_update_helper(records, method_name)
535
- Searchkick.indexer.queue(records.map { |r| {update: record_data(r).merge(data: {doc: search_data(r, method_name)})} })
536
- end
537
-
538
- def batches_key
539
- "searchkick:reindex:#{name}:batches"
540
- end
541
380
  end
542
381
  end