searchkick-hooopo 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +35 -0
- data/CHANGELOG.md +491 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +1908 -0
- data/Rakefile +20 -0
- data/benchmark/Gemfile +23 -0
- data/benchmark/benchmark.rb +97 -0
- data/lib/searchkick/bulk_reindex_job.rb +17 -0
- data/lib/searchkick/index.rb +500 -0
- data/lib/searchkick/index_options.rb +333 -0
- data/lib/searchkick/indexer.rb +28 -0
- data/lib/searchkick/logging.rb +242 -0
- data/lib/searchkick/middleware.rb +12 -0
- data/lib/searchkick/model.rb +156 -0
- data/lib/searchkick/process_batch_job.rb +23 -0
- data/lib/searchkick/process_queue_job.rb +23 -0
- data/lib/searchkick/query.rb +901 -0
- data/lib/searchkick/reindex_queue.rb +38 -0
- data/lib/searchkick/reindex_v2_job.rb +39 -0
- data/lib/searchkick/results.rb +216 -0
- data/lib/searchkick/tasks.rb +33 -0
- data/lib/searchkick/version.rb +3 -0
- data/lib/searchkick.rb +215 -0
- data/searchkick.gemspec +28 -0
- data/test/aggs_test.rb +197 -0
- data/test/autocomplete_test.rb +75 -0
- data/test/boost_test.rb +175 -0
- data/test/callbacks_test.rb +59 -0
- data/test/ci/before_install.sh +17 -0
- data/test/errors_test.rb +19 -0
- data/test/gemfiles/activerecord31.gemfile +7 -0
- data/test/gemfiles/activerecord32.gemfile +7 -0
- data/test/gemfiles/activerecord40.gemfile +8 -0
- data/test/gemfiles/activerecord41.gemfile +8 -0
- data/test/gemfiles/activerecord42.gemfile +7 -0
- data/test/gemfiles/activerecord50.gemfile +7 -0
- data/test/gemfiles/apartment.gemfile +8 -0
- data/test/gemfiles/cequel.gemfile +8 -0
- data/test/gemfiles/mongoid2.gemfile +7 -0
- data/test/gemfiles/mongoid3.gemfile +6 -0
- data/test/gemfiles/mongoid4.gemfile +7 -0
- data/test/gemfiles/mongoid5.gemfile +7 -0
- data/test/gemfiles/mongoid6.gemfile +8 -0
- data/test/gemfiles/nobrainer.gemfile +8 -0
- data/test/gemfiles/parallel_tests.gemfile +8 -0
- data/test/geo_shape_test.rb +172 -0
- data/test/highlight_test.rb +78 -0
- data/test/index_test.rb +153 -0
- data/test/inheritance_test.rb +83 -0
- data/test/marshal_test.rb +8 -0
- data/test/match_test.rb +276 -0
- data/test/misspellings_test.rb +56 -0
- data/test/model_test.rb +42 -0
- data/test/multi_search_test.rb +22 -0
- data/test/multi_tenancy_test.rb +22 -0
- data/test/order_test.rb +46 -0
- data/test/pagination_test.rb +53 -0
- data/test/partial_reindex_test.rb +58 -0
- data/test/query_test.rb +35 -0
- data/test/records_test.rb +10 -0
- data/test/reindex_test.rb +52 -0
- data/test/reindex_v2_job_test.rb +32 -0
- data/test/routing_test.rb +23 -0
- data/test/should_index_test.rb +32 -0
- data/test/similar_test.rb +28 -0
- data/test/sql_test.rb +198 -0
- data/test/suggest_test.rb +85 -0
- data/test/synonyms_test.rb +67 -0
- data/test/test_helper.rb +527 -0
- data/test/where_test.rb +223 -0
- metadata +250 -0
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
|
+
|
4
|
+
begin
|
5
|
+
require "parallel_tests/tasks"
|
6
|
+
require "shellwords"
|
7
|
+
rescue LoadError
|
8
|
+
# do nothing
|
9
|
+
end
|
10
|
+
|
11
|
+
task default: :test
|
12
|
+
Rake::TestTask.new do |t|
|
13
|
+
t.libs << "test"
|
14
|
+
t.pattern = "test/**/*_test.rb"
|
15
|
+
t.warning = false
|
16
|
+
end
|
17
|
+
|
18
|
+
task :benchmark do
|
19
|
+
require_relative "benchmark/benchmark"
|
20
|
+
end
|
data/benchmark/Gemfile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
source "https://rubygems.org"
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in searchkick.gemspec
|
4
|
+
gemspec path: "../"
|
5
|
+
|
6
|
+
# gem "sqlite3"
|
7
|
+
gem "pg"
|
8
|
+
gem "activerecord", "~> 5.0.0"
|
9
|
+
gem "activerecord-import"
|
10
|
+
gem "activejob"
|
11
|
+
gem "redis"
|
12
|
+
gem "sidekiq"
|
13
|
+
|
14
|
+
# performance
|
15
|
+
gem "typhoeus"
|
16
|
+
gem "oj"
|
17
|
+
|
18
|
+
# profiling
|
19
|
+
gem "ruby-prof"
|
20
|
+
gem "allocation_stats"
|
21
|
+
gem "get_process_mem"
|
22
|
+
gem "memory_profiler"
|
23
|
+
gem "allocation_tracer"
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
Bundler.require(:default)
|
3
|
+
require "active_record"
|
4
|
+
require "benchmark"
|
5
|
+
require "active_support/notifications"
|
6
|
+
|
7
|
+
ActiveSupport::Notifications.subscribe "request.searchkick" do |*args|
|
8
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
9
|
+
p event.duration
|
10
|
+
end
|
11
|
+
|
12
|
+
ActiveJob::Base.queue_adapter = :sidekiq
|
13
|
+
|
14
|
+
Searchkick.redis = Redis.new
|
15
|
+
|
16
|
+
ActiveRecord::Base.default_timezone = :utc
|
17
|
+
ActiveRecord::Base.time_zone_aware_attributes = true
|
18
|
+
# ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick"
|
19
|
+
ActiveRecord::Base.establish_connection "postgresql://localhost/searchkick_demo_development"
|
20
|
+
# ActiveRecord::Base.logger = Logger.new(STDOUT)
|
21
|
+
|
22
|
+
ActiveJob::Base.logger = nil
|
23
|
+
|
24
|
+
class Product < ActiveRecord::Base
|
25
|
+
searchkick batch_size: 1000
|
26
|
+
|
27
|
+
def search_data
|
28
|
+
{
|
29
|
+
name: name,
|
30
|
+
color: color,
|
31
|
+
store_id: store_id
|
32
|
+
}
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
total_docs = 100000
|
37
|
+
|
38
|
+
# ActiveRecord::Migration.create_table :products, force: :cascade do |t|
|
39
|
+
# t.string :name
|
40
|
+
# t.string :color
|
41
|
+
# t.integer :store_id
|
42
|
+
# end
|
43
|
+
|
44
|
+
# Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
|
45
|
+
|
46
|
+
puts "Imported"
|
47
|
+
|
48
|
+
result = nil
|
49
|
+
report = nil
|
50
|
+
stats = nil
|
51
|
+
|
52
|
+
# p GetProcessMem.new.mb
|
53
|
+
|
54
|
+
Product.searchkick_index.delete rescue nil
|
55
|
+
|
56
|
+
time =
|
57
|
+
Benchmark.realtime do
|
58
|
+
# result = RubyProf.profile do
|
59
|
+
# report = MemoryProfiler.report do
|
60
|
+
# stats = AllocationStats.trace do
|
61
|
+
reindex = Product.reindex(async: true)
|
62
|
+
p reindex
|
63
|
+
# end
|
64
|
+
|
65
|
+
60.times do |i|
|
66
|
+
if reindex.is_a?(Hash)
|
67
|
+
docs = Searchkick::Index.new(reindex[:index_name]).total_docs
|
68
|
+
else
|
69
|
+
docs = Product.searchkick_index.total_docs
|
70
|
+
end
|
71
|
+
puts "#{i}: #{docs}"
|
72
|
+
if docs == total_docs
|
73
|
+
break
|
74
|
+
end
|
75
|
+
p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
|
76
|
+
sleep(1)
|
77
|
+
# Product.searchkick_index.refresh
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# p GetProcessMem.new.mb
|
82
|
+
|
83
|
+
puts time.round(1)
|
84
|
+
|
85
|
+
|
86
|
+
if result
|
87
|
+
printer = RubyProf::GraphPrinter.new(result)
|
88
|
+
printer.print(STDOUT, min_percent: 5)
|
89
|
+
end
|
90
|
+
|
91
|
+
if report
|
92
|
+
puts report.pretty_print
|
93
|
+
end
|
94
|
+
|
95
|
+
# Print the allocation summary when an AllocationStats trace was captured.
# `stats` (not `result`, which holds the RubyProf profile) is the variable the
# commented-out `AllocationStats.trace` call above assigns.
if stats
  puts stats.allocations(alias_paths: true).group_by(:sourcefile, :class).to_text
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Searchkick
  # Active Job that imports one batch of records into a Searchkick index.
  class BulkReindexJob < ActiveJob::Base
    queue_as :searchkick

    # class_name  - name of the model class, resolved with constantize.
    # record_ids  - explicit ids to load; when nil the range min_id..max_id is used.
    # index_name  - index to write to; when nil the model's own searchkick_index is used.
    # method_name - forwarded to Index#import_scope unchanged.
    # batch_id    - forwarded to Index#import_scope unchanged.
    def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
      model = class_name.constantize

      target_index =
        if index_name
          Searchkick::Index.new(index_name)
        else
          model.searchkick_index
        end

      ids = record_ids || (min_id..max_id)
      records = Searchkick.load_records(model, ids)

      target_index.import_scope(records, method_name: method_name, batch: true, batch_id: batch_id)
    end
  end
end
|
@@ -0,0 +1,500 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class Index
|
3
|
+
include IndexOptions
|
4
|
+
|
5
|
+
attr_reader :name, :options
|
6
|
+
|
7
|
+
def initialize(name, options = {})
|
8
|
+
@name = name
|
9
|
+
@options = options
|
10
|
+
@klass_document_type = {} # cache
|
11
|
+
end
|
12
|
+
|
13
|
+
def create(body = {})
|
14
|
+
client.indices.create index: name, body: body
|
15
|
+
end
|
16
|
+
|
17
|
+
def delete
|
18
|
+
client.indices.delete index: name
|
19
|
+
end
|
20
|
+
|
21
|
+
def exists?
|
22
|
+
client.indices.exists index: name
|
23
|
+
end
|
24
|
+
|
25
|
+
def refresh
|
26
|
+
client.indices.refresh index: name
|
27
|
+
end
|
28
|
+
|
29
|
+
def alias_exists?
|
30
|
+
client.indices.exists_alias name: name
|
31
|
+
end
|
32
|
+
|
33
|
+
def mapping
|
34
|
+
client.indices.get_mapping index: name
|
35
|
+
end
|
36
|
+
|
37
|
+
def settings
|
38
|
+
client.indices.get_settings index: name
|
39
|
+
end
|
40
|
+
|
41
|
+
def refresh_interval
|
42
|
+
settings.values.first["settings"]["index"]["refresh_interval"]
|
43
|
+
end
|
44
|
+
|
45
|
+
def update_settings(settings)
|
46
|
+
client.indices.put_settings index: name, body: settings
|
47
|
+
end
|
48
|
+
|
49
|
+
# Points the alias `name` at the index `new_name`, removing the alias from
# every index it currently targets, using one update_aliases call.
#
# new_name                - name of the index to promote.
# update_refresh_interval - when true, first sets the promoted index's
#                           refresh_interval to the value configured in
#                           options[:settings][:index][:refresh_interval],
#                           falling back to "1s".
def promote(new_name, update_refresh_interval: false)
  if update_refresh_interval
    new_index = Searchkick::Index.new(new_name)
    # locals are named so they do not shadow the #settings and
    # #refresh_interval instance methods
    configured = options[:settings] || {}
    interval = (configured[:index] && configured[:index][:refresh_interval]) || "1s"
    new_index.update_settings(index: {refresh_interval: interval})
  end

  old_indices =
    begin
      client.indices.get_alias(name: name).keys
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      # lookup failed with NotFound: no index to detach, so use an empty
      # list (same type as the success branch)
      []
    end
  actions = old_indices.map { |old_name| {remove: {index: old_name, alias: name}} } + [{add: {index: new_name, alias: name}}]
  client.indices.update_aliases body: {actions: actions}
end
|
66
|
+
alias_method :swap, :promote
|
67
|
+
|
68
|
+
# record based
|
69
|
+
# use helpers for notifications
|
70
|
+
|
71
|
+
def store(record)
|
72
|
+
bulk_index_helper([record])
|
73
|
+
end
|
74
|
+
|
75
|
+
def remove(record)
|
76
|
+
bulk_delete_helper([record])
|
77
|
+
end
|
78
|
+
|
79
|
+
def update_record(record, method_name)
|
80
|
+
bulk_update_helper([record], method_name)
|
81
|
+
end
|
82
|
+
|
83
|
+
def bulk_delete(records)
|
84
|
+
bulk_delete_helper(records)
|
85
|
+
end
|
86
|
+
|
87
|
+
def bulk_index(records)
|
88
|
+
bulk_index_helper(records)
|
89
|
+
end
|
90
|
+
alias_method :import, :bulk_index
|
91
|
+
|
92
|
+
def bulk_update(records, method_name)
|
93
|
+
bulk_update_helper(records, method_name)
|
94
|
+
end
|
95
|
+
|
96
|
+
def record_data(r)
|
97
|
+
data = {
|
98
|
+
_index: name,
|
99
|
+
_id: search_id(r),
|
100
|
+
_type: document_type(r)
|
101
|
+
}
|
102
|
+
data[:_routing] = r.search_routing if r.respond_to?(:search_routing)
|
103
|
+
data
|
104
|
+
end
|
105
|
+
|
106
|
+
def retrieve(record)
|
107
|
+
client.get(
|
108
|
+
index: name,
|
109
|
+
type: document_type(record),
|
110
|
+
id: search_id(record)
|
111
|
+
)["_source"]
|
112
|
+
end
|
113
|
+
|
114
|
+
def reindex_record(record)
|
115
|
+
if record.destroyed? || !record.should_index?
|
116
|
+
begin
|
117
|
+
remove(record)
|
118
|
+
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
119
|
+
# do nothing
|
120
|
+
end
|
121
|
+
else
|
122
|
+
store(record)
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
def reindex_record_async(record)
|
127
|
+
if Searchkick.callbacks_value.nil?
|
128
|
+
if defined?(Searchkick::ReindexV2Job)
|
129
|
+
Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
|
130
|
+
else
|
131
|
+
raise Searchkick::Error, "Active Job not found"
|
132
|
+
end
|
133
|
+
else
|
134
|
+
reindex_record(record)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
# Searches for documents similar to `record`.
#
# The query text is built from the record's stored document (see #retrieve):
# its non-nil values joined with spaces, optionally restricted to
# options[:fields]. The record's own id is excluded through a
# `where: {_id: {not: ...}}` condition.
#
# options - search options forwarded to #search_model; :per_page defaults to
#           10 and :similar is always set to true.
#
# Returns whatever #search_model returns.
def similar_record(record, **options)
  wanted_fields = options[:fields] && options[:fields].map(&:to_s)
  like_text = retrieve(record).to_hash
    .select { |k, _| !wanted_fields || wanted_fields.include?(k) }
    .values.compact.join(" ")

  # TODO deep merge method
  # Build fresh hashes instead of writing into the caller's nested :where
  # hash, which is shared even though **options copies the top level.
  where = (options[:where] || {}).dup
  where[:_id] = (where[:_id] || {}).merge(not: record.id.to_s)
  options[:where] = where
  options[:per_page] ||= 10
  options[:similar] = true

  # TODO use index class instead of record class
  # Splat so the options reach #search_model as keywords on Ruby 3 as well.
  search_model(record.class, like_text, **options)
end
|
153
|
+
|
154
|
+
# queue
|
155
|
+
|
156
|
+
def reindex_queue
|
157
|
+
Searchkick::ReindexQueue.new(name)
|
158
|
+
end
|
159
|
+
|
160
|
+
# search
|
161
|
+
|
162
|
+
# TODO remove in next major version
|
163
|
+
def search_model(searchkick_klass, term = "*", **options, &block)
|
164
|
+
query = Searchkick::Query.new(searchkick_klass, term, options)
|
165
|
+
yield(query.body) if block
|
166
|
+
if options[:execute] == false
|
167
|
+
query
|
168
|
+
else
|
169
|
+
query.execute
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
# reindex
|
174
|
+
|
175
|
+
def create_index(index_options: nil)
|
176
|
+
index_options ||= self.index_options
|
177
|
+
index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
|
178
|
+
index.create(index_options)
|
179
|
+
index
|
180
|
+
end
|
181
|
+
|
182
|
+
def all_indices(unaliased: false)
|
183
|
+
indices =
|
184
|
+
begin
|
185
|
+
client.indices.get_aliases
|
186
|
+
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
187
|
+
{}
|
188
|
+
end
|
189
|
+
indices = indices.select { |_k, v| v.empty? || v["aliases"].empty? } if unaliased
|
190
|
+
indices.select { |k, _v| k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
|
191
|
+
end
|
192
|
+
|
193
|
+
# remove old indices that start w/ index_name
|
194
|
+
def clean_indices
|
195
|
+
indices = all_indices(unaliased: true)
|
196
|
+
indices.each do |index|
|
197
|
+
Searchkick::Index.new(index).delete
|
198
|
+
end
|
199
|
+
indices
|
200
|
+
end
|
201
|
+
|
202
|
+
def total_docs
|
203
|
+
response =
|
204
|
+
client.search(
|
205
|
+
index: name,
|
206
|
+
body: {
|
207
|
+
query: {match_all: {}},
|
208
|
+
size: 0
|
209
|
+
}
|
210
|
+
)
|
211
|
+
|
212
|
+
response["hits"]["total"]
|
213
|
+
end
|
214
|
+
|
215
|
+
# https://gist.github.com/jarosan/3124884
|
216
|
+
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
217
|
+
def reindex_scope(scope, import: true, resume: false, retain: false, async: false, refresh_interval: nil)
|
218
|
+
if resume
|
219
|
+
index_name = all_indices.sort.last
|
220
|
+
raise Searchkick::Error, "No index to resume" unless index_name
|
221
|
+
index = Searchkick::Index.new(index_name)
|
222
|
+
else
|
223
|
+
clean_indices unless retain
|
224
|
+
|
225
|
+
index_options = scope.searchkick_index_options
|
226
|
+
index_options.deep_merge!(settings: {index: {refresh_interval: refresh_interval}}) if refresh_interval
|
227
|
+
index = create_index(index_options: index_options)
|
228
|
+
end
|
229
|
+
|
230
|
+
# check if alias exists
|
231
|
+
if alias_exists?
|
232
|
+
# import before promotion
|
233
|
+
index.import_scope(scope, resume: resume, async: async, full: true) if import
|
234
|
+
|
235
|
+
# get existing indices to remove
|
236
|
+
unless async
|
237
|
+
promote(index.name, update_refresh_interval: !refresh_interval.nil?)
|
238
|
+
clean_indices unless retain
|
239
|
+
end
|
240
|
+
else
|
241
|
+
delete if exists?
|
242
|
+
promote(index.name, update_refresh_interval: !refresh_interval.nil?)
|
243
|
+
|
244
|
+
# import after promotion
|
245
|
+
index.import_scope(scope, resume: resume, async: async, full: true) if import
|
246
|
+
end
|
247
|
+
|
248
|
+
if async
|
249
|
+
{index_name: index.name}
|
250
|
+
else
|
251
|
+
index.refresh
|
252
|
+
true
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
|
257
|
+
# use scope for import
|
258
|
+
scope = scope.search_import if scope.respond_to?(:search_import)
|
259
|
+
|
260
|
+
if batch
|
261
|
+
import_or_update scope.to_a, method_name, async
|
262
|
+
Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
|
263
|
+
elsif full && async
|
264
|
+
full_reindex_async(scope)
|
265
|
+
elsif scope.respond_to?(:find_in_batches)
|
266
|
+
if resume
|
267
|
+
# use total docs instead of max id since there's not a great way
|
268
|
+
# to get the max _id without scripting since it's a string
|
269
|
+
|
270
|
+
# TODO use primary key and prefix with table name
|
271
|
+
scope = scope.where("id > ?", total_docs)
|
272
|
+
end
|
273
|
+
|
274
|
+
scope = scope.select("id").except(:includes, :preload) if async
|
275
|
+
|
276
|
+
scope.find_in_batches batch_size: batch_size do |batch|
|
277
|
+
import_or_update batch, method_name, async
|
278
|
+
end
|
279
|
+
else
|
280
|
+
each_batch(scope) do |items|
|
281
|
+
import_or_update items, method_name, async
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
def batches_left
|
287
|
+
Searchkick.with_redis { |r| r.scard(batches_key) }
|
288
|
+
end
|
289
|
+
|
290
|
+
# other
|
291
|
+
|
292
|
+
def tokens(text, options = {})
|
293
|
+
client.indices.analyze({text: text, index: name}.merge(options))["tokens"].map { |t| t["token"] }
|
294
|
+
end
|
295
|
+
|
296
|
+
def klass_document_type(klass)
|
297
|
+
@klass_document_type[klass] ||= begin
|
298
|
+
if klass.respond_to?(:document_type)
|
299
|
+
klass.document_type
|
300
|
+
else
|
301
|
+
klass.model_name.to_s.underscore
|
302
|
+
end
|
303
|
+
end
|
304
|
+
end
|
305
|
+
|
306
|
+
protected
|
307
|
+
|
308
|
+
def client
|
309
|
+
Searchkick.client
|
310
|
+
end
|
311
|
+
|
312
|
+
def document_type(record)
|
313
|
+
if record.respond_to?(:search_document_type)
|
314
|
+
record.search_document_type
|
315
|
+
else
|
316
|
+
klass_document_type(record.class)
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
def search_id(record)
|
321
|
+
id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
|
322
|
+
id.is_a?(Numeric) ? id : id.to_s
|
323
|
+
end
|
324
|
+
|
325
|
+
# Keys removed from a record's search data before it is sent for indexing.
# Frozen so the shared constant cannot be modified at runtime.
EXCLUDED_ATTRIBUTES = ["_id", "_type"].freeze
|
326
|
+
|
327
|
+
def search_data(record, method_name = nil)
|
328
|
+
partial_reindex = !method_name.nil?
|
329
|
+
options = record.class.searchkick_options
|
330
|
+
|
331
|
+
# remove _id since search_id is used instead
|
332
|
+
source = record.send(method_name || :search_data).each_with_object({}) { |(k, v), memo| memo[k.to_s] = v; memo }.except(*EXCLUDED_ATTRIBUTES)
|
333
|
+
|
334
|
+
# conversions
|
335
|
+
if options[:conversions]
|
336
|
+
Array(options[:conversions]).map(&:to_s).each do |conversions_field|
|
337
|
+
if source[conversions_field]
|
338
|
+
source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
|
339
|
+
end
|
340
|
+
end
|
341
|
+
end
|
342
|
+
|
343
|
+
# hack to prevent generator field doesn't exist error
|
344
|
+
if options[:suggest]
|
345
|
+
options[:suggest].map(&:to_s).each do |field|
|
346
|
+
source[field] = nil if !source[field] && !partial_reindex
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
# locations
|
351
|
+
if options[:locations]
|
352
|
+
options[:locations].map(&:to_s).each do |field|
|
353
|
+
if source[field]
|
354
|
+
if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
|
355
|
+
# multiple locations
|
356
|
+
source[field] = source[field].map { |a| location_value(a) }
|
357
|
+
else
|
358
|
+
source[field] = location_value(source[field])
|
359
|
+
end
|
360
|
+
end
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
cast_big_decimal(source)
|
365
|
+
|
366
|
+
source
|
367
|
+
end
|
368
|
+
|
369
|
+
# Normalizes a single location value for indexing.
#
# value - an Array (each element converted with to_f, order reversed),
#         a Hash with :lat/:lon keys (symbol keys preferred, string keys
#         accepted as a fallback), or any other object, which is returned
#         unchanged.
#
# Returns an Array of Floats, a {lat:, lon:} Hash of Floats, or `value`.
def location_value(value)
  if value.is_a?(Array)
    value.map(&:to_f).reverse
  elsif value.is_a?(Hash)
    # Nested hashes are not key-normalized by the caller, so "lat"/"lon"
    # string keys would otherwise be read as nil and become 0.0.
    lat = value.key?(:lat) ? value[:lat] : value["lat"]
    lon = value.key?(:lon) ? value[:lon] : value["lon"]
    {lat: lat.to_f, lon: lon.to_f}
  else
    value
  end
end
|
378
|
+
|
379
|
+
# change all BigDecimal values to floats due to
|
380
|
+
# https://github.com/rails/rails/issues/6033
|
381
|
+
# possible loss of precision :/
|
382
|
+
# Recursively converts BigDecimal values to Float (precision may be lost).
#
# A Hash is updated in place and returned; any other Enumerable is mapped
# into a new Array; everything else is returned untouched.
def cast_big_decimal(obj)
  case obj
  when BigDecimal then obj.to_f
  when Hash
    obj.each_pair { |key, value| obj[key] = cast_big_decimal(value) }
  when Enumerable then obj.map { |element| cast_big_decimal(element) }
  else obj
  end
end
|
398
|
+
|
399
|
+
# Indexes one batch of records, either inline or through a background job.
#
# records     - batch to process; nothing happens when it is empty.
# method_name - when set, records are sent through bulk_update with that
#               method; otherwise they go through import.
# async       - when truthy, a BulkReindexJob is enqueued with the record ids
#               instead of indexing inline.
def import_or_update(records, method_name, async)
  return unless records.any?

  if async
    job_method = method_name ? method_name.to_s : nil
    Searchkick::BulkReindexJob.perform_later(
      class_name: records.first.class.name,
      record_ids: records.map(&:id),
      index_name: name,
      method_name: job_method
    )
  else
    # only records that report should_index? are sent
    indexable = records.select(&:should_index?)
    return unless indexable.any?

    with_retries do
      if method_name
        bulk_update(indexable, method_name)
      else
        import(indexable)
      end
    end
  end
end
|
418
|
+
|
419
|
+
# Enqueues background jobs that together reindex every record in `scope`.
#
# Scopes responding to primary_key are handled by key type:
#   * no records (minimum is nil)  - nothing is enqueued;
#   * numeric keys                 - the id span minimum..maximum is cut into
#                                    consecutive ranges of batch_size ids, one
#                                    job per range (min_id/max_id);
#   * any other key type           - records are walked with find_in_batches
#                                    and each job receives explicit ids, since
#                                    id arithmetic is not possible.
# Other scopes are walked with each_batch and each job receives explicit
# record ids as strings.
def full_reindex_async(scope)
  if scope.respond_to?(:primary_key)
    # TODO expire Redis key
    primary_key = scope.primary_key
    starting_id = scope.minimum(primary_key)

    if starting_id.nil?
      # empty scope: enqueueing a batch would only create a no-op job
    elsif starting_id.is_a?(Numeric)
      max_id = scope.maximum(primary_key)
      batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil

      batches_count.times do |i|
        batch_id = i + 1
        min_id = starting_id + (i * batch_size)
        bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
      end
    else
      batch_id = 0
      scope.find_in_batches(batch_size: batch_size) do |batch|
        batch_id += 1
        bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
      end
    end
  else
    batch_id = 1
    # TODO remove any eager loading
    scope = scope.only(:_id) if scope.respond_to?(:only)
    each_batch(scope) do |items|
      bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
      batch_id += 1
    end
  end
end
|
442
|
+
|
443
|
+
def each_batch(scope)
|
444
|
+
# https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
|
445
|
+
# use cursor for Mongoid
|
446
|
+
items = []
|
447
|
+
scope.all.each do |item|
|
448
|
+
items << item
|
449
|
+
if items.length == batch_size
|
450
|
+
yield items
|
451
|
+
items = []
|
452
|
+
end
|
453
|
+
end
|
454
|
+
yield items if items.any?
|
455
|
+
end
|
456
|
+
|
457
|
+
# Registers a batch as pending and enqueues the job that will process it.
#
# scope    - used only for scope.model_name.name (the job's class_name).
# batch_id - identifier stored in the Redis set at batches_key.
# options  - extra job arguments merged into the payload
#            (e.g. min_id/max_id or record_ids).
def bulk_reindex_job(scope, batch_id, options)
  # Add the batch to the pending set BEFORE enqueuing: a job that runs
  # immediately would otherwise remove its id before it was added, leaving
  # the batch in the set indefinitely.
  Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
  Searchkick::BulkReindexJob.perform_later({
    class_name: scope.model_name.name,
    index_name: name,
    batch_id: batch_id
  }.merge(options))
end
|
465
|
+
|
466
|
+
def batch_size
|
467
|
+
@batch_size ||= @options[:batch_size] || 1000
|
468
|
+
end
|
469
|
+
|
470
|
+
# Runs the given block, re-running it once if it raises
# Faraday::ClientError; a second failure is re-raised to the caller.
# Returns the block's value.
def with_retries
  attempts = 0

  begin
    yield
  rescue Faraday::ClientError
    # already retried once: propagate the current exception
    raise if attempts >= 1

    attempts += 1
    retry
  end
end
|
483
|
+
|
484
|
+
def bulk_index_helper(records)
|
485
|
+
Searchkick.indexer.queue(records.map { |r| {index: record_data(r).merge(data: search_data(r))} })
|
486
|
+
end
|
487
|
+
|
488
|
+
def bulk_delete_helper(records)
|
489
|
+
Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| {delete: record_data(r)} })
|
490
|
+
end
|
491
|
+
|
492
|
+
def bulk_update_helper(records, method_name)
|
493
|
+
Searchkick.indexer.queue(records.map { |r| {update: record_data(r).merge(data: {doc: search_data(r, method_name)})} })
|
494
|
+
end
|
495
|
+
|
496
|
+
def batches_key
|
497
|
+
"searchkick:reindex:#{name}:batches"
|
498
|
+
end
|
499
|
+
end
|
500
|
+
end
|