searchkick 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +234 -96
- data/LICENSE.txt +1 -1
- data/README.md +446 -268
- data/lib/searchkick/bulk_reindex_job.rb +12 -8
- data/lib/searchkick/controller_runtime.rb +40 -0
- data/lib/searchkick/index.rb +174 -56
- data/lib/searchkick/index_cache.rb +30 -0
- data/lib/searchkick/index_options.rb +472 -349
- data/lib/searchkick/indexer.rb +15 -8
- data/lib/searchkick/log_subscriber.rb +57 -0
- data/lib/searchkick/middleware.rb +1 -1
- data/lib/searchkick/model.rb +51 -48
- data/lib/searchkick/process_batch_job.rb +10 -26
- data/lib/searchkick/process_queue_job.rb +21 -12
- data/lib/searchkick/query.rb +183 -51
- data/lib/searchkick/record_data.rb +0 -1
- data/lib/searchkick/record_indexer.rb +135 -50
- data/lib/searchkick/reindex_queue.rb +43 -6
- data/lib/searchkick/reindex_v2_job.rb +10 -34
- data/lib/searchkick/relation.rb +36 -0
- data/lib/searchkick/relation_indexer.rb +150 -0
- data/lib/searchkick/results.rb +162 -80
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +203 -79
- data/lib/tasks/searchkick.rake +21 -11
- metadata +17 -71
- data/CONTRIBUTING.md +0 -53
- data/lib/searchkick/bulk_indexer.rb +0 -171
- data/lib/searchkick/logging.rb +0 -243
@@ -1,171 +0,0 @@
|
|
1
|
-
module Searchkick
|
2
|
-
class BulkIndexer
|
3
|
-
attr_reader :index
|
4
|
-
|
5
|
-
def initialize(index)
|
6
|
-
@index = index
|
7
|
-
end
|
8
|
-
|
9
|
-
def import_scope(relation, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false, scope: nil)
|
10
|
-
if scope
|
11
|
-
relation = relation.send(scope)
|
12
|
-
elsif relation.respond_to?(:search_import)
|
13
|
-
relation = relation.search_import
|
14
|
-
end
|
15
|
-
|
16
|
-
if batch
|
17
|
-
import_or_update relation.to_a, method_name, async
|
18
|
-
Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
|
19
|
-
elsif full && async
|
20
|
-
full_reindex_async(relation)
|
21
|
-
elsif relation.respond_to?(:find_in_batches)
|
22
|
-
if resume
|
23
|
-
# use total docs instead of max id since there's not a great way
|
24
|
-
# to get the max _id without scripting since it's a string
|
25
|
-
|
26
|
-
# TODO use primary key and prefix with table name
|
27
|
-
relation = relation.where("id > ?", index.total_docs)
|
28
|
-
end
|
29
|
-
|
30
|
-
relation = relation.select("id").except(:includes, :preload) if async
|
31
|
-
|
32
|
-
relation.find_in_batches batch_size: batch_size do |items|
|
33
|
-
import_or_update items, method_name, async
|
34
|
-
end
|
35
|
-
else
|
36
|
-
each_batch(relation) do |items|
|
37
|
-
import_or_update items, method_name, async
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def bulk_index(records)
|
43
|
-
Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).index_data })
|
44
|
-
end
|
45
|
-
|
46
|
-
def bulk_delete(records)
|
47
|
-
Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(index, r).delete_data })
|
48
|
-
end
|
49
|
-
|
50
|
-
def bulk_update(records, method_name)
|
51
|
-
Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).update_data(method_name) })
|
52
|
-
end
|
53
|
-
|
54
|
-
def batches_left
|
55
|
-
Searchkick.with_redis { |r| r.scard(batches_key) }
|
56
|
-
end
|
57
|
-
|
58
|
-
private
|
59
|
-
|
60
|
-
def import_or_update(records, method_name, async)
|
61
|
-
if records.any?
|
62
|
-
if async
|
63
|
-
Searchkick::BulkReindexJob.perform_later(
|
64
|
-
class_name: records.first.class.name,
|
65
|
-
record_ids: records.map(&:id),
|
66
|
-
index_name: index.name,
|
67
|
-
method_name: method_name ? method_name.to_s : nil
|
68
|
-
)
|
69
|
-
else
|
70
|
-
records = records.select(&:should_index?)
|
71
|
-
if records.any?
|
72
|
-
with_retries do
|
73
|
-
# call out to index for ActiveSupport notifications
|
74
|
-
if method_name
|
75
|
-
index.bulk_update(records, method_name)
|
76
|
-
else
|
77
|
-
index.bulk_index(records)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
def full_reindex_async(scope)
|
86
|
-
if scope.respond_to?(:primary_key)
|
87
|
-
# TODO expire Redis key
|
88
|
-
primary_key = scope.primary_key
|
89
|
-
|
90
|
-
starting_id =
|
91
|
-
begin
|
92
|
-
scope.minimum(primary_key)
|
93
|
-
rescue ActiveRecord::StatementInvalid
|
94
|
-
false
|
95
|
-
end
|
96
|
-
|
97
|
-
if starting_id.nil?
|
98
|
-
# no records, do nothing
|
99
|
-
elsif starting_id.is_a?(Numeric)
|
100
|
-
max_id = scope.maximum(primary_key)
|
101
|
-
batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
|
102
|
-
|
103
|
-
batches_count.times do |i|
|
104
|
-
batch_id = i + 1
|
105
|
-
min_id = starting_id + (i * batch_size)
|
106
|
-
bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
|
107
|
-
end
|
108
|
-
else
|
109
|
-
scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
|
110
|
-
batch_id = i + 1
|
111
|
-
|
112
|
-
bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
|
113
|
-
end
|
114
|
-
end
|
115
|
-
else
|
116
|
-
batch_id = 1
|
117
|
-
# TODO remove any eager loading
|
118
|
-
scope = scope.only(:_id) if scope.respond_to?(:only)
|
119
|
-
each_batch(scope) do |items|
|
120
|
-
bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
|
121
|
-
batch_id += 1
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
def each_batch(scope)
|
127
|
-
# https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
|
128
|
-
# use cursor for Mongoid
|
129
|
-
items = []
|
130
|
-
scope.all.each do |item|
|
131
|
-
items << item
|
132
|
-
if items.length == batch_size
|
133
|
-
yield items
|
134
|
-
items = []
|
135
|
-
end
|
136
|
-
end
|
137
|
-
yield items if items.any?
|
138
|
-
end
|
139
|
-
|
140
|
-
def bulk_reindex_job(scope, batch_id, options)
|
141
|
-
Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
|
142
|
-
Searchkick::BulkReindexJob.perform_later({
|
143
|
-
class_name: scope.model_name.name,
|
144
|
-
index_name: index.name,
|
145
|
-
batch_id: batch_id
|
146
|
-
}.merge(options))
|
147
|
-
end
|
148
|
-
|
149
|
-
def with_retries
|
150
|
-
retries = 0
|
151
|
-
|
152
|
-
begin
|
153
|
-
yield
|
154
|
-
rescue Faraday::ClientError => e
|
155
|
-
if retries < 1
|
156
|
-
retries += 1
|
157
|
-
retry
|
158
|
-
end
|
159
|
-
raise e
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def batches_key
|
164
|
-
"searchkick:reindex:#{index.name}:batches"
|
165
|
-
end
|
166
|
-
|
167
|
-
def batch_size
|
168
|
-
@batch_size ||= index.options[:batch_size] || 1000
|
169
|
-
end
|
170
|
-
end
|
171
|
-
end
|
data/lib/searchkick/logging.rb
DELETED
@@ -1,243 +0,0 @@
|
|
1
|
-
# based on https://gist.github.com/mnutt/566725
|
2
|
-
require "active_support/core_ext/module/attr_internal"
|
3
|
-
|
4
|
-
module Searchkick
|
5
|
-
module QueryWithInstrumentation
|
6
|
-
def execute_search
|
7
|
-
name = searchkick_klass ? "#{searchkick_klass.name} Search" : "Search"
|
8
|
-
event = {
|
9
|
-
name: name,
|
10
|
-
query: params
|
11
|
-
}
|
12
|
-
ActiveSupport::Notifications.instrument("search.searchkick", event) do
|
13
|
-
super
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
module IndexWithInstrumentation
|
19
|
-
def store(record)
|
20
|
-
event = {
|
21
|
-
name: "#{record.searchkick_klass.name} Store",
|
22
|
-
id: search_id(record)
|
23
|
-
}
|
24
|
-
if Searchkick.callbacks_value == :bulk
|
25
|
-
super
|
26
|
-
else
|
27
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
28
|
-
super
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def remove(record)
|
34
|
-
name = record && record.searchkick_klass ? "#{record.searchkick_klass.name} Remove" : "Remove"
|
35
|
-
event = {
|
36
|
-
name: name,
|
37
|
-
id: search_id(record)
|
38
|
-
}
|
39
|
-
if Searchkick.callbacks_value == :bulk
|
40
|
-
super
|
41
|
-
else
|
42
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
43
|
-
super
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def update_record(record, method_name)
|
49
|
-
event = {
|
50
|
-
name: "#{record.searchkick_klass.name} Update",
|
51
|
-
id: search_id(record)
|
52
|
-
}
|
53
|
-
if Searchkick.callbacks_value == :bulk
|
54
|
-
super
|
55
|
-
else
|
56
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
57
|
-
super
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def bulk_index(records)
|
63
|
-
if records.any?
|
64
|
-
event = {
|
65
|
-
name: "#{records.first.searchkick_klass.name} Import",
|
66
|
-
count: records.size
|
67
|
-
}
|
68
|
-
event[:id] = search_id(records.first) if records.size == 1
|
69
|
-
if Searchkick.callbacks_value == :bulk
|
70
|
-
super
|
71
|
-
else
|
72
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
73
|
-
super
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
alias_method :import, :bulk_index
|
79
|
-
|
80
|
-
def bulk_update(records, *args)
|
81
|
-
if records.any?
|
82
|
-
event = {
|
83
|
-
name: "#{records.first.searchkick_klass.name} Update",
|
84
|
-
count: records.size
|
85
|
-
}
|
86
|
-
event[:id] = search_id(records.first) if records.size == 1
|
87
|
-
if Searchkick.callbacks_value == :bulk
|
88
|
-
super
|
89
|
-
else
|
90
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
91
|
-
super
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def bulk_delete(records)
|
98
|
-
if records.any?
|
99
|
-
event = {
|
100
|
-
name: "#{records.first.searchkick_klass.name} Delete",
|
101
|
-
count: records.size
|
102
|
-
}
|
103
|
-
event[:id] = search_id(records.first) if records.size == 1
|
104
|
-
if Searchkick.callbacks_value == :bulk
|
105
|
-
super
|
106
|
-
else
|
107
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
108
|
-
super
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
module IndexerWithInstrumentation
|
116
|
-
def perform
|
117
|
-
if Searchkick.callbacks_value == :bulk
|
118
|
-
event = {
|
119
|
-
name: "Bulk",
|
120
|
-
count: queued_items.size
|
121
|
-
}
|
122
|
-
ActiveSupport::Notifications.instrument("request.searchkick", event) do
|
123
|
-
super
|
124
|
-
end
|
125
|
-
else
|
126
|
-
super
|
127
|
-
end
|
128
|
-
end
|
129
|
-
end
|
130
|
-
|
131
|
-
module SearchkickWithInstrumentation
|
132
|
-
def multi_search(searches)
|
133
|
-
event = {
|
134
|
-
name: "Multi Search",
|
135
|
-
body: searches.flat_map { |q| [q.params.except(:body).to_json, q.body.to_json] }.map { |v| "#{v}\n" }.join
|
136
|
-
}
|
137
|
-
ActiveSupport::Notifications.instrument("multi_search.searchkick", event) do
|
138
|
-
super
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
# https://github.com/rails/rails/blob/master/activerecord/lib/active_record/log_subscriber.rb
|
144
|
-
class LogSubscriber < ActiveSupport::LogSubscriber
|
145
|
-
def self.runtime=(value)
|
146
|
-
Thread.current[:searchkick_runtime] = value
|
147
|
-
end
|
148
|
-
|
149
|
-
def self.runtime
|
150
|
-
Thread.current[:searchkick_runtime] ||= 0
|
151
|
-
end
|
152
|
-
|
153
|
-
def self.reset_runtime
|
154
|
-
rt = runtime
|
155
|
-
self.runtime = 0
|
156
|
-
rt
|
157
|
-
end
|
158
|
-
|
159
|
-
def search(event)
|
160
|
-
self.class.runtime += event.duration
|
161
|
-
return unless logger.debug?
|
162
|
-
|
163
|
-
payload = event.payload
|
164
|
-
name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
|
165
|
-
type = payload[:query][:type]
|
166
|
-
index = payload[:query][:index].is_a?(Array) ? payload[:query][:index].join(",") : payload[:query][:index]
|
167
|
-
|
168
|
-
# no easy way to tell which host the client will use
|
169
|
-
host = Searchkick.client.transport.hosts.first
|
170
|
-
debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -H 'Content-Type: application/json' -d '#{payload[:query][:body].to_json}'"
|
171
|
-
end
|
172
|
-
|
173
|
-
def request(event)
|
174
|
-
self.class.runtime += event.duration
|
175
|
-
return unless logger.debug?
|
176
|
-
|
177
|
-
payload = event.payload
|
178
|
-
name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
|
179
|
-
|
180
|
-
debug " #{color(name, YELLOW, true)} #{payload.except(:name).to_json}"
|
181
|
-
end
|
182
|
-
|
183
|
-
def multi_search(event)
|
184
|
-
self.class.runtime += event.duration
|
185
|
-
return unless logger.debug?
|
186
|
-
|
187
|
-
payload = event.payload
|
188
|
-
name = "#{payload[:name]} (#{event.duration.round(1)}ms)"
|
189
|
-
|
190
|
-
# no easy way to tell which host the client will use
|
191
|
-
host = Searchkick.client.transport.hosts.first
|
192
|
-
debug " #{color(name, YELLOW, true)} curl #{host[:protocol]}://#{host[:host]}:#{host[:port]}/_msearch?pretty -H 'Content-Type: application/json' -d '#{payload[:body]}'"
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
# https://github.com/rails/rails/blob/master/activerecord/lib/active_record/railties/controller_runtime.rb
|
197
|
-
module ControllerRuntime
|
198
|
-
extend ActiveSupport::Concern
|
199
|
-
|
200
|
-
protected
|
201
|
-
|
202
|
-
attr_internal :searchkick_runtime
|
203
|
-
|
204
|
-
def process_action(action, *args)
|
205
|
-
# We also need to reset the runtime before each action
|
206
|
-
# because of queries in middleware or in cases we are streaming
|
207
|
-
# and it won't be cleaned up by the method below.
|
208
|
-
Searchkick::LogSubscriber.reset_runtime
|
209
|
-
super
|
210
|
-
end
|
211
|
-
|
212
|
-
def cleanup_view_runtime
|
213
|
-
searchkick_rt_before_render = Searchkick::LogSubscriber.reset_runtime
|
214
|
-
runtime = super
|
215
|
-
searchkick_rt_after_render = Searchkick::LogSubscriber.reset_runtime
|
216
|
-
self.searchkick_runtime = searchkick_rt_before_render + searchkick_rt_after_render
|
217
|
-
runtime - searchkick_rt_after_render
|
218
|
-
end
|
219
|
-
|
220
|
-
def append_info_to_payload(payload)
|
221
|
-
super
|
222
|
-
payload[:searchkick_runtime] = (searchkick_runtime || 0) + Searchkick::LogSubscriber.reset_runtime
|
223
|
-
end
|
224
|
-
|
225
|
-
module ClassMethods
|
226
|
-
def log_process_action(payload)
|
227
|
-
messages = super
|
228
|
-
runtime = payload[:searchkick_runtime]
|
229
|
-
messages << ("Searchkick: %.1fms" % runtime.to_f) if runtime.to_f > 0
|
230
|
-
messages
|
231
|
-
end
|
232
|
-
end
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
Searchkick::Query.prepend(Searchkick::QueryWithInstrumentation)
|
237
|
-
Searchkick::Index.prepend(Searchkick::IndexWithInstrumentation)
|
238
|
-
Searchkick::Indexer.prepend(Searchkick::IndexerWithInstrumentation)
|
239
|
-
Searchkick.singleton_class.prepend(Searchkick::SearchkickWithInstrumentation)
|
240
|
-
Searchkick::LogSubscriber.attach_to :searchkick
|
241
|
-
ActiveSupport.on_load(:action_controller) do
|
242
|
-
include Searchkick::ControllerRuntime
|
243
|
-
end
|