searchkick 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +234 -96
- data/LICENSE.txt +1 -1
- data/README.md +446 -268
- data/lib/searchkick/bulk_reindex_job.rb +12 -8
- data/lib/searchkick/controller_runtime.rb +40 -0
- data/lib/searchkick/index.rb +174 -56
- data/lib/searchkick/index_cache.rb +30 -0
- data/lib/searchkick/index_options.rb +472 -349
- data/lib/searchkick/indexer.rb +15 -8
- data/lib/searchkick/log_subscriber.rb +57 -0
- data/lib/searchkick/middleware.rb +1 -1
- data/lib/searchkick/model.rb +51 -48
- data/lib/searchkick/process_batch_job.rb +10 -26
- data/lib/searchkick/process_queue_job.rb +21 -12
- data/lib/searchkick/query.rb +183 -51
- data/lib/searchkick/record_data.rb +0 -1
- data/lib/searchkick/record_indexer.rb +135 -50
- data/lib/searchkick/reindex_queue.rb +43 -6
- data/lib/searchkick/reindex_v2_job.rb +10 -34
- data/lib/searchkick/relation.rb +36 -0
- data/lib/searchkick/relation_indexer.rb +150 -0
- data/lib/searchkick/results.rb +162 -80
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +203 -79
- data/lib/tasks/searchkick.rake +21 -11
- metadata +17 -71
- data/CONTRIBUTING.md +0 -53
- data/lib/searchkick/bulk_indexer.rb +0 -171
- data/lib/searchkick/logging.rb +0 -243
@@ -1,78 +1,163 @@
|
|
1
1
|
module Searchkick
|
2
2
|
class RecordIndexer
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :index
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
7
|
-
@index = record.class.searchkick_index
|
5
|
+
def initialize(index)
|
6
|
+
@index = index
|
8
7
|
end
|
9
8
|
|
10
|
-
def reindex(
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
mode ||= Searchkick.callbacks_value || index.options[:callbacks] || true
|
9
|
+
def reindex(records, mode:, method_name:, full: false, single: false)
|
10
|
+
# prevents exists? check if records is a relation
|
11
|
+
records = records.to_a
|
12
|
+
return if records.empty?
|
16
13
|
|
17
14
|
case mode
|
15
|
+
when :async
|
16
|
+
unless defined?(ActiveJob)
|
17
|
+
raise Searchkick::Error, "Active Job not found"
|
18
|
+
end
|
19
|
+
|
20
|
+
# we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
|
21
|
+
# but keep them separate for now
|
22
|
+
if single
|
23
|
+
record = records.first
|
24
|
+
|
25
|
+
# always pass routing in case record is deleted
|
26
|
+
# before the async job runs
|
27
|
+
if record.respond_to?(:search_routing)
|
28
|
+
routing = record.search_routing
|
29
|
+
end
|
30
|
+
|
31
|
+
Searchkick::ReindexV2Job.perform_later(
|
32
|
+
record.class.name,
|
33
|
+
record.id.to_s,
|
34
|
+
method_name ? method_name.to_s : nil,
|
35
|
+
routing: routing,
|
36
|
+
index_name: index.name
|
37
|
+
)
|
38
|
+
else
|
39
|
+
Searchkick::BulkReindexJob.perform_later(
|
40
|
+
class_name: records.first.class.searchkick_options[:class_name],
|
41
|
+
record_ids: records.map { |r| r.id.to_s },
|
42
|
+
index_name: index.name,
|
43
|
+
method_name: method_name ? method_name.to_s : nil
|
44
|
+
)
|
45
|
+
end
|
18
46
|
when :queue
|
19
47
|
if method_name
|
20
48
|
raise Searchkick::Error, "Partial reindex not supported with queue option"
|
21
49
|
end
|
22
50
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
51
|
+
index.reindex_queue.push_records(records)
|
52
|
+
when true, :inline
|
53
|
+
index_records, other_records = records.partition { |r| index_record?(r) }
|
54
|
+
import_inline(index_records, !full ? other_records : [], method_name: method_name, single: single)
|
55
|
+
else
|
56
|
+
raise ArgumentError, "Invalid value for mode"
|
57
|
+
end
|
28
58
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
index.reindex_queue.push(value)
|
33
|
-
when :async
|
34
|
-
unless defined?(ActiveJob)
|
35
|
-
raise Searchkick::Error, "Active Job not found"
|
36
|
-
end
|
59
|
+
# return true like model and relation reindex for now
|
60
|
+
true
|
61
|
+
end
|
37
62
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
routing = record.search_routing
|
42
|
-
end
|
63
|
+
def reindex_items(klass, items, method_name:, single: false)
|
64
|
+
routing = items.to_h { |r| [r[:id], r[:routing]] }
|
65
|
+
record_ids = routing.keys
|
43
66
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
routing: routing
|
49
|
-
)
|
50
|
-
else # bulk, inline/true/nil
|
51
|
-
reindex_record(method_name)
|
67
|
+
relation = Searchkick.load_records(klass, record_ids)
|
68
|
+
# call search_import even for single records for nested associations
|
69
|
+
relation = relation.search_import if relation.respond_to?(:search_import)
|
70
|
+
records = relation.select(&:should_index?)
|
52
71
|
|
53
|
-
|
54
|
-
|
72
|
+
# determine which records to delete
|
73
|
+
delete_ids = record_ids - records.map { |r| r.id.to_s }
|
74
|
+
delete_records =
|
75
|
+
delete_ids.map do |id|
|
76
|
+
construct_record(klass, id, routing[id])
|
77
|
+
end
|
78
|
+
|
79
|
+
import_inline(records, delete_records, method_name: method_name, single: single)
|
55
80
|
end
|
56
81
|
|
57
82
|
private
|
58
83
|
|
59
|
-
def
|
60
|
-
|
84
|
+
def index_record?(record)
|
85
|
+
record.persisted? && !record.destroyed? && record.should_index?
|
61
86
|
end
|
62
87
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
88
|
+
# import in single request with retries
|
89
|
+
def import_inline(index_records, delete_records, method_name:, single:)
|
90
|
+
return if index_records.empty? && delete_records.empty?
|
91
|
+
|
92
|
+
maybe_bulk(index_records, delete_records, method_name, single) do
|
93
|
+
if index_records.any?
|
94
|
+
if method_name
|
95
|
+
index.bulk_update(index_records, method_name)
|
96
|
+
else
|
97
|
+
index.bulk_index(index_records)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
if delete_records.any?
|
102
|
+
index.bulk_delete(delete_records)
|
69
103
|
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def maybe_bulk(index_records, delete_records, method_name, single)
|
108
|
+
if Searchkick.callbacks_value == :bulk
|
109
|
+
yield
|
70
110
|
else
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
111
|
+
# set action and data
|
112
|
+
action =
|
113
|
+
if single && index_records.empty?
|
114
|
+
"Remove"
|
115
|
+
elsif method_name
|
116
|
+
"Update"
|
117
|
+
else
|
118
|
+
single ? "Store" : "Import"
|
119
|
+
end
|
120
|
+
record = index_records.first || delete_records.first
|
121
|
+
name = record.class.searchkick_klass.name
|
122
|
+
message = lambda do |event|
|
123
|
+
event[:name] = "#{name} #{action}"
|
124
|
+
if single
|
125
|
+
event[:id] = index.search_id(record)
|
126
|
+
else
|
127
|
+
event[:count] = index_records.size + delete_records.size
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
with_retries do
|
132
|
+
Searchkick.callbacks(:bulk, message: message) do
|
133
|
+
yield
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
def construct_record(klass, id, routing)
|
140
|
+
record = klass.new
|
141
|
+
record.id = id
|
142
|
+
if routing
|
143
|
+
record.define_singleton_method(:search_routing) do
|
144
|
+
routing
|
145
|
+
end
|
146
|
+
end
|
147
|
+
record
|
148
|
+
end
|
149
|
+
|
150
|
+
def with_retries
|
151
|
+
retries = 0
|
152
|
+
|
153
|
+
begin
|
154
|
+
yield
|
155
|
+
rescue Faraday::ClientError => e
|
156
|
+
if retries < 1
|
157
|
+
retries += 1
|
158
|
+
retry
|
75
159
|
end
|
160
|
+
raise e
|
76
161
|
end
|
77
162
|
end
|
78
163
|
end
|
@@ -8,17 +8,42 @@ module Searchkick
|
|
8
8
|
raise Searchkick::Error, "Searchkick.redis not set" unless Searchkick.redis
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# supports single and multiple ids
|
12
|
+
def push(record_ids)
|
13
|
+
Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
|
14
|
+
end
|
15
|
+
|
16
|
+
def push_records(records)
|
17
|
+
record_ids =
|
18
|
+
records.map do |record|
|
19
|
+
# always pass routing in case record is deleted
|
20
|
+
# before the queue job runs
|
21
|
+
if record.respond_to?(:search_routing)
|
22
|
+
routing = record.search_routing
|
23
|
+
end
|
24
|
+
|
25
|
+
# escape pipe with double pipe
|
26
|
+
value = escape(record.id.to_s)
|
27
|
+
value = "#{value}|#{escape(routing)}" if routing
|
28
|
+
value
|
29
|
+
end
|
30
|
+
|
31
|
+
push(record_ids)
|
13
32
|
end
|
14
33
|
|
15
34
|
# TODO use reliable queuing
|
16
35
|
def reserve(limit: 1000)
|
17
|
-
|
18
|
-
|
19
|
-
|
36
|
+
if supports_rpop_with_count?
|
37
|
+
Searchkick.with_redis { |r| r.call("rpop", redis_key, limit) }.to_a
|
38
|
+
else
|
39
|
+
record_ids = []
|
40
|
+
Searchkick.with_redis do |r|
|
41
|
+
while record_ids.size < limit && (record_id = r.rpop(redis_key))
|
42
|
+
record_ids << record_id
|
43
|
+
end
|
44
|
+
end
|
45
|
+
record_ids
|
20
46
|
end
|
21
|
-
record_ids.to_a
|
22
47
|
end
|
23
48
|
|
24
49
|
def clear
|
@@ -34,5 +59,17 @@ module Searchkick
|
|
34
59
|
def redis_key
|
35
60
|
"searchkick:reindex_queue:#{name}"
|
36
61
|
end
|
62
|
+
|
63
|
+
def supports_rpop_with_count?
|
64
|
+
redis_version >= Gem::Version.new("6.2")
|
65
|
+
end
|
66
|
+
|
67
|
+
def redis_version
|
68
|
+
@redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
|
69
|
+
end
|
70
|
+
|
71
|
+
def escape(value)
|
72
|
+
value.gsub("|", "||")
|
73
|
+
end
|
37
74
|
end
|
38
75
|
end
|
@@ -1,41 +1,17 @@
|
|
1
1
|
module Searchkick
|
2
2
|
class ReindexV2Job < ActiveJob::Base
|
3
|
-
RECORD_NOT_FOUND_CLASSES = [
|
4
|
-
"ActiveRecord::RecordNotFound",
|
5
|
-
"Mongoid::Errors::DocumentNotFound",
|
6
|
-
"NoBrainer::Error::DocumentNotFound",
|
7
|
-
"Cequel::Record::RecordNotFound"
|
8
|
-
]
|
9
|
-
|
10
3
|
queue_as { Searchkick.queue_name }
|
11
4
|
|
12
|
-
def perform(
|
13
|
-
model =
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# check by name rather than rescue directly so we don't need
|
23
|
-
# to determine which classes are defined
|
24
|
-
raise e unless RECORD_NOT_FOUND_CLASSES.include?(e.class.name)
|
25
|
-
nil
|
26
|
-
end
|
27
|
-
|
28
|
-
unless record
|
29
|
-
record = model.new
|
30
|
-
record.id = id
|
31
|
-
if routing
|
32
|
-
record.define_singleton_method(:search_routing) do
|
33
|
-
routing
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
RecordIndexer.new(record).reindex(method_name, mode: :inline)
|
5
|
+
def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
|
6
|
+
model = Searchkick.load_model(class_name, allow_child: true)
|
7
|
+
index = model.searchkick_index(name: index_name)
|
8
|
+
# use should_index? to decide whether to index (not default scope)
|
9
|
+
# just like saving inline
|
10
|
+
# could use Searchkick.scope() in future
|
11
|
+
# but keep for now for backwards compatibility
|
12
|
+
model = model.unscoped if model.respond_to?(:unscoped)
|
13
|
+
items = [{id: id, routing: routing}]
|
14
|
+
RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
|
39
15
|
end
|
40
16
|
end
|
41
17
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class Relation
|
3
|
+
# note: modifying body directly is not supported
|
4
|
+
# and has no impact on query after being executed
|
5
|
+
# TODO freeze body object?
|
6
|
+
delegate :body, :params, to: :@query
|
7
|
+
delegate_missing_to :private_execute
|
8
|
+
|
9
|
+
def initialize(model, term = "*", **options)
|
10
|
+
@query = Query.new(model, term, **options)
|
11
|
+
end
|
12
|
+
|
13
|
+
# same as Active Record
|
14
|
+
def inspect
|
15
|
+
entries = results.first(11).map!(&:inspect)
|
16
|
+
entries[10] = "..." if entries.size == 11
|
17
|
+
"#<#{self.class.name} [#{entries.join(', ')}]>"
|
18
|
+
end
|
19
|
+
|
20
|
+
def execute
|
21
|
+
Searchkick.warn("The execute method is no longer needed")
|
22
|
+
private_execute
|
23
|
+
self
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def private_execute
|
29
|
+
@execute ||= @query.execute
|
30
|
+
end
|
31
|
+
|
32
|
+
def query
|
33
|
+
@query
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class RelationIndexer
|
3
|
+
attr_reader :index
|
4
|
+
|
5
|
+
def initialize(index)
|
6
|
+
@index = index
|
7
|
+
end
|
8
|
+
|
9
|
+
def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
|
10
|
+
# apply scopes
|
11
|
+
if scope
|
12
|
+
relation = relation.send(scope)
|
13
|
+
elsif relation.respond_to?(:search_import)
|
14
|
+
relation = relation.search_import
|
15
|
+
end
|
16
|
+
|
17
|
+
# remove unneeded loading for async
|
18
|
+
if mode == :async
|
19
|
+
if relation.respond_to?(:primary_key)
|
20
|
+
relation = relation.select(relation.primary_key).except(:includes, :preload)
|
21
|
+
elsif relation.respond_to?(:only)
|
22
|
+
relation = relation.only(:_id)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
if mode == :async && full
|
27
|
+
return full_reindex_async(relation)
|
28
|
+
end
|
29
|
+
|
30
|
+
relation = resume_relation(relation) if resume
|
31
|
+
|
32
|
+
reindex_options = {
|
33
|
+
mode: mode,
|
34
|
+
method_name: method_name,
|
35
|
+
full: full
|
36
|
+
}
|
37
|
+
record_indexer = RecordIndexer.new(index)
|
38
|
+
|
39
|
+
in_batches(relation) do |items|
|
40
|
+
record_indexer.reindex(items, **reindex_options)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def batches_left
|
45
|
+
Searchkick.with_redis { |r| r.scard(batches_key) }
|
46
|
+
end
|
47
|
+
|
48
|
+
def batch_completed(batch_id)
|
49
|
+
Searchkick.with_redis { |r| r.srem(batches_key, batch_id) }
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
|
54
|
+
def resume_relation(relation)
|
55
|
+
if relation.respond_to?(:primary_key)
|
56
|
+
# use total docs instead of max id since there's not a great way
|
57
|
+
# to get the max _id without scripting since it's a string
|
58
|
+
where = relation.arel_table[relation.primary_key].gt(index.total_docs)
|
59
|
+
relation = relation.where(where)
|
60
|
+
else
|
61
|
+
raise Error, "Resume not supported for Mongoid"
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def in_batches(relation)
|
66
|
+
if relation.respond_to?(:find_in_batches)
|
67
|
+
klass = relation.klass
|
68
|
+
# remove order to prevent possible warnings
|
69
|
+
relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
|
70
|
+
# prevent scope from affecting search_data as well as inline jobs
|
71
|
+
# Active Record runs relation calls in scoping block
|
72
|
+
# https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
|
73
|
+
# note: we could probably just call klass.current_scope = nil
|
74
|
+
# anywhere in reindex method (after initial all call),
|
75
|
+
# but this is more cautious
|
76
|
+
previous_scope = klass.current_scope(true)
|
77
|
+
if previous_scope
|
78
|
+
begin
|
79
|
+
klass.current_scope = nil
|
80
|
+
yield batch
|
81
|
+
ensure
|
82
|
+
klass.current_scope = previous_scope
|
83
|
+
end
|
84
|
+
else
|
85
|
+
yield batch
|
86
|
+
end
|
87
|
+
end
|
88
|
+
else
|
89
|
+
klass = relation.klass
|
90
|
+
each_batch(relation, batch_size: batch_size) do |batch|
|
91
|
+
# prevent scope from affecting search_data as well as inline jobs
|
92
|
+
# note: Model.with_scope doesn't always restore scope, so use custom logic
|
93
|
+
previous_scope = Mongoid::Threaded.current_scope(klass)
|
94
|
+
if previous_scope
|
95
|
+
begin
|
96
|
+
Mongoid::Threaded.set_current_scope(nil, klass)
|
97
|
+
yield batch
|
98
|
+
ensure
|
99
|
+
Mongoid::Threaded.set_current_scope(previous_scope, klass)
|
100
|
+
end
|
101
|
+
else
|
102
|
+
yield batch
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
def each_batch(relation, batch_size:)
|
109
|
+
# https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
|
110
|
+
# use cursor for Mongoid
|
111
|
+
items = []
|
112
|
+
relation.all.each do |item|
|
113
|
+
items << item
|
114
|
+
if items.length == batch_size
|
115
|
+
yield items
|
116
|
+
items = []
|
117
|
+
end
|
118
|
+
end
|
119
|
+
yield items if items.any?
|
120
|
+
end
|
121
|
+
|
122
|
+
def batch_size
|
123
|
+
@batch_size ||= index.options[:batch_size] || 1000
|
124
|
+
end
|
125
|
+
|
126
|
+
def full_reindex_async(relation)
|
127
|
+
batch_id = 1
|
128
|
+
class_name = relation.searchkick_options[:class_name]
|
129
|
+
|
130
|
+
in_batches(relation) do |items|
|
131
|
+
batch_job(class_name, batch_id, items.map(&:id))
|
132
|
+
batch_id += 1
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def batch_job(class_name, batch_id, record_ids)
|
137
|
+
Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
|
138
|
+
Searchkick::BulkReindexJob.perform_later(
|
139
|
+
class_name: class_name,
|
140
|
+
index_name: index.name,
|
141
|
+
batch_id: batch_id,
|
142
|
+
record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
|
143
|
+
)
|
144
|
+
end
|
145
|
+
|
146
|
+
def batches_key
|
147
|
+
"searchkick:reindex:#{index.name}:batches"
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|