searchkick 4.6.3 → 5.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -1
- data/README.md +187 -124
- data/lib/searchkick/bulk_reindex_job.rb +12 -8
- data/lib/searchkick/controller_runtime.rb +40 -0
- data/lib/searchkick/index.rb +146 -65
- data/lib/searchkick/index_cache.rb +30 -0
- data/lib/searchkick/index_options.rb +17 -67
- data/lib/searchkick/indexer.rb +15 -8
- data/lib/searchkick/log_subscriber.rb +57 -0
- data/lib/searchkick/middleware.rb +1 -1
- data/lib/searchkick/model.rb +48 -49
- data/lib/searchkick/process_batch_job.rb +9 -25
- data/lib/searchkick/process_queue_job.rb +3 -2
- data/lib/searchkick/query.rb +38 -54
- data/lib/searchkick/record_data.rb +1 -1
- data/lib/searchkick/record_indexer.rb +136 -52
- data/lib/searchkick/reindex_queue.rb +26 -3
- data/lib/searchkick/reindex_v2_job.rb +10 -34
- data/lib/searchkick/relation.rb +112 -0
- data/lib/searchkick/relation_indexer.rb +150 -0
- data/lib/searchkick/results.rb +27 -28
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +159 -84
- data/lib/tasks/searchkick.rake +6 -3
- metadata +11 -28
- data/lib/searchkick/bulk_indexer.rb +0 -173
- data/lib/searchkick/logging.rb +0 -246
@@ -1,79 +1,163 @@
|
|
1
1
|
module Searchkick
  # Indexes, updates, and removes a batch of records for one index.
  # Work is done inline, handed to Active Job, or pushed onto the index's
  # reindex queue, depending on the mode passed to #reindex.
  class RecordIndexer
    attr_reader :index

    def initialize(index)
      @index = index
    end

    # Reindexes +records+ (an array or anything responding to +to_a+).
    #
    # mode::        +:async+, +:queue+, +:inline+, or +true+ (same as inline)
    # method_name:: when set, a partial update is performed with this method
    # full::        when true, records that should not be indexed are skipped
    #               instead of being deleted from the index
    # single::      when true, the call is treated as a single-record reindex
    #
    # Returns nil when there are no records, true otherwise.
    # Raises ArgumentError for an unknown mode, and Error when Active Job is
    # missing (async) or a partial reindex is requested with the queue.
    def reindex(records, mode:, method_name:, full: false, single: false)
      # prevents exists? check if records is a relation
      records = records.to_a
      return if records.empty?

      case mode
      when :async
        enqueue_async(records, method_name: method_name, single: single)
      when :queue
        if method_name
          raise Error, "Partial reindex not supported with queue option"
        end

        index.reindex_queue.push_records(records)
      when true, :inline
        index_records, other_records = records.partition { |r| index_record?(r) }
        delete_records = full ? [] : other_records
        import_inline(index_records, delete_records, method_name: method_name, single: single)
      else
        raise ArgumentError, "Invalid value for mode"
      end

      # return true like model and relation reindex for now
      true
    end

    # Loads the records named by +items+ (hashes with :id and :routing),
    # indexes the ones that should be indexed, and deletes the rest.
    def reindex_items(klass, items, method_name:, single: false)
      # key by string id: loaded ids are compared as strings below, so
      # non-string ids would otherwise never match and every loaded record
      # would also be scheduled for deletion
      routing = items.to_h { |item| [item[:id].to_s, item[:routing]] }
      record_ids = routing.keys

      relation = Searchkick.load_records(klass, record_ids)
      # call search_import even for single records for nested associations
      relation = relation.search_import if relation.respond_to?(:search_import)
      records = relation.select(&:should_index?)

      # determine which records to delete
      delete_ids = record_ids - records.map { |r| r.id.to_s }
      delete_records = delete_ids.map { |id| construct_record(klass, id, routing[id]) }

      import_inline(records, delete_records, method_name: method_name, single: single)
    end

    private

    # Enqueues the Active Job(s) for an async reindex.
    def enqueue_async(records, method_name:, single:)
      unless defined?(ActiveJob)
        raise Error, "Active Job not found"
      end

      job_method_name = method_name ? method_name.to_s : nil

      # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
      # but keep them separate for now
      if single
        record = records.first

        # always pass routing in case record is deleted
        # before the async job runs
        routing = record.search_routing if record.respond_to?(:search_routing)

        Searchkick::ReindexV2Job.perform_later(
          record.class.name,
          record.id.to_s,
          job_method_name,
          routing: routing,
          index_name: index.name
        )
      else
        Searchkick::BulkReindexJob.perform_later(
          class_name: records.first.class.searchkick_options[:class_name],
          record_ids: records.map { |r| r.id.to_s },
          index_name: index.name,
          method_name: job_method_name
        )
      end
    end

    def index_record?(record)
      record.persisted? && !record.destroyed? && record.should_index?
    end

    # import in single request with retries
    def import_inline(index_records, delete_records, method_name:, single:)
      return if index_records.empty? && delete_records.empty?

      maybe_bulk(index_records, delete_records, method_name, single) do
        if index_records.any?
          if method_name
            index.bulk_update(index_records, method_name)
          else
            index.bulk_index(index_records)
          end
        end

        if delete_records.any?
          index.bulk_delete(delete_records)
        end
      end
    end

    # Yields directly when bulk callbacks are already active; otherwise wraps
    # the block in a bulk callbacks block (with a log message) and retries.
    def maybe_bulk(index_records, delete_records, method_name, single)
      if Searchkick.callbacks_value == :bulk
        yield
      else
        # set action and data
        action =
          if single && index_records.empty?
            "Remove"
          elsif method_name
            "Update"
          else
            single ? "Store" : "Import"
          end
        record = index_records.first || delete_records.first
        name = record.class.searchkick_klass.name
        message = lambda do |event|
          event[:name] = "#{name} #{action}"
          if single
            event[:id] = index.search_id(record)
          else
            event[:count] = index_records.size + delete_records.size
          end
        end

        with_retries do
          Searchkick.callbacks(:bulk, message: message) do
            yield
          end
        end
      end
    end

    # Builds an unsaved stand-in carrying only the id (and routing, if any)
    # so a record that could not be loaded can still be deleted.
    def construct_record(klass, id, routing)
      record = klass.new
      record.id = id
      if routing
        record.define_singleton_method(:search_routing) do
          routing
        end
      end
      record
    end

    # Runs the block, retrying once on Faraday::ClientError before re-raising.
    def with_retries
      retries = 0

      begin
        yield
      rescue Faraday::ClientError => e
        if retries < 1
          retries += 1
          retry
        end
        raise e
      end
    end
  end
end
|
@@ -5,11 +5,30 @@ module Searchkick
|
|
5
5
|
# Stores the queue name; a configured Searchkick.redis is required.
# Raises Error when Searchkick.redis is not set.
def initialize(name)
  @name = name

  unless Searchkick.redis
    raise Error, "Searchkick.redis not set"
  end
end
|
10
10
|
|
11
|
-
|
12
|
-
|
11
|
+
# supports single and multiple ids
# (values are pushed onto the list stored at redis_key with LPUSH)
def push(record_ids)
  Searchkick.with_redis do |redis|
    redis.lpush(redis_key, record_ids)
  end
end
|
15
|
+
|
16
|
+
# Encodes each record as "id" or "id|routing" and pushes the values
# onto the queue in a single call.
def push_records(records)
  record_ids =
    records.map do |record|
      # always pass routing in case record is deleted
      # before the queue job runs
      routing = record.search_routing if record.respond_to?(:search_routing)

      # escape pipe with double pipe
      value = escape(record.id.to_s)
      # routing is not necessarily a string (e.g. an integer foreign key),
      # so convert it before escaping
      value = "#{value}|#{escape(routing.to_s)}" if routing
      value
    end

  push(record_ids)
end
|
14
33
|
|
15
34
|
# TODO use reliable queuing
|
@@ -48,5 +67,9 @@ module Searchkick
|
|
48
67
|
# Version reported by the server's INFO output, memoized after the first call.
def redis_version
  @redis_version ||=
    Searchkick.with_redis do |redis|
      Gem::Version.new(redis.info["redis_version"])
    end
end
|
70
|
+
|
71
|
+
# Doubles every pipe so it cannot be confused with the "id|routing"
# separator. The value is converted with to_s first so non-string values
# (such as an integer routing key) do not raise NoMethodError.
def escape(value)
  value.to_s.gsub("|", "||")
end
|
51
74
|
end
|
52
75
|
end
|
@@ -1,41 +1,17 @@
|
|
1
1
|
module Searchkick
  # Active Job that reindexes (or removes) one record identified by
  # class name and id.
  class ReindexV2Job < ActiveJob::Base
    queue_as { Searchkick.queue_name }

    # class_name::  name of the model class to load
    # id::          id of the record
    # method_name:: optional method name for a partial update
    # routing::     routing value, used if the record has to be deleted
    # index_name::  name of the index to write to
    def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
      model = Searchkick.load_model(class_name, allow_child: true)
      index = model.searchkick_index(name: index_name)

      # use should_index? to decide whether to index (not default scope)
      # just like saving inline
      # could use Searchkick.scope() in future
      # but keep for now for backwards compatibility
      if model.respond_to?(:unscoped)
        model = model.unscoped
      end

      indexer = RecordIndexer.new(index)
      indexer.reindex_items(
        model,
        [{id: id, routing: routing}],
        method_name: method_name,
        single: true
      )
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Searchkick
  # Lazily executed search. Options can be chained (limit, offset, page,
  # per_page) until the query has been executed; methods not defined here
  # are forwarded to the executed result.
  class Relation
    # sentinel used to tell "no argument given" apart from any real value
    NO_DEFAULT_VALUE = Object.new

    # note: modifying body directly is not supported
    # and has no impact on query after being executed
    # TODO freeze body object?
    delegate :body, :params, to: :query
    delegate_missing_to :private_execute

    def initialize(model, term = "*", **options)
      @model = model
      @term = term
      @options = options

      # generate query to validate options
      query
    end

    # clone/dup copy instance variables by reference, so without this the
    # chainable methods below (clone + bang method) would write into the
    # options hash of the relation they were called on, leaking options
    # into it and into every other relation derived from it
    def initialize_copy(other)
      super
      @options = @options.dup
    end

    # same as Active Record
    def inspect
      entries = results.first(11).map!(&:inspect)
      entries[10] = "..." if entries.size == 11
      "#<#{self.class.name} [#{entries.join(', ')}]>"
    end

    # Deprecated: runs the query (if not already run) and returns self.
    def execute
      Searchkick.warn("The execute method is no longer needed")
      private_execute
      self
    end

    # Returns a copy with the limit set.
    def limit(value)
      clone.limit!(value)
    end

    # Sets the limit in place. Raises Error if the relation is loaded.
    def limit!(value)
      check_loaded
      @options[:limit] = value
      self
    end

    # With an argument, returns a copy with the offset set;
    # without one, returns the offset of the executed result.
    def offset(value = NO_DEFAULT_VALUE)
      # TODO remove in Searchkick 6
      if NO_DEFAULT_VALUE.equal?(value)
        private_execute.offset
      else
        clone.offset!(value)
      end
    end

    # Sets the offset in place. Raises Error if the relation is loaded.
    def offset!(value)
      check_loaded
      @options[:offset] = value
      self
    end

    # Returns a copy with the page set.
    def page(value)
      clone.page!(value)
    end

    # Sets the page in place. Raises Error if the relation is loaded.
    def page!(value)
      check_loaded
      @options[:page] = value
      self
    end

    # With an argument, returns a copy with per_page set;
    # without one, returns per_page of the executed result.
    def per_page(value = NO_DEFAULT_VALUE)
      # TODO remove in Searchkick 6
      if NO_DEFAULT_VALUE.equal?(value)
        private_execute.per_page
      else
        clone.per_page!(value)
      end
    end

    # Sets per_page in place. Raises Error if the relation is loaded.
    def per_page!(value)
      check_loaded
      @options[:per_page] = value
      self
    end

    # New relation keeping only the given option keys.
    def only(*keys)
      Relation.new(@model, @term, **@options.slice(*keys))
    end

    # New relation without the given option keys.
    def except(*keys)
      Relation.new(@model, @term, **@options.except(*keys))
    end

    # True once the query has been executed.
    def loaded?
      !@execute.nil?
    end

    private

    def private_execute
      @execute ||= query.execute
    end

    def query
      @query ||= Query.new(@model, @term, **@options)
    end

    def check_loaded
      raise Error, "Relation loaded" if loaded?

      # reset query since options will change
      @query = nil
    end
  end
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
module Searchkick
  # Reindexes every record of a relation in batches, delegating each batch
  # to RecordIndexer or, for a full async reindex, to BulkReindexJob.
  class RelationIndexer
    attr_reader :index

    def initialize(index)
      @index = index
    end

    # Reindexes +relation+ batch by batch.
    #
    # mode::        passed through to RecordIndexer#reindex
    # method_name:: optional method name for partial updates
    # full::        passed through; with mode :async, one job is enqueued per batch
    # resume::      when true, only rows whose primary key is greater than
    #               the index's total_docs are processed
    # scope::       name of a scope to apply instead of search_import
    def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
      # apply scopes
      if scope
        relation = relation.send(scope)
      elsif relation.respond_to?(:search_import)
        relation = relation.search_import
      end

      if mode == :async
        # remove unneeded loading for async
        if relation.respond_to?(:primary_key)
          relation = relation.select(relation.primary_key).except(:includes, :preload)
        elsif relation.respond_to?(:only)
          relation = relation.only(:_id)
        end

        return full_reindex_async(relation) if full
      end

      relation = resume_relation(relation) if resume

      reindex_options = {
        mode: mode,
        method_name: method_name,
        full: full
      }
      record_indexer = RecordIndexer.new(index)

      in_batches(relation) do |items|
        record_indexer.reindex(items, **reindex_options)
      end
    end

    # Number of batch ids still recorded in Redis for this index.
    def batches_left
      Searchkick.with_redis { |r| r.scard(batches_key) }
    end

    # Removes +batch_id+ from the set of outstanding batches.
    def batch_completed(batch_id)
      Searchkick.with_redis { |r| r.srem(batches_key, batch_id) }
    end

    private

    def resume_relation(relation)
      unless relation.respond_to?(:primary_key)
        raise Error, "Resume not supported for Mongoid"
      end

      # use total docs instead of max id since there's not a great way
      # to get the max _id without scripting since it's a string
      where = relation.arel_table[relation.primary_key].gt(index.total_docs)
      relation.where(where)
    end

    # Yields each batch of records with the model's current scope cleared.
    def in_batches(relation)
      klass = relation.klass

      if relation.respond_to?(:find_in_batches)
        set_scope = ->(scope) { klass.current_scope = scope }

        # remove order to prevent possible warnings
        relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # Active Record runs relation calls in scoping block
          # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
          # note: we could probably just call klass.current_scope = nil
          # anywhere in reindex method (after initial all call),
          # but this is more cautious
          without_scope(klass.current_scope(true), set_scope) { yield batch }
        end
      else
        set_scope = ->(scope) { Mongoid::Threaded.set_current_scope(scope, klass) }

        each_batch(relation, batch_size: batch_size) do |batch|
          # prevent scope from affecting search_data as well as inline jobs
          # note: Model.with_scope doesn't always restore scope, so use custom logic
          without_scope(Mongoid::Threaded.current_scope(klass), set_scope) { yield batch }
        end
      end
    end

    # Runs the block with the scope set to nil via +set_scope+ and restores
    # +previous_scope+ afterwards, even if the block raises. When there is no
    # previous scope, nothing is changed and the block is simply run.
    def without_scope(previous_scope, set_scope)
      return yield unless previous_scope

      begin
        set_scope.call(nil)
        yield
      ensure
        set_scope.call(previous_scope)
      end
    end

    def each_batch(relation, batch_size:)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      items = []
      relation.all.each do |item|
        items << item
        if items.length == batch_size
          yield items
          items = []
        end
      end
      yield items if items.any?
    end

    # Batch size from the index options, defaulting to 1000.
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end

    # Enqueues one BulkReindexJob per batch, numbering batches from 1.
    def full_reindex_async(relation)
      batch_id = 1
      class_name = relation.searchkick_options[:class_name]

      in_batches(relation) do |items|
        batch_job(class_name, batch_id, items.map(&:id))
        batch_id += 1
      end
    end

    # Records the batch id in Redis, then enqueues the job for its records.
    def batch_job(class_name, batch_id, record_ids)
      Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
      Searchkick::BulkReindexJob.perform_later(
        class_name: class_name,
        index_name: index.name,
        batch_id: batch_id,
        # integers are passed as-is; everything else is passed as a string
        record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
      )
    end

    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end
  end
end
|