searchkick 4.4.0 → 5.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,28 +5,53 @@ module Searchkick
5
5
  def initialize(name)
6
6
  @name = name
7
7
 
8
- raise Searchkick::Error, "Searchkick.redis not set" unless Searchkick.redis
8
+ raise Error, "Searchkick.redis not set" unless Searchkick.redis
9
9
  end
10
10
 
11
- def push(record_id)
12
- Searchkick.with_redis { |r| r.lpush(redis_key, record_id) }
11
+ # supports single and multiple ids
12
+ def push(record_ids)
13
+ Searchkick.with_redis { |r| r.call("LPUSH", redis_key, record_ids) }
14
+ end
15
+
16
+ def push_records(records)
17
+ record_ids =
18
+ records.map do |record|
19
+ # always pass routing in case record is deleted
20
+ # before the queue job runs
21
+ if record.respond_to?(:search_routing)
22
+ routing = record.search_routing
23
+ end
24
+
25
+ # escape pipe with double pipe
26
+ value = escape(record.id.to_s)
27
+ value = "#{value}|#{escape(routing)}" if routing
28
+ value
29
+ end
30
+
31
+ push(record_ids)
13
32
  end
14
33
 
15
34
  # TODO use reliable queuing
16
35
  def reserve(limit: 1000)
17
- record_ids = Set.new
18
- while record_ids.size < limit && (record_id = Searchkick.with_redis { |r| r.rpop(redis_key) })
19
- record_ids << record_id
36
+ if supports_rpop_with_count?
37
+ Searchkick.with_redis { |r| r.call("RPOP", redis_key, limit) }.to_a
38
+ else
39
+ record_ids = []
40
+ Searchkick.with_redis do |r|
41
+ while record_ids.size < limit && (record_id = r.call("RPOP", redis_key))
42
+ record_ids << record_id
43
+ end
44
+ end
45
+ record_ids
20
46
  end
21
- record_ids.to_a
22
47
  end
23
48
 
24
49
  def clear
25
- Searchkick.with_redis { |r| r.del(redis_key) }
50
+ Searchkick.with_redis { |r| r.call("DEL", redis_key) }
26
51
  end
27
52
 
28
53
  def length
29
- Searchkick.with_redis { |r| r.llen(redis_key) }
54
+ Searchkick.with_redis { |r| r.call("LLEN", redis_key) }
30
55
  end
31
56
 
32
57
  private
@@ -34,5 +59,22 @@ module Searchkick
34
59
  def redis_key
35
60
  "searchkick:reindex_queue:#{name}"
36
61
  end
62
+
63
+ def supports_rpop_with_count?
64
+ redis_version >= Gem::Version.new("6.2")
65
+ end
66
+
67
+ def redis_version
68
+ @redis_version ||=
69
+ Searchkick.with_redis do |r|
70
+ info = r.call("INFO")
71
+ matches = /redis_version:(\S+)/.match(info)
72
+ Gem::Version.new(matches[1])
73
+ end
74
+ end
75
+
76
+ def escape(value)
77
+ value.to_s.gsub("|", "||")
78
+ end
37
79
  end
38
80
  end
@@ -1,41 +1,17 @@
1
1
  module Searchkick
2
2
  class ReindexV2Job < ActiveJob::Base
3
- RECORD_NOT_FOUND_CLASSES = [
4
- "ActiveRecord::RecordNotFound",
5
- "Mongoid::Errors::DocumentNotFound",
6
- "NoBrainer::Error::DocumentNotFound",
7
- "Cequel::Record::RecordNotFound"
8
- ]
9
-
10
3
  queue_as { Searchkick.queue_name }
11
4
 
12
- def perform(klass, id, method_name = nil, routing: nil)
13
- model = klass.constantize
14
- record =
15
- begin
16
- if model.respond_to?(:unscoped)
17
- model.unscoped.find(id)
18
- else
19
- model.find(id)
20
- end
21
- rescue => e
22
- # check by name rather than rescue directly so we don't need
23
- # to determine which classes are defined
24
- raise e unless RECORD_NOT_FOUND_CLASSES.include?(e.class.name)
25
- nil
26
- end
27
-
28
- unless record
29
- record = model.new
30
- record.id = id
31
- if routing
32
- record.define_singleton_method(:search_routing) do
33
- routing
34
- end
35
- end
36
- end
37
-
38
- RecordIndexer.new(record).reindex(method_name, mode: :inline)
5
+ def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
6
+ model = Searchkick.load_model(class_name, allow_child: true)
7
+ index = model.searchkick_index(name: index_name)
8
+ # use should_index? to decide whether to index (not default scope)
9
+ # just like saving inline
10
+ # could use Searchkick.scope() in future
11
+ # but keep for now for backwards compatibility
12
+ model = model.unscoped if model.respond_to?(:unscoped)
13
+ items = [{id: id, routing: routing}]
14
+ RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
39
15
  end
40
16
  end
41
17
  end
@@ -0,0 +1,247 @@
1
+ module Searchkick
2
+ class Relation
3
+ NO_DEFAULT_VALUE = Object.new
4
+
5
+ # note: modifying body directly is not supported
6
+ # and has no impact on query after being executed
7
+ # TODO freeze body object?
8
+ delegate :body, :params, to: :query
9
+ delegate_missing_to :private_execute
10
+
11
+ attr_reader :model
12
+ alias_method :klass, :model
13
+
14
+ def initialize(model, term = "*", **options)
15
+ @model = model
16
+ @term = term
17
+ @options = options
18
+
19
+ # generate query to validate options
20
+ query
21
+ end
22
+
23
+ # same as Active Record
24
+ def inspect
25
+ entries = results.first(11).map!(&:inspect)
26
+ entries[10] = "..." if entries.size == 11
27
+ "#<#{self.class.name} [#{entries.join(', ')}]>"
28
+ end
29
+
30
+ def execute
31
+ Searchkick.warn("The execute method is no longer needed")
32
+ load
33
+ end
34
+
35
+ # experimental
36
+ def limit(value)
37
+ clone.limit!(value)
38
+ end
39
+
40
+ # experimental
41
+ def limit!(value)
42
+ check_loaded
43
+ @options[:limit] = value
44
+ self
45
+ end
46
+
47
+ # experimental
48
+ def offset(value = NO_DEFAULT_VALUE)
49
+ # TODO remove in Searchkick 6
50
+ if value == NO_DEFAULT_VALUE
51
+ private_execute.offset
52
+ else
53
+ clone.offset!(value)
54
+ end
55
+ end
56
+
57
+ # experimental
58
+ def offset!(value)
59
+ check_loaded
60
+ @options[:offset] = value
61
+ self
62
+ end
63
+
64
+ # experimental
65
+ def page(value)
66
+ clone.page!(value)
67
+ end
68
+
69
+ # experimental
70
+ def page!(value)
71
+ check_loaded
72
+ @options[:page] = value
73
+ self
74
+ end
75
+
76
+ # experimental
77
+ def per_page(value = NO_DEFAULT_VALUE)
78
+ # TODO remove in Searchkick 6
79
+ if value == NO_DEFAULT_VALUE
80
+ private_execute.per_page
81
+ else
82
+ clone.per_page!(value)
83
+ end
84
+ end
85
+
86
+ # experimental
87
+ def per_page!(value)
88
+ check_loaded
89
+ @options[:per_page] = value
90
+ self
91
+ end
92
+
93
+ # experimental
94
+ def where(value = NO_DEFAULT_VALUE)
95
+ if value == NO_DEFAULT_VALUE
96
+ Where.new(self)
97
+ else
98
+ clone.where!(value)
99
+ end
100
+ end
101
+
102
+ # experimental
103
+ def where!(value)
104
+ check_loaded
105
+ if @options[:where]
106
+ @options[:where] = {_and: [@options[:where], ensure_permitted(value)]}
107
+ else
108
+ @options[:where] = ensure_permitted(value)
109
+ end
110
+ self
111
+ end
112
+
113
+ # experimental
114
+ def rewhere(value)
115
+ clone.rewhere!(value)
116
+ end
117
+
118
+ # experimental
119
+ def rewhere!(value)
120
+ check_loaded
121
+ @options[:where] = ensure_permitted(value)
122
+ self
123
+ end
124
+
125
+ # experimental
126
+ def order(*values)
127
+ clone.order!(*values)
128
+ end
129
+
130
+ # experimental
131
+ def order!(*values)
132
+ values = values.first if values.size == 1 && values.first.is_a?(Array)
133
+ check_loaded
134
+ (@options[:order] ||= []).concat(values)
135
+ self
136
+ end
137
+
138
+ # experimental
139
+ def reorder(*values)
140
+ clone.reorder!(*values)
141
+ end
142
+
143
+ # experimental
144
+ def reorder!(*values)
145
+ check_loaded
146
+ @options[:order] = values
147
+ self
148
+ end
149
+
150
+ # experimental
151
+ def select(*values, &block)
152
+ if block_given?
153
+ private_execute.select(*values, &block)
154
+ else
155
+ clone.select!(*values)
156
+ end
157
+ end
158
+
159
+ # experimental
160
+ def select!(*values)
161
+ check_loaded
162
+ (@options[:select] ||= []).concat(values)
163
+ self
164
+ end
165
+
166
+ # experimental
167
+ def reselect(*values)
168
+ clone.reselect!(*values)
169
+ end
170
+
171
+ # experimental
172
+ def reselect!(*values)
173
+ check_loaded
174
+ @options[:select] = values
175
+ self
176
+ end
177
+
178
+ # experimental
179
+ def includes(*values)
180
+ clone.includes!(*values)
181
+ end
182
+
183
+ # experimental
184
+ def includes!(*values)
185
+ check_loaded
186
+ (@options[:includes] ||= []).concat(values)
187
+ self
188
+ end
189
+
190
+ # experimental
191
+ def only(*keys)
192
+ Relation.new(@model, @term, **@options.slice(*keys))
193
+ end
194
+
195
+ # experimental
196
+ def except(*keys)
197
+ Relation.new(@model, @term, **@options.except(*keys))
198
+ end
199
+
200
+ # experimental
201
+ def load
202
+ private_execute
203
+ self
204
+ end
205
+
206
+ def loaded?
207
+ !@execute.nil?
208
+ end
209
+
210
+ def respond_to_missing?(method_name, include_all)
211
+ Results.new(nil, nil, nil).respond_to?(method_name, include_all) || super
212
+ end
213
+
214
+ # TODO uncomment in 6.0
215
+ # def to_yaml
216
+ # private_execute.to_a.to_yaml
217
+ # end
218
+
219
+ private
220
+
221
+ def private_execute
222
+ @execute ||= query.execute
223
+ end
224
+
225
+ def query
226
+ @query ||= Query.new(@model, @term, **@options)
227
+ end
228
+
229
+ def check_loaded
230
+ raise Error, "Relation loaded" if loaded?
231
+
232
+ # reset query since options will change
233
+ @query = nil
234
+ end
235
+
236
+ # provides *very* basic protection from unfiltered parameters
237
+ # this is not meant to be comprehensive and may be expanded in the future
238
+ def ensure_permitted(obj)
239
+ obj.to_h
240
+ end
241
+
242
+ def initialize_copy(other)
243
+ super
244
+ @execute = nil
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,155 @@
1
+ module Searchkick
2
+ class RelationIndexer
3
+ attr_reader :index
4
+
5
+ def initialize(index)
6
+ @index = index
7
+ end
8
+
9
+ def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
10
+ # apply scopes
11
+ if scope
12
+ relation = relation.send(scope)
13
+ elsif relation.respond_to?(:search_import)
14
+ relation = relation.search_import
15
+ end
16
+
17
+ # remove unneeded loading for async and queue
18
+ if mode == :async || mode == :queue
19
+ if relation.respond_to?(:primary_key)
20
+ relation = relation.except(:includes, :preload)
21
+ unless mode == :queue && relation.klass.method_defined?(:search_routing)
22
+ relation = relation.except(:select).select(relation.primary_key)
23
+ end
24
+ elsif relation.respond_to?(:only)
25
+ unless mode == :queue && relation.klass.method_defined?(:search_routing)
26
+ relation = relation.only(:_id)
27
+ end
28
+ end
29
+ end
30
+
31
+ if mode == :async && full
32
+ return full_reindex_async(relation)
33
+ end
34
+
35
+ relation = resume_relation(relation) if resume
36
+
37
+ reindex_options = {
38
+ mode: mode,
39
+ method_name: method_name,
40
+ full: full
41
+ }
42
+ record_indexer = RecordIndexer.new(index)
43
+
44
+ in_batches(relation) do |items|
45
+ record_indexer.reindex(items, **reindex_options)
46
+ end
47
+ end
48
+
49
+ def batches_left
50
+ Searchkick.with_redis { |r| r.call("SCARD", batches_key) }
51
+ end
52
+
53
+ def batch_completed(batch_id)
54
+ Searchkick.with_redis { |r| r.call("SREM", batches_key, [batch_id]) }
55
+ end
56
+
57
+ private
58
+
59
+ def resume_relation(relation)
60
+ if relation.respond_to?(:primary_key)
61
+ # use total docs instead of max id since there's not a great way
62
+ # to get the max _id without scripting since it's a string
63
+ where = relation.arel_table[relation.primary_key].gt(index.total_docs)
64
+ relation = relation.where(where)
65
+ else
66
+ raise Error, "Resume not supported for Mongoid"
67
+ end
68
+ end
69
+
70
+ def in_batches(relation)
71
+ if relation.respond_to?(:find_in_batches)
72
+ klass = relation.klass
73
+ # remove order to prevent possible warnings
74
+ relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
75
+ # prevent scope from affecting search_data as well as inline jobs
76
+ # Active Record runs relation calls in scoping block
77
+ # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
78
+ # note: we could probably just call klass.current_scope = nil
79
+ # anywhere in reindex method (after initial all call),
80
+ # but this is more cautious
81
+ previous_scope = klass.current_scope(true)
82
+ if previous_scope
83
+ begin
84
+ klass.current_scope = nil
85
+ yield batch
86
+ ensure
87
+ klass.current_scope = previous_scope
88
+ end
89
+ else
90
+ yield batch
91
+ end
92
+ end
93
+ else
94
+ klass = relation.klass
95
+ each_batch(relation, batch_size: batch_size) do |batch|
96
+ # prevent scope from affecting search_data as well as inline jobs
97
+ # note: Model.with_scope doesn't always restore scope, so use custom logic
98
+ previous_scope = Mongoid::Threaded.current_scope(klass)
99
+ if previous_scope
100
+ begin
101
+ Mongoid::Threaded.set_current_scope(nil, klass)
102
+ yield batch
103
+ ensure
104
+ Mongoid::Threaded.set_current_scope(previous_scope, klass)
105
+ end
106
+ else
107
+ yield batch
108
+ end
109
+ end
110
+ end
111
+ end
112
+
113
+ def each_batch(relation, batch_size:)
114
+ # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
115
+ # use cursor for Mongoid
116
+ items = []
117
+ relation.all.each do |item|
118
+ items << item
119
+ if items.length == batch_size
120
+ yield items
121
+ items = []
122
+ end
123
+ end
124
+ yield items if items.any?
125
+ end
126
+
127
+ def batch_size
128
+ @batch_size ||= index.options[:batch_size] || 1000
129
+ end
130
+
131
+ def full_reindex_async(relation)
132
+ batch_id = 1
133
+ class_name = relation.searchkick_options[:class_name]
134
+
135
+ in_batches(relation) do |items|
136
+ batch_job(class_name, batch_id, items.map(&:id))
137
+ batch_id += 1
138
+ end
139
+ end
140
+
141
+ def batch_job(class_name, batch_id, record_ids)
142
+ Searchkick.with_redis { |r| r.call("SADD", batches_key, [batch_id]) }
143
+ Searchkick::BulkReindexJob.perform_later(
144
+ class_name: class_name,
145
+ index_name: index.name,
146
+ batch_id: batch_id,
147
+ record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
148
+ )
149
+ end
150
+
151
+ def batches_key
152
+ "searchkick:reindex:#{index.name}:batches"
153
+ end
154
+ end
155
+ end