searchkick 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,78 +1,163 @@
1
1
  module Searchkick
2
2
  class RecordIndexer
3
- attr_reader :record, :index
3
+ attr_reader :index
4
4
 
5
- def initialize(record)
6
- @record = record
7
- @index = record.class.searchkick_index
5
+ def initialize(index)
6
+ @index = index
8
7
  end
9
8
 
10
- def reindex(method_name = nil, refresh: false, mode: nil)
11
- unless [:inline, true, nil, :async, :queue].include?(mode)
12
- raise ArgumentError, "Invalid value for mode"
13
- end
14
-
15
- mode ||= Searchkick.callbacks_value || index.options[:callbacks] || true
9
+ def reindex(records, mode:, method_name:, full: false, single: false)
10
+ # prevents exists? check if records is a relation
11
+ records = records.to_a
12
+ return if records.empty?
16
13
 
17
14
  case mode
15
+ when :async
16
+ unless defined?(ActiveJob)
17
+ raise Searchkick::Error, "Active Job not found"
18
+ end
19
+
20
+ # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
21
+ # but keep them separate for now
22
+ if single
23
+ record = records.first
24
+
25
+ # always pass routing in case record is deleted
26
+ # before the async job runs
27
+ if record.respond_to?(:search_routing)
28
+ routing = record.search_routing
29
+ end
30
+
31
+ Searchkick::ReindexV2Job.perform_later(
32
+ record.class.name,
33
+ record.id.to_s,
34
+ method_name ? method_name.to_s : nil,
35
+ routing: routing,
36
+ index_name: index.name
37
+ )
38
+ else
39
+ Searchkick::BulkReindexJob.perform_later(
40
+ class_name: records.first.class.searchkick_options[:class_name],
41
+ record_ids: records.map { |r| r.id.to_s },
42
+ index_name: index.name,
43
+ method_name: method_name ? method_name.to_s : nil
44
+ )
45
+ end
18
46
  when :queue
19
47
  if method_name
20
48
  raise Searchkick::Error, "Partial reindex not supported with queue option"
21
49
  end
22
50
 
23
- # always pass routing in case record is deleted
24
- # before the queue job runs
25
- if record.respond_to?(:search_routing)
26
- routing = record.search_routing
27
- end
51
+ index.reindex_queue.push_records(records)
52
+ when true, :inline
53
+ index_records, other_records = records.partition { |r| index_record?(r) }
54
+ import_inline(index_records, !full ? other_records : [], method_name: method_name, single: single)
55
+ else
56
+ raise ArgumentError, "Invalid value for mode"
57
+ end
28
58
 
29
- # escape pipe with double pipe
30
- value = queue_escape(record.id.to_s)
31
- value = "#{value}|#{queue_escape(routing)}" if routing
32
- index.reindex_queue.push(value)
33
- when :async
34
- unless defined?(ActiveJob)
35
- raise Searchkick::Error, "Active Job not found"
36
- end
59
+ # return true like model and relation reindex for now
60
+ true
61
+ end
37
62
 
38
- # always pass routing in case record is deleted
39
- # before the async job runs
40
- if record.respond_to?(:search_routing)
41
- routing = record.search_routing
42
- end
63
+ def reindex_items(klass, items, method_name:, single: false)
64
+ routing = items.to_h { |r| [r[:id], r[:routing]] }
65
+ record_ids = routing.keys
43
66
 
44
- Searchkick::ReindexV2Job.perform_later(
45
- record.class.name,
46
- record.id.to_s,
47
- method_name ? method_name.to_s : nil,
48
- routing: routing
49
- )
50
- else # bulk, inline/true/nil
51
- reindex_record(method_name)
67
+ relation = Searchkick.load_records(klass, record_ids)
68
+ # call search_import even for single records for nested associations
69
+ relation = relation.search_import if relation.respond_to?(:search_import)
70
+ records = relation.select(&:should_index?)
52
71
 
53
- index.refresh if refresh
54
- end
72
+ # determine which records to delete
73
+ delete_ids = record_ids - records.map { |r| r.id.to_s }
74
+ delete_records =
75
+ delete_ids.map do |id|
76
+ construct_record(klass, id, routing[id])
77
+ end
78
+
79
+ import_inline(records, delete_records, method_name: method_name, single: single)
55
80
  end
56
81
 
57
82
  private
58
83
 
59
- def queue_escape(value)
60
- value.gsub("|", "||")
84
+ def index_record?(record)
85
+ record.persisted? && !record.destroyed? && record.should_index?
61
86
  end
62
87
 
63
- def reindex_record(method_name)
64
- if record.destroyed? || !record.persisted? || !record.should_index?
65
- begin
66
- index.remove(record)
67
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
68
- # do nothing
88
+ # import in single request with retries
89
+ def import_inline(index_records, delete_records, method_name:, single:)
90
+ return if index_records.empty? && delete_records.empty?
91
+
92
+ maybe_bulk(index_records, delete_records, method_name, single) do
93
+ if index_records.any?
94
+ if method_name
95
+ index.bulk_update(index_records, method_name)
96
+ else
97
+ index.bulk_index(index_records)
98
+ end
99
+ end
100
+
101
+ if delete_records.any?
102
+ index.bulk_delete(delete_records)
69
103
  end
104
+ end
105
+ end
106
+
107
+ def maybe_bulk(index_records, delete_records, method_name, single)
108
+ if Searchkick.callbacks_value == :bulk
109
+ yield
70
110
  else
71
- if method_name
72
- index.update_record(record, method_name)
73
- else
74
- index.store(record)
111
+ # set action and data
112
+ action =
113
+ if single && index_records.empty?
114
+ "Remove"
115
+ elsif method_name
116
+ "Update"
117
+ else
118
+ single ? "Store" : "Import"
119
+ end
120
+ record = index_records.first || delete_records.first
121
+ name = record.class.searchkick_klass.name
122
+ message = lambda do |event|
123
+ event[:name] = "#{name} #{action}"
124
+ if single
125
+ event[:id] = index.search_id(record)
126
+ else
127
+ event[:count] = index_records.size + delete_records.size
128
+ end
129
+ end
130
+
131
+ with_retries do
132
+ Searchkick.callbacks(:bulk, message: message) do
133
+ yield
134
+ end
135
+ end
136
+ end
137
+ end
138
+
139
+ def construct_record(klass, id, routing)
140
+ record = klass.new
141
+ record.id = id
142
+ if routing
143
+ record.define_singleton_method(:search_routing) do
144
+ routing
145
+ end
146
+ end
147
+ record
148
+ end
149
+
150
+ def with_retries
151
+ retries = 0
152
+
153
+ begin
154
+ yield
155
+ rescue Faraday::ClientError => e
156
+ if retries < 1
157
+ retries += 1
158
+ retry
75
159
  end
160
+ raise e
76
161
  end
77
162
  end
78
163
  end
@@ -8,17 +8,42 @@ module Searchkick
8
8
  raise Searchkick::Error, "Searchkick.redis not set" unless Searchkick.redis
9
9
  end
10
10
 
11
- def push(record_id)
12
- Searchkick.with_redis { |r| r.lpush(redis_key, record_id) }
11
+ # supports single and multiple ids
12
+ def push(record_ids)
13
+ Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
14
+ end
15
+
16
+ def push_records(records)
17
+ record_ids =
18
+ records.map do |record|
19
+ # always pass routing in case record is deleted
20
+ # before the queue job runs
21
+ if record.respond_to?(:search_routing)
22
+ routing = record.search_routing
23
+ end
24
+
25
+ # escape pipe with double pipe
26
+ value = escape(record.id.to_s)
27
+ value = "#{value}|#{escape(routing)}" if routing
28
+ value
29
+ end
30
+
31
+ push(record_ids)
13
32
  end
14
33
 
15
34
  # TODO use reliable queuing
16
35
  def reserve(limit: 1000)
17
- record_ids = Set.new
18
- while record_ids.size < limit && (record_id = Searchkick.with_redis { |r| r.rpop(redis_key) })
19
- record_ids << record_id
36
+ if supports_rpop_with_count?
37
+ Searchkick.with_redis { |r| r.call("rpop", redis_key, limit) }.to_a
38
+ else
39
+ record_ids = []
40
+ Searchkick.with_redis do |r|
41
+ while record_ids.size < limit && (record_id = r.rpop(redis_key))
42
+ record_ids << record_id
43
+ end
44
+ end
45
+ record_ids
20
46
  end
21
- record_ids.to_a
22
47
  end
23
48
 
24
49
  def clear
@@ -34,5 +59,17 @@ module Searchkick
34
59
  def redis_key
35
60
  "searchkick:reindex_queue:#{name}"
36
61
  end
62
+
63
+ def supports_rpop_with_count?
64
+ redis_version >= Gem::Version.new("6.2")
65
+ end
66
+
67
+ def redis_version
68
+ @redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
69
+ end
70
+
71
+ def escape(value)
72
+ value.gsub("|", "||")
73
+ end
37
74
  end
38
75
  end
@@ -1,41 +1,17 @@
1
1
  module Searchkick
2
2
  class ReindexV2Job < ActiveJob::Base
3
- RECORD_NOT_FOUND_CLASSES = [
4
- "ActiveRecord::RecordNotFound",
5
- "Mongoid::Errors::DocumentNotFound",
6
- "NoBrainer::Error::DocumentNotFound",
7
- "Cequel::Record::RecordNotFound"
8
- ]
9
-
10
3
  queue_as { Searchkick.queue_name }
11
4
 
12
- def perform(klass, id, method_name = nil, routing: nil)
13
- model = klass.constantize
14
- record =
15
- begin
16
- if model.respond_to?(:unscoped)
17
- model.unscoped.find(id)
18
- else
19
- model.find(id)
20
- end
21
- rescue => e
22
- # check by name rather than rescue directly so we don't need
23
- # to determine which classes are defined
24
- raise e unless RECORD_NOT_FOUND_CLASSES.include?(e.class.name)
25
- nil
26
- end
27
-
28
- unless record
29
- record = model.new
30
- record.id = id
31
- if routing
32
- record.define_singleton_method(:search_routing) do
33
- routing
34
- end
35
- end
36
- end
37
-
38
- RecordIndexer.new(record).reindex(method_name, mode: :inline)
5
+ def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
6
+ model = Searchkick.load_model(class_name, allow_child: true)
7
+ index = model.searchkick_index(name: index_name)
8
+ # use should_index? to decide whether to index (not default scope)
9
+ # just like saving inline
10
+ # could use Searchkick.scope() in future
11
+ # but keep for now for backwards compatibility
12
+ model = model.unscoped if model.respond_to?(:unscoped)
13
+ items = [{id: id, routing: routing}]
14
+ RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
39
15
  end
40
16
  end
41
17
  end
@@ -0,0 +1,36 @@
1
+ module Searchkick
2
+ class Relation
3
+ # note: modifying body directly is not supported
4
+ # and has no impact on query after being executed
5
+ # TODO freeze body object?
6
+ delegate :body, :params, to: :@query
7
+ delegate_missing_to :private_execute
8
+
9
+ def initialize(model, term = "*", **options)
10
+ @query = Query.new(model, term, **options)
11
+ end
12
+
13
+ # same as Active Record
14
+ def inspect
15
+ entries = results.first(11).map!(&:inspect)
16
+ entries[10] = "..." if entries.size == 11
17
+ "#<#{self.class.name} [#{entries.join(', ')}]>"
18
+ end
19
+
20
+ def execute
21
+ Searchkick.warn("The execute method is no longer needed")
22
+ private_execute
23
+ self
24
+ end
25
+
26
+ private
27
+
28
+ def private_execute
29
+ @execute ||= @query.execute
30
+ end
31
+
32
+ def query
33
+ @query
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,150 @@
1
+ module Searchkick
2
+ class RelationIndexer
3
+ attr_reader :index
4
+
5
+ def initialize(index)
6
+ @index = index
7
+ end
8
+
9
+ def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
10
+ # apply scopes
11
+ if scope
12
+ relation = relation.send(scope)
13
+ elsif relation.respond_to?(:search_import)
14
+ relation = relation.search_import
15
+ end
16
+
17
+ # remove unneeded loading for async
18
+ if mode == :async
19
+ if relation.respond_to?(:primary_key)
20
+ relation = relation.select(relation.primary_key).except(:includes, :preload)
21
+ elsif relation.respond_to?(:only)
22
+ relation = relation.only(:_id)
23
+ end
24
+ end
25
+
26
+ if mode == :async && full
27
+ return full_reindex_async(relation)
28
+ end
29
+
30
+ relation = resume_relation(relation) if resume
31
+
32
+ reindex_options = {
33
+ mode: mode,
34
+ method_name: method_name,
35
+ full: full
36
+ }
37
+ record_indexer = RecordIndexer.new(index)
38
+
39
+ in_batches(relation) do |items|
40
+ record_indexer.reindex(items, **reindex_options)
41
+ end
42
+ end
43
+
44
+ def batches_left
45
+ Searchkick.with_redis { |r| r.scard(batches_key) }
46
+ end
47
+
48
+ def batch_completed(batch_id)
49
+ Searchkick.with_redis { |r| r.srem(batches_key, batch_id) }
50
+ end
51
+
52
+ private
53
+
54
+ def resume_relation(relation)
55
+ if relation.respond_to?(:primary_key)
56
+ # use total docs instead of max id since there's not a great way
57
+ # to get the max _id without scripting since it's a string
58
+ where = relation.arel_table[relation.primary_key].gt(index.total_docs)
59
+ relation = relation.where(where)
60
+ else
61
+ raise Error, "Resume not supported for Mongoid"
62
+ end
63
+ end
64
+
65
+ def in_batches(relation)
66
+ if relation.respond_to?(:find_in_batches)
67
+ klass = relation.klass
68
+ # remove order to prevent possible warnings
69
+ relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
70
+ # prevent scope from affecting search_data as well as inline jobs
71
+ # Active Record runs relation calls in scoping block
72
+ # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
73
+ # note: we could probably just call klass.current_scope = nil
74
+ # anywhere in reindex method (after initial all call),
75
+ # but this is more cautious
76
+ previous_scope = klass.current_scope(true)
77
+ if previous_scope
78
+ begin
79
+ klass.current_scope = nil
80
+ yield batch
81
+ ensure
82
+ klass.current_scope = previous_scope
83
+ end
84
+ else
85
+ yield batch
86
+ end
87
+ end
88
+ else
89
+ klass = relation.klass
90
+ each_batch(relation, batch_size: batch_size) do |batch|
91
+ # prevent scope from affecting search_data as well as inline jobs
92
+ # note: Model.with_scope doesn't always restore scope, so use custom logic
93
+ previous_scope = Mongoid::Threaded.current_scope(klass)
94
+ if previous_scope
95
+ begin
96
+ Mongoid::Threaded.set_current_scope(nil, klass)
97
+ yield batch
98
+ ensure
99
+ Mongoid::Threaded.set_current_scope(previous_scope, klass)
100
+ end
101
+ else
102
+ yield batch
103
+ end
104
+ end
105
+ end
106
+ end
107
+
108
+ def each_batch(relation, batch_size:)
109
+ # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
110
+ # use cursor for Mongoid
111
+ items = []
112
+ relation.all.each do |item|
113
+ items << item
114
+ if items.length == batch_size
115
+ yield items
116
+ items = []
117
+ end
118
+ end
119
+ yield items if items.any?
120
+ end
121
+
122
+ def batch_size
123
+ @batch_size ||= index.options[:batch_size] || 1000
124
+ end
125
+
126
+ def full_reindex_async(relation)
127
+ batch_id = 1
128
+ class_name = relation.searchkick_options[:class_name]
129
+
130
+ in_batches(relation) do |items|
131
+ batch_job(class_name, batch_id, items.map(&:id))
132
+ batch_id += 1
133
+ end
134
+ end
135
+
136
+ def batch_job(class_name, batch_id, record_ids)
137
+ Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
138
+ Searchkick::BulkReindexJob.perform_later(
139
+ class_name: class_name,
140
+ index_name: index.name,
141
+ batch_id: batch_id,
142
+ record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
143
+ )
144
+ end
145
+
146
+ def batches_key
147
+ "searchkick:reindex:#{index.name}:batches"
148
+ end
149
+ end
150
+ end