searchkick 4.6.3 → 5.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,79 +1,163 @@
1
1
  module Searchkick
2
2
  class RecordIndexer
3
- attr_reader :record, :index
3
+ attr_reader :index
4
4
 
5
- def initialize(record)
6
- @record = record
7
- @index = record.class.searchkick_index
5
+ def initialize(index)
6
+ @index = index
8
7
  end
9
8
 
10
- def reindex(method_name = nil, refresh: false, mode: nil)
11
- unless [:inline, true, nil, :async, :queue].include?(mode)
12
- raise ArgumentError, "Invalid value for mode"
13
- end
14
-
15
- mode ||= Searchkick.callbacks_value || index.options[:callbacks] || true
9
+ def reindex(records, mode:, method_name:, full: false, single: false)
10
+ # prevents exists? check if records is a relation
11
+ records = records.to_a
12
+ return if records.empty?
16
13
 
17
14
  case mode
15
+ when :async
16
+ unless defined?(ActiveJob)
17
+ raise Error, "Active Job not found"
18
+ end
19
+
20
+ # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
21
+ # but keep them separate for now
22
+ if single
23
+ record = records.first
24
+
25
+ # always pass routing in case record is deleted
26
+ # before the async job runs
27
+ if record.respond_to?(:search_routing)
28
+ routing = record.search_routing
29
+ end
30
+
31
+ Searchkick::ReindexV2Job.perform_later(
32
+ record.class.name,
33
+ record.id.to_s,
34
+ method_name ? method_name.to_s : nil,
35
+ routing: routing,
36
+ index_name: index.name
37
+ )
38
+ else
39
+ Searchkick::BulkReindexJob.perform_later(
40
+ class_name: records.first.class.searchkick_options[:class_name],
41
+ record_ids: records.map { |r| r.id.to_s },
42
+ index_name: index.name,
43
+ method_name: method_name ? method_name.to_s : nil
44
+ )
45
+ end
18
46
  when :queue
19
47
  if method_name
20
- raise Searchkick::Error, "Partial reindex not supported with queue option"
48
+ raise Error, "Partial reindex not supported with queue option"
21
49
  end
22
50
 
23
- # always pass routing in case record is deleted
24
- # before the queue job runs
25
- if record.respond_to?(:search_routing)
26
- routing = record.search_routing
27
- end
51
+ index.reindex_queue.push_records(records)
52
+ when true, :inline
53
+ index_records, other_records = records.partition { |r| index_record?(r) }
54
+ import_inline(index_records, !full ? other_records : [], method_name: method_name, single: single)
55
+ else
56
+ raise ArgumentError, "Invalid value for mode"
57
+ end
28
58
 
29
- # escape pipe with double pipe
30
- value = queue_escape(record.id.to_s)
31
- value = "#{value}|#{queue_escape(routing)}" if routing
32
- index.reindex_queue.push(value)
33
- when :async
34
- unless defined?(ActiveJob)
35
- raise Searchkick::Error, "Active Job not found"
36
- end
59
+ # return true like model and relation reindex for now
60
+ true
61
+ end
37
62
 
38
- # always pass routing in case record is deleted
39
- # before the async job runs
40
- if record.respond_to?(:search_routing)
41
- routing = record.search_routing
42
- end
63
+ def reindex_items(klass, items, method_name:, single: false)
64
+ routing = items.to_h { |r| [r[:id], r[:routing]] }
65
+ record_ids = routing.keys
43
66
 
44
- Searchkick::ReindexV2Job.perform_later(
45
- record.class.name,
46
- record.id.to_s,
47
- method_name ? method_name.to_s : nil,
48
- routing: routing
49
- )
50
- else # bulk, inline/true/nil
51
- reindex_record(method_name)
67
+ relation = Searchkick.load_records(klass, record_ids)
68
+ # call search_import even for single records for nested associations
69
+ relation = relation.search_import if relation.respond_to?(:search_import)
70
+ records = relation.select(&:should_index?)
52
71
 
53
- index.refresh if refresh
54
- end
72
+ # determine which records to delete
73
+ delete_ids = record_ids - records.map { |r| r.id.to_s }
74
+ delete_records =
75
+ delete_ids.map do |id|
76
+ construct_record(klass, id, routing[id])
77
+ end
78
+
79
+ import_inline(records, delete_records, method_name: method_name, single: single)
55
80
  end
56
81
 
57
82
  private
58
83
 
59
- def queue_escape(value)
60
- value.gsub("|", "||")
84
+ def index_record?(record)
85
+ record.persisted? && !record.destroyed? && record.should_index?
61
86
  end
62
87
 
63
- def reindex_record(method_name)
64
- if record.destroyed? || !record.persisted? || !record.should_index?
65
- begin
66
- index.remove(record)
67
- rescue => e
68
- raise e unless Searchkick.not_found_error?(e)
69
- # do nothing if not found
88
+ # import in single request with retries
89
+ def import_inline(index_records, delete_records, method_name:, single:)
90
+ return if index_records.empty? && delete_records.empty?
91
+
92
+ maybe_bulk(index_records, delete_records, method_name, single) do
93
+ if index_records.any?
94
+ if method_name
95
+ index.bulk_update(index_records, method_name)
96
+ else
97
+ index.bulk_index(index_records)
98
+ end
70
99
  end
100
+
101
+ if delete_records.any?
102
+ index.bulk_delete(delete_records)
103
+ end
104
+ end
105
+ end
106
+
107
+ def maybe_bulk(index_records, delete_records, method_name, single)
108
+ if Searchkick.callbacks_value == :bulk
109
+ yield
71
110
  else
72
- if method_name
73
- index.update_record(record, method_name)
74
- else
75
- index.store(record)
111
+ # set action and data
112
+ action =
113
+ if single && index_records.empty?
114
+ "Remove"
115
+ elsif method_name
116
+ "Update"
117
+ else
118
+ single ? "Store" : "Import"
119
+ end
120
+ record = index_records.first || delete_records.first
121
+ name = record.class.searchkick_klass.name
122
+ message = lambda do |event|
123
+ event[:name] = "#{name} #{action}"
124
+ if single
125
+ event[:id] = index.search_id(record)
126
+ else
127
+ event[:count] = index_records.size + delete_records.size
128
+ end
129
+ end
130
+
131
+ with_retries do
132
+ Searchkick.callbacks(:bulk, message: message) do
133
+ yield
134
+ end
135
+ end
136
+ end
137
+ end
138
+
139
+ def construct_record(klass, id, routing)
140
+ record = klass.new
141
+ record.id = id
142
+ if routing
143
+ record.define_singleton_method(:search_routing) do
144
+ routing
145
+ end
146
+ end
147
+ record
148
+ end
149
+
150
+ def with_retries
151
+ retries = 0
152
+
153
+ begin
154
+ yield
155
+ rescue Faraday::ClientError => e
156
+ if retries < 1
157
+ retries += 1
158
+ retry
76
159
  end
160
+ raise e
77
161
  end
78
162
  end
79
163
  end
@@ -5,11 +5,30 @@ module Searchkick
5
5
  def initialize(name)
6
6
  @name = name
7
7
 
8
- raise Searchkick::Error, "Searchkick.redis not set" unless Searchkick.redis
8
+ raise Error, "Searchkick.redis not set" unless Searchkick.redis
9
9
  end
10
10
 
11
- def push(record_id)
12
- Searchkick.with_redis { |r| r.lpush(redis_key, record_id) }
11
+ # supports single and multiple ids
12
+ def push(record_ids)
13
+ Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
14
+ end
15
+
16
+ def push_records(records)
17
+ record_ids =
18
+ records.map do |record|
19
+ # always pass routing in case record is deleted
20
+ # before the queue job runs
21
+ if record.respond_to?(:search_routing)
22
+ routing = record.search_routing
23
+ end
24
+
25
+ # escape pipe with double pipe
26
+ value = escape(record.id.to_s)
27
+ value = "#{value}|#{escape(routing)}" if routing
28
+ value
29
+ end
30
+
31
+ push(record_ids)
13
32
  end
14
33
 
15
34
  # TODO use reliable queuing
@@ -48,5 +67,9 @@ module Searchkick
48
67
  def redis_version
49
68
  @redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
50
69
  end
70
+
71
+ def escape(value)
72
+ value.gsub("|", "||")
73
+ end
51
74
  end
52
75
  end
@@ -1,41 +1,17 @@
1
1
  module Searchkick
2
2
  class ReindexV2Job < ActiveJob::Base
3
- RECORD_NOT_FOUND_CLASSES = [
4
- "ActiveRecord::RecordNotFound",
5
- "Mongoid::Errors::DocumentNotFound",
6
- "NoBrainer::Error::DocumentNotFound",
7
- "Cequel::Record::RecordNotFound"
8
- ]
9
-
10
3
  queue_as { Searchkick.queue_name }
11
4
 
12
- def perform(klass, id, method_name = nil, routing: nil)
13
- model = klass.constantize
14
- record =
15
- begin
16
- if model.respond_to?(:unscoped)
17
- model.unscoped.find(id)
18
- else
19
- model.find(id)
20
- end
21
- rescue => e
22
- # check by name rather than rescue directly so we don't need
23
- # to determine which classes are defined
24
- raise e unless RECORD_NOT_FOUND_CLASSES.include?(e.class.name)
25
- nil
26
- end
27
-
28
- unless record
29
- record = model.new
30
- record.id = id
31
- if routing
32
- record.define_singleton_method(:search_routing) do
33
- routing
34
- end
35
- end
36
- end
37
-
38
- RecordIndexer.new(record).reindex(method_name, mode: :inline)
5
+ def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
6
+ model = Searchkick.load_model(class_name, allow_child: true)
7
+ index = model.searchkick_index(name: index_name)
8
+ # use should_index? to decide whether to index (not default scope)
9
+ # just like saving inline
10
+ # could use Searchkick.scope() in future
11
+ # but keep for now for backwards compatibility
12
+ model = model.unscoped if model.respond_to?(:unscoped)
13
+ items = [{id: id, routing: routing}]
14
+ RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
39
15
  end
40
16
  end
41
17
  end
@@ -0,0 +1,112 @@
1
+ module Searchkick
2
+ class Relation
3
+ NO_DEFAULT_VALUE = Object.new
4
+
5
+ # note: modifying body directly is not supported
6
+ # and has no impact on query after being executed
7
+ # TODO freeze body object?
8
+ delegate :body, :params, to: :query
9
+ delegate_missing_to :private_execute
10
+
11
+ def initialize(model, term = "*", **options)
12
+ @model = model
13
+ @term = term
14
+ @options = options
15
+
16
+ # generate query to validate options
17
+ query
18
+ end
19
+
20
+ # same as Active Record
21
+ def inspect
22
+ entries = results.first(11).map!(&:inspect)
23
+ entries[10] = "..." if entries.size == 11
24
+ "#<#{self.class.name} [#{entries.join(', ')}]>"
25
+ end
26
+
27
+ def execute
28
+ Searchkick.warn("The execute method is no longer needed")
29
+ private_execute
30
+ self
31
+ end
32
+
33
+ def limit(value)
34
+ clone.limit!(value)
35
+ end
36
+
37
+ def limit!(value)
38
+ check_loaded
39
+ @options[:limit] = value
40
+ self
41
+ end
42
+
43
+ def offset(value = NO_DEFAULT_VALUE)
44
+ # TODO remove in Searchkick 6
45
+ if value == NO_DEFAULT_VALUE
46
+ private_execute.offset
47
+ else
48
+ clone.offset!(value)
49
+ end
50
+ end
51
+
52
+ def offset!(value)
53
+ check_loaded
54
+ @options[:offset] = value
55
+ self
56
+ end
57
+
58
+ def page(value)
59
+ clone.page!(value)
60
+ end
61
+
62
+ def page!(value)
63
+ check_loaded
64
+ @options[:page] = value
65
+ self
66
+ end
67
+
68
+ def per_page(value = NO_DEFAULT_VALUE)
69
+ # TODO remove in Searchkick 6
70
+ if value == NO_DEFAULT_VALUE
71
+ private_execute.per_page
72
+ else
73
+ clone.per_page!(value)
74
+ end
75
+ end
76
+
77
+ def per_page!(value)
78
+ check_loaded
79
+ @options[:per_page] = value
80
+ self
81
+ end
82
+
83
+ def only(*keys)
84
+ Relation.new(@model, @term, **@options.slice(*keys))
85
+ end
86
+
87
+ def except(*keys)
88
+ Relation.new(@model, @term, **@options.except(*keys))
89
+ end
90
+
91
+ def loaded?
92
+ !@execute.nil?
93
+ end
94
+
95
+ private
96
+
97
+ def private_execute
98
+ @execute ||= query.execute
99
+ end
100
+
101
+ def query
102
+ @query ||= Query.new(@model, @term, **@options)
103
+ end
104
+
105
+ def check_loaded
106
+ raise Error, "Relation loaded" if loaded?
107
+
108
+ # reset query since options will change
109
+ @query = nil
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,150 @@
1
+ module Searchkick
2
+ class RelationIndexer
3
+ attr_reader :index
4
+
5
+ def initialize(index)
6
+ @index = index
7
+ end
8
+
9
+ def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
10
+ # apply scopes
11
+ if scope
12
+ relation = relation.send(scope)
13
+ elsif relation.respond_to?(:search_import)
14
+ relation = relation.search_import
15
+ end
16
+
17
+ # remove unneeded loading for async
18
+ if mode == :async
19
+ if relation.respond_to?(:primary_key)
20
+ relation = relation.select(relation.primary_key).except(:includes, :preload)
21
+ elsif relation.respond_to?(:only)
22
+ relation = relation.only(:_id)
23
+ end
24
+ end
25
+
26
+ if mode == :async && full
27
+ return full_reindex_async(relation)
28
+ end
29
+
30
+ relation = resume_relation(relation) if resume
31
+
32
+ reindex_options = {
33
+ mode: mode,
34
+ method_name: method_name,
35
+ full: full
36
+ }
37
+ record_indexer = RecordIndexer.new(index)
38
+
39
+ in_batches(relation) do |items|
40
+ record_indexer.reindex(items, **reindex_options)
41
+ end
42
+ end
43
+
44
+ def batches_left
45
+ Searchkick.with_redis { |r| r.scard(batches_key) }
46
+ end
47
+
48
+ def batch_completed(batch_id)
49
+ Searchkick.with_redis { |r| r.srem(batches_key, batch_id) }
50
+ end
51
+
52
+ private
53
+
54
+ def resume_relation(relation)
55
+ if relation.respond_to?(:primary_key)
56
+ # use total docs instead of max id since there's not a great way
57
+ # to get the max _id without scripting since it's a string
58
+ where = relation.arel_table[relation.primary_key].gt(index.total_docs)
59
+ relation = relation.where(where)
60
+ else
61
+ raise Error, "Resume not supported for Mongoid"
62
+ end
63
+ end
64
+
65
+ def in_batches(relation)
66
+ if relation.respond_to?(:find_in_batches)
67
+ klass = relation.klass
68
+ # remove order to prevent possible warnings
69
+ relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
70
+ # prevent scope from affecting search_data as well as inline jobs
71
+ # Active Record runs relation calls in scoping block
72
+ # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
73
+ # note: we could probably just call klass.current_scope = nil
74
+ # anywhere in reindex method (after initial all call),
75
+ # but this is more cautious
76
+ previous_scope = klass.current_scope(true)
77
+ if previous_scope
78
+ begin
79
+ klass.current_scope = nil
80
+ yield batch
81
+ ensure
82
+ klass.current_scope = previous_scope
83
+ end
84
+ else
85
+ yield batch
86
+ end
87
+ end
88
+ else
89
+ klass = relation.klass
90
+ each_batch(relation, batch_size: batch_size) do |batch|
91
+ # prevent scope from affecting search_data as well as inline jobs
92
+ # note: Model.with_scope doesn't always restore scope, so use custom logic
93
+ previous_scope = Mongoid::Threaded.current_scope(klass)
94
+ if previous_scope
95
+ begin
96
+ Mongoid::Threaded.set_current_scope(nil, klass)
97
+ yield batch
98
+ ensure
99
+ Mongoid::Threaded.set_current_scope(previous_scope, klass)
100
+ end
101
+ else
102
+ yield batch
103
+ end
104
+ end
105
+ end
106
+ end
107
+
108
+ def each_batch(relation, batch_size:)
109
+ # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
110
+ # use cursor for Mongoid
111
+ items = []
112
+ relation.all.each do |item|
113
+ items << item
114
+ if items.length == batch_size
115
+ yield items
116
+ items = []
117
+ end
118
+ end
119
+ yield items if items.any?
120
+ end
121
+
122
+ def batch_size
123
+ @batch_size ||= index.options[:batch_size] || 1000
124
+ end
125
+
126
+ def full_reindex_async(relation)
127
+ batch_id = 1
128
+ class_name = relation.searchkick_options[:class_name]
129
+
130
+ in_batches(relation) do |items|
131
+ batch_job(class_name, batch_id, items.map(&:id))
132
+ batch_id += 1
133
+ end
134
+ end
135
+
136
+ def batch_job(class_name, batch_id, record_ids)
137
+ Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
138
+ Searchkick::BulkReindexJob.perform_later(
139
+ class_name: class_name,
140
+ index_name: index.name,
141
+ batch_id: batch_id,
142
+ record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
143
+ )
144
+ end
145
+
146
+ def batches_key
147
+ "searchkick:reindex:#{index.name}:batches"
148
+ end
149
+ end
150
+ end