searchkick 4.6.3 → 5.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,79 +1,163 @@
1
1
  module Searchkick
2
2
  class RecordIndexer
3
- attr_reader :record, :index
3
+ attr_reader :index
4
4
 
5
- def initialize(record)
6
- @record = record
7
- @index = record.class.searchkick_index
5
+ def initialize(index)
6
+ @index = index
8
7
  end
9
8
 
10
- def reindex(method_name = nil, refresh: false, mode: nil)
11
- unless [:inline, true, nil, :async, :queue].include?(mode)
12
- raise ArgumentError, "Invalid value for mode"
13
- end
14
-
15
- mode ||= Searchkick.callbacks_value || index.options[:callbacks] || true
9
+ def reindex(records, mode:, method_name:, full: false, single: false)
10
+ # prevents exists? check if records is a relation
11
+ records = records.to_a
12
+ return if records.empty?
16
13
 
17
14
  case mode
15
+ when :async
16
+ unless defined?(ActiveJob)
17
+ raise Error, "Active Job not found"
18
+ end
19
+
20
+ # we could likely combine ReindexV2Job, BulkReindexJob, and ProcessBatchJob
21
+ # but keep them separate for now
22
+ if single
23
+ record = records.first
24
+
25
+ # always pass routing in case record is deleted
26
+ # before the async job runs
27
+ if record.respond_to?(:search_routing)
28
+ routing = record.search_routing
29
+ end
30
+
31
+ Searchkick::ReindexV2Job.perform_later(
32
+ record.class.name,
33
+ record.id.to_s,
34
+ method_name ? method_name.to_s : nil,
35
+ routing: routing,
36
+ index_name: index.name
37
+ )
38
+ else
39
+ Searchkick::BulkReindexJob.perform_later(
40
+ class_name: records.first.class.searchkick_options[:class_name],
41
+ record_ids: records.map { |r| r.id.to_s },
42
+ index_name: index.name,
43
+ method_name: method_name ? method_name.to_s : nil
44
+ )
45
+ end
18
46
  when :queue
19
47
  if method_name
20
- raise Searchkick::Error, "Partial reindex not supported with queue option"
48
+ raise Error, "Partial reindex not supported with queue option"
21
49
  end
22
50
 
23
- # always pass routing in case record is deleted
24
- # before the queue job runs
25
- if record.respond_to?(:search_routing)
26
- routing = record.search_routing
27
- end
51
+ index.reindex_queue.push_records(records)
52
+ when true, :inline
53
+ index_records, other_records = records.partition { |r| index_record?(r) }
54
+ import_inline(index_records, !full ? other_records : [], method_name: method_name, single: single)
55
+ else
56
+ raise ArgumentError, "Invalid value for mode"
57
+ end
28
58
 
29
- # escape pipe with double pipe
30
- value = queue_escape(record.id.to_s)
31
- value = "#{value}|#{queue_escape(routing)}" if routing
32
- index.reindex_queue.push(value)
33
- when :async
34
- unless defined?(ActiveJob)
35
- raise Searchkick::Error, "Active Job not found"
36
- end
59
+ # return true like model and relation reindex for now
60
+ true
61
+ end
37
62
 
38
- # always pass routing in case record is deleted
39
- # before the async job runs
40
- if record.respond_to?(:search_routing)
41
- routing = record.search_routing
42
- end
63
+ def reindex_items(klass, items, method_name:, single: false)
64
+ routing = items.to_h { |r| [r[:id], r[:routing]] }
65
+ record_ids = routing.keys
43
66
 
44
- Searchkick::ReindexV2Job.perform_later(
45
- record.class.name,
46
- record.id.to_s,
47
- method_name ? method_name.to_s : nil,
48
- routing: routing
49
- )
50
- else # bulk, inline/true/nil
51
- reindex_record(method_name)
67
+ relation = Searchkick.load_records(klass, record_ids)
68
+ # call search_import even for single records for nested associations
69
+ relation = relation.search_import if relation.respond_to?(:search_import)
70
+ records = relation.select(&:should_index?)
52
71
 
53
- index.refresh if refresh
54
- end
72
+ # determine which records to delete
73
+ delete_ids = record_ids - records.map { |r| r.id.to_s }
74
+ delete_records =
75
+ delete_ids.map do |id|
76
+ construct_record(klass, id, routing[id])
77
+ end
78
+
79
+ import_inline(records, delete_records, method_name: method_name, single: single)
55
80
  end
56
81
 
57
82
  private
58
83
 
59
- def queue_escape(value)
60
- value.gsub("|", "||")
84
+ def index_record?(record)
85
+ record.persisted? && !record.destroyed? && record.should_index?
61
86
  end
62
87
 
63
- def reindex_record(method_name)
64
- if record.destroyed? || !record.persisted? || !record.should_index?
65
- begin
66
- index.remove(record)
67
- rescue => e
68
- raise e unless Searchkick.not_found_error?(e)
69
- # do nothing if not found
88
+ # import in single request with retries
89
+ def import_inline(index_records, delete_records, method_name:, single:)
90
+ return if index_records.empty? && delete_records.empty?
91
+
92
+ maybe_bulk(index_records, delete_records, method_name, single) do
93
+ if index_records.any?
94
+ if method_name
95
+ index.bulk_update(index_records, method_name)
96
+ else
97
+ index.bulk_index(index_records)
98
+ end
70
99
  end
100
+
101
+ if delete_records.any?
102
+ index.bulk_delete(delete_records)
103
+ end
104
+ end
105
+ end
106
+
107
+ def maybe_bulk(index_records, delete_records, method_name, single)
108
+ if Searchkick.callbacks_value == :bulk
109
+ yield
71
110
  else
72
- if method_name
73
- index.update_record(record, method_name)
74
- else
75
- index.store(record)
111
+ # set action and data
112
+ action =
113
+ if single && index_records.empty?
114
+ "Remove"
115
+ elsif method_name
116
+ "Update"
117
+ else
118
+ single ? "Store" : "Import"
119
+ end
120
+ record = index_records.first || delete_records.first
121
+ name = record.class.searchkick_klass.name
122
+ message = lambda do |event|
123
+ event[:name] = "#{name} #{action}"
124
+ if single
125
+ event[:id] = index.search_id(record)
126
+ else
127
+ event[:count] = index_records.size + delete_records.size
128
+ end
129
+ end
130
+
131
+ with_retries do
132
+ Searchkick.callbacks(:bulk, message: message) do
133
+ yield
134
+ end
135
+ end
136
+ end
137
+ end
138
+
139
+ def construct_record(klass, id, routing)
140
+ record = klass.new
141
+ record.id = id
142
+ if routing
143
+ record.define_singleton_method(:search_routing) do
144
+ routing
145
+ end
146
+ end
147
+ record
148
+ end
149
+
150
+ def with_retries
151
+ retries = 0
152
+
153
+ begin
154
+ yield
155
+ rescue Faraday::ClientError => e
156
+ if retries < 1
157
+ retries += 1
158
+ retry
76
159
  end
160
+ raise e
77
161
  end
78
162
  end
79
163
  end
@@ -5,11 +5,30 @@ module Searchkick
5
5
  def initialize(name)
6
6
  @name = name
7
7
 
8
- raise Searchkick::Error, "Searchkick.redis not set" unless Searchkick.redis
8
+ raise Error, "Searchkick.redis not set" unless Searchkick.redis
9
9
  end
10
10
 
11
- def push(record_id)
12
- Searchkick.with_redis { |r| r.lpush(redis_key, record_id) }
11
+ # supports single and multiple ids
12
+ def push(record_ids)
13
+ Searchkick.with_redis { |r| r.lpush(redis_key, record_ids) }
14
+ end
15
+
16
+ def push_records(records)
17
+ record_ids =
18
+ records.map do |record|
19
+ # always pass routing in case record is deleted
20
+ # before the queue job runs
21
+ if record.respond_to?(:search_routing)
22
+ routing = record.search_routing
23
+ end
24
+
25
+ # escape pipe with double pipe
26
+ value = escape(record.id.to_s)
27
+ value = "#{value}|#{escape(routing)}" if routing
28
+ value
29
+ end
30
+
31
+ push(record_ids)
13
32
  end
14
33
 
15
34
  # TODO use reliable queuing
@@ -48,5 +67,9 @@ module Searchkick
48
67
  def redis_version
49
68
  @redis_version ||= Searchkick.with_redis { |r| Gem::Version.new(r.info["redis_version"]) }
50
69
  end
70
+
71
+ def escape(value)
72
+ value.gsub("|", "||")
73
+ end
51
74
  end
52
75
  end
@@ -1,41 +1,17 @@
1
1
  module Searchkick
2
2
  class ReindexV2Job < ActiveJob::Base
3
- RECORD_NOT_FOUND_CLASSES = [
4
- "ActiveRecord::RecordNotFound",
5
- "Mongoid::Errors::DocumentNotFound",
6
- "NoBrainer::Error::DocumentNotFound",
7
- "Cequel::Record::RecordNotFound"
8
- ]
9
-
10
3
  queue_as { Searchkick.queue_name }
11
4
 
12
- def perform(klass, id, method_name = nil, routing: nil)
13
- model = klass.constantize
14
- record =
15
- begin
16
- if model.respond_to?(:unscoped)
17
- model.unscoped.find(id)
18
- else
19
- model.find(id)
20
- end
21
- rescue => e
22
- # check by name rather than rescue directly so we don't need
23
- # to determine which classes are defined
24
- raise e unless RECORD_NOT_FOUND_CLASSES.include?(e.class.name)
25
- nil
26
- end
27
-
28
- unless record
29
- record = model.new
30
- record.id = id
31
- if routing
32
- record.define_singleton_method(:search_routing) do
33
- routing
34
- end
35
- end
36
- end
37
-
38
- RecordIndexer.new(record).reindex(method_name, mode: :inline)
5
+ def perform(class_name, id, method_name = nil, routing: nil, index_name: nil)
6
+ model = Searchkick.load_model(class_name, allow_child: true)
7
+ index = model.searchkick_index(name: index_name)
8
+ # use should_index? to decide whether to index (not default scope)
9
+ # just like saving inline
10
+ # could use Searchkick.scope() in future
11
+ # but keep for now for backwards compatibility
12
+ model = model.unscoped if model.respond_to?(:unscoped)
13
+ items = [{id: id, routing: routing}]
14
+ RecordIndexer.new(index).reindex_items(model, items, method_name: method_name, single: true)
39
15
  end
40
16
  end
41
17
  end
@@ -0,0 +1,112 @@
1
+ module Searchkick
2
+ class Relation
3
+ NO_DEFAULT_VALUE = Object.new
4
+
5
+ # note: modifying body directly is not supported
6
+ # and has no impact on query after being executed
7
+ # TODO freeze body object?
8
+ delegate :body, :params, to: :query
9
+ delegate_missing_to :private_execute
10
+
11
+ def initialize(model, term = "*", **options)
12
+ @model = model
13
+ @term = term
14
+ @options = options
15
+
16
+ # generate query to validate options
17
+ query
18
+ end
19
+
20
+ # same as Active Record
21
+ def inspect
22
+ entries = results.first(11).map!(&:inspect)
23
+ entries[10] = "..." if entries.size == 11
24
+ "#<#{self.class.name} [#{entries.join(', ')}]>"
25
+ end
26
+
27
+ def execute
28
+ Searchkick.warn("The execute method is no longer needed")
29
+ private_execute
30
+ self
31
+ end
32
+
33
+ def limit(value)
34
+ clone.limit!(value)
35
+ end
36
+
37
+ def limit!(value)
38
+ check_loaded
39
+ @options[:limit] = value
40
+ self
41
+ end
42
+
43
+ def offset(value = NO_DEFAULT_VALUE)
44
+ # TODO remove in Searchkick 6
45
+ if value == NO_DEFAULT_VALUE
46
+ private_execute.offset
47
+ else
48
+ clone.offset!(value)
49
+ end
50
+ end
51
+
52
+ def offset!(value)
53
+ check_loaded
54
+ @options[:offset] = value
55
+ self
56
+ end
57
+
58
+ def page(value)
59
+ clone.page!(value)
60
+ end
61
+
62
+ def page!(value)
63
+ check_loaded
64
+ @options[:page] = value
65
+ self
66
+ end
67
+
68
+ def per_page(value = NO_DEFAULT_VALUE)
69
+ # TODO remove in Searchkick 6
70
+ if value == NO_DEFAULT_VALUE
71
+ private_execute.per_page
72
+ else
73
+ clone.per_page!(value)
74
+ end
75
+ end
76
+
77
+ def per_page!(value)
78
+ check_loaded
79
+ @options[:per_page] = value
80
+ self
81
+ end
82
+
83
+ def only(*keys)
84
+ Relation.new(@model, @term, **@options.slice(*keys))
85
+ end
86
+
87
+ def except(*keys)
88
+ Relation.new(@model, @term, **@options.except(*keys))
89
+ end
90
+
91
+ def loaded?
92
+ !@execute.nil?
93
+ end
94
+
95
+ private
96
+
97
+ def private_execute
98
+ @execute ||= query.execute
99
+ end
100
+
101
+ def query
102
+ @query ||= Query.new(@model, @term, **@options)
103
+ end
104
+
105
+ def check_loaded
106
+ raise Error, "Relation loaded" if loaded?
107
+
108
+ # reset query since options will change
109
+ @query = nil
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,150 @@
1
+ module Searchkick
2
+ class RelationIndexer
3
+ attr_reader :index
4
+
5
+ def initialize(index)
6
+ @index = index
7
+ end
8
+
9
+ def reindex(relation, mode:, method_name: nil, full: false, resume: false, scope: nil)
10
+ # apply scopes
11
+ if scope
12
+ relation = relation.send(scope)
13
+ elsif relation.respond_to?(:search_import)
14
+ relation = relation.search_import
15
+ end
16
+
17
+ # remove unneeded loading for async
18
+ if mode == :async
19
+ if relation.respond_to?(:primary_key)
20
+ relation = relation.select(relation.primary_key).except(:includes, :preload)
21
+ elsif relation.respond_to?(:only)
22
+ relation = relation.only(:_id)
23
+ end
24
+ end
25
+
26
+ if mode == :async && full
27
+ return full_reindex_async(relation)
28
+ end
29
+
30
+ relation = resume_relation(relation) if resume
31
+
32
+ reindex_options = {
33
+ mode: mode,
34
+ method_name: method_name,
35
+ full: full
36
+ }
37
+ record_indexer = RecordIndexer.new(index)
38
+
39
+ in_batches(relation) do |items|
40
+ record_indexer.reindex(items, **reindex_options)
41
+ end
42
+ end
43
+
44
+ def batches_left
45
+ Searchkick.with_redis { |r| r.scard(batches_key) }
46
+ end
47
+
48
+ def batch_completed(batch_id)
49
+ Searchkick.with_redis { |r| r.srem(batches_key, batch_id) }
50
+ end
51
+
52
+ private
53
+
54
+ def resume_relation(relation)
55
+ if relation.respond_to?(:primary_key)
56
+ # use total docs instead of max id since there's not a great way
57
+ # to get the max _id without scripting since it's a string
58
+ where = relation.arel_table[relation.primary_key].gt(index.total_docs)
59
+ relation = relation.where(where)
60
+ else
61
+ raise Error, "Resume not supported for Mongoid"
62
+ end
63
+ end
64
+
65
+ def in_batches(relation)
66
+ if relation.respond_to?(:find_in_batches)
67
+ klass = relation.klass
68
+ # remove order to prevent possible warnings
69
+ relation.except(:order).find_in_batches(batch_size: batch_size) do |batch|
70
+ # prevent scope from affecting search_data as well as inline jobs
71
+ # Active Record runs relation calls in scoping block
72
+ # https://github.com/rails/rails/blob/main/activerecord/lib/active_record/relation/delegation.rb
73
+ # note: we could probably just call klass.current_scope = nil
74
+ # anywhere in reindex method (after initial all call),
75
+ # but this is more cautious
76
+ previous_scope = klass.current_scope(true)
77
+ if previous_scope
78
+ begin
79
+ klass.current_scope = nil
80
+ yield batch
81
+ ensure
82
+ klass.current_scope = previous_scope
83
+ end
84
+ else
85
+ yield batch
86
+ end
87
+ end
88
+ else
89
+ klass = relation.klass
90
+ each_batch(relation, batch_size: batch_size) do |batch|
91
+ # prevent scope from affecting search_data as well as inline jobs
92
+ # note: Model.with_scope doesn't always restore scope, so use custom logic
93
+ previous_scope = Mongoid::Threaded.current_scope(klass)
94
+ if previous_scope
95
+ begin
96
+ Mongoid::Threaded.set_current_scope(nil, klass)
97
+ yield batch
98
+ ensure
99
+ Mongoid::Threaded.set_current_scope(previous_scope, klass)
100
+ end
101
+ else
102
+ yield batch
103
+ end
104
+ end
105
+ end
106
+ end
107
+
108
+ def each_batch(relation, batch_size:)
109
+ # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
110
+ # use cursor for Mongoid
111
+ items = []
112
+ relation.all.each do |item|
113
+ items << item
114
+ if items.length == batch_size
115
+ yield items
116
+ items = []
117
+ end
118
+ end
119
+ yield items if items.any?
120
+ end
121
+
122
+ def batch_size
123
+ @batch_size ||= index.options[:batch_size] || 1000
124
+ end
125
+
126
+ def full_reindex_async(relation)
127
+ batch_id = 1
128
+ class_name = relation.searchkick_options[:class_name]
129
+
130
+ in_batches(relation) do |items|
131
+ batch_job(class_name, batch_id, items.map(&:id))
132
+ batch_id += 1
133
+ end
134
+ end
135
+
136
+ def batch_job(class_name, batch_id, record_ids)
137
+ Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
138
+ Searchkick::BulkReindexJob.perform_later(
139
+ class_name: class_name,
140
+ index_name: index.name,
141
+ batch_id: batch_id,
142
+ record_ids: record_ids.map { |v| v.instance_of?(Integer) ? v : v.to_s }
143
+ )
144
+ end
145
+
146
+ def batches_key
147
+ "searchkick:reindex:#{index.name}:batches"
148
+ end
149
+ end
150
+ end