searchkick 2.3.2 → 4.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +251 -84
- data/LICENSE.txt +1 -1
- data/README.md +552 -432
- data/lib/searchkick/bulk_indexer.rb +173 -0
- data/lib/searchkick/bulk_reindex_job.rb +2 -2
- data/lib/searchkick/hash_wrapper.rb +12 -0
- data/lib/searchkick/index.rb +187 -348
- data/lib/searchkick/index_options.rb +494 -282
- data/lib/searchkick/logging.rb +17 -13
- data/lib/searchkick/model.rb +52 -97
- data/lib/searchkick/multi_search.rb +9 -10
- data/lib/searchkick/process_batch_job.rb +17 -4
- data/lib/searchkick/process_queue_job.rb +20 -12
- data/lib/searchkick/query.rb +415 -199
- data/lib/searchkick/railtie.rb +7 -0
- data/lib/searchkick/record_data.rb +128 -0
- data/lib/searchkick/record_indexer.rb +79 -0
- data/lib/searchkick/reindex_queue.rb +1 -1
- data/lib/searchkick/reindex_v2_job.rb +14 -12
- data/lib/searchkick/results.rb +135 -41
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +130 -61
- data/lib/tasks/searchkick.rake +34 -0
- metadata +18 -162
- data/.gitignore +0 -22
- data/.travis.yml +0 -39
- data/Gemfile +0 -16
- data/Rakefile +0 -20
- data/benchmark/Gemfile +0 -23
- data/benchmark/benchmark.rb +0 -97
- data/lib/searchkick/tasks.rb +0 -33
- data/searchkick.gemspec +0 -28
- data/test/aggs_test.rb +0 -197
- data/test/autocomplete_test.rb +0 -75
- data/test/boost_test.rb +0 -202
- data/test/callbacks_test.rb +0 -59
- data/test/ci/before_install.sh +0 -17
- data/test/errors_test.rb +0 -19
- data/test/gemfiles/activerecord31.gemfile +0 -7
- data/test/gemfiles/activerecord32.gemfile +0 -7
- data/test/gemfiles/activerecord40.gemfile +0 -8
- data/test/gemfiles/activerecord41.gemfile +0 -8
- data/test/gemfiles/activerecord42.gemfile +0 -7
- data/test/gemfiles/activerecord50.gemfile +0 -7
- data/test/gemfiles/apartment.gemfile +0 -8
- data/test/gemfiles/cequel.gemfile +0 -8
- data/test/gemfiles/mongoid2.gemfile +0 -7
- data/test/gemfiles/mongoid3.gemfile +0 -6
- data/test/gemfiles/mongoid4.gemfile +0 -7
- data/test/gemfiles/mongoid5.gemfile +0 -7
- data/test/gemfiles/mongoid6.gemfile +0 -12
- data/test/gemfiles/nobrainer.gemfile +0 -8
- data/test/gemfiles/parallel_tests.gemfile +0 -8
- data/test/geo_shape_test.rb +0 -175
- data/test/highlight_test.rb +0 -78
- data/test/index_test.rb +0 -166
- data/test/inheritance_test.rb +0 -83
- data/test/marshal_test.rb +0 -8
- data/test/match_test.rb +0 -276
- data/test/misspellings_test.rb +0 -56
- data/test/model_test.rb +0 -42
- data/test/multi_search_test.rb +0 -36
- data/test/multi_tenancy_test.rb +0 -22
- data/test/order_test.rb +0 -46
- data/test/pagination_test.rb +0 -70
- data/test/partial_reindex_test.rb +0 -58
- data/test/query_test.rb +0 -35
- data/test/records_test.rb +0 -10
- data/test/reindex_test.rb +0 -64
- data/test/reindex_v2_job_test.rb +0 -32
- data/test/routing_test.rb +0 -23
- data/test/should_index_test.rb +0 -32
- data/test/similar_test.rb +0 -28
- data/test/sql_test.rb +0 -214
- data/test/suggest_test.rb +0 -95
- data/test/support/kaminari.yml +0 -21
- data/test/synonyms_test.rb +0 -67
- data/test/test_helper.rb +0 -567
- data/test/where_test.rb +0 -223
@@ -0,0 +1,173 @@
|
|
1
|
+
module Searchkick
  # Pushes records into a single index in batches.
  #
  # Work is either performed inline (records are handed to the index's
  # +bulk_index+ / +bulk_update+) or, when +async+ is requested, enqueued as
  # +Searchkick::BulkReindexJob+ jobs. Batches enqueued by a full async
  # reindex are tracked in a Redis set so that #batches_left can report how
  # many are still outstanding.
  class BulkIndexer
    attr_reader :index

    # @param index the index object records are written to; this class reads
    #   its +name+, +options+, +total_docs+ and calls its +bulk_index+ /
    #   +bulk_update+
    def initialize(index)
      @index = index
    end

    # Imports every record in +relation+ into the index.
    #
    # @param relation collection to import (anything responding to
    #   +find_in_batches+, or to +all+ returning something enumerable)
    # @param resume [Boolean] only import rows whose id is greater than the
    #   number of documents already in the index (+find_in_batches+ path only)
    # @param method_name [Symbol, String, nil] when given, records are
    #   partially updated through +bulk_update+ instead of fully indexed
    # @param async [Boolean] enqueue jobs instead of indexing inline
    # @param batch [Boolean] treat +relation+ as one already-sized batch
    # @param batch_id identifier removed from the Redis batch set once a
    #   +batch+ import has finished
    # @param full [Boolean] together with +async+, fan the whole relation out
    #   into tracked background batches
    # @param scope [Symbol, String, nil] name of a method sent to +relation+
    #   to narrow it; when absent, +search_import+ is used if available
    def import_scope(relation, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false, scope: nil)
      if scope
        relation = relation.send(scope)
      elsif relation.respond_to?(:search_import)
        relation = relation.search_import
      end

      if batch
        import_or_update relation.to_a, method_name, async
        # the batch is done: stop tracking it
        Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
      elsif full && async
        full_reindex_async(relation)
      elsif relation.respond_to?(:find_in_batches)
        if resume
          # use total docs instead of max id since there's not a great way
          # to get the max _id without scripting since it's a string

          # TODO use primary key and prefix with table name
          relation = relation.where("id > ?", index.total_docs)
        end

        # async jobs only need the ids, so skip loading columns/associations
        relation = relation.select("id").except(:includes, :preload) if async

        relation.find_in_batches(batch_size: batch_size) do |items|
          import_or_update items, method_name, async
        end
      else
        each_batch(relation) do |items|
          import_or_update items, method_name, async
        end
      end
    end

    # Queues full index operations for +records+ on the shared indexer.
    def bulk_index(records)
      Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).index_data })
    end

    # Queues delete operations for +records+; records with a blank id are
    # skipped.
    def bulk_delete(records)
      Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(index, r).delete_data })
    end

    # Queues partial update operations for +records+, with the document body
    # built via +method_name+.
    def bulk_update(records, method_name)
      Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).update_data(method_name) })
    end

    # @return number of batch ids still present in the Redis batch set
    def batches_left
      Searchkick.with_redis { |r| r.scard(batches_key) }
    end

    private

    # Indexes (or enqueues) one batch of records. Does nothing for an empty
    # batch.
    def import_or_update(records, method_name, async)
      return unless records.any?

      if async
        Searchkick::BulkReindexJob.perform_later(
          class_name: records.first.class.searchkick_options[:class_name],
          record_ids: records.map(&:id),
          index_name: index.name,
          method_name: method_name ? method_name.to_s : nil
        )
      else
        indexable = records.select(&:should_index?)
        return unless indexable.any?

        with_retries do
          # call out to index for ActiveSupport notifications
          if method_name
            index.bulk_update(indexable, method_name)
          else
            index.bulk_index(indexable)
          end
        end
      end
    end

    # Splits +scope+ into batches and enqueues one tracked job per batch.
    def full_reindex_async(scope)
      if scope.respond_to?(:primary_key)
        # TODO expire Redis key
        primary_key = scope.primary_key

        scope = scope.select(primary_key).except(:includes, :preload)

        # +false+ marks "could not compute a minimum", which routes to the
        # id-list fallback below; +nil+ means the scope is empty
        starting_id =
          begin
            scope.minimum(primary_key)
          rescue ActiveRecord::StatementInvalid
            false
          end

        if starting_id.nil?
          # no records, do nothing
        elsif starting_id.is_a?(Numeric)
          # numeric keys: hand each job an id range instead of an id list
          max_id = scope.maximum(primary_key)
          batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil

          batches_count.times do |i|
            batch_id = i + 1
            min_id = starting_id + (i * batch_size)
            bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
          end
        else
          scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
            batch_id = i + 1

            bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
          end
        end
      else
        batch_id = 1
        # TODO remove any eager loading
        scope = scope.only(:_id) if scope.respond_to?(:only)
        each_batch(scope) do |items|
          bulk_reindex_job scope, batch_id, record_ids: items.map { |item| item.id.to_s }
          batch_id += 1
        end
      end
    end

    # Yields the contents of +scope.all+ in arrays of at most +batch_size+
    # items, for collections without +find_in_batches+.
    def each_batch(scope)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      items = []
      scope.all.each do |item|
        items << item
        if items.length == batch_size
          yield items
          items = []
        end
      end
      yield items if items.any?
    end

    # Records +batch_id+ in the Redis batch set, then enqueues the job that
    # will process it. +options+ carries either +record_ids+ or a
    # +min_id+/+max_id+ pair.
    def bulk_reindex_job(scope, batch_id, options)
      Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
      Searchkick::BulkReindexJob.perform_later(**{
        class_name: scope.searchkick_options[:class_name],
        index_name: index.name,
        batch_id: batch_id
      }.merge(options))
    end

    # Runs the block, retrying exactly once if it raises
    # +Faraday::ClientError+; a second failure is re-raised to the caller.
    def with_retries
      retries = 0

      begin
        yield
      rescue Faraday::ClientError
        raise unless retries < 1

        retries += 1
        retry
      end
    end

    # Redis key of the set holding this index's outstanding batch ids.
    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end

    # Batch size from the index options, defaulting to 1000.
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end
  end
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Searchkick
|
2
2
|
class BulkReindexJob < ActiveJob::Base
|
3
|
-
queue_as
|
3
|
+
queue_as { Searchkick.queue_name }
|
4
4
|
|
5
5
|
def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
|
6
6
|
klass = class_name.constantize
|
7
|
-
index = index_name ? Searchkick::Index.new(index_name) : klass.searchkick_index
|
7
|
+
index = index_name ? Searchkick::Index.new(index_name, **klass.searchkick_options) : klass.searchkick_index
|
8
8
|
record_ids ||= min_id..max_id
|
9
9
|
index.import_scope(
|
10
10
|
Searchkick.load_records(klass, record_ids),
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Searchkick
  # Wrapper around Hash-like structures (Elasticsearch responses), built
  # on `Hashie::Mash`.
  #
  # It exists mainly to silence the warning Hashie prints when a key
  # re-defines an existing method, such as `sort`.
  class HashWrapper < ::Hashie::Mash
    # only call `disable_warnings` when this class responds to it
    if respond_to?(:disable_warnings)
      disable_warnings
    end
  end
end
|