searchkick 2.5.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +7 -0
  3. data/.travis.yml +2 -11
  4. data/CHANGELOG.md +22 -0
  5. data/CONTRIBUTING.md +1 -1
  6. data/Gemfile +3 -3
  7. data/LICENSE.txt +1 -1
  8. data/README.md +68 -141
  9. data/Rakefile +0 -4
  10. data/benchmark/Gemfile +3 -2
  11. data/benchmark/{benchmark.rb → index.rb} +33 -31
  12. data/benchmark/search.rb +48 -0
  13. data/docs/Searchkick-3-Upgrade.md +57 -0
  14. data/lib/searchkick.rb +50 -27
  15. data/lib/searchkick/bulk_indexer.rb +168 -0
  16. data/lib/searchkick/bulk_reindex_job.rb +1 -1
  17. data/lib/searchkick/index.rb +122 -348
  18. data/lib/searchkick/index_options.rb +29 -26
  19. data/lib/searchkick/logging.rb +8 -7
  20. data/lib/searchkick/model.rb +37 -90
  21. data/lib/searchkick/multi_search.rb +6 -7
  22. data/lib/searchkick/query.rb +169 -166
  23. data/lib/searchkick/record_data.rb +133 -0
  24. data/lib/searchkick/record_indexer.rb +55 -0
  25. data/lib/searchkick/reindex_queue.rb +1 -1
  26. data/lib/searchkick/reindex_v2_job.rb +10 -13
  27. data/lib/searchkick/results.rb +14 -25
  28. data/lib/searchkick/tasks.rb +0 -4
  29. data/lib/searchkick/version.rb +1 -1
  30. data/searchkick.gemspec +3 -3
  31. data/test/boost_test.rb +3 -9
  32. data/test/geo_shape_test.rb +0 -4
  33. data/test/highlight_test.rb +28 -12
  34. data/test/index_test.rb +9 -10
  35. data/test/language_test.rb +16 -0
  36. data/test/marshal_test.rb +6 -1
  37. data/test/match_test.rb +9 -4
  38. data/test/model_test.rb +3 -5
  39. data/test/multi_search_test.rb +0 -7
  40. data/test/order_test.rb +1 -7
  41. data/test/pagination_test.rb +1 -1
  42. data/test/reindex_v2_job_test.rb +6 -11
  43. data/test/routing_test.rb +1 -1
  44. data/test/similar_test.rb +2 -2
  45. data/test/sql_test.rb +0 -31
  46. data/test/test_helper.rb +37 -23
  47. metadata +19 -26
  48. data/test/gemfiles/activerecord31.gemfile +0 -7
  49. data/test/gemfiles/activerecord32.gemfile +0 -7
  50. data/test/gemfiles/activerecord40.gemfile +0 -8
  51. data/test/gemfiles/activerecord41.gemfile +0 -8
  52. data/test/gemfiles/mongoid2.gemfile +0 -7
  53. data/test/gemfiles/mongoid3.gemfile +0 -6
  54. data/test/gemfiles/mongoid4.gemfile +0 -7
  55. data/test/records_test.rb +0 -10
data/Rakefile CHANGED
@@ -14,7 +14,3 @@ Rake::TestTask.new do |t|
14
14
  t.pattern = "test/**/*_test.rb"
15
15
  t.warning = false
16
16
  end
17
-
18
- task :benchmark do
19
- require_relative "benchmark/benchmark"
20
- end
data/benchmark/Gemfile CHANGED
@@ -3,9 +3,9 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in searchkick.gemspec
4
4
  gemspec path: "../"
5
5
 
6
- # gem "sqlite3"
6
+ gem "sqlite3"
7
7
  gem "pg"
8
- gem "activerecord", "~> 5.0.0"
8
+ gem "activerecord", "~> 5.1.0"
9
9
  gem "activerecord-import"
10
10
  gem "activejob"
11
11
  gem "redis"
@@ -21,3 +21,4 @@ gem "allocation_stats"
21
21
  gem "get_process_mem"
22
22
  gem "memory_profiler"
23
23
  gem "allocation_tracer"
24
+ gem "benchmark-ips"
data/benchmark/{benchmark.rb → index.rb} RENAMED
@@ -6,7 +6,7 @@ require "active_support/notifications"
6
6
 
7
7
  ActiveSupport::Notifications.subscribe "request.searchkick" do |*args|
8
8
  event = ActiveSupport::Notifications::Event.new(*args)
9
- p event.duration
9
+ puts "Import: #{event.duration.round}ms"
10
10
  end
11
11
 
12
12
  ActiveJob::Base.queue_adapter = :sidekiq
@@ -33,55 +33,57 @@ class Product < ActiveRecord::Base
33
33
  end
34
34
  end
35
35
 
36
- total_docs = 100000
36
+ if ENV["SETUP"]
37
+ total_docs = 100000
37
38
 
38
- # ActiveRecord::Migration.create_table :products, force: :cascade do |t|
39
- # t.string :name
40
- # t.string :color
41
- # t.integer :store_id
42
- # end
39
+ ActiveRecord::Migration.create_table :products, force: :cascade do |t|
40
+ t.string :name
41
+ t.string :color
42
+ t.integer :store_id
43
+ end
43
44
 
44
- # Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
45
+ Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
45
46
 
46
- puts "Imported"
47
+ puts "Imported"
48
+ end
47
49
 
48
50
  result = nil
49
51
  report = nil
50
52
  stats = nil
51
53
 
52
- # p GetProcessMem.new.mb
53
-
54
54
  Product.searchkick_index.delete rescue nil
55
55
 
56
+ GC.start
57
+ GC.disable
58
+ start_mem = GetProcessMem.new.mb
59
+
56
60
  time =
57
61
  Benchmark.realtime do
58
62
  # result = RubyProf.profile do
59
63
  # report = MemoryProfiler.report do
60
64
  # stats = AllocationStats.trace do
61
- reindex = Product.reindex(async: true)
62
- p reindex
65
+ reindex = Product.reindex #(async: true)
66
+ # p reindex
63
67
  # end
64
68
 
65
- 60.times do |i|
66
- if reindex.is_a?(Hash)
67
- docs = Searchkick::Index.new(reindex[:index_name]).total_docs
68
- else
69
- docs = Product.searchkick_index.total_docs
70
- end
71
- puts "#{i}: #{docs}"
72
- if docs == total_docs
73
- break
74
- end
75
- p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
76
- sleep(1)
77
- # Product.searchkick_index.refresh
78
- end
69
+ # 60.times do |i|
70
+ # if reindex.is_a?(Hash)
71
+ # docs = Searchkick::Index.new(reindex[:index_name]).total_docs
72
+ # else
73
+ # docs = Product.searchkick_index.total_docs
74
+ # end
75
+ # puts "#{i}: #{docs}"
76
+ # if docs == total_docs
77
+ # break
78
+ # end
79
+ # p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
80
+ # sleep(1)
81
+ # # Product.searchkick_index.refresh
82
+ # end
79
83
  end
80
84
 
81
- # p GetProcessMem.new.mb
82
-
83
- puts time.round(1)
84
-
85
+ puts
86
+ puts "Time: #{time.round(1)}s"
85
87
 
86
88
  if result
87
89
  printer = RubyProf::GraphPrinter.new(result)
data/benchmark/search.rb ADDED
@@ -0,0 +1,48 @@
1
+ require "bundler/setup"
2
+ Bundler.require(:default)
3
+ require "active_record"
4
+ require "benchmark/ips"
5
+
6
+ ActiveRecord::Base.default_timezone = :utc
7
+ ActiveRecord::Base.time_zone_aware_attributes = true
8
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: "/tmp/searchkick"
9
+
10
+ class Product < ActiveRecord::Base
11
+ searchkick batch_size: 1000
12
+
13
+ def search_data
14
+ {
15
+ name: name,
16
+ color: color,
17
+ store_id: store_id
18
+ }
19
+ end
20
+ end
21
+
22
+ if ENV["SETUP"]
23
+ total_docs = 1000000
24
+
25
+ ActiveRecord::Migration.create_table :products, force: :cascade do |t|
26
+ t.string :name
27
+ t.string :color
28
+ t.integer :store_id
29
+ end
30
+
31
+ Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
32
+
33
+ puts "Imported"
34
+
35
+ Product.reindex
36
+
37
+ puts "Reindexed"
38
+ end
39
+
40
+ query = Product.search("product", fields: [:name], where: {color: "red", store_id: 5}, limit: 10000, load: false, execute: false)
41
+
42
+ require "pp"
43
+ pp query.body.as_json
44
+ puts
45
+
46
+ Benchmark.ips do |x|
47
+ x.report { query.dup.execute }
48
+ end
data/docs/Searchkick-3-Upgrade.md ADDED
@@ -0,0 +1,57 @@
1
+ # Searchkick 3 Upgrade
2
+
3
+ ## Before You Upgrade
4
+
5
+ Searchkick 3 no longer uses types, since they are deprecated in Elasticsearch 6.
6
+
7
+ If you use inheritance, add to your parent model:
8
+
9
+ ```ruby
10
+ class Animal < ApplicationRecord
11
+ searchkick inheritance: true
12
+ end
13
+ ```
14
+
15
+ And do a full reindex before upgrading.
16
+
17
+ ## Upgrading
18
+
19
+ Update your Gemfile:
20
+
21
+ ```ruby
22
+ gem 'searchkick', '~> 3'
23
+ ```
24
+
25
+ And run:
26
+
27
+ ```sh
28
+ bundle update searchkick
29
+ ```
30
+
31
+ We recommend you don’t stem conversions anymore, so conversions for `pepper` don’t affect `peppers`, but if you want to keep the old behavior, use:
32
+
33
+ ```ruby
34
+ Searchkick.model_options = {
35
+ stem_conversions: true
36
+ }
37
+ ```
38
+
39
+ Searchkick 3 disables the `_all` field by default, since Elasticsearch 6 removes the ability to reindex with it. If you’re on Elasticsearch 5 and still need it, add to your model:
40
+
41
+ ```ruby
42
+ class Product < ApplicationRecord
43
+ searchkick _all: true
44
+ end
45
+ ```
46
+
47
+ If you use `record.reindex_async` or `record.reindex(async: true)`, replace it with:
48
+
49
+ ```ruby
50
+ record.reindex(mode: :async)
51
+ ```
52
+
53
+ If you use `log: true` with `boost_by`, replace it with `modifier: "ln2p"`.
54
+
55
+ If you use the `body` option and have warnings about incompatible options, remove them, as they now throw an `ArgumentError`.
56
+
57
+ Check out the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md) for the full list of changes.
data/lib/searchkick.rb CHANGED
@@ -1,20 +1,37 @@
1
1
  require "active_model"
2
+ require "active_support/core_ext/hash/deep_merge"
2
3
  require "elasticsearch"
3
4
  require "hashie"
4
- require "searchkick/version"
5
- require "searchkick/index_options"
5
+
6
+ require "searchkick/bulk_indexer"
6
7
  require "searchkick/index"
7
8
  require "searchkick/indexer"
8
- require "searchkick/reindex_queue"
9
9
  require "searchkick/hash_wrapper"
10
- require "searchkick/results"
11
- require "searchkick/query"
12
- require "searchkick/multi_search"
13
- require "searchkick/model"
14
- require "searchkick/tasks"
15
10
  require "searchkick/middleware"
11
+ require "searchkick/model"
12
+ require "searchkick/multi_search"
13
+ require "searchkick/query"
14
+ require "searchkick/reindex_queue"
15
+ require "searchkick/record_data"
16
+ require "searchkick/record_indexer"
17
+ require "searchkick/results"
18
+ require "searchkick/version"
19
+
16
20
  require "searchkick/logging" if defined?(ActiveSupport::Notifications)
17
- require "active_support/core_ext/hash/deep_merge"
21
+
22
+ begin
23
+ require "rake"
24
+ rescue LoadError
25
+ # do nothing
26
+ end
27
+ require "searchkick/tasks" if defined?(Rake)
28
+
29
+ begin
30
+ require "rake"
31
+ rescue LoadError
32
+ # do nothing
33
+ end
34
+ require "searchkick/tasks" if defined?(Rake)
18
35
 
19
36
  # background jobs
20
37
  begin
@@ -24,8 +41,8 @@ rescue LoadError
24
41
  end
25
42
  if defined?(ActiveJob)
26
43
  require "searchkick/bulk_reindex_job"
27
- require "searchkick/process_queue_job"
28
44
  require "searchkick/process_batch_job"
45
+ require "searchkick/process_queue_job"
29
46
  require "searchkick/reindex_v2_job"
30
47
  end
31
48
 
@@ -38,7 +55,7 @@ module Searchkick
38
55
  class ImportError < Error; end
39
56
 
40
57
  class << self
41
- attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options, :redis, :index_prefix, :index_suffix, :queue_name
58
+ attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options, :redis, :index_prefix, :index_suffix, :queue_name, :model_options
42
59
  attr_writer :client, :env, :search_timeout
43
60
  attr_reader :aws_credentials
44
61
  end
@@ -48,6 +65,7 @@ module Searchkick
48
65
  self.models = []
49
66
  self.client_options = {}
50
67
  self.queue_name = :searchkick
68
+ self.model_options = {}
51
69
 
52
70
  def self.client
53
71
  @client ||= begin
@@ -80,18 +98,18 @@ module Searchkick
80
98
  Gem::Version.new(server_version.sub("-", ".")) < Gem::Version.new(version.sub("-", "."))
81
99
  end
82
100
 
83
- def self.search(term = "*", **options, &block)
84
- klass = options[:model]
101
+ def self.search(term = "*", model: nil, **options, &block)
102
+ klass = model
85
103
 
86
- # TODO add in next major version
87
- # if !klass
88
- # index_name = Array(options[:index_name])
89
- # if index_name.size == 1 && index_name.first.respond_to?(:searchkick_index)
90
- # klass = index_name.first
91
- # end
92
- # end
104
+ # make Searchkick.search(index_name: [Product]) and Product.search equivalent
105
+ unless klass
106
+ index_name = Array(options[:index_name])
107
+ if index_name.size == 1 && index_name.first.respond_to?(:searchkick_index)
108
+ klass = index_name.first
109
+ end
110
+ end
93
111
 
94
- query = Searchkick::Query.new(klass, term, options.except(:model))
112
+ query = Searchkick::Query.new(klass, term, options)
95
113
  block.call(query.body) if block
96
114
  if options[:execute] == false
97
115
  query
@@ -100,8 +118,8 @@ module Searchkick
100
118
  end
101
119
  end
102
120
 
103
- def self.multi_search(queries, retry_misspellings: false)
104
- Searchkick::MultiSearch.new(queries, retry_misspellings: retry_misspellings).perform
121
+ def self.multi_search(queries)
122
+ Searchkick::MultiSearch.new(queries).perform
105
123
  end
106
124
 
107
125
  # callbacks
@@ -114,8 +132,12 @@ module Searchkick
114
132
  self.callbacks_value = false
115
133
  end
116
134
 
117
- def self.callbacks?
118
- Thread.current[:searchkick_callbacks_enabled].nil? || Thread.current[:searchkick_callbacks_enabled]
135
+ def self.callbacks?(default: true)
136
+ if callbacks_value.nil?
137
+ default
138
+ else
139
+ callbacks_value != false
140
+ end
119
141
  end
120
142
 
121
143
  def self.callbacks(value)
@@ -123,8 +145,9 @@ module Searchkick
123
145
  previous_value = callbacks_value
124
146
  begin
125
147
  self.callbacks_value = value
126
- yield
148
+ result = yield
127
149
  indexer.perform if callbacks_value == :bulk
150
+ result
128
151
  ensure
129
152
  self.callbacks_value = previous_value
130
153
  end
@@ -223,7 +246,7 @@ module Searchkick
223
246
  end
224
247
 
225
248
  # TODO find better ActiveModel hook
226
- ActiveModel::Callbacks.send(:include, Searchkick::Model)
249
+ ActiveModel::Callbacks.include(Searchkick::Model)
227
250
 
228
251
  ActiveSupport.on_load(:active_record) do
229
252
  extend Searchkick::Model
data/lib/searchkick/bulk_indexer.rb ADDED
@@ -0,0 +1,168 @@
1
+ module Searchkick
2
+ class BulkIndexer
3
+ attr_reader :index
4
+
5
+ def initialize(index)
6
+ @index = index
7
+ end
8
+
9
+ def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
10
+ # use scope for import
11
+ scope = scope.search_import if scope.respond_to?(:search_import)
12
+
13
+ if batch
14
+ import_or_update scope.to_a, method_name, async
15
+ Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
16
+ elsif full && async
17
+ full_reindex_async(scope)
18
+ elsif scope.respond_to?(:find_in_batches)
19
+ if resume
20
+ # use total docs instead of max id since there's not a great way
21
+ # to get the max _id without scripting since it's a string
22
+
23
+ # TODO use primary key and prefix with table name
24
+ scope = scope.where("id > ?", total_docs)
25
+ end
26
+
27
+ scope = scope.select("id").except(:includes, :preload) if async
28
+
29
+ scope.find_in_batches batch_size: batch_size do |items|
30
+ import_or_update items, method_name, async
31
+ end
32
+ else
33
+ each_batch(scope) do |items|
34
+ import_or_update items, method_name, async
35
+ end
36
+ end
37
+ end
38
+
39
+ def bulk_index(records)
40
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).index_data })
41
+ end
42
+
43
+ def bulk_delete(records)
44
+ Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(index, r).delete_data })
45
+ end
46
+
47
+ def bulk_update(records, method_name)
48
+ Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).update_data(method_name) })
49
+ end
50
+
51
+ private
52
+
53
+ def import_or_update(records, method_name, async)
54
+ if records.any?
55
+ if async
56
+ Searchkick::BulkReindexJob.perform_later(
57
+ class_name: records.first.class.name,
58
+ record_ids: records.map(&:id),
59
+ index_name: index.name,
60
+ method_name: method_name ? method_name.to_s : nil
61
+ )
62
+ else
63
+ records = records.select(&:should_index?)
64
+ if records.any?
65
+ with_retries do
66
+ # call out to index for ActiveSupport notifications
67
+ if method_name
68
+ index.bulk_update(records, method_name)
69
+ else
70
+ index.bulk_index(records)
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
77
+
78
+ def full_reindex_async(scope)
79
+ if scope.respond_to?(:primary_key)
80
+ # TODO expire Redis key
81
+ primary_key = scope.primary_key
82
+
83
+ starting_id =
84
+ begin
85
+ scope.minimum(primary_key)
86
+ rescue ActiveRecord::StatementInvalid
87
+ false
88
+ end
89
+
90
+ if starting_id.nil?
91
+ # no records, do nothing
92
+ elsif starting_id.is_a?(Numeric)
93
+ max_id = scope.maximum(primary_key)
94
+ batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil
95
+
96
+ batches_count.times do |i|
97
+ batch_id = i + 1
98
+ min_id = starting_id + (i * batch_size)
99
+ bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
100
+ end
101
+ else
102
+ scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
103
+ batch_id = i + 1
104
+
105
+ bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
106
+ end
107
+ end
108
+ else
109
+ batch_id = 1
110
+ # TODO remove any eager loading
111
+ scope = scope.only(:_id) if scope.respond_to?(:only)
112
+ each_batch(scope) do |items|
113
+ bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
114
+ batch_id += 1
115
+ end
116
+ end
117
+ end
118
+
119
+ def each_batch(scope)
120
+ # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
121
+ # use cursor for Mongoid
122
+ items = []
123
+ scope.all.each do |item|
124
+ items << item
125
+ if items.length == batch_size
126
+ yield items
127
+ items = []
128
+ end
129
+ end
130
+ yield items if items.any?
131
+ end
132
+
133
+ def bulk_reindex_job(scope, batch_id, options)
134
+ Searchkick::BulkReindexJob.perform_later({
135
+ class_name: scope.model_name.name,
136
+ index_name: index.name,
137
+ batch_id: batch_id
138
+ }.merge(options))
139
+ Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
140
+ end
141
+
142
+ def with_retries
143
+ retries = 0
144
+
145
+ begin
146
+ yield
147
+ rescue Faraday::ClientError => e
148
+ if retries < 1
149
+ retries += 1
150
+ retry
151
+ end
152
+ raise e
153
+ end
154
+ end
155
+
156
+ def batches_left
157
+ Searchkick.with_redis { |r| r.scard(batches_key) }
158
+ end
159
+
160
+ def batches_key
161
+ "searchkick:reindex:#{index.name}:batches"
162
+ end
163
+
164
+ def batch_size
165
+ @batch_size ||= index.options[:batch_size] || 1000
166
+ end
167
+ end
168
+ end