searchkick 2.5.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +7 -0
  3. data/.travis.yml +2 -11
  4. data/CHANGELOG.md +22 -0
  5. data/CONTRIBUTING.md +1 -1
  6. data/Gemfile +3 -3
  7. data/LICENSE.txt +1 -1
  8. data/README.md +68 -141
  9. data/Rakefile +0 -4
  10. data/benchmark/Gemfile +3 -2
  11. data/benchmark/{benchmark.rb → index.rb} +33 -31
  12. data/benchmark/search.rb +48 -0
  13. data/docs/Searchkick-3-Upgrade.md +57 -0
  14. data/lib/searchkick.rb +50 -27
  15. data/lib/searchkick/bulk_indexer.rb +168 -0
  16. data/lib/searchkick/bulk_reindex_job.rb +1 -1
  17. data/lib/searchkick/index.rb +122 -348
  18. data/lib/searchkick/index_options.rb +29 -26
  19. data/lib/searchkick/logging.rb +8 -7
  20. data/lib/searchkick/model.rb +37 -90
  21. data/lib/searchkick/multi_search.rb +6 -7
  22. data/lib/searchkick/query.rb +169 -166
  23. data/lib/searchkick/record_data.rb +133 -0
  24. data/lib/searchkick/record_indexer.rb +55 -0
  25. data/lib/searchkick/reindex_queue.rb +1 -1
  26. data/lib/searchkick/reindex_v2_job.rb +10 -13
  27. data/lib/searchkick/results.rb +14 -25
  28. data/lib/searchkick/tasks.rb +0 -4
  29. data/lib/searchkick/version.rb +1 -1
  30. data/searchkick.gemspec +3 -3
  31. data/test/boost_test.rb +3 -9
  32. data/test/geo_shape_test.rb +0 -4
  33. data/test/highlight_test.rb +28 -12
  34. data/test/index_test.rb +9 -10
  35. data/test/language_test.rb +16 -0
  36. data/test/marshal_test.rb +6 -1
  37. data/test/match_test.rb +9 -4
  38. data/test/model_test.rb +3 -5
  39. data/test/multi_search_test.rb +0 -7
  40. data/test/order_test.rb +1 -7
  41. data/test/pagination_test.rb +1 -1
  42. data/test/reindex_v2_job_test.rb +6 -11
  43. data/test/routing_test.rb +1 -1
  44. data/test/similar_test.rb +2 -2
  45. data/test/sql_test.rb +0 -31
  46. data/test/test_helper.rb +37 -23
  47. metadata +19 -26
  48. data/test/gemfiles/activerecord31.gemfile +0 -7
  49. data/test/gemfiles/activerecord32.gemfile +0 -7
  50. data/test/gemfiles/activerecord40.gemfile +0 -8
  51. data/test/gemfiles/activerecord41.gemfile +0 -8
  52. data/test/gemfiles/mongoid2.gemfile +0 -7
  53. data/test/gemfiles/mongoid3.gemfile +0 -6
  54. data/test/gemfiles/mongoid4.gemfile +0 -7
  55. data/test/records_test.rb +0 -10
data/Rakefile CHANGED
@@ -14,7 +14,3 @@ Rake::TestTask.new do |t|
14
14
  t.pattern = "test/**/*_test.rb"
15
15
  t.warning = false
16
16
  end
17
-
18
- task :benchmark do
19
- require_relative "benchmark/benchmark"
20
- end
data/benchmark/Gemfile CHANGED
@@ -3,9 +3,9 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in searchkick.gemspec
4
4
  gemspec path: "../"
5
5
 
6
- # gem "sqlite3"
6
+ gem "sqlite3"
7
7
  gem "pg"
8
- gem "activerecord", "~> 5.0.0"
8
+ gem "activerecord", "~> 5.1.0"
9
9
  gem "activerecord-import"
10
10
  gem "activejob"
11
11
  gem "redis"
@@ -21,3 +21,4 @@ gem "allocation_stats"
21
21
  gem "get_process_mem"
22
22
  gem "memory_profiler"
23
23
  gem "allocation_tracer"
24
+ gem "benchmark-ips"
@@ -6,7 +6,7 @@ require "active_support/notifications"
6
6
 
7
7
  ActiveSupport::Notifications.subscribe "request.searchkick" do |*args|
8
8
  event = ActiveSupport::Notifications::Event.new(*args)
9
- p event.duration
9
+ puts "Import: #{event.duration.round}ms"
10
10
  end
11
11
 
12
12
  ActiveJob::Base.queue_adapter = :sidekiq
@@ -33,55 +33,57 @@ class Product < ActiveRecord::Base
33
33
  end
34
34
  end
35
35
 
36
- total_docs = 100000
36
+ if ENV["SETUP"]
37
+ total_docs = 100000
37
38
 
38
- # ActiveRecord::Migration.create_table :products, force: :cascade do |t|
39
- # t.string :name
40
- # t.string :color
41
- # t.integer :store_id
42
- # end
39
+ ActiveRecord::Migration.create_table :products, force: :cascade do |t|
40
+ t.string :name
41
+ t.string :color
42
+ t.integer :store_id
43
+ end
43
44
 
44
- # Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
45
+ Product.import ["name", "color", "store_id"], total_docs.times.map { |i| ["Product #{i}", ["red", "blue"].sample, rand(10)] }
45
46
 
46
- puts "Imported"
47
+ puts "Imported"
48
+ end
47
49
 
48
50
  result = nil
49
51
  report = nil
50
52
  stats = nil
51
53
 
52
- # p GetProcessMem.new.mb
53
-
54
54
  Product.searchkick_index.delete rescue nil
55
55
 
56
+ GC.start
57
+ GC.disable
58
+ start_mem = GetProcessMem.new.mb
59
+
56
60
  time =
57
61
  Benchmark.realtime do
58
62
  # result = RubyProf.profile do
59
63
  # report = MemoryProfiler.report do
60
64
  # stats = AllocationStats.trace do
61
- reindex = Product.reindex(async: true)
62
- p reindex
65
+ reindex = Product.reindex #(async: true)
66
+ # p reindex
63
67
  # end
64
68
 
65
- 60.times do |i|
66
- if reindex.is_a?(Hash)
67
- docs = Searchkick::Index.new(reindex[:index_name]).total_docs
68
- else
69
- docs = Product.searchkick_index.total_docs
70
- end
71
- puts "#{i}: #{docs}"
72
- if docs == total_docs
73
- break
74
- end
75
- p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
76
- sleep(1)
77
- # Product.searchkick_index.refresh
78
- end
69
+ # 60.times do |i|
70
+ # if reindex.is_a?(Hash)
71
+ # docs = Searchkick::Index.new(reindex[:index_name]).total_docs
72
+ # else
73
+ # docs = Product.searchkick_index.total_docs
74
+ # end
75
+ # puts "#{i}: #{docs}"
76
+ # if docs == total_docs
77
+ # break
78
+ # end
79
+ # p Searchkick.reindex_status(reindex[:index_name]) if reindex.is_a?(Hash)
80
+ # sleep(1)
81
+ # # Product.searchkick_index.refresh
82
+ # end
79
83
  end
80
84
 
81
- # p GetProcessMem.new.mb
82
-
83
- puts time.round(1)
84
-
85
+ puts
86
+ puts "Time: #{time.round(1)}s"
85
87
 
86
88
  if result
87
89
  printer = RubyProf::GraphPrinter.new(result)
@@ -0,0 +1,48 @@
# Benchmarks execution of a single prepared Searchkick query with
# benchmark-ips.
#
# Set the SETUP environment variable to first (re)create the products table,
# import the sample rows, and reindex them.
require "bundler/setup"
Bundler.require(:default)
require "active_record"
require "benchmark/ips"

ActiveRecord::Base.default_timezone = :utc
ActiveRecord::Base.time_zone_aware_attributes = true
ActiveRecord::Base.establish_connection(adapter: "sqlite3", database: "/tmp/searchkick")

# Minimal model under test; indexes name, color and store_id.
class Product < ActiveRecord::Base
  searchkick batch_size: 1000

  def search_data
    {name: name, color: color, store_id: store_id}
  end
end

if ENV["SETUP"]
  total_docs = 1_000_000

  ActiveRecord::Migration.create_table(:products, force: :cascade) do |table|
    table.string :name
    table.string :color
    table.integer :store_id
  end

  # one [name, color, store_id] row per document
  columns = %w[name color store_id]
  rows = Array.new(total_docs) do |n|
    ["Product #{n}", %w[red blue].sample, rand(10)]
  end
  Product.import columns, rows

  puts "Imported"

  Product.reindex

  puts "Reindexed"
end

# build the query once (execute: false); each benchmark iteration runs a copy
query = Product.search(
  "product",
  fields: [:name],
  where: {color: "red", store_id: 5},
  limit: 10000,
  load: false,
  execute: false
)

# show the request body that is being benchmarked
require "pp"
pp query.body.as_json
puts

Benchmark.ips do |bench|
  bench.report { query.dup.execute }
end
@@ -0,0 +1,57 @@
1
+ # Searchkick 3 Upgrade
2
+
3
+ ## Before You Upgrade
4
+
5
+ Searchkick 3 no longer uses types, since they are deprecated in Elasticsearch 6.
6
+
7
+ If you use inheritance, add to your parent model:
8
+
9
+ ```ruby
10
+ class Animal < ApplicationRecord
11
+ searchkick inheritance: true
12
+ end
13
+ ```
14
+
15
+ And do a full reindex before upgrading.
16
+
17
+ ## Upgrading
18
+
19
+ Update your Gemfile:
20
+
21
+ ```ruby
22
+ gem 'searchkick', '~> 3'
23
+ ```
24
+
25
+ And run:
26
+
27
+ ```sh
28
+ bundle update searchkick
29
+ ```
30
+
31
+ We recommend that you no longer stem conversions, so that conversions for `pepper` don’t affect `peppers`. If you want to keep the old behavior, use:
32
+
33
+ ```ruby
34
+ Searchkick.model_options = {
35
+ stem_conversions: true
36
+ }
37
+ ```
38
+
39
+ Searchkick 3 disables the `_all` field by default, since Elasticsearch 6 removes the ability to reindex with it. If you’re on Elasticsearch 5 and still need it, add to your model:
40
+
41
+ ```ruby
42
+ class Product < ApplicationRecord
43
+ searchkick _all: true
44
+ end
45
+ ```
46
+
47
+ If you use `record.reindex_async` or `record.reindex(async: true)`, replace it with:
48
+
49
+ ```ruby
50
+ record.reindex(mode: :async)
51
+ ```
52
+
53
+ If you use `log: true` with `boost_by`, replace it with `modifier: "ln2p"`.
54
+
55
+ If you use the `body` option and see warnings about incompatible options, remove those options, as they now raise an `ArgumentError`.
56
+
57
+ Check out the [changelog](https://github.com/ankane/searchkick/blob/master/CHANGELOG.md) for the full list of changes.
data/lib/searchkick.rb CHANGED
@@ -1,20 +1,37 @@
1
1
  require "active_model"
2
+ require "active_support/core_ext/hash/deep_merge"
2
3
  require "elasticsearch"
3
4
  require "hashie"
4
- require "searchkick/version"
5
- require "searchkick/index_options"
5
+
6
+ require "searchkick/bulk_indexer"
6
7
  require "searchkick/index"
7
8
  require "searchkick/indexer"
8
- require "searchkick/reindex_queue"
9
9
  require "searchkick/hash_wrapper"
10
- require "searchkick/results"
11
- require "searchkick/query"
12
- require "searchkick/multi_search"
13
- require "searchkick/model"
14
- require "searchkick/tasks"
15
10
  require "searchkick/middleware"
11
+ require "searchkick/model"
12
+ require "searchkick/multi_search"
13
+ require "searchkick/query"
14
+ require "searchkick/reindex_queue"
15
+ require "searchkick/record_data"
16
+ require "searchkick/record_indexer"
17
+ require "searchkick/results"
18
+ require "searchkick/version"
19
+
16
20
  require "searchkick/logging" if defined?(ActiveSupport::Notifications)
17
- require "active_support/core_ext/hash/deep_merge"
21
+
22
+ begin
23
+ require "rake"
24
+ rescue LoadError
25
+ # do nothing
26
+ end
27
+ require "searchkick/tasks" if defined?(Rake)
28
+
29
+ begin
30
+ require "rake"
31
+ rescue LoadError
32
+ # do nothing
33
+ end
34
+ require "searchkick/tasks" if defined?(Rake)
18
35
 
19
36
  # background jobs
20
37
  begin
@@ -24,8 +41,8 @@ rescue LoadError
24
41
  end
25
42
  if defined?(ActiveJob)
26
43
  require "searchkick/bulk_reindex_job"
27
- require "searchkick/process_queue_job"
28
44
  require "searchkick/process_batch_job"
45
+ require "searchkick/process_queue_job"
29
46
  require "searchkick/reindex_v2_job"
30
47
  end
31
48
 
@@ -38,7 +55,7 @@ module Searchkick
38
55
  class ImportError < Error; end
39
56
 
40
57
  class << self
41
- attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options, :redis, :index_prefix, :index_suffix, :queue_name
58
+ attr_accessor :search_method_name, :wordnet_path, :timeout, :models, :client_options, :redis, :index_prefix, :index_suffix, :queue_name, :model_options
42
59
  attr_writer :client, :env, :search_timeout
43
60
  attr_reader :aws_credentials
44
61
  end
@@ -48,6 +65,7 @@ module Searchkick
48
65
  self.models = []
49
66
  self.client_options = {}
50
67
  self.queue_name = :searchkick
68
+ self.model_options = {}
51
69
 
52
70
  def self.client
53
71
  @client ||= begin
@@ -80,18 +98,18 @@ module Searchkick
80
98
  Gem::Version.new(server_version.sub("-", ".")) < Gem::Version.new(version.sub("-", "."))
81
99
  end
82
100
 
83
- def self.search(term = "*", **options, &block)
84
- klass = options[:model]
101
+ def self.search(term = "*", model: nil, **options, &block)
102
+ klass = model
85
103
 
86
- # TODO add in next major version
87
- # if !klass
88
- # index_name = Array(options[:index_name])
89
- # if index_name.size == 1 && index_name.first.respond_to?(:searchkick_index)
90
- # klass = index_name.first
91
- # end
92
- # end
104
+ # make Searchkick.search(index_name: [Product]) and Product.search equivalent
105
+ unless klass
106
+ index_name = Array(options[:index_name])
107
+ if index_name.size == 1 && index_name.first.respond_to?(:searchkick_index)
108
+ klass = index_name.first
109
+ end
110
+ end
93
111
 
94
- query = Searchkick::Query.new(klass, term, options.except(:model))
112
+ query = Searchkick::Query.new(klass, term, options)
95
113
  block.call(query.body) if block
96
114
  if options[:execute] == false
97
115
  query
@@ -100,8 +118,8 @@ module Searchkick
100
118
  end
101
119
  end
102
120
 
103
- def self.multi_search(queries, retry_misspellings: false)
104
- Searchkick::MultiSearch.new(queries, retry_misspellings: retry_misspellings).perform
121
+ def self.multi_search(queries)
122
+ Searchkick::MultiSearch.new(queries).perform
105
123
  end
106
124
 
107
125
  # callbacks
@@ -114,8 +132,12 @@ module Searchkick
114
132
  self.callbacks_value = false
115
133
  end
116
134
 
117
- def self.callbacks?
118
- Thread.current[:searchkick_callbacks_enabled].nil? || Thread.current[:searchkick_callbacks_enabled]
135
+ def self.callbacks?(default: true)
136
+ if callbacks_value.nil?
137
+ default
138
+ else
139
+ callbacks_value != false
140
+ end
119
141
  end
120
142
 
121
143
  def self.callbacks(value)
@@ -123,8 +145,9 @@ module Searchkick
123
145
  previous_value = callbacks_value
124
146
  begin
125
147
  self.callbacks_value = value
126
- yield
148
+ result = yield
127
149
  indexer.perform if callbacks_value == :bulk
150
+ result
128
151
  ensure
129
152
  self.callbacks_value = previous_value
130
153
  end
@@ -223,7 +246,7 @@ module Searchkick
223
246
  end
224
247
 
225
248
  # TODO find better ActiveModel hook
226
- ActiveModel::Callbacks.send(:include, Searchkick::Model)
249
+ ActiveModel::Callbacks.include(Searchkick::Model)
227
250
 
228
251
  ActiveSupport.on_load(:active_record) do
229
252
  extend Searchkick::Model
@@ -0,0 +1,168 @@
module Searchkick
  # Imports records into a single index in batches, either inline or by
  # enqueueing background jobs, and queues bulk index/update/delete
  # operations on Searchkick.indexer.
  class BulkIndexer
    attr_reader :index

    # index - the index written to. This class reads its name, options and
    #         total_docs, and calls its bulk_index / bulk_update.
    def initialize(index)
      @index = index
    end

    # Imports every record in scope.
    #
    # scope       - collection to import; its search_import scope is used
    #               when it responds to that method
    # resume      - only import rows whose id is greater than the number of
    #               documents already in the index
    # method_name - when set, records are sent through bulk_update with this
    #               method name instead of bulk_index
    # async       - enqueue BulkReindexJob jobs instead of indexing inline
    # batch       - treat scope as one already-selected batch
    # batch_id    - with batch, removed from the Redis batches set when done
    # full        - with async, split the whole scope into batch jobs
    def import_scope(scope, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false)
      # use scope for import
      scope = scope.search_import if scope.respond_to?(:search_import)

      if batch
        import_or_update scope.to_a, method_name, async
        Searchkick.with_redis { |r| r.srem(batches_key, batch_id) } if batch_id
      elsif full && async
        full_reindex_async(scope)
      elsif scope.respond_to?(:find_in_batches)
        if resume
          # use total docs instead of max id since there's not a great way
          # to get the max _id without scripting since it's a string

          # TODO use primary key and prefix with table name
          # total_docs lives on the index, not on this class
          scope = scope.where("id > ?", index.total_docs)
        end

        # async jobs only need ids, so skip other columns and eager loading
        scope = scope.select("id").except(:includes, :preload) if async

        scope.find_in_batches batch_size: batch_size do |items|
          import_or_update items, method_name, async
        end
      else
        each_batch(scope) do |items|
          import_or_update items, method_name, async
        end
      end
    end

    # Queues index operations for records.
    def bulk_index(records)
      Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).index_data })
    end

    # Queues delete operations for records, skipping those with a blank id.
    def bulk_delete(records)
      Searchkick.indexer.queue(records.reject { |r| r.id.blank? }.map { |r| RecordData.new(index, r).delete_data })
    end

    # Queues partial-update operations built with method_name.
    def bulk_update(records, method_name)
      Searchkick.indexer.queue(records.map { |r| RecordData.new(index, r).update_data(method_name) })
    end

    private

    # Indexes (or updates, when method_name is set) one batch of records.
    # With async, the batch is handed to a BulkReindexJob by id instead.
    def import_or_update(records, method_name, async)
      return unless records.any?

      if async
        Searchkick::BulkReindexJob.perform_later(
          class_name: records.first.class.name,
          record_ids: records.map(&:id),
          index_name: index.name,
          method_name: method_name ? method_name.to_s : nil
        )
      else
        records = records.select(&:should_index?)
        if records.any?
          with_retries do
            # call out to index for ActiveSupport notifications
            if method_name
              index.bulk_update(records, method_name)
            else
              index.bulk_index(records)
            end
          end
        end
      end
    end

    # Splits the whole scope into batches and enqueues one job per batch.
    # Numeric primary keys are split into id ranges computed from the
    # minimum and maximum id; anything else is batched by explicit ids.
    def full_reindex_async(scope)
      if scope.respond_to?(:primary_key)
        # TODO expire Redis key
        primary_key = scope.primary_key

        # false (neither nil nor Numeric) routes to the explicit-id branch
        starting_id =
          begin
            scope.minimum(primary_key)
          rescue ActiveRecord::StatementInvalid
            false
          end

        if starting_id.nil?
          # no records, do nothing
        elsif starting_id.is_a?(Numeric)
          max_id = scope.maximum(primary_key)
          batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil

          batches_count.times do |i|
            batch_id = i + 1
            min_id = starting_id + (i * batch_size)
            bulk_reindex_job scope, batch_id, min_id: min_id, max_id: min_id + batch_size - 1
          end
        else
          scope.find_in_batches(batch_size: batch_size).each_with_index do |batch, i|
            batch_id = i + 1

            bulk_reindex_job scope, batch_id, record_ids: batch.map { |record| record.id.to_s }
          end
        end
      else
        batch_id = 1
        # TODO remove any eager loading
        scope = scope.only(:_id) if scope.respond_to?(:only)
        each_batch(scope) do |items|
          bulk_reindex_job scope, batch_id, record_ids: items.map { |i| i.id.to_s }
          batch_id += 1
        end
      end
    end

    # Yields the items of scope.all in arrays of at most batch_size.
    def each_batch(scope)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      items = []
      scope.all.each do |item|
        items << item
        if items.length == batch_size
          yield items
          items = []
        end
      end
      yield items if items.any?
    end

    # Enqueues a BulkReindexJob for one batch, then records batch_id in the
    # Redis set at batches_key.
    def bulk_reindex_job(scope, batch_id, options)
      Searchkick::BulkReindexJob.perform_later({
        class_name: scope.model_name.name,
        index_name: index.name,
        batch_id: batch_id
      }.merge(options))
      Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
    end

    # Runs the block, retrying once on Faraday::ClientError; a second
    # failure is re-raised to the caller.
    def with_retries
      retries = 0

      begin
        yield
      rescue Faraday::ClientError
        raise if retries >= 1

        retries += 1
        retry
      end
    end

    # Number of batch ids still present in the Redis batches set.
    def batches_left
      Searchkick.with_redis { |r| r.scard(batches_key) }
    end

    # Redis key of the set holding pending batch ids for this index.
    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end

    # Batch size from the index options, defaulting to 1000.
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end
  end
end