searchkick 2.5.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE.md +7 -0
  3. data/.travis.yml +2 -11
  4. data/CHANGELOG.md +22 -0
  5. data/CONTRIBUTING.md +1 -1
  6. data/Gemfile +3 -3
  7. data/LICENSE.txt +1 -1
  8. data/README.md +68 -141
  9. data/Rakefile +0 -4
  10. data/benchmark/Gemfile +3 -2
  11. data/benchmark/{benchmark.rb → index.rb} +33 -31
  12. data/benchmark/search.rb +48 -0
  13. data/docs/Searchkick-3-Upgrade.md +57 -0
  14. data/lib/searchkick.rb +50 -27
  15. data/lib/searchkick/bulk_indexer.rb +168 -0
  16. data/lib/searchkick/bulk_reindex_job.rb +1 -1
  17. data/lib/searchkick/index.rb +122 -348
  18. data/lib/searchkick/index_options.rb +29 -26
  19. data/lib/searchkick/logging.rb +8 -7
  20. data/lib/searchkick/model.rb +37 -90
  21. data/lib/searchkick/multi_search.rb +6 -7
  22. data/lib/searchkick/query.rb +169 -166
  23. data/lib/searchkick/record_data.rb +133 -0
  24. data/lib/searchkick/record_indexer.rb +55 -0
  25. data/lib/searchkick/reindex_queue.rb +1 -1
  26. data/lib/searchkick/reindex_v2_job.rb +10 -13
  27. data/lib/searchkick/results.rb +14 -25
  28. data/lib/searchkick/tasks.rb +0 -4
  29. data/lib/searchkick/version.rb +1 -1
  30. data/searchkick.gemspec +3 -3
  31. data/test/boost_test.rb +3 -9
  32. data/test/geo_shape_test.rb +0 -4
  33. data/test/highlight_test.rb +28 -12
  34. data/test/index_test.rb +9 -10
  35. data/test/language_test.rb +16 -0
  36. data/test/marshal_test.rb +6 -1
  37. data/test/match_test.rb +9 -4
  38. data/test/model_test.rb +3 -5
  39. data/test/multi_search_test.rb +0 -7
  40. data/test/order_test.rb +1 -7
  41. data/test/pagination_test.rb +1 -1
  42. data/test/reindex_v2_job_test.rb +6 -11
  43. data/test/routing_test.rb +1 -1
  44. data/test/similar_test.rb +2 -2
  45. data/test/sql_test.rb +0 -31
  46. data/test/test_helper.rb +37 -23
  47. metadata +19 -26
  48. data/test/gemfiles/activerecord31.gemfile +0 -7
  49. data/test/gemfiles/activerecord32.gemfile +0 -7
  50. data/test/gemfiles/activerecord40.gemfile +0 -8
  51. data/test/gemfiles/activerecord41.gemfile +0 -8
  52. data/test/gemfiles/mongoid2.gemfile +0 -7
  53. data/test/gemfiles/mongoid3.gemfile +0 -6
  54. data/test/gemfiles/mongoid4.gemfile +0 -7
  55. data/test/records_test.rb +0 -10
@@ -0,0 +1,133 @@
1
module Searchkick
  # Builds the per-record action hashes (+:index+, +:update+, +:delete+) for a
  # single record in a given index: document metadata plus the prepared
  # search data.
  class RecordData
    # Keys that may not appear in a record's search data; the document id is
    # sent separately as +_id+ (see #search_id).
    EXCLUDED_ATTRIBUTES = ["id", :id].freeze
    # Keys that count as an explicitly supplied type field when the index
    # uses the :inheritance option.
    TYPE_KEYS = ["type", :type].freeze

    attr_reader :index, :record

    # @param index [Object] index wrapper; this class calls +name+, +options+,
    #   +klass_document_type+, +conversions_fields+, +suggest_fields+ and
    #   +locations_fields+ on it
    # @param record [Object] record to serialize; must respond to +id+ and to
    #   +search_data+ (or to the method name given to #update_data)
    def initialize(index, record)
      @index = index
      @record = record
    end

    # @return [Hash] +{index: metadata}+ with the full search data under +:data+
    def index_data
      data = record_data
      data[:data] = search_data
      {index: data}
    end

    # @param method_name [Symbol, String] record method that returns the
    #   partial document
    # @return [Hash] +{update: metadata}+ with +{doc: partial_data}+ under +:data+
    def update_data(method_name)
      data = record_data
      data[:data] = {doc: search_data(method_name)}
      {update: data}
    end

    # @return [Hash] +{delete: metadata}+ (metadata only, no document body)
    def delete_data
      {delete: record_data}
    end

    # Document id: +record.search_document_id+ when the record defines it,
    # otherwise +record.id+. Numeric ids pass through unchanged; anything else
    # is converted with +to_s+.
    def search_id
      id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
      id.is_a?(Numeric) ? id : id.to_s
    end

    # Delegates to +index.klass_document_type+ for the record's class.
    def document_type(ignore_type = false)
      index.klass_document_type(record.class, ignore_type)
    end

    private

    # Metadata shared by every action: index name, id and type, plus
    # +_routing+ when the record responds to +search_routing+.
    def record_data
      data = {
        _index: index.name,
        _id: search_id,
        _type: document_type
      }
      data[:_routing] = record.search_routing if record.respond_to?(:search_routing)
      data
    end

    # Fetches the record's search data and normalizes it in place.
    #
    # @param method_name [Symbol, String, nil] when given, that method is
    #   called instead of +search_data+ and the call is treated as a partial
    #   reindex
    # @return [Hash] the normalized source document
    # @raise [Searchkick::Error] if the data contains a truthy "id"/:id field
    def search_data(method_name = nil)
      partial_reindex = !method_name.nil?

      source = record.send(method_name || :search_data)

      # the id is sent as _id (see search_id), so it is rejected as a field
      EXCLUDED_ATTRIBUTES.each do |attr|
        raise Searchkick::Error, "Cannot index a field with name: #{attr}" if source[attr]
      end

      # conversions: turn each query => count pair into {query:, count:}
      index.conversions_fields.each do |conversions_field|
        if source[conversions_field]
          source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
        end
      end

      # hack to prevent generator field doesn't exist error
      # (skipped for partial reindexes so absent fields are left untouched)
      unless partial_reindex
        index.suggest_fields.each do |field|
          if !source[field] && !source[field.to_sym]
            source[field] = nil
          end
        end
      end

      # locations: a non-hash value whose first element is an array or hash
      # is treated as a list of locations, anything else as a single location
      index.locations_fields.each do |field|
        if source[field]
          if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
            # multiple locations
            source[field] = source[field].map { |a| location_value(a) }
          else
            source[field] = location_value(source[field])
          end
        end
      end

      # with inheritance, add the type unless the record already supplied one
      if index.options[:inheritance]
        if !TYPE_KEYS.any? { |tk| source.key?(tk) }
          source[:type] = document_type(true)
        end
      end

      cast_big_decimal(source)

      source
    end

    # Normalizes one location: arrays are converted to floats and reversed,
    # hashes are reduced to float +:lat+/+:lon+ (symbol keys only), and any
    # other value is returned unchanged.
    def location_value(value)
      if value.is_a?(Array)
        value.map(&:to_f).reverse
      elsif value.is_a?(Hash)
        {lat: value[:lat].to_f, lon: value[:lon].to_f}
      else
        value
      end
    end

    # change all BigDecimal values to floats due to
    # https://github.com/rails/rails/issues/6033
    # possible loss of precision :/
    #
    # Hashes are updated in place; other enumerables are returned as new
    # arrays. Nested hashes and enumerables are converted recursively.
    def cast_big_decimal(obj)
      case obj
      when BigDecimal
        obj.to_f
      when Hash
        obj.each do |k, v|
          # performance: only recurse into values that can contain others
          if v.is_a?(BigDecimal)
            obj[k] = v.to_f
          elsif v.is_a?(Enumerable)
            obj[k] = cast_big_decimal(v)
          end
        end
      when Enumerable
        obj.map do |v|
          cast_big_decimal(v)
        end
      else
        obj
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module Searchkick
  # Reindexes a single record into its model's index, either immediately or
  # by handing the work to the reindex queue or to a background job.
  class RecordIndexer
    attr_reader :record, :index

    # @param record [Object] model instance; its class must respond to
    #   +searchkick_index+
    def initialize(record)
      @record = record
      @index = record.class.searchkick_index
    end

    # @param method_name [Symbol, String, nil] method providing partial data;
    #   nil means a full reindex of the record
    # @param refresh [Boolean] refresh the index afterwards (only applied when
    #   the record is reindexed immediately)
    # @param mode [true, nil, :async, :queue] explicit mode; nil falls back to
    #   +Searchkick.callbacks_value+, then the index's :callbacks option, then true
    # @raise [ArgumentError] if +mode+ is not one of the accepted values
    # @raise [Searchkick::Error] for a partial reindex in :queue mode, or when
    #   :async is requested and ActiveJob is not defined
    def reindex(method_name = nil, refresh: false, mode: nil)
      raise ArgumentError, "Invalid value for mode" unless [true, nil, :async, :queue].include?(mode)

      effective_mode = mode || Searchkick.callbacks_value || index.options[:callbacks] || true

      if :queue == effective_mode
        push_to_queue(method_name)
      elsif :async == effective_mode
        enqueue_job(method_name)
      else
        # bulk, true
        reindex_record(method_name)
        index.refresh if refresh
      end
    end

    private

    # Pushes the record id onto the index's reindex queue.
    def push_to_queue(method_name)
      raise Searchkick::Error, "Partial reindex not supported with queue option" if method_name

      index.reindex_queue.push(record.id.to_s)
    end

    # Schedules a ReindexV2Job for the record.
    def enqueue_job(method_name)
      raise Searchkick::Error, "Active Job not found" unless defined?(ActiveJob)

      Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s, method_name)
    end

    # Stores (or partially updates) the record when it should be in the
    # index; otherwise removes it.
    def reindex_record(method_name)
      keep = !record.destroyed? && record.persisted? && record.should_index?

      if keep
        return method_name ? index.update_record(record, method_name) : index.store(record)
      end

      begin
        index.remove(record)
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        # nothing to remove
        nil
      end
    end
  end
end
@@ -15,7 +15,7 @@ module Searchkick
15
15
  # TODO use reliable queuing
16
16
  def reserve(limit: 1000)
17
17
  record_ids = Set.new
18
- while record_ids.size < limit && record_id = Searchkick.with_redis { |r| r.rpop(redis_key) }
18
+ while record_ids.size < limit && (record_id = Searchkick.with_redis { |r| r.rpop(redis_key) })
19
19
  record_ids << record_id
20
20
  end
21
21
  record_ids.to_a
@@ -9,11 +9,15 @@ module Searchkick
9
9
 
10
10
  queue_as { Searchkick.queue_name }
11
11
 
12
- def perform(klass, id)
12
+ def perform(klass, id, method_name = nil)
13
13
  model = klass.constantize
14
14
  record =
15
15
  begin
16
- model.find(id)
16
+ if model.respond_to?(:unscoped)
17
+ model.unscoped.find(id)
18
+ else
19
+ model.find(id)
20
+ end
17
21
  rescue => e
18
22
  # check by name rather than rescue directly so we don't need
19
23
  # to determine which classes are defined
@@ -21,19 +25,12 @@ module Searchkick
21
25
  nil
22
26
  end
23
27
 
24
- index = model.searchkick_index
25
- if !record || !record.should_index?
26
- # hacky
27
- record ||= model.new
28
+ unless record
29
+ record = model.new
28
30
  record.id = id
29
- begin
30
- index.remove record
31
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
32
- # do nothing
33
- end
34
- else
35
- index.store record
36
31
  end
32
+
33
+ RecordIndexer.new(record).reindex(method_name, mode: true)
37
34
  end
38
35
  end
39
36
  end
@@ -15,11 +15,6 @@ module Searchkick
15
15
  @options = options
16
16
  end
17
17
 
18
- # experimental: may not make next release
19
- def records
20
- @records ||= results_query(klass, hits)
21
- end
22
-
23
18
  def results
24
19
  @results ||= begin
25
20
  if options[:load]
@@ -34,12 +29,6 @@ module Searchkick
34
29
  hits.map do |hit|
35
30
  result = results[hit["_type"]][hit["_id"].to_s]
36
31
  if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
37
- unless result.respond_to?(:search_hit)
38
- result.define_singleton_method(:search_hit) do
39
- hit
40
- end
41
- end
42
-
43
32
  if hit["highlight"] && !result.respond_to?(:search_highlights)
44
33
  highlights = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
45
34
  result.define_singleton_method(:search_highlights) do
@@ -84,20 +73,6 @@ module Searchkick
84
73
  end
85
74
  end
86
75
 
87
- def each_with_hit(&block)
88
- results.zip(hits).each(&block)
89
- end
90
-
91
- def with_details
92
- each_with_hit.map do |model, hit|
93
- details = {}
94
- if hit["highlight"]
95
- details[:highlight] = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
96
- end
97
- [model, details]
98
- end
99
- end
100
-
101
76
  def aggregations
102
77
  response["aggregations"]
103
78
  end
@@ -196,6 +171,20 @@ module Searchkick
196
171
  end
197
172
  end
198
173
 
174
# Pairs every result with the raw hit at the same position.
#
# @return [Array<Array>] list of [result, hit] pairs
def with_hit
  records = results
  records.zip(hits)
end
177
+
178
# Builds one highlights hash per hit, in hit order.
#
# Keys are the highlighted field names as symbols. Unless options[:json] is
# set, a trailing ".<match_suffix>" is stripped from each field name first.
# A hit without a "highlight" entry yields an empty hash instead of raising,
# so the returned array always lines up with the hits.
#
# @param multiple [Boolean] when true each value is the full list of
#   fragments; otherwise only the first fragment
# @return [Array<Hash>] one hash per hit
def highlights(multiple: false)
  hits.map do |hit|
    (hit["highlight"] || {}).each_with_object({}) do |(field, fragments), memo|
      key = options[:json] ? field : field.sub(/\.#{@options[:match_suffix]}\z/, "")
      memo[key.to_sym] = multiple ? fragments : fragments.first
    end
  end
end
182
+ end
183
+
184
# Pairs every result with the highlights hash at the same position.
#
# @param multiple [Boolean] forwarded to #highlights
# @return [Array<Array>] list of [result, highlights] pairs
def with_highlights(multiple: false)
  records = results
  records.zip(highlights(multiple: multiple))
end
187
+
199
188
  def misspellings?
200
189
  @options[:misspellings]
201
190
  end
@@ -1,5 +1,3 @@
1
- require "rake"
2
-
3
1
  namespace :searchkick do
4
2
  desc "reindex model"
5
3
  task reindex: :environment do
@@ -16,7 +14,6 @@ namespace :searchkick do
16
14
  end
17
15
 
18
16
  if defined?(Rails)
19
-
20
17
  namespace :reindex do
21
18
  desc "reindex all models"
22
19
  task all: :environment do
@@ -28,6 +25,5 @@ namespace :searchkick do
28
25
  puts "Reindex complete"
29
26
  end
30
27
  end
31
-
32
28
  end
33
29
  end
@@ -1,3 +1,3 @@
1
1
  module Searchkick
2
- VERSION = "2.5.0"
2
+ VERSION = "3.0.0"
3
3
  end
data/searchkick.gemspec CHANGED
@@ -18,11 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features|benchmark)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "activemodel", ">= 4.1"
22
- spec.add_dependency "elasticsearch", ">= 1"
21
+ spec.add_dependency "activemodel", ">= 4.2"
22
+ spec.add_dependency "elasticsearch", ">= 5"
23
23
  spec.add_dependency "hashie"
24
24
 
25
25
  spec.add_development_dependency "bundler"
26
- spec.add_development_dependency "rake"
27
26
  spec.add_development_dependency "minitest"
27
+ spec.add_development_dependency "rake"
28
28
  end
data/test/boost_test.rb CHANGED
@@ -40,9 +40,9 @@ class BoostTest < Minitest::Test
40
40
  assert_order "speaker", ["Speaker D", "Speaker C", "Speaker B", "Speaker A"], {conversions: "conversions_a", conversions_term: "speaker_1"}, Speaker
41
41
  end
42
42
 
43
- def test_conversions_stemmed
43
+ def test_conversions_case
44
44
  store [
45
- {name: "Tomato A", conversions: {"tomato" => 1, "tomatos" => 1, "Tomatoes" => 1}},
45
+ {name: "Tomato A", conversions: {"tomato" => 1, "TOMATO" => 1, "tOmAtO" => 1}},
46
46
  {name: "Tomato B", conversions: {"tomato" => 2}}
47
47
  ]
48
48
  assert_order "tomato", ["Tomato A", "Tomato B"]
@@ -122,13 +122,7 @@ class BoostTest < Minitest::Test
122
122
  {name: "Tomato B", orders_count: 10},
123
123
  ]
124
124
 
125
- if elasticsearch_below50?
126
- assert_raises(ArgumentError) do
127
- assert_order "tomato", ["Tomato A", "Tomato B"], boost_by: {orders_count: {missing: 100}}
128
- end
129
- else
130
- assert_order "tomato", ["Tomato A", "Tomato B"], boost_by: {orders_count: {missing: 100}}
131
- end
125
+ assert_order "tomato", ["Tomato A", "Tomato B"], boost_by: {orders_count: {missing: 100}}
132
126
  end
133
127
 
134
128
  def test_boost_by_boost_mode_multiply
@@ -116,9 +116,6 @@ class GeoShapeTest < Minitest::Test
116
116
  end
117
117
 
118
118
  def test_search_math
119
- # TODO find out why this is failing
120
- skip unless elasticsearch_below60?
121
-
122
119
  assert_search "witch", ["Region A"], {
123
120
  where: {
124
121
  territory: {
@@ -145,7 +142,6 @@ class GeoShapeTest < Minitest::Test
145
142
  end
146
143
 
147
144
  def test_contains
148
- skip if elasticsearch_below22?
149
145
  assert_search "*", ["Region C"], {
150
146
  where: {
151
147
  territory: {
@@ -3,29 +3,34 @@ require_relative "test_helper"
3
3
  class HighlightTest < Minitest::Test
4
4
  def test_basic
5
5
  store_names ["Two Door Cinema Club"]
6
- assert_equal "Two Door <em>Cinema</em> Club", Product.search("cinema", fields: [:name], highlight: true).first.search_highlights[:name]
6
+ assert_equal "Two Door <em>Cinema</em> Club", Product.search("cinema", highlight: true).highlights.first[:name]
7
7
  end
8
8
 
9
9
  def test_tag
10
10
  store_names ["Two Door Cinema Club"]
11
- assert_equal "Two Door <strong>Cinema</strong> Club", Product.search("cinema", fields: [:name], highlight: {tag: "<strong>"}).first.search_highlights[:name]
11
+ assert_equal "Two Door <strong>Cinema</strong> Club", Product.search("cinema", highlight: {tag: "<strong>"}).highlights.first[:name]
12
12
  end
13
13
 
14
14
  def test_tag_class
15
15
  store_names ["Two Door Cinema Club"]
16
- assert_equal "Two Door <strong class='classy'>Cinema</strong> Club", Product.search("cinema", fields: [:name], highlight: {tag: "<strong class='classy'>"}).first.search_highlights[:name]
16
+ assert_equal "Two Door <strong class='classy'>Cinema</strong> Club", Product.search("cinema", highlight: {tag: "<strong class='classy'>"}).highlights.first[:name]
17
+ end
18
+
19
+ def test_very_long
20
+ store_names [("Two Door Cinema Club " * 100).strip]
21
+ assert_equal ("Two Door <em>Cinema</em> Club " * 100).strip, Product.search("cinema", highlight: true).highlights.first[:name]
17
22
  end
18
23
 
19
24
  def test_multiple_fields
20
25
  store [{name: "Two Door Cinema Club", color: "Cinema Orange"}]
21
- highlights = Product.search("cinema", fields: [:name, :color], highlight: true).first.search_highlights
26
+ highlights = Product.search("cinema", fields: [:name, :color], highlight: true).highlights.first
22
27
  assert_equal "Two Door <em>Cinema</em> Club", highlights[:name]
23
28
  assert_equal "<em>Cinema</em> Orange", highlights[:color]
24
29
  end
25
30
 
26
31
  def test_fields
27
32
  store [{name: "Two Door Cinema Club", color: "Cinema Orange"}]
28
- highlights = Product.search("cinema", fields: [:name, :color], highlight: {fields: [:name]}).first.search_highlights
33
+ highlights = Product.search("cinema", fields: [:name, :color], highlight: {fields: [:name]}).highlights.first
29
34
  assert_equal "Two Door <em>Cinema</em> Club", highlights[:name]
30
35
  assert_nil highlights[:color]
31
36
  end
@@ -33,22 +38,22 @@ class HighlightTest < Minitest::Test
33
38
  def test_field_options
34
39
  store_names ["Two Door Cinema Club are a Northern Irish indie rock band"]
35
40
  fragment_size = ENV["MATCH"] == "word_start" ? 26 : 21
36
- assert_equal "Two Door <em>Cinema</em> Club are", Product.search("cinema", fields: [:name], highlight: {fields: {name: {fragment_size: fragment_size}}}).first.search_highlights[:name]
41
+ assert_equal "Two Door <em>Cinema</em> Club are", Product.search("cinema", highlight: {fields: {name: {fragment_size: fragment_size}}}).highlights.first[:name]
37
42
  end
38
43
 
39
44
  def test_multiple_words
40
45
  store_names ["Hello World Hello"]
41
- assert_equal "<em>Hello</em> World <em>Hello</em>", Product.search("hello", fields: [:name], highlight: true).first.search_highlights[:name]
46
+ assert_equal "<em>Hello</em> World <em>Hello</em>", Product.search("hello", highlight: true).highlights.first[:name]
42
47
  end
43
48
 
44
49
  def test_encoder
45
50
  store_names ["<b>Hello</b>"]
46
- assert_equal "&lt;b&gt;<em>Hello</em>&lt;&#x2F;b&gt;", Product.search("hello", fields: [:name], highlight: {encoder: "html"}, misspellings: false).first.search_highlights[:name]
51
+ assert_equal "&lt;b&gt;<em>Hello</em>&lt;&#x2F;b&gt;", Product.search("hello", highlight: {encoder: "html"}, misspellings: false).highlights.first[:name]
47
52
  end
48
53
 
49
54
  def test_word_middle
50
55
  store_names ["Two Door Cinema Club"]
51
- assert_equal "Two Door <em>Cinema</em> Club", Product.search("ine", fields: [:name], match: :word_middle, highlight: true).first.search_highlights[:name]
56
+ assert_equal "Two Door <em>Cinema</em> Club", Product.search("ine", match: :word_middle, highlight: true).highlights.first[:name]
52
57
  end
53
58
 
54
59
  def test_body
@@ -68,11 +73,22 @@ class HighlightTest < Minitest::Test
68
73
  }
69
74
  }
70
75
  }
71
- assert_equal "Two Door <strong>Cinema</strong> Club", Product.search(body: body).first.search_highlights[:"name.analyzed"]
76
+ assert_equal "Two Door <strong>Cinema</strong> Club", Product.search(body: body).highlights.first[:"name.analyzed"]
77
+ end
78
+
79
+ def test_multiple_highlights
80
+ store_names ["Two Door Cinema Club Some Other Words And Much More Doors Cinema Club"]
81
+ highlights = Product.search("cinema", highlight: {fragment_size: 20}).highlights(multiple: true).first[:name]
82
+ assert highlights.is_a?(Array)
83
+ assert_equal highlights.count, 2
84
+ refute_equal highlights.first, highlights.last
85
+ highlights.each do |highlight|
86
+ assert highlight.include?("<em>Cinema</em>")
87
+ end
72
88
  end
73
89
 
74
- def test_legacy
90
+ def test_search_highlights_method
75
91
  store_names ["Two Door Cinema Club"]
76
- assert_equal "Two Door <em>Cinema</em> Club", Product.search("cinema", fields: [:name], highlight: true).with_details.first[1][:highlight][:name]
92
+ assert_equal "Two Door <em>Cinema</em> Club", Product.search("cinema", highlight: true).first.search_highlights[:name]
77
93
  end
78
94
  end