searchkick 2.3.2 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -0,0 +1,7 @@
1
+ module Searchkick
2
+ class Railtie < Rails::Railtie
3
+ rake_tasks do
4
+ load "tasks/searchkick.rake"
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,128 @@
1
+ module Searchkick
2
+ class RecordData
3
+ TYPE_KEYS = ["type", :type]
4
+
5
+ attr_reader :index, :record
6
+
7
+ def initialize(index, record)
8
+ @index = index
9
+ @record = record
10
+ end
11
+
12
+ def index_data
13
+ data = record_data
14
+ data[:data] = search_data
15
+ {index: data}
16
+ end
17
+
18
+ def update_data(method_name)
19
+ data = record_data
20
+ data[:data] = {doc: search_data(method_name)}
21
+ {update: data}
22
+ end
23
+
24
+ def delete_data
25
+ {delete: record_data}
26
+ end
27
+
28
+ def search_id
29
+ id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
30
+ id.is_a?(Numeric) ? id : id.to_s
31
+ end
32
+
33
+ def document_type(ignore_type = false)
34
+ index.klass_document_type(record.class, ignore_type)
35
+ end
36
+
37
+ def record_data
38
+ data = {
39
+ _index: index.name,
40
+ _id: search_id
41
+ }
42
+ data[:_type] = document_type if Searchkick.server_below7?
43
+ data[:routing] = record.search_routing if record.respond_to?(:search_routing)
44
+ data
45
+ end
46
+
47
+ private
48
+
49
+ def search_data(method_name = nil)
50
+ partial_reindex = !method_name.nil?
51
+
52
+ source = record.send(method_name || :search_data)
53
+
54
+ # conversions
55
+ index.conversions_fields.each do |conversions_field|
56
+ if source[conversions_field]
57
+ source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
58
+ end
59
+ end
60
+
61
+ # hack to prevent generator field doesn't exist error
62
+ if !partial_reindex
63
+ index.suggest_fields.each do |field|
64
+ if !source.key?(field) && !source.key?(field.to_sym)
65
+ source[field] = nil
66
+ end
67
+ end
68
+ end
69
+
70
+ # locations
71
+ index.locations_fields.each do |field|
72
+ if source[field]
73
+ if !source[field].is_a?(Hash) && (source[field].first.is_a?(Array) || source[field].first.is_a?(Hash))
74
+ # multiple locations
75
+ source[field] = source[field].map { |a| location_value(a) }
76
+ else
77
+ source[field] = location_value(source[field])
78
+ end
79
+ end
80
+ end
81
+
82
+ if index.options[:inheritance]
83
+ if !TYPE_KEYS.any? { |tk| source.key?(tk) }
84
+ source[:type] = document_type(true)
85
+ end
86
+ end
87
+
88
+ cast_big_decimal(source)
89
+
90
+ source
91
+ end
92
+
93
+ def location_value(value)
94
+ if value.is_a?(Array)
95
+ value.map(&:to_f).reverse
96
+ elsif value.is_a?(Hash)
97
+ {lat: value[:lat].to_f, lon: value[:lon].to_f}
98
+ else
99
+ value
100
+ end
101
+ end
102
+
103
+ # change all BigDecimal values to floats due to
104
+ # https://github.com/rails/rails/issues/6033
105
+ # possible loss of precision :/
106
+ def cast_big_decimal(obj)
107
+ case obj
108
+ when BigDecimal
109
+ obj.to_f
110
+ when Hash
111
+ obj.each do |k, v|
112
+ # performance
113
+ if v.is_a?(BigDecimal)
114
+ obj[k] = v.to_f
115
+ elsif v.is_a?(Enumerable)
116
+ obj[k] = cast_big_decimal(v)
117
+ end
118
+ end
119
+ when Enumerable
120
+ obj.map do |v|
121
+ cast_big_decimal(v)
122
+ end
123
+ else
124
+ obj
125
+ end
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,79 @@
1
+ module Searchkick
2
+ class RecordIndexer
3
+ attr_reader :record, :index
4
+
5
+ def initialize(record)
6
+ @record = record
7
+ @index = record.class.searchkick_index
8
+ end
9
+
10
+ def reindex(method_name = nil, refresh: false, mode: nil)
11
+ unless [:inline, true, nil, :async, :queue].include?(mode)
12
+ raise ArgumentError, "Invalid value for mode"
13
+ end
14
+
15
+ mode ||= Searchkick.callbacks_value || index.options[:callbacks] || true
16
+
17
+ case mode
18
+ when :queue
19
+ if method_name
20
+ raise Searchkick::Error, "Partial reindex not supported with queue option"
21
+ end
22
+
23
+ # always pass routing in case record is deleted
24
+ # before the queue job runs
25
+ if record.respond_to?(:search_routing)
26
+ routing = record.search_routing
27
+ end
28
+
29
+ # escape pipe with double pipe
30
+ value = queue_escape(record.id.to_s)
31
+ value = "#{value}|#{queue_escape(routing)}" if routing
32
+ index.reindex_queue.push(value)
33
+ when :async
34
+ unless defined?(ActiveJob)
35
+ raise Searchkick::Error, "Active Job not found"
36
+ end
37
+
38
+ # always pass routing in case record is deleted
39
+ # before the async job runs
40
+ if record.respond_to?(:search_routing)
41
+ routing = record.search_routing
42
+ end
43
+
44
+ Searchkick::ReindexV2Job.perform_later(
45
+ record.class.name,
46
+ record.id.to_s,
47
+ method_name ? method_name.to_s : nil,
48
+ routing: routing
49
+ )
50
+ else # bulk, inline/true/nil
51
+ reindex_record(method_name)
52
+
53
+ index.refresh if refresh
54
+ end
55
+ end
56
+
57
+ private
58
+
59
+ def queue_escape(value)
60
+ value.gsub("|", "||")
61
+ end
62
+
63
+ def reindex_record(method_name)
64
+ if record.destroyed? || !record.persisted? || !record.should_index?
65
+ begin
66
+ index.remove(record)
67
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
68
+ # do nothing
69
+ end
70
+ else
71
+ if method_name
72
+ index.update_record(record, method_name)
73
+ else
74
+ index.store(record)
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -15,7 +15,7 @@ module Searchkick
15
15
  # TODO use reliable queuing
16
16
  def reserve(limit: 1000)
17
17
  record_ids = Set.new
18
- while record_ids.size < limit && record_id = Searchkick.with_redis { |r| r.rpop(redis_key) }
18
+ while record_ids.size < limit && (record_id = Searchkick.with_redis { |r| r.rpop(redis_key) })
19
19
  record_ids << record_id
20
20
  end
21
21
  record_ids.to_a
@@ -9,11 +9,15 @@ module Searchkick
9
9
 
10
10
  queue_as { Searchkick.queue_name }
11
11
 
12
- def perform(klass, id)
12
+ def perform(klass, id, method_name = nil, routing: nil)
13
13
  model = klass.constantize
14
14
  record =
15
15
  begin
16
- model.find(id)
16
+ if model.respond_to?(:unscoped)
17
+ model.unscoped.find(id)
18
+ else
19
+ model.find(id)
20
+ end
17
21
  rescue => e
18
22
  # check by name rather than rescue directly so we don't need
19
23
  # to determine which classes are defined
@@ -21,19 +25,17 @@ module Searchkick
21
25
  nil
22
26
  end
23
27
 
24
- index = model.searchkick_index
25
- if !record || !record.should_index?
26
- # hacky
27
- record ||= model.new
28
+ unless record
29
+ record = model.new
28
30
  record.id = id
29
- begin
30
- index.remove record
31
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
32
- # do nothing
31
+ if routing
32
+ record.define_singleton_method(:search_routing) do
33
+ routing
34
+ end
33
35
  end
34
- else
35
- index.store record
36
36
  end
37
+
38
+ RecordIndexer.new(record).reindex(method_name, mode: :inline)
37
39
  end
38
40
  end
39
41
  end
@@ -15,40 +15,58 @@ module Searchkick
15
15
  @options = options
16
16
  end
17
17
 
18
- # experimental: may not make next release
19
- def records
20
- @records ||= results_query(klass, hits)
18
+ def results
19
+ @results ||= with_hit.map(&:first)
21
20
  end
22
21
 
23
- def results
24
- @results ||= begin
22
+ # TODO return enumerator like with_score
23
+ def with_hit
24
+ @with_hit ||= begin
25
25
  if options[:load]
26
26
  # results can have different types
27
27
  results = {}
28
28
 
29
- hits.group_by { |hit, _| hit["_type"] }.each do |type, grouped_hits|
30
- results[type] = results_query(type.camelize.constantize, grouped_hits).to_a.index_by { |r| r.id.to_s }
29
+ hits.group_by { |hit, _| hit["_index"] }.each do |index, grouped_hits|
30
+ klasses =
31
+ if @klass
32
+ [@klass]
33
+ else
34
+ index_alias = index.split("_")[0..-2].join("_")
35
+ Array((options[:index_mapping] || {})[index_alias])
36
+ end
37
+ raise Searchkick::Error, "Unknown model for index: #{index}" unless klasses.any?
38
+
39
+ results[index] = {}
40
+ klasses.each do |klass|
41
+ results[index].merge!(results_query(klass, grouped_hits).to_a.index_by { |r| r.id.to_s })
42
+ end
31
43
  end
32
44
 
33
- # sort
34
- hits.map do |hit|
35
- result = results[hit["_type"]][hit["_id"].to_s]
36
- if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
37
- unless result.respond_to?(:search_hit)
38
- result.define_singleton_method(:search_hit) do
39
- hit
40
- end
41
- end
45
+ missing_ids = []
42
46
 
43
- if hit["highlight"] && !result.respond_to?(:search_highlights)
44
- highlights = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
45
- result.define_singleton_method(:search_highlights) do
46
- highlights
47
+ # sort
48
+ results =
49
+ hits.map do |hit|
50
+ result = results[hit["_index"]][hit["_id"].to_s]
51
+ if result && !(options[:load].is_a?(Hash) && options[:load][:dumpable])
52
+ if (hit["highlight"] || options[:highlight]) && !result.respond_to?(:search_highlights)
53
+ highlights = hit_highlights(hit)
54
+ result.define_singleton_method(:search_highlights) do
55
+ highlights
56
+ end
47
57
  end
48
58
  end
59
+ [result, hit]
60
+ end.select do |result, hit|
61
+ missing_ids << hit["_id"] unless result
62
+ result
49
63
  end
50
- result
51
- end.compact
64
+
65
+ if missing_ids.any?
66
+ Searchkick.warn("Records in search index do not exist in database: #{missing_ids.join(", ")}")
67
+ end
68
+
69
+ results
52
70
  else
53
71
  hits.map do |hit|
54
72
  result =
@@ -60,15 +78,15 @@ module Searchkick
60
78
  hit
61
79
  end
62
80
 
63
- if hit["highlight"]
64
- highlight = Hash[hit["highlight"].map { |k, v| [base_field(k), v.first] }]
81
+ if hit["highlight"] || options[:highlight]
82
+ highlight = Hash[hit["highlight"].to_a.map { |k, v| [base_field(k), v.first] }]
65
83
  options[:highlighted_fields].map { |k| base_field(k) }.each do |k|
66
84
  result["highlighted_#{k}"] ||= (highlight[k] || result[k])
67
85
  end
68
86
  end
69
87
 
70
88
  result["id"] ||= result["_id"] # needed for legacy reasons
71
- Hashie::Mash.new(result)
89
+ [HashWrapper.new(result), hit]
72
90
  end
73
91
  end
74
92
  end
@@ -77,25 +95,13 @@ module Searchkick
77
95
  def suggestions
78
96
  if response["suggest"]
79
97
  response["suggest"].values.flat_map { |v| v.first["options"] }.sort_by { |o| -o["score"] }.map { |o| o["text"] }.uniq
98
+ elsif options[:suggest] || options[:term] == "*" # TODO remove 2nd term
99
+ []
80
100
  else
81
101
  raise "Pass `suggest: true` to the search method for suggestions"
82
102
  end
83
103
  end
84
104
 
85
- def each_with_hit(&block)
86
- results.zip(hits).each(&block)
87
- end
88
-
89
- def with_details
90
- each_with_hit.map do |model, hit|
91
- details = {}
92
- if hit["highlight"]
93
- details[:highlight] = Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, v.first] }]
94
- end
95
- [model, details]
96
- end
97
- end
98
-
99
105
  def aggregations
100
106
  response["aggregations"]
101
107
  end
@@ -138,7 +144,13 @@ module Searchkick
138
144
  end
139
145
 
140
146
  def total_count
141
- response["hits"]["total"]
147
+ if options[:total_entries]
148
+ options[:total_entries]
149
+ elsif response["hits"]["total"].is_a?(Hash)
150
+ response["hits"]["total"]["value"]
151
+ else
152
+ response["hits"]["total"]
153
+ end
142
154
  end
143
155
  alias_method :total_entries, :total_count
144
156
 
@@ -187,13 +199,83 @@ module Searchkick
187
199
  end
188
200
 
189
201
  def hits
190
- @response["hits"]["hits"]
202
+ if error
203
+ raise Searchkick::Error, "Query error - use the error method to view it"
204
+ else
205
+ @response["hits"]["hits"]
206
+ end
207
+ end
208
+
209
+ def highlights(multiple: false)
210
+ hits.map do |hit|
211
+ hit_highlights(hit, multiple: multiple)
212
+ end
213
+ end
214
+
215
+ # TODO return enumerator like with_score
216
+ def with_highlights(multiple: false)
217
+ with_hit.map do |result, hit|
218
+ [result, hit_highlights(hit, multiple: multiple)]
219
+ end
220
+ end
221
+
222
+ def with_score
223
+ return enum_for(:with_score) unless block_given?
224
+
225
+ with_hit.each do |result, hit|
226
+ yield result, hit["_score"]
227
+ end
191
228
  end
192
229
 
193
230
  def misspellings?
194
231
  @options[:misspellings]
195
232
  end
196
233
 
234
+ def scroll_id
235
+ @response["_scroll_id"]
236
+ end
237
+
238
+ def scroll
239
+ raise Searchkick::Error, "Pass `scroll` option to the search method for scrolling" unless scroll_id
240
+
241
+ if block_given?
242
+ records = self
243
+ while records.any?
244
+ yield records
245
+ records = records.scroll
246
+ end
247
+
248
+ records.clear_scroll
249
+ else
250
+ params = {
251
+ scroll: options[:scroll],
252
+ scroll_id: scroll_id
253
+ }
254
+
255
+ begin
256
+ # TODO Active Support notifications for this scroll call
257
+ Searchkick::Results.new(@klass, Searchkick.client.scroll(params), @options)
258
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound => e
259
+ if e.class.to_s =~ /NotFound/ && e.message =~ /search_context_missing_exception/i
260
+ raise Searchkick::Error, "Scroll id has expired"
261
+ else
262
+ raise e
263
+ end
264
+ end
265
+ end
266
+ end
267
+
268
+ def clear_scroll
269
+ begin
270
+ # try to clear scroll
271
+ # not required as scroll will expire
272
+ # but there is a cost to open scrolls
273
+ Searchkick.client.clear_scroll(scroll_id: scroll_id)
274
+ rescue Elasticsearch::Transport::Transport::Error
275
+ # do nothing
276
+ end
277
+ end
278
+
197
279
  private
198
280
 
199
281
  def results_query(records, hits)
@@ -215,6 +297,10 @@ module Searchkick
215
297
  end
216
298
  end
217
299
 
300
+ if options[:scope_results]
301
+ records = options[:scope_results].call(records)
302
+ end
303
+
218
304
  Searchkick.load_records(records, ids)
219
305
  end
220
306
 
@@ -231,5 +317,13 @@ module Searchkick
231
317
  def base_field(k)
232
318
  k.sub(/\.(analyzed|word_start|word_middle|word_end|text_start|text_middle|text_end|exact)\z/, "")
233
319
  end
320
+
321
+ def hit_highlights(hit, multiple: false)
322
+ if hit["highlight"]
323
+ Hash[hit["highlight"].map { |k, v| [(options[:json] ? k : k.sub(/\.#{@options[:match_suffix]}\z/, "")).to_sym, multiple ? v : v.first] }]
324
+ else
325
+ {}
326
+ end
327
+ end
234
328
  end
235
329
  end
@@ -1,3 +1,3 @@
1
1
  module Searchkick
2
- VERSION = "2.3.2"
2
+ VERSION = "4.4.1"
3
3
  end