searchkick 2.3.2 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -0,0 +1,173 @@
1
module Searchkick
  # Pushes records into a Searchkick index in batches, either inline through
  # the index's bulk methods or by enqueuing BulkReindexJob jobs.
  class BulkIndexer
    attr_reader :index

    # index - object this indexer reads +name+, +options+ and +total_docs+
    #         from and delegates +bulk_index+ / +bulk_update+ to.
    def initialize(index)
      @index = index
    end

    # Imports the records of +relation+ into the index.
    #
    # relation    - collection to import; replaced by relation.send(scope)
    #               when +scope+ is given, otherwise by relation.search_import
    #               when it responds to that method.
    # resume      - find_in_batches path only: restrict to rows whose id is
    #               greater than index.total_docs.
    # method_name - when set, records go through bulk_update with this
    #               method name instead of bulk_index.
    # async       - enqueue BulkReindexJob jobs instead of indexing inline.
    # batch       - treat +relation+ as a single batch (loaded with to_a).
    # batch_id    - with +batch+, removed from the Redis batches set after
    #               the batch has been handled.
    # full        - together with +async+, hand the relation over to
    #               full_reindex_async.
    # scope       - name of a method to call on +relation+ first.
    def import_scope(relation, resume: false, method_name: nil, async: false, batch: false, batch_id: nil, full: false, scope: nil)
      relation =
        if scope
          relation.send(scope)
        elsif relation.respond_to?(:search_import)
          relation.search_import
        else
          relation
        end

      if batch
        import_or_update(relation.to_a, method_name, async)
        Searchkick.with_redis { |redis| redis.srem(batches_key, batch_id) } if batch_id
      elsif full && async
        full_reindex_async(relation)
      elsif relation.respond_to?(:find_in_batches)
        import_in_batches(relation, resume, method_name, async)
      else
        each_batch(relation) { |items| import_or_update(items, method_name, async) }
      end
    end

    # Queues an index operation for each record.
    def bulk_index(records)
      payload = records.map { |record| RecordData.new(index, record).index_data }
      Searchkick.indexer.queue(payload)
    end

    # Queues a delete operation for each record that has a non-blank id.
    def bulk_delete(records)
      deletable = records.reject { |record| record.id.blank? }
      payload = deletable.map { |record| RecordData.new(index, record).delete_data }
      Searchkick.indexer.queue(payload)
    end

    # Queues an update operation for each record, built with +method_name+.
    def bulk_update(records, method_name)
      payload = records.map { |record| RecordData.new(index, record).update_data(method_name) }
      Searchkick.indexer.queue(payload)
    end

    # Size of the Redis set that tracks outstanding batch ids.
    def batches_left
      Searchkick.with_redis { |redis| redis.scard(batches_key) }
    end

    private

    # find_in_batches path of import_scope.
    def import_in_batches(relation, resume, method_name, async)
      # use total docs instead of max id since there's not a great way
      # to get the max _id without scripting since it's a string
      # TODO use primary key and prefix with table name
      relation = relation.where("id > ?", index.total_docs) if resume

      # async jobs only receive ids, so load nothing else
      relation = relation.select("id").except(:includes, :preload) if async

      relation.find_in_batches(batch_size: batch_size) do |items|
        import_or_update(items, method_name, async)
      end
    end

    # Either enqueues one BulkReindexJob for +records+ (async) or indexes /
    # updates the records that pass should_index? inline. Does nothing for
    # an empty collection.
    def import_or_update(records, method_name, async)
      return unless records.any?

      if async
        Searchkick::BulkReindexJob.perform_later(
          class_name: records.first.class.searchkick_options[:class_name],
          record_ids: records.map(&:id),
          index_name: index.name,
          method_name: method_name ? method_name.to_s : nil
        )
      else
        indexable = records.select(&:should_index?)
        return unless indexable.any?

        with_retries do
          # call out to index for ActiveSupport notifications
          if method_name
            index.bulk_update(indexable, method_name)
          else
            index.bulk_index(indexable)
          end
        end
      end
    end

    # Splits +scope+ into BulkReindexJob jobs, registering every batch id
    # in Redis through bulk_reindex_job.
    def full_reindex_async(scope)
      unless scope.respond_to?(:primary_key)
        next_batch_id = 1
        # TODO remove any eager loading
        scope = scope.only(:_id) if scope.respond_to?(:only)
        return each_batch(scope) do |items|
          bulk_reindex_job scope, next_batch_id, record_ids: items.map { |item| item.id.to_s }
          next_batch_id += 1
        end
      end

      # TODO expire Redis key
      primary_key = scope.primary_key
      scope = scope.select(primary_key).except(:includes, :preload)

      # false marks "minimum could not be computed" and selects the
      # find_in_batches fallback below
      starting_id =
        begin
          scope.minimum(primary_key)
        rescue ActiveRecord::StatementInvalid
          false
        end

      case starting_id
      when nil
        # no records, do nothing
      when Numeric
        # numeric keys: one job per fixed-width id range, no rows loaded here
        max_id = scope.maximum(primary_key)
        batches_count = ((max_id - starting_id + 1) / batch_size.to_f).ceil

        batches_count.times do |offset|
          range_start = starting_id + (offset * batch_size)
          bulk_reindex_job scope, offset + 1, min_id: range_start, max_id: range_start + batch_size - 1
        end
      else
        scope.find_in_batches(batch_size: batch_size).each.with_index(1) do |rows, batch_id|
          bulk_reindex_job scope, batch_id, record_ids: rows.map { |record| record.id.to_s }
        end
      end
    end

    # Yields the items of scope.all in arrays of at most batch_size items.
    def each_batch(scope)
      # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
      # use cursor for Mongoid
      buffer = []
      scope.all.each do |item|
        buffer.push(item)
        next unless buffer.length == batch_size

        yield buffer
        buffer = []
      end
      yield buffer if buffer.any?
    end

    # Records +batch_id+ in the Redis batches set, then enqueues a
    # BulkReindexJob with +options+ merged into the base arguments.
    def bulk_reindex_job(scope, batch_id, options)
      Searchkick.with_redis { |redis| redis.sadd(batches_key, batch_id) }

      base_args = {
        class_name: scope.searchkick_options[:class_name],
        index_name: index.name,
        batch_id: batch_id
      }
      Searchkick::BulkReindexJob.perform_later(**base_args.merge(options))
    end

    # Runs the block, retrying once on Faraday::ClientError before
    # re-raising it.
    def with_retries
      attempts = 0

      begin
        yield
      rescue Faraday::ClientError => e
        raise e if attempts >= 1

        attempts += 1
        retry
      end
    end

    # Redis key of the set holding outstanding batch ids for this index.
    def batches_key
      "searchkick:reindex:#{index.name}:batches"
    end

    # Batch size from the index options, defaulting to 1000 (memoized).
    def batch_size
      @batch_size ||= index.options[:batch_size] || 1000
    end
  end
end
@@ -1,10 +1,10 @@
1
1
  module Searchkick
2
2
  class BulkReindexJob < ActiveJob::Base
3
- queue_as :searchkick
3
+ queue_as { Searchkick.queue_name }
4
4
 
5
5
  def perform(class_name:, record_ids: nil, index_name: nil, method_name: nil, batch_id: nil, min_id: nil, max_id: nil)
6
6
  klass = class_name.constantize
7
- index = index_name ? Searchkick::Index.new(index_name) : klass.searchkick_index
7
+ index = index_name ? Searchkick::Index.new(index_name, **klass.searchkick_options) : klass.searchkick_index
8
8
  record_ids ||= min_id..max_id
9
9
  index.import_scope(
10
10
  Searchkick.load_records(klass, record_ids),
@@ -0,0 +1,12 @@
1
module Searchkick
  # `Hashie::Mash` subclass used to wrap Hash-like structures
  # (responses from Elasticsearch).
  #
  # It exists mainly to turn off the warning Hashie prints for
  # re-defined methods, such as `sort`.
  #
  class HashWrapper < ::Hashie::Mash
    # only call disable_warnings when the class responds to it
    if respond_to?(:disable_warnings)
      disable_warnings
    end
  end
end