chewy 6.0.0 → 7.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +1 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +39 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  5. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  6. data/.github/dependabot.yml +42 -0
  7. data/.github/workflows/ruby.yml +60 -0
  8. data/.rubocop.yml +16 -8
  9. data/.rubocop_todo.yml +110 -22
  10. data/CHANGELOG.md +396 -105
  11. data/CODE_OF_CONDUCT.md +14 -0
  12. data/CONTRIBUTING.md +63 -0
  13. data/Gemfile +4 -10
  14. data/Guardfile +3 -1
  15. data/README.md +497 -275
  16. data/chewy.gemspec +5 -20
  17. data/gemfiles/base.gemfile +12 -0
  18. data/gemfiles/rails.6.1.activerecord.gemfile +10 -15
  19. data/gemfiles/rails.7.0.activerecord.gemfile +14 -0
  20. data/gemfiles/rails.7.1.activerecord.gemfile +14 -0
  21. data/lib/chewy/config.rb +60 -52
  22. data/lib/chewy/elastic_client.rb +31 -0
  23. data/lib/chewy/errors.rb +7 -10
  24. data/lib/chewy/fields/base.rb +79 -13
  25. data/lib/chewy/fields/root.rb +4 -14
  26. data/lib/chewy/index/actions.rb +54 -37
  27. data/lib/chewy/{type → index}/adapter/active_record.rb +30 -6
  28. data/lib/chewy/{type → index}/adapter/base.rb +2 -3
  29. data/lib/chewy/{type → index}/adapter/object.rb +27 -31
  30. data/lib/chewy/{type → index}/adapter/orm.rb +17 -18
  31. data/lib/chewy/index/aliases.rb +14 -5
  32. data/lib/chewy/index/crutch.rb +40 -0
  33. data/lib/chewy/index/import/bulk_builder.rb +311 -0
  34. data/lib/chewy/{type → index}/import/bulk_request.rb +6 -7
  35. data/lib/chewy/{type → index}/import/journal_builder.rb +11 -12
  36. data/lib/chewy/{type → index}/import/routine.rb +18 -17
  37. data/lib/chewy/{type → index}/import.rb +76 -32
  38. data/lib/chewy/{type → index}/mapping.rb +29 -34
  39. data/lib/chewy/index/observe/active_record_methods.rb +87 -0
  40. data/lib/chewy/index/observe/callback.rb +34 -0
  41. data/lib/chewy/index/observe.rb +17 -0
  42. data/lib/chewy/index/specification.rb +1 -0
  43. data/lib/chewy/{type → index}/syncer.rb +59 -59
  44. data/lib/chewy/{type → index}/witchcraft.rb +11 -7
  45. data/lib/chewy/{type → index}/wrapper.rb +2 -2
  46. data/lib/chewy/index.rb +67 -94
  47. data/lib/chewy/journal.rb +25 -14
  48. data/lib/chewy/log_subscriber.rb +5 -1
  49. data/lib/chewy/minitest/helpers.rb +86 -13
  50. data/lib/chewy/minitest/search_index_receiver.rb +24 -26
  51. data/lib/chewy/railtie.rb +6 -20
  52. data/lib/chewy/rake_helper.rb +169 -113
  53. data/lib/chewy/rspec/build_query.rb +12 -0
  54. data/lib/chewy/rspec/helpers.rb +55 -0
  55. data/lib/chewy/rspec/update_index.rb +55 -44
  56. data/lib/chewy/rspec.rb +2 -0
  57. data/lib/chewy/runtime/version.rb +1 -1
  58. data/lib/chewy/runtime.rb +1 -1
  59. data/lib/chewy/search/loader.rb +19 -41
  60. data/lib/chewy/search/parameters/collapse.rb +16 -0
  61. data/lib/chewy/search/parameters/concerns/query_storage.rb +2 -2
  62. data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
  63. data/lib/chewy/search/parameters/indices.rb +13 -58
  64. data/lib/chewy/search/parameters/knn.rb +16 -0
  65. data/lib/chewy/search/parameters/order.rb +6 -19
  66. data/lib/chewy/search/parameters/source.rb +5 -1
  67. data/lib/chewy/search/parameters/storage.rb +1 -1
  68. data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
  69. data/lib/chewy/search/parameters.rb +6 -4
  70. data/lib/chewy/search/query_proxy.rb +9 -2
  71. data/lib/chewy/search/request.rb +169 -134
  72. data/lib/chewy/search/response.rb +5 -5
  73. data/lib/chewy/search/scoping.rb +7 -8
  74. data/lib/chewy/search/scrolling.rb +13 -13
  75. data/lib/chewy/search.rb +9 -19
  76. data/lib/chewy/stash.rb +19 -30
  77. data/lib/chewy/strategy/active_job.rb +1 -1
  78. data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
  79. data/lib/chewy/strategy/base.rb +10 -0
  80. data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
  81. data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
  82. data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
  83. data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
  84. data/lib/chewy/strategy/sidekiq.rb +2 -1
  85. data/lib/chewy/strategy.rb +6 -19
  86. data/lib/chewy/version.rb +1 -1
  87. data/lib/chewy.rb +39 -86
  88. data/lib/generators/chewy/install_generator.rb +1 -1
  89. data/lib/tasks/chewy.rake +36 -32
  90. data/migration_guide.md +46 -8
  91. data/spec/chewy/config_spec.rb +16 -41
  92. data/spec/chewy/elastic_client_spec.rb +26 -0
  93. data/spec/chewy/fields/base_spec.rb +432 -147
  94. data/spec/chewy/fields/root_spec.rb +20 -28
  95. data/spec/chewy/fields/time_fields_spec.rb +5 -5
  96. data/spec/chewy/index/actions_spec.rb +368 -59
  97. data/spec/chewy/{type → index}/adapter/active_record_spec.rb +156 -40
  98. data/spec/chewy/{type → index}/adapter/object_spec.rb +21 -6
  99. data/spec/chewy/index/aliases_spec.rb +3 -3
  100. data/spec/chewy/index/import/bulk_builder_spec.rb +494 -0
  101. data/spec/chewy/{type → index}/import/bulk_request_spec.rb +5 -12
  102. data/spec/chewy/{type → index}/import/journal_builder_spec.rb +9 -19
  103. data/spec/chewy/{type → index}/import/routine_spec.rb +19 -19
  104. data/spec/chewy/{type → index}/import_spec.rb +164 -98
  105. data/spec/chewy/index/mapping_spec.rb +135 -0
  106. data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
  107. data/spec/chewy/index/observe/callback_spec.rb +139 -0
  108. data/spec/chewy/index/observe_spec.rb +143 -0
  109. data/spec/chewy/index/settings_spec.rb +3 -1
  110. data/spec/chewy/index/specification_spec.rb +20 -30
  111. data/spec/chewy/{type → index}/syncer_spec.rb +14 -19
  112. data/spec/chewy/{type → index}/witchcraft_spec.rb +20 -22
  113. data/spec/chewy/index/wrapper_spec.rb +100 -0
  114. data/spec/chewy/index_spec.rb +60 -105
  115. data/spec/chewy/journal_spec.rb +25 -74
  116. data/spec/chewy/minitest/helpers_spec.rb +123 -15
  117. data/spec/chewy/minitest/search_index_receiver_spec.rb +28 -30
  118. data/spec/chewy/multi_search_spec.rb +4 -5
  119. data/spec/chewy/rake_helper_spec.rb +315 -55
  120. data/spec/chewy/rspec/build_query_spec.rb +34 -0
  121. data/spec/chewy/rspec/helpers_spec.rb +61 -0
  122. data/spec/chewy/rspec/update_index_spec.rb +74 -71
  123. data/spec/chewy/runtime_spec.rb +2 -2
  124. data/spec/chewy/search/loader_spec.rb +19 -53
  125. data/spec/chewy/search/pagination/kaminari_examples.rb +4 -6
  126. data/spec/chewy/search/pagination/kaminari_spec.rb +2 -2
  127. data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
  128. data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
  129. data/spec/chewy/search/parameters/indices_spec.rb +26 -117
  130. data/spec/chewy/search/parameters/knn_spec.rb +5 -0
  131. data/spec/chewy/search/parameters/order_spec.rb +18 -11
  132. data/spec/chewy/search/parameters/query_storage_examples.rb +67 -21
  133. data/spec/chewy/search/parameters/search_after_spec.rb +4 -1
  134. data/spec/chewy/search/parameters/source_spec.rb +8 -2
  135. data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
  136. data/spec/chewy/search/parameters_spec.rb +18 -4
  137. data/spec/chewy/search/query_proxy_spec.rb +68 -17
  138. data/spec/chewy/search/request_spec.rb +292 -110
  139. data/spec/chewy/search/response_spec.rb +12 -12
  140. data/spec/chewy/search/scrolling_spec.rb +10 -17
  141. data/spec/chewy/search_spec.rb +40 -34
  142. data/spec/chewy/stash_spec.rb +9 -21
  143. data/spec/chewy/strategy/active_job_spec.rb +16 -16
  144. data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
  145. data/spec/chewy/strategy/atomic_spec.rb +9 -10
  146. data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
  147. data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
  148. data/spec/chewy/strategy/sidekiq_spec.rb +12 -12
  149. data/spec/chewy/strategy_spec.rb +19 -15
  150. data/spec/chewy_spec.rb +24 -107
  151. data/spec/spec_helper.rb +3 -22
  152. data/spec/support/active_record.rb +25 -7
  153. metadata +78 -339
  154. data/.circleci/config.yml +0 -240
  155. data/Appraisals +0 -81
  156. data/gemfiles/rails.5.2.activerecord.gemfile +0 -17
  157. data/gemfiles/rails.5.2.mongoid.6.4.gemfile +0 -17
  158. data/gemfiles/rails.6.0.activerecord.gemfile +0 -17
  159. data/gemfiles/sequel.4.45.gemfile +0 -11
  160. data/lib/chewy/backports/deep_dup.rb +0 -46
  161. data/lib/chewy/backports/duplicable.rb +0 -91
  162. data/lib/chewy/search/pagination/will_paginate.rb +0 -43
  163. data/lib/chewy/search/parameters/types.rb +0 -20
  164. data/lib/chewy/strategy/resque.rb +0 -27
  165. data/lib/chewy/strategy/shoryuken.rb +0 -40
  166. data/lib/chewy/type/actions.rb +0 -43
  167. data/lib/chewy/type/adapter/mongoid.rb +0 -67
  168. data/lib/chewy/type/adapter/sequel.rb +0 -93
  169. data/lib/chewy/type/crutch.rb +0 -32
  170. data/lib/chewy/type/import/bulk_builder.rb +0 -122
  171. data/lib/chewy/type/observe.rb +0 -82
  172. data/lib/chewy/type.rb +0 -120
  173. data/lib/sequel/plugins/chewy_observe.rb +0 -63
  174. data/spec/chewy/search/pagination/will_paginate_examples.rb +0 -63
  175. data/spec/chewy/search/pagination/will_paginate_spec.rb +0 -23
  176. data/spec/chewy/search/parameters/types_spec.rb +0 -5
  177. data/spec/chewy/strategy/resque_spec.rb +0 -46
  178. data/spec/chewy/strategy/shoryuken_spec.rb +0 -70
  179. data/spec/chewy/type/actions_spec.rb +0 -50
  180. data/spec/chewy/type/adapter/mongoid_spec.rb +0 -372
  181. data/spec/chewy/type/adapter/sequel_spec.rb +0 -472
  182. data/spec/chewy/type/import/bulk_builder_spec.rb +0 -194
  183. data/spec/chewy/type/mapping_spec.rb +0 -175
  184. data/spec/chewy/type/observe_spec.rb +0 -137
  185. data/spec/chewy/type/wrapper_spec.rb +0 -100
  186. data/spec/chewy/type_spec.rb +0 -55
  187. data/spec/support/mongoid.rb +0 -93
  188. data/spec/support/sequel.rb +0 -80
@@ -1,21 +1,31 @@
1
- require 'chewy/type/adapter/orm'
1
+ require 'chewy/index/adapter/orm'
2
2
 
3
3
  module Chewy
4
- class Type
4
+ class Index
5
5
  module Adapter
6
6
  class ActiveRecord < Orm
7
7
  def self.accepts?(target)
8
8
  defined?(::ActiveRecord::Base) && (
9
- target.is_a?(Class) && target < ::ActiveRecord::Base ||
9
+ (target.is_a?(Class) && target < ::ActiveRecord::Base) ||
10
10
  target.is_a?(::ActiveRecord::Relation))
11
11
  end
12
12
 
13
13
  private
14
14
 
15
15
  def cleanup_default_scope!
16
- if Chewy.logger && (@default_scope.arel.orders.present? ||
16
+ behavior = Chewy.config.import_scope_cleanup_behavior
17
+
18
+ if behavior != :ignore && (@default_scope.arel.orders.present? ||
17
19
  @default_scope.arel.limit.present? || @default_scope.arel.offset.present?)
18
- Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified')
20
+ if behavior == :warn && Chewy.logger
21
+ gem_dir = File.realpath('../..', __dir__)
22
+ source = caller.grep_v(Regexp.new(gem_dir)).first
23
+ Chewy.logger.warn(
24
+ "Default type scope order, limit and offset are ignored and will be nullified (called from: #{source})"
25
+ )
26
+ elsif behavior == :raise
27
+ raise ImportScopeCleanupError, 'Default type scope order, limit and offset are ignored and will be nullified'
28
+ end
19
29
  end
20
30
 
21
31
  @default_scope = @default_scope.reorder(nil).limit(nil).offset(nil)
@@ -60,7 +70,15 @@ module Chewy
60
70
  end
61
71
 
62
72
  def pluck_in_batches(scope, fields: [], batch_size: nil, typecast: true)
63
- return enum_for(:pluck_in_batches, scope, fields: fields, batch_size: batch_size, typecast: typecast) unless block_given?
73
+ unless block_given?
74
+ return enum_for(
75
+ :pluck_in_batches,
76
+ scope,
77
+ fields: fields,
78
+ batch_size: batch_size,
79
+ typecast: typecast
80
+ )
81
+ end
64
82
 
65
83
  scope = scope.reorder(target_id.asc).limit(batch_size)
66
84
  ids = pluck(scope, fields: fields, typecast: typecast)
@@ -69,6 +87,7 @@ module Chewy
69
87
  while ids.present?
70
88
  yield ids
71
89
  break if ids.size < batch_size
90
+
72
91
  last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
73
92
  ids = pluck(scope.where(target_id.gt(last_id)), fields: fields, typecast: typecast)
74
93
  end
@@ -85,6 +104,11 @@ module Chewy
85
104
  object_class.connection.execute(sql).map(&converter)
86
105
  end
87
106
 
107
+ def raw(scope, converter)
108
+ sql = scope.to_sql
109
+ object_class.connection.execute(sql).map(&converter)
110
+ end
111
+
88
112
  def relation_class
89
113
  ::ActiveRecord::Relation
90
114
  end
@@ -1,5 +1,5 @@
1
1
  module Chewy
2
- class Type
2
+ class Index
3
3
  module Adapter
4
4
  # Basic adapter class. Contains interface, need to implement to add any classes support
5
5
  class Base
@@ -13,8 +13,7 @@ module Chewy
13
13
  true
14
14
  end
15
15
 
16
- # Camelcased name, used as type class constant name.
17
- # For returned value 'Product' will be generated class name `ProductsIndex::Product`
16
+ # Camelcased name.
18
17
  #
19
18
  def name
20
19
  raise NotImplementedError
@@ -1,7 +1,7 @@
1
- require 'chewy/type/adapter/base'
1
+ require 'chewy/index/adapter/base'
2
2
 
3
3
  module Chewy
4
- class Type
4
+ class Index
5
5
  module Adapter
6
6
  # This adapter provides an ability to import documents from any
7
7
  # source. You can actually use any class or even a symbol as
@@ -14,15 +14,15 @@ module Chewy
14
14
  # @see #import
15
15
  # @see #load
16
16
  class Object < Base
17
- # The signature of the type definition.
17
+ # The signature of the index scope definition.
18
18
  #
19
19
  # @example
20
- # define_type :geoname
21
- # define_type Geoname
22
- # define_type -> { Geoname.all_the_places }, name: 'geoname'
20
+ # index_scope :geoname
21
+ # index_scope Geoname
22
+ # index_scope -> { Geoname.all_the_places }, name: 'geoname'
23
23
  #
24
24
  # @param target [Class, Symbol, String, Proc] a source of data and everything
25
- # @option options [String, Symbol] :name redefines the inferred type name if necessary
25
+ # @option options [String, Symbol] :name redefines the inferred name if necessary
26
26
  # @option options [String, Symbol] :import_all_method redefines import method name
27
27
  # @option options [String, Symbol] :load_all_method redefines batch load method name
28
28
  # @option options [String, Symbol] :load_one_method redefines per-object load method name
@@ -31,14 +31,13 @@ module Chewy
31
31
  @options = options
32
32
  end
33
33
 
34
- # Name is used for the type class creation. Inferred from the target
35
- # by default if possible.
34
+ # Inferred from the target by default if possible.
36
35
  #
37
36
  # @example
38
- # # defines MyIndex::Geoname
39
- # define_type :geoname
40
- # # still defines MyIndex::Geoname
41
- # define_type -> { Geoname.all_the_places }, name: 'geoname'
37
+ # # defines name = Geoname
38
+ # index_scope :geoname
39
+ # # still defines name = Geoname
40
+ # index_scope -> { Geoname.all_the_places }, name: 'geoname'
42
41
  #
43
42
  # @return [String]
44
43
  def name
@@ -54,14 +53,14 @@ module Chewy
54
53
  Array.wrap(collection)
55
54
  end
56
55
 
57
- # This method is used internally by `Chewy::Type.import`.
56
+ # This method is used internally by `Chewy::Index.import`.
58
57
  #
59
58
  # The idea is that any object can be imported to ES if
60
59
  # it responds to `#to_json` method.
61
60
  #
62
61
  # If method `destroyed?` is defined for object (or, in case of hash object,
63
62
  # it has `:_destroyed` or `'_destroyed'` key) and returns `true` or object
64
- # satisfy `delete_if` type option then object will be deleted from index.
63
+ # satisfy `delete_if` option then object will be deleted from index.
65
64
  # But in order to be destroyable, objects need to respond to `id` method
66
65
  # or have an `id` key so ElasticSearch could know which one to delete.
67
66
  #
@@ -78,10 +77,10 @@ module Chewy
78
77
  # end
79
78
  # end
80
79
  #
81
- # # All the folloving variants will work:
82
- # define_type Geoname
83
- # define_type Geoname, import_all_method: 'import_all'
84
- # define_type -> { FancyGeoAPI.all_points_collection }, name: 'geoname'
80
+ # # All the following variants will work:
81
+ # index_scope Geoname
82
+ # index_scope Geoname, import_all_method: 'import_all'
83
+ # index_scope -> { FancyGeoAPI.all_points_collection }, name: 'geoname'
85
84
  #
86
85
  # @param args [Array<#to_json>]
87
86
  # @option options [Integer] :batch_size import processing batch size
@@ -113,16 +112,15 @@ module Chewy
113
112
  # end
114
113
  # end
115
114
  #
116
- # @see Chewy::Type::Adapter::Base#import_fields
117
- def import_fields(*args)
115
+ # @see Chewy::Index::Adapter::Base#import_fields
116
+ def import_fields(*args, &block)
118
117
  return enum_for(:import_fields, *args) unless block_given?
118
+
119
119
  options = args.extract_options!
120
120
  options[:batch_size] ||= BATCH_SIZE
121
121
 
122
122
  if args.empty? && @target.respond_to?(pluck_method)
123
- @target.send(pluck_method, :id, *options[:fields]).each_slice(options[:batch_size]) do |batch|
124
- yield batch
125
- end
123
+ @target.send(pluck_method, :id, *options[:fields]).each_slice(options[:batch_size], &block)
126
124
  elsif options[:fields].blank?
127
125
  import_references(*args, options) do |batch|
128
126
  yield batch.map { |object| object_field(object, :id) || object }
@@ -140,14 +138,12 @@ module Chewy
140
138
 
141
139
  # For the Object adapter returns the objects themselves in batches.
142
140
  #
143
- # @see Chewy::Type::Adapter::Base#import_references
144
- def import_references(*args)
141
+ # @see Chewy::Index::Adapter::Base#import_references
142
+ def import_references(*args, &block)
145
143
  return enum_for(:import_references, *args) unless block_given?
146
144
 
147
145
  collection, options = import_args(*args)
148
- collection.each_slice(options[:batch_size]) do |batch|
149
- yield batch
150
- end
146
+ collection.each_slice(options[:batch_size], &block)
151
147
  end
152
148
 
153
149
  # This method is used internally by the request DSL when the
@@ -157,7 +153,7 @@ module Chewy
157
153
  #
158
154
  # If none of the `load_all_method` or `load_one_method` is implemented
159
155
  # for the target - the method will return nil. This means that the
160
- # loader will return an array `Chewy::Type` objects that actually was passed.
156
+ # loader will return an array of `Chewy::Index` objects that were actually passed.
161
157
  #
162
158
  # To use loading for objects it is obviously required to provide
163
159
  # some meaningful ids for ES documents.
@@ -175,7 +171,7 @@ module Chewy
175
171
  # end
176
172
  # end
177
173
  #
178
- # MyIndex::Geoname.load(additional_data: true).objects
174
+ # MyIndex.load(additional_data: true).objects
179
175
  #
180
176
  # @param ids [Array<Hash>] an array of ids from ES hits
181
177
  # @param options [Hash] any options passed here with the request DSL `load` method.
@@ -1,7 +1,7 @@
1
- require 'chewy/type/adapter/base'
1
+ require 'chewy/index/adapter/base'
2
2
 
3
3
  module Chewy
4
- class Type
4
+ class Index
5
5
  module Adapter
6
6
  class Orm < Base
7
7
  attr_reader :default_scope
@@ -54,24 +54,23 @@ module Chewy
54
54
  #
55
55
  # users = User.all
56
56
  # users.each { |user| user.destroy if user.inactive? }
57
- # UsersIndex::User.import users # inactive users will be deleted from index
57
+ # UsersIndex.import users # inactive users will be deleted from index
58
58
  # # or
59
- # UsersIndex::User.import users.map(&:id) # deleted user ids will be deleted from index
59
+ # UsersIndex.import users.map(&:id) # deleted user ids will be deleted from index
60
60
  #
61
61
  # Also there is custom type option `delete_if`. If it returns `true`
62
62
  # object will be deleted from index. Note that if this option is defined and
63
63
  # return `false` Chewy will still check `destroyed?` method. This is useful
64
64
  # for paranoid objects deleting implementation.
65
65
  #
66
- # define_type User, delete_if: ->{ deleted_at } do
67
- # ...
68
- # end
66
+ # index_scope User, delete_if: ->{ deleted_at }
67
+ # ...
69
68
  #
70
69
  # users = User.all
71
70
  # users.each { |user| user.deleted_at = Time.now }
72
- # UsersIndex::User.import users # paranoid deleted users will be deleted from index
71
+ # UsersIndex.import users # paranoid deleted users will be deleted from index
73
72
  # # or
74
- # UsersIndex::User.import users.map(&:id) # user ids will be deleted from index
73
+ # UsersIndex.import users.map(&:id) # user ids will be deleted from index
75
74
  #
76
75
  def import(*args, &block)
77
76
  collection, options = import_args(*args)
@@ -92,30 +91,30 @@ module Chewy
92
91
  collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
93
92
  pluck_in_batches(collection, **options.slice(:fields, :batch_size, :typecast), &block)
94
93
  else
95
- identify(collection).each_slice(options[:batch_size]) do |batch|
96
- yield batch
97
- end
94
+ identify(collection).each_slice(options[:batch_size], &block)
98
95
  end
99
96
  end
100
97
  alias_method :import_references, :import_fields
101
98
 
102
99
  def load(ids, **options)
103
100
  scope = all_scope_where_ids_in(ids)
104
- additional_scope = options[options[:_type].type_name.to_sym].try(:[], :scope) || options[:scope]
101
+ additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope]
105
102
 
106
103
  loaded_objects = load_scope_objects(scope, additional_scope)
107
- .index_by do |object|
108
- object.public_send(primary_key).to_s
109
- end
104
+ loaded_objects = raw(loaded_objects, options[:raw_import]) if options[:raw_import]
105
+
106
+ indexed_objects = loaded_objects.index_by do |object|
107
+ object.public_send(primary_key).to_s
108
+ end
110
109
 
111
- ids.map { |id| loaded_objects[id.to_s] }
110
+ ids.map { |id| indexed_objects[id.to_s] }
112
111
  end
113
112
 
114
113
  private
115
114
 
116
115
  def import_objects(collection, options)
117
116
  collection_ids = identify(collection)
118
- hash = Hash[collection_ids.map(&:to_s).zip(collection)]
117
+ hash = collection_ids.map(&:to_s).zip(collection).to_h
119
118
 
120
119
  indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
121
120
  batch = if options[:raw_import]
@@ -5,14 +5,23 @@ module Chewy
5
5
 
6
6
  module ClassMethods
7
7
  def indexes
8
- client.indices.get_alias(name: index_name).keys
9
- rescue Elasticsearch::Transport::Transport::Errors::NotFound
10
- []
8
+ indexes = empty_if_not_found { client.indices.get(index: index_name).keys }
9
+ indexes += empty_if_not_found { client.indices.get_alias(name: index_name).keys }
10
+ indexes.compact.uniq
11
11
  end
12
12
 
13
13
  def aliases
14
- name = index_name
15
- client.indices.get_alias(index: name, name: '*')[name].try(:[], 'aliases').try(:keys) || []
14
+ empty_if_not_found do
15
+ client.indices.get_alias(index: index_name, name: '*').values.flat_map do |aliases|
16
+ aliases['aliases'].keys
17
+ end
18
+ end.compact.uniq
19
+ end
20
+
21
+ private
22
+
23
+ def empty_if_not_found
24
+ yield
16
25
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
17
26
  []
18
27
  end
@@ -0,0 +1,40 @@
1
+ module Chewy
2
+ class Index
3
+ module Crutch
4
+ extend ActiveSupport::Concern
5
+
6
+ included do
7
+ class_attribute :_crutches
8
+ self._crutches = {}
9
+ end
10
+
11
+ class Crutches
12
+ def initialize(index, collection)
13
+ @index = index
14
+ @collection = collection
15
+ @crutches_instances = {}
16
+ end
17
+
18
+ def method_missing(name, *, **)
19
+ return self[name] if @index._crutches.key?(name)
20
+
21
+ super
22
+ end
23
+
24
+ def respond_to_missing?(name, include_private = false)
25
+ @index._crutches.key?(name) || super
26
+ end
27
+
28
+ def [](name)
29
+ @crutches_instances[name] ||= @index._crutches[:"#{name}"].call(@collection)
30
+ end
31
+ end
32
+
33
+ module ClassMethods
34
+ def crutch(name, &block)
35
+ self._crutches = _crutches.merge(name.to_sym => block)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,311 @@
1
+ module Chewy
2
+ class Index
3
+ module Import
4
+ # This class purpose is to build ES client-acceptable bulk
5
+ # request body from the passed objects for index and deletion.
6
+ # It handles parent-child relationships as well by fetching
7
+ # existing documents from ES and database, taking their join field values and
8
+ # using it in the bulk body.
9
+ # If fields are passed - it creates partial update entries except for
10
+ # the cases when the type has parent and parent_id has been changed.
11
+ class BulkBuilder
12
+ # @param index [Chewy::Index] desired index
13
+ # @param to_index [Array<Object>] objects to index
14
+ # @param delete [Array<Object>] objects or ids to delete
15
+ # @param fields [Array<Symbol, String>] an array of fields for documents update
16
+ def initialize(index, to_index: [], delete: [], fields: [])
17
+ @index = index
18
+ @to_index = to_index
19
+ @delete = delete
20
+ @fields = fields.map!(&:to_sym)
21
+ end
22
+
23
+ # Returns ES API-ready bulk request body.
24
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
+ # @return [Array<Hash>] bulk body
26
+ def bulk_body
27
+ populate_cache
28
+
29
+ @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
30
+ @delete.flat_map(&method(:delete_entry))
31
+ ).uniq
32
+ end
33
+
34
+ # The only purpose of this method is to cache document ids for
35
+ # all the passed object for index to avoid ids recalculation.
36
+ #
37
+ # @return [Hash[String => Object]] an ids-objects index hash
38
+ def index_objects_by_id
39
+ @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
40
+ end
41
+
42
+ private
43
+
44
+ def crutches_for_index
45
+ @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
46
+ end
47
+
48
+ def index_entry(object)
49
+ entry = {}
50
+ entry[:_id] = index_object_ids[object] if index_object_ids[object]
51
+ entry[:routing] = routing(object) if join_field?
52
+
53
+ parent = cache(entry[:_id])
54
+ data = data_for(object) if parent.present?
55
+ if parent.present? && parent_changed?(data, parent)
56
+ reindex_entries(object, data) + reindex_descendants(object)
57
+ elsif @fields.present?
58
+ return [] unless entry[:_id]
59
+
60
+ entry[:data] = {doc: data_for(object, fields: @fields)}
61
+ [{update: entry}]
62
+ else
63
+ entry[:data] = data || data_for(object)
64
+ [{index: entry}]
65
+ end
66
+ end
67
+
68
+ def reindex_entries(object, data, root: object)
69
+ entry = {}
70
+ entry[:_id] = index_object_ids[object] || entry_id(object)
71
+ entry[:data] = data
72
+ entry[:routing] = routing(root) || routing(object) if join_field?
73
+ delete = delete_single_entry(object, root: root).first
74
+ index = {index: entry}
75
+ [delete, index]
76
+ end
77
+
78
+ def reindex_descendants(root)
79
+ descendants = load_descendants(root)
80
+ crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
81
+ descendants.flat_map do |object|
82
+ reindex_entries(
83
+ object,
84
+ data_for(object, crutches: crutches),
85
+ root: root
86
+ )
87
+ end
88
+ end
89
+
90
+ def delete_entry(object)
91
+ delete_single_entry(object)
92
+ end
93
+
94
+ def delete_single_entry(object, root: object)
95
+ entry = {}
96
+ entry[:_id] = entry_id(object)
97
+ entry[:_id] ||= object.as_json
98
+
99
+ return [] if entry[:_id].blank?
100
+
101
+ if join_field?
102
+ cached_parent = cache(entry[:_id])
103
+ entry_parent_id =
104
+ if cached_parent
105
+ cached_parent[:parent_id]
106
+ else
107
+ find_parent_id(object)
108
+ end
109
+
110
+ entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
111
+ entry[:parent] = entry_parent_id if entry_parent_id
112
+ end
113
+
114
+ [{delete: entry}]
115
+ end
116
+
117
+ def load_descendants(root)
118
+ root_type = join_field_type(root)
119
+ return [] unless root_type
120
+
121
+ descendant_ids = []
122
+ grouped_parents = {root_type => [root.id]}
123
+ # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
124
+ until grouped_parents.empty?
125
+ children_data = grouped_parents.flat_map do |parent_type, parent_ids|
126
+ @index.query(
127
+ has_parent: {
128
+ parent_type: parent_type,
129
+ # ignore_unmapped to avoid error for the leaves of the tree
130
+ # (types without children)
131
+ ignore_unmapped: true,
132
+ query: {ids: {values: parent_ids}}
133
+ }
134
+ ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
135
+ end
136
+ descendant_ids |= children_data.map(&:last)
137
+
138
+ grouped_parents = {}
139
+ children_data.each do |name, id|
140
+ next unless name
141
+
142
+ grouped_parents[name] ||= []
143
+ grouped_parents[name] << id
144
+ end
145
+ end
146
+ # query the primary database to load the descendants' records
147
+ @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
148
+ end
149
+
150
+ def populate_cache
151
+ @cache = load_cache
152
+ end
153
+
154
+ def cache(id)
155
+ @cache[id.to_s]
156
+ end
157
+
158
+ def load_cache
159
+ return {} unless join_field?
160
+
161
+ @index
162
+ .filter(ids: {values: ids_for_cache})
163
+ .order('_doc')
164
+ .pluck(:_id, :_routing, join_field)
165
+ .to_h do |id, routing, join|
166
+ [
167
+ id,
168
+ {routing: routing, parent_id: join['parent']}
169
+ ]
170
+ end
171
+ end
172
+
173
+ def existing_routing(id)
174
+ # All objects needed here should be cached in #load_cache,
175
+ # if not, we return nil. In some cases we don't have existing routing cached,
176
+ # e.g. for loaded descendants
177
+ return unless cache(id)
178
+
179
+ cache(id)[:routing]
180
+ end
181
+
182
+ # Two types of ids:
183
+ # * of parents of the objects to be indexed
184
+ # * of objects to be deleted
185
+ def ids_for_cache
186
+ ids = @to_index.flat_map do |object|
187
+ [find_parent_id(object), object.id] if object.respond_to?(:id)
188
+ end
189
+ ids.concat(@delete.map do |object|
190
+ object.id if object.respond_to?(:id)
191
+ end)
192
+ ids.uniq.compact
193
+ end
194
+
195
+ def routing(object)
196
+ # filter out non-model objects, early return on object==nil
197
+ return unless object.respond_to?(:id)
198
+
199
+ parent_id = find_parent_id(object)
200
+ if parent_id
201
+ routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
202
+ else
203
+ object.id.to_s
204
+ end
205
+ end
206
+
207
+ def find_parent_id(object)
208
+ return unless object.respond_to?(:id)
209
+
210
+ join = data_for(object, fields: [join_field.to_sym])[join_field]
211
+ join['parent'] if join
212
+ end
213
+
214
+ def join_field
215
+ return @join_field if defined?(@join_field)
216
+
217
+ @join_field = find_join_field
218
+ end
219
+
220
+ def find_join_field
221
+ type_settings = @index.mappings_hash[:mappings]
222
+ return unless type_settings
223
+
224
+ properties = type_settings[:properties]
225
+ join_fields = properties.find { |_, options| options[:type] == :join }
226
+ return unless join_fields
227
+
228
+ join_fields.first.to_s
229
+ end
230
+
231
+ def join_field_type(object)
232
+ return unless join_field?
233
+
234
+ raw_object =
235
+ if @index._default_import_options[:raw_import]
236
+ @index._default_import_options[:raw_import].call(object.attributes)
237
+ else
238
+ object
239
+ end
240
+
241
+ join_field_value = data_for(
242
+ raw_object,
243
+ fields: [join_field.to_sym], # build only the field that is needed
244
+ crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
245
+ )[join_field]
246
+
247
+ case join_field_value
248
+ when String
249
+ join_field_value
250
+ when Hash
251
+ join_field_value['name']
252
+ end
253
+ end
254
+
255
+ def join_field?
256
+ join_field && !join_field.empty?
257
+ end
258
+
259
+ def data_for(object, fields: [], crutches: crutches_for_index)
260
+ @index.compose(object, crutches, fields: fields)
261
+ end
262
+
263
+ def parent_changed?(data, old_parent)
264
+ return false unless old_parent
265
+ return false unless join_field?
266
+ return false unless @fields.include?(join_field.to_sym)
267
+ return false unless data.key?(join_field)
268
+
269
+ # The join field value can be a hash, e.g.:
270
+ # {"name": "child", "parent": "123"} for a child
271
+ # {"name": "parent"} for a parent
272
+ # but it can also be a string: (e.g. "parent") for a parent:
273
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
274
+ new_join_field_value = data[join_field]
275
+ if new_join_field_value.is_a? Hash
276
+ # If we have a hash in the join field,
277
+ # we're taking the `parent` field that holds the parent id.
278
+ new_parent_id = new_join_field_value['parent']
279
+ new_parent_id != old_parent[:parent_id]
280
+ else
281
+ # If there is a non-hash value (String or nil), it means that the join field is changed
282
+ # and the current object is no longer a child.
283
+ true
284
+ end
285
+ end
286
+
287
+ def entry_id(object)
288
+ if type_root.id
289
+ type_root.compose_id(object)
290
+ else
291
+ id = object.id if object.respond_to?(:id)
292
+ id ||= object[:id] || object['id'] if object.is_a?(Hash)
293
+ id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
294
+ id
295
+ end
296
+ end
297
+
298
+ def index_object_ids
299
+ @index_object_ids ||= @to_index.each_with_object({}) do |object, result|
300
+ id = entry_id(object)
301
+ result[object] = id if id.present?
302
+ end
303
+ end
304
+
305
+ def type_root
306
+ @type_root ||= @index.root
307
+ end
308
+ end
309
+ end
310
+ end
311
+ end