chewy 5.1.0 → 7.2.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (234) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +1 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +39 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  5. data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
  6. data/.github/workflows/ruby.yml +73 -0
  7. data/.rubocop.yml +13 -8
  8. data/.rubocop_todo.yml +110 -22
  9. data/CHANGELOG.md +449 -347
  10. data/CODE_OF_CONDUCT.md +14 -0
  11. data/CONTRIBUTING.md +63 -0
  12. data/Gemfile +3 -7
  13. data/Guardfile +3 -1
  14. data/LICENSE.txt +1 -1
  15. data/README.md +423 -311
  16. data/chewy.gemspec +8 -10
  17. data/gemfiles/rails.5.2.activerecord.gemfile +9 -14
  18. data/gemfiles/rails.6.0.activerecord.gemfile +11 -0
  19. data/gemfiles/rails.6.1.activerecord.gemfile +13 -0
  20. data/gemfiles/rails.7.0.activerecord.gemfile +13 -0
  21. data/lib/chewy/config.rb +42 -60
  22. data/lib/chewy/errors.rb +4 -10
  23. data/lib/chewy/fields/base.rb +80 -20
  24. data/lib/chewy/fields/root.rb +7 -17
  25. data/lib/chewy/index/actions.rb +62 -35
  26. data/lib/chewy/{type → index}/adapter/active_record.rb +18 -4
  27. data/lib/chewy/{type → index}/adapter/base.rb +2 -3
  28. data/lib/chewy/{type → index}/adapter/object.rb +28 -32
  29. data/lib/chewy/{type → index}/adapter/orm.rb +26 -24
  30. data/lib/chewy/index/aliases.rb +14 -5
  31. data/lib/chewy/{type → index}/crutch.rb +5 -5
  32. data/lib/chewy/index/import/bulk_builder.rb +311 -0
  33. data/lib/chewy/{type → index}/import/bulk_request.rb +6 -7
  34. data/lib/chewy/{type → index}/import/journal_builder.rb +11 -12
  35. data/lib/chewy/{type → index}/import/routine.rb +17 -16
  36. data/lib/chewy/{type → index}/import.rb +51 -33
  37. data/lib/chewy/{type → index}/mapping.rb +32 -37
  38. data/lib/chewy/index/observe/active_record_methods.rb +87 -0
  39. data/lib/chewy/index/observe/callback.rb +34 -0
  40. data/lib/chewy/index/observe.rb +17 -0
  41. data/lib/chewy/index/specification.rb +1 -0
  42. data/lib/chewy/{type → index}/syncer.rb +61 -62
  43. data/lib/chewy/{type → index}/witchcraft.rb +15 -9
  44. data/lib/chewy/{type → index}/wrapper.rb +13 -3
  45. data/lib/chewy/index.rb +46 -96
  46. data/lib/chewy/journal.rb +25 -14
  47. data/lib/chewy/minitest/helpers.rb +86 -13
  48. data/lib/chewy/minitest/search_index_receiver.rb +22 -26
  49. data/lib/chewy/multi_search.rb +62 -0
  50. data/lib/chewy/railtie.rb +6 -20
  51. data/lib/chewy/rake_helper.rb +136 -108
  52. data/lib/chewy/rspec/build_query.rb +12 -0
  53. data/lib/chewy/rspec/helpers.rb +55 -0
  54. data/lib/chewy/rspec/update_index.rb +55 -44
  55. data/lib/chewy/rspec.rb +2 -0
  56. data/lib/chewy/runtime.rb +1 -1
  57. data/lib/chewy/search/loader.rb +19 -41
  58. data/lib/chewy/search/parameters/collapse.rb +16 -0
  59. data/lib/chewy/search/parameters/concerns/query_storage.rb +2 -2
  60. data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
  61. data/lib/chewy/search/parameters/indices.rb +12 -57
  62. data/lib/chewy/search/parameters/none.rb +1 -3
  63. data/lib/chewy/search/parameters/order.rb +6 -19
  64. data/lib/chewy/search/parameters/source.rb +5 -1
  65. data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
  66. data/lib/chewy/search/parameters.rb +7 -4
  67. data/lib/chewy/search/query_proxy.rb +9 -2
  68. data/lib/chewy/search/request.rb +180 -154
  69. data/lib/chewy/search/response.rb +5 -5
  70. data/lib/chewy/search/scoping.rb +7 -8
  71. data/lib/chewy/search/scrolling.rb +16 -13
  72. data/lib/chewy/search.rb +7 -22
  73. data/lib/chewy/stash.rb +19 -30
  74. data/lib/chewy/strategy/active_job.rb +2 -2
  75. data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
  76. data/lib/chewy/strategy/base.rb +10 -0
  77. data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
  78. data/lib/chewy/strategy/sidekiq.rb +3 -2
  79. data/lib/chewy/strategy.rb +5 -19
  80. data/lib/chewy/version.rb +1 -1
  81. data/lib/chewy.rb +36 -80
  82. data/lib/generators/chewy/install_generator.rb +1 -1
  83. data/lib/tasks/chewy.rake +26 -32
  84. data/migration_guide.md +56 -0
  85. data/spec/chewy/config_spec.rb +15 -61
  86. data/spec/chewy/fields/base_spec.rb +432 -145
  87. data/spec/chewy/fields/root_spec.rb +20 -28
  88. data/spec/chewy/fields/time_fields_spec.rb +5 -5
  89. data/spec/chewy/index/actions_spec.rb +388 -55
  90. data/spec/chewy/{type → index}/adapter/active_record_spec.rb +110 -44
  91. data/spec/chewy/{type → index}/adapter/object_spec.rb +21 -6
  92. data/spec/chewy/index/aliases_spec.rb +3 -3
  93. data/spec/chewy/index/import/bulk_builder_spec.rb +494 -0
  94. data/spec/chewy/{type → index}/import/bulk_request_spec.rb +5 -12
  95. data/spec/chewy/{type → index}/import/journal_builder_spec.rb +14 -22
  96. data/spec/chewy/{type → index}/import/routine_spec.rb +19 -19
  97. data/spec/chewy/{type → index}/import_spec.rb +149 -96
  98. data/spec/chewy/index/mapping_spec.rb +135 -0
  99. data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
  100. data/spec/chewy/index/observe/callback_spec.rb +139 -0
  101. data/spec/chewy/index/observe_spec.rb +143 -0
  102. data/spec/chewy/index/settings_spec.rb +3 -1
  103. data/spec/chewy/index/specification_spec.rb +20 -30
  104. data/spec/chewy/{type → index}/syncer_spec.rb +14 -19
  105. data/spec/chewy/{type → index}/witchcraft_spec.rb +34 -21
  106. data/spec/chewy/index/wrapper_spec.rb +100 -0
  107. data/spec/chewy/index_spec.rb +69 -137
  108. data/spec/chewy/journal_spec.rb +46 -91
  109. data/spec/chewy/minitest/helpers_spec.rb +122 -14
  110. data/spec/chewy/minitest/search_index_receiver_spec.rb +24 -26
  111. data/spec/chewy/multi_search_spec.rb +84 -0
  112. data/spec/chewy/rake_helper_spec.rb +293 -101
  113. data/spec/chewy/rspec/build_query_spec.rb +34 -0
  114. data/spec/chewy/rspec/helpers_spec.rb +61 -0
  115. data/spec/chewy/rspec/update_index_spec.rb +106 -102
  116. data/spec/chewy/runtime_spec.rb +2 -2
  117. data/spec/chewy/search/loader_spec.rb +19 -53
  118. data/spec/chewy/search/pagination/kaminari_examples.rb +3 -5
  119. data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
  120. data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
  121. data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
  122. data/spec/chewy/search/parameters/indices_spec.rb +26 -118
  123. data/spec/chewy/search/parameters/none_spec.rb +1 -1
  124. data/spec/chewy/search/parameters/order_spec.rb +18 -11
  125. data/spec/chewy/search/parameters/query_storage_examples.rb +67 -21
  126. data/spec/chewy/search/parameters/search_after_spec.rb +4 -1
  127. data/spec/chewy/search/parameters/source_spec.rb +8 -2
  128. data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
  129. data/spec/chewy/search/parameters_spec.rb +23 -7
  130. data/spec/chewy/search/query_proxy_spec.rb +68 -17
  131. data/spec/chewy/search/request_spec.rb +344 -149
  132. data/spec/chewy/search/response_spec.rb +35 -25
  133. data/spec/chewy/search/scrolling_spec.rb +28 -26
  134. data/spec/chewy/search_spec.rb +69 -59
  135. data/spec/chewy/stash_spec.rb +16 -26
  136. data/spec/chewy/strategy/active_job_spec.rb +23 -10
  137. data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
  138. data/spec/chewy/strategy/atomic_spec.rb +9 -10
  139. data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
  140. data/spec/chewy/strategy/sidekiq_spec.rb +14 -10
  141. data/spec/chewy/strategy_spec.rb +19 -15
  142. data/spec/chewy_spec.rb +17 -110
  143. data/spec/spec_helper.rb +6 -29
  144. data/spec/support/active_record.rb +43 -5
  145. metadata +102 -198
  146. data/.travis.yml +0 -45
  147. data/Appraisals +0 -81
  148. data/LEGACY_DSL.md +0 -497
  149. data/gemfiles/rails.4.0.activerecord.gemfile +0 -15
  150. data/gemfiles/rails.4.1.activerecord.gemfile +0 -15
  151. data/gemfiles/rails.4.2.activerecord.gemfile +0 -16
  152. data/gemfiles/rails.4.2.mongoid.5.2.gemfile +0 -16
  153. data/gemfiles/rails.5.0.activerecord.gemfile +0 -16
  154. data/gemfiles/rails.5.0.mongoid.6.1.gemfile +0 -16
  155. data/gemfiles/rails.5.1.activerecord.gemfile +0 -16
  156. data/gemfiles/rails.5.1.mongoid.6.3.gemfile +0 -16
  157. data/gemfiles/sequel.4.45.gemfile +0 -11
  158. data/lib/chewy/backports/deep_dup.rb +0 -46
  159. data/lib/chewy/backports/duplicable.rb +0 -91
  160. data/lib/chewy/query/compose.rb +0 -68
  161. data/lib/chewy/query/criteria.rb +0 -191
  162. data/lib/chewy/query/filters.rb +0 -244
  163. data/lib/chewy/query/loading.rb +0 -110
  164. data/lib/chewy/query/nodes/and.rb +0 -25
  165. data/lib/chewy/query/nodes/base.rb +0 -17
  166. data/lib/chewy/query/nodes/bool.rb +0 -34
  167. data/lib/chewy/query/nodes/equal.rb +0 -34
  168. data/lib/chewy/query/nodes/exists.rb +0 -20
  169. data/lib/chewy/query/nodes/expr.rb +0 -28
  170. data/lib/chewy/query/nodes/field.rb +0 -110
  171. data/lib/chewy/query/nodes/has_child.rb +0 -15
  172. data/lib/chewy/query/nodes/has_parent.rb +0 -15
  173. data/lib/chewy/query/nodes/has_relation.rb +0 -59
  174. data/lib/chewy/query/nodes/match_all.rb +0 -11
  175. data/lib/chewy/query/nodes/missing.rb +0 -20
  176. data/lib/chewy/query/nodes/not.rb +0 -25
  177. data/lib/chewy/query/nodes/or.rb +0 -25
  178. data/lib/chewy/query/nodes/prefix.rb +0 -19
  179. data/lib/chewy/query/nodes/query.rb +0 -20
  180. data/lib/chewy/query/nodes/range.rb +0 -63
  181. data/lib/chewy/query/nodes/raw.rb +0 -15
  182. data/lib/chewy/query/nodes/regexp.rb +0 -35
  183. data/lib/chewy/query/nodes/script.rb +0 -20
  184. data/lib/chewy/query/pagination.rb +0 -25
  185. data/lib/chewy/query.rb +0 -1142
  186. data/lib/chewy/search/pagination/will_paginate.rb +0 -43
  187. data/lib/chewy/search/parameters/types.rb +0 -20
  188. data/lib/chewy/strategy/resque.rb +0 -27
  189. data/lib/chewy/strategy/shoryuken.rb +0 -40
  190. data/lib/chewy/type/actions.rb +0 -43
  191. data/lib/chewy/type/adapter/mongoid.rb +0 -67
  192. data/lib/chewy/type/adapter/sequel.rb +0 -93
  193. data/lib/chewy/type/import/bulk_builder.rb +0 -122
  194. data/lib/chewy/type/observe.rb +0 -82
  195. data/lib/chewy/type.rb +0 -117
  196. data/lib/sequel/plugins/chewy_observe.rb +0 -63
  197. data/spec/chewy/query/criteria_spec.rb +0 -700
  198. data/spec/chewy/query/filters_spec.rb +0 -201
  199. data/spec/chewy/query/loading_spec.rb +0 -124
  200. data/spec/chewy/query/nodes/and_spec.rb +0 -12
  201. data/spec/chewy/query/nodes/bool_spec.rb +0 -14
  202. data/spec/chewy/query/nodes/equal_spec.rb +0 -32
  203. data/spec/chewy/query/nodes/exists_spec.rb +0 -18
  204. data/spec/chewy/query/nodes/has_child_spec.rb +0 -59
  205. data/spec/chewy/query/nodes/has_parent_spec.rb +0 -59
  206. data/spec/chewy/query/nodes/match_all_spec.rb +0 -11
  207. data/spec/chewy/query/nodes/missing_spec.rb +0 -16
  208. data/spec/chewy/query/nodes/not_spec.rb +0 -14
  209. data/spec/chewy/query/nodes/or_spec.rb +0 -12
  210. data/spec/chewy/query/nodes/prefix_spec.rb +0 -16
  211. data/spec/chewy/query/nodes/query_spec.rb +0 -12
  212. data/spec/chewy/query/nodes/range_spec.rb +0 -32
  213. data/spec/chewy/query/nodes/raw_spec.rb +0 -11
  214. data/spec/chewy/query/nodes/regexp_spec.rb +0 -43
  215. data/spec/chewy/query/nodes/script_spec.rb +0 -15
  216. data/spec/chewy/query/pagination/kaminari_spec.rb +0 -5
  217. data/spec/chewy/query/pagination/will_paginate_spec.rb +0 -5
  218. data/spec/chewy/query/pagination_spec.rb +0 -39
  219. data/spec/chewy/query_spec.rb +0 -637
  220. data/spec/chewy/search/pagination/will_paginate_examples.rb +0 -63
  221. data/spec/chewy/search/pagination/will_paginate_spec.rb +0 -23
  222. data/spec/chewy/search/parameters/types_spec.rb +0 -5
  223. data/spec/chewy/strategy/resque_spec.rb +0 -46
  224. data/spec/chewy/strategy/shoryuken_spec.rb +0 -66
  225. data/spec/chewy/type/actions_spec.rb +0 -50
  226. data/spec/chewy/type/adapter/mongoid_spec.rb +0 -372
  227. data/spec/chewy/type/adapter/sequel_spec.rb +0 -472
  228. data/spec/chewy/type/import/bulk_builder_spec.rb +0 -279
  229. data/spec/chewy/type/mapping_spec.rb +0 -173
  230. data/spec/chewy/type/observe_spec.rb +0 -137
  231. data/spec/chewy/type/wrapper_spec.rb +0 -98
  232. data/spec/chewy/type_spec.rb +0 -55
  233. data/spec/support/mongoid.rb +0 -93
  234. data/spec/support/sequel.rb +0 -80
@@ -0,0 +1,311 @@
1
+ module Chewy
2
+ class Index
3
+ module Import
4
+ # This class's purpose is to build ES client-acceptable bulk
5
+ # request body from the passed objects for index and deletion.
6
+ # It handles parent-child relationships as well by fetching
7
+ # existing documents from ES and database, taking their join field values and
8
+ # using it in the bulk body.
9
+ # If fields are passed - it creates partial update entries except for
10
+ # the cases when the type has parent and parent_id has been changed.
11
+ class BulkBuilder
12
+ # @param index [Chewy::Index] desired index
13
+ # @param to_index [Array<Object>] objects to index
14
+ # @param delete [Array<Object>] objects or ids to delete
15
+ # @param fields [Array<Symbol, String>] an array of fields for documents update
16
+ def initialize(index, to_index: [], delete: [], fields: [])
17
+ @index = index
18
+ @to_index = to_index
19
+ @delete = delete
20
+ @fields = fields.map!(&:to_sym)
21
+ end
22
+
23
+ # Returns ES API-ready bulk request body.
24
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
+ # @return [Array<Hash>] bulk body
26
+ def bulk_body
27
+ populate_cache
28
+
29
+ @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
30
+ @delete.flat_map(&method(:delete_entry))
31
+ ).uniq
32
+ end
33
+
34
+ # The only purpose of this method is to cache document ids for
35
+ # all the passed object for index to avoid ids recalculation.
36
+ #
37
+ # @return [Hash{String => Object}] an ids-objects index hash
38
+ def index_objects_by_id
39
+ @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
40
+ end
41
+
42
+ private
43
+
44
+ def crutches_for_index
45
+ @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
46
+ end
47
+
48
+ def index_entry(object)
49
+ entry = {}
50
+ entry[:_id] = index_object_ids[object] if index_object_ids[object]
51
+ entry[:routing] = routing(object) if join_field?
52
+
53
+ parent = cache(entry[:_id])
54
+ data = data_for(object) if parent.present?
55
+ if parent.present? && parent_changed?(data, parent)
56
+ reindex_entries(object, data) + reindex_descendants(object)
57
+ elsif @fields.present?
58
+ return [] unless entry[:_id]
59
+
60
+ entry[:data] = {doc: data_for(object, fields: @fields)}
61
+ [{update: entry}]
62
+ else
63
+ entry[:data] = data || data_for(object)
64
+ [{index: entry}]
65
+ end
66
+ end
67
+
68
+ def reindex_entries(object, data, root: object)
69
+ entry = {}
70
+ entry[:_id] = index_object_ids[object] || entry_id(object)
71
+ entry[:data] = data
72
+ entry[:routing] = routing(root) || routing(object) if join_field?
73
+ delete = delete_single_entry(object, root: root).first
74
+ index = {index: entry}
75
+ [delete, index]
76
+ end
77
+
78
+ def reindex_descendants(root)
79
+ descendants = load_descendants(root)
80
+ crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
81
+ descendants.flat_map do |object|
82
+ reindex_entries(
83
+ object,
84
+ data_for(object, crutches: crutches),
85
+ root: root
86
+ )
87
+ end
88
+ end
89
+
90
+ def delete_entry(object)
91
+ delete_single_entry(object)
92
+ end
93
+
94
+ def delete_single_entry(object, root: object)
95
+ entry = {}
96
+ entry[:_id] = entry_id(object)
97
+ entry[:_id] ||= object.as_json
98
+
99
+ return [] if entry[:_id].blank?
100
+
101
+ if join_field?
102
+ cached_parent = cache(entry[:_id])
103
+ entry_parent_id =
104
+ if cached_parent
105
+ cached_parent[:parent_id]
106
+ else
107
+ find_parent_id(object)
108
+ end
109
+
110
+ entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
111
+ entry[:parent] = entry_parent_id if entry_parent_id
112
+ end
113
+
114
+ [{delete: entry}]
115
+ end
116
+
117
+ def load_descendants(root)
118
+ root_type = join_field_type(root)
119
+ return [] unless root_type
120
+
121
+ descendant_ids = []
122
+ grouped_parents = {root_type => [root.id]}
123
+ # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
124
+ until grouped_parents.empty?
125
+ children_data = grouped_parents.flat_map do |parent_type, parent_ids|
126
+ @index.query(
127
+ has_parent: {
128
+ parent_type: parent_type,
129
+ # ignore_unmapped to avoid error for the leaves of the tree
130
+ # (types without children)
131
+ ignore_unmapped: true,
132
+ query: {ids: {values: parent_ids}}
133
+ }
134
+ ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
135
+ end
136
+ descendant_ids |= children_data.map(&:last)
137
+
138
+ grouped_parents = {}
139
+ children_data.each do |name, id|
140
+ next unless name
141
+
142
+ grouped_parents[name] ||= []
143
+ grouped_parents[name] << id
144
+ end
145
+ end
146
+ # query the primary database to load the descendants' records
147
+ @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
148
+ end
149
+
150
+ def populate_cache
151
+ @cache = load_cache
152
+ end
153
+
154
+ def cache(id)
155
+ @cache[id.to_s]
156
+ end
157
+
158
+ def load_cache
159
+ return {} unless join_field?
160
+
161
+ @index
162
+ .filter(ids: {values: ids_for_cache})
163
+ .order('_doc')
164
+ .pluck(:_id, :_routing, join_field)
165
+ .map do |id, routing, join|
166
+ [
167
+ id,
168
+ {routing: routing, parent_id: join['parent']}
169
+ ]
170
+ end.to_h
171
+ end
172
+
173
+ def existing_routing(id)
174
+ # All objects needed here should be cached in #load_cache,
175
+ # if not, we return nil. In some cases we don't have existing routing cached,
176
+ # e.g. for loaded descendants
177
+ return unless cache(id)
178
+
179
+ cache(id)[:routing]
180
+ end
181
+
182
+ # Two types of ids:
183
+ # * of parents of the objects to be indexed
184
+ # * of objects to be deleted
185
+ def ids_for_cache
186
+ ids = @to_index.flat_map do |object|
187
+ [find_parent_id(object), object.id] if object.respond_to?(:id)
188
+ end
189
+ ids.concat(@delete.map do |object|
190
+ object.id if object.respond_to?(:id)
191
+ end)
192
+ ids.uniq.compact
193
+ end
194
+
195
+ def routing(object)
196
+ # filter out non-model objects, early return on object==nil
197
+ return unless object.respond_to?(:id)
198
+
199
+ parent_id = find_parent_id(object)
200
+ if parent_id
201
+ routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
202
+ else
203
+ object.id.to_s
204
+ end
205
+ end
206
+
207
+ def find_parent_id(object)
208
+ return unless object.respond_to?(:id)
209
+
210
+ join = data_for(object, fields: [join_field.to_sym])[join_field]
211
+ join['parent'] if join
212
+ end
213
+
214
+ def join_field
215
+ return @join_field if defined?(@join_field)
216
+
217
+ @join_field = find_join_field
218
+ end
219
+
220
+ def find_join_field
221
+ type_settings = @index.mappings_hash[:mappings]
222
+ return unless type_settings
223
+
224
+ properties = type_settings[:properties]
225
+ join_fields = properties.find { |_, options| options[:type] == :join }
226
+ return unless join_fields
227
+
228
+ join_fields.first.to_s
229
+ end
230
+
231
+ def join_field_type(object)
232
+ return unless join_field?
233
+
234
+ raw_object =
235
+ if @index._default_import_options[:raw_import]
236
+ @index._default_import_options[:raw_import].call(object.attributes)
237
+ else
238
+ object
239
+ end
240
+
241
+ join_field_value = data_for(
242
+ raw_object,
243
+ fields: [join_field.to_sym], # build only the field that is needed
244
+ crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
245
+ )[join_field]
246
+
247
+ case join_field_value
248
+ when String
249
+ join_field_value
250
+ when Hash
251
+ join_field_value['name']
252
+ end
253
+ end
254
+
255
+ def join_field?
256
+ join_field && !join_field.empty?
257
+ end
258
+
259
+ def data_for(object, fields: [], crutches: crutches_for_index)
260
+ @index.compose(object, crutches, fields: fields)
261
+ end
262
+
263
+ def parent_changed?(data, old_parent)
264
+ return false unless old_parent
265
+ return false unless join_field?
266
+ return false unless @fields.include?(join_field.to_sym)
267
+ return false unless data.key?(join_field)
268
+
269
+ # The join field value can be a hash, e.g.:
270
+ # {"name": "child", "parent": "123"} for a child
271
+ # {"name": "parent"} for a parent
272
+ # but it can also be a string: (e.g. "parent") for a parent:
273
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
274
+ new_join_field_value = data[join_field]
275
+ if new_join_field_value.is_a? Hash
276
+ # If we have a hash in the join field,
277
+ # we're taking the `parent` field that holds the parent id.
278
+ new_parent_id = new_join_field_value['parent']
279
+ new_parent_id != old_parent[:parent_id]
280
+ else
281
+ # If there is a non-hash value (String or nil), it means that the join field is changed
282
+ # and the current object is no longer a child.
283
+ true
284
+ end
285
+ end
286
+
287
+ def entry_id(object)
288
+ if type_root.id
289
+ type_root.compose_id(object)
290
+ else
291
+ id = object.id if object.respond_to?(:id)
292
+ id ||= object[:id] || object['id'] if object.is_a?(Hash)
293
+ id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
294
+ id
295
+ end
296
+ end
297
+
298
+ def index_object_ids
299
+ @index_object_ids ||= @to_index.each_with_object({}) do |object, result|
300
+ id = entry_id(object)
301
+ result[object] = id if id.present?
302
+ end
303
+ end
304
+
305
+ def type_root
306
+ @type_root ||= @index.root
307
+ end
308
+ end
309
+ end
310
+ end
311
+ end
@@ -1,5 +1,5 @@
1
1
  module Chewy
2
- class Type
2
+ class Index
3
3
  module Import
4
4
  # Adds additional features to elasticsearch-api bulk method:
5
5
  # * supports Chewy index suffix if necessary;
@@ -10,12 +10,12 @@ module Chewy
10
10
  #
11
11
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
12
12
  class BulkRequest
13
- # @param type [Chewy::Type] a type for the request
13
+ # @param index [Chewy::Index] an index for the request
14
14
  # @param suffix [String] an index name optional suffix
15
15
  # @param bulk_size [Integer] bulk size in bytes
16
16
  # @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
17
- def initialize(type, suffix: nil, bulk_size: nil, **bulk_options)
18
- @type = type
17
+ def initialize(index, suffix: nil, bulk_size: nil, **bulk_options)
18
+ @index = index
19
19
  @suffix = suffix
20
20
  @bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
21
21
  @bulk_options = bulk_options
@@ -33,7 +33,7 @@ module Chewy
33
33
  return [] if body.blank?
34
34
 
35
35
  request_bodies(body).each_with_object([]) do |request_body, results|
36
- response = @type.client.bulk request_base.merge(body: request_body) if request_body.present?
36
+ response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
37
37
 
38
38
  next unless response.try(:[], 'errors')
39
39
 
@@ -47,8 +47,7 @@ module Chewy
47
47
 
48
48
  def request_base
49
49
  @request_base ||= {
50
- index: @type.index_name(suffix: @suffix),
51
- type: @type.type_name
50
+ index: @index.index_name(suffix: @suffix)
52
51
  }.merge!(@bulk_options)
53
52
  end
54
53
 
@@ -1,24 +1,23 @@
1
1
  module Chewy
2
- class Type
2
+ class Index
3
3
  module Import
4
4
  class JournalBuilder
5
- def initialize(type, index: [], delete: [])
6
- @type = type
5
+ def initialize(index, to_index: [], delete: [])
7
6
  @index = index
7
+ @to_index = to_index
8
8
  @delete = delete
9
9
  end
10
10
 
11
11
  def bulk_body
12
- Chewy::Type::Import::BulkBuilder.new(
13
- Chewy::Stash::Journal::Journal,
14
- index: [
15
- entries(:index, @index),
12
+ Chewy::Index::Import::BulkBuilder.new(
13
+ Chewy::Stash::Journal,
14
+ to_index: [
15
+ entries(:index, @to_index),
16
16
  entries(:delete, @delete)
17
17
  ].compact
18
18
  ).bulk_body.each do |item|
19
19
  item.values.first.merge!(
20
- _index: Chewy::Stash::Journal.index_name,
21
- _type: Chewy::Stash::Journal::Journal.type_name
20
+ _index: Chewy::Stash::Journal.index_name
22
21
  )
23
22
  end
24
23
  end
@@ -27,9 +26,9 @@ module Chewy
27
26
 
28
27
  def entries(action, objects)
29
28
  return unless objects.present?
29
+
30
30
  {
31
- index_name: @type.index.derivable_name,
32
- type_name: @type.type_name,
31
+ index_name: @index.derivable_name,
33
32
  action: action,
34
33
  references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
35
34
  created_at: Time.now.utc
@@ -37,7 +36,7 @@ module Chewy
37
36
  end
38
37
 
39
38
  def identify(objects)
40
- @type.adapter.identify(objects)
39
+ @index.adapter.identify(objects)
41
40
  end
42
41
  end
43
42
  end
@@ -1,5 +1,5 @@
1
1
  module Chewy
2
- class Type
2
+ class Index
3
3
  module Import
4
4
  # This class performs the import routine for the options and objects given.
5
5
  #
@@ -20,7 +20,7 @@ module Chewy
20
20
  # when the document doesn't exist only if `update_failover` option is true. In order to
21
21
  # restore, it indexes such objects completely on the next iteration.
22
22
  #
23
- # @see Chewy::Type::Import::ClassMethods#import
23
+ # @see Chewy::Index::Import::ClassMethods#import
24
24
  class Routine
25
25
  BULK_OPTIONS = %i[
26
26
  suffix bulk_size
@@ -33,18 +33,18 @@ module Chewy
33
33
  refresh: true,
34
34
  update_fields: [],
35
35
  update_failover: true,
36
- batch_size: Chewy::Type::Adapter::Base::BATCH_SIZE
36
+ batch_size: Chewy::Index::Adapter::Base::BATCH_SIZE
37
37
  }.freeze
38
38
 
39
39
  attr_reader :options, :parallel_options, :errors, :stats, :leftovers
40
40
 
41
41
  # Basically, processes passed options, extracting bulk request specific options.
42
- # @param type [Chewy::Type] chewy type
43
- # @param options [Hash] import options, see {Chewy::Type::Import::ClassMethods#import}
44
- def initialize(type, **options)
45
- @type = type
42
+ # @param index [Chewy::Index] chewy index
43
+ # @param options [Hash] import options, see {Chewy::Index::Import::ClassMethods#import}
44
+ def initialize(index, **options)
45
+ @index = index
46
46
  @options = options
47
- @options.reverse_merge!(@type._default_import_options)
47
+ @options.reverse_merge!(@index._default_import_options)
48
48
  @options.reverse_merge!(journal: Chewy.configuration[:journal])
49
49
  @options.reverse_merge!(DEFAULT_OPTIONS)
50
50
  @bulk_options = @options.slice(*BULK_OPTIONS)
@@ -61,27 +61,28 @@ module Chewy
61
61
  @leftovers = []
62
62
  end
63
63
 
64
- # Creates the journal index and the type corresponding index if necessary.
64
+ # Creates the journal index and the corresponding index if necessary.
65
65
  # @return [Object] whatever
66
66
  def create_indexes!
67
67
  Chewy::Stash::Journal.create if @options[:journal]
68
68
  return if Chewy.configuration[:skip_index_creation_on_import]
69
- @type.index.create!(@bulk_options.slice(:suffix)) unless @type.index.exists?
69
+
70
+ @index.create!(**@bulk_options.slice(:suffix)) unless @index.exists?
70
71
  end
71
72
 
72
- # The main process method. Converts passed objects to thr bulk request body,
73
- # appends journal entires, performs this request and handles errors performing
73
+ # The main process method. Converts passed objects to the bulk request body,
74
+ # appends journal entries, performs this request and handles errors performing
74
75
  # failover procedures if applicable.
75
76
  #
76
77
  # @param index [Array<Object>] any acceptable objects for indexing
77
78
  # @param delete [Array<Object>] any acceptable objects for deleting
78
79
  # @return [true, false] the result of the request, true if no errors
79
80
  def process(index: [], delete: [])
80
- bulk_builder = BulkBuilder.new(@type, index: index, delete: delete, fields: @options[:update_fields])
81
+ bulk_builder = BulkBuilder.new(@index, to_index: index, delete: delete, fields: @options[:update_fields])
81
82
  bulk_body = bulk_builder.bulk_body
82
83
 
83
84
  if @options[:journal]
84
- journal_builder = JournalBuilder.new(@type, index: index, delete: delete)
85
+ journal_builder = JournalBuilder.new(@index, to_index: index, delete: delete)
85
86
  bulk_body.concat(journal_builder.bulk_body)
86
87
  end
87
88
 
@@ -126,11 +127,11 @@ module Chewy
126
127
  errors_to_cleanup.each { |error| errors.delete(error) }
127
128
 
128
129
  failed_objects = index_objects_by_id.values_at(*failed_ids_for_reimport)
129
- BulkBuilder.new(@type, index: failed_objects).bulk_body
130
+ BulkBuilder.new(@index, to_index: failed_objects).bulk_body
130
131
  end
131
132
 
132
133
  def bulk
133
- @bulk ||= BulkRequest.new(@type, **@bulk_options)
134
+ @bulk ||= BulkRequest.new(@index, **@bulk_options)
134
135
  end
135
136
  end
136
137
  end
@@ -1,43 +1,42 @@
1
- require 'chewy/type/import/journal_builder'
2
- require 'chewy/type/import/bulk_builder'
3
- require 'chewy/type/import/bulk_request'
4
- require 'chewy/type/import/routine'
1
+ require 'chewy/index/import/journal_builder'
2
+ require 'chewy/index/import/bulk_builder'
3
+ require 'chewy/index/import/bulk_request'
4
+ require 'chewy/index/import/routine'
5
5
 
6
6
  module Chewy
7
- class Type
7
+ class Index
8
8
  module Import
9
9
  extend ActiveSupport::Concern
10
10
 
11
- IMPORT_WORKER = lambda do |type, options, total, ids, index|
12
- ::Process.setproctitle("chewy [#{type}]: import data (#{index + 1}/#{total})")
13
- routine = Routine.new(type, options)
14
- type.adapter.import(*ids, routine.options) do |action_objects|
11
+ IMPORT_WORKER = lambda do |index, options, total, ids, iteration|
12
+ ::Process.setproctitle("chewy [#{index}]: import data (#{iteration + 1}/#{total})")
13
+ routine = Routine.new(index, **options)
14
+ index.adapter.import(*ids, routine.options) do |action_objects|
15
15
  routine.process(**action_objects)
16
16
  end
17
17
  {errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
18
18
  end
19
19
 
20
- LEFTOVERS_WORKER = lambda do |type, options, total, body, index|
21
- ::Process.setproctitle("chewy [#{type}]: import leftovers (#{index + 1}/#{total})")
22
- routine = Routine.new(type, options)
20
+ LEFTOVERS_WORKER = lambda do |index, options, total, body, iteration|
21
+ ::Process.setproctitle("chewy [#{index}]: import leftovers (#{iteration + 1}/#{total})")
22
+ routine = Routine.new(index, **options)
23
23
  routine.perform_bulk(body)
24
24
  routine.errors
25
25
  end
26
26
 
27
27
  module ClassMethods
28
28
  # @!method import(*collection, **options)
29
- # Basically, one of the main methods for type. Performs any objects import
30
- # to the index for a specified type. Does all the objects handling routines.
29
+ # Basically, one of the main methods for an index. Performs any objects import
30
+ # to the index. Does all the objects handling routines.
31
31
  # Performs document import by utilizing bulk API. Bulk size and objects batch
32
32
  # size are controlled by the corresponding options.
33
33
  #
34
34
  # It accepts ORM/ODM objects, PORO, hashes, ids which are used by adapter to
35
- # fetch objects from the source depenting on the used adapter. It destroys
36
- # passed objects from the index if they are not in the default type scope
35
+ # fetch objects from the source depending on the used adapter. It destroys
36
+ # passed objects from the index if they are not in the default scope
37
37
  # or marked for destruction.
38
38
  #
39
- # It handles parent-child relationships: if the object parent_id has been
40
- # changed it destroys the object and recreates it from scratch.
39
+ # It handles parent-child relationships with a join field, reindexing children when the parent is reindexed.
41
40
  #
42
41
  # Performs journaling if enabled: it stores all the ids of the imported
43
42
  # objects to a specialized index. It is possible to replay particular import
@@ -51,15 +50,15 @@ module Chewy
51
50
  #
52
51
  # Utilizes `ActiveSupport::Notifications`, so it is possible to get imported
53
52
  # objects later by listening to the `import_objects.chewy` queue. It is also
54
- # possible to get the list of occured errors from the payload if something
53
+ # possible to get the list of occurred errors from the payload if something
55
54
  # went wrong.
56
55
  #
57
56
  # Import can also be run in parallel using the Parallel gem functionality.
58
57
  #
59
58
  # @example
60
- # UsersIndex::User.import(parallel: true) # imports everything in parallel with automatic workers number
61
- # UsersIndex::User.import(parallel: 3) # using 3 workers
62
- # UsersIndex::User.import(parallel: {in_threads: 10}) # in 10 threads
59
+ # UsersIndex.import(parallel: true) # imports everything in parallel with automatic workers number
60
+ # UsersIndex.import(parallel: 3) # using 3 workers
61
+ # UsersIndex.import(parallel: {in_threads: 10}) # in 10 threads
63
62
  #
64
63
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
65
64
  # @param collection [Array<Object>] an array or anything to import
@@ -67,12 +66,13 @@ module Chewy
67
66
  # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
68
67
  # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
69
68
  # @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
69
+ # @option options [Boolean] direct_import skips object reloading in ORM adapter, `false` by default
70
70
  # @option options [true, false] journal enables imported objects journaling, false by default
71
71
  # @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
72
72
  # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
73
73
  # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
74
74
  # @return [true, false] false in case of errors
75
- def import(*args)
75
+ ruby2_keywords def import(*args)
76
76
  import_routine(*args).blank?
77
77
  end
78
78
 
@@ -83,9 +83,10 @@ module Chewy
83
83
  # in case of any import errors.
84
84
  #
85
85
  # @raise [Chewy::ImportFailed] in case of errors
86
- def import!(*args)
86
+ ruby2_keywords def import!(*args)
87
87
  errors = import_routine(*args)
88
88
  raise Chewy::ImportFailed.new(self, errors) if errors.present?
89
+
89
90
  true
90
91
  end
91
92
 
@@ -93,7 +94,7 @@ module Chewy
93
94
  # `bulk_size` and `suffix`.
94
95
  #
95
96
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
96
- # @see Chewy::Type::Import::Bulk
97
+ # @see Chewy::Index::Import::BulkRequest
97
98
  # @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
98
99
  # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
99
100
  # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
@@ -110,11 +111,11 @@ module Chewy
110
111
  # or normal composing under the hood.
111
112
  #
112
113
  # @param object [Object] a data source object
113
- # @param crutches [Object] optional crutches object; if ommited - a crutch for the single passed object is created as a fallback
114
+ # @param crutches [Object] optional crutches object; if omitted - a crutch for the single passed object is created as a fallback
114
115
  # @param fields [Array<Symbol>] an array of fields to restrict the generated document
115
116
  # @return [Hash] a JSON-ready hash
116
117
  def compose(object, crutches = nil, fields: [])
117
- crutches ||= Chewy::Type::Crutch::Crutches.new self, [object]
118
+ crutches ||= Chewy::Index::Crutch::Crutches.new self, [object]
118
119
 
119
120
  if witchcraft? && root.children.present?
120
121
  cauldron(fields: fields).brew(object, crutches)
@@ -126,8 +127,9 @@ module Chewy
126
127
  private
127
128
 
128
129
  def import_routine(*args)
129
- return if args.first.blank? && !args.first.nil?
130
- routine = Routine.new(self, args.extract_options!)
130
+ return if !args.first.nil? && empty_objects_or_scope?(args.first)
131
+
132
+ routine = Routine.new(self, **args.extract_options!)
131
133
  routine.create_indexes!
132
134
 
133
135
  if routine.parallel_options
@@ -137,8 +139,16 @@ module Chewy
137
139
  end
138
140
  end
139
141
 
142
+ def empty_objects_or_scope?(objects_or_scope)
143
+ if objects_or_scope.respond_to?(:empty?)
144
+ objects_or_scope.empty?
145
+ else
146
+ objects_or_scope.blank?
147
+ end
148
+ end
149
+
140
150
  def import_linear(objects, routine)
141
- ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
151
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', index: self do |payload|
142
152
  adapter.import(*objects, routine.options) do |action_objects|
143
153
  routine.process(**action_objects)
144
154
  end
@@ -152,17 +162,25 @@ module Chewy
152
162
  def import_parallel(objects, routine)
153
163
  raise "The `parallel` gem is required for parallel import, please add `gem 'parallel'` to your Gemfile" unless '::Parallel'.safe_constantize
154
164
 
155
- ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
165
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', index: self do |payload|
156
166
  batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
157
167
 
158
168
  ::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
159
- results = ::Parallel.map_with_index(batches, routine.parallel_options, &IMPORT_WORKER.curry[self, routine.options, batches.size])
169
+ results = ::Parallel.map_with_index(
170
+ batches,
171
+ routine.parallel_options,
172
+ &IMPORT_WORKER.curry[self, routine.options, batches.size]
173
+ )
160
174
  ::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
161
175
  errors, import, leftovers = process_parallel_import_results(results)
162
176
 
163
177
  if leftovers.present?
164
178
  batches = leftovers.each_slice(routine.options[:batch_size])
165
- results = ::Parallel.map_with_index(batches, routine.parallel_options, &LEFTOVERS_WORKER.curry[self, routine.options, batches.size])
179
+ results = ::Parallel.map_with_index(
180
+ batches,
181
+ routine.parallel_options,
182
+ &LEFTOVERS_WORKER.curry[self, routine.options, batches.size]
183
+ )
166
184
  errors.concat(results.flatten(1))
167
185
  end
168
186