chewy 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (265) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +24 -2
  4. data/.rubocop_todo.yml +2 -2
  5. data/.travis.yml +38 -21
  6. data/.yardopts +5 -0
  7. data/Appraisals +55 -27
  8. data/CHANGELOG.md +57 -12
  9. data/Gemfile +14 -10
  10. data/LEGACY_DSL.md +497 -0
  11. data/README.md +249 -515
  12. data/chewy.gemspec +5 -4
  13. data/gemfiles/rails.4.0.activerecord.gemfile +14 -0
  14. data/gemfiles/rails.4.1.activerecord.gemfile +14 -0
  15. data/gemfiles/rails.4.2.activerecord.gemfile +8 -10
  16. data/gemfiles/rails.4.2.mongoid.5.1.gemfile +9 -10
  17. data/gemfiles/rails.5.0.activerecord.gemfile +8 -10
  18. data/gemfiles/rails.5.0.mongoid.6.0.gemfile +15 -0
  19. data/gemfiles/rails.5.1.activerecord.gemfile +15 -0
  20. data/gemfiles/rails.5.1.mongoid.6.1.gemfile +15 -0
  21. data/gemfiles/sequel.4.45.gemfile +11 -0
  22. data/lib/chewy.rb +77 -43
  23. data/lib/chewy/config.rb +44 -7
  24. data/lib/chewy/errors.rb +2 -2
  25. data/lib/chewy/fields/base.rb +39 -32
  26. data/lib/chewy/fields/root.rb +33 -7
  27. data/lib/chewy/index.rb +237 -149
  28. data/lib/chewy/index/actions.rb +85 -28
  29. data/lib/chewy/index/aliases.rb +2 -1
  30. data/lib/chewy/index/settings.rb +9 -5
  31. data/lib/chewy/index/specification.rb +58 -0
  32. data/lib/chewy/journal.rb +40 -92
  33. data/lib/chewy/query.rb +43 -27
  34. data/lib/chewy/query/compose.rb +13 -13
  35. data/lib/chewy/query/criteria.rb +13 -13
  36. data/lib/chewy/query/filters.rb +1 -1
  37. data/lib/chewy/query/loading.rb +1 -1
  38. data/lib/chewy/query/nodes/and.rb +2 -2
  39. data/lib/chewy/query/nodes/bool.rb +1 -1
  40. data/lib/chewy/query/nodes/equal.rb +2 -2
  41. data/lib/chewy/query/nodes/exists.rb +1 -1
  42. data/lib/chewy/query/nodes/has_relation.rb +2 -2
  43. data/lib/chewy/query/nodes/match_all.rb +1 -1
  44. data/lib/chewy/query/nodes/missing.rb +1 -1
  45. data/lib/chewy/query/nodes/not.rb +2 -2
  46. data/lib/chewy/query/nodes/or.rb +2 -2
  47. data/lib/chewy/query/nodes/prefix.rb +1 -1
  48. data/lib/chewy/query/nodes/query.rb +2 -2
  49. data/lib/chewy/query/nodes/range.rb +4 -4
  50. data/lib/chewy/query/nodes/regexp.rb +4 -4
  51. data/lib/chewy/query/nodes/script.rb +3 -3
  52. data/lib/chewy/query/pagination.rb +10 -1
  53. data/lib/chewy/railtie.rb +1 -0
  54. data/lib/chewy/rake_helper.rb +265 -48
  55. data/lib/chewy/rspec/update_index.rb +30 -22
  56. data/lib/chewy/search.rb +78 -21
  57. data/lib/chewy/search/loader.rb +83 -0
  58. data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
  59. data/lib/chewy/search/pagination/will_paginate.rb +41 -0
  60. data/lib/chewy/search/parameters.rb +150 -0
  61. data/lib/chewy/search/parameters/aggs.rb +16 -0
  62. data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
  63. data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
  64. data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
  65. data/lib/chewy/search/parameters/concerns/query_storage.rb +237 -0
  66. data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
  67. data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
  68. data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
  69. data/lib/chewy/search/parameters/explain.rb +16 -0
  70. data/lib/chewy/search/parameters/filter.rb +47 -0
  71. data/lib/chewy/search/parameters/highlight.rb +16 -0
  72. data/lib/chewy/search/parameters/indices_boost.rb +52 -0
  73. data/lib/chewy/search/parameters/limit.rb +17 -0
  74. data/lib/chewy/search/parameters/load.rb +32 -0
  75. data/lib/chewy/search/parameters/min_score.rb +16 -0
  76. data/lib/chewy/search/parameters/none.rb +27 -0
  77. data/lib/chewy/search/parameters/offset.rb +17 -0
  78. data/lib/chewy/search/parameters/order.rb +64 -0
  79. data/lib/chewy/search/parameters/post_filter.rb +19 -0
  80. data/lib/chewy/search/parameters/preference.rb +16 -0
  81. data/lib/chewy/search/parameters/profile.rb +16 -0
  82. data/lib/chewy/search/parameters/query.rb +19 -0
  83. data/lib/chewy/search/parameters/request_cache.rb +27 -0
  84. data/lib/chewy/search/parameters/rescore.rb +29 -0
  85. data/lib/chewy/search/parameters/script_fields.rb +16 -0
  86. data/lib/chewy/search/parameters/search_after.rb +20 -0
  87. data/lib/chewy/search/parameters/search_type.rb +16 -0
  88. data/lib/chewy/search/parameters/source.rb +73 -0
  89. data/lib/chewy/search/parameters/storage.rb +95 -0
  90. data/lib/chewy/search/parameters/stored_fields.rb +63 -0
  91. data/lib/chewy/search/parameters/suggest.rb +16 -0
  92. data/lib/chewy/search/parameters/terminate_after.rb +16 -0
  93. data/lib/chewy/search/parameters/timeout.rb +16 -0
  94. data/lib/chewy/search/parameters/track_scores.rb +16 -0
  95. data/lib/chewy/search/parameters/types.rb +20 -0
  96. data/lib/chewy/search/parameters/version.rb +16 -0
  97. data/lib/chewy/search/query_proxy.rb +257 -0
  98. data/lib/chewy/search/request.rb +1021 -0
  99. data/lib/chewy/search/response.rb +119 -0
  100. data/lib/chewy/search/scoping.rb +50 -0
  101. data/lib/chewy/search/scrolling.rb +136 -0
  102. data/lib/chewy/stash.rb +70 -0
  103. data/lib/chewy/strategy.rb +10 -3
  104. data/lib/chewy/strategy/active_job.rb +1 -0
  105. data/lib/chewy/strategy/atomic.rb +1 -3
  106. data/lib/chewy/strategy/bypass.rb +1 -1
  107. data/lib/chewy/strategy/resque.rb +1 -0
  108. data/lib/chewy/strategy/shoryuken.rb +40 -0
  109. data/lib/chewy/strategy/sidekiq.rb +13 -3
  110. data/lib/chewy/type.rb +29 -7
  111. data/lib/chewy/type/actions.rb +26 -2
  112. data/lib/chewy/type/adapter/active_record.rb +44 -29
  113. data/lib/chewy/type/adapter/base.rb +27 -7
  114. data/lib/chewy/type/adapter/mongoid.rb +18 -7
  115. data/lib/chewy/type/adapter/object.rb +187 -26
  116. data/lib/chewy/type/adapter/orm.rb +59 -32
  117. data/lib/chewy/type/adapter/sequel.rb +32 -16
  118. data/lib/chewy/type/import.rb +145 -191
  119. data/lib/chewy/type/import/bulk_builder.rb +122 -0
  120. data/lib/chewy/type/import/bulk_request.rb +76 -0
  121. data/lib/chewy/type/import/journal_builder.rb +45 -0
  122. data/lib/chewy/type/import/routine.rb +138 -0
  123. data/lib/chewy/type/mapping.rb +11 -1
  124. data/lib/chewy/type/observe.rb +1 -1
  125. data/lib/chewy/type/syncer.rb +220 -0
  126. data/lib/chewy/type/witchcraft.rb +27 -13
  127. data/lib/chewy/type/wrapper.rb +28 -2
  128. data/lib/chewy/version.rb +1 -1
  129. data/lib/tasks/chewy.rake +84 -26
  130. data/spec/chewy/config_spec.rb +82 -1
  131. data/spec/chewy/fields/base_spec.rb +147 -112
  132. data/spec/chewy/fields/root_spec.rb +75 -18
  133. data/spec/chewy/fields/time_fields_spec.rb +2 -3
  134. data/spec/chewy/index/actions_spec.rb +180 -50
  135. data/spec/chewy/index/aliases_spec.rb +2 -2
  136. data/spec/chewy/index/settings_spec.rb +67 -38
  137. data/spec/chewy/index/specification_spec.rb +160 -0
  138. data/spec/chewy/index_spec.rb +57 -66
  139. data/spec/chewy/journal_spec.rb +149 -54
  140. data/spec/chewy/minitest/helpers_spec.rb +4 -4
  141. data/spec/chewy/minitest/search_index_receiver_spec.rb +1 -1
  142. data/spec/chewy/query/criteria_spec.rb +179 -179
  143. data/spec/chewy/query/filters_spec.rb +15 -15
  144. data/spec/chewy/query/loading_spec.rb +22 -20
  145. data/spec/chewy/query/nodes/and_spec.rb +2 -2
  146. data/spec/chewy/query/nodes/bool_spec.rb +4 -4
  147. data/spec/chewy/query/nodes/equal_spec.rb +19 -19
  148. data/spec/chewy/query/nodes/exists_spec.rb +6 -6
  149. data/spec/chewy/query/nodes/has_child_spec.rb +19 -19
  150. data/spec/chewy/query/nodes/has_parent_spec.rb +19 -19
  151. data/spec/chewy/query/nodes/missing_spec.rb +5 -5
  152. data/spec/chewy/query/nodes/not_spec.rb +3 -2
  153. data/spec/chewy/query/nodes/or_spec.rb +2 -2
  154. data/spec/chewy/query/nodes/prefix_spec.rb +5 -5
  155. data/spec/chewy/query/nodes/query_spec.rb +2 -2
  156. data/spec/chewy/query/nodes/range_spec.rb +18 -18
  157. data/spec/chewy/query/nodes/raw_spec.rb +1 -1
  158. data/spec/chewy/query/nodes/regexp_spec.rb +14 -14
  159. data/spec/chewy/query/nodes/script_spec.rb +4 -4
  160. data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
  161. data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
  162. data/spec/chewy/query/pagination_spec.rb +25 -21
  163. data/spec/chewy/query_spec.rb +501 -560
  164. data/spec/chewy/rake_helper_spec.rb +368 -0
  165. data/spec/chewy/repository_spec.rb +4 -4
  166. data/spec/chewy/rspec/update_index_spec.rb +89 -56
  167. data/spec/chewy/runtime_spec.rb +2 -2
  168. data/spec/chewy/search/loader_spec.rb +117 -0
  169. data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
  170. data/spec/chewy/search/pagination/kaminari_spec.rb +17 -0
  171. data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
  172. data/spec/chewy/search/pagination/will_paginate_spec.rb +17 -0
  173. data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
  174. data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
  175. data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
  176. data/spec/chewy/search/parameters/explain_spec.rb +5 -0
  177. data/spec/chewy/search/parameters/filter_spec.rb +5 -0
  178. data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
  179. data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
  180. data/spec/chewy/search/parameters/indices_boost_spec.rb +83 -0
  181. data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
  182. data/spec/chewy/search/parameters/limit_spec.rb +5 -0
  183. data/spec/chewy/search/parameters/load_spec.rb +60 -0
  184. data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
  185. data/spec/chewy/search/parameters/none_spec.rb +5 -0
  186. data/spec/chewy/search/parameters/offset_spec.rb +5 -0
  187. data/spec/chewy/search/parameters/order_spec.rb +65 -0
  188. data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
  189. data/spec/chewy/search/parameters/preference_spec.rb +5 -0
  190. data/spec/chewy/search/parameters/profile_spec.rb +5 -0
  191. data/spec/chewy/search/parameters/query_spec.rb +5 -0
  192. data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
  193. data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
  194. data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
  195. data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
  196. data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
  197. data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
  198. data/spec/chewy/search/parameters/source_spec.rb +156 -0
  199. data/spec/chewy/search/parameters/storage_spec.rb +60 -0
  200. data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
  201. data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
  202. data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
  203. data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
  204. data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
  205. data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
  206. data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
  207. data/spec/chewy/search/parameters/types_spec.rb +5 -0
  208. data/spec/chewy/search/parameters/version_spec.rb +5 -0
  209. data/spec/chewy/search/parameters_spec.rb +130 -0
  210. data/spec/chewy/search/query_proxy_spec.rb +68 -0
  211. data/spec/chewy/search/request_spec.rb +669 -0
  212. data/spec/chewy/search/response_spec.rb +192 -0
  213. data/spec/chewy/search/scrolling_spec.rb +169 -0
  214. data/spec/chewy/search_spec.rb +13 -6
  215. data/spec/chewy/stash_spec.rb +95 -0
  216. data/spec/chewy/strategy/active_job_spec.rb +6 -0
  217. data/spec/chewy/strategy/resque_spec.rb +6 -0
  218. data/spec/chewy/strategy/shoryuken_spec.rb +64 -0
  219. data/spec/chewy/strategy/sidekiq_spec.rb +8 -0
  220. data/spec/chewy/strategy_spec.rb +6 -6
  221. data/spec/chewy/type/actions_spec.rb +29 -10
  222. data/spec/chewy/type/adapter/active_record_spec.rb +203 -91
  223. data/spec/chewy/type/adapter/mongoid_spec.rb +112 -54
  224. data/spec/chewy/type/adapter/object_spec.rb +101 -28
  225. data/spec/chewy/type/adapter/sequel_spec.rb +149 -82
  226. data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
  227. data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
  228. data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
  229. data/spec/chewy/type/import/routine_spec.rb +110 -0
  230. data/spec/chewy/type/import_spec.rb +350 -271
  231. data/spec/chewy/type/mapping_spec.rb +54 -18
  232. data/spec/chewy/type/observe_spec.rb +5 -1
  233. data/spec/chewy/type/syncer_spec.rb +123 -0
  234. data/spec/chewy/type/witchcraft_spec.rb +45 -29
  235. data/spec/chewy/type/wrapper_spec.rb +63 -23
  236. data/spec/chewy/type_spec.rb +28 -7
  237. data/spec/chewy_spec.rb +75 -7
  238. data/spec/spec_helper.rb +5 -2
  239. data/spec/support/active_record.rb +5 -1
  240. data/spec/support/class_helpers.rb +0 -14
  241. data/spec/support/mongoid.rb +15 -3
  242. data/spec/support/sequel.rb +6 -1
  243. metadata +198 -37
  244. data/gemfiles/rails.3.2.activerecord.gemfile +0 -16
  245. data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -15
  246. data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -15
  247. data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -16
  248. data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -16
  249. data/gemfiles/rails.4.2.mongoid.4.0.gemfile +0 -16
  250. data/gemfiles/rails.4.2.mongoid.4.0.kaminari.gemfile +0 -15
  251. data/gemfiles/rails.4.2.mongoid.4.0.will_paginate.gemfile +0 -15
  252. data/gemfiles/rails.4.2.mongoid.5.1.kaminari.gemfile +0 -15
  253. data/gemfiles/rails.4.2.mongoid.5.1.will_paginate.gemfile +0 -15
  254. data/gemfiles/rails.5.0.activerecord.kaminari.gemfile +0 -16
  255. data/gemfiles/rails.5.0.activerecord.will_paginate.gemfile +0 -16
  256. data/gemfiles/sequel.4.38.gemfile +0 -14
  257. data/lib/chewy/journal/apply.rb +0 -31
  258. data/lib/chewy/journal/clean.rb +0 -24
  259. data/lib/chewy/journal/entry.rb +0 -83
  260. data/lib/chewy/journal/query.rb +0 -87
  261. data/lib/chewy/query/pagination/will_paginate.rb +0 -27
  262. data/lib/chewy/query/scoping.rb +0 -20
  263. data/spec/chewy/journal/apply_spec.rb +0 -120
  264. data/spec/chewy/journal/entry_spec.rb +0 -237
  265. data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -59
@@ -6,16 +6,15 @@ module Chewy
6
6
  class Orm < Base
7
7
  attr_reader :default_scope
8
8
 
9
- def initialize(*args)
10
- @options = args.extract_options!
11
- class_or_relation = args.first
12
- if class_or_relation.is_a?(relation_class)
13
- @target = model_of_relation(class_or_relation)
14
- @default_scope = class_or_relation
9
+ def initialize(target, **options)
10
+ if target.is_a?(relation_class)
11
+ @target = model_of_relation(target)
12
+ @default_scope = target
15
13
  else
16
- @target = class_or_relation
14
+ @target = target
17
15
  @default_scope = all_scope
18
16
  end
17
+ @options = options
19
18
  cleanup_default_scope!
20
19
  end
21
20
 
@@ -25,10 +24,10 @@ module Chewy
25
24
 
26
25
  def identify(collection)
27
26
  if collection.is_a?(relation_class)
28
- pluck_ids(collection)
27
+ pluck(collection)
29
28
  else
30
29
  Array.wrap(collection).map do |entity|
31
- entity.is_a?(object_class) ? entity.public_send(primary_key) : entity
30
+ entity.respond_to?(primary_key) ? entity.public_send(primary_key) : entity
32
31
  end
33
32
  end
34
33
  end
@@ -49,7 +48,7 @@ module Chewy
49
48
  #
50
49
  # Method handles destroyed objects as well. In case of objects ORM scope
51
50
  # or array passed, objects, responding with true to `destroyed?` method will be deleted
52
- # from index. In case of ids array passed - documents with missing records ids will be
51
+ # from index. In case of ids array passed - documents with missing source object ids will be
53
52
  # deleted from index:
54
53
  #
55
54
  # users = User.all
@@ -74,16 +73,7 @@ module Chewy
74
73
  # UsersIndex::User.import users.map(&:id) # user ids will be deleted from index
75
74
  #
76
75
  def import(*args, &block)
77
- options = args.extract_options!
78
- options[:batch_size] ||= BATCH_SIZE
79
-
80
- collection = if args.empty?
81
- default_scope
82
- elsif args.one? && args.first.is_a?(relation_class)
83
- args.first
84
- else
85
- args.flatten.compact
86
- end
76
+ collection, options = import_args(*args)
87
77
 
88
78
  if collection.is_a?(relation_class)
89
79
  import_scope(collection, options, &block)
@@ -92,29 +82,51 @@ module Chewy
92
82
  end
93
83
  end
94
84
 
95
- def load(*args)
96
- load_options = args.extract_options!
97
- objects = args.flatten
85
+ def import_fields(*args, &block)
86
+ return enum_for(:import_fields, *args) unless block_given?
87
+
88
+ collection, options = import_args(*args)
89
+
90
+ if options[:fields].present? || collection.is_a?(relation_class)
91
+ collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
92
+ pluck_in_batches(collection, options.slice(:fields, :batch_size, :typecast), &block)
93
+ else
94
+ identify(collection).each_slice(options[:batch_size]) do |batch|
95
+ yield batch
96
+ end
97
+ end
98
+ end
99
+ alias_method :import_references, :import_fields
98
100
 
99
- additional_scope = load_options[load_options[:_type].type_name.to_sym].try(:[], :scope) || load_options[:scope]
101
+ def load(ids, **options)
102
+ scope = all_scope_where_ids_in(ids)
103
+ additional_scope = options[options[:_type].type_name.to_sym].try(:[], :scope) || options[:scope]
100
104
 
101
- scope = all_scope_where_ids_in(objects.map(&primary_key))
102
- loaded_objects = load_scope_objects(scope, additional_scope).index_by { |object| object.public_send(primary_key).to_s }
105
+ loaded_objects = load_scope_objects(scope, additional_scope)
106
+ .index_by do |object|
107
+ object.public_send(primary_key).to_s
108
+ end
103
109
 
104
- objects.map { |object| loaded_objects[object.public_send(primary_key).to_s] }
110
+ ids.map { |id| loaded_objects[id.to_s] }
105
111
  end
106
112
 
107
113
  private
108
114
 
109
115
  def import_objects(collection, options)
110
- hash = Hash[identify(collection).zip(collection)]
116
+ collection_ids = identify(collection)
117
+ hash = Hash[collection_ids.map(&:to_s).zip(collection)]
118
+
119
+ indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
120
+ batch = if options[:raw_import]
121
+ raw_default_scope_where_ids_in(ids, options[:raw_import])
122
+ else
123
+ default_scope_where_ids_in(ids)
124
+ end
111
125
 
112
- indexed = hash.keys.each_slice(options[:batch_size]).map do |ids|
113
- batch = default_scope_where_ids_in(ids)
114
126
  if batch.empty?
115
127
  true
116
128
  else
117
- identify(batch).each { |id| hash.delete(id) }
129
+ batch.each { |object| hash.delete(object.send(primary_key).to_s) }
118
130
  yield grouped_objects(batch)
119
131
  end
120
132
  end.all?
@@ -157,7 +169,22 @@ module Chewy
157
169
  end
158
170
 
159
171
  def grouped_objects(objects)
160
- options[:delete_if] ? super : { index: objects.to_a }
172
+ options[:delete_if] ? super : {index: objects.to_a}
173
+ end
174
+
175
+ def import_args(*args)
176
+ options = args.extract_options!
177
+ options[:batch_size] ||= BATCH_SIZE
178
+
179
+ collection = if args.empty?
180
+ default_scope
181
+ elsif args.one? && args.first.is_a?(relation_class)
182
+ args.first
183
+ else
184
+ args.flatten.compact
185
+ end
186
+
187
+ [collection, options]
161
188
  end
162
189
  end
163
190
  end
@@ -24,38 +24,54 @@ module Chewy
24
24
  end
25
25
 
26
26
  def import_scope(scope, options)
27
- scope = scope.unordered.order(::Sequel.asc(primary_key_with_table_name)).limit(options[:batch_size])
28
-
29
- ids = pluck_ids(scope)
30
- result = true
31
-
32
- while ids.present?
33
- result &= yield grouped_objects(default_scope_where_ids_in(ids).all)
34
- break if ids.size < options[:batch_size]
35
- ids = pluck_ids(scope.where { |o| o.__send__(primary_key_with_table_name) > ids.last })
27
+ pluck_in_batches(scope, options.slice(:batch_size)).inject(true) do |result, ids|
28
+ result & yield(grouped_objects(default_scope_where_ids_in(ids).all))
36
29
  end
37
-
38
- result
39
30
  end
40
31
 
41
32
  def primary_key
42
33
  target.primary_key
43
34
  end
44
35
 
45
- def primary_key_with_table_name
46
- "#{target.table_name}__#{primary_key}".to_sym
36
+ def full_column_name(column)
37
+ "#{target.table_name}__#{column}".to_sym
47
38
  end
48
39
 
49
40
  def all_scope
50
41
  target.dataset
51
42
  end
52
43
 
53
- def pluck_ids(scope)
54
- scope.distinct.select_map(primary_key_with_table_name)
44
+ def target_columns
45
+ @target_columns ||= target.columns.to_set
46
+ end
47
+
48
+ def pluck(scope, fields: [])
49
+ fields = fields.map(&:to_sym).unshift(primary_key).map do |column|
50
+ target_columns.include?(column) ? full_column_name(column) : column
51
+ end
52
+ scope.distinct.select_map(fields.one? ? fields.first : fields)
53
+ end
54
+
55
+ def pluck_in_batches(scope, fields: [], batch_size: nil, **options)
56
+ return enum_for(:pluck_in_batches, scope, fields: fields, batch_size: batch_size, **options) unless block_given?
57
+
58
+ scope = scope.unordered.order(::Sequel.asc(full_column_name(primary_key))).limit(batch_size)
59
+
60
+ ids = pluck(scope, fields: fields)
61
+ count = 0
62
+
63
+ while ids.present?
64
+ yield ids
65
+ break if ids.size < batch_size
66
+ last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
67
+ ids = pluck(scope.where { |o| o.__send__(full_column_name(primary_key)) > last_id }, fields: fields)
68
+ end
69
+
70
+ count
55
71
  end
56
72
 
57
73
  def scope_where_ids_in(scope, ids)
58
- scope.where(primary_key_with_table_name => Array.wrap(ids))
74
+ scope.where(full_column_name(primary_key) => Array.wrap(ids))
59
75
  end
60
76
 
61
77
  def model_of_relation(relation)
@@ -1,241 +1,195 @@
1
+ require 'chewy/type/import/journal_builder'
2
+ require 'chewy/type/import/bulk_builder'
3
+ require 'chewy/type/import/bulk_request'
4
+ require 'chewy/type/import/routine'
5
+
1
6
  module Chewy
2
7
  class Type
3
8
  module Import
4
9
  extend ActiveSupport::Concern
5
10
 
6
- BULK_OPTIONS = [:suffix, :bulk_size, :refresh, :consistency, :replication].freeze
11
+ IMPORT_WORKER = lambda do |type, options, ids|
12
+ ::Process.setproctitle("chewy import #{type}[#{::Parallel.worker_number}]")
13
+ routine = Routine.new(type, options)
14
+ type.adapter.import(*ids, routine.options) do |action_objects|
15
+ routine.process(**action_objects)
16
+ end
17
+ {errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
18
+ end
19
+
20
+ LEFTOVERS_WORKER = lambda do |type, options, body|
21
+ ::Process.setproctitle("chewy import #{type}[#{::Parallel.worker_number}]")
22
+ routine = Routine.new(type, options)
23
+ routine.perform_bulk(body)
24
+ routine.errors
25
+ end
7
26
 
8
27
  module ClassMethods
9
- # Perform import operation for specified documents.
10
- # Returns true or false depending on success.
28
+ # @!method import(*collection, **options)
29
+ # Basically, one of the main methods for type. Performs any objects import
30
+ # to the index for a specified type. Does all the objects handling routines.
31
+ # Performs document import by utilizing bulk API. Bulk size and objects batch
32
+ # size are controlled by the corresponding options.
33
+ #
34
+ # It accepts ORM/ODM objects, PORO, hashes, ids which are used by adapter to
35
+ # fetch objects from the source depending on the used adapter. It destroys
36
+ # passed objects from the index if they are not in the default type scope
37
+ # or marked for destruction.
38
+ #
39
+ # It handles parent-child relationships: if the object parent_id has been
40
+ # changed it destroys the object and recreates it from scratch.
41
+ #
42
+ # Performs journaling if enabled: it stores all the ids of the imported
43
+ # objects to a specialized index. It is possible to replay particular import
44
+ # later to restore the data consistency.
11
45
  #
12
- # UsersIndex::User.import # imports default data set
13
- # UsersIndex::User.import User.active # imports active users
14
- # UsersIndex::User.import [1, 2, 3] # imports users with specified ids
15
- # UsersIndex::User.import users # imports users collection
16
- # UsersIndex::User.import suffix: Time.now.to_i # imports data to index with specified suffix if such exists
17
- # UsersIndex::User.import refresh: false # to disable index refreshing after import
18
- # UsersIndex::User.import journal: true # import will record all the actions into special journal index
19
- # UsersIndex::User.import batch_size: 300 # import batch size
20
- # UsersIndex::User.import bulk_size: 10.megabytes # import ElasticSearch bulk size in bytes
21
- # UsersIndex::User.import consistency: :quorum # explicit write consistency setting for the operation (one, quorum, all)
22
- # UsersIndex::User.import replication: :async # explicitly set the replication type (sync, async)
46
+ # Performs partial index update using `update` bulk action if any `update_fields` are
47
+ # specified. Note that if document doesn't exist yet, an error will be raised
48
+ # by ES, but import catches this an errors and performs full indexing
49
+ # for the corresponding documents. This feature can be disabled by setting
50
+ # `update_failover` to `false`.
23
51
  #
24
- # See adapters documentation for more details.
52
+ # Utilizes `ActiveSupport::Notifications`, so it is possible to get imported
53
+ # objects later by listening to the `import_objects.chewy` queue. It is also
54
+ # possible to get the list of occurred errors from the payload if something
55
+ # went wrong.
25
56
  #
57
+ # Import can also be run in parallel using the Parallel gem functionality.
58
+ #
59
+ # @example
60
+ # UsersIndex::User.import(parallel: true) # imports everything in parallel with automatic workers number
61
+ # UsersIndex::User.import(parallel: 3) # using 3 workers
62
+ # UsersIndex::User.import(parallel: {in_threads: 10}) # in 10 threads
63
+ #
64
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
65
+ # @param collection [Array<Object>] an array or anything to import
66
+ # @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
67
+ # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
68
+ # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
69
+ # @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
70
+ # @option options [true, false] journal enables imported objects journaling, false by default
71
+ # @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
72
+ # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
73
+ # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
74
+ # @return [true, false] false in case of errors
26
75
  def import(*args)
27
- import_options = args.extract_options!
28
- import_options.reverse_merge! _default_import_options
29
- bulk_options = import_options.reject { |k, _| !BULK_OPTIONS.include?(k) }.reverse_merge!(refresh: true)
30
-
31
- index.create!(bulk_options.slice(:suffix)) unless index.exists?
32
-
33
- ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
34
- adapter.import(*args, import_options) do |action_objects|
35
- journal = Chewy::Journal.new(self)
36
- journal.add(action_objects) if import_options.fetch(:journal) { journal? }
37
-
38
- indexed_objects = build_root.parent_id && fetch_indexed_objects(action_objects.values.flatten)
39
- body = bulk_body(action_objects, indexed_objects)
40
-
41
- errors = bulk(bulk_options.merge(body: body, journal: journal)) if body.present?
42
-
43
- fill_payload_import payload, action_objects
44
- fill_payload_errors payload, errors if errors.present?
45
- !errors.present?
46
- end
47
- end
76
+ import_routine(*args).blank?
48
77
  end
49
78
 
50
- # Perform import operation for specified documents.
51
- # Raises Chewy::ImportFailed exception in case of import errors.
52
- # Options are completely the same as for `import` method
53
- # See adapters documentation for more details.
79
+ # @!method import!(*collection, **options)
80
+ # (see #import)
81
+ #
82
+ # The only difference from {#import} is that it raises an exception
83
+ # in case of any import errors.
54
84
  #
85
+ # @raise [Chewy::ImportFailed] in case of errors
55
86
  def import!(*args)
56
- errors = nil
57
- subscriber = ActiveSupport::Notifications.subscribe('import_objects.chewy') do |*notification_args|
58
- errors = notification_args.last[:errors]
59
- end
60
- import(*args)
87
+ errors = import_routine(*args)
61
88
  raise Chewy::ImportFailed.new(self, errors) if errors.present?
62
89
  true
63
- ensure
64
- ActiveSupport::Notifications.unsubscribe(subscriber) if subscriber
65
90
  end
66
91
 
67
- # Wraps elasticsearch-ruby client indices bulk method.
68
- # Adds `:suffix` option to bulk import to index with specified suffix.
69
- def bulk(options = {})
70
- suffix = options.delete(:suffix)
71
- bulk_size = options.delete(:bulk_size)
72
- body = options.delete(:body)
73
- journal = options.delete(:journal)
74
- header = { index: index.build_index_name(suffix: suffix), type: type_name }
75
-
76
- bodies = if bulk_size
77
- bulk_size -= 1.kilobyte # 1 kilobyte for request header and newlines
78
- raise ArgumentError, 'Import `:bulk_size` can\'t be less than 1 kilobyte' if bulk_size <= 0
79
-
80
- entries = body.each_with_object(['']) do |entry, result|
81
- operation, meta = entry.to_a.first
82
- data = meta.delete(:data)
83
- entry = [{ operation => meta }, data].compact.map(&:to_json).join("\n")
84
-
85
- raise ArgumentError, 'Import `:bulk_size` seems to be less than entry size' if entry.bytesize > bulk_size
86
-
87
- if result.last.bytesize + entry.bytesize > bulk_size
88
- result.push(entry)
89
- else
90
- result[-1] = [result[-1], entry].delete_if(&:blank?).join("\n")
91
- end
92
- end
93
- entries.map { |entry| entry + "\n" }
94
- else
95
- [body]
96
- end
97
-
98
- if journal.any_records?
99
- Chewy::Journal.create
100
- bodies += [journal.bulk_body]
101
- end
102
-
103
- items = bodies.map do |item_body|
104
- result = client.bulk options.merge(header).merge(body: item_body)
105
- result.try(:[], 'items') || []
106
- end.flatten
92
+ # Wraps elasticsearch API bulk method, adds additional features like
93
+ # `bulk_size` and `suffix`.
94
+ #
95
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
96
+ # @see Chewy::Type::Import::BulkRequest
97
+ # @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
98
+ # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
99
+ # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
100
+ # @option options [Array<Hash>] body elasticsearch API bulk method body
101
+ # @return [Hash] tricky transposed errors hash, empty if everything is fine
102
+ def bulk(**options)
103
+ error_items = BulkRequest.new(self, **options).perform(options[:body])
107
104
  Chewy.wait_for_status
108
105
 
109
- extract_errors items
106
+ payload_errors(error_items)
110
107
  end
111
108
 
112
- def journal?
113
- _default_import_options.fetch(:journal) { Chewy.configuration[:journal] }
109
+ # Composes a single document from the passed object. Uses either witchcraft
110
+ # or normal composing under the hood.
111
+ #
112
+ # @param object [Object] a data source object
113
+ # @param crutches [Object] optional crutches object; if omitted, a crutch for the single passed object is created as a fallback
114
+ # @param fields [Array<Symbol>] an array of fields to restrict the generated document to
115
+ # @return [Hash] a JSON-ready hash
116
+ def compose(object, crutches = nil, fields: [])
117
+ crutches ||= Chewy::Type::Crutch::Crutches.new self, [object]
118
+
119
+ if witchcraft? && build_root.children.present?
120
+ cauldron(fields: fields).brew(object, crutches)
121
+ else
122
+ build_root.compose(object, crutches, fields: fields)
123
+ end
114
124
  end
115
125
 
116
126
  private
117
127
 
118
- def bulk_body(action_objects, indexed_objects = nil)
119
- action_objects.flat_map do |action, objects|
120
- method = "#{action}_bulk_entry"
121
- crutches = Chewy::Type::Crutch::Crutches.new self, objects
122
- objects.flat_map { |object| send(method, object, indexed_objects, crutches) }
123
- end
124
- end
125
-
126
- def delete_bulk_entry(object, indexed_objects = nil, _crutches = nil)
127
- entry = {}
128
+ def import_routine(*args)
129
+ routine = Routine.new(self, args.extract_options!)
130
+ routine.create_indexes!
128
131
 
129
- if root_object.id
130
- entry[:_id] = root_object.compose_id(object)
132
+ if routine.parallel_options
133
+ import_parallel(args, routine)
131
134
  else
132
- entry[:_id] = object.id if object.respond_to?(:id)
133
- entry[:_id] ||= object[:id] || object['id'] if object.is_a?(Hash)
134
- entry[:_id] ||= object
135
- entry[:_id] = entry[:_id].to_s if defined?(BSON) && entry[:_id].is_a?(BSON::ObjectId)
135
+ import_linear(args, routine)
136
136
  end
137
-
138
- if root_object.parent_id
139
- existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
140
- return [] unless existing_object
141
- entry[:parent] = existing_object[:parent]
142
- end
143
-
144
- [{ delete: entry }]
145
137
  end
146
138
 
147
- def index_bulk_entry(object, indexed_objects = nil, crutches = nil)
148
- entry = {}
149
-
150
- if root_object.id
151
- entry[:_id] = root_object.compose_id(object)
152
- else
153
- entry[:_id] = object.id if object.respond_to?(:id)
154
- entry[:_id] ||= object[:id] || object['id'] if object.is_a?(Hash)
155
- entry[:_id] = entry[:_id].to_s if defined?(BSON) && entry[:_id].is_a?(BSON::ObjectId)
156
- end
157
- entry.delete(:_id) if entry[:_id].blank?
158
-
159
- if root_object.parent_id
160
- entry[:parent] = root_object.compose_parent(object)
161
- existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
139
+ def import_linear(objects, routine)
140
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
141
+ adapter.import(*objects, routine.options) do |action_objects|
142
+ routine.process(**action_objects)
143
+ end
144
+ routine.perform_bulk(routine.leftovers)
145
+ payload[:import] = routine.stats
146
+ payload[:errors] = payload_errors(routine.errors) if routine.errors.present?
147
+ payload[:errors]
162
148
  end
149
+ end
163
150
 
164
- entry[:data] = object_data(object, crutches)
151
+ def import_parallel(objects, routine)
152
+ raise "The `parallel` gem is required for parallel import, please add `gem 'parallel'` to your Gemfile" unless '::Parallel'.safe_constantize
165
153
 
166
- if existing_object && entry[:parent].to_s != existing_object[:parent]
167
- [{ delete: entry.except(:data).merge(parent: existing_object[:parent]) }, { index: entry }]
168
- else
169
- [{ index: entry }]
170
- end
171
- end
154
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
155
+ batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
172
156
 
173
- def fill_payload_import(payload, action_objects)
174
- imported = Hash[action_objects.map { |action, objects| [action, objects.count] }]
175
- imported.each do |action, count|
176
- payload[:import] ||= {}
177
- payload[:import][action] ||= 0
178
- payload[:import][action] += count
179
- end
180
- end
157
+ ::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
158
+ results = ::Parallel.map(batches, routine.parallel_options, &IMPORT_WORKER.curry[self, routine.options])
159
+ ::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
160
+ errors, import, leftovers = process_parallel_import_results(results)
181
161
 
182
- def fill_payload_errors(payload, import_errors)
183
- import_errors.each do |action, action_errors|
184
- action_errors.each do |error, documents|
185
- payload[:errors] ||= {}
186
- payload[:errors][action] ||= {}
187
- payload[:errors][action][error] ||= []
188
- payload[:errors][action][error] |= documents
162
+ if leftovers.present?
163
+ batches = leftovers.each_slice(routine.options[:batch_size])
164
+ results = ::Parallel.map(batches, routine.parallel_options, &LEFTOVERS_WORKER.curry[self, routine.options])
165
+ errors.concat(results.flatten(1))
189
166
  end
190
- end
191
- end
192
167
 
193
- def object_data(object, crutches = nil)
194
- if witchcraft?
195
- cauldron.brew(object, crutches)
196
- else
197
- build_root.compose(object, crutches)[type_name.to_s]
168
+ payload[:import] = import
169
+ payload[:errors] = payload_errors(errors) if errors.present?
170
+ payload[:errors]
198
171
  end
199
172
  end
200
173
 
201
- def extract_errors(items)
202
- items = items.each.with_object({}) do |item, memo|
203
- action = item.keys.first.to_sym
204
- data = item.values.first
205
- if data['error']
206
- (memo[action] ||= []).push(action: action, id: data['_id'], error: data['error'])
207
- end
174
+ def process_parallel_import_results(results)
175
+ results.each_with_object([[], {}, []]) do |r, (e, i, l)|
176
+ e.concat(r[:errors])
177
+ i.merge!(r[:import]) { |_k, v1, v2| v1.to_i + v2.to_i }
178
+ l.concat(r[:leftovers])
208
179
  end
209
-
210
- items.map do |action, action_items|
211
- errors = action_items.group_by { |item| item[:error] }.map do |error, error_items|
212
- { error => error_items.map { |item| item[:id] } }
213
- end.reduce(&:merge)
214
- { action => errors }
215
- end.reduce(&:merge) || {}
216
180
  end
217
181
 
218
- def fetch_indexed_objects(objects)
219
- ids = objects.map { |object| object.respond_to?(:id) ? object.id : object }
220
- result = client.search index: index_name,
221
- type: type_name,
222
- fields: '_parent',
223
- body: { filter: { ids: { values: ids } } },
224
- search_type: 'scan',
225
- scroll: '1m'
226
-
227
- indexed_objects = {}
182
+ def payload_errors(errors)
183
+ errors.each_with_object({}) do |error, result|
184
+ action = error.keys.first.to_sym
185
+ item = error.values.first
186
+ error = item['error']
187
+ id = item['_id']
228
188
 
229
- while (result = client.scroll(scroll_id: result['_scroll_id'], scroll: '1m'))
230
- break if result['hits']['hits'].empty?
231
-
232
- result['hits']['hits'].map do |hit|
233
- parent = hit.key?('_parent') ? hit['_parent'] : hit['fields']['_parent']
234
- indexed_objects[hit['_id']] = { parent: parent }
235
- end
189
+ result[action] ||= {}
190
+ result[action][error] ||= []
191
+ result[action][error].push(id)
236
192
  end
237
-
238
- indexed_objects
239
193
  end
240
194
  end
241
195
  end