chewy 0.8.4 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +56 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +36 -67
  6. data/.yardopts +5 -0
  7. data/Appraisals +63 -58
  8. data/CHANGELOG.md +168 -11
  9. data/Gemfile +16 -9
  10. data/Guardfile +5 -5
  11. data/LEGACY_DSL.md +497 -0
  12. data/README.md +403 -470
  13. data/Rakefile +11 -1
  14. data/chewy.gemspec +12 -15
  15. data/gemfiles/rails.4.0.activerecord.gemfile +9 -9
  16. data/gemfiles/rails.4.1.activerecord.gemfile +9 -9
  17. data/gemfiles/rails.4.2.activerecord.gemfile +8 -8
  18. data/gemfiles/rails.4.2.mongoid.5.2.gemfile +16 -0
  19. data/gemfiles/rails.5.0.activerecord.gemfile +16 -0
  20. data/gemfiles/rails.5.0.mongoid.6.1.gemfile +16 -0
  21. data/gemfiles/rails.5.1.activerecord.gemfile +16 -0
  22. data/gemfiles/rails.5.1.mongoid.6.3.gemfile +16 -0
  23. data/gemfiles/rails.5.2.activerecord.gemfile +16 -0
  24. data/gemfiles/sequel.4.45.gemfile +11 -0
  25. data/lib/chewy/backports/deep_dup.rb +1 -1
  26. data/lib/chewy/backports/duplicable.rb +1 -0
  27. data/lib/chewy/config.rb +53 -21
  28. data/lib/chewy/errors.rb +6 -6
  29. data/lib/chewy/fields/base.rb +59 -29
  30. data/lib/chewy/fields/root.rb +49 -14
  31. data/lib/chewy/index/actions.rb +95 -36
  32. data/lib/chewy/index/aliases.rb +2 -1
  33. data/lib/chewy/index/settings.rb +10 -5
  34. data/lib/chewy/index/specification.rb +60 -0
  35. data/lib/chewy/index.rb +239 -138
  36. data/lib/chewy/journal.rb +55 -0
  37. data/lib/chewy/log_subscriber.rb +8 -8
  38. data/lib/chewy/minitest/helpers.rb +77 -0
  39. data/lib/chewy/minitest/search_index_receiver.rb +80 -0
  40. data/lib/chewy/minitest.rb +1 -0
  41. data/lib/chewy/query/compose.rb +18 -19
  42. data/lib/chewy/query/criteria.rb +34 -24
  43. data/lib/chewy/query/filters.rb +28 -11
  44. data/lib/chewy/query/loading.rb +3 -4
  45. data/lib/chewy/query/nodes/and.rb +1 -1
  46. data/lib/chewy/query/nodes/base.rb +1 -1
  47. data/lib/chewy/query/nodes/bool.rb +6 -4
  48. data/lib/chewy/query/nodes/equal.rb +4 -4
  49. data/lib/chewy/query/nodes/exists.rb +1 -1
  50. data/lib/chewy/query/nodes/expr.rb +2 -2
  51. data/lib/chewy/query/nodes/field.rb +35 -31
  52. data/lib/chewy/query/nodes/has_child.rb +1 -0
  53. data/lib/chewy/query/nodes/has_parent.rb +1 -0
  54. data/lib/chewy/query/nodes/has_relation.rb +10 -12
  55. data/lib/chewy/query/nodes/missing.rb +1 -1
  56. data/lib/chewy/query/nodes/not.rb +1 -1
  57. data/lib/chewy/query/nodes/or.rb +1 -1
  58. data/lib/chewy/query/nodes/prefix.rb +3 -2
  59. data/lib/chewy/query/nodes/query.rb +1 -1
  60. data/lib/chewy/query/nodes/range.rb +9 -9
  61. data/lib/chewy/query/nodes/raw.rb +1 -1
  62. data/lib/chewy/query/nodes/regexp.rb +13 -9
  63. data/lib/chewy/query/nodes/script.rb +4 -4
  64. data/lib/chewy/query/pagination.rb +10 -1
  65. data/lib/chewy/query.rb +286 -170
  66. data/lib/chewy/railtie.rb +7 -6
  67. data/lib/chewy/rake_helper.rb +275 -37
  68. data/lib/chewy/repository.rb +2 -2
  69. data/lib/chewy/rspec/update_index.rb +70 -65
  70. data/lib/chewy/rspec.rb +1 -1
  71. data/lib/chewy/runtime/version.rb +4 -4
  72. data/lib/chewy/search/loader.rb +83 -0
  73. data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
  74. data/lib/chewy/search/pagination/will_paginate.rb +43 -0
  75. data/lib/chewy/search/parameters/aggs.rb +16 -0
  76. data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
  77. data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
  78. data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
  79. data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
  80. data/lib/chewy/search/parameters/concerns/query_storage.rb +238 -0
  81. data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
  82. data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
  83. data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
  84. data/lib/chewy/search/parameters/explain.rb +16 -0
  85. data/lib/chewy/search/parameters/filter.rb +47 -0
  86. data/lib/chewy/search/parameters/highlight.rb +16 -0
  87. data/lib/chewy/search/parameters/indices.rb +123 -0
  88. data/lib/chewy/search/parameters/indices_boost.rb +52 -0
  89. data/lib/chewy/search/parameters/limit.rb +17 -0
  90. data/lib/chewy/search/parameters/load.rb +32 -0
  91. data/lib/chewy/search/parameters/min_score.rb +16 -0
  92. data/lib/chewy/search/parameters/none.rb +27 -0
  93. data/lib/chewy/search/parameters/offset.rb +17 -0
  94. data/lib/chewy/search/parameters/order.rb +64 -0
  95. data/lib/chewy/search/parameters/post_filter.rb +19 -0
  96. data/lib/chewy/search/parameters/preference.rb +16 -0
  97. data/lib/chewy/search/parameters/profile.rb +16 -0
  98. data/lib/chewy/search/parameters/query.rb +19 -0
  99. data/lib/chewy/search/parameters/request_cache.rb +27 -0
  100. data/lib/chewy/search/parameters/rescore.rb +29 -0
  101. data/lib/chewy/search/parameters/script_fields.rb +16 -0
  102. data/lib/chewy/search/parameters/search_after.rb +20 -0
  103. data/lib/chewy/search/parameters/search_type.rb +16 -0
  104. data/lib/chewy/search/parameters/source.rb +73 -0
  105. data/lib/chewy/search/parameters/storage.rb +95 -0
  106. data/lib/chewy/search/parameters/stored_fields.rb +63 -0
  107. data/lib/chewy/search/parameters/suggest.rb +16 -0
  108. data/lib/chewy/search/parameters/terminate_after.rb +16 -0
  109. data/lib/chewy/search/parameters/timeout.rb +16 -0
  110. data/lib/chewy/search/parameters/track_scores.rb +16 -0
  111. data/lib/chewy/search/parameters/types.rb +20 -0
  112. data/lib/chewy/search/parameters/version.rb +16 -0
  113. data/lib/chewy/search/parameters.rb +167 -0
  114. data/lib/chewy/search/query_proxy.rb +257 -0
  115. data/lib/chewy/search/request.rb +1045 -0
  116. data/lib/chewy/search/response.rb +119 -0
  117. data/lib/chewy/search/scoping.rb +50 -0
  118. data/lib/chewy/search/scrolling.rb +134 -0
  119. data/lib/chewy/search.rb +81 -26
  120. data/lib/chewy/stash.rb +79 -0
  121. data/lib/chewy/strategy/active_job.rb +1 -0
  122. data/lib/chewy/strategy/atomic.rb +2 -4
  123. data/lib/chewy/strategy/base.rb +4 -4
  124. data/lib/chewy/strategy/bypass.rb +1 -2
  125. data/lib/chewy/strategy/resque.rb +1 -0
  126. data/lib/chewy/strategy/shoryuken.rb +40 -0
  127. data/lib/chewy/strategy/sidekiq.rb +13 -1
  128. data/lib/chewy/strategy/urgent.rb +1 -1
  129. data/lib/chewy/strategy.rb +19 -10
  130. data/lib/chewy/type/actions.rb +26 -2
  131. data/lib/chewy/type/adapter/active_record.rb +50 -24
  132. data/lib/chewy/type/adapter/base.rb +29 -9
  133. data/lib/chewy/type/adapter/mongoid.rb +19 -10
  134. data/lib/chewy/type/adapter/object.rb +195 -31
  135. data/lib/chewy/type/adapter/orm.rb +69 -33
  136. data/lib/chewy/type/adapter/sequel.rb +37 -19
  137. data/lib/chewy/type/crutch.rb +5 -4
  138. data/lib/chewy/type/import/bulk_builder.rb +122 -0
  139. data/lib/chewy/type/import/bulk_request.rb +78 -0
  140. data/lib/chewy/type/import/journal_builder.rb +45 -0
  141. data/lib/chewy/type/import/routine.rb +138 -0
  142. data/lib/chewy/type/import.rb +150 -176
  143. data/lib/chewy/type/mapping.rb +58 -42
  144. data/lib/chewy/type/observe.rb +21 -15
  145. data/lib/chewy/type/syncer.rb +222 -0
  146. data/lib/chewy/type/witchcraft.rb +89 -34
  147. data/lib/chewy/type/wrapper.rb +48 -16
  148. data/lib/chewy/type.rb +77 -49
  149. data/lib/chewy/version.rb +1 -1
  150. data/lib/chewy.rb +95 -52
  151. data/lib/generators/chewy/install_generator.rb +3 -3
  152. data/lib/sequel/plugins/chewy_observe.rb +4 -19
  153. data/lib/tasks/chewy.rake +91 -28
  154. data/spec/chewy/config_spec.rb +130 -12
  155. data/spec/chewy/fields/base_spec.rb +194 -172
  156. data/spec/chewy/fields/root_spec.rb +123 -17
  157. data/spec/chewy/fields/time_fields_spec.rb +10 -9
  158. data/spec/chewy/index/actions_spec.rb +228 -43
  159. data/spec/chewy/index/aliases_spec.rb +2 -2
  160. data/spec/chewy/index/settings_spec.rb +100 -49
  161. data/spec/chewy/index/specification_spec.rb +169 -0
  162. data/spec/chewy/index_spec.rb +159 -63
  163. data/spec/chewy/journal_spec.rb +268 -0
  164. data/spec/chewy/minitest/helpers_spec.rb +90 -0
  165. data/spec/chewy/minitest/search_index_receiver_spec.rb +120 -0
  166. data/spec/chewy/query/criteria_spec.rb +503 -236
  167. data/spec/chewy/query/filters_spec.rb +96 -68
  168. data/spec/chewy/query/loading_spec.rb +80 -42
  169. data/spec/chewy/query/nodes/and_spec.rb +3 -7
  170. data/spec/chewy/query/nodes/bool_spec.rb +5 -13
  171. data/spec/chewy/query/nodes/equal_spec.rb +20 -20
  172. data/spec/chewy/query/nodes/exists_spec.rb +7 -7
  173. data/spec/chewy/query/nodes/has_child_spec.rb +42 -23
  174. data/spec/chewy/query/nodes/has_parent_spec.rb +42 -23
  175. data/spec/chewy/query/nodes/match_all_spec.rb +2 -2
  176. data/spec/chewy/query/nodes/missing_spec.rb +6 -5
  177. data/spec/chewy/query/nodes/not_spec.rb +5 -7
  178. data/spec/chewy/query/nodes/or_spec.rb +3 -7
  179. data/spec/chewy/query/nodes/prefix_spec.rb +6 -6
  180. data/spec/chewy/query/nodes/query_spec.rb +3 -3
  181. data/spec/chewy/query/nodes/range_spec.rb +19 -19
  182. data/spec/chewy/query/nodes/raw_spec.rb +2 -2
  183. data/spec/chewy/query/nodes/regexp_spec.rb +31 -19
  184. data/spec/chewy/query/nodes/script_spec.rb +5 -5
  185. data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
  186. data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
  187. data/spec/chewy/query/pagination_spec.rb +25 -22
  188. data/spec/chewy/query_spec.rb +510 -505
  189. data/spec/chewy/rake_helper_spec.rb +381 -0
  190. data/spec/chewy/repository_spec.rb +8 -8
  191. data/spec/chewy/rspec/update_index_spec.rb +215 -113
  192. data/spec/chewy/runtime_spec.rb +2 -2
  193. data/spec/chewy/search/loader_spec.rb +117 -0
  194. data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
  195. data/spec/chewy/search/pagination/kaminari_spec.rb +21 -0
  196. data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
  197. data/spec/chewy/search/pagination/will_paginate_spec.rb +23 -0
  198. data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
  199. data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
  200. data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
  201. data/spec/chewy/search/parameters/explain_spec.rb +5 -0
  202. data/spec/chewy/search/parameters/filter_spec.rb +5 -0
  203. data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
  204. data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
  205. data/spec/chewy/search/parameters/indices_spec.rb +191 -0
  206. data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
  207. data/spec/chewy/search/parameters/limit_spec.rb +5 -0
  208. data/spec/chewy/search/parameters/load_spec.rb +60 -0
  209. data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
  210. data/spec/chewy/search/parameters/none_spec.rb +5 -0
  211. data/spec/chewy/search/parameters/offset_spec.rb +5 -0
  212. data/spec/chewy/search/parameters/order_spec.rb +65 -0
  213. data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
  214. data/spec/chewy/search/parameters/preference_spec.rb +5 -0
  215. data/spec/chewy/search/parameters/profile_spec.rb +5 -0
  216. data/spec/chewy/search/parameters/query_spec.rb +5 -0
  217. data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
  218. data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
  219. data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
  220. data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
  221. data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
  222. data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
  223. data/spec/chewy/search/parameters/source_spec.rb +156 -0
  224. data/spec/chewy/search/parameters/storage_spec.rb +60 -0
  225. data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
  226. data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
  227. data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
  228. data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
  229. data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
  230. data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
  231. data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
  232. data/spec/chewy/search/parameters/types_spec.rb +5 -0
  233. data/spec/chewy/search/parameters/version_spec.rb +5 -0
  234. data/spec/chewy/search/parameters_spec.rb +145 -0
  235. data/spec/chewy/search/query_proxy_spec.rb +68 -0
  236. data/spec/chewy/search/request_spec.rb +685 -0
  237. data/spec/chewy/search/response_spec.rb +192 -0
  238. data/spec/chewy/search/scrolling_spec.rb +169 -0
  239. data/spec/chewy/search_spec.rb +37 -20
  240. data/spec/chewy/stash_spec.rb +95 -0
  241. data/spec/chewy/strategy/active_job_spec.rb +8 -2
  242. data/spec/chewy/strategy/atomic_spec.rb +4 -1
  243. data/spec/chewy/strategy/resque_spec.rb +8 -2
  244. data/spec/chewy/strategy/shoryuken_spec.rb +66 -0
  245. data/spec/chewy/strategy/sidekiq_spec.rb +10 -2
  246. data/spec/chewy/strategy_spec.rb +6 -6
  247. data/spec/chewy/type/actions_spec.rb +29 -10
  248. data/spec/chewy/type/adapter/active_record_spec.rb +357 -139
  249. data/spec/chewy/type/adapter/mongoid_spec.rb +220 -101
  250. data/spec/chewy/type/adapter/object_spec.rb +129 -40
  251. data/spec/chewy/type/adapter/sequel_spec.rb +304 -152
  252. data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
  253. data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
  254. data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
  255. data/spec/chewy/type/import/routine_spec.rb +110 -0
  256. data/spec/chewy/type/import_spec.rb +360 -244
  257. data/spec/chewy/type/mapping_spec.rb +96 -29
  258. data/spec/chewy/type/observe_spec.rb +25 -15
  259. data/spec/chewy/type/syncer_spec.rb +123 -0
  260. data/spec/chewy/type/witchcraft_spec.rb +122 -44
  261. data/spec/chewy/type/wrapper_spec.rb +63 -23
  262. data/spec/chewy/type_spec.rb +32 -10
  263. data/spec/chewy_spec.rb +82 -12
  264. data/spec/spec_helper.rb +16 -2
  265. data/spec/support/active_record.rb +6 -2
  266. data/spec/support/class_helpers.rb +4 -19
  267. data/spec/support/mongoid.rb +17 -5
  268. data/spec/support/sequel.rb +6 -1
  269. metadata +250 -57
  270. data/gemfiles/rails.3.2.activerecord.gemfile +0 -15
  271. data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -14
  272. data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -14
  273. data/gemfiles/rails.4.0.activerecord.kaminari.gemfile +0 -14
  274. data/gemfiles/rails.4.0.activerecord.will_paginate.gemfile +0 -14
  275. data/gemfiles/rails.4.0.mongoid.4.0.0.gemfile +0 -15
  276. data/gemfiles/rails.4.0.mongoid.4.0.0.kaminari.gemfile +0 -14
  277. data/gemfiles/rails.4.0.mongoid.4.0.0.will_paginate.gemfile +0 -14
  278. data/gemfiles/rails.4.0.mongoid.5.1.0.gemfile +0 -15
  279. data/gemfiles/rails.4.0.mongoid.5.1.0.kaminari.gemfile +0 -14
  280. data/gemfiles/rails.4.0.mongoid.5.1.0.will_paginate.gemfile +0 -14
  281. data/gemfiles/rails.4.1.activerecord.kaminari.gemfile +0 -14
  282. data/gemfiles/rails.4.1.activerecord.will_paginate.gemfile +0 -14
  283. data/gemfiles/rails.4.1.mongoid.4.0.0.gemfile +0 -15
  284. data/gemfiles/rails.4.1.mongoid.4.0.0.kaminari.gemfile +0 -14
  285. data/gemfiles/rails.4.1.mongoid.4.0.0.will_paginate.gemfile +0 -14
  286. data/gemfiles/rails.4.1.mongoid.5.1.0.gemfile +0 -15
  287. data/gemfiles/rails.4.1.mongoid.5.1.0.kaminari.gemfile +0 -14
  288. data/gemfiles/rails.4.1.mongoid.5.1.0.will_paginate.gemfile +0 -14
  289. data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -15
  290. data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -15
  291. data/gemfiles/rails.4.2.mongoid.4.0.0.gemfile +0 -15
  292. data/gemfiles/rails.4.2.mongoid.4.0.0.kaminari.gemfile +0 -14
  293. data/gemfiles/rails.4.2.mongoid.4.0.0.will_paginate.gemfile +0 -14
  294. data/gemfiles/rails.4.2.mongoid.5.1.0.gemfile +0 -15
  295. data/gemfiles/rails.4.2.mongoid.5.1.0.kaminari.gemfile +0 -14
  296. data/gemfiles/rails.4.2.mongoid.5.1.0.will_paginate.gemfile +0 -14
  297. data/gemfiles/rails.5.0.0.beta3.activerecord.gemfile +0 -16
  298. data/gemfiles/rails.5.0.0.beta3.activerecord.kaminari.gemfile +0 -16
  299. data/gemfiles/rails.5.0.0.beta3.activerecord.will_paginate.gemfile +0 -15
  300. data/gemfiles/sequel.4.31.gemfile +0 -13
  301. data/lib/chewy/query/pagination/will_paginate.rb +0 -27
  302. data/lib/chewy/query/scoping.rb +0 -20
  303. data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -60
@@ -6,16 +6,15 @@ module Chewy
6
6
  class Orm < Base
7
7
  attr_reader :default_scope
8
8
 
9
- def initialize *args
10
- @options = args.extract_options!
11
- class_or_relation = args.first
12
- if class_or_relation.is_a?(relation_class)
13
- @target = model_of_relation(class_or_relation)
14
- @default_scope = class_or_relation
9
+ def initialize(target, **options)
10
+ if target.is_a?(relation_class)
11
+ @target = model_of_relation(target)
12
+ @default_scope = target
15
13
  else
16
- @target = class_or_relation
14
+ @target = target
17
15
  @default_scope = all_scope
18
16
  end
17
+ @options = options
19
18
  cleanup_default_scope!
20
19
  end
21
20
 
@@ -23,12 +22,12 @@ module Chewy
23
22
  @name ||= (options[:name].presence || target.name).to_s.camelize.demodulize
24
23
  end
25
24
 
26
- def identify collection
25
+ def identify(collection)
27
26
  if collection.is_a?(relation_class)
28
- pluck_ids(collection)
27
+ pluck(collection)
29
28
  else
30
29
  Array.wrap(collection).map do |entity|
31
- entity.is_a?(object_class) ? entity.public_send(primary_key) : entity
30
+ entity.respond_to?(primary_key) ? entity.public_send(primary_key) : entity
32
31
  end
33
32
  end
34
33
  end
@@ -49,7 +48,7 @@ module Chewy
49
48
  #
50
49
  # Method handles destroyed objects as well. In case of objects ORM scope
51
50
  # or array passed, objects, responding with true to `destroyed?` method will be deleted
52
- # from index. In case of ids array passed - documents with missing records ids will be
51
+ # from index. In case of ids array passed - documents with missing source object ids will be
53
52
  # deleted from index:
54
53
  #
55
54
  # users = User.all
@@ -73,48 +72,66 @@ module Chewy
73
72
  # # or
74
73
  # UsersIndex::User.import users.map(&:id) # user ids will be deleted from index
75
74
  #
76
- def import *args, &block
77
- import_options = args.extract_options!
78
- batch_size = import_options[:batch_size] || BATCH_SIZE
79
-
80
- collection = args.empty? ? default_scope :
81
- (args.one? && args.first.is_a?(relation_class) ? args.first : args.flatten.compact)
75
+ def import(*args, &block)
76
+ collection, options = import_args(*args)
82
77
 
83
78
  if collection.is_a?(relation_class)
84
- import_scope(collection, batch_size, &block)
79
+ import_scope(collection, options, &block)
85
80
  else
86
- import_objects(collection, batch_size, &block)
81
+ import_objects(collection, options, &block)
87
82
  end
88
83
  end
89
84
 
90
- def load *args
91
- load_options = args.extract_options!
92
- objects = args.flatten
85
+ def import_fields(*args, &block)
86
+ return enum_for(:import_fields, *args) unless block_given?
87
+
88
+ collection, options = import_args(*args)
89
+
90
+ if options[:fields].present? || collection.is_a?(relation_class)
91
+ collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
92
+ pluck_in_batches(collection, options.slice(:fields, :batch_size, :typecast), &block)
93
+ else
94
+ identify(collection).each_slice(options[:batch_size]) do |batch|
95
+ yield batch
96
+ end
97
+ end
98
+ end
99
+ alias_method :import_references, :import_fields
93
100
 
94
- additional_scope = load_options[load_options[:_type].type_name.to_sym].try(:[], :scope) || load_options[:scope]
101
+ def load(ids, **options)
102
+ scope = all_scope_where_ids_in(ids)
103
+ additional_scope = options[options[:_type].type_name.to_sym].try(:[], :scope) || options[:scope]
95
104
 
96
- scope = all_scope_where_ids_in(objects.map(&primary_key))
97
- loaded_objects = load_scope_objects(scope, additional_scope).index_by { |object| object.public_send(primary_key).to_s }
105
+ loaded_objects = load_scope_objects(scope, additional_scope)
106
+ .index_by do |object|
107
+ object.public_send(primary_key).to_s
108
+ end
98
109
 
99
- objects.map { |object| loaded_objects[object.public_send(primary_key).to_s] }
110
+ ids.map { |id| loaded_objects[id.to_s] }
100
111
  end
101
112
 
102
113
  private
103
114
 
104
- def import_objects(collection, batch_size)
105
- hash = Hash[identify(collection).zip(collection)]
115
+ def import_objects(collection, options)
116
+ collection_ids = identify(collection)
117
+ hash = Hash[collection_ids.map(&:to_s).zip(collection)]
118
+
119
+ indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
120
+ batch = if options[:raw_import]
121
+ raw_default_scope_where_ids_in(ids, options[:raw_import])
122
+ else
123
+ default_scope_where_ids_in(ids)
124
+ end
106
125
 
107
- indexed = hash.keys.each_slice(batch_size).map do |ids|
108
- batch = default_scope_where_ids_in(ids)
109
126
  if batch.empty?
110
127
  true
111
128
  else
112
- identify(batch).each { |id| hash.delete(id) }
129
+ batch.each { |object| hash.delete(object.send(primary_key).to_s) }
113
130
  yield grouped_objects(batch)
114
131
  end
115
132
  end.all?
116
133
 
117
- deleted = hash.keys.each_slice(batch_size).map do |group|
134
+ deleted = hash.keys.each_slice(options[:batch_size]).map do |group|
118
135
  yield delete: hash.values_at(*group)
119
136
  end.all?
120
137
 
@@ -137,7 +154,7 @@ module Chewy
137
154
  target.where(nil)
138
155
  end
139
156
 
140
- def model_of_relation relation
157
+ def model_of_relation(relation)
141
158
  relation.klass
142
159
  end
143
160
 
@@ -150,6 +167,25 @@ module Chewy
150
167
  scope
151
168
  end
152
169
  end
170
+
171
+ def grouped_objects(objects)
172
+ options[:delete_if] ? super : {index: objects.to_a}
173
+ end
174
+
175
+ def import_args(*args)
176
+ options = args.extract_options!
177
+ options[:batch_size] ||= BATCH_SIZE
178
+
179
+ collection = if args.empty?
180
+ default_scope
181
+ elsif args.one? && args.first.is_a?(relation_class)
182
+ args.first
183
+ else
184
+ args.flatten.compact
185
+ end
186
+
187
+ [collection, options]
188
+ end
153
189
  end
154
190
  end
155
191
  end
@@ -16,45 +16,63 @@ module Chewy
16
16
  private
17
17
 
18
18
  def cleanup_default_scope!
19
- if Chewy.logger && @default_scope != @default_scope.unordered.unlimited
20
- Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified')
21
- end
19
+ Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified') if Chewy.logger && @default_scope != @default_scope.unordered.unlimited
22
20
 
23
21
  @default_scope = @default_scope.unordered.unlimited
24
22
  end
25
23
 
26
- def import_scope(scope, batch_size)
27
- scope = scope.unordered.order(::Sequel.asc(primary_key)).limit(batch_size)
28
-
29
- ids = pluck_ids(scope)
30
- result = true
31
-
32
- while ids.present?
33
- result &= yield grouped_objects(default_scope_where_ids_in(ids).all)
34
- break if ids.size < batch_size
35
- ids = pluck_ids(scope.where { |o| o.__send__(primary_key) > ids.last })
24
+ def import_scope(scope, options)
25
+ pluck_in_batches(scope, options.slice(:batch_size)).inject(true) do |result, ids|
26
+ result & yield(grouped_objects(default_scope_where_ids_in(ids).all))
36
27
  end
37
-
38
- result
39
28
  end
40
29
 
41
30
  def primary_key
42
31
  target.primary_key
43
32
  end
44
33
 
34
+ def full_column_name(column)
35
+ ::Sequel.qualify(target.table_name, column)
36
+ end
37
+
45
38
  def all_scope
46
39
  target.dataset
47
40
  end
48
41
 
49
- def pluck_ids(scope)
50
- scope.distinct.select_map(primary_key)
42
+ def target_columns
43
+ @target_columns ||= target.columns.to_set
44
+ end
45
+
46
+ def pluck(scope, fields: [])
47
+ fields = fields.map(&:to_sym).unshift(primary_key).map do |column|
48
+ target_columns.include?(column) ? full_column_name(column) : column
49
+ end
50
+ scope.distinct.select_map(fields.one? ? fields.first : fields)
51
+ end
52
+
53
+ def pluck_in_batches(scope, fields: [], batch_size: nil, **options)
54
+ return enum_for(:pluck_in_batches, scope, fields: fields, batch_size: batch_size, **options) unless block_given?
55
+
56
+ scope = scope.unordered.order(full_column_name(primary_key).asc).limit(batch_size)
57
+
58
+ ids = pluck(scope, fields: fields)
59
+ count = 0
60
+
61
+ while ids.present?
62
+ yield ids
63
+ break if ids.size < batch_size
64
+ last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
65
+ ids = pluck(scope.where { |_o| full_column_name(primary_key) > last_id }, fields: fields)
66
+ end
67
+
68
+ count
51
69
  end
52
70
 
53
71
  def scope_where_ids_in(scope, ids)
54
- scope.where(primary_key => Array.wrap(ids))
72
+ scope.where(full_column_name(primary_key) => Array.wrap(ids))
55
73
  end
56
74
 
57
- def model_of_relation relation
75
+ def model_of_relation(relation)
58
76
  relation.model
59
77
  end
60
78
 
@@ -9,9 +9,10 @@ module Chewy
9
9
  end
10
10
 
11
11
  class Crutches
12
- def initialize type, collection
13
- @type, @collection = type, collection
14
- @type._crutches.keys.each do |name|
12
+ def initialize(type, collection)
13
+ @type = type
14
+ @collection = collection
15
+ @type._crutches.each_key do |name|
15
16
  singleton_class.class_eval <<-METHOD, __FILE__, __LINE__ + 1
16
17
  def #{name}
17
18
  @#{name} ||= @type._crutches[:#{name}].call @collection
@@ -22,7 +23,7 @@ module Chewy
22
23
  end
23
24
 
24
25
  module ClassMethods
25
- def crutch name, &block
26
+ def crutch(name, &block)
26
27
  self._crutches = _crutches.merge(name.to_sym => block)
27
28
  end
28
29
  end
@@ -0,0 +1,122 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # This class's purpose is to build ES client-acceptable bulk
5
+ # request body from the passed objects for index and deletion.
6
+ # It handles parent-child relationships as well by fetching
7
+ # existing documents from ES, taking their `_parent` field and
8
+ # using it in the bulk body.
9
+ # If fields are passed - it creates partial update entries except for
10
+ # the cases when the type has parent and parent_id has been changed.
11
+ class BulkBuilder
12
+ # @param type [Chewy::Type] desired type
13
+ # @param index [Array<Object>] objects to index
14
+ # @param delete [Array<Object>] objects or ids to delete
15
+ # @param fields [Array<Symbol, String>] an array of fields for documents update
16
+ def initialize(type, index: [], delete: [], fields: [])
17
+ @type = type
18
+ @index = index
19
+ @delete = delete
20
+ @fields = fields.map!(&:to_sym)
21
+ end
22
+
23
+ # Returns ES API-ready bulk request body.
24
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
+ # @return [Array<Hash>] bulk body
26
+ def bulk_body
27
+ @bulk_body ||= @index.flat_map(&method(:index_entry)).concat(
28
+ @delete.flat_map(&method(:delete_entry))
29
+ )
30
+ end
31
+
32
+ # The only purpose of this method is to cache document ids for
33
+ # all the passed objects for index to avoid ids recalculation.
34
+ #
35
+ # @return [Hash[String => Object]] an ids-objects index hash
36
+ def index_objects_by_id
37
+ @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
38
+ end
39
+
40
+ private
41
+
42
+ def crutches
43
+ @crutches ||= Chewy::Type::Crutch::Crutches.new @type, @index
44
+ end
45
+
46
+ def parents
47
+ return unless type_root.parent_id
48
+
49
+ @parents ||= begin
50
+ ids = @index.map do |object|
51
+ object.respond_to?(:id) ? object.id : object
52
+ end
53
+ ids.concat(@delete.map do |object|
54
+ object.respond_to?(:id) ? object.id : object
55
+ end)
56
+ @type.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
+ end
58
+ end
59
+
60
+ def index_entry(object)
61
+ entry = {}
62
+ entry[:_id] = index_object_ids[object] if index_object_ids[object]
63
+
64
+ if parents
65
+ entry[:parent] = type_root.compose_parent(object)
66
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
+ end
68
+
69
+ if parent && entry[:parent].to_s != parent
70
+ entry[:data] = @type.compose(object, crutches)
71
+ [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
72
+ elsif @fields.present?
73
+ return [] unless entry[:_id]
74
+ entry[:data] = {doc: @type.compose(object, crutches, fields: @fields)}
75
+ [{update: entry}]
76
+ else
77
+ entry[:data] = @type.compose(object, crutches)
78
+ [{index: entry}]
79
+ end
80
+ end
81
+
82
+ def delete_entry(object)
83
+ entry = {}
84
+ entry[:_id] = entry_id(object)
85
+ entry[:_id] ||= object.as_json
86
+
87
+ return [] if entry[:_id].blank?
88
+
89
+ if parents
90
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
91
+ return [] unless parent
92
+ entry[:parent] = parent
93
+ end
94
+
95
+ [{delete: entry}]
96
+ end
97
+
98
+ def entry_id(object)
99
+ if type_root.id
100
+ type_root.compose_id(object)
101
+ else
102
+ id = object.id if object.respond_to?(:id)
103
+ id ||= object[:id] || object['id'] if object.is_a?(Hash)
104
+ id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
105
+ id
106
+ end
107
+ end
108
+
109
+ def index_object_ids
110
+ @index_object_ids ||= @index.each_with_object({}) do |object, result|
111
+ id = entry_id(object)
112
+ result[object] = id if id.present?
113
+ end
114
+ end
115
+
116
+ def type_root
117
+ @type_root ||= @type.root
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,78 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # Adds additional features to elasticsearch-api bulk method:
5
+ # * supports Chewy index suffix if necessary;
6
+ # * supports bulk_size, divides the passed body in chunks
7
+ # and performs a separate request for each chunk;
8
+ # * returns only errored document entries from the response
9
+ # if any present.
10
+ #
11
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
12
+ class BulkRequest
13
+ # @param type [Chewy::Type] a type for the request
14
+ # @param suffix [String] an index name optional suffix
15
+ # @param bulk_size [Integer] bulk size in bytes
16
+ # @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
17
+ def initialize(type, suffix: nil, bulk_size: nil, **bulk_options)
18
+ @type = type
19
+ @suffix = suffix
20
+ @bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
21
+ @bulk_options = bulk_options
22
+
23
+ raise ArgumentError, '`bulk_size` can\'t be less than 1 kilobyte' if @bulk_size && @bulk_size <= 0
24
+ end
25
+
26
+ # Performs a bulk request with the passed body, returns empty
27
+ # array if everything is fine and array filled with errored
28
+ # document entries if something went wrong.
29
+ #
30
+ # @param body [Array<Hash>] a standard bulk request body
31
+ # @return [Array<Hash>] an array of bulk errors
32
+ def perform(body)
33
+ return [] if body.blank?
34
+
35
+ request_bodies(body).each_with_object([]) do |request_body, results|
36
+ response = @type.client.bulk request_base.merge(body: request_body) if request_body.present?
37
+
38
+ next unless response.try(:[], 'errors')
39
+
40
+ response_items = (response.try(:[], 'items') || [])
41
+ .select { |item| item.values.first['error'] }
42
+ results.concat(response_items)
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def request_base
49
+ @request_base ||= {
50
+ index: @type.index_name(suffix: @suffix),
51
+ type: @type.type_name
52
+ }.merge!(@bulk_options)
53
+ end
54
+
55
+ def request_bodies(body)
56
+ if @bulk_size
57
+ serializer = ::Elasticsearch::API.serializer
58
+ pieces = body.each_with_object(['']) do |piece, result|
59
+ operation, meta = piece.to_a.first
60
+ data = meta.delete(:data)
61
+ piece = serializer.dump(operation => meta)
62
+ piece << "\n" << serializer.dump(data) if data.present?
63
+
64
+ if result.last.bytesize + piece.bytesize > @bulk_size
65
+ result.push(piece)
66
+ else
67
+ result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
68
+ end
69
+ end
70
+ pieces.each { |piece| piece << "\n" }
71
+ else
72
+ [body]
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,45 @@
1
module Chewy
  class Type
    module Import
      # Builds the part of a bulk request body that records the indexed and
      # deleted objects of a type as journal entries.
      class JournalBuilder
        # @param type [Chewy::Type] the type whose objects are being journaled
        # @param index [Array<Object>] objects that are being indexed
        # @param delete [Array<Object>] objects that are being deleted
        def initialize(type, index: [], delete: [])
          @type = type
          @index = index
          @delete = delete
        end

        # Produces index operations for the journal entries (at most one per
        # action) and points each of them at the journal index and type.
        #
        # @return [Array<Hash>] bulk body entries for the journal
        def bulk_body
          journal_body = Chewy::Type::Import::BulkBuilder.new(
            Chewy::Stash::Journal::Journal,
            index: [entries(:index, @index), entries(:delete, @delete)].compact
          ).bulk_body

          journal_body.each do |operation|
            operation.values.first.merge!(
              _index: Chewy::Stash::Journal.index_name,
              _type: Chewy::Stash::Journal::Journal.type_name
            )
          end
        end

        private

        # Composes a single journal entry for the action, or nil when there
        # are no objects for it.
        def entries(action, objects)
          return if objects.blank?

          encoded_references = identify(objects).map do |reference|
            Base64.encode64(::Elasticsearch::API.serializer.dump(reference))
          end

          {
            index_name: @type.index.derivable_name,
            type_name: @type.type_name,
            action: action,
            references: encoded_references,
            created_at: Time.now.utc
          }
        end

        # Delegates object identification to the type adapter.
        def identify(objects)
          @type.adapter.identify(objects)
        end
      end
    end
  end
end
@@ -0,0 +1,138 @@
1
module Chewy
  class Type
    module Import
      # This class performs the import routine for the options and objects given.
      #
      # 0. Create target and journal indexes if needed.
      # 1. Iterate over all the passed objects in batches.
      # 2. For each batch the {#process} method is called. It:
      #   * creates a bulk request body;
      #   * appends journal entries for the current batch to the request body;
      #   * prepends the leftovers bulk, which is calculated from the errors of
      #     the previous iteration, to the request body;
      #   * performs the bulk request;
      #   * composes a new leftovers bulk for the next iteration from the
      #     response errors if `update_failover` is true;
      #   * appends the rest of the unfixable errors to the instance level errors array.
      # 3. Perform the request for the last leftovers bulk if present using {#extract_leftovers}.
      # 4. Return the result errors array.
      #
      # At the moment, it only tries to recover from partial document update errors
      # caused by a missing document, and only if the `update_failover` option is
      # true. In order to recover, it indexes such objects completely on the next
      # iteration.
      #
      # @see Chewy::Type::Import::ClassMethods#import
      class Routine
        # Import options that are handed over to the bulk request.
        BULK_OPTIONS = %i[
          suffix bulk_size
          refresh timeout fields pipeline
          consistency replication
          wait_for_active_shards routing _source _source_exclude _source_include
        ].freeze

        # Lowest-priority defaults, applied after the type level import
        # options and the configured `journal` value.
        DEFAULT_OPTIONS = {
          refresh: true,
          update_fields: [],
          update_failover: true,
          batch_size: Chewy::Type::Adapter::Base::BATCH_SIZE
        }.freeze

        attr_reader :options, :parallel_options, :errors, :stats, :leftovers

        # Basically, processes the passed options, extracting the bulk request
        # specific ones.
        #
        # @param type [Chewy::Type] chewy type
        # @param options [Hash] import options, see {Chewy::Type::Import::ClassMethods#import}
        def initialize(type, **options)
          @type = type
          # Explicit options win over the type defaults, which win over the
          # configured journal flag, which wins over DEFAULT_OPTIONS.
          @options = options
            .reverse_merge!(@type._default_import_options)
            .reverse_merge!(journal: Chewy.configuration[:journal])
            .reverse_merge!(DEFAULT_OPTIONS)
          @bulk_options = @options.slice(*BULK_OPTIONS)
          # `parallel` is removed from the options; an Integer becomes the
          # `in_processes` count, any other truthy non-Hash value becomes {}.
          @parallel_options =
            case (parallel = @options.delete(:parallel))
            when nil, false, Hash then parallel
            when Integer then {in_processes: parallel}
            else {}
            end
          @errors = []
          @stats = {}
          @leftovers = []
        end

        # Creates the journal index and the type corresponding index if necessary.
        #
        # @return [Object] whatever
        def create_indexes!
          Chewy::Stash::Journal.create if @options[:journal]
          return if Chewy.configuration[:skip_index_creation_on_import]
          return if @type.index.exists?

          @type.index.create!(@bulk_options.slice(:suffix))
        end

        # The main process method. Converts the passed objects to the bulk request
        # body, appends journal entries, performs the request and handles errors,
        # performing failover procedures if applicable.
        #
        # @param index [Array<Object>] any acceptable objects for indexing
        # @param delete [Array<Object>] any acceptable objects for deleting
        # @return [true, false] the result of the request, true if no errors
        def process(index: [], delete: [])
          builder = BulkBuilder.new(@type, index: index, delete: delete, fields: @options[:update_fields])
          body = builder.bulk_body
          body.concat(JournalBuilder.new(@type, index: index, delete: delete).bulk_body) if @options[:journal]
          # Operations left over from the previous call go first.
          body.unshift(*flush_leftovers)

          perform_bulk(body) do |bulk_errors|
            @leftovers = extract_leftovers(bulk_errors, builder.index_objects_by_id)
            {index: index, delete: delete}.each do |action, objects|
              next unless objects.present?

              @stats[action] = @stats[action].to_i + objects.count
            end
          end
        end

        # Performs a bulk request for the passed body.
        #
        # @param body [Array<Hash>] a standard bulk request body
        # @yield [Array<Hash>] the bulk errors, before they are recorded
        # @return [true, false] the result of the request, true if no errors
        def perform_bulk(body)
          bulk_errors = bulk.perform(body)
          # The block runs before the errors are recorded, so whatever it
          # removes from the array never reaches `errors`.
          yield bulk_errors if block_given?
          Chewy.wait_for_status
          @errors.concat(bulk_errors)
          bulk_errors.blank?
        end

        private

        # Returns the pending leftovers and resets the stored list.
        def flush_leftovers
          @leftovers.tap { @leftovers = [] }
        end

        # Removes the failed partial updates of missing documents from `errors`
        # (when the failed id belongs to an object of the current batch) and
        # returns a bulk body that indexes those objects completely.
        def extract_leftovers(errors, index_objects_by_id)
          return [] if @options[:update_fields].blank? || !@options[:update_failover] || errors.blank?

          missing_updates = errors.select do |item|
            action, details = item.first
            action == 'update' && details['error']['type'] == 'document_missing_exception'
          end
          errors_by_id = missing_updates.index_by { |item| item.values.first['_id'].to_s }
          retryable_ids = errors_by_id.keys & index_objects_by_id.keys
          errors_by_id.values_at(*retryable_ids).each { |error| errors.delete(error) }

          BulkBuilder.new(@type, index: index_objects_by_id.values_at(*retryable_ids)).bulk_body
        end

        # Memoized bulk request performer built from the bulk options.
        def bulk
          @bulk ||= BulkRequest.new(@type, **@bulk_options)
        end
      end
    end
  end
end