chewy 0.8.4 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +56 -0
  4. data/.rubocop_todo.yml +44 -0
  5. data/.travis.yml +36 -67
  6. data/.yardopts +5 -0
  7. data/Appraisals +63 -58
  8. data/CHANGELOG.md +168 -11
  9. data/Gemfile +16 -9
  10. data/Guardfile +5 -5
  11. data/LEGACY_DSL.md +497 -0
  12. data/README.md +403 -470
  13. data/Rakefile +11 -1
  14. data/chewy.gemspec +12 -15
  15. data/gemfiles/rails.4.0.activerecord.gemfile +9 -9
  16. data/gemfiles/rails.4.1.activerecord.gemfile +9 -9
  17. data/gemfiles/rails.4.2.activerecord.gemfile +8 -8
  18. data/gemfiles/rails.4.2.mongoid.5.2.gemfile +16 -0
  19. data/gemfiles/rails.5.0.activerecord.gemfile +16 -0
  20. data/gemfiles/rails.5.0.mongoid.6.1.gemfile +16 -0
  21. data/gemfiles/rails.5.1.activerecord.gemfile +16 -0
  22. data/gemfiles/rails.5.1.mongoid.6.3.gemfile +16 -0
  23. data/gemfiles/rails.5.2.activerecord.gemfile +16 -0
  24. data/gemfiles/sequel.4.45.gemfile +11 -0
  25. data/lib/chewy/backports/deep_dup.rb +1 -1
  26. data/lib/chewy/backports/duplicable.rb +1 -0
  27. data/lib/chewy/config.rb +53 -21
  28. data/lib/chewy/errors.rb +6 -6
  29. data/lib/chewy/fields/base.rb +59 -29
  30. data/lib/chewy/fields/root.rb +49 -14
  31. data/lib/chewy/index/actions.rb +95 -36
  32. data/lib/chewy/index/aliases.rb +2 -1
  33. data/lib/chewy/index/settings.rb +10 -5
  34. data/lib/chewy/index/specification.rb +60 -0
  35. data/lib/chewy/index.rb +239 -138
  36. data/lib/chewy/journal.rb +55 -0
  37. data/lib/chewy/log_subscriber.rb +8 -8
  38. data/lib/chewy/minitest/helpers.rb +77 -0
  39. data/lib/chewy/minitest/search_index_receiver.rb +80 -0
  40. data/lib/chewy/minitest.rb +1 -0
  41. data/lib/chewy/query/compose.rb +18 -19
  42. data/lib/chewy/query/criteria.rb +34 -24
  43. data/lib/chewy/query/filters.rb +28 -11
  44. data/lib/chewy/query/loading.rb +3 -4
  45. data/lib/chewy/query/nodes/and.rb +1 -1
  46. data/lib/chewy/query/nodes/base.rb +1 -1
  47. data/lib/chewy/query/nodes/bool.rb +6 -4
  48. data/lib/chewy/query/nodes/equal.rb +4 -4
  49. data/lib/chewy/query/nodes/exists.rb +1 -1
  50. data/lib/chewy/query/nodes/expr.rb +2 -2
  51. data/lib/chewy/query/nodes/field.rb +35 -31
  52. data/lib/chewy/query/nodes/has_child.rb +1 -0
  53. data/lib/chewy/query/nodes/has_parent.rb +1 -0
  54. data/lib/chewy/query/nodes/has_relation.rb +10 -12
  55. data/lib/chewy/query/nodes/missing.rb +1 -1
  56. data/lib/chewy/query/nodes/not.rb +1 -1
  57. data/lib/chewy/query/nodes/or.rb +1 -1
  58. data/lib/chewy/query/nodes/prefix.rb +3 -2
  59. data/lib/chewy/query/nodes/query.rb +1 -1
  60. data/lib/chewy/query/nodes/range.rb +9 -9
  61. data/lib/chewy/query/nodes/raw.rb +1 -1
  62. data/lib/chewy/query/nodes/regexp.rb +13 -9
  63. data/lib/chewy/query/nodes/script.rb +4 -4
  64. data/lib/chewy/query/pagination.rb +10 -1
  65. data/lib/chewy/query.rb +286 -170
  66. data/lib/chewy/railtie.rb +7 -6
  67. data/lib/chewy/rake_helper.rb +275 -37
  68. data/lib/chewy/repository.rb +2 -2
  69. data/lib/chewy/rspec/update_index.rb +70 -65
  70. data/lib/chewy/rspec.rb +1 -1
  71. data/lib/chewy/runtime/version.rb +4 -4
  72. data/lib/chewy/search/loader.rb +83 -0
  73. data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
  74. data/lib/chewy/search/pagination/will_paginate.rb +43 -0
  75. data/lib/chewy/search/parameters/aggs.rb +16 -0
  76. data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
  77. data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
  78. data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
  79. data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
  80. data/lib/chewy/search/parameters/concerns/query_storage.rb +238 -0
  81. data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
  82. data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
  83. data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
  84. data/lib/chewy/search/parameters/explain.rb +16 -0
  85. data/lib/chewy/search/parameters/filter.rb +47 -0
  86. data/lib/chewy/search/parameters/highlight.rb +16 -0
  87. data/lib/chewy/search/parameters/indices.rb +123 -0
  88. data/lib/chewy/search/parameters/indices_boost.rb +52 -0
  89. data/lib/chewy/search/parameters/limit.rb +17 -0
  90. data/lib/chewy/search/parameters/load.rb +32 -0
  91. data/lib/chewy/search/parameters/min_score.rb +16 -0
  92. data/lib/chewy/search/parameters/none.rb +27 -0
  93. data/lib/chewy/search/parameters/offset.rb +17 -0
  94. data/lib/chewy/search/parameters/order.rb +64 -0
  95. data/lib/chewy/search/parameters/post_filter.rb +19 -0
  96. data/lib/chewy/search/parameters/preference.rb +16 -0
  97. data/lib/chewy/search/parameters/profile.rb +16 -0
  98. data/lib/chewy/search/parameters/query.rb +19 -0
  99. data/lib/chewy/search/parameters/request_cache.rb +27 -0
  100. data/lib/chewy/search/parameters/rescore.rb +29 -0
  101. data/lib/chewy/search/parameters/script_fields.rb +16 -0
  102. data/lib/chewy/search/parameters/search_after.rb +20 -0
  103. data/lib/chewy/search/parameters/search_type.rb +16 -0
  104. data/lib/chewy/search/parameters/source.rb +73 -0
  105. data/lib/chewy/search/parameters/storage.rb +95 -0
  106. data/lib/chewy/search/parameters/stored_fields.rb +63 -0
  107. data/lib/chewy/search/parameters/suggest.rb +16 -0
  108. data/lib/chewy/search/parameters/terminate_after.rb +16 -0
  109. data/lib/chewy/search/parameters/timeout.rb +16 -0
  110. data/lib/chewy/search/parameters/track_scores.rb +16 -0
  111. data/lib/chewy/search/parameters/types.rb +20 -0
  112. data/lib/chewy/search/parameters/version.rb +16 -0
  113. data/lib/chewy/search/parameters.rb +167 -0
  114. data/lib/chewy/search/query_proxy.rb +257 -0
  115. data/lib/chewy/search/request.rb +1045 -0
  116. data/lib/chewy/search/response.rb +119 -0
  117. data/lib/chewy/search/scoping.rb +50 -0
  118. data/lib/chewy/search/scrolling.rb +134 -0
  119. data/lib/chewy/search.rb +81 -26
  120. data/lib/chewy/stash.rb +79 -0
  121. data/lib/chewy/strategy/active_job.rb +1 -0
  122. data/lib/chewy/strategy/atomic.rb +2 -4
  123. data/lib/chewy/strategy/base.rb +4 -4
  124. data/lib/chewy/strategy/bypass.rb +1 -2
  125. data/lib/chewy/strategy/resque.rb +1 -0
  126. data/lib/chewy/strategy/shoryuken.rb +40 -0
  127. data/lib/chewy/strategy/sidekiq.rb +13 -1
  128. data/lib/chewy/strategy/urgent.rb +1 -1
  129. data/lib/chewy/strategy.rb +19 -10
  130. data/lib/chewy/type/actions.rb +26 -2
  131. data/lib/chewy/type/adapter/active_record.rb +50 -24
  132. data/lib/chewy/type/adapter/base.rb +29 -9
  133. data/lib/chewy/type/adapter/mongoid.rb +19 -10
  134. data/lib/chewy/type/adapter/object.rb +195 -31
  135. data/lib/chewy/type/adapter/orm.rb +69 -33
  136. data/lib/chewy/type/adapter/sequel.rb +37 -19
  137. data/lib/chewy/type/crutch.rb +5 -4
  138. data/lib/chewy/type/import/bulk_builder.rb +122 -0
  139. data/lib/chewy/type/import/bulk_request.rb +78 -0
  140. data/lib/chewy/type/import/journal_builder.rb +45 -0
  141. data/lib/chewy/type/import/routine.rb +138 -0
  142. data/lib/chewy/type/import.rb +150 -176
  143. data/lib/chewy/type/mapping.rb +58 -42
  144. data/lib/chewy/type/observe.rb +21 -15
  145. data/lib/chewy/type/syncer.rb +222 -0
  146. data/lib/chewy/type/witchcraft.rb +89 -34
  147. data/lib/chewy/type/wrapper.rb +48 -16
  148. data/lib/chewy/type.rb +77 -49
  149. data/lib/chewy/version.rb +1 -1
  150. data/lib/chewy.rb +95 -52
  151. data/lib/generators/chewy/install_generator.rb +3 -3
  152. data/lib/sequel/plugins/chewy_observe.rb +4 -19
  153. data/lib/tasks/chewy.rake +91 -28
  154. data/spec/chewy/config_spec.rb +130 -12
  155. data/spec/chewy/fields/base_spec.rb +194 -172
  156. data/spec/chewy/fields/root_spec.rb +123 -17
  157. data/spec/chewy/fields/time_fields_spec.rb +10 -9
  158. data/spec/chewy/index/actions_spec.rb +228 -43
  159. data/spec/chewy/index/aliases_spec.rb +2 -2
  160. data/spec/chewy/index/settings_spec.rb +100 -49
  161. data/spec/chewy/index/specification_spec.rb +169 -0
  162. data/spec/chewy/index_spec.rb +159 -63
  163. data/spec/chewy/journal_spec.rb +268 -0
  164. data/spec/chewy/minitest/helpers_spec.rb +90 -0
  165. data/spec/chewy/minitest/search_index_receiver_spec.rb +120 -0
  166. data/spec/chewy/query/criteria_spec.rb +503 -236
  167. data/spec/chewy/query/filters_spec.rb +96 -68
  168. data/spec/chewy/query/loading_spec.rb +80 -42
  169. data/spec/chewy/query/nodes/and_spec.rb +3 -7
  170. data/spec/chewy/query/nodes/bool_spec.rb +5 -13
  171. data/spec/chewy/query/nodes/equal_spec.rb +20 -20
  172. data/spec/chewy/query/nodes/exists_spec.rb +7 -7
  173. data/spec/chewy/query/nodes/has_child_spec.rb +42 -23
  174. data/spec/chewy/query/nodes/has_parent_spec.rb +42 -23
  175. data/spec/chewy/query/nodes/match_all_spec.rb +2 -2
  176. data/spec/chewy/query/nodes/missing_spec.rb +6 -5
  177. data/spec/chewy/query/nodes/not_spec.rb +5 -7
  178. data/spec/chewy/query/nodes/or_spec.rb +3 -7
  179. data/spec/chewy/query/nodes/prefix_spec.rb +6 -6
  180. data/spec/chewy/query/nodes/query_spec.rb +3 -3
  181. data/spec/chewy/query/nodes/range_spec.rb +19 -19
  182. data/spec/chewy/query/nodes/raw_spec.rb +2 -2
  183. data/spec/chewy/query/nodes/regexp_spec.rb +31 -19
  184. data/spec/chewy/query/nodes/script_spec.rb +5 -5
  185. data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
  186. data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
  187. data/spec/chewy/query/pagination_spec.rb +25 -22
  188. data/spec/chewy/query_spec.rb +510 -505
  189. data/spec/chewy/rake_helper_spec.rb +381 -0
  190. data/spec/chewy/repository_spec.rb +8 -8
  191. data/spec/chewy/rspec/update_index_spec.rb +215 -113
  192. data/spec/chewy/runtime_spec.rb +2 -2
  193. data/spec/chewy/search/loader_spec.rb +117 -0
  194. data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
  195. data/spec/chewy/search/pagination/kaminari_spec.rb +21 -0
  196. data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
  197. data/spec/chewy/search/pagination/will_paginate_spec.rb +23 -0
  198. data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
  199. data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
  200. data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
  201. data/spec/chewy/search/parameters/explain_spec.rb +5 -0
  202. data/spec/chewy/search/parameters/filter_spec.rb +5 -0
  203. data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
  204. data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
  205. data/spec/chewy/search/parameters/indices_spec.rb +191 -0
  206. data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
  207. data/spec/chewy/search/parameters/limit_spec.rb +5 -0
  208. data/spec/chewy/search/parameters/load_spec.rb +60 -0
  209. data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
  210. data/spec/chewy/search/parameters/none_spec.rb +5 -0
  211. data/spec/chewy/search/parameters/offset_spec.rb +5 -0
  212. data/spec/chewy/search/parameters/order_spec.rb +65 -0
  213. data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
  214. data/spec/chewy/search/parameters/preference_spec.rb +5 -0
  215. data/spec/chewy/search/parameters/profile_spec.rb +5 -0
  216. data/spec/chewy/search/parameters/query_spec.rb +5 -0
  217. data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
  218. data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
  219. data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
  220. data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
  221. data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
  222. data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
  223. data/spec/chewy/search/parameters/source_spec.rb +156 -0
  224. data/spec/chewy/search/parameters/storage_spec.rb +60 -0
  225. data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
  226. data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
  227. data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
  228. data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
  229. data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
  230. data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
  231. data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
  232. data/spec/chewy/search/parameters/types_spec.rb +5 -0
  233. data/spec/chewy/search/parameters/version_spec.rb +5 -0
  234. data/spec/chewy/search/parameters_spec.rb +145 -0
  235. data/spec/chewy/search/query_proxy_spec.rb +68 -0
  236. data/spec/chewy/search/request_spec.rb +685 -0
  237. data/spec/chewy/search/response_spec.rb +192 -0
  238. data/spec/chewy/search/scrolling_spec.rb +169 -0
  239. data/spec/chewy/search_spec.rb +37 -20
  240. data/spec/chewy/stash_spec.rb +95 -0
  241. data/spec/chewy/strategy/active_job_spec.rb +8 -2
  242. data/spec/chewy/strategy/atomic_spec.rb +4 -1
  243. data/spec/chewy/strategy/resque_spec.rb +8 -2
  244. data/spec/chewy/strategy/shoryuken_spec.rb +66 -0
  245. data/spec/chewy/strategy/sidekiq_spec.rb +10 -2
  246. data/spec/chewy/strategy_spec.rb +6 -6
  247. data/spec/chewy/type/actions_spec.rb +29 -10
  248. data/spec/chewy/type/adapter/active_record_spec.rb +357 -139
  249. data/spec/chewy/type/adapter/mongoid_spec.rb +220 -101
  250. data/spec/chewy/type/adapter/object_spec.rb +129 -40
  251. data/spec/chewy/type/adapter/sequel_spec.rb +304 -152
  252. data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
  253. data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
  254. data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
  255. data/spec/chewy/type/import/routine_spec.rb +110 -0
  256. data/spec/chewy/type/import_spec.rb +360 -244
  257. data/spec/chewy/type/mapping_spec.rb +96 -29
  258. data/spec/chewy/type/observe_spec.rb +25 -15
  259. data/spec/chewy/type/syncer_spec.rb +123 -0
  260. data/spec/chewy/type/witchcraft_spec.rb +122 -44
  261. data/spec/chewy/type/wrapper_spec.rb +63 -23
  262. data/spec/chewy/type_spec.rb +32 -10
  263. data/spec/chewy_spec.rb +82 -12
  264. data/spec/spec_helper.rb +16 -2
  265. data/spec/support/active_record.rb +6 -2
  266. data/spec/support/class_helpers.rb +4 -19
  267. data/spec/support/mongoid.rb +17 -5
  268. data/spec/support/sequel.rb +6 -1
  269. metadata +250 -57
  270. data/gemfiles/rails.3.2.activerecord.gemfile +0 -15
  271. data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -14
  272. data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -14
  273. data/gemfiles/rails.4.0.activerecord.kaminari.gemfile +0 -14
  274. data/gemfiles/rails.4.0.activerecord.will_paginate.gemfile +0 -14
  275. data/gemfiles/rails.4.0.mongoid.4.0.0.gemfile +0 -15
  276. data/gemfiles/rails.4.0.mongoid.4.0.0.kaminari.gemfile +0 -14
  277. data/gemfiles/rails.4.0.mongoid.4.0.0.will_paginate.gemfile +0 -14
  278. data/gemfiles/rails.4.0.mongoid.5.1.0.gemfile +0 -15
  279. data/gemfiles/rails.4.0.mongoid.5.1.0.kaminari.gemfile +0 -14
  280. data/gemfiles/rails.4.0.mongoid.5.1.0.will_paginate.gemfile +0 -14
  281. data/gemfiles/rails.4.1.activerecord.kaminari.gemfile +0 -14
  282. data/gemfiles/rails.4.1.activerecord.will_paginate.gemfile +0 -14
  283. data/gemfiles/rails.4.1.mongoid.4.0.0.gemfile +0 -15
  284. data/gemfiles/rails.4.1.mongoid.4.0.0.kaminari.gemfile +0 -14
  285. data/gemfiles/rails.4.1.mongoid.4.0.0.will_paginate.gemfile +0 -14
  286. data/gemfiles/rails.4.1.mongoid.5.1.0.gemfile +0 -15
  287. data/gemfiles/rails.4.1.mongoid.5.1.0.kaminari.gemfile +0 -14
  288. data/gemfiles/rails.4.1.mongoid.5.1.0.will_paginate.gemfile +0 -14
  289. data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -15
  290. data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -15
  291. data/gemfiles/rails.4.2.mongoid.4.0.0.gemfile +0 -15
  292. data/gemfiles/rails.4.2.mongoid.4.0.0.kaminari.gemfile +0 -14
  293. data/gemfiles/rails.4.2.mongoid.4.0.0.will_paginate.gemfile +0 -14
  294. data/gemfiles/rails.4.2.mongoid.5.1.0.gemfile +0 -15
  295. data/gemfiles/rails.4.2.mongoid.5.1.0.kaminari.gemfile +0 -14
  296. data/gemfiles/rails.4.2.mongoid.5.1.0.will_paginate.gemfile +0 -14
  297. data/gemfiles/rails.5.0.0.beta3.activerecord.gemfile +0 -16
  298. data/gemfiles/rails.5.0.0.beta3.activerecord.kaminari.gemfile +0 -16
  299. data/gemfiles/rails.5.0.0.beta3.activerecord.will_paginate.gemfile +0 -15
  300. data/gemfiles/sequel.4.31.gemfile +0 -13
  301. data/lib/chewy/query/pagination/will_paginate.rb +0 -27
  302. data/lib/chewy/query/scoping.rb +0 -20
  303. data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -60
@@ -1,222 +1,196 @@
1
+ require 'chewy/type/import/journal_builder'
2
+ require 'chewy/type/import/bulk_builder'
3
+ require 'chewy/type/import/bulk_request'
4
+ require 'chewy/type/import/routine'
5
+
1
6
  module Chewy
2
7
  class Type
3
8
  module Import
4
9
  extend ActiveSupport::Concern
5
10
 
6
- BULK_OPTIONS = [:suffix, :bulk_size, :refresh, :consistency, :replication]
11
+ IMPORT_WORKER = lambda do |type, options, total, ids, index|
12
+ ::Process.setproctitle("chewy [#{type}]: import data (#{index + 1}/#{total})")
13
+ routine = Routine.new(type, options)
14
+ type.adapter.import(*ids, routine.options) do |action_objects|
15
+ routine.process(**action_objects)
16
+ end
17
+ {errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
18
+ end
19
+
20
+ LEFTOVERS_WORKER = lambda do |type, options, total, body, index|
21
+ ::Process.setproctitle("chewy [#{type}]: import leftovers (#{index + 1}/#{total})")
22
+ routine = Routine.new(type, options)
23
+ routine.perform_bulk(body)
24
+ routine.errors
25
+ end
7
26
 
8
27
  module ClassMethods
9
- # Perform import operation for specified documents.
10
- # Returns true or false depending on success.
28
+ # @!method import(*collection, **options)
29
+ # Basically, one of the main methods for type. Performs any objects import
30
+ # to the index for a specified type. Does all the objects handling routines.
31
+ # Performs document import by utilizing bulk API. Bulk size and objects batch
32
+ # size are controlled by the corresponding options.
11
33
  #
12
- # UsersIndex::User.import # imports default data set
13
- # UsersIndex::User.import User.active # imports active users
14
- # UsersIndex::User.import [1, 2, 3] # imports users with specified ids
15
- # UsersIndex::User.import users # imports users collection
16
- # UsersIndex::User.import suffix: Time.now.to_i # imports data to index with specified suffix if such is exists
17
- # UsersIndex::User.import refresh: false # to disable index refreshing after import
18
- # UsersIndex::User.import batch_size: 300 # import batch size
19
- # UsersIndex::User.import bulk_size: 10.megabytes # import ElasticSearch bulk size in bytes
20
- # UsersIndex::User.import consistency: :quorum # explicit write consistency setting for the operation (one, quorum, all)
21
- # UsersIndex::User.import replication: :async # explicitly set the replication type (sync, async)
34
+ # It accepts ORM/ODM objects, PORO, hashes, ids which are used by adapter to
35
+ # fetch objects from the source depending on the used adapter. It destroys
36
+ # passed objects from the index if they are not in the default type scope
37
+ # or marked for destruction.
22
38
  #
23
- # See adapters documentation for more details.
39
+ # It handles parent-child relationships: if the object parent_id has been
40
+ # changed it destroys the object and recreates it from scratch.
24
41
  #
25
- def import *args
26
- import_options = args.extract_options!
27
- import_options.reverse_merge! _default_import_options
28
- bulk_options = import_options.reject { |k, _| !BULK_OPTIONS.include?(k) }.reverse_merge!(refresh: true)
29
-
30
- index.create!(bulk_options.slice(:suffix)) unless index.exists?
31
-
32
- ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
33
- adapter.import(*args, import_options) do |action_objects|
34
- indexed_objects = build_root.parent_id && fetch_indexed_objects(action_objects.values.flatten)
35
- body = bulk_body(action_objects, indexed_objects)
36
-
37
- errors = bulk(bulk_options.merge(body: body)) if body.present?
38
-
39
- fill_payload_import payload, action_objects
40
- fill_payload_errors payload, errors if errors.present?
41
- !errors.present?
42
- end
43
- end
42
+ # Performs journaling if enabled: it stores all the ids of the imported
43
+ # objects to a specialized index. It is possible to replay particular import
44
+ # later to restore the data consistency.
45
+ #
46
+ # Performs partial index update using `update` bulk action if any `fields` are
47
+ # specified. Note that if document doesn't exist yet, an error will be raised
48
+ # by ES, but import catches these errors and performs full indexing
49
+ # for the corresponding documents. This feature can be disabled by setting
50
+ # `update_failover` to `false`.
51
+ #
52
+ # Utilizes `ActiveSupport::Notifications`, so it is possible to get imported
53
+ # objects later by listening to the `import_objects.chewy` queue. It is also
54
+ # possible to get the list of occurred errors from the payload if something
55
+ # went wrong.
56
+ #
57
+ # Import can also be run in parallel using the Parallel gem functionality.
58
+ #
59
+ # @example
60
+ # UsersIndex::User.import(parallel: true) # imports everything in parallel with automatic workers number
61
+ # UsersIndex::User.import(parallel: 3) # using 3 workers
62
+ # UsersIndex::User.import(parallel: {in_threads: 10}) # in 10 threads
63
+ #
64
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
65
+ # @param collection [Array<Object>] an array or anything to import
66
+ # @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
67
+ # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
68
+ # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
69
+ # @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
70
+ # @option options [true, false] journal enables imported objects journaling, false by default
71
+ # @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
72
+ # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
73
+ # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
74
+ # @return [true, false] false in case of errors
75
+ def import(*args)
76
+ import_routine(*args).blank?
44
77
  end
45
78
 
46
- # Perform import operation for specified documents.
47
- # Raises Chewy::ImportFailed exception in case of import errors.
48
- # Options are completely the same as for `import` method
49
- # See adapters documentation for more details.
79
+ # @!method import!(*collection, **options)
80
+ # (see #import)
50
81
  #
51
- def import! *args
52
- errors = nil
53
- subscriber = ActiveSupport::Notifications.subscribe('import_objects.chewy') do |*args|
54
- errors = args.last[:errors]
55
- end
56
- import *args
82
+ # The only difference from {#import} is that it raises an exception
83
+ # in case of any import errors.
84
+ #
85
+ # @raise [Chewy::ImportFailed] in case of errors
86
+ def import!(*args)
87
+ errors = import_routine(*args)
57
88
  raise Chewy::ImportFailed.new(self, errors) if errors.present?
58
89
  true
59
- ensure
60
- ActiveSupport::Notifications.unsubscribe(subscriber) if subscriber
61
90
  end
62
91
 
63
- # Wraps elasticsearch-ruby client indices bulk method.
64
- # Adds `:suffix` option to bulk import to index with specified suffix.
65
- def bulk options = {}
66
- suffix = options.delete(:suffix)
67
- bulk_size = options.delete(:bulk_size)
68
- body = options.delete(:body)
69
- header = { index: index.build_index_name(suffix: suffix), type: type_name }
70
-
71
- bodies = if bulk_size
72
- bulk_size -= 1.kilobyte # 1 kilobyte for request header and newlines
73
- raise ArgumentError.new('Import `:bulk_size` can\'t be less then 1 kilobyte') if bulk_size <= 0
74
-
75
- body.each_with_object(['']) do |entry, result|
76
- operation, meta = entry.to_a.first
77
- data = meta.delete(:data)
78
- entry = [{ operation => meta }, data].compact.map(&:to_json).join("\n")
79
- if entry.bytesize > bulk_size
80
- raise ArgumentError.new('Import `:bulk_size` seems to be less then entry size')
81
- elsif result.last.bytesize + entry.bytesize > bulk_size
82
- result.push(entry)
83
- else
84
- result[-1] = [result[-1], entry].delete_if(&:blank?).join("\n")
85
- end
86
- end
87
- else
88
- [body]
89
- end
90
-
91
- items = bodies.map do |body|
92
- result = client.bulk options.merge(header).merge(body: body)
93
- result.try(:[], 'items') || []
94
- end.flatten
92
+ # Wraps elasticsearch API bulk method, adds additional features like
93
+ # `bulk_size` and `suffix`.
94
+ #
95
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
96
+ # @see Chewy::Type::Import::BulkRequest
97
+ # @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
98
+ # @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
99
+ # @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
100
+ # @option options [Array<Hash>] body elasticsearch API bulk method body
101
+ # @return [Hash] tricky transposed errors hash, empty if everything is fine
102
+ def bulk(**options)
103
+ error_items = BulkRequest.new(self, **options).perform(options[:body])
95
104
  Chewy.wait_for_status
96
105
 
97
- extract_errors items
106
+ payload_errors(error_items)
98
107
  end
99
108
 
100
- private
101
-
102
- def bulk_body(action_objects, indexed_objects = nil)
103
- action_objects.flat_map do |action, objects|
104
- method = "#{action}_bulk_entry"
105
- crutches = Chewy::Type::Crutch::Crutches.new self, objects
106
- objects.flat_map { |object| send(method, object, indexed_objects, crutches) }
107
- end
108
- end
109
-
110
- def delete_bulk_entry(object, indexed_objects = nil, crutches = nil)
111
- entry = {}
112
-
113
- if root_object.id
114
- entry[:_id] = root_object.compose_id(object)
109
+ # Composes a single document from the passed object. Uses either witchcraft
110
+ # or normal composing under the hood.
111
+ #
112
+ # @param object [Object] a data source object
113
+ # @param crutches [Object] optional crutches object; if omitted - a crutch for the single passed object is created as a fallback
114
+ # @param fields [Array<Symbol>] an array of fields to restrict the generated document
115
+ # @return [Hash] a JSON-ready hash
116
+ def compose(object, crutches = nil, fields: [])
117
+ crutches ||= Chewy::Type::Crutch::Crutches.new self, [object]
118
+
119
+ if witchcraft? && root.children.present?
120
+ cauldron(fields: fields).brew(object, crutches)
115
121
  else
116
- entry[:_id] = object.id if object.respond_to?(:id)
117
- entry[:_id] ||= object[:id] || object['id'] if object.is_a?(Hash)
118
- entry[:_id] ||= object
119
- entry[:_id] = entry[:_id].to_s if defined?(BSON) && entry[:_id].is_a?(BSON::ObjectId)
122
+ root.compose(object, crutches, fields: fields)
120
123
  end
121
-
122
- if root_object.parent_id
123
- existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
124
- entry.merge!(parent: existing_object[:parent]) if existing_object
125
- end
126
-
127
- [{ delete: entry }]
128
124
  end
129
125
 
130
- def index_bulk_entry(object, indexed_objects = nil, crutches = nil)
131
- entry = {}
132
-
133
- if root_object.id
134
- entry[:_id] = root_object.compose_id(object)
135
- else
136
- entry[:_id] = object.id if object.respond_to?(:id)
137
- entry[:_id] ||= object[:id] || object['id'] if object.is_a?(Hash)
138
- entry[:_id] = entry[:_id].to_s if defined?(BSON) && entry[:_id].is_a?(BSON::ObjectId)
139
- end
140
- entry.delete(:_id) if entry[:_id].blank?
141
-
142
- if root_object.parent_id
143
- entry[:parent] = root_object.compose_parent(object)
144
- existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
145
- end
126
+ private
146
127
 
147
- entry[:data] = object_data(object, crutches)
128
+ def import_routine(*args)
129
+ return if args.first.blank? && !args.first.nil?
130
+ routine = Routine.new(self, args.extract_options!)
131
+ routine.create_indexes!
148
132
 
149
- if existing_object && entry[:parent].to_s != existing_object[:parent]
150
- [{ delete: entry.except(:data).merge(parent: existing_object[:parent]) }, { index: entry }]
133
+ if routine.parallel_options
134
+ import_parallel(args, routine)
151
135
  else
152
- [{ index: entry }]
136
+ import_linear(args, routine)
153
137
  end
154
138
  end
155
139
 
156
- def fill_payload_import payload, action_objects
157
- imported = Hash[action_objects.map { |action, objects| [action, objects.count] }]
158
- imported.each do |action, count|
159
- payload[:import] ||= {}
160
- payload[:import][action] ||= 0
161
- payload[:import][action] += count
140
+ def import_linear(objects, routine)
141
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
142
+ adapter.import(*objects, routine.options) do |action_objects|
143
+ routine.process(**action_objects)
144
+ end
145
+ routine.perform_bulk(routine.leftovers)
146
+ payload[:import] = routine.stats
147
+ payload[:errors] = payload_errors(routine.errors) if routine.errors.present?
148
+ payload[:errors]
162
149
  end
163
150
  end
164
151
 
165
- def fill_payload_errors payload, errors
166
- errors.each do |action, errors|
167
- errors.each do |error, documents|
168
- payload[:errors] ||= {}
169
- payload[:errors][action] ||= {}
170
- payload[:errors][action][error] ||= []
171
- payload[:errors][action][error] |= documents
152
+ def import_parallel(objects, routine)
153
+ raise "The `parallel` gem is required for parallel import, please add `gem 'parallel'` to your Gemfile" unless '::Parallel'.safe_constantize
154
+
155
+ ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
156
+ batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
157
+
158
+ ::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
159
+ results = ::Parallel.map_with_index(batches, routine.parallel_options, &IMPORT_WORKER.curry[self, routine.options, batches.size])
160
+ ::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
161
+ errors, import, leftovers = process_parallel_import_results(results)
162
+
163
+ if leftovers.present?
164
+ batches = leftovers.each_slice(routine.options[:batch_size])
165
+ results = ::Parallel.map_with_index(batches, routine.parallel_options, &LEFTOVERS_WORKER.curry[self, routine.options, batches.size])
166
+ errors.concat(results.flatten(1))
172
167
  end
173
- end
174
- end
175
168
 
176
- def object_data object, crutches = nil
177
- if witchcraft?
178
- cauldron.brew(object, crutches)
179
- else
180
- build_root.compose(object, crutches)[type_name.to_sym]
169
+ payload[:import] = import
170
+ payload[:errors] = payload_errors(errors) if errors.present?
171
+ payload[:errors]
181
172
  end
182
173
  end
183
174
 
184
- def extract_errors items
185
- items.each.with_object({}) do |item, memo|
186
- action = item.keys.first.to_sym
187
- data = item.values.first
188
- if data['error']
189
- (memo[action] ||= []).push(action: action, id: data['_id'], error: data['error'])
190
- end
191
- end.map do |action, items|
192
- errors = items.group_by { |item| item[:error] }.map do |error, items|
193
- {error => items.map { |item| item[:id] }}
194
- end.reduce(&:merge)
195
- {action => errors}
196
- end.reduce(&:merge) || {}
175
+ def process_parallel_import_results(results)
176
+ results.each_with_object([[], {}, []]) do |r, (e, i, l)|
177
+ e.concat(r[:errors])
178
+ i.merge!(r[:import]) { |_k, v1, v2| v1.to_i + v2.to_i }
179
+ l.concat(r[:leftovers])
180
+ end
197
181
  end
198
182
 
199
- def fetch_indexed_objects(objects)
200
- ids = objects.map { |object| object.respond_to?(:id) ? object.id : object }
201
- result = client.search index: index_name,
202
- type: type_name,
203
- fields: '_parent',
204
- body: { filter: { ids: { values: ids } } },
205
- search_type: 'scan',
206
- scroll: '1m'
207
-
208
- indexed_objects = {}
183
+ def payload_errors(errors)
184
+ errors.each_with_object({}) do |error, result|
185
+ action = error.keys.first.to_sym
186
+ item = error.values.first
187
+ error = item['error']
188
+ id = item['_id']
209
189
 
210
- while result = client.scroll(scroll_id: result['_scroll_id'], scroll: '1m') do
211
- break if result['hits']['hits'].empty?
212
-
213
- result['hits']['hits'].map do |hit|
214
- parent = hit.has_key?('_parent') ? hit['_parent'] : hit['fields']['_parent']
215
- indexed_objects[hit['_id']] = { parent: parent }
216
- end
190
+ result[action] ||= {}
191
+ result[action][error] ||= []
192
+ result[action][error].push(id)
217
193
  end
218
-
219
- indexed_objects
220
194
  end
221
195
  end
222
196
  end
@@ -8,6 +8,8 @@ module Chewy
8
8
  class_attribute :_templates
9
9
  class_attribute :_agg_defs
10
10
  self._agg_defs = {}
11
+ class_attribute :outdated_sync_field
12
+ self.outdated_sync_field = :updated_at
11
13
  end
12
14
 
13
15
  module ClassMethods
@@ -15,10 +17,11 @@ module Chewy
15
17
  # definition. Use it only if you need to pass options for root
16
18
  # object mapping, such as `date_detection` or `dynamic_date_formats`
17
19
  #
20
+ # @example
18
21
  # class UsersIndex < Chewy::Index
19
22
  # define_type User do
20
23
  # # root object defined implicitly and optionless for current type
21
- # field :full_name, type: 'string'
24
+ # field :full_name, type: 'keyword'
22
25
  # end
23
26
  # end
24
27
  #
@@ -26,32 +29,37 @@ module Chewy
26
29
  # define_type Car do
27
30
  # # explicit root definition with additional options
28
31
  # root dynamic_date_formats: ['yyyy-MM-dd'] do
29
- # field :model_name, type: 'string'
32
+ # field :model_name, type: 'keyword'
30
33
  # end
31
34
  # end
32
35
  # end
33
36
  #
34
- def root options = {}, &block
35
- raise "Root is already defined" if root_object
36
- build_root(options, &block)
37
+ def root(**options)
38
+ self.root_object ||= Chewy::Fields::Root.new(type_name, Chewy.default_root_options.merge(options))
39
+ root_object.update_options!(options)
40
+ yield if block_given?
41
+ root_object
37
42
  end
38
43
 
39
44
  # Defines mapping field for current type
40
45
  #
46
+ # @example
41
47
  # class UsersIndex < Chewy::Index
42
48
  # define_type User do
43
49
  # # passing all the options to field definition:
44
- # field :full_name, type: 'string', analyzer: 'special'
50
+ # field :full_name, analyzer: 'special'
45
51
  # end
46
52
  # end
47
53
  #
48
54
  # The `type` is optional and defaults to `string` if not defined:
49
55
  #
56
+ # @example
50
57
  # field :full_name
51
58
  #
52
59
  # Also, multiple fields might be defined with one call and
53
60
  # with the same options:
54
61
  #
62
+ # @example
55
63
  # field :first_name, :last_name, analyzer: 'special'
56
64
  #
57
65
  # The only special option in the field definition
@@ -59,31 +67,35 @@ module Chewy
59
67
  # method will be called for the indexed object. Also
60
68
  # `:value` might be a proc or indexed object method name:
61
69
  #
70
+ # @example
62
71
  # class User < ActiveRecord::Base
63
72
  # def user_full_name
64
73
  # [first_name, last_name].join(' ')
65
74
  # end
66
75
  # end
67
76
  #
68
- # field :full_name, type: 'string', value: :user_full_name
77
+ # field :full_name, type: 'keyword', value: :user_full_name
69
78
  #
70
79
  # The proc evaluates inside the indexed object context if
71
80
  # its arity is 0 and in present contexts if there is an argument:
72
81
  #
73
- # field :full_name, type: 'string', value: -> { [first_name, last_name].join(' ') }
82
+ # @example
83
+ # field :full_name, type: 'keyword', value: -> { [first_name, last_name].join(' ') }
74
84
  #
75
85
  # separator = ' '
76
- # field :full_name, type: 'string', value: ->(user) { [user.first_name, user.last_name].join(separator) }
86
+ # field :full_name, type: 'keyword', value: ->(user) { [user.first_name, user.last_name].join(separator) }
77
87
  #
78
88
  # If array was returned as value - it will be put in index as well.
79
89
  #
80
- # field :tags, type: 'string', value: -> { tags.map(&:name) }
90
+ # @example
91
+ # field :tags, type: 'keyword', value: -> { tags.map(&:name) }
81
92
  #
82
93
  # Fields supports nesting in case of `object` field type. If
83
94
  # `user.quiz` will return an array of objects, then result index content
84
95
  # will be an array of hashes, if `user.quiz` is not a collection association
85
96
  # then just values hash will be put in the index.
86
97
  #
98
+ # @example
87
99
  # field :quiz do
88
100
  # field :question, :answer
89
101
  # field :score, type: 'integer'
@@ -91,28 +103,28 @@ module Chewy
91
103
  #
92
104
  # Nested fields are composed from nested objects:
93
105
  #
106
+ # @example
94
107
  # field :name, value: -> { name_translations } do
95
108
  # field :ru, value: ->(name) { name['ru'] }
96
109
  # field :en, value: ->(name) { name['en'] }
97
110
  # end
98
111
  #
99
- # Off course it is possible to define object fields contents dynamically
112
+ # Of course it is possible to define object fields contents dynamically
100
113
  # but make sure evaluation proc returns hash:
101
114
  #
115
+ # @example
102
116
  # field :name, type: 'object', value: -> { name_translations }
103
117
  #
104
118
  # The special case is multi_field. If type options and block are
105
119
  # both present field is treated as a multi-field. In that case field
106
120
  # composition changes satisfy elasticsearch rules:
107
121
  #
108
- # field :full_name, type: 'string', analyzer: 'name', value: ->{ full_name.try(:strip) } do
122
+ # @example
123
+ # field :full_name, type: 'text', analyzer: 'name', value: ->{ full_name.try(:strip) } do
109
124
  # field :sorted, analyzer: 'sorted'
110
125
  # end
111
126
  #
112
- def field *args, &block
113
- options = args.extract_options!
114
- build_root
115
-
127
+ def field(*args, **options, &block)
116
128
  if args.size > 1
117
129
  args.map { |name| field(name, options) }
118
130
  else
@@ -122,9 +134,9 @@ module Chewy
122
134
 
123
135
  # Defines an aggregation that can be bound to a query or filter
124
136
  #
125
- # Suppose that a user has posts and each post has ratings
126
- # avg_post_rating is the mean of all ratings
127
- #
137
+ # @example
138
+ # # Suppose that a user has posts and each post has ratings
139
+ # # avg_post_rating is the mean of all ratings
128
140
  # class UsersIndex < Chewy::Index
129
141
  # define_type User do
130
142
  # field :posts do
@@ -136,64 +148,68 @@ module Chewy
136
148
  # end
137
149
  # end
138
150
  # end
139
- def agg *args, &block
140
- options = args.extract_options!
141
- build_root
142
- self._agg_defs = _agg_defs.merge(args.first => block)
151
+ def agg(name, &block)
152
+ self._agg_defs = _agg_defs.merge(name => block)
143
153
  end
144
154
  alias_method :aggregation, :agg
145
155
 
146
156
  # Defines dynamic template in mapping root objects
147
157
  #
158
+ # @example
148
159
  # class CarsIndex < Chewy::Index
149
160
  # define_type Car do
150
- # template 'model.*', type: 'string', analyzer: 'special'
151
- # field 'model', type: 'object' # here we can put { ru: 'Мерседес', en: 'Mercedes' }
161
+ # template 'model.*', type: 'text', analyzer: 'special'
162
+ # field 'model', type: 'object' # here we can put { de: 'Der Mercedes', en: 'Mercedes' }
152
163
  # # and template will be applyed to this field
153
164
  # end
154
165
  # end
155
166
  #
156
167
  # Name for each template is generated with the following
157
- # rule: "template_#{dynamic_templates.size + 1}".
168
+ # rule: `template_#!{dynamic_templates.size + 1}`.
158
169
  #
170
+ # @example Templates
159
171
  # template 'tit*', mapping_hash
160
172
  # template 'title.*', mapping_hash # dot in template causes "path_match" using
161
173
  # template /tit.+/, mapping_hash # using "match_pattern": "regexp"
162
174
  # template /title\..+/, mapping_hash # "\." - escaped dot causes "path_match" using
163
- # template /tit.+/, 'string', mapping_hash # "match_mapping_type" as the optionsl second argument
175
+ # template /tit.+/, type: 'text', mapping_hash # "match_mapping_type" as the optionsl second argument
164
176
  # template template42: {match: 'hello*', mapping: {type: 'object'}} # or even pass a template as is
165
177
  #
166
- def template *args
167
- build_root.dynamic_template *args
178
+ def template(*args)
179
+ root.dynamic_template(*args)
168
180
  end
169
181
  alias_method :dynamic_template, :template
170
182
 
171
183
  # Returns compiled mappings hash for current type
172
184
  #
173
185
  def mappings_hash
174
- root_object ? root_object.mappings_hash : {}
186
+ root.mappings_hash[type_name.to_sym].present? ? root.mappings_hash : {}
187
+ end
188
+
189
+ # Check whether the type has outdated_sync_field defined with a simple value.
190
+ #
191
+ # @return [true, false]
192
+ def supports_outdated_sync?
193
+ updated_at_field = root.child_hash[outdated_sync_field] if outdated_sync_field
194
+ !!updated_at_field && updated_at_field.value.nil?
175
195
  end
176
196
 
177
197
  private
178
198
 
179
- def expand_nested field, &block
199
+ def expand_nested(field)
200
+ @_current_field ||= root
201
+
180
202
  if @_current_field
181
203
  field.parent = @_current_field
182
204
  @_current_field.children.push(field)
183
205
  end
184
206
 
185
- if block
186
- previous_field, @_current_field = @_current_field, field
187
- block.call
188
- @_current_field = previous_field
189
- end
190
- end
207
+ return unless block_given?
191
208
 
192
- def build_root options = {}, &block
193
- return root_object if root_object
194
- self.root_object = Chewy::Fields::Root.new(type_name, options)
195
- expand_nested(root_object, &block)
196
- @_current_field = root_object
209
+ previous_field = @_current_field
210
+ @_current_field = field
211
+ yield
212
+ @_current_field = previous_field
197
213
  end
198
214
  end
199
215
  end