chewy 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (265) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +24 -2
  4. data/.rubocop_todo.yml +2 -2
  5. data/.travis.yml +38 -21
  6. data/.yardopts +5 -0
  7. data/Appraisals +55 -27
  8. data/CHANGELOG.md +57 -12
  9. data/Gemfile +14 -10
  10. data/LEGACY_DSL.md +497 -0
  11. data/README.md +249 -515
  12. data/chewy.gemspec +5 -4
  13. data/gemfiles/rails.4.0.activerecord.gemfile +14 -0
  14. data/gemfiles/rails.4.1.activerecord.gemfile +14 -0
  15. data/gemfiles/rails.4.2.activerecord.gemfile +8 -10
  16. data/gemfiles/rails.4.2.mongoid.5.1.gemfile +9 -10
  17. data/gemfiles/rails.5.0.activerecord.gemfile +8 -10
  18. data/gemfiles/rails.5.0.mongoid.6.0.gemfile +15 -0
  19. data/gemfiles/rails.5.1.activerecord.gemfile +15 -0
  20. data/gemfiles/rails.5.1.mongoid.6.1.gemfile +15 -0
  21. data/gemfiles/sequel.4.45.gemfile +11 -0
  22. data/lib/chewy.rb +77 -43
  23. data/lib/chewy/config.rb +44 -7
  24. data/lib/chewy/errors.rb +2 -2
  25. data/lib/chewy/fields/base.rb +39 -32
  26. data/lib/chewy/fields/root.rb +33 -7
  27. data/lib/chewy/index.rb +237 -149
  28. data/lib/chewy/index/actions.rb +85 -28
  29. data/lib/chewy/index/aliases.rb +2 -1
  30. data/lib/chewy/index/settings.rb +9 -5
  31. data/lib/chewy/index/specification.rb +58 -0
  32. data/lib/chewy/journal.rb +40 -92
  33. data/lib/chewy/query.rb +43 -27
  34. data/lib/chewy/query/compose.rb +13 -13
  35. data/lib/chewy/query/criteria.rb +13 -13
  36. data/lib/chewy/query/filters.rb +1 -1
  37. data/lib/chewy/query/loading.rb +1 -1
  38. data/lib/chewy/query/nodes/and.rb +2 -2
  39. data/lib/chewy/query/nodes/bool.rb +1 -1
  40. data/lib/chewy/query/nodes/equal.rb +2 -2
  41. data/lib/chewy/query/nodes/exists.rb +1 -1
  42. data/lib/chewy/query/nodes/has_relation.rb +2 -2
  43. data/lib/chewy/query/nodes/match_all.rb +1 -1
  44. data/lib/chewy/query/nodes/missing.rb +1 -1
  45. data/lib/chewy/query/nodes/not.rb +2 -2
  46. data/lib/chewy/query/nodes/or.rb +2 -2
  47. data/lib/chewy/query/nodes/prefix.rb +1 -1
  48. data/lib/chewy/query/nodes/query.rb +2 -2
  49. data/lib/chewy/query/nodes/range.rb +4 -4
  50. data/lib/chewy/query/nodes/regexp.rb +4 -4
  51. data/lib/chewy/query/nodes/script.rb +3 -3
  52. data/lib/chewy/query/pagination.rb +10 -1
  53. data/lib/chewy/railtie.rb +1 -0
  54. data/lib/chewy/rake_helper.rb +265 -48
  55. data/lib/chewy/rspec/update_index.rb +30 -22
  56. data/lib/chewy/search.rb +78 -21
  57. data/lib/chewy/search/loader.rb +83 -0
  58. data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
  59. data/lib/chewy/search/pagination/will_paginate.rb +41 -0
  60. data/lib/chewy/search/parameters.rb +150 -0
  61. data/lib/chewy/search/parameters/aggs.rb +16 -0
  62. data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
  63. data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
  64. data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
  65. data/lib/chewy/search/parameters/concerns/query_storage.rb +237 -0
  66. data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
  67. data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
  68. data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
  69. data/lib/chewy/search/parameters/explain.rb +16 -0
  70. data/lib/chewy/search/parameters/filter.rb +47 -0
  71. data/lib/chewy/search/parameters/highlight.rb +16 -0
  72. data/lib/chewy/search/parameters/indices_boost.rb +52 -0
  73. data/lib/chewy/search/parameters/limit.rb +17 -0
  74. data/lib/chewy/search/parameters/load.rb +32 -0
  75. data/lib/chewy/search/parameters/min_score.rb +16 -0
  76. data/lib/chewy/search/parameters/none.rb +27 -0
  77. data/lib/chewy/search/parameters/offset.rb +17 -0
  78. data/lib/chewy/search/parameters/order.rb +64 -0
  79. data/lib/chewy/search/parameters/post_filter.rb +19 -0
  80. data/lib/chewy/search/parameters/preference.rb +16 -0
  81. data/lib/chewy/search/parameters/profile.rb +16 -0
  82. data/lib/chewy/search/parameters/query.rb +19 -0
  83. data/lib/chewy/search/parameters/request_cache.rb +27 -0
  84. data/lib/chewy/search/parameters/rescore.rb +29 -0
  85. data/lib/chewy/search/parameters/script_fields.rb +16 -0
  86. data/lib/chewy/search/parameters/search_after.rb +20 -0
  87. data/lib/chewy/search/parameters/search_type.rb +16 -0
  88. data/lib/chewy/search/parameters/source.rb +73 -0
  89. data/lib/chewy/search/parameters/storage.rb +95 -0
  90. data/lib/chewy/search/parameters/stored_fields.rb +63 -0
  91. data/lib/chewy/search/parameters/suggest.rb +16 -0
  92. data/lib/chewy/search/parameters/terminate_after.rb +16 -0
  93. data/lib/chewy/search/parameters/timeout.rb +16 -0
  94. data/lib/chewy/search/parameters/track_scores.rb +16 -0
  95. data/lib/chewy/search/parameters/types.rb +20 -0
  96. data/lib/chewy/search/parameters/version.rb +16 -0
  97. data/lib/chewy/search/query_proxy.rb +257 -0
  98. data/lib/chewy/search/request.rb +1021 -0
  99. data/lib/chewy/search/response.rb +119 -0
  100. data/lib/chewy/search/scoping.rb +50 -0
  101. data/lib/chewy/search/scrolling.rb +136 -0
  102. data/lib/chewy/stash.rb +70 -0
  103. data/lib/chewy/strategy.rb +10 -3
  104. data/lib/chewy/strategy/active_job.rb +1 -0
  105. data/lib/chewy/strategy/atomic.rb +1 -3
  106. data/lib/chewy/strategy/bypass.rb +1 -1
  107. data/lib/chewy/strategy/resque.rb +1 -0
  108. data/lib/chewy/strategy/shoryuken.rb +40 -0
  109. data/lib/chewy/strategy/sidekiq.rb +13 -3
  110. data/lib/chewy/type.rb +29 -7
  111. data/lib/chewy/type/actions.rb +26 -2
  112. data/lib/chewy/type/adapter/active_record.rb +44 -29
  113. data/lib/chewy/type/adapter/base.rb +27 -7
  114. data/lib/chewy/type/adapter/mongoid.rb +18 -7
  115. data/lib/chewy/type/adapter/object.rb +187 -26
  116. data/lib/chewy/type/adapter/orm.rb +59 -32
  117. data/lib/chewy/type/adapter/sequel.rb +32 -16
  118. data/lib/chewy/type/import.rb +145 -191
  119. data/lib/chewy/type/import/bulk_builder.rb +122 -0
  120. data/lib/chewy/type/import/bulk_request.rb +76 -0
  121. data/lib/chewy/type/import/journal_builder.rb +45 -0
  122. data/lib/chewy/type/import/routine.rb +138 -0
  123. data/lib/chewy/type/mapping.rb +11 -1
  124. data/lib/chewy/type/observe.rb +1 -1
  125. data/lib/chewy/type/syncer.rb +220 -0
  126. data/lib/chewy/type/witchcraft.rb +27 -13
  127. data/lib/chewy/type/wrapper.rb +28 -2
  128. data/lib/chewy/version.rb +1 -1
  129. data/lib/tasks/chewy.rake +84 -26
  130. data/spec/chewy/config_spec.rb +82 -1
  131. data/spec/chewy/fields/base_spec.rb +147 -112
  132. data/spec/chewy/fields/root_spec.rb +75 -18
  133. data/spec/chewy/fields/time_fields_spec.rb +2 -3
  134. data/spec/chewy/index/actions_spec.rb +180 -50
  135. data/spec/chewy/index/aliases_spec.rb +2 -2
  136. data/spec/chewy/index/settings_spec.rb +67 -38
  137. data/spec/chewy/index/specification_spec.rb +160 -0
  138. data/spec/chewy/index_spec.rb +57 -66
  139. data/spec/chewy/journal_spec.rb +149 -54
  140. data/spec/chewy/minitest/helpers_spec.rb +4 -4
  141. data/spec/chewy/minitest/search_index_receiver_spec.rb +1 -1
  142. data/spec/chewy/query/criteria_spec.rb +179 -179
  143. data/spec/chewy/query/filters_spec.rb +15 -15
  144. data/spec/chewy/query/loading_spec.rb +22 -20
  145. data/spec/chewy/query/nodes/and_spec.rb +2 -2
  146. data/spec/chewy/query/nodes/bool_spec.rb +4 -4
  147. data/spec/chewy/query/nodes/equal_spec.rb +19 -19
  148. data/spec/chewy/query/nodes/exists_spec.rb +6 -6
  149. data/spec/chewy/query/nodes/has_child_spec.rb +19 -19
  150. data/spec/chewy/query/nodes/has_parent_spec.rb +19 -19
  151. data/spec/chewy/query/nodes/missing_spec.rb +5 -5
  152. data/spec/chewy/query/nodes/not_spec.rb +3 -2
  153. data/spec/chewy/query/nodes/or_spec.rb +2 -2
  154. data/spec/chewy/query/nodes/prefix_spec.rb +5 -5
  155. data/spec/chewy/query/nodes/query_spec.rb +2 -2
  156. data/spec/chewy/query/nodes/range_spec.rb +18 -18
  157. data/spec/chewy/query/nodes/raw_spec.rb +1 -1
  158. data/spec/chewy/query/nodes/regexp_spec.rb +14 -14
  159. data/spec/chewy/query/nodes/script_spec.rb +4 -4
  160. data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
  161. data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
  162. data/spec/chewy/query/pagination_spec.rb +25 -21
  163. data/spec/chewy/query_spec.rb +501 -560
  164. data/spec/chewy/rake_helper_spec.rb +368 -0
  165. data/spec/chewy/repository_spec.rb +4 -4
  166. data/spec/chewy/rspec/update_index_spec.rb +89 -56
  167. data/spec/chewy/runtime_spec.rb +2 -2
  168. data/spec/chewy/search/loader_spec.rb +117 -0
  169. data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
  170. data/spec/chewy/search/pagination/kaminari_spec.rb +17 -0
  171. data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
  172. data/spec/chewy/search/pagination/will_paginate_spec.rb +17 -0
  173. data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
  174. data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
  175. data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
  176. data/spec/chewy/search/parameters/explain_spec.rb +5 -0
  177. data/spec/chewy/search/parameters/filter_spec.rb +5 -0
  178. data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
  179. data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
  180. data/spec/chewy/search/parameters/indices_boost_spec.rb +83 -0
  181. data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
  182. data/spec/chewy/search/parameters/limit_spec.rb +5 -0
  183. data/spec/chewy/search/parameters/load_spec.rb +60 -0
  184. data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
  185. data/spec/chewy/search/parameters/none_spec.rb +5 -0
  186. data/spec/chewy/search/parameters/offset_spec.rb +5 -0
  187. data/spec/chewy/search/parameters/order_spec.rb +65 -0
  188. data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
  189. data/spec/chewy/search/parameters/preference_spec.rb +5 -0
  190. data/spec/chewy/search/parameters/profile_spec.rb +5 -0
  191. data/spec/chewy/search/parameters/query_spec.rb +5 -0
  192. data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
  193. data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
  194. data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
  195. data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
  196. data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
  197. data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
  198. data/spec/chewy/search/parameters/source_spec.rb +156 -0
  199. data/spec/chewy/search/parameters/storage_spec.rb +60 -0
  200. data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
  201. data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
  202. data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
  203. data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
  204. data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
  205. data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
  206. data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
  207. data/spec/chewy/search/parameters/types_spec.rb +5 -0
  208. data/spec/chewy/search/parameters/version_spec.rb +5 -0
  209. data/spec/chewy/search/parameters_spec.rb +130 -0
  210. data/spec/chewy/search/query_proxy_spec.rb +68 -0
  211. data/spec/chewy/search/request_spec.rb +669 -0
  212. data/spec/chewy/search/response_spec.rb +192 -0
  213. data/spec/chewy/search/scrolling_spec.rb +169 -0
  214. data/spec/chewy/search_spec.rb +13 -6
  215. data/spec/chewy/stash_spec.rb +95 -0
  216. data/spec/chewy/strategy/active_job_spec.rb +6 -0
  217. data/spec/chewy/strategy/resque_spec.rb +6 -0
  218. data/spec/chewy/strategy/shoryuken_spec.rb +64 -0
  219. data/spec/chewy/strategy/sidekiq_spec.rb +8 -0
  220. data/spec/chewy/strategy_spec.rb +6 -6
  221. data/spec/chewy/type/actions_spec.rb +29 -10
  222. data/spec/chewy/type/adapter/active_record_spec.rb +203 -91
  223. data/spec/chewy/type/adapter/mongoid_spec.rb +112 -54
  224. data/spec/chewy/type/adapter/object_spec.rb +101 -28
  225. data/spec/chewy/type/adapter/sequel_spec.rb +149 -82
  226. data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
  227. data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
  228. data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
  229. data/spec/chewy/type/import/routine_spec.rb +110 -0
  230. data/spec/chewy/type/import_spec.rb +350 -271
  231. data/spec/chewy/type/mapping_spec.rb +54 -18
  232. data/spec/chewy/type/observe_spec.rb +5 -1
  233. data/spec/chewy/type/syncer_spec.rb +123 -0
  234. data/spec/chewy/type/witchcraft_spec.rb +45 -29
  235. data/spec/chewy/type/wrapper_spec.rb +63 -23
  236. data/spec/chewy/type_spec.rb +28 -7
  237. data/spec/chewy_spec.rb +75 -7
  238. data/spec/spec_helper.rb +5 -2
  239. data/spec/support/active_record.rb +5 -1
  240. data/spec/support/class_helpers.rb +0 -14
  241. data/spec/support/mongoid.rb +15 -3
  242. data/spec/support/sequel.rb +6 -1
  243. metadata +198 -37
  244. data/gemfiles/rails.3.2.activerecord.gemfile +0 -16
  245. data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -15
  246. data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -15
  247. data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -16
  248. data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -16
  249. data/gemfiles/rails.4.2.mongoid.4.0.gemfile +0 -16
  250. data/gemfiles/rails.4.2.mongoid.4.0.kaminari.gemfile +0 -15
  251. data/gemfiles/rails.4.2.mongoid.4.0.will_paginate.gemfile +0 -15
  252. data/gemfiles/rails.4.2.mongoid.5.1.kaminari.gemfile +0 -15
  253. data/gemfiles/rails.4.2.mongoid.5.1.will_paginate.gemfile +0 -15
  254. data/gemfiles/rails.5.0.activerecord.kaminari.gemfile +0 -16
  255. data/gemfiles/rails.5.0.activerecord.will_paginate.gemfile +0 -16
  256. data/gemfiles/sequel.4.38.gemfile +0 -14
  257. data/lib/chewy/journal/apply.rb +0 -31
  258. data/lib/chewy/journal/clean.rb +0 -24
  259. data/lib/chewy/journal/entry.rb +0 -83
  260. data/lib/chewy/journal/query.rb +0 -87
  261. data/lib/chewy/query/pagination/will_paginate.rb +0 -27
  262. data/lib/chewy/query/scoping.rb +0 -20
  263. data/spec/chewy/journal/apply_spec.rb +0 -120
  264. data/spec/chewy/journal/entry_spec.rb +0 -237
  265. data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -59
@@ -0,0 +1,122 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # The purpose of this class is to build an ES client-acceptable bulk
5
+ # request body from the passed objects for index and deletion.
6
+ # It handles parent-child relationships as well by fetching
7
+ # existing documents from ES, taking their `_parent` field and
8
+ # using it in the bulk body.
9
+ # If fields are passed - it creates partial update entries except for
10
+ # the cases when the type has parent and parent_id has been changed.
11
+ class BulkBuilder
12
+ # @param type [Chewy::Type] desired type
13
+ # @param index [Array<Object>] objects to index
14
+ # @param delete [Array<Object>] objects or ids to delete
15
+ # @param fields [Array<Symbol, String>] an array of fields for document updates
16
+ def initialize(type, index: [], delete: [], fields: [])
17
+ @type = type
18
+ @index = index
19
+ @delete = delete
20
+ @fields = fields.map!(&:to_sym)
21
+ end
22
+
23
+ # Returns ES API-ready bulk request body.
24
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
+ # @return [Array<Hash>] bulk body
26
+ def bulk_body
27
+ @bulk_body ||= @index.flat_map(&method(:index_entry)).concat(
28
+ @delete.flat_map(&method(:delete_entry))
29
+ )
30
+ end
31
+
32
+ # The only purpose of this method is to cache document ids for
33
+ # all the passed object for index to avoid ids recalculation.
34
+ #
35
+ # @return [Hash{String => Object}] an ids-objects index hash
36
+ def index_objects_by_id
37
+ @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
38
+ end
39
+
40
+ private
41
+
42
+ def crutches
43
+ @crutches ||= Chewy::Type::Crutch::Crutches.new @type, @index
44
+ end
45
+
46
+ def parents
47
+ return unless type_root.parent_id
48
+
49
+ @parents ||= begin
50
+ ids = @index.map do |object|
51
+ object.respond_to?(:id) ? object.id : object
52
+ end
53
+ ids.concat(@delete.map do |object|
54
+ object.respond_to?(:id) ? object.id : object
55
+ end)
56
+ @type.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
+ end
58
+ end
59
+
60
+ def index_entry(object)
61
+ entry = {}
62
+ entry[:_id] = index_object_ids[object] if index_object_ids[object]
63
+
64
+ if parents
65
+ entry[:parent] = type_root.compose_parent(object)
66
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
+ end
68
+
69
+ if parent && entry[:parent].to_s != parent
70
+ entry[:data] = @type.compose(object, crutches)
71
+ [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
72
+ elsif @fields.present?
73
+ return [] unless entry[:_id]
74
+ entry[:data] = {doc: @type.compose(object, crutches, fields: @fields)}
75
+ [{update: entry}]
76
+ else
77
+ entry[:data] = @type.compose(object, crutches)
78
+ [{index: entry}]
79
+ end
80
+ end
81
+
82
+ def delete_entry(object)
83
+ entry = {}
84
+ entry[:_id] = entry_id(object)
85
+ entry[:_id] ||= object.as_json
86
+
87
+ return [] if entry[:_id].blank?
88
+
89
+ if parents
90
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
91
+ return [] unless parent
92
+ entry[:parent] = parent
93
+ end
94
+
95
+ [{delete: entry}]
96
+ end
97
+
98
+ def entry_id(object)
99
+ if type_root.id
100
+ type_root.compose_id(object)
101
+ else
102
+ id = object.id if object.respond_to?(:id)
103
+ id ||= object[:id] || object['id'] if object.is_a?(Hash)
104
+ id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
105
+ id
106
+ end
107
+ end
108
+
109
+ def index_object_ids
110
+ @index_object_ids ||= @index.each_with_object({}) do |object, result|
111
+ id = entry_id(object)
112
+ result[object] = id if id.present?
113
+ end
114
+ end
115
+
116
+ def type_root
117
+ @type_root = @type.send(:build_root)
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,76 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # Adds additional features to elasticsearch-api bulk method:
5
+ # * supports Chewy index suffix if necessary;
6
+ # * supports bulk_size, divides the passed body in chunks
7
+ # and performs a separate request for each chunk;
8
+ # * returns only errored document entries from the response
9
+ # if any present.
10
+ #
11
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
12
+ class BulkRequest
13
+ # @param type [Chewy::Type] a type for the request
14
+ # @param suffix [String] an index name optional suffix
15
+ # @param bulk_size [Integer] bulk size in bytes
16
+ # @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
17
+ def initialize(type, suffix: nil, bulk_size: nil, **bulk_options)
18
+ @type = type
19
+ @suffix = suffix
20
+ @bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
21
+ @bulk_options = bulk_options
22
+
23
+ raise ArgumentError, '`bulk_size` can\'t be less than 1 kilobyte' if @bulk_size && @bulk_size <= 0
24
+ end
25
+
26
+ # Performs a bulk request with the passed body, returns empty
27
+ # array if everything is fine and array filled with errored
28
+ # document entries if something went wrong.
29
+ #
30
+ # @param body [Array<Hash>] a standard bulk request body
31
+ # @return [Array<Hash>] an array of bulk errors
32
+ def perform(body)
33
+ return [] if body.blank?
34
+
35
+ request_bodies(body).each_with_object([]) do |request_body, results|
36
+ response = @type.client.bulk request_base.merge(body: request_body) if request_body.present?
37
+
38
+ next unless response.try(:[], 'errors')
39
+
40
+ response_items = (response.try(:[], 'items') || [])
41
+ .select { |item| item.values.first['error'] }
42
+ results.concat(response_items)
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def request_base
49
+ @request_base ||= {
50
+ index: @type.index_name(suffix: @suffix),
51
+ type: @type.type_name
52
+ }.merge!(@bulk_options)
53
+ end
54
+
55
+ def request_bodies(body)
56
+ if @bulk_size
57
+ pieces = body.each_with_object(['']) do |piece, result|
58
+ operation, meta = piece.to_a.first
59
+ data = meta.delete(:data)
60
+ piece = [{operation => meta}, data].compact.map(&:to_json).join("\n")
61
+
62
+ if result.last.bytesize + piece.bytesize > @bulk_size
63
+ result.push(piece)
64
+ else
65
+ result[-1] = [result[-1], piece].reject(&:blank?).join("\n")
66
+ end
67
+ end
68
+ pieces.each { |piece| piece << "\n" }
69
+ else
70
+ [body]
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,45 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ class JournalBuilder
5
+ def initialize(type, index: [], delete: [])
6
+ @type = type
7
+ @index = index
8
+ @delete = delete
9
+ end
10
+
11
+ def bulk_body
12
+ Chewy::Type::Import::BulkBuilder.new(
13
+ Chewy::Stash::Journal,
14
+ index: [
15
+ entries(:index, @index),
16
+ entries(:delete, @delete)
17
+ ].compact
18
+ ).bulk_body.each do |item|
19
+ item.values.first.merge!(
20
+ _index: Chewy::Stash::Journal.index_name,
21
+ _type: Chewy::Stash::Journal.type_name
22
+ )
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def entries(action, objects)
29
+ return unless objects.present?
30
+ {
31
+ index_name: @type.index.derivable_name,
32
+ type_name: @type.type_name,
33
+ action: action,
34
+ references: identify(objects).map(&:to_json),
35
+ created_at: Time.now.utc
36
+ }
37
+ end
38
+
39
+ def identify(objects)
40
+ @type.adapter.identify(objects)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,138 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # This class performs the import routine for the options and objects given.
5
+ #
6
+ # 0. Create target and journal indexes if needed.
7
+ # 1. Iterate over all the passed objects in batches.
8
+ # 2. For each batch {#process} method is called:
9
+ # * creates a bulk request body;
10
+ # * appends journal entries for the current batch to the request body;
11
+ # * prepends a leftovers bulk to the request body, which is calculated
12
+ # based on the previous iteration errors;
13
+ # * performs the bulk request;
14
+ # * composes new leftovers bulk for the next iteration based on the response errors if `update_failover` is true;
15
+ # * appends the rest of unfixable errors to the instance level errors array.
16
+ # 3. Perform the request for the last leftovers bulk if present using {#process_leftovers}.
17
+ # 4. Return the result errors array.
18
+ #
19
+ # At the moment, it tries to restore only from the partial document update errors in cases
20
+ # when the document doesn't exist only if `update_failover` option is true. In order to
21
+ # restore, it indexes such objects completely on the next iteration.
22
+ #
23
+ # @see Chewy::Type::Import::ClassMethods#import
24
+ class Routine
25
+ BULK_OPTIONS = %i[
26
+ suffix bulk_size
27
+ refresh timeout fields pipeline
28
+ consistency replication
29
+ wait_for_active_shards routing _source _source_exclude _source_include
30
+ ].freeze
31
+
32
+ DEFAULT_OPTIONS = {
33
+ refresh: true,
34
+ update_fields: [],
35
+ update_failover: true,
36
+ batch_size: Chewy::Type::Adapter::Base::BATCH_SIZE
37
+ }.freeze
38
+
39
+ attr_reader :options, :parallel_options, :errors, :stats, :leftovers
40
+
41
+ # Basically, processes passed options, extracting bulk request specific options.
42
+ # @param type [Chewy::Type] chewy type
43
+ # @param options [Hash] import options, see {Chewy::Type::Import::ClassMethods#import}
44
+ def initialize(type, **options)
45
+ @type = type
46
+ @options = options
47
+ @options.reverse_merge!(@type._default_import_options)
48
+ @options.reverse_merge!(journal: Chewy.configuration[:journal])
49
+ @options.reverse_merge!(DEFAULT_OPTIONS)
50
+ @bulk_options = @options.slice(*BULK_OPTIONS)
51
+ @parallel_options = @options.delete(:parallel)
52
+ if @parallel_options && !@parallel_options.is_a?(Hash)
53
+ @parallel_options = if @parallel_options.is_a?(Integer)
54
+ {in_processes: @parallel_options}
55
+ else
56
+ {}
57
+ end
58
+ end
59
+ @errors = []
60
+ @stats = {}
61
+ @leftovers = []
62
+ end
63
+
64
+ # Creates the journal index and the type corresponding index if necessary.
65
+ # @return [Object] whatever
66
+ def create_indexes!
67
+ Chewy::Stash.create if @options[:journal]
68
+ return if Chewy.configuration[:skip_index_creation_on_import]
69
+ @type.index.create!(@bulk_options.slice(:suffix)) unless @type.index.exists?
70
+ end
71
+
72
+ # The main process method. Converts passed objects to the bulk request body,
73
+ # appends journal entries, performs this request and handles errors performing
74
+ # failover procedures if applicable.
75
+ #
76
+ # @param index [Array<Object>] any acceptable objects for indexing
77
+ # @param delete [Array<Object>] any acceptable objects for deleting
78
+ # @return [true, false] the result of the request, true if no errors
79
+ def process(index: [], delete: [])
80
+ bulk_builder = BulkBuilder.new(@type, index: index, delete: delete, fields: @options[:update_fields])
81
+ bulk_body = bulk_builder.bulk_body
82
+
83
+ if @options[:journal]
84
+ journal_builder = JournalBuilder.new(@type, index: index, delete: delete)
85
+ bulk_body.concat(journal_builder.bulk_body)
86
+ end
87
+
88
+ bulk_body.unshift(*flush_leftovers)
89
+
90
+ perform_bulk(bulk_body) do |response|
91
+ @leftovers = extract_leftovers(response, bulk_builder.index_objects_by_id)
92
+ @stats[:index] = @stats[:index].to_i + index.count if index.present?
93
+ @stats[:delete] = @stats[:delete].to_i + delete.count if delete.present?
94
+ end
95
+ end
96
+
97
+ # Performs a bulk request for the passed body.
98
+ #
99
+ # @param body [Array<Hash>] a standard bulk request body
100
+ # @return [true, false] the result of the request, true if no errors
101
+ def perform_bulk(body)
102
+ response = bulk.perform(body)
103
+ yield response if block_given?
104
+ Chewy.wait_for_status
105
+ @errors.concat(response)
106
+ response.blank?
107
+ end
108
+
109
+ private
110
+
111
+ def flush_leftovers
112
+ leftovers = @leftovers
113
+ @leftovers = []
114
+ leftovers
115
+ end
116
+
117
+ def extract_leftovers(errors, index_objects_by_id)
118
+ return [] unless @options[:update_fields].present? && @options[:update_failover] && errors.present?
119
+
120
+ failed_partial_updates = errors.select do |item|
121
+ item.keys.first == 'update' && item.values.first['error']['type'] == 'document_missing_exception'
122
+ end
123
+ failed_ids_hash = failed_partial_updates.index_by { |item| item.values.first['_id'].to_s }
124
+ failed_ids_for_reimport = failed_ids_hash.keys & index_objects_by_id.keys
125
+ errors_to_cleanup = failed_ids_hash.values_at(*failed_ids_for_reimport)
126
+ errors_to_cleanup.each { |error| errors.delete(error) }
127
+
128
+ failed_objects = index_objects_by_id.values_at(*failed_ids_for_reimport)
129
+ BulkBuilder.new(@type, index: failed_objects).bulk_body
130
+ end
131
+
132
+ def bulk
133
+ @bulk ||= BulkRequest.new(@type, **@bulk_options)
134
+ end
135
+ end
136
+ end
137
+ end
138
+ end
@@ -8,6 +8,8 @@ module Chewy
8
8
  class_attribute :_templates
9
9
  class_attribute :_agg_defs
10
10
  self._agg_defs = {}
11
+ class_attribute :outdated_sync_field
12
+ self.outdated_sync_field = :updated_at
11
13
  end
12
14
 
13
15
  module ClassMethods
@@ -173,6 +175,14 @@ module Chewy
173
175
  root_object ? root_object.mappings_hash : {}
174
176
  end
175
177
 
178
+ # Check whether the type has outdated_sync_field defined with a simple value.
179
+ #
180
+ # @return [true, false]
181
+ def supports_outdated_sync?
182
+ updated_at_field = root_object.child_hash[outdated_sync_field] if root_object && outdated_sync_field
183
+ !!updated_at_field && updated_at_field.value.nil?
184
+ end
185
+
176
186
  private
177
187
 
178
188
  def expand_nested(field, &block)
@@ -191,7 +201,7 @@ module Chewy
191
201
 
192
202
  def build_root(options = {}, &block)
193
203
  return root_object if root_object
194
- self.root_object = Chewy::Fields::Root.new(type_name, options)
204
+ self.root_object = Chewy::Fields::Root.new(type_name, Chewy.default_root_options.merge(options))
195
205
  expand_nested(root_object, &block)
196
206
  @_current_field = root_object
197
207
  end
@@ -34,7 +34,7 @@ module Chewy
34
34
  def extract_callback_options!(args)
35
35
  options = args.extract_options!
36
36
  result = options.each_key.with_object({}) do |key, hash|
37
- hash[key] = options.delete(key) if [:if, :unless].include?(key)
37
+ hash[key] = options.delete(key) if %i[if unless].include?(key)
38
38
  end
39
39
  args.push(options) unless options.empty?
40
40
  result
@@ -0,0 +1,220 @@
1
module Chewy
  class Type
    # This class is able to find missing and outdated documents in the ES
    # comparing ids from the data source and the ES index. Also, if `outdated_sync_field`
    # exists in the index definition, it performs comparison of this field
    # values for each source object and corresponding ES document. Usually,
    # this field is `updated_at` and if its value in the source is not equal
    # to the value in the index - this means that this document is outdated and
    # should be reindexed.
    #
    # To fetch necessary data from the source it uses adapter method
    # {Chewy::Type::Adapter::Base#import_fields}, in case when the Object
    # adapter is used it makes sense to read corresponding documentation.
    #
    # If `parallel` option is passed to the initializer - it will fetch source and
    # index data in parallel and then perform outdated objects calculation in
    # parallel processes. Also, further import (if required) will be performed
    # in parallel as well.
    #
    # @note
    #   In rails 4.0 time converted to json with the precision of seconds
    #   without milliseconds used, so outdated check is not so precise there.
    #
    #   ATTENTION: synchronization may be slow in case when synchronized tables
    #   are missing compound index on primary key and `outdated_sync_field`.
    #
    # @see Chewy::Type::Actions::ClassMethods#sync
    class Syncer
      DEFAULT_SYNC_BATCH_SIZE = 20_000
      ISO_DATETIME = /\A(\d{4})-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)(\.\d+)?\z/

      # Collects ids from `index_data` (pairs of id and index sync value)
      # whose sync value differs from the one stored under the same id in
      # `source_data_hash`. Ids without a (truthy) source value are skipped.
      # Kept as a three-argument lambda because it is curried before being
      # handed to `Parallel.map`.
      OUTDATED_IDS_WORKER = lambda do |outdated_sync_field_type, source_data_hash, index_data|
        index_data.each_with_object([]) do |(id, index_sync_value), result|
          source_sync_value = source_data_hash[id]
          next unless source_sync_value

          # A document without a stored sync value can not be parsed as a
          # date; it falls through to the plain comparison and is therefore
          # reported as outdated instead of raising.
          outdated = if outdated_sync_field_type == 'date' && !index_sync_value.nil?
            !Chewy::Type::Syncer.dates_equal(
              Chewy::Type::Syncer.typecast_date(source_sync_value),
              DateTime.iso8601(index_sync_value)
            )
          else
            source_sync_value != index_sync_value
          end

          result.push(id) if outdated
        end
      end

      # Fetches either the source or the index data through the syncer
      # private API and wraps the result in a hash keyed by the requested
      # kind, so that the caller can tell the two results apart.
      SOURCE_OR_INDEX_DATA_WORKER = lambda do |syncer, type|
        result = case type
        when :source
          syncer.send(:fetch_source_data)
        when :index
          syncer.send(:fetch_index_data)
        end
        {type => result}
      end

      # Converts a `YYYY-MM-DD HH:MM:SS[.fraction]` string into a DateTime,
      # treating it as UTC and truncating the fraction to microseconds.
      # Anything else (non-strings, strings in other formats) is returned
      # untouched.
      #
      # @param string [Object] value to typecast
      # @return [DateTime, Object] parsed date or the original value
      def self.typecast_date(string)
        return string unless string.is_a?(String)

        match = ISO_DATETIME.match(string)
        return string unless match

        # The fraction group is either nil or ".ddd"; prefixing it with "0"
        # gives a string that converts to a Rational in both cases.
        microsec = ("0#{match[7]}".to_r * 1_000_000).to_i
        date = "#{match[1]}-#{match[2]}-#{match[3]}T#{match[4]}:#{match[5]}:#{match[6]}.#{format('%06d', microsec)}+00:00"
        DateTime.iso8601(date)
      end

      # In ActiveSupport ~> 4.0 json dumped times without any
      # milliseconds, so ES stored time with the seconds precision.
      # Versions are compared with Gem::Version because a plain string
      # comparison is lexicographic ('10.0.0' < '4.1.0' is true).
      if defined?(::ActiveSupport::VERSION) &&
         Gem::Version.new(::ActiveSupport::VERSION::STRING) < Gem::Version.new('4.1.0')
        # Compares times with seconds precision.
        def self.dates_equal(one, two)
          one.to_i == two.to_i
        end
      else
        # Compares times with ms precision.
        def self.dates_equal(one, two)
          [one.to_i, one.usec / 1000] == [two.to_i, two.usec / 1000]
        end
      end

      # @param type [Chewy::Type] chewy type
      # @param parallel [true, Integer, Hash] options for parallel execution or the number of processes
      def initialize(type, parallel: nil)
        @type = type
        # nil/false and option hashes are kept as is, an integer becomes the
        # number of processes, any other truthy value means default options.
        @parallel = case parallel
        when nil, false, Hash
          parallel
        when Integer
          {in_processes: parallel}
        else
          {}
        end
      end

      # Finds all the missing and outdated ids and performs import for them.
      #
      # @return [Integer, nil] the amount of missing and outdated documents reindexed, nil in case of errors
      def perform
        ids = missing_ids | outdated_ids
        return 0 if ids.blank?

        @type.import(ids, parallel: @parallel) && ids.count
      end

      # Finds ids of all the objects that are not indexed yet or deleted
      # from the source already.
      #
      # @return [Array<String>] an array of missing ids from both sides
      def missing_ids
        return [] if source_data.blank?

        @missing_ids ||= begin
          source_data_ids = data_ids(source_data)
          index_data_ids = data_ids(index_data)

          (source_data_ids - index_data_ids).concat(index_data_ids - source_data_ids)
        end
      end

      # If type supports outdated sync, it compares the values of the
      # type `outdated_sync_field` for each object and document in the source
      # and index and returns the ids of entities which are having
      # different values there.
      #
      # @see Chewy::Type::Mapping::ClassMethods#supports_outdated_sync?
      # @return [Array<String>] an array of outdated ids
      def outdated_ids
        return [] if source_data.blank? || index_data.blank? || !@type.supports_outdated_sync?

        @outdated_ids ||= @parallel ? parallel_outdated_ids : linear_outdated_ids
      end

    private

      # Source half of the fetched data (memoized).
      def source_data
        @source_data ||= source_and_index_data.first
      end

      # Index half of the fetched data (memoized).
      def index_data
        @index_data ||= source_and_index_data.last
      end

      # Fetches source and index data once and returns them as a
      # two-element array `[source, index]`. With `@parallel` set, both
      # fetches are dispatched through `Parallel.map`.
      def source_and_index_data
        @source_and_index_data ||= if @parallel
          fetched = with_detached_connection do
            ::Parallel.map(%i[source index], @parallel, &SOURCE_OR_INDEX_DATA_WORKER.curry[self])
          end
          # Every worker result is a single-pair hash keyed by its kind, so
          # merging them makes the outcome independent of the result order.
          fetched.reduce({}, :merge).values_at(:source, :index)
        else
          [fetch_source_data, fetch_index_data]
        end
      end

      # Reads ids (plus the sync field values when outdated sync is
      # supported) from the adapter; ids are converted to strings.
      def fetch_source_data
        if @type.supports_outdated_sync?
          @type.adapter.import_fields(
            fields: [@type.outdated_sync_field],
            batch_size: DEFAULT_SYNC_BATCH_SIZE,
            typecast: false
          ).to_a.flatten(1).each do |data|
            data[0] = data[0].to_s
          end
        else
          @type.adapter.import_fields(batch_size: DEFAULT_SYNC_BATCH_SIZE, typecast: false).to_a.flatten(1).map(&:to_s)
        end
      end

      # Reads ids (plus the sync field values when outdated sync is
      # supported) from the index; ids are converted to strings.
      def fetch_index_data
        if @type.supports_outdated_sync?
          @type.pluck(:_id, @type.outdated_sync_field).each do |data|
            data[0] = data[0].to_s
          end
        else
          @type.pluck(:_id).map(&:to_s)
        end
      end

      # Extracts plain ids from fetched data: it is a list of pairs when
      # outdated sync is supported and a list of ids otherwise.
      def data_ids(data)
        return data unless @type.supports_outdated_sync?

        data.map(&:first)
      end

      # Outdated ids calculation performed in the current process.
      def linear_outdated_ids
        OUTDATED_IDS_WORKER.call(outdated_sync_field_type, source_data.to_h, index_data)
      end

      # Outdated ids calculation with the index data split into batches
      # (one per worker) and processed through `Parallel.map`.
      def parallel_outdated_ids
        size = processor_count.zero? ? index_data.size : (index_data.size / processor_count.to_f).ceil
        batches = index_data.each_slice(size)
        worker = OUTDATED_IDS_WORKER.curry[outdated_sync_field_type, source_data.to_h]

        outdated = with_detached_connection do
          ::Parallel.map(batches, @parallel, &worker)
        end
        outdated.flatten(1)
      end

      # Closes the ActiveRecord connection (when ActiveRecord is loaded)
      # around the given block and reconnects afterwards, even if the block
      # raises, so a failed parallel run does not leave it closed.
      def with_detached_connection
        active_record_loaded = defined?(::ActiveRecord::Base)
        ::ActiveRecord::Base.connection.close if active_record_loaded
        begin
          yield
        ensure
          ::ActiveRecord::Base.connection.reconnect! if active_record_loaded
        end
      end

      # Number of workers taken from the parallel options, falling back to
      # the processor count reported by Parallel.
      def processor_count
        @processor_count ||= @parallel[:in_processes] || @parallel[:in_threads] || ::Parallel.processor_count
      end

      # Mapping type of the `outdated_sync_field` as reported by the index
      # mapping. A nil result is memoized as well, hence the explicit
      # instance variable check; a missing index yields nil without
      # memoization.
      def outdated_sync_field_type
        return @outdated_sync_field_type if instance_variable_defined?(:@outdated_sync_field_type)
        return unless @type.outdated_sync_field

        mappings = @type.client.indices.get_mapping(
          index: @type.index_name,
          type: @type.type_name
        ).values.first.fetch('mappings', {})

        @outdated_sync_field_type = mappings
          .fetch(@type.type_name, {})
          .fetch('properties', {})
          .fetch(@type.outdated_sync_field.to_s, {})['type']
      rescue Elasticsearch::Transport::Transport::Errors::NotFound
        nil
      end
    end
  end
end