chewy 0.9.0 → 5.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (275) hide show
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +214 -0
  3. data/.gitignore +1 -0
  4. data/.rubocop.yml +41 -19
  5. data/.rubocop_todo.yml +2 -2
  6. data/.yardopts +5 -0
  7. data/Appraisals +58 -28
  8. data/CHANGELOG.md +153 -12
  9. data/Gemfile +20 -12
  10. data/LEGACY_DSL.md +497 -0
  11. data/LICENSE.txt +1 -1
  12. data/README.md +338 -528
  13. data/chewy.gemspec +11 -12
  14. data/gemfiles/rails.5.2.activerecord.gemfile +17 -0
  15. data/gemfiles/rails.5.2.mongoid.6.4.gemfile +17 -0
  16. data/gemfiles/rails.6.0.activerecord.gemfile +17 -0
  17. data/gemfiles/rails.6.1.activerecord.gemfile +19 -0
  18. data/gemfiles/ruby3.gemfile +10 -0
  19. data/gemfiles/sequel.4.45.gemfile +11 -0
  20. data/lib/chewy.rb +79 -44
  21. data/lib/chewy/backports/duplicable.rb +1 -1
  22. data/lib/chewy/config.rb +43 -17
  23. data/lib/chewy/errors.rb +2 -2
  24. data/lib/chewy/fields/base.rb +56 -31
  25. data/lib/chewy/fields/root.rb +44 -11
  26. data/lib/chewy/index.rb +237 -149
  27. data/lib/chewy/index/actions.rb +100 -35
  28. data/lib/chewy/index/aliases.rb +2 -1
  29. data/lib/chewy/index/settings.rb +11 -5
  30. data/lib/chewy/index/specification.rb +60 -0
  31. data/lib/chewy/journal.rb +40 -92
  32. data/lib/chewy/minitest/helpers.rb +6 -6
  33. data/lib/chewy/minitest/search_index_receiver.rb +17 -17
  34. data/lib/chewy/query.rb +182 -122
  35. data/lib/chewy/query/compose.rb +13 -13
  36. data/lib/chewy/query/criteria.rb +13 -13
  37. data/lib/chewy/query/filters.rb +21 -4
  38. data/lib/chewy/query/loading.rb +1 -2
  39. data/lib/chewy/query/nodes/and.rb +2 -2
  40. data/lib/chewy/query/nodes/bool.rb +1 -1
  41. data/lib/chewy/query/nodes/equal.rb +2 -2
  42. data/lib/chewy/query/nodes/exists.rb +1 -1
  43. data/lib/chewy/query/nodes/field.rb +1 -1
  44. data/lib/chewy/query/nodes/has_relation.rb +2 -2
  45. data/lib/chewy/query/nodes/match_all.rb +1 -1
  46. data/lib/chewy/query/nodes/missing.rb +1 -1
  47. data/lib/chewy/query/nodes/not.rb +2 -2
  48. data/lib/chewy/query/nodes/or.rb +2 -2
  49. data/lib/chewy/query/nodes/prefix.rb +1 -1
  50. data/lib/chewy/query/nodes/query.rb +2 -2
  51. data/lib/chewy/query/nodes/range.rb +4 -4
  52. data/lib/chewy/query/nodes/regexp.rb +4 -4
  53. data/lib/chewy/query/nodes/script.rb +3 -3
  54. data/lib/chewy/query/pagination.rb +10 -1
  55. data/lib/chewy/railtie.rb +4 -3
  56. data/lib/chewy/rake_helper.rb +265 -48
  57. data/lib/chewy/rspec/update_index.rb +33 -27
  58. data/lib/chewy/search.rb +79 -26
  59. data/lib/chewy/search/loader.rb +83 -0
  60. data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
  61. data/lib/chewy/search/pagination/will_paginate.rb +43 -0
  62. data/lib/chewy/search/parameters.rb +168 -0
  63. data/lib/chewy/search/parameters/aggs.rb +16 -0
  64. data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
  65. data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
  66. data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
  67. data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
  68. data/lib/chewy/search/parameters/concerns/query_storage.rb +238 -0
  69. data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
  70. data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
  71. data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
  72. data/lib/chewy/search/parameters/explain.rb +16 -0
  73. data/lib/chewy/search/parameters/filter.rb +47 -0
  74. data/lib/chewy/search/parameters/highlight.rb +16 -0
  75. data/lib/chewy/search/parameters/indices.rb +123 -0
  76. data/lib/chewy/search/parameters/indices_boost.rb +52 -0
  77. data/lib/chewy/search/parameters/limit.rb +17 -0
  78. data/lib/chewy/search/parameters/load.rb +32 -0
  79. data/lib/chewy/search/parameters/min_score.rb +16 -0
  80. data/lib/chewy/search/parameters/none.rb +27 -0
  81. data/lib/chewy/search/parameters/offset.rb +17 -0
  82. data/lib/chewy/search/parameters/order.rb +64 -0
  83. data/lib/chewy/search/parameters/post_filter.rb +19 -0
  84. data/lib/chewy/search/parameters/preference.rb +16 -0
  85. data/lib/chewy/search/parameters/profile.rb +16 -0
  86. data/lib/chewy/search/parameters/query.rb +19 -0
  87. data/lib/chewy/search/parameters/request_cache.rb +27 -0
  88. data/lib/chewy/search/parameters/rescore.rb +29 -0
  89. data/lib/chewy/search/parameters/script_fields.rb +16 -0
  90. data/lib/chewy/search/parameters/search_after.rb +20 -0
  91. data/lib/chewy/search/parameters/search_type.rb +16 -0
  92. data/lib/chewy/search/parameters/source.rb +73 -0
  93. data/lib/chewy/search/parameters/storage.rb +95 -0
  94. data/lib/chewy/search/parameters/stored_fields.rb +63 -0
  95. data/lib/chewy/search/parameters/suggest.rb +16 -0
  96. data/lib/chewy/search/parameters/terminate_after.rb +16 -0
  97. data/lib/chewy/search/parameters/timeout.rb +16 -0
  98. data/lib/chewy/search/parameters/track_scores.rb +16 -0
  99. data/lib/chewy/search/parameters/types.rb +20 -0
  100. data/lib/chewy/search/parameters/version.rb +16 -0
  101. data/lib/chewy/search/query_proxy.rb +257 -0
  102. data/lib/chewy/search/request.rb +1046 -0
  103. data/lib/chewy/search/response.rb +119 -0
  104. data/lib/chewy/search/scoping.rb +50 -0
  105. data/lib/chewy/search/scrolling.rb +134 -0
  106. data/lib/chewy/stash.rb +79 -0
  107. data/lib/chewy/strategy.rb +10 -3
  108. data/lib/chewy/strategy/active_job.rb +2 -1
  109. data/lib/chewy/strategy/atomic.rb +2 -4
  110. data/lib/chewy/strategy/bypass.rb +1 -1
  111. data/lib/chewy/strategy/resque.rb +1 -0
  112. data/lib/chewy/strategy/shoryuken.rb +40 -0
  113. data/lib/chewy/strategy/sidekiq.rb +13 -3
  114. data/lib/chewy/type.rb +29 -7
  115. data/lib/chewy/type/actions.rb +26 -2
  116. data/lib/chewy/type/adapter/active_record.rb +44 -29
  117. data/lib/chewy/type/adapter/base.rb +27 -7
  118. data/lib/chewy/type/adapter/mongoid.rb +19 -10
  119. data/lib/chewy/type/adapter/object.rb +187 -26
  120. data/lib/chewy/type/adapter/orm.rb +59 -32
  121. data/lib/chewy/type/adapter/sequel.rb +33 -19
  122. data/lib/chewy/type/crutch.rb +1 -1
  123. data/lib/chewy/type/import.rb +146 -191
  124. data/lib/chewy/type/import/bulk_builder.rb +122 -0
  125. data/lib/chewy/type/import/bulk_request.rb +78 -0
  126. data/lib/chewy/type/import/journal_builder.rb +45 -0
  127. data/lib/chewy/type/import/routine.rb +138 -0
  128. data/lib/chewy/type/mapping.rb +51 -35
  129. data/lib/chewy/type/observe.rb +17 -13
  130. data/lib/chewy/type/syncer.rb +222 -0
  131. data/lib/chewy/type/witchcraft.rb +32 -16
  132. data/lib/chewy/type/wrapper.rb +30 -4
  133. data/lib/chewy/version.rb +1 -1
  134. data/lib/sequel/plugins/chewy_observe.rb +4 -19
  135. data/lib/tasks/chewy.rake +84 -26
  136. data/spec/chewy/config_spec.rb +98 -1
  137. data/spec/chewy/fields/base_spec.rb +170 -135
  138. data/spec/chewy/fields/root_spec.rb +124 -20
  139. data/spec/chewy/fields/time_fields_spec.rb +2 -3
  140. data/spec/chewy/index/actions_spec.rb +214 -52
  141. data/spec/chewy/index/aliases_spec.rb +2 -2
  142. data/spec/chewy/index/settings_spec.rb +67 -38
  143. data/spec/chewy/index/specification_spec.rb +169 -0
  144. data/spec/chewy/index_spec.rb +108 -64
  145. data/spec/chewy/journal_spec.rb +150 -55
  146. data/spec/chewy/minitest/helpers_spec.rb +4 -4
  147. data/spec/chewy/minitest/search_index_receiver_spec.rb +1 -1
  148. data/spec/chewy/query/criteria_spec.rb +179 -179
  149. data/spec/chewy/query/filters_spec.rb +16 -16
  150. data/spec/chewy/query/loading_spec.rb +22 -20
  151. data/spec/chewy/query/nodes/and_spec.rb +2 -2
  152. data/spec/chewy/query/nodes/bool_spec.rb +4 -4
  153. data/spec/chewy/query/nodes/equal_spec.rb +19 -19
  154. data/spec/chewy/query/nodes/exists_spec.rb +6 -6
  155. data/spec/chewy/query/nodes/has_child_spec.rb +19 -19
  156. data/spec/chewy/query/nodes/has_parent_spec.rb +19 -19
  157. data/spec/chewy/query/nodes/missing_spec.rb +5 -5
  158. data/spec/chewy/query/nodes/not_spec.rb +4 -2
  159. data/spec/chewy/query/nodes/or_spec.rb +2 -2
  160. data/spec/chewy/query/nodes/prefix_spec.rb +5 -5
  161. data/spec/chewy/query/nodes/query_spec.rb +2 -2
  162. data/spec/chewy/query/nodes/range_spec.rb +18 -18
  163. data/spec/chewy/query/nodes/raw_spec.rb +1 -1
  164. data/spec/chewy/query/nodes/regexp_spec.rb +14 -14
  165. data/spec/chewy/query/nodes/script_spec.rb +4 -4
  166. data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
  167. data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
  168. data/spec/chewy/query/pagination_spec.rb +25 -21
  169. data/spec/chewy/query_spec.rb +503 -561
  170. data/spec/chewy/rake_helper_spec.rb +381 -0
  171. data/spec/chewy/repository_spec.rb +4 -4
  172. data/spec/chewy/rspec/update_index_spec.rb +89 -56
  173. data/spec/chewy/runtime_spec.rb +2 -2
  174. data/spec/chewy/search/loader_spec.rb +117 -0
  175. data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
  176. data/spec/chewy/search/pagination/kaminari_spec.rb +21 -0
  177. data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
  178. data/spec/chewy/search/pagination/will_paginate_spec.rb +23 -0
  179. data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
  180. data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
  181. data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
  182. data/spec/chewy/search/parameters/explain_spec.rb +5 -0
  183. data/spec/chewy/search/parameters/filter_spec.rb +5 -0
  184. data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
  185. data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
  186. data/spec/chewy/search/parameters/indices_spec.rb +191 -0
  187. data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
  188. data/spec/chewy/search/parameters/limit_spec.rb +5 -0
  189. data/spec/chewy/search/parameters/load_spec.rb +60 -0
  190. data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
  191. data/spec/chewy/search/parameters/none_spec.rb +5 -0
  192. data/spec/chewy/search/parameters/offset_spec.rb +5 -0
  193. data/spec/chewy/search/parameters/order_spec.rb +65 -0
  194. data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
  195. data/spec/chewy/search/parameters/preference_spec.rb +5 -0
  196. data/spec/chewy/search/parameters/profile_spec.rb +5 -0
  197. data/spec/chewy/search/parameters/query_spec.rb +5 -0
  198. data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
  199. data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
  200. data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
  201. data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
  202. data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
  203. data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
  204. data/spec/chewy/search/parameters/source_spec.rb +156 -0
  205. data/spec/chewy/search/parameters/storage_spec.rb +60 -0
  206. data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
  207. data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
  208. data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
  209. data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
  210. data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
  211. data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
  212. data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
  213. data/spec/chewy/search/parameters/types_spec.rb +5 -0
  214. data/spec/chewy/search/parameters/version_spec.rb +5 -0
  215. data/spec/chewy/search/parameters_spec.rb +147 -0
  216. data/spec/chewy/search/query_proxy_spec.rb +68 -0
  217. data/spec/chewy/search/request_spec.rb +685 -0
  218. data/spec/chewy/search/response_spec.rb +198 -0
  219. data/spec/chewy/search/scrolling_spec.rb +169 -0
  220. data/spec/chewy/search_spec.rb +33 -16
  221. data/spec/chewy/stash_spec.rb +95 -0
  222. data/spec/chewy/strategy/active_job_spec.rb +21 -2
  223. data/spec/chewy/strategy/resque_spec.rb +6 -0
  224. data/spec/chewy/strategy/shoryuken_spec.rb +70 -0
  225. data/spec/chewy/strategy/sidekiq_spec.rb +13 -1
  226. data/spec/chewy/strategy_spec.rb +6 -6
  227. data/spec/chewy/type/actions_spec.rb +29 -10
  228. data/spec/chewy/type/adapter/active_record_spec.rb +203 -91
  229. data/spec/chewy/type/adapter/mongoid_spec.rb +112 -54
  230. data/spec/chewy/type/adapter/object_spec.rb +101 -28
  231. data/spec/chewy/type/adapter/sequel_spec.rb +149 -82
  232. data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
  233. data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
  234. data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
  235. data/spec/chewy/type/import/routine_spec.rb +110 -0
  236. data/spec/chewy/type/import_spec.rb +356 -271
  237. data/spec/chewy/type/mapping_spec.rb +96 -29
  238. data/spec/chewy/type/observe_spec.rb +9 -5
  239. data/spec/chewy/type/syncer_spec.rb +123 -0
  240. data/spec/chewy/type/witchcraft_spec.rb +61 -29
  241. data/spec/chewy/type/wrapper_spec.rb +63 -23
  242. data/spec/chewy/type_spec.rb +28 -7
  243. data/spec/chewy_spec.rb +75 -7
  244. data/spec/spec_helper.rb +17 -3
  245. data/spec/support/active_record.rb +5 -1
  246. data/spec/support/class_helpers.rb +0 -14
  247. data/spec/support/mongoid.rb +15 -3
  248. data/spec/support/sequel.rb +6 -1
  249. metadata +219 -58
  250. data/.travis.yml +0 -36
  251. data/gemfiles/rails.3.2.activerecord.gemfile +0 -16
  252. data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -15
  253. data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -15
  254. data/gemfiles/rails.4.2.activerecord.gemfile +0 -17
  255. data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -16
  256. data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -16
  257. data/gemfiles/rails.4.2.mongoid.4.0.gemfile +0 -16
  258. data/gemfiles/rails.4.2.mongoid.4.0.kaminari.gemfile +0 -15
  259. data/gemfiles/rails.4.2.mongoid.4.0.will_paginate.gemfile +0 -15
  260. data/gemfiles/rails.4.2.mongoid.5.1.gemfile +0 -16
  261. data/gemfiles/rails.4.2.mongoid.5.1.kaminari.gemfile +0 -15
  262. data/gemfiles/rails.4.2.mongoid.5.1.will_paginate.gemfile +0 -15
  263. data/gemfiles/rails.5.0.activerecord.gemfile +0 -17
  264. data/gemfiles/rails.5.0.activerecord.kaminari.gemfile +0 -16
  265. data/gemfiles/rails.5.0.activerecord.will_paginate.gemfile +0 -16
  266. data/gemfiles/sequel.4.38.gemfile +0 -14
  267. data/lib/chewy/journal/apply.rb +0 -31
  268. data/lib/chewy/journal/clean.rb +0 -24
  269. data/lib/chewy/journal/entry.rb +0 -83
  270. data/lib/chewy/journal/query.rb +0 -87
  271. data/lib/chewy/query/pagination/will_paginate.rb +0 -27
  272. data/lib/chewy/query/scoping.rb +0 -20
  273. data/spec/chewy/journal/apply_spec.rb +0 -120
  274. data/spec/chewy/journal/entry_spec.rb +0 -237
  275. data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -59
@@ -0,0 +1,122 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # The purpose of this class is to build an ES client-acceptable bulk
5
+ # request body from the passed objects for index and deletion.
6
+ # It handles parent-child relationships as well by fetching
7
+ # existing documents from ES, taking their `_parent` field and
8
+ # using it in the bulk body.
9
+ # If fields are passed - it creates partial update entries except for
10
+ # the cases when the type has parent and parent_id has been changed.
11
+ class BulkBuilder
12
+ # @param type [Chewy::Type] desired type
13
+ # @param index [Array<Object>] objects to index
14
+ # @param delete [Array<Object>] objects or ids to delete
15
+ # @param fields [Array<Symbol, String>] an array of fields for documents update
16
+ def initialize(type, index: [], delete: [], fields: [])
17
+ @type = type
18
+ @index = index
19
+ @delete = delete
20
+ @fields = fields.map!(&:to_sym)
21
+ end
22
+
23
+ # Returns ES API-ready bulk request body.
24
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
+ # @return [Array<Hash>] bulk body
26
+ def bulk_body
27
+ @bulk_body ||= @index.flat_map(&method(:index_entry)).concat(
28
+ @delete.flat_map(&method(:delete_entry))
29
+ )
30
+ end
31
+
32
+ # The only purpose of this method is to cache document ids for
33
+ # all the passed objects for index to avoid ids recalculation.
34
+ #
35
+ # @return [Hash{String => Object}] an ids-objects index hash
36
+ def index_objects_by_id
37
+ @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
38
+ end
39
+
40
+ private
41
+
42
+ def crutches
43
+ @crutches ||= Chewy::Type::Crutch::Crutches.new @type, @index
44
+ end
45
+
46
+ def parents
47
+ return unless type_root.parent_id
48
+
49
+ @parents ||= begin
50
+ ids = @index.map do |object|
51
+ object.respond_to?(:id) ? object.id : object
52
+ end
53
+ ids.concat(@delete.map do |object|
54
+ object.respond_to?(:id) ? object.id : object
55
+ end)
56
+ @type.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
+ end
58
+ end
59
+
60
+ def index_entry(object)
61
+ entry = {}
62
+ entry[:_id] = index_object_ids[object] if index_object_ids[object]
63
+
64
+ if parents
65
+ entry[:parent] = type_root.compose_parent(object)
66
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
+ end
68
+
69
+ if parent && entry[:parent].to_s != parent
70
+ entry[:data] = @type.compose(object, crutches)
71
+ [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
72
+ elsif @fields.present?
73
+ return [] unless entry[:_id]
74
+ entry[:data] = {doc: @type.compose(object, crutches, fields: @fields)}
75
+ [{update: entry}]
76
+ else
77
+ entry[:data] = @type.compose(object, crutches)
78
+ [{index: entry}]
79
+ end
80
+ end
81
+
82
+ def delete_entry(object)
83
+ entry = {}
84
+ entry[:_id] = entry_id(object)
85
+ entry[:_id] ||= object.as_json
86
+
87
+ return [] if entry[:_id].blank?
88
+
89
+ if parents
90
+ parent = entry[:_id].present? && parents[entry[:_id].to_s]
91
+ return [] unless parent
92
+ entry[:parent] = parent
93
+ end
94
+
95
+ [{delete: entry}]
96
+ end
97
+
98
+ def entry_id(object)
99
+ if type_root.id
100
+ type_root.compose_id(object)
101
+ else
102
+ id = object.id if object.respond_to?(:id)
103
+ id ||= object[:id] || object['id'] if object.is_a?(Hash)
104
+ id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
105
+ id
106
+ end
107
+ end
108
+
109
+ def index_object_ids
110
+ @index_object_ids ||= @index.each_with_object({}) do |object, result|
111
+ id = entry_id(object)
112
+ result[object] = id if id.present?
113
+ end
114
+ end
115
+
116
+ def type_root
117
+ @type_root ||= @type.root
118
+ end
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,78 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # Adds additional features to elasticsearch-api bulk method:
5
+ # * supports Chewy index suffix if necessary;
6
+ # * supports bulk_size, divides the passed body into chunks
7
+ # and performs a separate request for each chunk;
8
+ # * returns only errored document entries from the response
9
+ # if any present.
10
+ #
11
+ # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
12
+ class BulkRequest
13
+ # @param type [Chewy::Type] a type for the request
14
+ # @param suffix [String] an index name optional suffix
15
+ # @param bulk_size [Integer] bulk size in bytes
16
+ # @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
17
+ def initialize(type, suffix: nil, bulk_size: nil, **bulk_options)
18
+ @type = type
19
+ @suffix = suffix
20
+ @bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
21
+ @bulk_options = bulk_options
22
+
23
+ raise ArgumentError, '`bulk_size` can\'t be less than 1 kilobyte' if @bulk_size && @bulk_size <= 0
24
+ end
25
+
26
+ # Performs a bulk request with the passed body, returns empty
27
+ # array if everything is fine and array filled with errored
28
+ # document entries if something went wrong.
29
+ #
30
+ # @param body [Array<Hash>] a standard bulk request body
31
+ # @return [Array<Hash>] an array of bulk errors
32
+ def perform(body)
33
+ return [] if body.blank?
34
+
35
+ request_bodies(body).each_with_object([]) do |request_body, results|
36
+ response = @type.client.bulk request_base.merge(body: request_body) if request_body.present?
37
+
38
+ next unless response.try(:[], 'errors')
39
+
40
+ response_items = (response.try(:[], 'items') || [])
41
+ .select { |item| item.values.first['error'] }
42
+ results.concat(response_items)
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def request_base
49
+ @request_base ||= {
50
+ index: @type.index_name(suffix: @suffix),
51
+ type: @type.type_name
52
+ }.merge!(@bulk_options)
53
+ end
54
+
55
+ def request_bodies(body)
56
+ if @bulk_size
57
+ serializer = ::Elasticsearch::API.serializer
58
+ pieces = body.each_with_object(['']) do |piece, result|
59
+ operation, meta = piece.to_a.first
60
+ data = meta.delete(:data)
61
+ piece = serializer.dump(operation => meta)
62
+ piece << "\n" << serializer.dump(data) if data.present?
63
+
64
+ if result.last.bytesize + piece.bytesize > @bulk_size
65
+ result.push(piece)
66
+ else
67
+ result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
68
+ end
69
+ end
70
+ pieces.each { |piece| piece << "\n" }
71
+ else
72
+ [body]
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,45 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ class JournalBuilder
5
+ def initialize(type, index: [], delete: [])
6
+ @type = type
7
+ @index = index
8
+ @delete = delete
9
+ end
10
+
11
+ def bulk_body
12
+ Chewy::Type::Import::BulkBuilder.new(
13
+ Chewy::Stash::Journal::Journal,
14
+ index: [
15
+ entries(:index, @index),
16
+ entries(:delete, @delete)
17
+ ].compact
18
+ ).bulk_body.each do |item|
19
+ item.values.first.merge!(
20
+ _index: Chewy::Stash::Journal.index_name,
21
+ _type: Chewy::Stash::Journal::Journal.type_name
22
+ )
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def entries(action, objects)
29
+ return unless objects.present?
30
+ {
31
+ index_name: @type.index.derivable_name,
32
+ type_name: @type.type_name,
33
+ action: action,
34
+ references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
35
+ created_at: Time.now.utc
36
+ }
37
+ end
38
+
39
+ def identify(objects)
40
+ @type.adapter.identify(objects)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,138 @@
1
+ module Chewy
2
+ class Type
3
+ module Import
4
+ # This class performs the import routine for the options and objects given.
5
+ #
6
+ # 0. Create target and journal indexes if needed.
7
+ # 1. Iterate over all the passed objects in batches.
8
+ # 2. For each batch {#process} method is called:
9
+ # * creates a bulk request body;
10
+ # * appends journal entries for the current batch to the request body;
11
+ # * prepends a leftovers bulk to the request body, which is calculated
12
+ # based on the previous iteration errors;
13
+ # * performs the bulk request;
14
+ # * composes new leftovers bulk for the next iteration based on the response errors if `update_failover` is true;
15
+ # * appends the rest of unfixable errors to the instance level errors array.
16
+ # 3. Perform the request for the last leftovers bulk if present using {#extract_leftovers}.
17
+ # 4. Return the result errors array.
18
+ #
19
+ # At the moment, it tries to restore only from the partial document update errors in cases
20
+ # when the document doesn't exist only if `update_failover` option is true. In order to
21
+ # restore, it indexes such objects completely on the next iteration.
22
+ #
23
+ # @see Chewy::Type::Import::ClassMethods#import
24
+ class Routine
25
+ BULK_OPTIONS = %i[
26
+ suffix bulk_size
27
+ refresh timeout fields pipeline
28
+ consistency replication
29
+ wait_for_active_shards routing _source _source_exclude _source_include
30
+ ].freeze
31
+
32
+ DEFAULT_OPTIONS = {
33
+ refresh: true,
34
+ update_fields: [],
35
+ update_failover: true,
36
+ batch_size: Chewy::Type::Adapter::Base::BATCH_SIZE
37
+ }.freeze
38
+
39
+ attr_reader :options, :parallel_options, :errors, :stats, :leftovers
40
+
41
+ # Basically, processes passed options, extracting bulk request specific options.
42
+ # @param type [Chewy::Type] chewy type
43
+ # @param options [Hash] import options, see {Chewy::Type::Import::ClassMethods#import}
44
+ def initialize(type, **options)
45
+ @type = type
46
+ @options = options
47
+ @options.reverse_merge!(@type._default_import_options)
48
+ @options.reverse_merge!(journal: Chewy.configuration[:journal])
49
+ @options.reverse_merge!(DEFAULT_OPTIONS)
50
+ @bulk_options = @options.slice(*BULK_OPTIONS)
51
+ @parallel_options = @options.delete(:parallel)
52
+ if @parallel_options && !@parallel_options.is_a?(Hash)
53
+ @parallel_options = if @parallel_options.is_a?(Integer)
54
+ {in_processes: @parallel_options}
55
+ else
56
+ {}
57
+ end
58
+ end
59
+ @errors = []
60
+ @stats = {}
61
+ @leftovers = []
62
+ end
63
+
64
+ # Creates the journal index and the type corresponding index if necessary.
65
+ # @return [Object] whatever
66
+ def create_indexes!
67
+ Chewy::Stash::Journal.create if @options[:journal]
68
+ return if Chewy.configuration[:skip_index_creation_on_import]
69
+ @type.index.create!(**@bulk_options.slice(:suffix)) unless @type.index.exists?
70
+ end
71
+
72
+ # The main process method. Converts passed objects to the bulk request body,
73
+ # appends journal entries, performs this request and handles errors performing
74
+ # failover procedures if applicable.
75
+ #
76
+ # @param index [Array<Object>] any acceptable objects for indexing
77
+ # @param delete [Array<Object>] any acceptable objects for deleting
78
+ # @return [true, false] the result of the request, true if no errors
79
+ def process(index: [], delete: [])
80
+ bulk_builder = BulkBuilder.new(@type, index: index, delete: delete, fields: @options[:update_fields])
81
+ bulk_body = bulk_builder.bulk_body
82
+
83
+ if @options[:journal]
84
+ journal_builder = JournalBuilder.new(@type, index: index, delete: delete)
85
+ bulk_body.concat(journal_builder.bulk_body)
86
+ end
87
+
88
+ bulk_body.unshift(*flush_leftovers)
89
+
90
+ perform_bulk(bulk_body) do |response|
91
+ @leftovers = extract_leftovers(response, bulk_builder.index_objects_by_id)
92
+ @stats[:index] = @stats[:index].to_i + index.count if index.present?
93
+ @stats[:delete] = @stats[:delete].to_i + delete.count if delete.present?
94
+ end
95
+ end
96
+
97
+ # Performs a bulk request for the passed body.
98
+ #
99
+ # @param body [Array<Hash>] a standard bulk request body
100
+ # @return [true, false] the result of the request, true if no errors
101
+ def perform_bulk(body)
102
+ response = bulk.perform(body)
103
+ yield response if block_given?
104
+ Chewy.wait_for_status
105
+ @errors.concat(response)
106
+ response.blank?
107
+ end
108
+
109
+ private
110
+
111
+ def flush_leftovers
112
+ leftovers = @leftovers
113
+ @leftovers = []
114
+ leftovers
115
+ end
116
+
117
+ def extract_leftovers(errors, index_objects_by_id)
118
+ return [] unless @options[:update_fields].present? && @options[:update_failover] && errors.present?
119
+
120
+ failed_partial_updates = errors.select do |item|
121
+ item.keys.first == 'update' && item.values.first['error']['type'] == 'document_missing_exception'
122
+ end
123
+ failed_ids_hash = failed_partial_updates.index_by { |item| item.values.first['_id'].to_s }
124
+ failed_ids_for_reimport = failed_ids_hash.keys & index_objects_by_id.keys
125
+ errors_to_cleanup = failed_ids_hash.values_at(*failed_ids_for_reimport)
126
+ errors_to_cleanup.each { |error| errors.delete(error) }
127
+
128
+ failed_objects = index_objects_by_id.values_at(*failed_ids_for_reimport)
129
+ BulkBuilder.new(@type, index: failed_objects).bulk_body
130
+ end
131
+
132
+ def bulk
133
+ @bulk ||= BulkRequest.new(@type, **@bulk_options)
134
+ end
135
+ end
136
+ end
137
+ end
138
+ end
@@ -8,6 +8,8 @@ module Chewy
8
8
  class_attribute :_templates
9
9
  class_attribute :_agg_defs
10
10
  self._agg_defs = {}
11
+ class_attribute :outdated_sync_field
12
+ self.outdated_sync_field = :updated_at
11
13
  end
12
14
 
13
15
  module ClassMethods
@@ -15,10 +17,11 @@ module Chewy
15
17
  # definition. Use it only if you need to pass options for root
16
18
  # object mapping, such as `date_detection` or `dynamic_date_formats`
17
19
  #
20
+ # @example
18
21
  # class UsersIndex < Chewy::Index
19
22
  # define_type User do
20
23
  # # root object defined implicitly and optionless for current type
21
- # field :full_name, type: 'string'
24
+ # field :full_name, type: 'keyword'
22
25
  # end
23
26
  # end
24
27
  #
@@ -26,32 +29,37 @@ module Chewy
26
29
  # define_type Car do
27
30
  # # explicit root definition with additional options
28
31
  # root dynamic_date_formats: ['yyyy-MM-dd'] do
29
- # field :model_name, type: 'string'
32
+ # field :model_name, type: 'keyword'
30
33
  # end
31
34
  # end
32
35
  # end
33
36
  #
34
- def root(options = {}, &block)
35
- raise 'Root is already defined' if root_object
36
- build_root(options, &block)
37
+ def root(**options)
38
+ self.root_object ||= Chewy::Fields::Root.new(type_name, **Chewy.default_root_options.merge(options))
39
+ root_object.update_options!(**options)
40
+ yield if block_given?
41
+ root_object
37
42
  end
38
43
 
39
44
  # Defines mapping field for current type
40
45
  #
46
+ # @example
41
47
  # class UsersIndex < Chewy::Index
42
48
  # define_type User do
43
49
  # # passing all the options to field definition:
44
- # field :full_name, type: 'string', analyzer: 'special'
50
+ # field :full_name, analyzer: 'special'
45
51
  # end
46
52
  # end
47
53
  #
48
54
  # The `type` is optional and defaults to `string` if not defined:
49
55
  #
56
+ # @example
50
57
  # field :full_name
51
58
  #
52
59
  # Also, multiple fields might be defined with one call and
53
60
  # with the same options:
54
61
  #
62
+ # @example
55
63
  # field :first_name, :last_name, analyzer: 'special'
56
64
  #
57
65
  # The only special option in the field definition
@@ -59,31 +67,35 @@ module Chewy
59
67
  # method will be called for the indexed object. Also
60
68
  # `:value` might be a proc or indexed object method name:
61
69
  #
70
+ # @example
62
71
  # class User < ActiveRecord::Base
63
72
  # def user_full_name
64
73
  # [first_name, last_name].join(' ')
65
74
  # end
66
75
  # end
67
76
  #
68
- # field :full_name, type: 'string', value: :user_full_name
77
+ # field :full_name, type: 'keyword', value: :user_full_name
69
78
  #
70
79
  # The proc evaluates inside the indexed object context if
71
80
  # its arity is 0 and in present contexts if there is an argument:
72
81
  #
73
- # field :full_name, type: 'string', value: -> { [first_name, last_name].join(' ') }
82
+ # @example
83
+ # field :full_name, type: 'keyword', value: -> { [first_name, last_name].join(' ') }
74
84
  #
75
85
  # separator = ' '
76
- # field :full_name, type: 'string', value: ->(user) { [user.first_name, user.last_name].join(separator) }
86
+ # field :full_name, type: 'keyword', value: ->(user) { [user.first_name, user.last_name].join(separator) }
77
87
  #
78
88
  # If array was returned as value - it will be put in index as well.
79
89
  #
80
- # field :tags, type: 'string', value: -> { tags.map(&:name) }
90
+ # @example
91
+ # field :tags, type: 'keyword', value: -> { tags.map(&:name) }
81
92
  #
82
93
  # Fields supports nesting in case of `object` field type. If
83
94
  # `user.quiz` will return an array of objects, then result index content
84
95
  # will be an array of hashes, if `user.quiz` is not a collection association
85
96
  # then just values hash will be put in the index.
86
97
  #
98
+ # @example
87
99
  # field :quiz do
88
100
  # field :question, :answer
89
101
  # field :score, type: 'integer'
@@ -91,6 +103,7 @@ module Chewy
91
103
  #
92
104
  # Nested fields are composed from nested objects:
93
105
  #
106
+ # @example
94
107
  # field :name, value: -> { name_translations } do
95
108
  # field :ru, value: ->(name) { name['ru'] }
96
109
  # field :en, value: ->(name) { name['en'] }
@@ -99,32 +112,31 @@ module Chewy
99
112
  # Of course it is possible to define object fields contents dynamically
100
113
  # but make sure evaluation proc returns hash:
101
114
  #
115
+ # @example
102
116
  # field :name, type: 'object', value: -> { name_translations }
103
117
  #
104
118
  # The special case is multi_field. If type options and block are
105
119
  # both present, the field is treated as a multi-field. In that case field
106
120
  # composition changes to satisfy Elasticsearch rules:
107
121
  #
108
- # field :full_name, type: 'string', analyzer: 'name', value: ->{ full_name.try(:strip) } do
122
+ # @example
123
+ # field :full_name, type: 'text', analyzer: 'name', value: ->{ full_name.try(:strip) } do
109
124
  # field :sorted, analyzer: 'sorted'
110
125
  # end
111
126
  #
112
- def field(*args, &block)
113
- options = args.extract_options!
114
- build_root
115
-
127
+ def field(*args, **options, &block)
116
128
  if args.size > 1
117
- args.map { |name| field(name, options) }
129
+ args.map { |name| field(name, **options) }
118
130
  else
119
- expand_nested(Chewy::Fields::Base.new(args.first, options), &block)
131
+ expand_nested(Chewy::Fields::Base.new(args.first, **options), &block)
120
132
  end
121
133
  end
122
134
 
123
135
  # Defines an aggregation that can be bound to a query or filter
124
136
  #
125
- # Suppose that a user has posts and each post has ratings
126
- # avg_post_rating is the mean of all ratings
127
- #
137
+ # @example
138
+ # # Suppose that a user has posts and each post has ratings
139
+ # # avg_post_rating is the mean of all ratings
128
140
  # class UsersIndex < Chewy::Index
129
141
  # define_type User do
130
142
  # field :posts do
@@ -137,64 +149,68 @@ module Chewy
137
149
  # end
138
150
  # end
139
151
  def agg(name, &block)
140
- build_root
141
152
  self._agg_defs = _agg_defs.merge(name => block)
142
153
  end
143
154
  alias_method :aggregation, :agg
144
155
 
145
156
  # Defines dynamic template in mapping root objects
146
157
  #
158
+ # @example
147
159
  # class CarsIndex < Chewy::Index
148
160
  # define_type Car do
149
- # template 'model.*', type: 'string', analyzer: 'special'
161
+ # template 'model.*', type: 'text', analyzer: 'special'
150
162
  # field 'model', type: 'object' # here we can put { de: 'Der Mercedes', en: 'Mercedes' }
151
163
  # # and template will be applied to this field
152
164
  # end
153
165
  # end
154
166
  #
155
167
  # Name for each template is generated with the following
156
- # rule: "template_#{dynamic_templates.size + 1}".
168
+ # rule: `template_#!{dynamic_templates.size + 1}`.
157
169
  #
170
+ # @example Templates
158
171
  # template 'tit*', mapping_hash
159
172
  # template 'title.*', mapping_hash # dot in template causes "path_match" using
160
173
  # template /tit.+/, mapping_hash # using "match_pattern": "regexp"
161
174
  # template /title\..+/, mapping_hash # "\." - escaped dot causes "path_match" using
162
- # template /tit.+/, 'string', mapping_hash # "match_mapping_type" as the optionsl second argument
175
+ # template /tit.+/, type: 'text', mapping_hash # "match_mapping_type" as the optional second argument
163
176
  # template template42: {match: 'hello*', mapping: {type: 'object'}} # or even pass a template as is
164
177
  #
165
178
  def template(*args)
166
- build_root.dynamic_template(*args)
179
+ root.dynamic_template(*args)
167
180
  end
168
181
  alias_method :dynamic_template, :template
169
182
 
170
183
  # Returns compiled mappings hash for current type
171
184
  #
172
185
  def mappings_hash
173
- root_object ? root_object.mappings_hash : {}
186
+ root.mappings_hash[type_name.to_sym].present? ? root.mappings_hash : {}
187
+ end
188
+
189
+ # Check whether the type has outdated_sync_field defined with a simple value.
190
+ #
191
+ # @return [true, false]
192
+ def supports_outdated_sync?
193
+ updated_at_field = root.child_hash[outdated_sync_field] if outdated_sync_field
194
+ !!updated_at_field && updated_at_field.value.nil?
174
195
  end
175
196
 
176
197
  private
177
198
 
178
- def expand_nested(field, &block)
199
+ def expand_nested(field)
200
+ @_current_field ||= root
201
+
179
202
  if @_current_field
180
203
  field.parent = @_current_field
181
204
  @_current_field.children.push(field)
182
205
  end
183
206
 
184
- return unless block
207
+ return unless block_given?
185
208
 
186
209
  previous_field = @_current_field
187
210
  @_current_field = field
188
211
  yield
189
212
  @_current_field = previous_field
190
213
  end
191
-
192
- def build_root(options = {}, &block)
193
- return root_object if root_object
194
- self.root_object = Chewy::Fields::Root.new(type_name, options)
195
- expand_nested(root_object, &block)
196
- @_current_field = root_object
197
- end
198
214
  end
199
215
  end
200
216
  end