chewy 7.2.1 → 7.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/.github/CODEOWNERS +1 -0
  3. data/.github/dependabot.yml +42 -0
  4. data/.github/workflows/ruby.yml +28 -26
  5. data/.rubocop.yml +4 -1
  6. data/CHANGELOG.md +196 -0
  7. data/Gemfile +4 -3
  8. data/README.md +203 -20
  9. data/chewy.gemspec +4 -18
  10. data/gemfiles/base.gemfile +12 -0
  11. data/gemfiles/rails.6.1.activerecord.gemfile +2 -1
  12. data/gemfiles/{rails.5.2.activerecord.gemfile → rails.7.0.activerecord.gemfile} +6 -3
  13. data/gemfiles/{rails.6.0.activerecord.gemfile → rails.7.1.activerecord.gemfile} +6 -3
  14. data/lib/chewy/config.rb +22 -14
  15. data/lib/chewy/elastic_client.rb +31 -0
  16. data/lib/chewy/errors.rb +11 -2
  17. data/lib/chewy/fields/base.rb +69 -13
  18. data/lib/chewy/fields/root.rb +2 -10
  19. data/lib/chewy/index/actions.rb +11 -16
  20. data/lib/chewy/index/adapter/active_record.rb +18 -3
  21. data/lib/chewy/index/adapter/object.rb +0 -10
  22. data/lib/chewy/index/adapter/orm.rb +4 -14
  23. data/lib/chewy/index/crutch.rb +15 -7
  24. data/lib/chewy/index/import/bulk_builder.rb +219 -32
  25. data/lib/chewy/index/import/bulk_request.rb +1 -1
  26. data/lib/chewy/index/import/routine.rb +3 -3
  27. data/lib/chewy/index/import.rb +45 -31
  28. data/lib/chewy/index/mapping.rb +2 -2
  29. data/lib/chewy/index/observe/active_record_methods.rb +87 -0
  30. data/lib/chewy/index/observe/callback.rb +34 -0
  31. data/lib/chewy/index/observe.rb +3 -58
  32. data/lib/chewy/index/syncer.rb +1 -1
  33. data/lib/chewy/index.rb +25 -0
  34. data/lib/chewy/journal.rb +17 -6
  35. data/lib/chewy/log_subscriber.rb +5 -1
  36. data/lib/chewy/minitest/helpers.rb +77 -0
  37. data/lib/chewy/minitest/search_index_receiver.rb +3 -1
  38. data/lib/chewy/rake_helper.rb +92 -11
  39. data/lib/chewy/rspec/build_query.rb +12 -0
  40. data/lib/chewy/rspec/helpers.rb +55 -0
  41. data/lib/chewy/rspec/update_index.rb +14 -7
  42. data/lib/chewy/rspec.rb +2 -0
  43. data/lib/chewy/runtime/version.rb +1 -1
  44. data/lib/chewy/runtime.rb +1 -1
  45. data/lib/chewy/search/parameters/collapse.rb +16 -0
  46. data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
  47. data/lib/chewy/search/parameters/indices.rb +1 -1
  48. data/lib/chewy/search/parameters/knn.rb +16 -0
  49. data/lib/chewy/search/parameters/order.rb +6 -19
  50. data/lib/chewy/search/parameters/storage.rb +1 -1
  51. data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
  52. data/lib/chewy/search/parameters.rb +4 -4
  53. data/lib/chewy/search/request.rb +74 -16
  54. data/lib/chewy/search/scoping.rb +1 -1
  55. data/lib/chewy/search.rb +5 -2
  56. data/lib/chewy/stash.rb +3 -3
  57. data/lib/chewy/strategy/active_job.rb +1 -1
  58. data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
  59. data/lib/chewy/strategy/base.rb +10 -0
  60. data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
  61. data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
  62. data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
  63. data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
  64. data/lib/chewy/strategy/sidekiq.rb +1 -1
  65. data/lib/chewy/strategy.rb +3 -0
  66. data/lib/chewy/version.rb +1 -1
  67. data/lib/chewy.rb +21 -14
  68. data/lib/tasks/chewy.rake +18 -2
  69. data/migration_guide.md +1 -1
  70. data/spec/chewy/config_spec.rb +2 -2
  71. data/spec/chewy/elastic_client_spec.rb +26 -0
  72. data/spec/chewy/fields/base_spec.rb +39 -18
  73. data/spec/chewy/index/actions_spec.rb +10 -10
  74. data/spec/chewy/index/adapter/active_record_spec.rb +88 -0
  75. data/spec/chewy/index/import/bulk_builder_spec.rb +309 -1
  76. data/spec/chewy/index/import/routine_spec.rb +5 -5
  77. data/spec/chewy/index/import_spec.rb +48 -26
  78. data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
  79. data/spec/chewy/index/observe/callback_spec.rb +139 -0
  80. data/spec/chewy/index/observe_spec.rb +27 -0
  81. data/spec/chewy/journal_spec.rb +13 -49
  82. data/spec/chewy/minitest/helpers_spec.rb +111 -1
  83. data/spec/chewy/minitest/search_index_receiver_spec.rb +6 -4
  84. data/spec/chewy/rake_helper_spec.rb +170 -0
  85. data/spec/chewy/rspec/build_query_spec.rb +34 -0
  86. data/spec/chewy/rspec/helpers_spec.rb +61 -0
  87. data/spec/chewy/search/pagination/kaminari_examples.rb +1 -1
  88. data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
  89. data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
  90. data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
  91. data/spec/chewy/search/parameters/knn_spec.rb +5 -0
  92. data/spec/chewy/search/parameters/order_spec.rb +18 -11
  93. data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
  94. data/spec/chewy/search/parameters_spec.rb +6 -1
  95. data/spec/chewy/search/request_spec.rb +58 -9
  96. data/spec/chewy/search_spec.rb +9 -0
  97. data/spec/chewy/strategy/active_job_spec.rb +8 -8
  98. data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
  99. data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
  100. data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
  101. data/spec/chewy/strategy/sidekiq_spec.rb +4 -4
  102. data/spec/chewy_spec.rb +10 -7
  103. data/spec/spec_helper.rb +1 -2
  104. data/spec/support/active_record.rb +8 -1
  105. metadata +45 -264
  106. data/lib/chewy/backports/deep_dup.rb +0 -46
  107. data/lib/chewy/backports/duplicable.rb +0 -91
  108. data/lib/chewy/index/import/thread_safe_progress_bar.rb +0 -40
@@ -1,8 +1,8 @@
1
1
  module Chewy
2
2
  module Fields
3
3
  class Base
4
- attr_reader :name, :options, :value, :children
5
- attr_accessor :parent
4
+ attr_reader :name, :join_options, :options, :children
5
+ attr_accessor :parent # used by Chewy::Index::Mapping to expand nested fields
6
6
 
7
7
  def initialize(name, value: nil, **options)
8
8
  @name = name.to_sym
@@ -10,9 +10,11 @@ module Chewy
10
10
  update_options!(**options)
11
11
  @value = value
12
12
  @children = []
13
+ @allowed_relations = find_allowed_relations(options[:relations]) # for join fields
13
14
  end
14
15
 
15
16
  def update_options!(**options)
17
+ @join_options = options.delete(:join) || {}
16
18
  @options = options
17
19
  end
18
20
 
@@ -31,7 +33,7 @@ module Chewy
31
33
  else
32
34
  {}
33
35
  end
34
- mapping.reverse_merge!(options)
36
+ mapping.reverse_merge!(options.except(:ignore_blank))
35
37
  mapping.reverse_merge!(type: (children.present? ? 'object' : Chewy.default_field_type))
36
38
 
37
39
  {name => mapping}
@@ -53,30 +55,70 @@ module Chewy
53
55
  {name => result}
54
56
  end
55
57
 
58
+ def value
59
+ if join_field?
60
+ join_type = join_options[:type]
61
+ join_id = join_options[:id]
62
+ # memoize
63
+ @value ||= proc do |object|
64
+ validate_join_type!(value_by_name_proc(join_type).call(object))
65
+ # If it's a join field and it has join_id, the value is compound and contains
66
+ # both name (type) and id of the parent object
67
+ if value_by_name_proc(join_id).call(object).present?
68
+ {
69
+ name: value_by_name_proc(join_type).call(object), # parent type
70
+ parent: value_by_name_proc(join_id).call(object) # parent id
71
+ }
72
+ else
73
+ value_by_name_proc(join_type).call(object)
74
+ end
75
+ end
76
+ else
77
+ @value
78
+ end
79
+ end
80
+
56
81
  private
57
82
 
58
83
  def geo_point?
59
84
  @options[:type].to_s == 'geo_point'
60
85
  end
61
86
 
87
+ def join_field?
88
+ @options[:type].to_s == 'join'
89
+ end
90
+
62
91
  def ignore_blank?
63
92
  @options.fetch(:ignore_blank) { geo_point? }
64
93
  end
65
94
 
66
95
  def evaluate(objects)
67
- object = objects.first
68
-
69
96
  if value.is_a?(Proc)
70
- if value.arity.zero?
71
- object.instance_exec(&value)
72
- elsif value.arity.negative?
73
- value.call(*object)
74
- else
75
- value.call(*objects.first(value.arity))
76
- end
97
+ value_by_proc(objects, value)
77
98
  else
78
- message = value.is_a?(Symbol) || value.is_a?(String) ? value.to_sym : name
99
+ value_by_name(objects, value)
100
+ end
101
+ end
79
102
 
103
+ def value_by_proc(objects, value)
104
+ object = objects.first
105
+ if value.arity.zero?
106
+ object.instance_exec(&value)
107
+ elsif value.arity.negative?
108
+ value.call(*object)
109
+ else
110
+ value.call(*objects.first(value.arity))
111
+ end
112
+ end
113
+
114
+ def value_by_name(objects, value)
115
+ object = objects.first
116
+ message = value.is_a?(Symbol) || value.is_a?(String) ? value.to_sym : name
117
+ value_by_name_proc(message).call(object)
118
+ end
119
+
120
+ def value_by_name_proc(message)
121
+ proc do |object|
80
122
  if object.is_a?(Hash)
81
123
  if object.key?(message)
82
124
  object[message]
@@ -89,6 +131,20 @@ module Chewy
89
131
  end
90
132
  end
91
133
 
134
+ def validate_join_type!(type)
135
+ return unless type
136
+ return if @allowed_relations.include?(type.to_sym)
137
+
138
+ raise Chewy::InvalidJoinFieldType.new(type, @name, options[:relations])
139
+ end
140
+
141
+ def find_allowed_relations(relations)
142
+ return [] unless relations
143
+ return relations unless relations.is_a?(Hash)
144
+
145
+ (relations.keys + relations.values).flatten.uniq
146
+ end
147
+
92
148
  def compose_children(value, *parent_objects)
93
149
  return unless value
94
150
 
@@ -1,7 +1,7 @@
1
1
  module Chewy
2
2
  module Fields
3
3
  class Root < Chewy::Fields::Base
4
- attr_reader :dynamic_templates, :id, :parent, :parent_id
4
+ attr_reader :dynamic_templates, :id
5
5
 
6
6
  def initialize(name, **options)
7
7
  super(name, **options)
@@ -12,9 +12,7 @@ module Chewy
12
12
 
13
13
  def update_options!(**options)
14
14
  @id = options.fetch(:id, options.fetch(:_id, @id))
15
- @parent = options.fetch(:parent, options.fetch(:_parent, @parent))
16
- @parent_id = options.fetch(:parent_id, @parent_id)
17
- @options.merge!(options.except(:id, :_id, :parent, :_parent, :parent_id, :type))
15
+ @options.merge!(options.except(:id, :_id, :type))
18
16
  end
19
17
 
20
18
  def mappings_hash
@@ -50,12 +48,6 @@ module Chewy
50
48
  end
51
49
  end
52
50
 
53
- def compose_parent(object)
54
- return unless parent_id
55
-
56
- parent_id.arity.zero? ? object.instance_exec(&parent_id) : parent_id.call(object)
57
- end
58
-
59
51
  def compose_id(object)
60
52
  return unless id
61
53
 
@@ -146,7 +146,7 @@ module Chewy
146
146
  # @param journal [true, false] journaling is switched off for import during reset by default
147
147
  # @param import_options [Hash] options, passed to the import call
148
148
  # @return [true, false] false in case of errors
149
- def reset!(suffix = nil, apply_journal: true, journal: false, progressbar: false, **import_options)
149
+ def reset!(suffix = nil, apply_journal: true, journal: false, **import_options)
150
150
  result = if suffix.present?
151
151
  start_time = Time.now
152
152
  indexes = self.indexes - [index_name]
@@ -156,23 +156,27 @@ module Chewy
156
156
  suffixed_name = index_name(suffix: suffix)
157
157
 
158
158
  optimize_index_settings suffixed_name
159
- result = import import_options.merge(
159
+ result = import(**import_options.merge(
160
160
  suffix: suffix,
161
161
  journal: journal,
162
- refresh: !Chewy.reset_disable_refresh_interval,
163
- progressbar: progressbar
164
- )
162
+ refresh: !Chewy.reset_disable_refresh_interval
163
+ ))
165
164
  original_index_settings suffixed_name
166
165
 
167
166
  delete if indexes.blank?
168
- update_aliases(indexes, general_name, suffixed_name)
167
+ client.indices.update_aliases body: {actions: [
168
+ *indexes.map do |index|
169
+ {remove: {index: index, alias: general_name}}
170
+ end,
171
+ {add: {index: suffixed_name, alias: general_name}}
172
+ ]}
169
173
  client.indices.delete index: indexes if indexes.present?
170
174
 
171
175
  self.journal.apply(start_time, **import_options) if apply_journal
172
176
  result
173
177
  else
174
178
  purge!
175
- import import_options.merge(journal: journal)
179
+ import(**import_options.merge(journal: journal))
176
180
  end
177
181
 
178
182
  specification.lock!
@@ -233,15 +237,6 @@ module Chewy
233
237
 
234
238
  private
235
239
 
236
- def update_aliases(indexes, general_name, suffixed_name)
237
- client.indices.update_aliases body: {actions: [
238
- *indexes.map do |index|
239
- {remove: {index: index, alias: general_name}}
240
- end,
241
- {add: {index: suffixed_name, alias: general_name}}
242
- ]}
243
- end
244
-
245
240
  def optimize_index_settings(index_name)
246
241
  settings = {}
247
242
  settings[:refresh_interval] = -1 if Chewy.reset_disable_refresh_interval
@@ -6,16 +6,26 @@ module Chewy
6
6
  class ActiveRecord < Orm
7
7
  def self.accepts?(target)
8
8
  defined?(::ActiveRecord::Base) && (
9
- target.is_a?(Class) && target < ::ActiveRecord::Base ||
9
+ (target.is_a?(Class) && target < ::ActiveRecord::Base) ||
10
10
  target.is_a?(::ActiveRecord::Relation))
11
11
  end
12
12
 
13
13
  private
14
14
 
15
15
  def cleanup_default_scope!
16
- if Chewy.logger && (@default_scope.arel.orders.present? ||
16
+ behavior = Chewy.config.import_scope_cleanup_behavior
17
+
18
+ if behavior != :ignore && (@default_scope.arel.orders.present? ||
17
19
  @default_scope.arel.limit.present? || @default_scope.arel.offset.present?)
18
- Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified')
20
+ if behavior == :warn && Chewy.logger
21
+ gem_dir = File.realpath('../..', __dir__)
22
+ source = caller.grep_v(Regexp.new(gem_dir)).first
23
+ Chewy.logger.warn(
24
+ "Default type scope order, limit and offset are ignored and will be nullified (called from: #{source})"
25
+ )
26
+ elsif behavior == :raise
27
+ raise ImportScopeCleanupError, 'Default type scope order, limit and offset are ignored and will be nullified'
28
+ end
19
29
  end
20
30
 
21
31
  @default_scope = @default_scope.reorder(nil).limit(nil).offset(nil)
@@ -94,6 +104,11 @@ module Chewy
94
104
  object_class.connection.execute(sql).map(&converter)
95
105
  end
96
106
 
107
+ def raw(scope, converter)
108
+ sql = scope.to_sql
109
+ object_class.connection.execute(sql).map(&converter)
110
+ end
111
+
97
112
  def relation_class
98
113
  ::ActiveRecord::Relation
99
114
  end
@@ -192,16 +192,6 @@ module Chewy
192
192
  end
193
193
  end
194
194
 
195
- def import_count(*args)
196
- collection = if args.first.empty? && @target.respond_to?(import_all_method)
197
- @target.send(import_all_method)
198
- else
199
- args.flatten(1).compact
200
- end
201
-
202
- collection.count
203
- end
204
-
205
195
  private
206
196
 
207
197
  def import_objects(objects, options)
@@ -101,23 +101,13 @@ module Chewy
101
101
  additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope]
102
102
 
103
103
  loaded_objects = load_scope_objects(scope, additional_scope)
104
- .index_by do |object|
105
- object.public_send(primary_key).to_s
106
- end
107
-
108
- ids.map { |id| loaded_objects[id.to_s] }
109
- end
104
+ loaded_objects = raw(loaded_objects, options[:raw_import]) if options[:raw_import]
110
105
 
111
- def import_count(*args)
112
- collection = if args.first.empty?
113
- default_scope
114
- elsif args.first.is_a?(relation_class)
115
- args.first
116
- else
117
- args.flatten.compact
106
+ indexed_objects = loaded_objects.index_by do |object|
107
+ object.public_send(primary_key).to_s
118
108
  end
119
109
 
120
- collection.count
110
+ ids.map { |id| indexed_objects[id.to_s] }
121
111
  end
122
112
 
123
113
  private
@@ -12,13 +12,21 @@ module Chewy
12
12
  def initialize(index, collection)
13
13
  @index = index
14
14
  @collection = collection
15
- @index._crutches.each_key do |name|
16
- singleton_class.class_eval <<-METHOD, __FILE__, __LINE__ + 1
17
- def #{name}
18
- @#{name} ||= @index._crutches[:#{name}].call @collection
19
- end
20
- METHOD
21
- end
15
+ @crutches_instances = {}
16
+ end
17
+
18
+ def method_missing(name, *, **)
19
+ return self[name] if @index._crutches.key?(name)
20
+
21
+ super
22
+ end
23
+
24
+ def respond_to_missing?(name, include_private = false)
25
+ @index._crutches.key?(name) || super
26
+ end
27
+
28
+ def [](name)
29
+ @crutches_instances[name] ||= @index._crutches[:"#{name}"].call(@collection)
22
30
  end
23
31
  end
24
32
 
@@ -4,7 +4,7 @@ module Chewy
4
4
  # This class's purpose is to build ES client-acceptable bulk
5
5
  # request body from the passed objects for index and deletion.
6
6
  # It handles parent-child relationships as well by fetching
7
- # existing documents from ES, taking their `_parent` field and
7
+ # existing documents from ES and database, taking their join field values and
8
8
  # using it in the bulk body.
9
9
  # If fields are passed - it creates partial update entries except for
10
10
  # the cases when the type has parent and parent_id has been changed.
@@ -24,9 +24,11 @@ module Chewy
24
24
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
25
  # @return [Array<Hash>] bulk body
26
26
  def bulk_body
27
+ populate_cache
28
+
27
29
  @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
28
30
  @delete.flat_map(&method(:delete_entry))
29
- )
31
+ ).uniq
30
32
  end
31
33
 
32
34
  # The only purpose of this method is to cache document ids for
@@ -39,64 +41,249 @@ module Chewy
39
41
 
40
42
  private
41
43
 
42
- def crutches
43
- @crutches ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
44
- end
45
-
46
- def parents
47
- return unless type_root.parent_id
48
-
49
- @parents ||= begin
50
- ids = @index.map do |object|
51
- object.respond_to?(:id) ? object.id : object
52
- end
53
- ids.concat(@delete.map do |object|
54
- object.respond_to?(:id) ? object.id : object
55
- end)
56
- @index.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
- end
44
+ def crutches_for_index
45
+ @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
58
46
  end
59
47
 
60
48
  def index_entry(object)
61
49
  entry = {}
62
50
  entry[:_id] = index_object_ids[object] if index_object_ids[object]
51
+ entry[:routing] = routing(object) if join_field?
63
52
 
64
- if parents
65
- entry[:parent] = type_root.compose_parent(object)
66
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
- end
68
-
69
- if parent && entry[:parent].to_s != parent
70
- entry[:data] = @index.compose(object, crutches)
71
- [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
53
+ parent = cache(entry[:_id])
54
+ data = data_for(object) if parent.present?
55
+ if parent.present? && parent_changed?(data, parent)
56
+ reindex_entries(object, data) + reindex_descendants(object)
72
57
  elsif @fields.present?
73
58
  return [] unless entry[:_id]
74
59
 
75
- entry[:data] = {doc: @index.compose(object, crutches, fields: @fields)}
60
+ entry[:data] = {doc: data_for(object, fields: @fields)}
76
61
  [{update: entry}]
77
62
  else
78
- entry[:data] = @index.compose(object, crutches)
63
+ entry[:data] = data || data_for(object)
79
64
  [{index: entry}]
80
65
  end
81
66
  end
82
67
 
68
+ def reindex_entries(object, data, root: object)
69
+ entry = {}
70
+ entry[:_id] = index_object_ids[object] || entry_id(object)
71
+ entry[:data] = data
72
+ entry[:routing] = routing(root) || routing(object) if join_field?
73
+ delete = delete_single_entry(object, root: root).first
74
+ index = {index: entry}
75
+ [delete, index]
76
+ end
77
+
78
+ def reindex_descendants(root)
79
+ descendants = load_descendants(root)
80
+ crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
81
+ descendants.flat_map do |object|
82
+ reindex_entries(
83
+ object,
84
+ data_for(object, crutches: crutches),
85
+ root: root
86
+ )
87
+ end
88
+ end
89
+
83
90
  def delete_entry(object)
91
+ delete_single_entry(object)
92
+ end
93
+
94
+ def delete_single_entry(object, root: object)
84
95
  entry = {}
85
96
  entry[:_id] = entry_id(object)
86
97
  entry[:_id] ||= object.as_json
87
98
 
88
99
  return [] if entry[:_id].blank?
89
100
 
90
- if parents
91
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
92
- return [] unless parent
101
+ if join_field?
102
+ cached_parent = cache(entry[:_id])
103
+ entry_parent_id =
104
+ if cached_parent
105
+ cached_parent[:parent_id]
106
+ else
107
+ find_parent_id(object)
108
+ end
93
109
 
94
- entry[:parent] = parent
110
+ entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
111
+ entry[:parent] = entry_parent_id if entry_parent_id
95
112
  end
96
113
 
97
114
  [{delete: entry}]
98
115
  end
99
116
 
117
+ def load_descendants(root)
118
+ root_type = join_field_type(root)
119
+ return [] unless root_type
120
+
121
+ descendant_ids = []
122
+ grouped_parents = {root_type => [root.id]}
123
+ # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
124
+ until grouped_parents.empty?
125
+ children_data = grouped_parents.flat_map do |parent_type, parent_ids|
126
+ @index.query(
127
+ has_parent: {
128
+ parent_type: parent_type,
129
+ # ignore_unmapped to avoid error for the leaves of the tree
130
+ # (types without children)
131
+ ignore_unmapped: true,
132
+ query: {ids: {values: parent_ids}}
133
+ }
134
+ ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
135
+ end
136
+ descendant_ids |= children_data.map(&:last)
137
+
138
+ grouped_parents = {}
139
+ children_data.each do |name, id|
140
+ next unless name
141
+
142
+ grouped_parents[name] ||= []
143
+ grouped_parents[name] << id
144
+ end
145
+ end
146
+ # query the primary database to load the descendants' records
147
+ @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
148
+ end
149
+
150
+ def populate_cache
151
+ @cache = load_cache
152
+ end
153
+
154
+ def cache(id)
155
+ @cache[id.to_s]
156
+ end
157
+
158
+ def load_cache
159
+ return {} unless join_field?
160
+
161
+ @index
162
+ .filter(ids: {values: ids_for_cache})
163
+ .order('_doc')
164
+ .pluck(:_id, :_routing, join_field)
165
+ .to_h do |id, routing, join|
166
+ [
167
+ id,
168
+ {routing: routing, parent_id: join['parent']}
169
+ ]
170
+ end
171
+ end
172
+
173
+ def existing_routing(id)
174
+ # All objects needed here should be cached in #load_cache,
175
+ # if not, we return nil. In some cases we don't have existing routing cached,
176
+ # e.g. for loaded descendants
177
+ return unless cache(id)
178
+
179
+ cache(id)[:routing]
180
+ end
181
+
182
+ # Two types of ids:
183
+ # * of parents of the objects to be indexed
184
+ # * of objects to be deleted
185
+ def ids_for_cache
186
+ ids = @to_index.flat_map do |object|
187
+ [find_parent_id(object), object.id] if object.respond_to?(:id)
188
+ end
189
+ ids.concat(@delete.map do |object|
190
+ object.id if object.respond_to?(:id)
191
+ end)
192
+ ids.uniq.compact
193
+ end
194
+
195
+ def routing(object)
196
+ # filter out non-model objects, early return on object==nil
197
+ return unless object.respond_to?(:id)
198
+
199
+ parent_id = find_parent_id(object)
200
+ if parent_id
201
+ routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
202
+ else
203
+ object.id.to_s
204
+ end
205
+ end
206
+
207
+ def find_parent_id(object)
208
+ return unless object.respond_to?(:id)
209
+
210
+ join = data_for(object, fields: [join_field.to_sym])[join_field]
211
+ join['parent'] if join
212
+ end
213
+
214
+ def join_field
215
+ return @join_field if defined?(@join_field)
216
+
217
+ @join_field = find_join_field
218
+ end
219
+
220
+ def find_join_field
221
+ type_settings = @index.mappings_hash[:mappings]
222
+ return unless type_settings
223
+
224
+ properties = type_settings[:properties]
225
+ join_fields = properties.find { |_, options| options[:type] == :join }
226
+ return unless join_fields
227
+
228
+ join_fields.first.to_s
229
+ end
230
+
231
+ def join_field_type(object)
232
+ return unless join_field?
233
+
234
+ raw_object =
235
+ if @index._default_import_options[:raw_import]
236
+ @index._default_import_options[:raw_import].call(object.attributes)
237
+ else
238
+ object
239
+ end
240
+
241
+ join_field_value = data_for(
242
+ raw_object,
243
+ fields: [join_field.to_sym], # build only the field that is needed
244
+ crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
245
+ )[join_field]
246
+
247
+ case join_field_value
248
+ when String
249
+ join_field_value
250
+ when Hash
251
+ join_field_value['name']
252
+ end
253
+ end
254
+
255
+ def join_field?
256
+ join_field && !join_field.empty?
257
+ end
258
+
259
+ def data_for(object, fields: [], crutches: crutches_for_index)
260
+ @index.compose(object, crutches, fields: fields)
261
+ end
262
+
263
+ def parent_changed?(data, old_parent)
264
+ return false unless old_parent
265
+ return false unless join_field?
266
+ return false unless @fields.include?(join_field.to_sym)
267
+ return false unless data.key?(join_field)
268
+
269
+ # The join field value can be a hash, e.g.:
270
+ # {"name": "child", "parent": "123"} for a child
271
+ # {"name": "parent"} for a parent
272
+ # but it can also be a string: (e.g. "parent") for a parent:
273
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
274
+ new_join_field_value = data[join_field]
275
+ if new_join_field_value.is_a? Hash
276
+ # If we have a hash in the join field,
277
+ # we're taking the `parent` field that holds the parent id.
278
+ new_parent_id = new_join_field_value['parent']
279
+ new_parent_id != old_parent[:parent_id]
280
+ else
281
+ # If there is a non-hash value (String or nil), it means that the join field is changed
282
+ # and the current object is no longer a child.
283
+ true
284
+ end
285
+ end
286
+
100
287
  def entry_id(object)
101
288
  if type_root.id
102
289
  type_root.compose_id(object)
@@ -33,7 +33,7 @@ module Chewy
33
33
  return [] if body.blank?
34
34
 
35
35
  request_bodies(body).each_with_object([]) do |request_body, results|
36
- response = @index.client.bulk request_base.merge(body: request_body) if request_body.present?
36
+ response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
37
37
 
38
38
  next unless response.try(:[], 'errors')
39
39
 
@@ -51,9 +51,9 @@ module Chewy
51
51
  @parallel_options = @options.delete(:parallel)
52
52
  if @parallel_options && !@parallel_options.is_a?(Hash)
53
53
  @parallel_options = if @parallel_options.is_a?(Integer)
54
- {in_threads: @parallel_options}
54
+ {in_processes: @parallel_options}
55
55
  else
56
- {in_threads: [::Parallel.processor_count, ActiveRecord::Base.connection_pool.size].min}
56
+ {}
57
57
  end
58
58
  end
59
59
  @errors = []
@@ -64,7 +64,7 @@ module Chewy
64
64
  # Creates the journal index and the corresponding index if necessary.
65
65
  # @return [Object] whatever
66
66
  def create_indexes!
67
- Chewy::Stash::Journal.create if @options[:journal]
67
+ Chewy::Stash::Journal.create if @options[:journal] && !Chewy.configuration[:skip_journal_creation_on_import]
68
68
  return if Chewy.configuration[:skip_index_creation_on_import]
69
69
 
70
70
  @index.create!(**@bulk_options.slice(:suffix)) unless @index.exists?