chewy 7.2.3 → 7.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ module Chewy
4
4
  # This class's purpose is to build ES client-acceptable bulk
5
5
  # request body from the passed objects for index and deletion.
6
6
  # It handles parent-child relationships as well by fetching
7
- # existing documents from ES, taking their `_parent` field and
7
+ # existing documents from ES and database, taking their join field values and
8
8
  # using it in the bulk body.
9
9
  # If fields are passed - it creates partial update entries except for
10
10
  # the cases when the type has parent and parent_id has been changed.
@@ -24,9 +24,11 @@ module Chewy
24
24
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
25
  # @return [Array<Hash>] bulk body
26
26
  def bulk_body
27
+ populate_cache
28
+
27
29
  @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
28
30
  @delete.flat_map(&method(:delete_entry))
29
- )
31
+ ).uniq
30
32
  end
31
33
 
32
34
  # The only purpose of this method is to cache document ids for
@@ -39,64 +41,250 @@ module Chewy
39
41
 
40
42
  private
41
43
 
42
- def crutches
43
- @crutches ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
44
- end
45
-
46
- def parents
47
- return unless type_root.parent_id
48
-
49
- @parents ||= begin
50
- ids = @index.map do |object|
51
- object.respond_to?(:id) ? object.id : object
52
- end
53
- ids.concat(@delete.map do |object|
54
- object.respond_to?(:id) ? object.id : object
55
- end)
56
- @index.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
- end
44
+ def crutches_for_index
45
+ @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
58
46
  end
59
47
 
60
48
  def index_entry(object)
61
49
  entry = {}
62
50
  entry[:_id] = index_object_ids[object] if index_object_ids[object]
63
51
 
64
- if parents
65
- entry[:parent] = type_root.compose_parent(object)
66
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
- end
52
+ data = data_for(object)
53
+ parent = cache(entry[:_id])
68
54
 
69
- if parent && entry[:parent].to_s != parent
70
- entry[:data] = @index.compose(object, crutches)
71
- [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
55
+ entry[:routing] = routing(object) if join_field?
56
+ if parent_changed?(data, parent)
57
+ reindex_entries(object, data) + reindex_descendants(object)
72
58
  elsif @fields.present?
73
59
  return [] unless entry[:_id]
74
60
 
75
- entry[:data] = {doc: @index.compose(object, crutches, fields: @fields)}
61
+ entry[:data] = {doc: data_for(object, fields: @fields)}
76
62
  [{update: entry}]
77
63
  else
78
- entry[:data] = @index.compose(object, crutches)
64
+ entry[:data] = data
79
65
  [{index: entry}]
80
66
  end
81
67
  end
82
68
 
69
+ def reindex_entries(object, data, root: object)
70
+ entry = {}
71
+ entry[:_id] = index_object_ids[object] || entry_id(object)
72
+ entry[:data] = data
73
+ entry[:routing] = routing(root) || routing(object) if join_field?
74
+ delete = delete_single_entry(object, root: root).first
75
+ index = {index: entry}
76
+ [delete, index]
77
+ end
78
+
79
+ def reindex_descendants(root)
80
+ descendants = load_descendants(root)
81
+ crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
82
+ descendants.flat_map do |object|
83
+ reindex_entries(
84
+ object,
85
+ data_for(object, crutches: crutches),
86
+ root: root
87
+ )
88
+ end
89
+ end
90
+
83
91
  def delete_entry(object)
92
+ delete_single_entry(object)
93
+ end
94
+
95
+ def delete_single_entry(object, root: object)
84
96
  entry = {}
85
97
  entry[:_id] = entry_id(object)
86
98
  entry[:_id] ||= object.as_json
87
99
 
88
100
  return [] if entry[:_id].blank?
89
101
 
90
- if parents
91
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
92
- return [] unless parent
102
+ if join_field?
103
+ cached_parent = cache(entry[:_id])
104
+ entry_parent_id =
105
+ if cached_parent
106
+ cached_parent[:parent_id]
107
+ else
108
+ find_parent_id(object)
109
+ end
93
110
 
94
- entry[:parent] = parent
111
+ entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
112
+ entry[:parent] = entry_parent_id if entry_parent_id
95
113
  end
96
114
 
97
115
  [{delete: entry}]
98
116
  end
99
117
 
118
+ def load_descendants(root)
119
+ root_type = join_field_type(root)
120
+ return [] unless root_type
121
+
122
+ descendant_ids = []
123
+ grouped_parents = {root_type => [root.id]}
124
+ # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
125
+ until grouped_parents.empty?
126
+ children_data = grouped_parents.flat_map do |parent_type, parent_ids|
127
+ @index.query(
128
+ has_parent: {
129
+ parent_type: parent_type,
130
+ # ignore_unmapped to avoid error for the leaves of the tree
131
+ # (types without children)
132
+ ignore_unmapped: true,
133
+ query: {ids: {values: parent_ids}}
134
+ }
135
+ ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
136
+ end
137
+ descendant_ids |= children_data.map(&:last)
138
+
139
+ grouped_parents = {}
140
+ children_data.each do |name, id|
141
+ next unless name
142
+
143
+ grouped_parents[name] ||= []
144
+ grouped_parents[name] << id
145
+ end
146
+ end
147
+ # query the primary database to load the descendants' records
148
+ @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
149
+ end
150
+
151
+ def populate_cache
152
+ @cache = load_cache
153
+ end
154
+
155
+ def cache(id)
156
+ @cache[id.to_s]
157
+ end
158
+
159
+ def load_cache
160
+ return {} unless join_field?
161
+
162
+ @index
163
+ .filter(ids: {values: ids_for_cache})
164
+ .order('_doc')
165
+ .pluck(:_id, :_routing, join_field)
166
+ .map do |id, routing, join|
167
+ [
168
+ id,
169
+ {routing: routing, parent_id: join['parent']}
170
+ ]
171
+ end.to_h
172
+ end
173
+
174
+ def existing_routing(id)
175
+ # All objects needed here should be cached in #load_cache,
176
+ # if not, we return nil. In some cases we don't have existing routing cached,
177
+ # e.g. for loaded descendants
178
+ return unless cache(id)
179
+
180
+ cache(id)[:routing]
181
+ end
182
+
183
+ # Two types of ids:
184
+ # * of parents of the objects to be indexed
185
+ # * of objects to be deleted
186
+ def ids_for_cache
187
+ ids = @to_index.flat_map do |object|
188
+ [find_parent_id(object), object.id] if object.respond_to?(:id)
189
+ end
190
+ ids.concat(@delete.map do |object|
191
+ object.id if object.respond_to?(:id)
192
+ end)
193
+ ids.uniq.compact
194
+ end
195
+
196
+ def routing(object)
197
+ # filter out non-model objects, early return on object==nil
198
+ return unless object.respond_to?(:id)
199
+
200
+ parent_id = find_parent_id(object)
201
+ if parent_id
202
+ routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
203
+ else
204
+ object.id.to_s
205
+ end
206
+ end
207
+
208
+ def find_parent_id(object)
209
+ return unless object.respond_to?(:id)
210
+
211
+ join = data_for(object, fields: [join_field.to_sym])[join_field]
212
+ join['parent'] if join
213
+ end
214
+
215
+ def join_field
216
+ return @join_field if defined?(@join_field)
217
+
218
+ @join_field = find_join_field
219
+ end
220
+
221
+ def find_join_field
222
+ type_settings = @index.mappings_hash[:mappings]
223
+ return unless type_settings
224
+
225
+ properties = type_settings[:properties]
226
+ join_fields = properties.find { |_, options| options[:type] == :join }
227
+ return unless join_fields
228
+
229
+ join_fields.first.to_s
230
+ end
231
+
232
+ def join_field_type(object)
233
+ return unless join_field?
234
+
235
+ raw_object =
236
+ if @index._default_import_options[:raw_import]
237
+ @index._default_import_options[:raw_import].call(object.attributes)
238
+ else
239
+ object
240
+ end
241
+
242
+ join_field_value = data_for(
243
+ raw_object,
244
+ fields: [join_field.to_sym], # build only the field that is needed
245
+ crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
246
+ )[join_field]
247
+
248
+ case join_field_value
249
+ when String
250
+ join_field_value
251
+ when Hash
252
+ join_field_value['name']
253
+ end
254
+ end
255
+
256
+ def join_field?
257
+ join_field && !join_field.empty?
258
+ end
259
+
260
+ def data_for(object, fields: [], crutches: crutches_for_index)
261
+ @index.compose(object, crutches, fields: fields)
262
+ end
263
+
264
+ def parent_changed?(data, old_parent)
265
+ return false unless old_parent
266
+ return false unless join_field?
267
+ return false unless @fields.include?(join_field.to_sym)
268
+ return false unless data.key?(join_field)
269
+
270
+ # The join field value can be a hash, e.g.:
271
+ # {"name": "child", "parent": "123"} for a child
272
+ # {"name": "parent"} for a parent
273
+ # but it can also be a string: (e.g. "parent") for a parent:
274
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
275
+ new_join_field_value = data[join_field]
276
+ if new_join_field_value.is_a? Hash
277
+ # If we have a hash in the join field,
278
+ # we're taking the `parent` field that holds the parent id.
279
+ new_parent_id = new_join_field_value['parent']
280
+ new_parent_id != old_parent[:parent_id]
281
+ else
282
+ # If there is a non-hash value (String or nil), it means that the join field is changed
283
+ # and the current object is no longer a child.
284
+ true
285
+ end
286
+ end
287
+
100
288
  def entry_id(object)
101
289
  if type_root.id
102
290
  type_root.compose_id(object)
@@ -33,7 +33,7 @@ module Chewy
33
33
  return [] if body.blank?
34
34
 
35
35
  request_bodies(body).each_with_object([]) do |request_body, results|
36
- response = @index.client.bulk request_base.merge(body: request_body) if request_body.present?
36
+ response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
37
37
 
38
38
  next unless response.try(:[], 'errors')
39
39
 
@@ -36,8 +36,7 @@ module Chewy
36
36
  # passed objects from the index if they are not in the default scope
37
37
  # or marked for destruction.
38
38
  #
39
- # It handles parent-child relationships: if the object parent_id has been
40
- # changed it destroys the object and recreates it from scratch.
39
+ # It handles parent-child relationships with a join field, reindexing children when the parent is reindexed.
41
40
  #
42
41
  # Performs journaling if enabled: it stores all the ids of the imported
43
42
  # objects to a specialized index. It is possible to replay particular import
@@ -73,7 +72,7 @@ module Chewy
73
72
  # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
74
73
  # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
75
74
  # @return [true, false] false in case of errors
76
- def import(*args)
75
+ ruby2_keywords def import(*args)
77
76
  import_routine(*args).blank?
78
77
  end
79
78
 
@@ -84,7 +83,7 @@ module Chewy
84
83
  # in case of any import errors.
85
84
  #
86
85
  # @raise [Chewy::ImportFailed] in case of errors
87
- def import!(*args)
86
+ ruby2_keywords def import!(*args)
88
87
  errors = import_routine(*args)
89
88
  raise Chewy::ImportFailed.new(self, errors) if errors.present?
90
89
 
@@ -170,8 +170,8 @@ module Chewy
170
170
  # template /tit.+/, type: 'text', mapping_hash # "match_mapping_type" as an optional second argument
171
171
  # template template42: {match: 'hello*', mapping: {type: 'object'}} # or even pass a template as is
172
172
  #
173
- def template(*args)
174
- root.dynamic_template(*args)
173
+ def template(*args, **options)
174
+ root.dynamic_template(*args, **options)
175
175
  end
176
176
  alias_method :dynamic_template, :template
177
177
 
@@ -48,7 +48,7 @@ module Chewy
48
48
  extend Helpers
49
49
 
50
50
  module ActiveRecordMethods
51
- def update_index(type_name, *args, &block)
51
+ ruby2_keywords def update_index(type_name, *args, &block)
52
52
  callback_options = Observe.extract_callback_options!(args)
53
53
  update_proc = Observe.update_proc(type_name, *args, &block)
54
54
 
@@ -1,4 +1,4 @@
1
- require 'i18n/core_ext/hash'
1
+ require 'active_support/core_ext/hash/keys'
2
2
 
3
3
  # Rspec matcher `update_index`
4
4
  # To use it - add `require 'chewy/rspec'` to the `spec_helper.rb`
@@ -18,7 +18,7 @@ module Chewy
18
18
  include Scoping
19
19
  include Scrolling
20
20
  UNDEFINED = Class.new.freeze
21
- EVERFIELDS = %w[_index _type _id _parent].freeze
21
+ EVERFIELDS = %w[_index _type _id _parent _routing].freeze
22
22
  DELEGATED_METHODS = %i[
23
23
  query filter post_filter order reorder docvalue_fields
24
24
  track_scores track_total_hits request_cache explain version profile
@@ -914,7 +914,7 @@ module Chewy
914
914
 
915
915
  # Returns an array of values for specified fields.
916
916
  # Uses `source` to restrict the list of returned fields.
917
- # Fields `_id`, `_type` and `_index` are also supported.
917
+ # Fields `_id`, `_type`, `_routing` and `_index` are also supported.
918
918
  #
919
919
  # @overload pluck(field)
920
920
  # If a single field is passed - it returns an array of values.
data/lib/chewy/search.rb CHANGED
@@ -56,7 +56,7 @@ module Chewy
56
56
  #
57
57
  # @example
58
58
  # PlacesIndex.query(match: {name: 'Moscow'})
59
- def method_missing(name, *args, &block)
59
+ ruby2_keywords def method_missing(name, *args, &block)
60
60
  if search_class::DELEGATED_METHODS.include?(name)
61
61
  all.send(name, *args, &block)
62
62
  else
@@ -15,7 +15,7 @@ module Chewy
15
15
 
16
16
  def perform(type, ids, options = {})
17
17
  options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
18
- type.constantize.import!(ids, options)
18
+ type.constantize.import!(ids, **options)
19
19
  end
20
20
  end
21
21
 
@@ -15,7 +15,7 @@ module Chewy
15
15
 
16
16
  def perform(type, ids, options = {})
17
17
  options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
18
- type.constantize.import!(ids, options)
18
+ type.constantize.import!(ids, **options)
19
19
  end
20
20
  end
21
21
 
data/lib/chewy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Chewy
2
- VERSION = '7.2.3'.freeze
2
+ VERSION = '7.2.4'.freeze
3
3
  end
data/lib/chewy.rb CHANGED
@@ -4,18 +4,18 @@ require 'active_support/deprecation'
4
4
  require 'active_support/json'
5
5
  require 'active_support/log_subscriber'
6
6
 
7
+ require 'active_support/isolated_execution_state' if ActiveSupport::VERSION::MAJOR >= 7
7
8
  require 'active_support/core_ext/array/access'
8
9
  require 'active_support/core_ext/array/wrap'
9
10
  require 'active_support/core_ext/enumerable'
10
11
  require 'active_support/core_ext/hash/reverse_merge'
12
+ require 'active_support/core_ext/hash/keys'
11
13
  require 'active_support/core_ext/numeric/time'
12
14
  require 'active_support/core_ext/numeric/bytes'
13
15
  require 'active_support/core_ext/object/blank'
14
16
  require 'active_support/core_ext/object/inclusion'
15
17
  require 'active_support/core_ext/string/inflections'
16
18
 
17
- require 'i18n/core_ext/hash'
18
- require 'chewy/backports/deep_dup' unless Object.respond_to?(:deep_dup)
19
19
  require 'singleton'
20
20
  require 'base64'
21
21
 
data/lib/tasks/chewy.rake CHANGED
@@ -54,7 +54,7 @@ namespace :chewy do
54
54
 
55
55
  desc 'Update mapping of exising index with body hash'
56
56
  task :update_mapping, %i[index_name] => :environment do |_task, args|
57
- Chewy::RakeHelper.update_mapping(name: args[:name])
57
+ Chewy::RakeHelper.update_mapping(name: args[:index_name])
58
58
  end
59
59
 
60
60
  namespace :parallel do
@@ -44,24 +44,9 @@ describe Chewy::Fields::Base do
44
44
  end
45
45
 
46
46
  context 'parent objects' do
47
- let!(:country) do
48
- described_class.new(:name, value: lambda { |country, crutches|
49
- country.cities.map do |city|
50
- double(districts: city.districts, name: crutches.city_name)
51
- end
52
- })
53
- end
54
- let!(:city) do
55
- described_class.new(:name, value: lambda { |city, country, crutches|
56
- city.districts.map do |district|
57
- [district, country.name, crutches.suffix]
58
- end
59
- })
60
- end
61
- let(:district_value) { ->(district, city, country, crutches) { [district, city.name, country.name, crutches] } }
62
- let!(:district) do
63
- described_class.new(:name, value: district_value)
64
- end
47
+ let!(:country) { described_class.new(:name, value: ->(country, crutches) { country.cities.map { |city| double(districts: city.districts, name: crutches.city_name) } }) }
48
+ let!(:city) { described_class.new(:name, value: ->(city, country, crutches) { city.districts.map { |district| [district, country.name, crutches.suffix] } }) }
49
+ let!(:district) { described_class.new(:name, value: ->(district, city, country, crutches) { [district, city.name, country.name, crutches] }) }
65
50
  let(:crutches) { double(suffix: 'suffix', city_name: 'Bangkok') }
66
51
 
67
52
  before do
@@ -556,6 +541,41 @@ describe Chewy::Fields::Base do
556
541
  end
557
542
  end
558
543
 
544
+ context 'join field type' do
545
+ before do
546
+ stub_model(:comment)
547
+ stub_index(:comments) do
548
+ index_scope Comment
549
+ field :id
550
+ field :hierarchy, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id}
551
+ end
552
+ end
553
+
554
+ specify do
555
+ expect(
556
+ CommentsIndex.root.compose(
557
+ {'id' => 1, 'comment_type' => 'question'}
558
+ )
559
+ ).to eq(
560
+ {'id' => 1, 'hierarchy' => 'question'}
561
+ )
562
+
563
+ expect(
564
+ CommentsIndex.root.compose(
565
+ {'id' => 2, 'comment_type' => 'answer', 'commented_id' => 1}
566
+ )
567
+ ).to eq(
568
+ {'id' => 2, 'hierarchy' => {'name' => 'answer', 'parent' => 1}}
569
+ )
570
+
571
+ expect do
572
+ CommentsIndex.root.compose(
573
+ {'id' => 2, 'comment_type' => 'asd', 'commented_id' => 1}
574
+ )
575
+ end.to raise_error Chewy::InvalidJoinFieldType
576
+ end
577
+ end
578
+
559
579
  context 'without ignore_blank option' do
560
580
  before do
561
581
  stub_index(:countries) do
@@ -1,5 +1,11 @@
1
1
  require 'spec_helper'
2
2
 
3
+ RawCity = Struct.new(:id) do
4
+ def rating
5
+ id * 10
6
+ end
7
+ end
8
+
3
9
  describe Chewy::Index::Adapter::ActiveRecord, :active_record do
4
10
  before do
5
11
  stub_model(:city)
@@ -571,5 +577,25 @@ describe Chewy::Index::Adapter::ActiveRecord, :active_record do
571
577
  ).to eq(cities.first(2) + [nil])
572
578
  end
573
579
  end
580
+
581
+ context 'with raw_import option' do
582
+ subject { described_class.new(City) }
583
+
584
+ let!(:cities) { Array.new(3) { |i| City.create!(rating: i / 2) } }
585
+ let(:city_ids) { cities.map(&:id) }
586
+
587
+ let(:raw_import) { ->(hash) { RawCity.new(hash['id']) } }
588
+ it 'uses the custom loader' do
589
+ raw_cities = subject.load(city_ids, _index: 'cities', raw_import: raw_import).map do |c|
590
+ {id: c.id, rating: c.rating}
591
+ end
592
+
593
+ expect(raw_cities).to eq([
594
+ {id: 1, rating: 10},
595
+ {id: 2, rating: 20},
596
+ {id: 3, rating: 30}
597
+ ])
598
+ end
599
+ end
574
600
  end
575
601
  end