chewy 7.2.3 → 7.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,7 +4,7 @@ module Chewy
4
4
  # This class's purpose is to build ES client-acceptable bulk
5
5
  # request body from the passed objects for index and deletion.
6
6
  # It handles parent-child relationships as well by fetching
7
- # existing documents from ES, taking their `_parent` field and
7
+ # existing documents from ES and database, taking their join field values and
8
8
  # using it in the bulk body.
9
9
  # If fields are passed - it creates partial update entries except for
10
10
  # the cases when the type has parent and parent_id has been changed.
@@ -24,9 +24,11 @@ module Chewy
24
24
  # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
25
25
  # @return [Array<Hash>] bulk body
26
26
  def bulk_body
27
+ populate_cache
28
+
27
29
  @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
28
30
  @delete.flat_map(&method(:delete_entry))
29
- )
31
+ ).uniq
30
32
  end
31
33
 
32
34
  # The only purpose of this method is to cache document ids for
@@ -39,64 +41,250 @@ module Chewy
39
41
 
40
42
  private
41
43
 
42
- def crutches
43
- @crutches ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
44
- end
45
-
46
- def parents
47
- return unless type_root.parent_id
48
-
49
- @parents ||= begin
50
- ids = @index.map do |object|
51
- object.respond_to?(:id) ? object.id : object
52
- end
53
- ids.concat(@delete.map do |object|
54
- object.respond_to?(:id) ? object.id : object
55
- end)
56
- @index.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
57
- end
44
+ def crutches_for_index
45
+ @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
58
46
  end
59
47
 
60
48
  def index_entry(object)
61
49
  entry = {}
62
50
  entry[:_id] = index_object_ids[object] if index_object_ids[object]
63
51
 
64
- if parents
65
- entry[:parent] = type_root.compose_parent(object)
66
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
67
- end
52
+ data = data_for(object)
53
+ parent = cache(entry[:_id])
68
54
 
69
- if parent && entry[:parent].to_s != parent
70
- entry[:data] = @index.compose(object, crutches)
71
- [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
55
+ entry[:routing] = routing(object) if join_field?
56
+ if parent_changed?(data, parent)
57
+ reindex_entries(object, data) + reindex_descendants(object)
72
58
  elsif @fields.present?
73
59
  return [] unless entry[:_id]
74
60
 
75
- entry[:data] = {doc: @index.compose(object, crutches, fields: @fields)}
61
+ entry[:data] = {doc: data_for(object, fields: @fields)}
76
62
  [{update: entry}]
77
63
  else
78
- entry[:data] = @index.compose(object, crutches)
64
+ entry[:data] = data
79
65
  [{index: entry}]
80
66
  end
81
67
  end
82
68
 
69
+ def reindex_entries(object, data, root: object)
70
+ entry = {}
71
+ entry[:_id] = index_object_ids[object] || entry_id(object)
72
+ entry[:data] = data
73
+ entry[:routing] = routing(root) || routing(object) if join_field?
74
+ delete = delete_single_entry(object, root: root).first
75
+ index = {index: entry}
76
+ [delete, index]
77
+ end
78
+
79
+ def reindex_descendants(root)
80
+ descendants = load_descendants(root)
81
+ crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
82
+ descendants.flat_map do |object|
83
+ reindex_entries(
84
+ object,
85
+ data_for(object, crutches: crutches),
86
+ root: root
87
+ )
88
+ end
89
+ end
90
+
83
91
  def delete_entry(object)
92
+ delete_single_entry(object)
93
+ end
94
+
95
+ def delete_single_entry(object, root: object)
84
96
  entry = {}
85
97
  entry[:_id] = entry_id(object)
86
98
  entry[:_id] ||= object.as_json
87
99
 
88
100
  return [] if entry[:_id].blank?
89
101
 
90
- if parents
91
- parent = entry[:_id].present? && parents[entry[:_id].to_s]
92
- return [] unless parent
102
+ if join_field?
103
+ cached_parent = cache(entry[:_id])
104
+ entry_parent_id =
105
+ if cached_parent
106
+ cached_parent[:parent_id]
107
+ else
108
+ find_parent_id(object)
109
+ end
93
110
 
94
- entry[:parent] = parent
111
+ entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
112
+ entry[:parent] = entry_parent_id if entry_parent_id
95
113
  end
96
114
 
97
115
  [{delete: entry}]
98
116
  end
99
117
 
118
+ def load_descendants(root)
119
+ root_type = join_field_type(root)
120
+ return [] unless root_type
121
+
122
+ descendant_ids = []
123
+ grouped_parents = {root_type => [root.id]}
124
+ # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
125
+ until grouped_parents.empty?
126
+ children_data = grouped_parents.flat_map do |parent_type, parent_ids|
127
+ @index.query(
128
+ has_parent: {
129
+ parent_type: parent_type,
130
+ # ignore_unmapped to avoid error for the leaves of the tree
131
+ # (types without children)
132
+ ignore_unmapped: true,
133
+ query: {ids: {values: parent_ids}}
134
+ }
135
+ ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
136
+ end
137
+ descendant_ids |= children_data.map(&:last)
138
+
139
+ grouped_parents = {}
140
+ children_data.each do |name, id|
141
+ next unless name
142
+
143
+ grouped_parents[name] ||= []
144
+ grouped_parents[name] << id
145
+ end
146
+ end
147
+ # query the primary database to load the descendants' records
148
+ @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
149
+ end
150
+
151
+ def populate_cache
152
+ @cache = load_cache
153
+ end
154
+
155
+ def cache(id)
156
+ @cache[id.to_s]
157
+ end
158
+
159
+ def load_cache
160
+ return {} unless join_field?
161
+
162
+ @index
163
+ .filter(ids: {values: ids_for_cache})
164
+ .order('_doc')
165
+ .pluck(:_id, :_routing, join_field)
166
+ .map do |id, routing, join|
167
+ [
168
+ id,
169
+ {routing: routing, parent_id: join['parent']}
170
+ ]
171
+ end.to_h
172
+ end
173
+
174
+ def existing_routing(id)
175
+ # All objects needed here should be cached in #load_cache,
176
+ # if not, we return nil. In some cases we don't have existing routing cached,
177
+ # e.g. for loaded descendants
178
+ return unless cache(id)
179
+
180
+ cache(id)[:routing]
181
+ end
182
+
183
+ # Two types of ids:
184
+ # * of parents of the objects to be indexed
185
+ # * of objects to be deleted
186
+ def ids_for_cache
187
+ ids = @to_index.flat_map do |object|
188
+ [find_parent_id(object), object.id] if object.respond_to?(:id)
189
+ end
190
+ ids.concat(@delete.map do |object|
191
+ object.id if object.respond_to?(:id)
192
+ end)
193
+ ids.uniq.compact
194
+ end
195
+
196
+ def routing(object)
197
+ # filter out non-model objects, early return on object==nil
198
+ return unless object.respond_to?(:id)
199
+
200
+ parent_id = find_parent_id(object)
201
+ if parent_id
202
+ routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
203
+ else
204
+ object.id.to_s
205
+ end
206
+ end
207
+
208
+ def find_parent_id(object)
209
+ return unless object.respond_to?(:id)
210
+
211
+ join = data_for(object, fields: [join_field.to_sym])[join_field]
212
+ join['parent'] if join
213
+ end
214
+
215
+ def join_field
216
+ return @join_field if defined?(@join_field)
217
+
218
+ @join_field = find_join_field
219
+ end
220
+
221
+ def find_join_field
222
+ type_settings = @index.mappings_hash[:mappings]
223
+ return unless type_settings
224
+
225
+ properties = type_settings[:properties]
226
+ join_fields = properties.find { |_, options| options[:type] == :join }
227
+ return unless join_fields
228
+
229
+ join_fields.first.to_s
230
+ end
231
+
232
+ def join_field_type(object)
233
+ return unless join_field?
234
+
235
+ raw_object =
236
+ if @index._default_import_options[:raw_import]
237
+ @index._default_import_options[:raw_import].call(object.attributes)
238
+ else
239
+ object
240
+ end
241
+
242
+ join_field_value = data_for(
243
+ raw_object,
244
+ fields: [join_field.to_sym], # build only the field that is needed
245
+ crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
246
+ )[join_field]
247
+
248
+ case join_field_value
249
+ when String
250
+ join_field_value
251
+ when Hash
252
+ join_field_value['name']
253
+ end
254
+ end
255
+
256
+ def join_field?
257
+ join_field && !join_field.empty?
258
+ end
259
+
260
+ def data_for(object, fields: [], crutches: crutches_for_index)
261
+ @index.compose(object, crutches, fields: fields)
262
+ end
263
+
264
+ def parent_changed?(data, old_parent)
265
+ return false unless old_parent
266
+ return false unless join_field?
267
+ return false unless @fields.include?(join_field.to_sym)
268
+ return false unless data.key?(join_field)
269
+
270
+ # The join field value can be a hash, e.g.:
271
+ # {"name": "child", "parent": "123"} for a child
272
+ # {"name": "parent"} for a parent
273
+ # but it can also be a string: (e.g. "parent") for a parent:
274
+ # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
275
+ new_join_field_value = data[join_field]
276
+ if new_join_field_value.is_a? Hash
277
+ # If we have a hash in the join field,
278
+ # we're taking the `parent` field that holds the parent id.
279
+ new_parent_id = new_join_field_value['parent']
280
+ new_parent_id != old_parent[:parent_id]
281
+ else
282
+ # If there is a non-hash value (String or nil), it means that the join field is changed
283
+ # and the current object is no longer a child.
284
+ true
285
+ end
286
+ end
287
+
100
288
  def entry_id(object)
101
289
  if type_root.id
102
290
  type_root.compose_id(object)
@@ -33,7 +33,7 @@ module Chewy
33
33
  return [] if body.blank?
34
34
 
35
35
  request_bodies(body).each_with_object([]) do |request_body, results|
36
- response = @index.client.bulk request_base.merge(body: request_body) if request_body.present?
36
+ response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
37
37
 
38
38
  next unless response.try(:[], 'errors')
39
39
 
@@ -36,8 +36,7 @@ module Chewy
36
36
  # passed objects from the index if they are not in the default scope
37
37
  # or marked for destruction.
38
38
  #
39
- # It handles parent-child relationships: if the object parent_id has been
40
- # changed it destroys the object and recreates it from scratch.
39
+ # It handles parent-child relationships with a join field, reindexing children when the parent is reindexed.
41
40
  #
42
41
  # Performs journaling if enabled: it stores all the ids of the imported
43
42
  # objects to a specialized index. It is possible to replay particular import
@@ -73,7 +72,7 @@ module Chewy
73
72
  # @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
74
73
  # @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
75
74
  # @return [true, false] false in case of errors
76
- def import(*args)
75
+ ruby2_keywords def import(*args)
77
76
  import_routine(*args).blank?
78
77
  end
79
78
 
@@ -84,7 +83,7 @@ module Chewy
84
83
  # in case of any import errors.
85
84
  #
86
85
  # @raise [Chewy::ImportFailed] in case of errors
87
- def import!(*args)
86
+ ruby2_keywords def import!(*args)
88
87
  errors = import_routine(*args)
89
88
  raise Chewy::ImportFailed.new(self, errors) if errors.present?
90
89
 
@@ -170,8 +170,8 @@ module Chewy
170
170
  # template /tit.+/, type: 'text', mapping_hash # "match_mapping_type" as an optional second argument
171
171
  # template template42: {match: 'hello*', mapping: {type: 'object'}} # or even pass a template as is
172
172
  #
173
- def template(*args)
174
- root.dynamic_template(*args)
173
+ def template(*args, **options)
174
+ root.dynamic_template(*args, **options)
175
175
  end
176
176
  alias_method :dynamic_template, :template
177
177
 
@@ -48,7 +48,7 @@ module Chewy
48
48
  extend Helpers
49
49
 
50
50
  module ActiveRecordMethods
51
- def update_index(type_name, *args, &block)
51
+ ruby2_keywords def update_index(type_name, *args, &block)
52
52
  callback_options = Observe.extract_callback_options!(args)
53
53
  update_proc = Observe.update_proc(type_name, *args, &block)
54
54
 
@@ -1,4 +1,4 @@
1
- require 'i18n/core_ext/hash'
1
+ require 'active_support/core_ext/hash/keys'
2
2
 
3
3
  # Rspec matcher `update_index`
4
4
  # To use it - add `require 'chewy/rspec'` to the `spec_helper.rb`
@@ -18,7 +18,7 @@ module Chewy
18
18
  include Scoping
19
19
  include Scrolling
20
20
  UNDEFINED = Class.new.freeze
21
- EVERFIELDS = %w[_index _type _id _parent].freeze
21
+ EVERFIELDS = %w[_index _type _id _parent _routing].freeze
22
22
  DELEGATED_METHODS = %i[
23
23
  query filter post_filter order reorder docvalue_fields
24
24
  track_scores track_total_hits request_cache explain version profile
@@ -914,7 +914,7 @@ module Chewy
914
914
 
915
915
  # Returns an array of values for specified fields.
916
916
  # Uses `source` to restrict the list of returned fields.
917
- # Fields `_id`, `_type` and `_index` are also supported.
917
+ # Fields `_id`, `_type`, `_routing` and `_index` are also supported.
918
918
  #
919
919
  # @overload pluck(field)
920
920
  # If a single field is passed - it returns an array of values.
data/lib/chewy/search.rb CHANGED
@@ -56,7 +56,7 @@ module Chewy
56
56
  #
57
57
  # @example
58
58
  # PlacesIndex.query(match: {name: 'Moscow'})
59
- def method_missing(name, *args, &block)
59
+ ruby2_keywords def method_missing(name, *args, &block)
60
60
  if search_class::DELEGATED_METHODS.include?(name)
61
61
  all.send(name, *args, &block)
62
62
  else
@@ -15,7 +15,7 @@ module Chewy
15
15
 
16
16
  def perform(type, ids, options = {})
17
17
  options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
18
- type.constantize.import!(ids, options)
18
+ type.constantize.import!(ids, **options)
19
19
  end
20
20
  end
21
21
 
@@ -15,7 +15,7 @@ module Chewy
15
15
 
16
16
  def perform(type, ids, options = {})
17
17
  options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
18
- type.constantize.import!(ids, options)
18
+ type.constantize.import!(ids, **options)
19
19
  end
20
20
  end
21
21
 
data/lib/chewy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Chewy
2
- VERSION = '7.2.3'.freeze
2
+ VERSION = '7.2.4'.freeze
3
3
  end
data/lib/chewy.rb CHANGED
@@ -4,18 +4,18 @@ require 'active_support/deprecation'
4
4
  require 'active_support/json'
5
5
  require 'active_support/log_subscriber'
6
6
 
7
+ require 'active_support/isolated_execution_state' if ActiveSupport::VERSION::MAJOR >= 7
7
8
  require 'active_support/core_ext/array/access'
8
9
  require 'active_support/core_ext/array/wrap'
9
10
  require 'active_support/core_ext/enumerable'
10
11
  require 'active_support/core_ext/hash/reverse_merge'
12
+ require 'active_support/core_ext/hash/keys'
11
13
  require 'active_support/core_ext/numeric/time'
12
14
  require 'active_support/core_ext/numeric/bytes'
13
15
  require 'active_support/core_ext/object/blank'
14
16
  require 'active_support/core_ext/object/inclusion'
15
17
  require 'active_support/core_ext/string/inflections'
16
18
 
17
- require 'i18n/core_ext/hash'
18
- require 'chewy/backports/deep_dup' unless Object.respond_to?(:deep_dup)
19
19
  require 'singleton'
20
20
  require 'base64'
21
21
 
data/lib/tasks/chewy.rake CHANGED
@@ -54,7 +54,7 @@ namespace :chewy do
54
54
 
55
55
  desc 'Update mapping of exising index with body hash'
56
56
  task :update_mapping, %i[index_name] => :environment do |_task, args|
57
- Chewy::RakeHelper.update_mapping(name: args[:name])
57
+ Chewy::RakeHelper.update_mapping(name: args[:index_name])
58
58
  end
59
59
 
60
60
  namespace :parallel do
@@ -44,24 +44,9 @@ describe Chewy::Fields::Base do
44
44
  end
45
45
 
46
46
  context 'parent objects' do
47
- let!(:country) do
48
- described_class.new(:name, value: lambda { |country, crutches|
49
- country.cities.map do |city|
50
- double(districts: city.districts, name: crutches.city_name)
51
- end
52
- })
53
- end
54
- let!(:city) do
55
- described_class.new(:name, value: lambda { |city, country, crutches|
56
- city.districts.map do |district|
57
- [district, country.name, crutches.suffix]
58
- end
59
- })
60
- end
61
- let(:district_value) { ->(district, city, country, crutches) { [district, city.name, country.name, crutches] } }
62
- let!(:district) do
63
- described_class.new(:name, value: district_value)
64
- end
47
+ let!(:country) { described_class.new(:name, value: ->(country, crutches) { country.cities.map { |city| double(districts: city.districts, name: crutches.city_name) } }) }
48
+ let!(:city) { described_class.new(:name, value: ->(city, country, crutches) { city.districts.map { |district| [district, country.name, crutches.suffix] } }) }
49
+ let!(:district) { described_class.new(:name, value: ->(district, city, country, crutches) { [district, city.name, country.name, crutches] }) }
65
50
  let(:crutches) { double(suffix: 'suffix', city_name: 'Bangkok') }
66
51
 
67
52
  before do
@@ -556,6 +541,41 @@ describe Chewy::Fields::Base do
556
541
  end
557
542
  end
558
543
 
544
+ context 'join field type' do
545
+ before do
546
+ stub_model(:comment)
547
+ stub_index(:comments) do
548
+ index_scope Comment
549
+ field :id
550
+ field :hierarchy, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id}
551
+ end
552
+ end
553
+
554
+ specify do
555
+ expect(
556
+ CommentsIndex.root.compose(
557
+ {'id' => 1, 'comment_type' => 'question'}
558
+ )
559
+ ).to eq(
560
+ {'id' => 1, 'hierarchy' => 'question'}
561
+ )
562
+
563
+ expect(
564
+ CommentsIndex.root.compose(
565
+ {'id' => 2, 'comment_type' => 'answer', 'commented_id' => 1}
566
+ )
567
+ ).to eq(
568
+ {'id' => 2, 'hierarchy' => {'name' => 'answer', 'parent' => 1}}
569
+ )
570
+
571
+ expect do
572
+ CommentsIndex.root.compose(
573
+ {'id' => 2, 'comment_type' => 'asd', 'commented_id' => 1}
574
+ )
575
+ end.to raise_error Chewy::InvalidJoinFieldType
576
+ end
577
+ end
578
+
559
579
  context 'without ignore_blank option' do
560
580
  before do
561
581
  stub_index(:countries) do
@@ -1,5 +1,11 @@
1
1
  require 'spec_helper'
2
2
 
3
+ RawCity = Struct.new(:id) do
4
+ def rating
5
+ id * 10
6
+ end
7
+ end
8
+
3
9
  describe Chewy::Index::Adapter::ActiveRecord, :active_record do
4
10
  before do
5
11
  stub_model(:city)
@@ -571,5 +577,25 @@ describe Chewy::Index::Adapter::ActiveRecord, :active_record do
571
577
  ).to eq(cities.first(2) + [nil])
572
578
  end
573
579
  end
580
+
581
+ context 'with raw_import option' do
582
+ subject { described_class.new(City) }
583
+
584
+ let!(:cities) { Array.new(3) { |i| City.create!(rating: i / 2) } }
585
+ let(:city_ids) { cities.map(&:id) }
586
+
587
+ let(:raw_import) { ->(hash) { RawCity.new(hash['id']) } }
588
+ it 'uses the custom loader' do
589
+ raw_cities = subject.load(city_ids, _index: 'cities', raw_import: raw_import).map do |c|
590
+ {id: c.id, rating: c.rating}
591
+ end
592
+
593
+ expect(raw_cities).to eq([
594
+ {id: 1, rating: 10},
595
+ {id: 2, rating: 20},
596
+ {id: 3, rating: 30}
597
+ ])
598
+ end
599
+ end
574
600
  end
575
601
  end