chewy 7.2.1 → 7.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +1 -0
- data/.github/dependabot.yml +42 -0
- data/.github/workflows/ruby.yml +28 -26
- data/.rubocop.yml +4 -1
- data/CHANGELOG.md +196 -0
- data/Gemfile +4 -3
- data/README.md +203 -20
- data/chewy.gemspec +4 -18
- data/gemfiles/base.gemfile +12 -0
- data/gemfiles/rails.6.1.activerecord.gemfile +2 -1
- data/gemfiles/{rails.5.2.activerecord.gemfile → rails.7.0.activerecord.gemfile} +6 -3
- data/gemfiles/{rails.6.0.activerecord.gemfile → rails.7.1.activerecord.gemfile} +6 -3
- data/lib/chewy/config.rb +22 -14
- data/lib/chewy/elastic_client.rb +31 -0
- data/lib/chewy/errors.rb +11 -2
- data/lib/chewy/fields/base.rb +69 -13
- data/lib/chewy/fields/root.rb +2 -10
- data/lib/chewy/index/actions.rb +11 -16
- data/lib/chewy/index/adapter/active_record.rb +18 -3
- data/lib/chewy/index/adapter/object.rb +0 -10
- data/lib/chewy/index/adapter/orm.rb +4 -14
- data/lib/chewy/index/crutch.rb +15 -7
- data/lib/chewy/index/import/bulk_builder.rb +219 -32
- data/lib/chewy/index/import/bulk_request.rb +1 -1
- data/lib/chewy/index/import/routine.rb +3 -3
- data/lib/chewy/index/import.rb +45 -31
- data/lib/chewy/index/mapping.rb +2 -2
- data/lib/chewy/index/observe/active_record_methods.rb +87 -0
- data/lib/chewy/index/observe/callback.rb +34 -0
- data/lib/chewy/index/observe.rb +3 -58
- data/lib/chewy/index/syncer.rb +1 -1
- data/lib/chewy/index.rb +25 -0
- data/lib/chewy/journal.rb +17 -6
- data/lib/chewy/log_subscriber.rb +5 -1
- data/lib/chewy/minitest/helpers.rb +77 -0
- data/lib/chewy/minitest/search_index_receiver.rb +3 -1
- data/lib/chewy/rake_helper.rb +92 -11
- data/lib/chewy/rspec/build_query.rb +12 -0
- data/lib/chewy/rspec/helpers.rb +55 -0
- data/lib/chewy/rspec/update_index.rb +14 -7
- data/lib/chewy/rspec.rb +2 -0
- data/lib/chewy/runtime/version.rb +1 -1
- data/lib/chewy/runtime.rb +1 -1
- data/lib/chewy/search/parameters/collapse.rb +16 -0
- data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
- data/lib/chewy/search/parameters/indices.rb +1 -1
- data/lib/chewy/search/parameters/knn.rb +16 -0
- data/lib/chewy/search/parameters/order.rb +6 -19
- data/lib/chewy/search/parameters/storage.rb +1 -1
- data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
- data/lib/chewy/search/parameters.rb +4 -4
- data/lib/chewy/search/request.rb +74 -16
- data/lib/chewy/search/scoping.rb +1 -1
- data/lib/chewy/search.rb +5 -2
- data/lib/chewy/stash.rb +3 -3
- data/lib/chewy/strategy/active_job.rb +1 -1
- data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
- data/lib/chewy/strategy/base.rb +10 -0
- data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
- data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
- data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
- data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
- data/lib/chewy/strategy/sidekiq.rb +1 -1
- data/lib/chewy/strategy.rb +3 -0
- data/lib/chewy/version.rb +1 -1
- data/lib/chewy.rb +21 -14
- data/lib/tasks/chewy.rake +18 -2
- data/migration_guide.md +1 -1
- data/spec/chewy/config_spec.rb +2 -2
- data/spec/chewy/elastic_client_spec.rb +26 -0
- data/spec/chewy/fields/base_spec.rb +39 -18
- data/spec/chewy/index/actions_spec.rb +10 -10
- data/spec/chewy/index/adapter/active_record_spec.rb +88 -0
- data/spec/chewy/index/import/bulk_builder_spec.rb +309 -1
- data/spec/chewy/index/import/routine_spec.rb +5 -5
- data/spec/chewy/index/import_spec.rb +48 -26
- data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
- data/spec/chewy/index/observe/callback_spec.rb +139 -0
- data/spec/chewy/index/observe_spec.rb +27 -0
- data/spec/chewy/journal_spec.rb +13 -49
- data/spec/chewy/minitest/helpers_spec.rb +111 -1
- data/spec/chewy/minitest/search_index_receiver_spec.rb +6 -4
- data/spec/chewy/rake_helper_spec.rb +170 -0
- data/spec/chewy/rspec/build_query_spec.rb +34 -0
- data/spec/chewy/rspec/helpers_spec.rb +61 -0
- data/spec/chewy/search/pagination/kaminari_examples.rb +1 -1
- data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
- data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
- data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
- data/spec/chewy/search/parameters/knn_spec.rb +5 -0
- data/spec/chewy/search/parameters/order_spec.rb +18 -11
- data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
- data/spec/chewy/search/parameters_spec.rb +6 -1
- data/spec/chewy/search/request_spec.rb +58 -9
- data/spec/chewy/search_spec.rb +9 -0
- data/spec/chewy/strategy/active_job_spec.rb +8 -8
- data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
- data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
- data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +4 -4
- data/spec/chewy_spec.rb +10 -7
- data/spec/spec_helper.rb +1 -2
- data/spec/support/active_record.rb +8 -1
- metadata +45 -264
- data/lib/chewy/backports/deep_dup.rb +0 -46
- data/lib/chewy/backports/duplicable.rb +0 -91
- data/lib/chewy/index/import/thread_safe_progress_bar.rb +0 -40
data/lib/chewy/fields/base.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module Chewy
|
2
2
|
module Fields
|
3
3
|
class Base
|
4
|
-
attr_reader :name, :
|
5
|
-
attr_accessor :parent
|
4
|
+
attr_reader :name, :join_options, :options, :children
|
5
|
+
attr_accessor :parent # used by Chewy::Index::Mapping to expand nested fields
|
6
6
|
|
7
7
|
def initialize(name, value: nil, **options)
|
8
8
|
@name = name.to_sym
|
@@ -10,9 +10,11 @@ module Chewy
|
|
10
10
|
update_options!(**options)
|
11
11
|
@value = value
|
12
12
|
@children = []
|
13
|
+
@allowed_relations = find_allowed_relations(options[:relations]) # for join fields
|
13
14
|
end
|
14
15
|
|
15
16
|
def update_options!(**options)
|
17
|
+
@join_options = options.delete(:join) || {}
|
16
18
|
@options = options
|
17
19
|
end
|
18
20
|
|
@@ -31,7 +33,7 @@ module Chewy
|
|
31
33
|
else
|
32
34
|
{}
|
33
35
|
end
|
34
|
-
mapping.reverse_merge!(options)
|
36
|
+
mapping.reverse_merge!(options.except(:ignore_blank))
|
35
37
|
mapping.reverse_merge!(type: (children.present? ? 'object' : Chewy.default_field_type))
|
36
38
|
|
37
39
|
{name => mapping}
|
@@ -53,30 +55,70 @@ module Chewy
|
|
53
55
|
{name => result}
|
54
56
|
end
|
55
57
|
|
58
|
+
def value
|
59
|
+
if join_field?
|
60
|
+
join_type = join_options[:type]
|
61
|
+
join_id = join_options[:id]
|
62
|
+
# memoize
|
63
|
+
@value ||= proc do |object|
|
64
|
+
validate_join_type!(value_by_name_proc(join_type).call(object))
|
65
|
+
# If it's a join field and it has join_id, the value is compound and contains
|
66
|
+
# both name (type) and id of the parent object
|
67
|
+
if value_by_name_proc(join_id).call(object).present?
|
68
|
+
{
|
69
|
+
name: value_by_name_proc(join_type).call(object), # parent type
|
70
|
+
parent: value_by_name_proc(join_id).call(object) # parent id
|
71
|
+
}
|
72
|
+
else
|
73
|
+
value_by_name_proc(join_type).call(object)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
else
|
77
|
+
@value
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
56
81
|
private
|
57
82
|
|
58
83
|
def geo_point?
|
59
84
|
@options[:type].to_s == 'geo_point'
|
60
85
|
end
|
61
86
|
|
87
|
+
def join_field?
|
88
|
+
@options[:type].to_s == 'join'
|
89
|
+
end
|
90
|
+
|
62
91
|
def ignore_blank?
|
63
92
|
@options.fetch(:ignore_blank) { geo_point? }
|
64
93
|
end
|
65
94
|
|
66
95
|
def evaluate(objects)
|
67
|
-
object = objects.first
|
68
|
-
|
69
96
|
if value.is_a?(Proc)
|
70
|
-
|
71
|
-
object.instance_exec(&value)
|
72
|
-
elsif value.arity.negative?
|
73
|
-
value.call(*object)
|
74
|
-
else
|
75
|
-
value.call(*objects.first(value.arity))
|
76
|
-
end
|
97
|
+
value_by_proc(objects, value)
|
77
98
|
else
|
78
|
-
|
99
|
+
value_by_name(objects, value)
|
100
|
+
end
|
101
|
+
end
|
79
102
|
|
103
|
+
def value_by_proc(objects, value)
|
104
|
+
object = objects.first
|
105
|
+
if value.arity.zero?
|
106
|
+
object.instance_exec(&value)
|
107
|
+
elsif value.arity.negative?
|
108
|
+
value.call(*object)
|
109
|
+
else
|
110
|
+
value.call(*objects.first(value.arity))
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def value_by_name(objects, value)
|
115
|
+
object = objects.first
|
116
|
+
message = value.is_a?(Symbol) || value.is_a?(String) ? value.to_sym : name
|
117
|
+
value_by_name_proc(message).call(object)
|
118
|
+
end
|
119
|
+
|
120
|
+
def value_by_name_proc(message)
|
121
|
+
proc do |object|
|
80
122
|
if object.is_a?(Hash)
|
81
123
|
if object.key?(message)
|
82
124
|
object[message]
|
@@ -89,6 +131,20 @@ module Chewy
|
|
89
131
|
end
|
90
132
|
end
|
91
133
|
|
134
|
+
def validate_join_type!(type)
|
135
|
+
return unless type
|
136
|
+
return if @allowed_relations.include?(type.to_sym)
|
137
|
+
|
138
|
+
raise Chewy::InvalidJoinFieldType.new(type, @name, options[:relations])
|
139
|
+
end
|
140
|
+
|
141
|
+
def find_allowed_relations(relations)
|
142
|
+
return [] unless relations
|
143
|
+
return relations unless relations.is_a?(Hash)
|
144
|
+
|
145
|
+
(relations.keys + relations.values).flatten.uniq
|
146
|
+
end
|
147
|
+
|
92
148
|
def compose_children(value, *parent_objects)
|
93
149
|
return unless value
|
94
150
|
|
data/lib/chewy/fields/root.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Chewy
|
2
2
|
module Fields
|
3
3
|
class Root < Chewy::Fields::Base
|
4
|
-
attr_reader :dynamic_templates, :id
|
4
|
+
attr_reader :dynamic_templates, :id
|
5
5
|
|
6
6
|
def initialize(name, **options)
|
7
7
|
super(name, **options)
|
@@ -12,9 +12,7 @@ module Chewy
|
|
12
12
|
|
13
13
|
def update_options!(**options)
|
14
14
|
@id = options.fetch(:id, options.fetch(:_id, @id))
|
15
|
-
@
|
16
|
-
@parent_id = options.fetch(:parent_id, @parent_id)
|
17
|
-
@options.merge!(options.except(:id, :_id, :parent, :_parent, :parent_id, :type))
|
15
|
+
@options.merge!(options.except(:id, :_id, :type))
|
18
16
|
end
|
19
17
|
|
20
18
|
def mappings_hash
|
@@ -50,12 +48,6 @@ module Chewy
|
|
50
48
|
end
|
51
49
|
end
|
52
50
|
|
53
|
-
def compose_parent(object)
|
54
|
-
return unless parent_id
|
55
|
-
|
56
|
-
parent_id.arity.zero? ? object.instance_exec(&parent_id) : parent_id.call(object)
|
57
|
-
end
|
58
|
-
|
59
51
|
def compose_id(object)
|
60
52
|
return unless id
|
61
53
|
|
data/lib/chewy/index/actions.rb
CHANGED
@@ -146,7 +146,7 @@ module Chewy
|
|
146
146
|
# @param journal [true, false] journaling is switched off for import during reset by default
|
147
147
|
# @param import_options [Hash] options, passed to the import call
|
148
148
|
# @return [true, false] false in case of errors
|
149
|
-
def reset!(suffix = nil, apply_journal: true, journal: false,
|
149
|
+
def reset!(suffix = nil, apply_journal: true, journal: false, **import_options)
|
150
150
|
result = if suffix.present?
|
151
151
|
start_time = Time.now
|
152
152
|
indexes = self.indexes - [index_name]
|
@@ -156,23 +156,27 @@ module Chewy
|
|
156
156
|
suffixed_name = index_name(suffix: suffix)
|
157
157
|
|
158
158
|
optimize_index_settings suffixed_name
|
159
|
-
result = import
|
159
|
+
result = import(**import_options.merge(
|
160
160
|
suffix: suffix,
|
161
161
|
journal: journal,
|
162
|
-
refresh: !Chewy.reset_disable_refresh_interval
|
163
|
-
|
164
|
-
)
|
162
|
+
refresh: !Chewy.reset_disable_refresh_interval
|
163
|
+
))
|
165
164
|
original_index_settings suffixed_name
|
166
165
|
|
167
166
|
delete if indexes.blank?
|
168
|
-
update_aliases
|
167
|
+
client.indices.update_aliases body: {actions: [
|
168
|
+
*indexes.map do |index|
|
169
|
+
{remove: {index: index, alias: general_name}}
|
170
|
+
end,
|
171
|
+
{add: {index: suffixed_name, alias: general_name}}
|
172
|
+
]}
|
169
173
|
client.indices.delete index: indexes if indexes.present?
|
170
174
|
|
171
175
|
self.journal.apply(start_time, **import_options) if apply_journal
|
172
176
|
result
|
173
177
|
else
|
174
178
|
purge!
|
175
|
-
import
|
179
|
+
import(**import_options.merge(journal: journal))
|
176
180
|
end
|
177
181
|
|
178
182
|
specification.lock!
|
@@ -233,15 +237,6 @@ module Chewy
|
|
233
237
|
|
234
238
|
private
|
235
239
|
|
236
|
-
def update_aliases(indexes, general_name, suffixed_name)
|
237
|
-
client.indices.update_aliases body: {actions: [
|
238
|
-
*indexes.map do |index|
|
239
|
-
{remove: {index: index, alias: general_name}}
|
240
|
-
end,
|
241
|
-
{add: {index: suffixed_name, alias: general_name}}
|
242
|
-
]}
|
243
|
-
end
|
244
|
-
|
245
240
|
def optimize_index_settings(index_name)
|
246
241
|
settings = {}
|
247
242
|
settings[:refresh_interval] = -1 if Chewy.reset_disable_refresh_interval
|
@@ -6,16 +6,26 @@ module Chewy
|
|
6
6
|
class ActiveRecord < Orm
|
7
7
|
def self.accepts?(target)
|
8
8
|
defined?(::ActiveRecord::Base) && (
|
9
|
-
target.is_a?(Class) && target < ::ActiveRecord::Base ||
|
9
|
+
(target.is_a?(Class) && target < ::ActiveRecord::Base) ||
|
10
10
|
target.is_a?(::ActiveRecord::Relation))
|
11
11
|
end
|
12
12
|
|
13
13
|
private
|
14
14
|
|
15
15
|
def cleanup_default_scope!
|
16
|
-
|
16
|
+
behavior = Chewy.config.import_scope_cleanup_behavior
|
17
|
+
|
18
|
+
if behavior != :ignore && (@default_scope.arel.orders.present? ||
|
17
19
|
@default_scope.arel.limit.present? || @default_scope.arel.offset.present?)
|
18
|
-
|
20
|
+
if behavior == :warn && Chewy.logger
|
21
|
+
gem_dir = File.realpath('../..', __dir__)
|
22
|
+
source = caller.grep_v(Regexp.new(gem_dir)).first
|
23
|
+
Chewy.logger.warn(
|
24
|
+
"Default type scope order, limit and offset are ignored and will be nullified (called from: #{source})"
|
25
|
+
)
|
26
|
+
elsif behavior == :raise
|
27
|
+
raise ImportScopeCleanupError, 'Default type scope order, limit and offset are ignored and will be nullified'
|
28
|
+
end
|
19
29
|
end
|
20
30
|
|
21
31
|
@default_scope = @default_scope.reorder(nil).limit(nil).offset(nil)
|
@@ -94,6 +104,11 @@ module Chewy
|
|
94
104
|
object_class.connection.execute(sql).map(&converter)
|
95
105
|
end
|
96
106
|
|
107
|
+
def raw(scope, converter)
|
108
|
+
sql = scope.to_sql
|
109
|
+
object_class.connection.execute(sql).map(&converter)
|
110
|
+
end
|
111
|
+
|
97
112
|
def relation_class
|
98
113
|
::ActiveRecord::Relation
|
99
114
|
end
|
@@ -192,16 +192,6 @@ module Chewy
|
|
192
192
|
end
|
193
193
|
end
|
194
194
|
|
195
|
-
def import_count(*args)
|
196
|
-
collection = if args.first.empty? && @target.respond_to?(import_all_method)
|
197
|
-
@target.send(import_all_method)
|
198
|
-
else
|
199
|
-
args.flatten(1).compact
|
200
|
-
end
|
201
|
-
|
202
|
-
collection.count
|
203
|
-
end
|
204
|
-
|
205
195
|
private
|
206
196
|
|
207
197
|
def import_objects(objects, options)
|
@@ -101,23 +101,13 @@ module Chewy
|
|
101
101
|
additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope]
|
102
102
|
|
103
103
|
loaded_objects = load_scope_objects(scope, additional_scope)
|
104
|
-
|
105
|
-
object.public_send(primary_key).to_s
|
106
|
-
end
|
107
|
-
|
108
|
-
ids.map { |id| loaded_objects[id.to_s] }
|
109
|
-
end
|
104
|
+
loaded_objects = raw(loaded_objects, options[:raw_import]) if options[:raw_import]
|
110
105
|
|
111
|
-
|
112
|
-
|
113
|
-
default_scope
|
114
|
-
elsif args.first.is_a?(relation_class)
|
115
|
-
args.first
|
116
|
-
else
|
117
|
-
args.flatten.compact
|
106
|
+
indexed_objects = loaded_objects.index_by do |object|
|
107
|
+
object.public_send(primary_key).to_s
|
118
108
|
end
|
119
109
|
|
120
|
-
|
110
|
+
ids.map { |id| indexed_objects[id.to_s] }
|
121
111
|
end
|
122
112
|
|
123
113
|
private
|
data/lib/chewy/index/crutch.rb
CHANGED
@@ -12,13 +12,21 @@ module Chewy
|
|
12
12
|
def initialize(index, collection)
|
13
13
|
@index = index
|
14
14
|
@collection = collection
|
15
|
-
@
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
@crutches_instances = {}
|
16
|
+
end
|
17
|
+
|
18
|
+
def method_missing(name, *, **)
|
19
|
+
return self[name] if @index._crutches.key?(name)
|
20
|
+
|
21
|
+
super
|
22
|
+
end
|
23
|
+
|
24
|
+
def respond_to_missing?(name, include_private = false)
|
25
|
+
@index._crutches.key?(name) || super
|
26
|
+
end
|
27
|
+
|
28
|
+
def [](name)
|
29
|
+
@crutches_instances[name] ||= @index._crutches[:"#{name}"].call(@collection)
|
22
30
|
end
|
23
31
|
end
|
24
32
|
|
@@ -4,7 +4,7 @@ module Chewy
|
|
4
4
|
# This class purpose is to build ES client-acceptable bulk
|
5
5
|
# request body from the passed objects for index and deletion.
|
6
6
|
# It handles parent-child relationships as well by fetching
|
7
|
-
# existing documents from ES, taking their
|
7
|
+
# existing documents from ES and database, taking their join field values and
|
8
8
|
# using it in the bulk body.
|
9
9
|
# If fields are passed - it creates partial update entries except for
|
10
10
|
# the cases when the type has parent and parent_id has been changed.
|
@@ -24,9 +24,11 @@ module Chewy
|
|
24
24
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
25
25
|
# @return [Array<Hash>] bulk body
|
26
26
|
def bulk_body
|
27
|
+
populate_cache
|
28
|
+
|
27
29
|
@bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
|
28
30
|
@delete.flat_map(&method(:delete_entry))
|
29
|
-
)
|
31
|
+
).uniq
|
30
32
|
end
|
31
33
|
|
32
34
|
# The only purpose of this method is to cache document ids for
|
@@ -39,64 +41,249 @@ module Chewy
|
|
39
41
|
|
40
42
|
private
|
41
43
|
|
42
|
-
def
|
43
|
-
@
|
44
|
-
end
|
45
|
-
|
46
|
-
def parents
|
47
|
-
return unless type_root.parent_id
|
48
|
-
|
49
|
-
@parents ||= begin
|
50
|
-
ids = @index.map do |object|
|
51
|
-
object.respond_to?(:id) ? object.id : object
|
52
|
-
end
|
53
|
-
ids.concat(@delete.map do |object|
|
54
|
-
object.respond_to?(:id) ? object.id : object
|
55
|
-
end)
|
56
|
-
@index.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
|
57
|
-
end
|
44
|
+
def crutches_for_index
|
45
|
+
@crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
|
58
46
|
end
|
59
47
|
|
60
48
|
def index_entry(object)
|
61
49
|
entry = {}
|
62
50
|
entry[:_id] = index_object_ids[object] if index_object_ids[object]
|
51
|
+
entry[:routing] = routing(object) if join_field?
|
63
52
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
if parent && entry[:parent].to_s != parent
|
70
|
-
entry[:data] = @index.compose(object, crutches)
|
71
|
-
[{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
|
53
|
+
parent = cache(entry[:_id])
|
54
|
+
data = data_for(object) if parent.present?
|
55
|
+
if parent.present? && parent_changed?(data, parent)
|
56
|
+
reindex_entries(object, data) + reindex_descendants(object)
|
72
57
|
elsif @fields.present?
|
73
58
|
return [] unless entry[:_id]
|
74
59
|
|
75
|
-
entry[:data] = {doc:
|
60
|
+
entry[:data] = {doc: data_for(object, fields: @fields)}
|
76
61
|
[{update: entry}]
|
77
62
|
else
|
78
|
-
entry[:data] =
|
63
|
+
entry[:data] = data || data_for(object)
|
79
64
|
[{index: entry}]
|
80
65
|
end
|
81
66
|
end
|
82
67
|
|
68
|
+
def reindex_entries(object, data, root: object)
|
69
|
+
entry = {}
|
70
|
+
entry[:_id] = index_object_ids[object] || entry_id(object)
|
71
|
+
entry[:data] = data
|
72
|
+
entry[:routing] = routing(root) || routing(object) if join_field?
|
73
|
+
delete = delete_single_entry(object, root: root).first
|
74
|
+
index = {index: entry}
|
75
|
+
[delete, index]
|
76
|
+
end
|
77
|
+
|
78
|
+
def reindex_descendants(root)
|
79
|
+
descendants = load_descendants(root)
|
80
|
+
crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
|
81
|
+
descendants.flat_map do |object|
|
82
|
+
reindex_entries(
|
83
|
+
object,
|
84
|
+
data_for(object, crutches: crutches),
|
85
|
+
root: root
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
83
90
|
def delete_entry(object)
|
91
|
+
delete_single_entry(object)
|
92
|
+
end
|
93
|
+
|
94
|
+
def delete_single_entry(object, root: object)
|
84
95
|
entry = {}
|
85
96
|
entry[:_id] = entry_id(object)
|
86
97
|
entry[:_id] ||= object.as_json
|
87
98
|
|
88
99
|
return [] if entry[:_id].blank?
|
89
100
|
|
90
|
-
if
|
91
|
-
|
92
|
-
|
101
|
+
if join_field?
|
102
|
+
cached_parent = cache(entry[:_id])
|
103
|
+
entry_parent_id =
|
104
|
+
if cached_parent
|
105
|
+
cached_parent[:parent_id]
|
106
|
+
else
|
107
|
+
find_parent_id(object)
|
108
|
+
end
|
93
109
|
|
94
|
-
entry[:
|
110
|
+
entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
|
111
|
+
entry[:parent] = entry_parent_id if entry_parent_id
|
95
112
|
end
|
96
113
|
|
97
114
|
[{delete: entry}]
|
98
115
|
end
|
99
116
|
|
117
|
+
def load_descendants(root)
|
118
|
+
root_type = join_field_type(root)
|
119
|
+
return [] unless root_type
|
120
|
+
|
121
|
+
descendant_ids = []
|
122
|
+
grouped_parents = {root_type => [root.id]}
|
123
|
+
# iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
|
124
|
+
until grouped_parents.empty?
|
125
|
+
children_data = grouped_parents.flat_map do |parent_type, parent_ids|
|
126
|
+
@index.query(
|
127
|
+
has_parent: {
|
128
|
+
parent_type: parent_type,
|
129
|
+
# ignore_unmapped to avoid error for the leaves of the tree
|
130
|
+
# (types without children)
|
131
|
+
ignore_unmapped: true,
|
132
|
+
query: {ids: {values: parent_ids}}
|
133
|
+
}
|
134
|
+
).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
|
135
|
+
end
|
136
|
+
descendant_ids |= children_data.map(&:last)
|
137
|
+
|
138
|
+
grouped_parents = {}
|
139
|
+
children_data.each do |name, id|
|
140
|
+
next unless name
|
141
|
+
|
142
|
+
grouped_parents[name] ||= []
|
143
|
+
grouped_parents[name] << id
|
144
|
+
end
|
145
|
+
end
|
146
|
+
# query the primary database to load the descentants' records
|
147
|
+
@index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
|
148
|
+
end
|
149
|
+
|
150
|
+
def populate_cache
|
151
|
+
@cache = load_cache
|
152
|
+
end
|
153
|
+
|
154
|
+
def cache(id)
|
155
|
+
@cache[id.to_s]
|
156
|
+
end
|
157
|
+
|
158
|
+
def load_cache
|
159
|
+
return {} unless join_field?
|
160
|
+
|
161
|
+
@index
|
162
|
+
.filter(ids: {values: ids_for_cache})
|
163
|
+
.order('_doc')
|
164
|
+
.pluck(:_id, :_routing, join_field)
|
165
|
+
.to_h do |id, routing, join|
|
166
|
+
[
|
167
|
+
id,
|
168
|
+
{routing: routing, parent_id: join['parent']}
|
169
|
+
]
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def existing_routing(id)
|
174
|
+
# All objects needed here should be cached in #load_cache,
|
175
|
+
# if not, we return nil. In some cases we don't have existing routing cached,
|
176
|
+
# e.g. for loaded descendants
|
177
|
+
return unless cache(id)
|
178
|
+
|
179
|
+
cache(id)[:routing]
|
180
|
+
end
|
181
|
+
|
182
|
+
# Two types of ids:
|
183
|
+
# * of parents of the objects to be indexed
|
184
|
+
# * of objects to be deleted
|
185
|
+
def ids_for_cache
|
186
|
+
ids = @to_index.flat_map do |object|
|
187
|
+
[find_parent_id(object), object.id] if object.respond_to?(:id)
|
188
|
+
end
|
189
|
+
ids.concat(@delete.map do |object|
|
190
|
+
object.id if object.respond_to?(:id)
|
191
|
+
end)
|
192
|
+
ids.uniq.compact
|
193
|
+
end
|
194
|
+
|
195
|
+
def routing(object)
|
196
|
+
# filter out non-model objects, early return on object==nil
|
197
|
+
return unless object.respond_to?(:id)
|
198
|
+
|
199
|
+
parent_id = find_parent_id(object)
|
200
|
+
if parent_id
|
201
|
+
routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
|
202
|
+
else
|
203
|
+
object.id.to_s
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def find_parent_id(object)
|
208
|
+
return unless object.respond_to?(:id)
|
209
|
+
|
210
|
+
join = data_for(object, fields: [join_field.to_sym])[join_field]
|
211
|
+
join['parent'] if join
|
212
|
+
end
|
213
|
+
|
214
|
+
def join_field
|
215
|
+
return @join_field if defined?(@join_field)
|
216
|
+
|
217
|
+
@join_field = find_join_field
|
218
|
+
end
|
219
|
+
|
220
|
+
def find_join_field
|
221
|
+
type_settings = @index.mappings_hash[:mappings]
|
222
|
+
return unless type_settings
|
223
|
+
|
224
|
+
properties = type_settings[:properties]
|
225
|
+
join_fields = properties.find { |_, options| options[:type] == :join }
|
226
|
+
return unless join_fields
|
227
|
+
|
228
|
+
join_fields.first.to_s
|
229
|
+
end
|
230
|
+
|
231
|
+
def join_field_type(object)
|
232
|
+
return unless join_field?
|
233
|
+
|
234
|
+
raw_object =
|
235
|
+
if @index._default_import_options[:raw_import]
|
236
|
+
@index._default_import_options[:raw_import].call(object.attributes)
|
237
|
+
else
|
238
|
+
object
|
239
|
+
end
|
240
|
+
|
241
|
+
join_field_value = data_for(
|
242
|
+
raw_object,
|
243
|
+
fields: [join_field.to_sym], # build only the field that is needed
|
244
|
+
crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
|
245
|
+
)[join_field]
|
246
|
+
|
247
|
+
case join_field_value
|
248
|
+
when String
|
249
|
+
join_field_value
|
250
|
+
when Hash
|
251
|
+
join_field_value['name']
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
def join_field?
|
256
|
+
join_field && !join_field.empty?
|
257
|
+
end
|
258
|
+
|
259
|
+
def data_for(object, fields: [], crutches: crutches_for_index)
|
260
|
+
@index.compose(object, crutches, fields: fields)
|
261
|
+
end
|
262
|
+
|
263
|
+
def parent_changed?(data, old_parent)
|
264
|
+
return false unless old_parent
|
265
|
+
return false unless join_field?
|
266
|
+
return false unless @fields.include?(join_field.to_sym)
|
267
|
+
return false unless data.key?(join_field)
|
268
|
+
|
269
|
+
# The join field value can be a hash, e.g.:
|
270
|
+
# {"name": "child", "parent": "123"} for a child
|
271
|
+
# {"name": "parent"} for a parent
|
272
|
+
# but it can also be a string: (e.g. "parent") for a parent:
|
273
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
|
274
|
+
new_join_field_value = data[join_field]
|
275
|
+
if new_join_field_value.is_a? Hash
|
276
|
+
# If we have a hash in the join field,
|
277
|
+
# we're taking the `parent` field that holds the parent id.
|
278
|
+
new_parent_id = new_join_field_value['parent']
|
279
|
+
new_parent_id != old_parent[:parent_id]
|
280
|
+
else
|
281
|
+
# If there is a non-hash value (String or nil), it means that the join field is changed
|
282
|
+
# and the current object is no longer a child.
|
283
|
+
true
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
100
287
|
def entry_id(object)
|
101
288
|
if type_root.id
|
102
289
|
type_root.compose_id(object)
|
@@ -33,7 +33,7 @@ module Chewy
|
|
33
33
|
return [] if body.blank?
|
34
34
|
|
35
35
|
request_bodies(body).each_with_object([]) do |request_body, results|
|
36
|
-
response = @index.client.bulk
|
36
|
+
response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
|
37
37
|
|
38
38
|
next unless response.try(:[], 'errors')
|
39
39
|
|
@@ -51,9 +51,9 @@ module Chewy
|
|
51
51
|
@parallel_options = @options.delete(:parallel)
|
52
52
|
if @parallel_options && !@parallel_options.is_a?(Hash)
|
53
53
|
@parallel_options = if @parallel_options.is_a?(Integer)
|
54
|
-
{
|
54
|
+
{in_processes: @parallel_options}
|
55
55
|
else
|
56
|
-
{
|
56
|
+
{}
|
57
57
|
end
|
58
58
|
end
|
59
59
|
@errors = []
|
@@ -64,7 +64,7 @@ module Chewy
|
|
64
64
|
# Creates the journal index and the corresponding index if necessary.
|
65
65
|
# @return [Object] whatever
|
66
66
|
def create_indexes!
|
67
|
-
Chewy::Stash::Journal.create if @options[:journal]
|
67
|
+
Chewy::Stash::Journal.create if @options[:journal] && !Chewy.configuration[:skip_journal_creation_on_import]
|
68
68
|
return if Chewy.configuration[:skip_index_creation_on_import]
|
69
69
|
|
70
70
|
@index.create!(**@bulk_options.slice(:suffix)) unless @index.exists?
|