chewy 0.10.1 → 7.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +1 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +39 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/workflows/ruby.yml +74 -0
- data/.rubocop.yml +28 -23
- data/.rubocop_todo.yml +110 -22
- data/CHANGELOG.md +480 -298
- data/CODE_OF_CONDUCT.md +14 -0
- data/CONTRIBUTING.md +63 -0
- data/Gemfile +3 -5
- data/Guardfile +3 -1
- data/LICENSE.txt +1 -1
- data/README.md +571 -333
- data/chewy.gemspec +12 -15
- data/gemfiles/rails.5.2.activerecord.gemfile +11 -0
- data/gemfiles/rails.6.0.activerecord.gemfile +11 -0
- data/gemfiles/rails.6.1.activerecord.gemfile +13 -0
- data/gemfiles/rails.7.0.activerecord.gemfile +13 -0
- data/lib/chewy/config.rb +48 -77
- data/lib/chewy/errors.rb +4 -10
- data/lib/chewy/fields/base.rb +88 -16
- data/lib/chewy/fields/root.rb +15 -21
- data/lib/chewy/index/actions.rb +67 -38
- data/lib/chewy/{type → index}/adapter/active_record.rb +18 -4
- data/lib/chewy/{type → index}/adapter/base.rb +11 -12
- data/lib/chewy/{type → index}/adapter/object.rb +28 -32
- data/lib/chewy/{type → index}/adapter/orm.rb +26 -24
- data/lib/chewy/index/aliases.rb +14 -5
- data/lib/chewy/index/crutch.rb +40 -0
- data/lib/chewy/index/import/bulk_builder.rb +311 -0
- data/lib/chewy/{type → index}/import/bulk_request.rb +10 -9
- data/lib/chewy/{type → index}/import/journal_builder.rb +11 -12
- data/lib/chewy/{type → index}/import/routine.rb +19 -18
- data/lib/chewy/{type → index}/import.rb +82 -36
- data/lib/chewy/{type → index}/mapping.rb +63 -62
- data/lib/chewy/index/observe/active_record_methods.rb +87 -0
- data/lib/chewy/index/observe/callback.rb +34 -0
- data/lib/chewy/index/observe.rb +17 -0
- data/lib/chewy/index/settings.rb +2 -0
- data/lib/chewy/index/specification.rb +13 -10
- data/lib/chewy/{type → index}/syncer.rb +62 -63
- data/lib/chewy/{type → index}/witchcraft.rb +15 -9
- data/lib/chewy/{type → index}/wrapper.rb +16 -6
- data/lib/chewy/index.rb +68 -93
- data/lib/chewy/journal.rb +25 -14
- data/lib/chewy/minitest/helpers.rb +91 -18
- data/lib/chewy/minitest/search_index_receiver.rb +29 -33
- data/lib/chewy/multi_search.rb +62 -0
- data/lib/chewy/railtie.rb +8 -24
- data/lib/chewy/rake_helper.rb +141 -112
- data/lib/chewy/rspec/build_query.rb +12 -0
- data/lib/chewy/rspec/helpers.rb +55 -0
- data/lib/chewy/rspec/update_index.rb +58 -49
- data/lib/chewy/rspec.rb +2 -0
- data/lib/chewy/runtime.rb +1 -1
- data/lib/chewy/search/loader.rb +19 -41
- data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
- data/lib/chewy/search/parameters/collapse.rb +16 -0
- data/lib/chewy/search/parameters/concerns/query_storage.rb +6 -5
- data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
- data/lib/chewy/search/parameters/indices.rb +78 -0
- data/lib/chewy/search/parameters/none.rb +1 -3
- data/lib/chewy/search/parameters/order.rb +6 -19
- data/lib/chewy/search/parameters/source.rb +5 -1
- data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
- data/lib/chewy/search/parameters.rb +28 -8
- data/lib/chewy/search/query_proxy.rb +9 -2
- data/lib/chewy/search/request.rb +207 -157
- data/lib/chewy/search/response.rb +5 -5
- data/lib/chewy/search/scoping.rb +7 -8
- data/lib/chewy/search/scrolling.rb +14 -13
- data/lib/chewy/search.rb +7 -26
- data/lib/chewy/stash.rb +27 -29
- data/lib/chewy/strategy/active_job.rb +2 -2
- data/lib/chewy/strategy/atomic.rb +1 -1
- data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
- data/lib/chewy/strategy/base.rb +10 -0
- data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +148 -0
- data/lib/chewy/strategy/delayed_sidekiq/worker.rb +52 -0
- data/lib/chewy/strategy/delayed_sidekiq.rb +17 -0
- data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
- data/lib/chewy/strategy/sidekiq.rb +3 -2
- data/lib/chewy/strategy.rb +6 -19
- data/lib/chewy/version.rb +1 -1
- data/lib/chewy.rb +37 -80
- data/lib/generators/chewy/install_generator.rb +1 -1
- data/lib/tasks/chewy.rake +26 -32
- data/migration_guide.md +56 -0
- data/spec/chewy/config_spec.rb +27 -57
- data/spec/chewy/fields/base_spec.rb +457 -174
- data/spec/chewy/fields/root_spec.rb +24 -32
- data/spec/chewy/fields/time_fields_spec.rb +5 -5
- data/spec/chewy/index/actions_spec.rb +425 -60
- data/spec/chewy/{type → index}/adapter/active_record_spec.rb +110 -44
- data/spec/chewy/{type → index}/adapter/object_spec.rb +21 -6
- data/spec/chewy/index/aliases_spec.rb +3 -3
- data/spec/chewy/index/import/bulk_builder_spec.rb +494 -0
- data/spec/chewy/{type → index}/import/bulk_request_spec.rb +5 -12
- data/spec/chewy/{type → index}/import/journal_builder_spec.rb +22 -30
- data/spec/chewy/{type → index}/import/routine_spec.rb +19 -19
- data/spec/chewy/{type → index}/import_spec.rb +154 -95
- data/spec/chewy/index/mapping_spec.rb +135 -0
- data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
- data/spec/chewy/index/observe/callback_spec.rb +139 -0
- data/spec/chewy/index/observe_spec.rb +143 -0
- data/spec/chewy/index/settings_spec.rb +3 -1
- data/spec/chewy/index/specification_spec.rb +32 -33
- data/spec/chewy/{type → index}/syncer_spec.rb +14 -19
- data/spec/chewy/{type → index}/witchcraft_spec.rb +34 -21
- data/spec/chewy/index/wrapper_spec.rb +100 -0
- data/spec/chewy/index_spec.rb +99 -114
- data/spec/chewy/journal_spec.rb +56 -101
- data/spec/chewy/minitest/helpers_spec.rb +122 -14
- data/spec/chewy/minitest/search_index_receiver_spec.rb +24 -26
- data/spec/chewy/multi_search_spec.rb +84 -0
- data/spec/chewy/rake_helper_spec.rb +325 -101
- data/spec/chewy/rspec/build_query_spec.rb +34 -0
- data/spec/chewy/rspec/helpers_spec.rb +61 -0
- data/spec/chewy/rspec/update_index_spec.rb +106 -102
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/loader_spec.rb +19 -53
- data/spec/chewy/search/pagination/kaminari_examples.rb +3 -5
- data/spec/chewy/search/pagination/kaminari_spec.rb +1 -1
- data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
- data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
- data/spec/chewy/search/parameters/indices_spec.rb +99 -0
- data/spec/chewy/search/parameters/none_spec.rb +1 -1
- data/spec/chewy/search/parameters/order_spec.rb +18 -11
- data/spec/chewy/search/parameters/query_storage_examples.rb +67 -21
- data/spec/chewy/search/parameters/search_after_spec.rb +4 -1
- data/spec/chewy/search/parameters/source_spec.rb +8 -2
- data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
- data/spec/chewy/search/parameters_spec.rb +39 -8
- data/spec/chewy/search/query_proxy_spec.rb +68 -17
- data/spec/chewy/search/request_spec.rb +360 -149
- data/spec/chewy/search/response_spec.rb +35 -25
- data/spec/chewy/search/scrolling_spec.rb +28 -26
- data/spec/chewy/search_spec.rb +73 -53
- data/spec/chewy/stash_spec.rb +16 -26
- data/spec/chewy/strategy/active_job_spec.rb +23 -10
- data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
- data/spec/chewy/strategy/atomic_spec.rb +9 -10
- data/spec/chewy/strategy/delayed_sidekiq_spec.rb +190 -0
- data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +14 -10
- data/spec/chewy/strategy_spec.rb +19 -15
- data/spec/chewy_spec.rb +17 -110
- data/spec/spec_helper.rb +7 -22
- data/spec/support/active_record.rb +43 -5
- metadata +123 -198
- data/.travis.yml +0 -53
- data/Appraisals +0 -79
- data/LEGACY_DSL.md +0 -497
- data/gemfiles/rails.4.0.activerecord.gemfile +0 -14
- data/gemfiles/rails.4.1.activerecord.gemfile +0 -14
- data/gemfiles/rails.4.2.activerecord.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.5.1.gemfile +0 -15
- data/gemfiles/rails.5.0.activerecord.gemfile +0 -15
- data/gemfiles/rails.5.0.mongoid.6.0.gemfile +0 -15
- data/gemfiles/rails.5.1.activerecord.gemfile +0 -15
- data/gemfiles/rails.5.1.mongoid.6.1.gemfile +0 -15
- data/gemfiles/sequel.4.45.gemfile +0 -11
- data/lib/chewy/backports/deep_dup.rb +0 -46
- data/lib/chewy/backports/duplicable.rb +0 -91
- data/lib/chewy/query/compose.rb +0 -68
- data/lib/chewy/query/criteria.rb +0 -191
- data/lib/chewy/query/filters.rb +0 -227
- data/lib/chewy/query/loading.rb +0 -111
- data/lib/chewy/query/nodes/and.rb +0 -25
- data/lib/chewy/query/nodes/base.rb +0 -17
- data/lib/chewy/query/nodes/bool.rb +0 -34
- data/lib/chewy/query/nodes/equal.rb +0 -34
- data/lib/chewy/query/nodes/exists.rb +0 -20
- data/lib/chewy/query/nodes/expr.rb +0 -28
- data/lib/chewy/query/nodes/field.rb +0 -110
- data/lib/chewy/query/nodes/has_child.rb +0 -15
- data/lib/chewy/query/nodes/has_parent.rb +0 -15
- data/lib/chewy/query/nodes/has_relation.rb +0 -59
- data/lib/chewy/query/nodes/match_all.rb +0 -11
- data/lib/chewy/query/nodes/missing.rb +0 -20
- data/lib/chewy/query/nodes/not.rb +0 -25
- data/lib/chewy/query/nodes/or.rb +0 -25
- data/lib/chewy/query/nodes/prefix.rb +0 -19
- data/lib/chewy/query/nodes/query.rb +0 -20
- data/lib/chewy/query/nodes/range.rb +0 -63
- data/lib/chewy/query/nodes/raw.rb +0 -15
- data/lib/chewy/query/nodes/regexp.rb +0 -35
- data/lib/chewy/query/nodes/script.rb +0 -20
- data/lib/chewy/query/pagination.rb +0 -25
- data/lib/chewy/query.rb +0 -1098
- data/lib/chewy/search/pagination/will_paginate.rb +0 -43
- data/lib/chewy/search/parameters/types.rb +0 -20
- data/lib/chewy/strategy/resque.rb +0 -27
- data/lib/chewy/strategy/shoryuken.rb +0 -40
- data/lib/chewy/type/actions.rb +0 -43
- data/lib/chewy/type/adapter/mongoid.rb +0 -69
- data/lib/chewy/type/adapter/sequel.rb +0 -95
- data/lib/chewy/type/crutch.rb +0 -32
- data/lib/chewy/type/import/bulk_builder.rb +0 -122
- data/lib/chewy/type/observe.rb +0 -78
- data/lib/chewy/type.rb +0 -117
- data/lib/sequel/plugins/chewy_observe.rb +0 -78
- data/spec/chewy/query/criteria_spec.rb +0 -700
- data/spec/chewy/query/filters_spec.rb +0 -201
- data/spec/chewy/query/loading_spec.rb +0 -124
- data/spec/chewy/query/nodes/and_spec.rb +0 -12
- data/spec/chewy/query/nodes/bool_spec.rb +0 -14
- data/spec/chewy/query/nodes/equal_spec.rb +0 -32
- data/spec/chewy/query/nodes/exists_spec.rb +0 -18
- data/spec/chewy/query/nodes/has_child_spec.rb +0 -59
- data/spec/chewy/query/nodes/has_parent_spec.rb +0 -59
- data/spec/chewy/query/nodes/match_all_spec.rb +0 -11
- data/spec/chewy/query/nodes/missing_spec.rb +0 -16
- data/spec/chewy/query/nodes/not_spec.rb +0 -13
- data/spec/chewy/query/nodes/or_spec.rb +0 -12
- data/spec/chewy/query/nodes/prefix_spec.rb +0 -16
- data/spec/chewy/query/nodes/query_spec.rb +0 -12
- data/spec/chewy/query/nodes/range_spec.rb +0 -32
- data/spec/chewy/query/nodes/raw_spec.rb +0 -11
- data/spec/chewy/query/nodes/regexp_spec.rb +0 -43
- data/spec/chewy/query/nodes/script_spec.rb +0 -15
- data/spec/chewy/query/pagination/kaminari_spec.rb +0 -5
- data/spec/chewy/query/pagination/will_paginate_spec.rb +0 -5
- data/spec/chewy/query/pagination_spec.rb +0 -39
- data/spec/chewy/query_spec.rb +0 -636
- data/spec/chewy/search/pagination/will_paginate_examples.rb +0 -63
- data/spec/chewy/search/pagination/will_paginate_spec.rb +0 -23
- data/spec/chewy/search/parameters/indices_boost_spec.rb +0 -83
- data/spec/chewy/search/parameters/types_spec.rb +0 -5
- data/spec/chewy/strategy/resque_spec.rb +0 -46
- data/spec/chewy/strategy/shoryuken_spec.rb +0 -64
- data/spec/chewy/type/actions_spec.rb +0 -50
- data/spec/chewy/type/adapter/mongoid_spec.rb +0 -372
- data/spec/chewy/type/adapter/sequel_spec.rb +0 -472
- data/spec/chewy/type/import/bulk_builder_spec.rb +0 -279
- data/spec/chewy/type/mapping_spec.rb +0 -142
- data/spec/chewy/type/observe_spec.rb +0 -137
- data/spec/chewy/type/wrapper_spec.rb +0 -98
- data/spec/chewy/type_spec.rb +0 -55
- data/spec/support/mongoid.rb +0 -93
- data/spec/support/sequel.rb +0 -80
@@ -0,0 +1,311 @@
|
|
1
|
+
module Chewy
  class Index
    module Import
      # This class purpose is to build ES client-acceptable bulk
      # request body from the passed objects for index and deletion.
      # It handles parent-child relationships as well by fetching
      # existing documents from ES and database, taking their join field values and
      # using it in the bulk body.
      # If fields are passed - it creates partial update entries except for
      # the cases when the type has parent and parent_id has been changed.
      class BulkBuilder
        # @param index [Chewy::Index] desired index
        # @param to_index [Array<Object>] objects to index
        # @param delete [Array<Object>] objects or ids to delete
        # @param fields [Array<Symbol, String>] an array of fields for documents update
        def initialize(index, to_index: [], delete: [], fields: [])
          @index = index
          @to_index = to_index
          @delete = delete
          # Non-destructive `map`: the array belongs to the caller (it may be
          # shared or frozen), so it must not be rewritten in place.
          @fields = fields.map(&:to_sym)
        end

        # Returns ES API-ready bulk request body.
        # The result is memoized, so the join-field cache is populated only
        # once, right before the body is built for the first time.
        #
        # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
        # @return [Array<Hash>] bulk body
        def bulk_body
          @bulk_body ||= begin
            populate_cache

            @to_index.flat_map(&method(:index_entry)).concat(
              @delete.flat_map(&method(:delete_entry))
            ).uniq
          end
        end

        # The only purpose of this method is to cache document ids for
        # all the passed object for index to avoid ids recalculation.
        #
        # @return [Hash[String => Object]] an ids-objects index hash
        def index_objects_by_id
          @index_objects_by_id ||= index_object_ids.invert.stringify_keys!
        end

      private

        # Crutches shared by all the objects passed for indexing.
        def crutches_for_index
          @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
        end

        # Builds bulk entries for a single object to index:
        # * delete + index pairs (for the object and its descendants) when the
        #   cached document exists and its parent has changed;
        # * a partial `update` entry when update fields were requested;
        # * a plain `index` entry otherwise.
        def index_entry(object)
          entry = {}
          entry[:_id] = index_object_ids[object] if index_object_ids[object]
          entry[:routing] = routing(object) if join_field?

          parent = cache(entry[:_id])
          data = data_for(object) if parent.present?
          if parent.present? && parent_changed?(data, parent)
            reindex_entries(object, data) + reindex_descendants(object)
          elsif @fields.present?
            # a partial update is impossible without a document id
            return [] unless entry[:_id]

            entry[:data] = {doc: data_for(object, fields: @fields)}
            [{update: entry}]
          else
            entry[:data] = data || data_for(object)
            [{index: entry}]
          end
        end

        # Returns a `[delete, index]` pair of entries re-creating the document
        # of `object`, routed by `root` when a routing can be derived from it.
        def reindex_entries(object, data, root: object)
          entry = {}
          entry[:_id] = index_object_ids[object] || entry_id(object)
          entry[:data] = data
          entry[:routing] = routing(root) || routing(object) if join_field?
          delete = delete_single_entry(object, root: root).first
          index = {index: entry}
          [delete, index]
        end

        # Builds delete + index entries for every descendant of `root`.
        def reindex_descendants(root)
          descendants = load_descendants(root)
          crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
          descendants.flat_map do |object|
            reindex_entries(
              object,
              data_for(object, crutches: crutches),
              root: root
            )
          end
        end

        def delete_entry(object)
          delete_single_entry(object)
        end

        # Builds a `delete` entry for an object or a bare id.
        # Returns an empty array when no id can be determined.
        def delete_single_entry(object, root: object)
          entry = {}
          entry[:_id] = entry_id(object)
          entry[:_id] ||= object.as_json

          return [] if entry[:_id].blank?

          if join_field?
            cached_parent = cache(entry[:_id])
            entry_parent_id =
              if cached_parent
                cached_parent[:parent_id]
              else
                find_parent_id(object)
              end

            # `try` on both sides: entries to delete may be bare ids
            # which do not respond to `id`.
            entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.try(:id))
            entry[:parent] = entry_parent_id if entry_parent_id
          end

          [{delete: entry}]
        end

        # Collects ids of all the descendants of `root` from ES level by level
        # and loads the corresponding records through the index adapter.
        def load_descendants(root)
          root_type = join_field_type(root)
          return [] unless root_type

          descendant_ids = []
          grouped_parents = {root_type => [root.id]}
          # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
          until grouped_parents.empty?
            children_data = grouped_parents.flat_map do |parent_type, parent_ids|
              @index.query(
                has_parent: {
                  parent_type: parent_type,
                  # ignore_unmapped to avoid error for the leaves of the tree
                  # (types without children)
                  ignore_unmapped: true,
                  query: {ids: {values: parent_ids}}
                }
              ).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
            end
            descendant_ids |= children_data.map(&:last)

            grouped_parents = {}
            children_data.each do |name, id|
              next unless name

              grouped_parents[name] ||= []
              grouped_parents[name] << id
            end
          end
          # query the primary database to load the descendants' records
          @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
        end

        def populate_cache
          @cache = load_cache
        end

        # @return [Hash, nil] cached `{routing:, parent_id:}` for the document id
        def cache(id)
          @cache[id.to_s]
        end

        # Fetches routing and parent id of the already indexed documents
        # listed in {#ids_for_cache}. Returns an empty hash when the index
        # has no join field.
        def load_cache
          return {} unless join_field?

          @index
            .filter(ids: {values: ids_for_cache})
            .order('_doc')
            .pluck(:_id, :_routing, join_field)
            .map do |id, routing, join|
              # The stored join value is a hash only for children; it can also
              # be a plain string or be absent, and then there is no parent id.
              parent_id = join['parent'] if join.is_a?(Hash)
              [id, {routing: routing, parent_id: parent_id}]
            end.to_h
        end

        def existing_routing(id)
          # All objects needed here should be cached in #load_cache,
          # if not, we return nil. In some cases we don't have existing routing cached,
          # e.g. for loaded descendants
          return unless cache(id)

          cache(id)[:routing]
        end

        # Two types of ids:
        # * of parents of the objects to be indexed
        # * of objects to be deleted
        def ids_for_cache
          ids = @to_index.flat_map do |object|
            [find_parent_id(object), object.id] if object.respond_to?(:id)
          end
          ids.concat(@delete.map do |object|
            object.id if object.respond_to?(:id)
          end)
          ids.uniq.compact
        end

        # Routing of an object is the routing of its parent (taken from the
        # objects being indexed or from the cache) or its own id when it has
        # no parent.
        def routing(object)
          # filter out non-model objects, early return on object==nil
          return unless object.respond_to?(:id)

          parent_id = find_parent_id(object)
          if parent_id
            routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
          else
            object.id.to_s
          end
        end

        # @return [Object, nil] parent id taken from the composed join field value
        def find_parent_id(object)
          return unless object.respond_to?(:id)

          join = data_for(object, fields: [join_field.to_sym])[join_field]
          # Only a hash value carries a parent id. A string value means
          # "no parent", and `String#[]` would perform a substring lookup
          # (e.g. "parent"['parent'] => "parent") instead of returning nil.
          join['parent'] if join.is_a?(Hash)
        end

        # @return [String, nil] memoized name of the join field, if any
        def join_field
          return @join_field if defined?(@join_field)

          @join_field = find_join_field
        end

        # Looks up the first field of the `:join` type in the index mappings.
        def find_join_field
          type_settings = @index.mappings_hash[:mappings]
          return unless type_settings

          properties = type_settings[:properties]
          # mappings without any declared properties cannot contain a join field
          return unless properties

          join_fields = properties.find { |_, options| options[:type] == :join }
          return unless join_fields

          join_fields.first.to_s
        end

        # @return [String, nil] the relation name of the object taken from its join field value
        def join_field_type(object)
          return unless join_field?

          raw_object =
            if @index._default_import_options[:raw_import]
              @index._default_import_options[:raw_import].call(object.attributes)
            else
              object
            end

          join_field_value = data_for(
            raw_object,
            fields: [join_field.to_sym], # build only the field that is needed
            crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
          )[join_field]

          case join_field_value
          when String
            join_field_value
          when Hash
            join_field_value['name']
          end
        end

        def join_field?
          join_field && !join_field.empty?
        end

        # Composes the document (or only the requested fields of it) for the object.
        def data_for(object, fields: [], crutches: crutches_for_index)
          @index.compose(object, crutches, fields: fields)
        end

        def parent_changed?(data, old_parent)
          return false unless old_parent
          return false unless join_field?
          return false unless @fields.include?(join_field.to_sym)
          return false unless data.key?(join_field)

          # The join field value can be a hash, e.g.:
          # {"name": "child", "parent": "123"} for a child
          # {"name": "parent"} for a parent
          # but it can also be a string: (e.g. "parent") for a parent:
          # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
          new_join_field_value = data[join_field]
          if new_join_field_value.is_a? Hash
            # If we have a hash in the join field,
            # we're taking the `parent` field that holds the parent id.
            new_parent_id = new_join_field_value['parent']
            new_parent_id != old_parent[:parent_id]
          else
            # If there is a non-hash value (String or nil), it means that the join field is changed
            # and the current object is no longer a child.
            true
          end
        end

        # Document id of the object: composed by the root field when it
        # defines an id, otherwise taken from `object.id` or the `id` hash key.
        def entry_id(object)
          if type_root.id
            type_root.compose_id(object)
          else
            id = object.id if object.respond_to?(:id)
            id ||= object[:id] || object['id'] if object.is_a?(Hash)
            id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
            id
          end
        end

        # @return [Hash[Object => Object]] memoized object-to-id hash for the objects to index
        def index_object_ids
          @index_object_ids ||= @to_index.each_with_object({}) do |object, result|
            id = entry_id(object)
            result[object] = id if id.present?
          end
        end

        def type_root
          @type_root ||= @index.root
        end
      end
    end
  end
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Chewy
|
2
|
-
class
|
2
|
+
class Index
|
3
3
|
module Import
|
4
4
|
# Adds additional features to elasticsearch-api bulk method:
|
5
5
|
# * supports Chewy index suffix if necessary;
|
@@ -10,12 +10,12 @@ module Chewy
|
|
10
10
|
#
|
11
11
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
12
12
|
class BulkRequest
|
13
|
-
# @param
|
13
|
+
# @param index [Chewy::Index] an index for the request
|
14
14
|
# @param suffix [String] an index name optional suffix
|
15
15
|
# @param bulk_size [Integer] bulk size in bytes
|
16
16
|
# @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
|
17
|
-
def initialize(
|
18
|
-
@
|
17
|
+
def initialize(index, suffix: nil, bulk_size: nil, **bulk_options)
|
18
|
+
@index = index
|
19
19
|
@suffix = suffix
|
20
20
|
@bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
|
21
21
|
@bulk_options = bulk_options
|
@@ -33,7 +33,7 @@ module Chewy
|
|
33
33
|
return [] if body.blank?
|
34
34
|
|
35
35
|
request_bodies(body).each_with_object([]) do |request_body, results|
|
36
|
-
response = @
|
36
|
+
response = @index.client.bulk(**request_base.merge(body: request_body)) if request_body.present?
|
37
37
|
|
38
38
|
next unless response.try(:[], 'errors')
|
39
39
|
|
@@ -47,22 +47,23 @@ module Chewy
|
|
47
47
|
|
48
48
|
def request_base
|
49
49
|
@request_base ||= {
|
50
|
-
index: @
|
51
|
-
type: @type.type_name
|
50
|
+
index: @index.index_name(suffix: @suffix)
|
52
51
|
}.merge!(@bulk_options)
|
53
52
|
end
|
54
53
|
|
55
54
|
def request_bodies(body)
|
56
55
|
if @bulk_size
|
56
|
+
serializer = ::Elasticsearch::API.serializer
|
57
57
|
pieces = body.each_with_object(['']) do |piece, result|
|
58
58
|
operation, meta = piece.to_a.first
|
59
59
|
data = meta.delete(:data)
|
60
|
-
piece =
|
60
|
+
piece = serializer.dump(operation => meta)
|
61
|
+
piece << "\n" << serializer.dump(data) if data.present?
|
61
62
|
|
62
63
|
if result.last.bytesize + piece.bytesize > @bulk_size
|
63
64
|
result.push(piece)
|
64
65
|
else
|
65
|
-
result[-1]
|
66
|
+
result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
|
66
67
|
end
|
67
68
|
end
|
68
69
|
pieces.each { |piece| piece << "\n" }
|
@@ -1,24 +1,23 @@
|
|
1
1
|
module Chewy
|
2
|
-
class
|
2
|
+
class Index
|
3
3
|
module Import
|
4
4
|
class JournalBuilder
|
5
|
-
def initialize(
|
6
|
-
@type = type
|
5
|
+
def initialize(index, to_index: [], delete: [])
|
7
6
|
@index = index
|
7
|
+
@to_index = to_index
|
8
8
|
@delete = delete
|
9
9
|
end
|
10
10
|
|
11
11
|
def bulk_body
|
12
|
-
Chewy::
|
12
|
+
Chewy::Index::Import::BulkBuilder.new(
|
13
13
|
Chewy::Stash::Journal,
|
14
|
-
|
15
|
-
entries(:index, @
|
14
|
+
to_index: [
|
15
|
+
entries(:index, @to_index),
|
16
16
|
entries(:delete, @delete)
|
17
17
|
].compact
|
18
18
|
).bulk_body.each do |item|
|
19
19
|
item.values.first.merge!(
|
20
|
-
_index: Chewy::Stash::Journal.index_name
|
21
|
-
_type: Chewy::Stash::Journal.type_name
|
20
|
+
_index: Chewy::Stash::Journal.index_name
|
22
21
|
)
|
23
22
|
end
|
24
23
|
end
|
@@ -27,17 +26,17 @@ module Chewy
|
|
27
26
|
|
28
27
|
def entries(action, objects)
|
29
28
|
return unless objects.present?
|
29
|
+
|
30
30
|
{
|
31
|
-
index_name: @
|
32
|
-
type_name: @type.type_name,
|
31
|
+
index_name: @index.derivable_name,
|
33
32
|
action: action,
|
34
|
-
references: identify(objects).map(
|
33
|
+
references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
|
35
34
|
created_at: Time.now.utc
|
36
35
|
}
|
37
36
|
end
|
38
37
|
|
39
38
|
def identify(objects)
|
40
|
-
@
|
39
|
+
@index.adapter.identify(objects)
|
41
40
|
end
|
42
41
|
end
|
43
42
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Chewy
|
2
|
-
class
|
2
|
+
class Index
|
3
3
|
module Import
|
4
4
|
# This class performs the import routine for the options and objects given.
|
5
5
|
#
|
@@ -13,14 +13,14 @@ module Chewy
|
|
13
13
|
# * performs the bulk request;
|
14
14
|
# * composes new leftovers bulk for the next iteration based on the response errors if `update_failover` is true;
|
15
15
|
# * appends the rest of unfixable errors to the instance level errors array.
|
16
|
-
# 4. Perform the request for the last leftovers bulk if present using {#
|
16
|
+
# 4. Perform the request for the last leftovers bulk if present using {#extract_leftovers}.
|
17
17
|
# 5. Return the result errors array.
|
18
18
|
#
|
19
19
|
# At the moment, it tries to restore only from the partial document update errors in cases
|
20
20
|
# when the document doesn't exist only if `update_failover` option is true. In order to
|
21
21
|
# restore, it indexes such objects completely on the next iteration.
|
22
22
|
#
|
23
|
-
# @see Chewy::
|
23
|
+
# @see Chewy::Index::Import::ClassMethods#import
|
24
24
|
class Routine
|
25
25
|
BULK_OPTIONS = %i[
|
26
26
|
suffix bulk_size
|
@@ -33,18 +33,18 @@ module Chewy
|
|
33
33
|
refresh: true,
|
34
34
|
update_fields: [],
|
35
35
|
update_failover: true,
|
36
|
-
batch_size: Chewy::
|
36
|
+
batch_size: Chewy::Index::Adapter::Base::BATCH_SIZE
|
37
37
|
}.freeze
|
38
38
|
|
39
39
|
attr_reader :options, :parallel_options, :errors, :stats, :leftovers
|
40
40
|
|
41
41
|
# Basically, processes passed options, extracting bulk request specific options.
|
42
|
-
# @param
|
43
|
-
# @param options [Hash] import options, see {Chewy::
|
44
|
-
def initialize(
|
45
|
-
@
|
42
|
+
# @param index [Chewy::Index] chewy index
|
43
|
+
# @param options [Hash] import options, see {Chewy::Index::Import::ClassMethods#import}
|
44
|
+
def initialize(index, **options)
|
45
|
+
@index = index
|
46
46
|
@options = options
|
47
|
-
@options.reverse_merge!(@
|
47
|
+
@options.reverse_merge!(@index._default_import_options)
|
48
48
|
@options.reverse_merge!(journal: Chewy.configuration[:journal])
|
49
49
|
@options.reverse_merge!(DEFAULT_OPTIONS)
|
50
50
|
@bulk_options = @options.slice(*BULK_OPTIONS)
|
@@ -61,27 +61,28 @@ module Chewy
|
|
61
61
|
@leftovers = []
|
62
62
|
end
|
63
63
|
|
64
|
-
# Creates the journal index and the
|
64
|
+
# Creates the journal index and the corresponding index if necessary.
|
65
65
|
# @return [Object] whatever
|
66
66
|
def create_indexes!
|
67
|
-
Chewy::Stash.create if @options[:journal]
|
67
|
+
Chewy::Stash::Journal.create if @options[:journal]
|
68
68
|
return if Chewy.configuration[:skip_index_creation_on_import]
|
69
|
-
|
69
|
+
|
70
|
+
@index.create!(**@bulk_options.slice(:suffix)) unless @index.exists?
|
70
71
|
end
|
71
72
|
|
72
|
-
# The main process method. Converts passed objects to
|
73
|
-
# appends journal
|
73
|
+
# The main process method. Converts passed objects to the bulk request body,
|
74
|
+
# appends journal entries, performs this request and handles errors performing
|
74
75
|
# failover procedures if applicable.
|
75
76
|
#
|
76
77
|
# @param index [Array<Object>] any acceptable objects for indexing
|
77
78
|
# @param delete [Array<Object>] any acceptable objects for deleting
|
78
79
|
# @return [true, false] the result of the request, true if no errors
|
79
80
|
def process(index: [], delete: [])
|
80
|
-
bulk_builder = BulkBuilder.new(@
|
81
|
+
bulk_builder = BulkBuilder.new(@index, to_index: index, delete: delete, fields: @options[:update_fields])
|
81
82
|
bulk_body = bulk_builder.bulk_body
|
82
83
|
|
83
84
|
if @options[:journal]
|
84
|
-
journal_builder = JournalBuilder.new(@
|
85
|
+
journal_builder = JournalBuilder.new(@index, to_index: index, delete: delete)
|
85
86
|
bulk_body.concat(journal_builder.bulk_body)
|
86
87
|
end
|
87
88
|
|
@@ -126,11 +127,11 @@ module Chewy
|
|
126
127
|
errors_to_cleanup.each { |error| errors.delete(error) }
|
127
128
|
|
128
129
|
failed_objects = index_objects_by_id.values_at(*failed_ids_for_reimport)
|
129
|
-
BulkBuilder.new(@
|
130
|
+
BulkBuilder.new(@index, to_index: failed_objects).bulk_body
|
130
131
|
end
|
131
132
|
|
132
133
|
def bulk
|
133
|
-
@bulk ||= BulkRequest.new(@
|
134
|
+
@bulk ||= BulkRequest.new(@index, **@bulk_options)
|
134
135
|
end
|
135
136
|
end
|
136
137
|
end
|