chewy 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +24 -2
- data/.rubocop_todo.yml +2 -2
- data/.travis.yml +38 -21
- data/.yardopts +5 -0
- data/Appraisals +55 -27
- data/CHANGELOG.md +57 -12
- data/Gemfile +14 -10
- data/LEGACY_DSL.md +497 -0
- data/README.md +249 -515
- data/chewy.gemspec +5 -4
- data/gemfiles/rails.4.0.activerecord.gemfile +14 -0
- data/gemfiles/rails.4.1.activerecord.gemfile +14 -0
- data/gemfiles/rails.4.2.activerecord.gemfile +8 -10
- data/gemfiles/rails.4.2.mongoid.5.1.gemfile +9 -10
- data/gemfiles/rails.5.0.activerecord.gemfile +8 -10
- data/gemfiles/rails.5.0.mongoid.6.0.gemfile +15 -0
- data/gemfiles/rails.5.1.activerecord.gemfile +15 -0
- data/gemfiles/rails.5.1.mongoid.6.1.gemfile +15 -0
- data/gemfiles/sequel.4.45.gemfile +11 -0
- data/lib/chewy.rb +77 -43
- data/lib/chewy/config.rb +44 -7
- data/lib/chewy/errors.rb +2 -2
- data/lib/chewy/fields/base.rb +39 -32
- data/lib/chewy/fields/root.rb +33 -7
- data/lib/chewy/index.rb +237 -149
- data/lib/chewy/index/actions.rb +85 -28
- data/lib/chewy/index/aliases.rb +2 -1
- data/lib/chewy/index/settings.rb +9 -5
- data/lib/chewy/index/specification.rb +58 -0
- data/lib/chewy/journal.rb +40 -92
- data/lib/chewy/query.rb +43 -27
- data/lib/chewy/query/compose.rb +13 -13
- data/lib/chewy/query/criteria.rb +13 -13
- data/lib/chewy/query/filters.rb +1 -1
- data/lib/chewy/query/loading.rb +1 -1
- data/lib/chewy/query/nodes/and.rb +2 -2
- data/lib/chewy/query/nodes/bool.rb +1 -1
- data/lib/chewy/query/nodes/equal.rb +2 -2
- data/lib/chewy/query/nodes/exists.rb +1 -1
- data/lib/chewy/query/nodes/has_relation.rb +2 -2
- data/lib/chewy/query/nodes/match_all.rb +1 -1
- data/lib/chewy/query/nodes/missing.rb +1 -1
- data/lib/chewy/query/nodes/not.rb +2 -2
- data/lib/chewy/query/nodes/or.rb +2 -2
- data/lib/chewy/query/nodes/prefix.rb +1 -1
- data/lib/chewy/query/nodes/query.rb +2 -2
- data/lib/chewy/query/nodes/range.rb +4 -4
- data/lib/chewy/query/nodes/regexp.rb +4 -4
- data/lib/chewy/query/nodes/script.rb +3 -3
- data/lib/chewy/query/pagination.rb +10 -1
- data/lib/chewy/railtie.rb +1 -0
- data/lib/chewy/rake_helper.rb +265 -48
- data/lib/chewy/rspec/update_index.rb +30 -22
- data/lib/chewy/search.rb +78 -21
- data/lib/chewy/search/loader.rb +83 -0
- data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
- data/lib/chewy/search/pagination/will_paginate.rb +41 -0
- data/lib/chewy/search/parameters.rb +150 -0
- data/lib/chewy/search/parameters/aggs.rb +16 -0
- data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
- data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
- data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
- data/lib/chewy/search/parameters/concerns/query_storage.rb +237 -0
- data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
- data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
- data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
- data/lib/chewy/search/parameters/explain.rb +16 -0
- data/lib/chewy/search/parameters/filter.rb +47 -0
- data/lib/chewy/search/parameters/highlight.rb +16 -0
- data/lib/chewy/search/parameters/indices_boost.rb +52 -0
- data/lib/chewy/search/parameters/limit.rb +17 -0
- data/lib/chewy/search/parameters/load.rb +32 -0
- data/lib/chewy/search/parameters/min_score.rb +16 -0
- data/lib/chewy/search/parameters/none.rb +27 -0
- data/lib/chewy/search/parameters/offset.rb +17 -0
- data/lib/chewy/search/parameters/order.rb +64 -0
- data/lib/chewy/search/parameters/post_filter.rb +19 -0
- data/lib/chewy/search/parameters/preference.rb +16 -0
- data/lib/chewy/search/parameters/profile.rb +16 -0
- data/lib/chewy/search/parameters/query.rb +19 -0
- data/lib/chewy/search/parameters/request_cache.rb +27 -0
- data/lib/chewy/search/parameters/rescore.rb +29 -0
- data/lib/chewy/search/parameters/script_fields.rb +16 -0
- data/lib/chewy/search/parameters/search_after.rb +20 -0
- data/lib/chewy/search/parameters/search_type.rb +16 -0
- data/lib/chewy/search/parameters/source.rb +73 -0
- data/lib/chewy/search/parameters/storage.rb +95 -0
- data/lib/chewy/search/parameters/stored_fields.rb +63 -0
- data/lib/chewy/search/parameters/suggest.rb +16 -0
- data/lib/chewy/search/parameters/terminate_after.rb +16 -0
- data/lib/chewy/search/parameters/timeout.rb +16 -0
- data/lib/chewy/search/parameters/track_scores.rb +16 -0
- data/lib/chewy/search/parameters/types.rb +20 -0
- data/lib/chewy/search/parameters/version.rb +16 -0
- data/lib/chewy/search/query_proxy.rb +257 -0
- data/lib/chewy/search/request.rb +1021 -0
- data/lib/chewy/search/response.rb +119 -0
- data/lib/chewy/search/scoping.rb +50 -0
- data/lib/chewy/search/scrolling.rb +136 -0
- data/lib/chewy/stash.rb +70 -0
- data/lib/chewy/strategy.rb +10 -3
- data/lib/chewy/strategy/active_job.rb +1 -0
- data/lib/chewy/strategy/atomic.rb +1 -3
- data/lib/chewy/strategy/bypass.rb +1 -1
- data/lib/chewy/strategy/resque.rb +1 -0
- data/lib/chewy/strategy/shoryuken.rb +40 -0
- data/lib/chewy/strategy/sidekiq.rb +13 -3
- data/lib/chewy/type.rb +29 -7
- data/lib/chewy/type/actions.rb +26 -2
- data/lib/chewy/type/adapter/active_record.rb +44 -29
- data/lib/chewy/type/adapter/base.rb +27 -7
- data/lib/chewy/type/adapter/mongoid.rb +18 -7
- data/lib/chewy/type/adapter/object.rb +187 -26
- data/lib/chewy/type/adapter/orm.rb +59 -32
- data/lib/chewy/type/adapter/sequel.rb +32 -16
- data/lib/chewy/type/import.rb +145 -191
- data/lib/chewy/type/import/bulk_builder.rb +122 -0
- data/lib/chewy/type/import/bulk_request.rb +76 -0
- data/lib/chewy/type/import/journal_builder.rb +45 -0
- data/lib/chewy/type/import/routine.rb +138 -0
- data/lib/chewy/type/mapping.rb +11 -1
- data/lib/chewy/type/observe.rb +1 -1
- data/lib/chewy/type/syncer.rb +220 -0
- data/lib/chewy/type/witchcraft.rb +27 -13
- data/lib/chewy/type/wrapper.rb +28 -2
- data/lib/chewy/version.rb +1 -1
- data/lib/tasks/chewy.rake +84 -26
- data/spec/chewy/config_spec.rb +82 -1
- data/spec/chewy/fields/base_spec.rb +147 -112
- data/spec/chewy/fields/root_spec.rb +75 -18
- data/spec/chewy/fields/time_fields_spec.rb +2 -3
- data/spec/chewy/index/actions_spec.rb +180 -50
- data/spec/chewy/index/aliases_spec.rb +2 -2
- data/spec/chewy/index/settings_spec.rb +67 -38
- data/spec/chewy/index/specification_spec.rb +160 -0
- data/spec/chewy/index_spec.rb +57 -66
- data/spec/chewy/journal_spec.rb +149 -54
- data/spec/chewy/minitest/helpers_spec.rb +4 -4
- data/spec/chewy/minitest/search_index_receiver_spec.rb +1 -1
- data/spec/chewy/query/criteria_spec.rb +179 -179
- data/spec/chewy/query/filters_spec.rb +15 -15
- data/spec/chewy/query/loading_spec.rb +22 -20
- data/spec/chewy/query/nodes/and_spec.rb +2 -2
- data/spec/chewy/query/nodes/bool_spec.rb +4 -4
- data/spec/chewy/query/nodes/equal_spec.rb +19 -19
- data/spec/chewy/query/nodes/exists_spec.rb +6 -6
- data/spec/chewy/query/nodes/has_child_spec.rb +19 -19
- data/spec/chewy/query/nodes/has_parent_spec.rb +19 -19
- data/spec/chewy/query/nodes/missing_spec.rb +5 -5
- data/spec/chewy/query/nodes/not_spec.rb +3 -2
- data/spec/chewy/query/nodes/or_spec.rb +2 -2
- data/spec/chewy/query/nodes/prefix_spec.rb +5 -5
- data/spec/chewy/query/nodes/query_spec.rb +2 -2
- data/spec/chewy/query/nodes/range_spec.rb +18 -18
- data/spec/chewy/query/nodes/raw_spec.rb +1 -1
- data/spec/chewy/query/nodes/regexp_spec.rb +14 -14
- data/spec/chewy/query/nodes/script_spec.rb +4 -4
- data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
- data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
- data/spec/chewy/query/pagination_spec.rb +25 -21
- data/spec/chewy/query_spec.rb +501 -560
- data/spec/chewy/rake_helper_spec.rb +368 -0
- data/spec/chewy/repository_spec.rb +4 -4
- data/spec/chewy/rspec/update_index_spec.rb +89 -56
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/loader_spec.rb +117 -0
- data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
- data/spec/chewy/search/pagination/kaminari_spec.rb +17 -0
- data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
- data/spec/chewy/search/pagination/will_paginate_spec.rb +17 -0
- data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
- data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
- data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
- data/spec/chewy/search/parameters/explain_spec.rb +5 -0
- data/spec/chewy/search/parameters/filter_spec.rb +5 -0
- data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
- data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
- data/spec/chewy/search/parameters/indices_boost_spec.rb +83 -0
- data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
- data/spec/chewy/search/parameters/limit_spec.rb +5 -0
- data/spec/chewy/search/parameters/load_spec.rb +60 -0
- data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
- data/spec/chewy/search/parameters/none_spec.rb +5 -0
- data/spec/chewy/search/parameters/offset_spec.rb +5 -0
- data/spec/chewy/search/parameters/order_spec.rb +65 -0
- data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
- data/spec/chewy/search/parameters/preference_spec.rb +5 -0
- data/spec/chewy/search/parameters/profile_spec.rb +5 -0
- data/spec/chewy/search/parameters/query_spec.rb +5 -0
- data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
- data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
- data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
- data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
- data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
- data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
- data/spec/chewy/search/parameters/source_spec.rb +156 -0
- data/spec/chewy/search/parameters/storage_spec.rb +60 -0
- data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
- data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
- data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
- data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
- data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
- data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
- data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
- data/spec/chewy/search/parameters/types_spec.rb +5 -0
- data/spec/chewy/search/parameters/version_spec.rb +5 -0
- data/spec/chewy/search/parameters_spec.rb +130 -0
- data/spec/chewy/search/query_proxy_spec.rb +68 -0
- data/spec/chewy/search/request_spec.rb +669 -0
- data/spec/chewy/search/response_spec.rb +192 -0
- data/spec/chewy/search/scrolling_spec.rb +169 -0
- data/spec/chewy/search_spec.rb +13 -6
- data/spec/chewy/stash_spec.rb +95 -0
- data/spec/chewy/strategy/active_job_spec.rb +6 -0
- data/spec/chewy/strategy/resque_spec.rb +6 -0
- data/spec/chewy/strategy/shoryuken_spec.rb +64 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +8 -0
- data/spec/chewy/strategy_spec.rb +6 -6
- data/spec/chewy/type/actions_spec.rb +29 -10
- data/spec/chewy/type/adapter/active_record_spec.rb +203 -91
- data/spec/chewy/type/adapter/mongoid_spec.rb +112 -54
- data/spec/chewy/type/adapter/object_spec.rb +101 -28
- data/spec/chewy/type/adapter/sequel_spec.rb +149 -82
- data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
- data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
- data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
- data/spec/chewy/type/import/routine_spec.rb +110 -0
- data/spec/chewy/type/import_spec.rb +350 -271
- data/spec/chewy/type/mapping_spec.rb +54 -18
- data/spec/chewy/type/observe_spec.rb +5 -1
- data/spec/chewy/type/syncer_spec.rb +123 -0
- data/spec/chewy/type/witchcraft_spec.rb +45 -29
- data/spec/chewy/type/wrapper_spec.rb +63 -23
- data/spec/chewy/type_spec.rb +28 -7
- data/spec/chewy_spec.rb +75 -7
- data/spec/spec_helper.rb +5 -2
- data/spec/support/active_record.rb +5 -1
- data/spec/support/class_helpers.rb +0 -14
- data/spec/support/mongoid.rb +15 -3
- data/spec/support/sequel.rb +6 -1
- metadata +198 -37
- data/gemfiles/rails.3.2.activerecord.gemfile +0 -16
- data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -15
- data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -15
- data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -16
- data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -16
- data/gemfiles/rails.4.2.mongoid.4.0.gemfile +0 -16
- data/gemfiles/rails.4.2.mongoid.4.0.kaminari.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.4.0.will_paginate.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.5.1.kaminari.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.5.1.will_paginate.gemfile +0 -15
- data/gemfiles/rails.5.0.activerecord.kaminari.gemfile +0 -16
- data/gemfiles/rails.5.0.activerecord.will_paginate.gemfile +0 -16
- data/gemfiles/sequel.4.38.gemfile +0 -14
- data/lib/chewy/journal/apply.rb +0 -31
- data/lib/chewy/journal/clean.rb +0 -24
- data/lib/chewy/journal/entry.rb +0 -83
- data/lib/chewy/journal/query.rb +0 -87
- data/lib/chewy/query/pagination/will_paginate.rb +0 -27
- data/lib/chewy/query/scoping.rb +0 -20
- data/spec/chewy/journal/apply_spec.rb +0 -120
- data/spec/chewy/journal/entry_spec.rb +0 -237
- data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -59
@@ -6,16 +6,15 @@ module Chewy
|
|
6
6
|
class Orm < Base
|
7
7
|
attr_reader :default_scope
|
8
8
|
|
9
|
-
def initialize(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
@target = model_of_relation(class_or_relation)
|
14
|
-
@default_scope = class_or_relation
|
9
|
+
def initialize(target, **options)
|
10
|
+
if target.is_a?(relation_class)
|
11
|
+
@target = model_of_relation(target)
|
12
|
+
@default_scope = target
|
15
13
|
else
|
16
|
-
@target =
|
14
|
+
@target = target
|
17
15
|
@default_scope = all_scope
|
18
16
|
end
|
17
|
+
@options = options
|
19
18
|
cleanup_default_scope!
|
20
19
|
end
|
21
20
|
|
@@ -25,10 +24,10 @@ module Chewy
|
|
25
24
|
|
26
25
|
def identify(collection)
|
27
26
|
if collection.is_a?(relation_class)
|
28
|
-
|
27
|
+
pluck(collection)
|
29
28
|
else
|
30
29
|
Array.wrap(collection).map do |entity|
|
31
|
-
entity.
|
30
|
+
entity.respond_to?(primary_key) ? entity.public_send(primary_key) : entity
|
32
31
|
end
|
33
32
|
end
|
34
33
|
end
|
@@ -49,7 +48,7 @@ module Chewy
|
|
49
48
|
#
|
50
49
|
# Method handles destroyed objects as well. In case of objects ORM scope
|
51
50
|
# or array passed, objects, responding with true to `destroyed?` method will be deleted
|
52
|
-
# from index. In case of ids array passed - documents with missing
|
51
|
+
# from index. In case of ids array passed - documents with missing source object ids will be
|
53
52
|
# deleted from index:
|
54
53
|
#
|
55
54
|
# users = User.all
|
@@ -74,16 +73,7 @@ module Chewy
|
|
74
73
|
# UsersIndex::User.import users.map(&:id) # user ids will be deleted from index
|
75
74
|
#
|
76
75
|
def import(*args, &block)
|
77
|
-
options = args
|
78
|
-
options[:batch_size] ||= BATCH_SIZE
|
79
|
-
|
80
|
-
collection = if args.empty?
|
81
|
-
default_scope
|
82
|
-
elsif args.one? && args.first.is_a?(relation_class)
|
83
|
-
args.first
|
84
|
-
else
|
85
|
-
args.flatten.compact
|
86
|
-
end
|
76
|
+
collection, options = import_args(*args)
|
87
77
|
|
88
78
|
if collection.is_a?(relation_class)
|
89
79
|
import_scope(collection, options, &block)
|
@@ -92,29 +82,51 @@ module Chewy
|
|
92
82
|
end
|
93
83
|
end
|
94
84
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
85
|
+
def import_fields(*args, &block)
|
86
|
+
return enum_for(:import_fields, *args) unless block_given?
|
87
|
+
|
88
|
+
collection, options = import_args(*args)
|
89
|
+
|
90
|
+
if options[:fields].present? || collection.is_a?(relation_class)
|
91
|
+
collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
|
92
|
+
pluck_in_batches(collection, options.slice(:fields, :batch_size, :typecast), &block)
|
93
|
+
else
|
94
|
+
identify(collection).each_slice(options[:batch_size]) do |batch|
|
95
|
+
yield batch
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
alias_method :import_references, :import_fields
|
98
100
|
|
99
|
-
|
101
|
+
def load(ids, **options)
|
102
|
+
scope = all_scope_where_ids_in(ids)
|
103
|
+
additional_scope = options[options[:_type].type_name.to_sym].try(:[], :scope) || options[:scope]
|
100
104
|
|
101
|
-
|
102
|
-
|
105
|
+
loaded_objects = load_scope_objects(scope, additional_scope)
|
106
|
+
.index_by do |object|
|
107
|
+
object.public_send(primary_key).to_s
|
108
|
+
end
|
103
109
|
|
104
|
-
|
110
|
+
ids.map { |id| loaded_objects[id.to_s] }
|
105
111
|
end
|
106
112
|
|
107
113
|
private
|
108
114
|
|
109
115
|
def import_objects(collection, options)
|
110
|
-
|
116
|
+
collection_ids = identify(collection)
|
117
|
+
hash = Hash[collection_ids.map(&:to_s).zip(collection)]
|
118
|
+
|
119
|
+
indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
|
120
|
+
batch = if options[:raw_import]
|
121
|
+
raw_default_scope_where_ids_in(ids, options[:raw_import])
|
122
|
+
else
|
123
|
+
default_scope_where_ids_in(ids)
|
124
|
+
end
|
111
125
|
|
112
|
-
indexed = hash.keys.each_slice(options[:batch_size]).map do |ids|
|
113
|
-
batch = default_scope_where_ids_in(ids)
|
114
126
|
if batch.empty?
|
115
127
|
true
|
116
128
|
else
|
117
|
-
|
129
|
+
batch.each { |object| hash.delete(object.send(primary_key).to_s) }
|
118
130
|
yield grouped_objects(batch)
|
119
131
|
end
|
120
132
|
end.all?
|
@@ -157,7 +169,22 @@ module Chewy
|
|
157
169
|
end
|
158
170
|
|
159
171
|
def grouped_objects(objects)
|
160
|
-
options[:delete_if] ? super : {
|
172
|
+
options[:delete_if] ? super : {index: objects.to_a}
|
173
|
+
end
|
174
|
+
|
175
|
+
def import_args(*args)
|
176
|
+
options = args.extract_options!
|
177
|
+
options[:batch_size] ||= BATCH_SIZE
|
178
|
+
|
179
|
+
collection = if args.empty?
|
180
|
+
default_scope
|
181
|
+
elsif args.one? && args.first.is_a?(relation_class)
|
182
|
+
args.first
|
183
|
+
else
|
184
|
+
args.flatten.compact
|
185
|
+
end
|
186
|
+
|
187
|
+
[collection, options]
|
161
188
|
end
|
162
189
|
end
|
163
190
|
end
|
@@ -24,38 +24,54 @@ module Chewy
|
|
24
24
|
end
|
25
25
|
|
26
26
|
def import_scope(scope, options)
|
27
|
-
scope
|
28
|
-
|
29
|
-
ids = pluck_ids(scope)
|
30
|
-
result = true
|
31
|
-
|
32
|
-
while ids.present?
|
33
|
-
result &= yield grouped_objects(default_scope_where_ids_in(ids).all)
|
34
|
-
break if ids.size < options[:batch_size]
|
35
|
-
ids = pluck_ids(scope.where { |o| o.__send__(primary_key_with_table_name) > ids.last })
|
27
|
+
pluck_in_batches(scope, options.slice(:batch_size)).inject(true) do |result, ids|
|
28
|
+
result & yield(grouped_objects(default_scope_where_ids_in(ids).all))
|
36
29
|
end
|
37
|
-
|
38
|
-
result
|
39
30
|
end
|
40
31
|
|
41
32
|
def primary_key
|
42
33
|
target.primary_key
|
43
34
|
end
|
44
35
|
|
45
|
-
def
|
46
|
-
"#{target.table_name}__#{
|
36
|
+
def full_column_name(column)
|
37
|
+
"#{target.table_name}__#{column}".to_sym
|
47
38
|
end
|
48
39
|
|
49
40
|
def all_scope
|
50
41
|
target.dataset
|
51
42
|
end
|
52
43
|
|
53
|
-
def
|
54
|
-
|
44
|
+
def target_columns
|
45
|
+
@target_columns ||= target.columns.to_set
|
46
|
+
end
|
47
|
+
|
48
|
+
def pluck(scope, fields: [])
|
49
|
+
fields = fields.map(&:to_sym).unshift(primary_key).map do |column|
|
50
|
+
target_columns.include?(column) ? full_column_name(column) : column
|
51
|
+
end
|
52
|
+
scope.distinct.select_map(fields.one? ? fields.first : fields)
|
53
|
+
end
|
54
|
+
|
55
|
+
def pluck_in_batches(scope, fields: [], batch_size: nil, **options)
|
56
|
+
return enum_for(:pluck_in_batches, scope, fields: fields, batch_size: batch_size, **options) unless block_given?
|
57
|
+
|
58
|
+
scope = scope.unordered.order(::Sequel.asc(full_column_name(primary_key))).limit(batch_size)
|
59
|
+
|
60
|
+
ids = pluck(scope, fields: fields)
|
61
|
+
count = 0
|
62
|
+
|
63
|
+
while ids.present?
|
64
|
+
yield ids
|
65
|
+
break if ids.size < batch_size
|
66
|
+
last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
|
67
|
+
ids = pluck(scope.where { |o| o.__send__(full_column_name(primary_key)) > last_id }, fields: fields)
|
68
|
+
end
|
69
|
+
|
70
|
+
count
|
55
71
|
end
|
56
72
|
|
57
73
|
def scope_where_ids_in(scope, ids)
|
58
|
-
scope.where(
|
74
|
+
scope.where(full_column_name(primary_key) => Array.wrap(ids))
|
59
75
|
end
|
60
76
|
|
61
77
|
def model_of_relation(relation)
|
data/lib/chewy/type/import.rb
CHANGED
@@ -1,241 +1,195 @@
|
|
1
|
+
require 'chewy/type/import/journal_builder'
|
2
|
+
require 'chewy/type/import/bulk_builder'
|
3
|
+
require 'chewy/type/import/bulk_request'
|
4
|
+
require 'chewy/type/import/routine'
|
5
|
+
|
1
6
|
module Chewy
|
2
7
|
class Type
|
3
8
|
module Import
|
4
9
|
extend ActiveSupport::Concern
|
5
10
|
|
6
|
-
|
11
|
+
IMPORT_WORKER = lambda do |type, options, ids|
|
12
|
+
::Process.setproctitle("chewy import #{type}[#{::Parallel.worker_number}]")
|
13
|
+
routine = Routine.new(type, options)
|
14
|
+
type.adapter.import(*ids, routine.options) do |action_objects|
|
15
|
+
routine.process(**action_objects)
|
16
|
+
end
|
17
|
+
{errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
|
18
|
+
end
|
19
|
+
|
20
|
+
LEFTOVERS_WORKER = lambda do |type, options, body|
|
21
|
+
::Process.setproctitle("chewy import #{type}[#{::Parallel.worker_number}]")
|
22
|
+
routine = Routine.new(type, options)
|
23
|
+
routine.perform_bulk(body)
|
24
|
+
routine.errors
|
25
|
+
end
|
7
26
|
|
8
27
|
module ClassMethods
|
9
|
-
#
|
10
|
-
#
|
28
|
+
# @!method import(*collection, **options)
|
29
|
+
# Basically, one of the main methods for type. Performs any objects import
|
30
|
+
# to the index for a specified type. Does all the objects handling routines.
|
31
|
+
# Performs document import by utilizing bulk API. Bulk size and objects batch
|
32
|
+
# size are controlled by the corresponding options.
|
33
|
+
#
|
34
|
+
# It accepts ORM/ODM objects, PORO, hashes, ids which are used by adapter to
|
35
|
+
# fetch objects from the source depending on the used adapter. It destroys
|
36
|
+
# passed objects from the index if they are not in the default type scope
|
37
|
+
# or marked for destruction.
|
38
|
+
#
|
39
|
+
# It handles parent-child relationships: if the object parent_id has been
|
40
|
+
# changed it destroys the object and recreates it from scratch.
|
41
|
+
#
|
42
|
+
# Performs journaling if enabled: it stores all the ids of the imported
|
43
|
+
# objects to a specialized index. It is possible to replay particular import
|
44
|
+
# later to restore the data consistency.
|
11
45
|
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
# UsersIndex::User.import refresh: false # to disable index refreshing after import
|
18
|
-
# UsersIndex::User.import journal: true # import will record all the actions into special journal index
|
19
|
-
# UsersIndex::User.import batch_size: 300 # import batch size
|
20
|
-
# UsersIndex::User.import bulk_size: 10.megabytes # import ElasticSearch bulk size in bytes
|
21
|
-
# UsersIndex::User.import consistency: :quorum # explicit write consistency setting for the operation (one, quorum, all)
|
22
|
-
# UsersIndex::User.import replication: :async # explicitly set the replication type (sync, async)
|
46
|
+
# Performs partial index update using `update` bulk action if any `fields` are
|
47
|
+
# specified. Note that if document doesn't exist yet, an error will be raised
|
48
|
+
# by ES, but import catches such errors and performs full indexing
|
49
|
+
# for the corresponding documents. This feature can be disabled by setting
|
50
|
+
# `update_failover` to `false`.
|
23
51
|
#
|
24
|
-
#
|
52
|
+
# Utilizes `ActiveSupport::Notifications`, so it is possible to get imported
|
53
|
+
# objects later by listening to the `import_objects.chewy` queue. It is also
|
54
|
+
# possible to get the list of occurred errors from the payload if something
|
55
|
+
# went wrong.
|
25
56
|
#
|
57
|
+
# Import can also be run in parallel using the Parallel gem functionality.
|
58
|
+
#
|
59
|
+
# @example
|
60
|
+
# UsersIndex::User.import(parallel: true) # imports everything in parallel with automatic workers number
|
61
|
+
# UsersIndex::User.import(parallel: 3) # using 3 workers
|
62
|
+
# UsersIndex::User.import(parallel: {in_threads: 10}) # in 10 threads
|
63
|
+
#
|
64
|
+
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
65
|
+
# @param collection [Array<Object>] an array or anything to import
|
66
|
+
# @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
|
67
|
+
# @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
|
68
|
+
# @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
|
69
|
+
# @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
|
70
|
+
# @option options [true, false] journal enables imported objects journaling, false by default
|
71
|
+
# @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
|
72
|
+
# @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
|
73
|
+
# @option options [true, Integer, Hash] parallel enables parallel import processing with the Parallel gem, accepts the number of workers or any Parallel gem acceptable options
|
74
|
+
# @return [true, false] false in case of errors
|
26
75
|
def import(*args)
|
27
|
-
|
28
|
-
import_options.reverse_merge! _default_import_options
|
29
|
-
bulk_options = import_options.reject { |k, _| !BULK_OPTIONS.include?(k) }.reverse_merge!(refresh: true)
|
30
|
-
|
31
|
-
index.create!(bulk_options.slice(:suffix)) unless index.exists?
|
32
|
-
|
33
|
-
ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
|
34
|
-
adapter.import(*args, import_options) do |action_objects|
|
35
|
-
journal = Chewy::Journal.new(self)
|
36
|
-
journal.add(action_objects) if import_options.fetch(:journal) { journal? }
|
37
|
-
|
38
|
-
indexed_objects = build_root.parent_id && fetch_indexed_objects(action_objects.values.flatten)
|
39
|
-
body = bulk_body(action_objects, indexed_objects)
|
40
|
-
|
41
|
-
errors = bulk(bulk_options.merge(body: body, journal: journal)) if body.present?
|
42
|
-
|
43
|
-
fill_payload_import payload, action_objects
|
44
|
-
fill_payload_errors payload, errors if errors.present?
|
45
|
-
!errors.present?
|
46
|
-
end
|
47
|
-
end
|
76
|
+
import_routine(*args).blank?
|
48
77
|
end
|
49
78
|
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
79
|
+
# @!method import!(*collection, **options)
|
80
|
+
# (see #import)
|
81
|
+
#
|
82
|
+
# The only difference from {#import} is that it raises an exception
|
83
|
+
# in case of any import errors.
|
54
84
|
#
|
85
|
+
# @raise [Chewy::ImportFailed] in case of errors
|
55
86
|
def import!(*args)
|
56
|
-
errors =
|
57
|
-
subscriber = ActiveSupport::Notifications.subscribe('import_objects.chewy') do |*notification_args|
|
58
|
-
errors = notification_args.last[:errors]
|
59
|
-
end
|
60
|
-
import(*args)
|
87
|
+
errors = import_routine(*args)
|
61
88
|
raise Chewy::ImportFailed.new(self, errors) if errors.present?
|
62
89
|
true
|
63
|
-
ensure
|
64
|
-
ActiveSupport::Notifications.unsubscribe(subscriber) if subscriber
|
65
90
|
end
|
66
91
|
|
67
|
-
# Wraps elasticsearch
|
68
|
-
#
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
entries = body.each_with_object(['']) do |entry, result|
|
81
|
-
operation, meta = entry.to_a.first
|
82
|
-
data = meta.delete(:data)
|
83
|
-
entry = [{ operation => meta }, data].compact.map(&:to_json).join("\n")
|
84
|
-
|
85
|
-
raise ArgumentError, 'Import `:bulk_size` seems to be less than entry size' if entry.bytesize > bulk_size
|
86
|
-
|
87
|
-
if result.last.bytesize + entry.bytesize > bulk_size
|
88
|
-
result.push(entry)
|
89
|
-
else
|
90
|
-
result[-1] = [result[-1], entry].delete_if(&:blank?).join("\n")
|
91
|
-
end
|
92
|
-
end
|
93
|
-
entries.map { |entry| entry + "\n" }
|
94
|
-
else
|
95
|
-
[body]
|
96
|
-
end
|
97
|
-
|
98
|
-
if journal.any_records?
|
99
|
-
Chewy::Journal.create
|
100
|
-
bodies += [journal.bulk_body]
|
101
|
-
end
|
102
|
-
|
103
|
-
items = bodies.map do |item_body|
|
104
|
-
result = client.bulk options.merge(header).merge(body: item_body)
|
105
|
-
result.try(:[], 'items') || []
|
106
|
-
end.flatten
|
92
|
+
# Wraps elasticsearch API bulk method, adds additional features like
|
93
|
+
# `bulk_size` and `suffix`.
|
94
|
+
#
|
95
|
+
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
96
|
+
# @see Chewy::Type::Import::BulkRequest
|
97
|
+
# @param options [Hash{Symbol => Object}] besides specific import options, it accepts all the options suitable for the bulk API call like `refresh` or `timeout`
|
98
|
+
# @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
|
99
|
+
# @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
|
100
|
+
# @option options [Array<Hash>] body elasticsearch API bulk method body
|
101
|
+
# @return [Hash] tricky transposed errors hash, empty if everything is fine
|
102
|
+
def bulk(**options)
|
103
|
+
error_items = BulkRequest.new(self, **options).perform(options[:body])
|
107
104
|
Chewy.wait_for_status
|
108
105
|
|
109
|
-
|
106
|
+
payload_errors(error_items)
|
110
107
|
end
|
111
108
|
|
112
|
-
|
113
|
-
|
109
|
+
# Composes a single document from the passed object. Uses either witchcraft
|
110
|
+
# or normal composing under the hood.
|
111
|
+
#
|
112
|
+
# @param object [Object] a data source object
|
113
|
+
# @param crutches [Object] optional crutches object; if omitted - a crutch for the single passed object is created as a fallback
|
114
|
+
# @param fields [Array<Symbol>] an array of fields to restrict the generated document
|
115
|
+
# @return [Hash] a JSON-ready hash
|
116
|
+
def compose(object, crutches = nil, fields: [])
|
117
|
+
crutches ||= Chewy::Type::Crutch::Crutches.new self, [object]
|
118
|
+
|
119
|
+
if witchcraft? && build_root.children.present?
|
120
|
+
cauldron(fields: fields).brew(object, crutches)
|
121
|
+
else
|
122
|
+
build_root.compose(object, crutches, fields: fields)
|
123
|
+
end
|
114
124
|
end
|
115
125
|
|
116
126
|
private
|
117
127
|
|
118
|
-
def
|
119
|
-
|
120
|
-
|
121
|
-
crutches = Chewy::Type::Crutch::Crutches.new self, objects
|
122
|
-
objects.flat_map { |object| send(method, object, indexed_objects, crutches) }
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
def delete_bulk_entry(object, indexed_objects = nil, _crutches = nil)
|
127
|
-
entry = {}
|
128
|
+
def import_routine(*args)
|
129
|
+
routine = Routine.new(self, args.extract_options!)
|
130
|
+
routine.create_indexes!
|
128
131
|
|
129
|
-
if
|
130
|
-
|
132
|
+
if routine.parallel_options
|
133
|
+
import_parallel(args, routine)
|
131
134
|
else
|
132
|
-
|
133
|
-
entry[:_id] ||= object[:id] || object['id'] if object.is_a?(Hash)
|
134
|
-
entry[:_id] ||= object
|
135
|
-
entry[:_id] = entry[:_id].to_s if defined?(BSON) && entry[:_id].is_a?(BSON::ObjectId)
|
135
|
+
import_linear(args, routine)
|
136
136
|
end
|
137
|
-
|
138
|
-
if root_object.parent_id
|
139
|
-
existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
|
140
|
-
return [] unless existing_object
|
141
|
-
entry[:parent] = existing_object[:parent]
|
142
|
-
end
|
143
|
-
|
144
|
-
[{ delete: entry }]
|
145
137
|
end
|
146
138
|
|
147
|
-
def
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
157
|
-
entry.delete(:_id) if entry[:_id].blank?
|
158
|
-
|
159
|
-
if root_object.parent_id
|
160
|
-
entry[:parent] = root_object.compose_parent(object)
|
161
|
-
existing_object = entry[:_id].present? && indexed_objects && indexed_objects[entry[:_id].to_s]
|
139
|
+
def import_linear(objects, routine)
|
140
|
+
ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
|
141
|
+
adapter.import(*objects, routine.options) do |action_objects|
|
142
|
+
routine.process(**action_objects)
|
143
|
+
end
|
144
|
+
routine.perform_bulk(routine.leftovers)
|
145
|
+
payload[:import] = routine.stats
|
146
|
+
payload[:errors] = payload_errors(routine.errors) if routine.errors.present?
|
147
|
+
payload[:errors]
|
162
148
|
end
|
149
|
+
end
|
163
150
|
|
164
|
-
|
151
|
+
def import_parallel(objects, routine)
|
152
|
+
raise "The `parallel` gem is required for parallel import, please add `gem 'parallel'` to your Gemfile" unless '::Parallel'.safe_constantize
|
165
153
|
|
166
|
-
|
167
|
-
|
168
|
-
else
|
169
|
-
[{ index: entry }]
|
170
|
-
end
|
171
|
-
end
|
154
|
+
ActiveSupport::Notifications.instrument 'import_objects.chewy', type: self do |payload|
|
155
|
+
batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
|
172
156
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
payload[:import][action] ||= 0
|
178
|
-
payload[:import][action] += count
|
179
|
-
end
|
180
|
-
end
|
157
|
+
::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
|
158
|
+
results = ::Parallel.map(batches, routine.parallel_options, &IMPORT_WORKER.curry[self, routine.options])
|
159
|
+
::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
|
160
|
+
errors, import, leftovers = process_parallel_import_results(results)
|
181
161
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
payload[:errors][action] ||= {}
|
187
|
-
payload[:errors][action][error] ||= []
|
188
|
-
payload[:errors][action][error] |= documents
|
162
|
+
if leftovers.present?
|
163
|
+
batches = leftovers.each_slice(routine.options[:batch_size])
|
164
|
+
results = ::Parallel.map(batches, routine.parallel_options, &LEFTOVERS_WORKER.curry[self, routine.options])
|
165
|
+
errors.concat(results.flatten(1))
|
189
166
|
end
|
190
|
-
end
|
191
|
-
end
|
192
167
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
else
|
197
|
-
build_root.compose(object, crutches)[type_name.to_s]
|
168
|
+
payload[:import] = import
|
169
|
+
payload[:errors] = payload_errors(errors) if errors.present?
|
170
|
+
payload[:errors]
|
198
171
|
end
|
199
172
|
end
|
200
173
|
|
201
|
-
def
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
(memo[action] ||= []).push(action: action, id: data['_id'], error: data['error'])
|
207
|
-
end
|
174
|
+
def process_parallel_import_results(results)
|
175
|
+
results.each_with_object([[], {}, []]) do |r, (e, i, l)|
|
176
|
+
e.concat(r[:errors])
|
177
|
+
i.merge!(r[:import]) { |_k, v1, v2| v1.to_i + v2.to_i }
|
178
|
+
l.concat(r[:leftovers])
|
208
179
|
end
|
209
|
-
|
210
|
-
items.map do |action, action_items|
|
211
|
-
errors = action_items.group_by { |item| item[:error] }.map do |error, error_items|
|
212
|
-
{ error => error_items.map { |item| item[:id] } }
|
213
|
-
end.reduce(&:merge)
|
214
|
-
{ action => errors }
|
215
|
-
end.reduce(&:merge) || {}
|
216
180
|
end
|
217
181
|
|
218
|
-
def
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
search_type: 'scan',
|
225
|
-
scroll: '1m'
|
226
|
-
|
227
|
-
indexed_objects = {}
|
182
|
+
def payload_errors(errors)
|
183
|
+
errors.each_with_object({}) do |error, result|
|
184
|
+
action = error.keys.first.to_sym
|
185
|
+
item = error.values.first
|
186
|
+
error = item['error']
|
187
|
+
id = item['_id']
|
228
188
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
result['hits']['hits'].map do |hit|
|
233
|
-
parent = hit.key?('_parent') ? hit['_parent'] : hit['fields']['_parent']
|
234
|
-
indexed_objects[hit['_id']] = { parent: parent }
|
235
|
-
end
|
189
|
+
result[action] ||= {}
|
190
|
+
result[action][error] ||= []
|
191
|
+
result[action][error].push(id)
|
236
192
|
end
|
237
|
-
|
238
|
-
indexed_objects
|
239
193
|
end
|
240
194
|
end
|
241
195
|
end
|