chewy 6.0.0 → 7.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/CODEOWNERS +1 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +39 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +16 -0
- data/.github/dependabot.yml +42 -0
- data/.github/workflows/ruby.yml +60 -0
- data/.rubocop.yml +16 -8
- data/.rubocop_todo.yml +110 -22
- data/CHANGELOG.md +396 -105
- data/CODE_OF_CONDUCT.md +14 -0
- data/CONTRIBUTING.md +63 -0
- data/Gemfile +4 -10
- data/Guardfile +3 -1
- data/README.md +497 -275
- data/chewy.gemspec +5 -20
- data/gemfiles/base.gemfile +12 -0
- data/gemfiles/rails.6.1.activerecord.gemfile +10 -15
- data/gemfiles/rails.7.0.activerecord.gemfile +14 -0
- data/gemfiles/rails.7.1.activerecord.gemfile +14 -0
- data/lib/chewy/config.rb +60 -52
- data/lib/chewy/elastic_client.rb +31 -0
- data/lib/chewy/errors.rb +7 -10
- data/lib/chewy/fields/base.rb +79 -13
- data/lib/chewy/fields/root.rb +4 -14
- data/lib/chewy/index/actions.rb +54 -37
- data/lib/chewy/{type → index}/adapter/active_record.rb +30 -6
- data/lib/chewy/{type → index}/adapter/base.rb +2 -3
- data/lib/chewy/{type → index}/adapter/object.rb +27 -31
- data/lib/chewy/{type → index}/adapter/orm.rb +17 -18
- data/lib/chewy/index/aliases.rb +14 -5
- data/lib/chewy/index/crutch.rb +40 -0
- data/lib/chewy/index/import/bulk_builder.rb +311 -0
- data/lib/chewy/{type → index}/import/bulk_request.rb +6 -7
- data/lib/chewy/{type → index}/import/journal_builder.rb +11 -12
- data/lib/chewy/{type → index}/import/routine.rb +18 -17
- data/lib/chewy/{type → index}/import.rb +76 -32
- data/lib/chewy/{type → index}/mapping.rb +29 -34
- data/lib/chewy/index/observe/active_record_methods.rb +87 -0
- data/lib/chewy/index/observe/callback.rb +34 -0
- data/lib/chewy/index/observe.rb +17 -0
- data/lib/chewy/index/specification.rb +1 -0
- data/lib/chewy/{type → index}/syncer.rb +59 -59
- data/lib/chewy/{type → index}/witchcraft.rb +11 -7
- data/lib/chewy/{type → index}/wrapper.rb +2 -2
- data/lib/chewy/index.rb +67 -94
- data/lib/chewy/journal.rb +25 -14
- data/lib/chewy/log_subscriber.rb +5 -1
- data/lib/chewy/minitest/helpers.rb +86 -13
- data/lib/chewy/minitest/search_index_receiver.rb +24 -26
- data/lib/chewy/railtie.rb +6 -20
- data/lib/chewy/rake_helper.rb +169 -113
- data/lib/chewy/rspec/build_query.rb +12 -0
- data/lib/chewy/rspec/helpers.rb +55 -0
- data/lib/chewy/rspec/update_index.rb +55 -44
- data/lib/chewy/rspec.rb +2 -0
- data/lib/chewy/runtime/version.rb +1 -1
- data/lib/chewy/runtime.rb +1 -1
- data/lib/chewy/search/loader.rb +19 -41
- data/lib/chewy/search/parameters/collapse.rb +16 -0
- data/lib/chewy/search/parameters/concerns/query_storage.rb +2 -2
- data/lib/chewy/search/parameters/ignore_unavailable.rb +27 -0
- data/lib/chewy/search/parameters/indices.rb +13 -58
- data/lib/chewy/search/parameters/knn.rb +16 -0
- data/lib/chewy/search/parameters/order.rb +6 -19
- data/lib/chewy/search/parameters/source.rb +5 -1
- data/lib/chewy/search/parameters/storage.rb +1 -1
- data/lib/chewy/search/parameters/track_total_hits.rb +16 -0
- data/lib/chewy/search/parameters.rb +6 -4
- data/lib/chewy/search/query_proxy.rb +9 -2
- data/lib/chewy/search/request.rb +169 -134
- data/lib/chewy/search/response.rb +5 -5
- data/lib/chewy/search/scoping.rb +7 -8
- data/lib/chewy/search/scrolling.rb +13 -13
- data/lib/chewy/search.rb +9 -19
- data/lib/chewy/stash.rb +19 -30
- data/lib/chewy/strategy/active_job.rb +1 -1
- data/lib/chewy/strategy/atomic_no_refresh.rb +18 -0
- data/lib/chewy/strategy/base.rb +10 -0
- data/lib/chewy/strategy/delayed_sidekiq/scheduler.rb +168 -0
- data/lib/chewy/strategy/delayed_sidekiq/worker.rb +76 -0
- data/lib/chewy/strategy/delayed_sidekiq.rb +30 -0
- data/lib/chewy/strategy/lazy_sidekiq.rb +64 -0
- data/lib/chewy/strategy/sidekiq.rb +2 -1
- data/lib/chewy/strategy.rb +6 -19
- data/lib/chewy/version.rb +1 -1
- data/lib/chewy.rb +39 -86
- data/lib/generators/chewy/install_generator.rb +1 -1
- data/lib/tasks/chewy.rake +36 -32
- data/migration_guide.md +46 -8
- data/spec/chewy/config_spec.rb +16 -41
- data/spec/chewy/elastic_client_spec.rb +26 -0
- data/spec/chewy/fields/base_spec.rb +432 -147
- data/spec/chewy/fields/root_spec.rb +20 -28
- data/spec/chewy/fields/time_fields_spec.rb +5 -5
- data/spec/chewy/index/actions_spec.rb +368 -59
- data/spec/chewy/{type → index}/adapter/active_record_spec.rb +156 -40
- data/spec/chewy/{type → index}/adapter/object_spec.rb +21 -6
- data/spec/chewy/index/aliases_spec.rb +3 -3
- data/spec/chewy/index/import/bulk_builder_spec.rb +494 -0
- data/spec/chewy/{type → index}/import/bulk_request_spec.rb +5 -12
- data/spec/chewy/{type → index}/import/journal_builder_spec.rb +9 -19
- data/spec/chewy/{type → index}/import/routine_spec.rb +19 -19
- data/spec/chewy/{type → index}/import_spec.rb +164 -98
- data/spec/chewy/index/mapping_spec.rb +135 -0
- data/spec/chewy/index/observe/active_record_methods_spec.rb +68 -0
- data/spec/chewy/index/observe/callback_spec.rb +139 -0
- data/spec/chewy/index/observe_spec.rb +143 -0
- data/spec/chewy/index/settings_spec.rb +3 -1
- data/spec/chewy/index/specification_spec.rb +20 -30
- data/spec/chewy/{type → index}/syncer_spec.rb +14 -19
- data/spec/chewy/{type → index}/witchcraft_spec.rb +20 -22
- data/spec/chewy/index/wrapper_spec.rb +100 -0
- data/spec/chewy/index_spec.rb +60 -105
- data/spec/chewy/journal_spec.rb +25 -74
- data/spec/chewy/minitest/helpers_spec.rb +123 -15
- data/spec/chewy/minitest/search_index_receiver_spec.rb +28 -30
- data/spec/chewy/multi_search_spec.rb +4 -5
- data/spec/chewy/rake_helper_spec.rb +315 -55
- data/spec/chewy/rspec/build_query_spec.rb +34 -0
- data/spec/chewy/rspec/helpers_spec.rb +61 -0
- data/spec/chewy/rspec/update_index_spec.rb +74 -71
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/loader_spec.rb +19 -53
- data/spec/chewy/search/pagination/kaminari_examples.rb +4 -6
- data/spec/chewy/search/pagination/kaminari_spec.rb +2 -2
- data/spec/chewy/search/parameters/collapse_spec.rb +5 -0
- data/spec/chewy/search/parameters/ignore_unavailable_spec.rb +67 -0
- data/spec/chewy/search/parameters/indices_spec.rb +26 -117
- data/spec/chewy/search/parameters/knn_spec.rb +5 -0
- data/spec/chewy/search/parameters/order_spec.rb +18 -11
- data/spec/chewy/search/parameters/query_storage_examples.rb +67 -21
- data/spec/chewy/search/parameters/search_after_spec.rb +4 -1
- data/spec/chewy/search/parameters/source_spec.rb +8 -2
- data/spec/chewy/search/parameters/track_total_hits_spec.rb +5 -0
- data/spec/chewy/search/parameters_spec.rb +18 -4
- data/spec/chewy/search/query_proxy_spec.rb +68 -17
- data/spec/chewy/search/request_spec.rb +292 -110
- data/spec/chewy/search/response_spec.rb +12 -12
- data/spec/chewy/search/scrolling_spec.rb +10 -17
- data/spec/chewy/search_spec.rb +40 -34
- data/spec/chewy/stash_spec.rb +9 -21
- data/spec/chewy/strategy/active_job_spec.rb +16 -16
- data/spec/chewy/strategy/atomic_no_refresh_spec.rb +60 -0
- data/spec/chewy/strategy/atomic_spec.rb +9 -10
- data/spec/chewy/strategy/delayed_sidekiq_spec.rb +208 -0
- data/spec/chewy/strategy/lazy_sidekiq_spec.rb +214 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +12 -12
- data/spec/chewy/strategy_spec.rb +19 -15
- data/spec/chewy_spec.rb +24 -107
- data/spec/spec_helper.rb +3 -22
- data/spec/support/active_record.rb +25 -7
- metadata +78 -339
- data/.circleci/config.yml +0 -240
- data/Appraisals +0 -81
- data/gemfiles/rails.5.2.activerecord.gemfile +0 -17
- data/gemfiles/rails.5.2.mongoid.6.4.gemfile +0 -17
- data/gemfiles/rails.6.0.activerecord.gemfile +0 -17
- data/gemfiles/sequel.4.45.gemfile +0 -11
- data/lib/chewy/backports/deep_dup.rb +0 -46
- data/lib/chewy/backports/duplicable.rb +0 -91
- data/lib/chewy/search/pagination/will_paginate.rb +0 -43
- data/lib/chewy/search/parameters/types.rb +0 -20
- data/lib/chewy/strategy/resque.rb +0 -27
- data/lib/chewy/strategy/shoryuken.rb +0 -40
- data/lib/chewy/type/actions.rb +0 -43
- data/lib/chewy/type/adapter/mongoid.rb +0 -67
- data/lib/chewy/type/adapter/sequel.rb +0 -93
- data/lib/chewy/type/crutch.rb +0 -32
- data/lib/chewy/type/import/bulk_builder.rb +0 -122
- data/lib/chewy/type/observe.rb +0 -82
- data/lib/chewy/type.rb +0 -120
- data/lib/sequel/plugins/chewy_observe.rb +0 -63
- data/spec/chewy/search/pagination/will_paginate_examples.rb +0 -63
- data/spec/chewy/search/pagination/will_paginate_spec.rb +0 -23
- data/spec/chewy/search/parameters/types_spec.rb +0 -5
- data/spec/chewy/strategy/resque_spec.rb +0 -46
- data/spec/chewy/strategy/shoryuken_spec.rb +0 -70
- data/spec/chewy/type/actions_spec.rb +0 -50
- data/spec/chewy/type/adapter/mongoid_spec.rb +0 -372
- data/spec/chewy/type/adapter/sequel_spec.rb +0 -472
- data/spec/chewy/type/import/bulk_builder_spec.rb +0 -194
- data/spec/chewy/type/mapping_spec.rb +0 -175
- data/spec/chewy/type/observe_spec.rb +0 -137
- data/spec/chewy/type/wrapper_spec.rb +0 -100
- data/spec/chewy/type_spec.rb +0 -55
- data/spec/support/mongoid.rb +0 -93
- data/spec/support/sequel.rb +0 -80
@@ -1,21 +1,31 @@
|
|
1
|
-
require 'chewy/type/adapter/orm'
|
1
|
+
require 'chewy/index/adapter/orm'
|
2
2
|
|
3
3
|
module Chewy
|
4
|
-
class Type
|
4
|
+
class Index
|
5
5
|
module Adapter
|
6
6
|
class ActiveRecord < Orm
|
7
7
|
def self.accepts?(target)
|
8
8
|
defined?(::ActiveRecord::Base) && (
|
9
|
-
target.is_a?(Class) && target < ::ActiveRecord::Base ||
|
9
|
+
(target.is_a?(Class) && target < ::ActiveRecord::Base) ||
|
10
10
|
target.is_a?(::ActiveRecord::Relation))
|
11
11
|
end
|
12
12
|
|
13
13
|
private
|
14
14
|
|
15
15
|
def cleanup_default_scope!
|
16
|
-
|
16
|
+
behavior = Chewy.config.import_scope_cleanup_behavior
|
17
|
+
|
18
|
+
if behavior != :ignore && (@default_scope.arel.orders.present? ||
|
17
19
|
@default_scope.arel.limit.present? || @default_scope.arel.offset.present?)
|
18
|
-
|
20
|
+
if behavior == :warn && Chewy.logger
|
21
|
+
gem_dir = File.realpath('../..', __dir__)
|
22
|
+
source = caller.grep_v(Regexp.new(gem_dir)).first
|
23
|
+
Chewy.logger.warn(
|
24
|
+
"Default type scope order, limit and offset are ignored and will be nullified (called from: #{source})"
|
25
|
+
)
|
26
|
+
elsif behavior == :raise
|
27
|
+
raise ImportScopeCleanupError, 'Default type scope order, limit and offset are ignored and will be nullified'
|
28
|
+
end
|
19
29
|
end
|
20
30
|
|
21
31
|
@default_scope = @default_scope.reorder(nil).limit(nil).offset(nil)
|
@@ -60,7 +70,15 @@ module Chewy
|
|
60
70
|
end
|
61
71
|
|
62
72
|
def pluck_in_batches(scope, fields: [], batch_size: nil, typecast: true)
|
63
|
-
|
73
|
+
unless block_given?
|
74
|
+
return enum_for(
|
75
|
+
:pluck_in_batches,
|
76
|
+
scope,
|
77
|
+
fields: fields,
|
78
|
+
batch_size: batch_size,
|
79
|
+
typecast: typecast
|
80
|
+
)
|
81
|
+
end
|
64
82
|
|
65
83
|
scope = scope.reorder(target_id.asc).limit(batch_size)
|
66
84
|
ids = pluck(scope, fields: fields, typecast: typecast)
|
@@ -69,6 +87,7 @@ module Chewy
|
|
69
87
|
while ids.present?
|
70
88
|
yield ids
|
71
89
|
break if ids.size < batch_size
|
90
|
+
|
72
91
|
last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
|
73
92
|
ids = pluck(scope.where(target_id.gt(last_id)), fields: fields, typecast: typecast)
|
74
93
|
end
|
@@ -85,6 +104,11 @@ module Chewy
|
|
85
104
|
object_class.connection.execute(sql).map(&converter)
|
86
105
|
end
|
87
106
|
|
107
|
+
def raw(scope, converter)
|
108
|
+
sql = scope.to_sql
|
109
|
+
object_class.connection.execute(sql).map(&converter)
|
110
|
+
end
|
111
|
+
|
88
112
|
def relation_class
|
89
113
|
::ActiveRecord::Relation
|
90
114
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Chewy
|
2
|
-
class Type
|
2
|
+
class Index
|
3
3
|
module Adapter
|
4
4
|
# Basic adapter class. Contains interface, need to implement to add any classes support
|
5
5
|
class Base
|
@@ -13,8 +13,7 @@ module Chewy
|
|
13
13
|
true
|
14
14
|
end
|
15
15
|
|
16
|
-
# Camelcased name
|
17
|
-
# For returned value 'Product' will be generated class name `ProductsIndex::Product`
|
16
|
+
# Camelcased name.
|
18
17
|
#
|
19
18
|
def name
|
20
19
|
raise NotImplementedError
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'chewy/type/adapter/base'
|
1
|
+
require 'chewy/index/adapter/base'
|
2
2
|
|
3
3
|
module Chewy
|
4
|
-
class Type
|
4
|
+
class Index
|
5
5
|
module Adapter
|
6
6
|
# This adapter provides an ability to import documents from any
|
7
7
|
# source. You can actually use any class or even a symbol as
|
@@ -14,15 +14,15 @@ module Chewy
|
|
14
14
|
# @see #import
|
15
15
|
# @see #load
|
16
16
|
class Object < Base
|
17
|
-
# The signature of the
|
17
|
+
# The signature of the index scope definition.
|
18
18
|
#
|
19
19
|
# @example
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
20
|
+
# index_scope :geoname
|
21
|
+
# index_scope Geoname
|
22
|
+
# index_scope -> { Geoname.all_the_places }, name: 'geoname'
|
23
23
|
#
|
24
24
|
# @param target [Class, Symbol, String, Proc] a source of data and everything
|
25
|
-
# @option options [String, Symbol] :name redefines the inferred
|
25
|
+
# @option options [String, Symbol] :name redefines the inferred name if necessary
|
26
26
|
# @option options [String, Symbol] :import_all_method redefines import method name
|
27
27
|
# @option options [String, Symbol] :load_all_method redefines batch load method name
|
28
28
|
# @option options [String, Symbol] :load_one_method redefines per-object load method name
|
@@ -31,14 +31,13 @@ module Chewy
|
|
31
31
|
@options = options
|
32
32
|
end
|
33
33
|
|
34
|
-
#
|
35
|
-
# by default if possible.
|
34
|
+
# Inferred from the target by default if possible.
|
36
35
|
#
|
37
36
|
# @example
|
38
|
-
# # defines
|
39
|
-
#
|
40
|
-
# # still defines
|
41
|
-
#
|
37
|
+
# # defines name = Geoname
|
38
|
+
# index_scope :geoname
|
39
|
+
# # still defines name = Geoname
|
40
|
+
# index_scope -> { Geoname.all_the_places }, name: 'geoname'
|
42
41
|
#
|
43
42
|
# @return [String]
|
44
43
|
def name
|
@@ -54,14 +53,14 @@ module Chewy
|
|
54
53
|
Array.wrap(collection)
|
55
54
|
end
|
56
55
|
|
57
|
-
# This method is used internally by `Chewy::
|
56
|
+
# This method is used internally by `Chewy::Index.import`.
|
58
57
|
#
|
59
58
|
# The idea is that any object can be imported to ES if
|
60
59
|
# it responds to `#to_json` method.
|
61
60
|
#
|
62
61
|
# If method `destroyed?` is defined for object (or, in case of hash object,
|
63
62
|
# it has `:_destroyed` or `'_destroyed'` key) and returns `true` or object
|
64
|
-
# satisfy `delete_if`
|
63
|
+
# satisfy `delete_if` option then object will be deleted from index.
|
65
64
|
# But in order to be destroyable, objects need to respond to `id` method
|
66
65
|
# or have an `id` key so ElasticSearch could know which one to delete.
|
67
66
|
#
|
@@ -78,10 +77,10 @@ module Chewy
|
|
78
77
|
# end
|
79
78
|
# end
|
80
79
|
#
|
81
|
-
# # All the
|
82
|
-
#
|
83
|
-
#
|
84
|
-
#
|
80
|
+
# # All the following variants will work:
|
81
|
+
# index_scope Geoname
|
82
|
+
# index_scope Geoname, import_all_method: 'import_all'
|
83
|
+
# index_scope -> { FancyGeoAPI.all_points_collection }, name: 'geoname'
|
85
84
|
#
|
86
85
|
# @param args [Array<#to_json>]
|
87
86
|
# @option options [Integer] :batch_size import processing batch size
|
@@ -113,16 +112,15 @@ module Chewy
|
|
113
112
|
# end
|
114
113
|
# end
|
115
114
|
#
|
116
|
-
# @see Chewy::
|
117
|
-
def import_fields(*args)
|
115
|
+
# @see Chewy::Index::Adapter::Base#import_fields
|
116
|
+
def import_fields(*args, &block)
|
118
117
|
return enum_for(:import_fields, *args) unless block_given?
|
118
|
+
|
119
119
|
options = args.extract_options!
|
120
120
|
options[:batch_size] ||= BATCH_SIZE
|
121
121
|
|
122
122
|
if args.empty? && @target.respond_to?(pluck_method)
|
123
|
-
@target.send(pluck_method, :id, *options[:fields]).each_slice(options[:batch_size]) do |batch|
|
124
|
-
yield batch
|
125
|
-
end
|
123
|
+
@target.send(pluck_method, :id, *options[:fields]).each_slice(options[:batch_size], &block)
|
126
124
|
elsif options[:fields].blank?
|
127
125
|
import_references(*args, options) do |batch|
|
128
126
|
yield batch.map { |object| object_field(object, :id) || object }
|
@@ -140,14 +138,12 @@ module Chewy
|
|
140
138
|
|
141
139
|
# For the Object adapter returns the objects themselves in batches.
|
142
140
|
#
|
143
|
-
# @see Chewy::
|
144
|
-
def import_references(*args)
|
141
|
+
# @see Chewy::Index::Adapter::Base#import_references
|
142
|
+
def import_references(*args, &block)
|
145
143
|
return enum_for(:import_references, *args) unless block_given?
|
146
144
|
|
147
145
|
collection, options = import_args(*args)
|
148
|
-
collection.each_slice(options[:batch_size]) do |batch|
|
149
|
-
yield batch
|
150
|
-
end
|
146
|
+
collection.each_slice(options[:batch_size], &block)
|
151
147
|
end
|
152
148
|
|
153
149
|
# This method is used internally by the request DSL when the
|
@@ -157,7 +153,7 @@ module Chewy
|
|
157
153
|
#
|
158
154
|
# If none of the `load_all_method` or `load_one_method` is implemented
|
159
155
|
# for the target - the method will return nil. This means that the
|
160
|
-
# loader will return an array `Chewy::
|
156
|
+
# loader will return an array `Chewy::Index` objects that actually was passed.
|
161
157
|
#
|
162
158
|
# To use loading for objects it is obviously required to provide
|
163
159
|
# some meaningful ids for ES documents.
|
@@ -175,7 +171,7 @@ module Chewy
|
|
175
171
|
# end
|
176
172
|
# end
|
177
173
|
#
|
178
|
-
# MyIndex
|
174
|
+
# MyIndex.load(additional_data: true).objects
|
179
175
|
#
|
180
176
|
# @param ids [Array<Hash>] an array of ids from ES hits
|
181
177
|
# @param options [Hash] any options passed here with the request DSL `load` method.
|
@@ -1,7 +1,7 @@
|
|
1
|
-
require 'chewy/type/adapter/base'
|
1
|
+
require 'chewy/index/adapter/base'
|
2
2
|
|
3
3
|
module Chewy
|
4
|
-
class Type
|
4
|
+
class Index
|
5
5
|
module Adapter
|
6
6
|
class Orm < Base
|
7
7
|
attr_reader :default_scope
|
@@ -54,24 +54,23 @@ module Chewy
|
|
54
54
|
#
|
55
55
|
# users = User.all
|
56
56
|
# users.each { |user| user.destroy if user.inactive? }
|
57
|
-
# UsersIndex
|
57
|
+
# UsersIndex.import users # inactive users will be deleted from index
|
58
58
|
# # or
|
59
|
-
# UsersIndex
|
59
|
+
# UsersIndex.import users.map(&:id) # deleted user ids will be deleted from index
|
60
60
|
#
|
61
61
|
# Also there is custom type option `delete_if`. If it returns `true`
|
62
62
|
# object will be deleted from index. Note that if this option is defined and
|
63
63
|
# return `false` Chewy will still check `destroyed?` method. This is useful
|
64
64
|
# for paranoid objects deleting implementation.
|
65
65
|
#
|
66
|
-
#
|
67
|
-
#
|
68
|
-
# end
|
66
|
+
# index_scope User, delete_if: ->{ deleted_at }
|
67
|
+
# ...
|
69
68
|
#
|
70
69
|
# users = User.all
|
71
70
|
# users.each { |user| user.deleted_at = Time.now }
|
72
|
-
# UsersIndex
|
71
|
+
# UsersIndex.import users # paranoid deleted users will be deleted from index
|
73
72
|
# # or
|
74
|
-
# UsersIndex
|
73
|
+
# UsersIndex.import users.map(&:id) # user ids will be deleted from index
|
75
74
|
#
|
76
75
|
def import(*args, &block)
|
77
76
|
collection, options = import_args(*args)
|
@@ -92,30 +91,30 @@ module Chewy
|
|
92
91
|
collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
|
93
92
|
pluck_in_batches(collection, **options.slice(:fields, :batch_size, :typecast), &block)
|
94
93
|
else
|
95
|
-
identify(collection).each_slice(options[:batch_size]) do |batch|
|
96
|
-
yield batch
|
97
|
-
end
|
94
|
+
identify(collection).each_slice(options[:batch_size], &block)
|
98
95
|
end
|
99
96
|
end
|
100
97
|
alias_method :import_references, :import_fields
|
101
98
|
|
102
99
|
def load(ids, **options)
|
103
100
|
scope = all_scope_where_ids_in(ids)
|
104
|
-
additional_scope = options[options[:
|
101
|
+
additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope]
|
105
102
|
|
106
103
|
loaded_objects = load_scope_objects(scope, additional_scope)
|
107
|
-
|
108
|
-
|
109
|
-
|
104
|
+
loaded_objects = raw(loaded_objects, options[:raw_import]) if options[:raw_import]
|
105
|
+
|
106
|
+
indexed_objects = loaded_objects.index_by do |object|
|
107
|
+
object.public_send(primary_key).to_s
|
108
|
+
end
|
110
109
|
|
111
|
-
ids.map { |id|
|
110
|
+
ids.map { |id| indexed_objects[id.to_s] }
|
112
111
|
end
|
113
112
|
|
114
113
|
private
|
115
114
|
|
116
115
|
def import_objects(collection, options)
|
117
116
|
collection_ids = identify(collection)
|
118
|
-
hash =
|
117
|
+
hash = collection_ids.map(&:to_s).zip(collection).to_h
|
119
118
|
|
120
119
|
indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
|
121
120
|
batch = if options[:raw_import]
|
data/lib/chewy/index/aliases.rb
CHANGED
@@ -5,14 +5,23 @@ module Chewy
|
|
5
5
|
|
6
6
|
module ClassMethods
|
7
7
|
def indexes
|
8
|
-
client.indices.
|
9
|
-
|
10
|
-
|
8
|
+
indexes = empty_if_not_found { client.indices.get(index: index_name).keys }
|
9
|
+
indexes += empty_if_not_found { client.indices.get_alias(name: index_name).keys }
|
10
|
+
indexes.compact.uniq
|
11
11
|
end
|
12
12
|
|
13
13
|
def aliases
|
14
|
-
|
15
|
-
|
14
|
+
empty_if_not_found do
|
15
|
+
client.indices.get_alias(index: index_name, name: '*').values.flat_map do |aliases|
|
16
|
+
aliases['aliases'].keys
|
17
|
+
end
|
18
|
+
end.compact.uniq
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def empty_if_not_found
|
24
|
+
yield
|
16
25
|
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
17
26
|
[]
|
18
27
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Index
|
3
|
+
module Crutch
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
|
6
|
+
included do
|
7
|
+
class_attribute :_crutches
|
8
|
+
self._crutches = {}
|
9
|
+
end
|
10
|
+
|
11
|
+
class Crutches
|
12
|
+
def initialize(index, collection)
|
13
|
+
@index = index
|
14
|
+
@collection = collection
|
15
|
+
@crutches_instances = {}
|
16
|
+
end
|
17
|
+
|
18
|
+
def method_missing(name, *, **)
|
19
|
+
return self[name] if @index._crutches.key?(name)
|
20
|
+
|
21
|
+
super
|
22
|
+
end
|
23
|
+
|
24
|
+
def respond_to_missing?(name, include_private = false)
|
25
|
+
@index._crutches.key?(name) || super
|
26
|
+
end
|
27
|
+
|
28
|
+
def [](name)
|
29
|
+
@crutches_instances[name] ||= @index._crutches[:"#{name}"].call(@collection)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
module ClassMethods
|
34
|
+
def crutch(name, &block)
|
35
|
+
self._crutches = _crutches.merge(name.to_sym => block)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,311 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Index
|
3
|
+
module Import
|
4
|
+
# This class purpose is to build ES client-acceptable bulk
|
5
|
+
# request body from the passed objects for index and deletion.
|
6
|
+
# It handles parent-child relationships as well by fetching
|
7
|
+
# existing documents from ES and database, taking their join field values and
|
8
|
+
# using it in the bulk body.
|
9
|
+
# If fields are passed - it creates partial update entries except for
|
10
|
+
# the cases when the type has parent and parent_id has been changed.
|
11
|
+
class BulkBuilder
|
12
|
+
# @param index [Chewy::Index] desired index
|
13
|
+
# @param to_index [Array<Object>] objects to index
|
14
|
+
# @param delete [Array<Object>] objects or ids to delete
|
15
|
+
# @param fields [Array<Symbol, String>] an array of fields for documents update
|
16
|
+
def initialize(index, to_index: [], delete: [], fields: [])
|
17
|
+
@index = index
|
18
|
+
@to_index = to_index
|
19
|
+
@delete = delete
|
20
|
+
@fields = fields.map!(&:to_sym)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns ES API-ready bulk request body.
|
24
|
+
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
25
|
+
# @return [Array<Hash>] bulk body
|
26
|
+
def bulk_body
|
27
|
+
populate_cache
|
28
|
+
|
29
|
+
@bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat(
|
30
|
+
@delete.flat_map(&method(:delete_entry))
|
31
|
+
).uniq
|
32
|
+
end
|
33
|
+
|
34
|
+
# The only purpose of this method is to cache document ids for
|
35
|
+
# all the passed object for index to avoid ids recalculation.
|
36
|
+
#
|
37
|
+
# @return [Hash[String => Object]] an ids-objects index hash
|
38
|
+
def index_objects_by_id
|
39
|
+
@index_objects_by_id ||= index_object_ids.invert.stringify_keys!
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def crutches_for_index
|
45
|
+
@crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index
|
46
|
+
end
|
47
|
+
|
48
|
+
def index_entry(object)
|
49
|
+
entry = {}
|
50
|
+
entry[:_id] = index_object_ids[object] if index_object_ids[object]
|
51
|
+
entry[:routing] = routing(object) if join_field?
|
52
|
+
|
53
|
+
parent = cache(entry[:_id])
|
54
|
+
data = data_for(object) if parent.present?
|
55
|
+
if parent.present? && parent_changed?(data, parent)
|
56
|
+
reindex_entries(object, data) + reindex_descendants(object)
|
57
|
+
elsif @fields.present?
|
58
|
+
return [] unless entry[:_id]
|
59
|
+
|
60
|
+
entry[:data] = {doc: data_for(object, fields: @fields)}
|
61
|
+
[{update: entry}]
|
62
|
+
else
|
63
|
+
entry[:data] = data || data_for(object)
|
64
|
+
[{index: entry}]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def reindex_entries(object, data, root: object)
|
69
|
+
entry = {}
|
70
|
+
entry[:_id] = index_object_ids[object] || entry_id(object)
|
71
|
+
entry[:data] = data
|
72
|
+
entry[:routing] = routing(root) || routing(object) if join_field?
|
73
|
+
delete = delete_single_entry(object, root: root).first
|
74
|
+
index = {index: entry}
|
75
|
+
[delete, index]
|
76
|
+
end
|
77
|
+
|
78
|
+
def reindex_descendants(root)
|
79
|
+
descendants = load_descendants(root)
|
80
|
+
crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants]
|
81
|
+
descendants.flat_map do |object|
|
82
|
+
reindex_entries(
|
83
|
+
object,
|
84
|
+
data_for(object, crutches: crutches),
|
85
|
+
root: root
|
86
|
+
)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def delete_entry(object)
|
91
|
+
delete_single_entry(object)
|
92
|
+
end
|
93
|
+
|
94
|
+
def delete_single_entry(object, root: object)
|
95
|
+
entry = {}
|
96
|
+
entry[:_id] = entry_id(object)
|
97
|
+
entry[:_id] ||= object.as_json
|
98
|
+
|
99
|
+
return [] if entry[:_id].blank?
|
100
|
+
|
101
|
+
if join_field?
|
102
|
+
cached_parent = cache(entry[:_id])
|
103
|
+
entry_parent_id =
|
104
|
+
if cached_parent
|
105
|
+
cached_parent[:parent_id]
|
106
|
+
else
|
107
|
+
find_parent_id(object)
|
108
|
+
end
|
109
|
+
|
110
|
+
entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id)
|
111
|
+
entry[:parent] = entry_parent_id if entry_parent_id
|
112
|
+
end
|
113
|
+
|
114
|
+
[{delete: entry}]
|
115
|
+
end
|
116
|
+
|
117
|
+
def load_descendants(root)
|
118
|
+
root_type = join_field_type(root)
|
119
|
+
return [] unless root_type
|
120
|
+
|
121
|
+
descendant_ids = []
|
122
|
+
grouped_parents = {root_type => [root.id]}
|
123
|
+
# iteratively fetch all the descendants (with grouped_parents as a queue for next iteration)
|
124
|
+
until grouped_parents.empty?
|
125
|
+
children_data = grouped_parents.flat_map do |parent_type, parent_ids|
|
126
|
+
@index.query(
|
127
|
+
has_parent: {
|
128
|
+
parent_type: parent_type,
|
129
|
+
# ignore_unmapped to avoid error for the leaves of the tree
|
130
|
+
# (types without children)
|
131
|
+
ignore_unmapped: true,
|
132
|
+
query: {ids: {values: parent_ids}}
|
133
|
+
}
|
134
|
+
).pluck(:_id, join_field).map { |id, join| [join['name'], id] }
|
135
|
+
end
|
136
|
+
descendant_ids |= children_data.map(&:last)
|
137
|
+
|
138
|
+
grouped_parents = {}
|
139
|
+
children_data.each do |name, id|
|
140
|
+
next unless name
|
141
|
+
|
142
|
+
grouped_parents[name] ||= []
|
143
|
+
grouped_parents[name] << id
|
144
|
+
end
|
145
|
+
end
|
146
|
+
# query the primary database to load the descendants' records
|
147
|
+
@index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import])
|
148
|
+
end
|
149
|
+
|
150
|
+
def populate_cache
|
151
|
+
@cache = load_cache
|
152
|
+
end
|
153
|
+
|
154
|
+
def cache(id)
|
155
|
+
@cache[id.to_s]
|
156
|
+
end
|
157
|
+
|
158
|
+
def load_cache
|
159
|
+
return {} unless join_field?
|
160
|
+
|
161
|
+
@index
|
162
|
+
.filter(ids: {values: ids_for_cache})
|
163
|
+
.order('_doc')
|
164
|
+
.pluck(:_id, :_routing, join_field)
|
165
|
+
.to_h do |id, routing, join|
|
166
|
+
[
|
167
|
+
id,
|
168
|
+
{routing: routing, parent_id: join['parent']}
|
169
|
+
]
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def existing_routing(id)
|
174
|
+
# All objects needed here should be cached in #load_cache,
|
175
|
+
# if not, we return nil. In some cases we don't have existing routing cached,
|
176
|
+
# e.g. for loaded descendants
|
177
|
+
return unless cache(id)
|
178
|
+
|
179
|
+
cache(id)[:routing]
|
180
|
+
end
|
181
|
+
|
182
|
+
# Two types of ids:
|
183
|
+
# * of parents of the objects to be indexed
|
184
|
+
# * of objects to be deleted
|
185
|
+
def ids_for_cache
|
186
|
+
ids = @to_index.flat_map do |object|
|
187
|
+
[find_parent_id(object), object.id] if object.respond_to?(:id)
|
188
|
+
end
|
189
|
+
ids.concat(@delete.map do |object|
|
190
|
+
object.id if object.respond_to?(:id)
|
191
|
+
end)
|
192
|
+
ids.uniq.compact
|
193
|
+
end
|
194
|
+
|
195
|
+
def routing(object)
|
196
|
+
# filter out non-model objects, early return on object==nil
|
197
|
+
return unless object.respond_to?(:id)
|
198
|
+
|
199
|
+
parent_id = find_parent_id(object)
|
200
|
+
if parent_id
|
201
|
+
routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id)
|
202
|
+
else
|
203
|
+
object.id.to_s
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
def find_parent_id(object)
|
208
|
+
return unless object.respond_to?(:id)
|
209
|
+
|
210
|
+
join = data_for(object, fields: [join_field.to_sym])[join_field]
|
211
|
+
join['parent'] if join
|
212
|
+
end
|
213
|
+
|
214
|
+
def join_field
|
215
|
+
return @join_field if defined?(@join_field)
|
216
|
+
|
217
|
+
@join_field = find_join_field
|
218
|
+
end
|
219
|
+
|
220
|
+
def find_join_field
|
221
|
+
type_settings = @index.mappings_hash[:mappings]
|
222
|
+
return unless type_settings
|
223
|
+
|
224
|
+
properties = type_settings[:properties]
|
225
|
+
join_fields = properties.find { |_, options| options[:type] == :join }
|
226
|
+
return unless join_fields
|
227
|
+
|
228
|
+
join_fields.first.to_s
|
229
|
+
end
|
230
|
+
|
231
|
+
def join_field_type(object)
|
232
|
+
return unless join_field?
|
233
|
+
|
234
|
+
raw_object =
|
235
|
+
if @index._default_import_options[:raw_import]
|
236
|
+
@index._default_import_options[:raw_import].call(object.attributes)
|
237
|
+
else
|
238
|
+
object
|
239
|
+
end
|
240
|
+
|
241
|
+
join_field_value = data_for(
|
242
|
+
raw_object,
|
243
|
+
fields: [join_field.to_sym], # build only the field that is needed
|
244
|
+
crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object])
|
245
|
+
)[join_field]
|
246
|
+
|
247
|
+
case join_field_value
|
248
|
+
when String
|
249
|
+
join_field_value
|
250
|
+
when Hash
|
251
|
+
join_field_value['name']
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
def join_field?
|
256
|
+
join_field && !join_field.empty?
|
257
|
+
end
|
258
|
+
|
259
|
+
def data_for(object, fields: [], crutches: crutches_for_index)
|
260
|
+
@index.compose(object, crutches, fields: fields)
|
261
|
+
end
|
262
|
+
|
263
|
+
def parent_changed?(data, old_parent)
|
264
|
+
return false unless old_parent
|
265
|
+
return false unless join_field?
|
266
|
+
return false unless @fields.include?(join_field.to_sym)
|
267
|
+
return false unless data.key?(join_field)
|
268
|
+
|
269
|
+
# The join field value can be a hash, e.g.:
|
270
|
+
# {"name": "child", "parent": "123"} for a child
|
271
|
+
# {"name": "parent"} for a parent
|
272
|
+
# but it can also be a string: (e.g. "parent") for a parent:
|
273
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join
|
274
|
+
new_join_field_value = data[join_field]
|
275
|
+
if new_join_field_value.is_a? Hash
|
276
|
+
# If we have a hash in the join field,
|
277
|
+
# we're taking the `parent` field that holds the parent id.
|
278
|
+
new_parent_id = new_join_field_value['parent']
|
279
|
+
new_parent_id != old_parent[:parent_id]
|
280
|
+
else
|
281
|
+
# If there is a non-hash value (String or nil), it means that the join field is changed
|
282
|
+
# and the current object is no longer a child.
|
283
|
+
true
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
def entry_id(object)
|
288
|
+
if type_root.id
|
289
|
+
type_root.compose_id(object)
|
290
|
+
else
|
291
|
+
id = object.id if object.respond_to?(:id)
|
292
|
+
id ||= object[:id] || object['id'] if object.is_a?(Hash)
|
293
|
+
id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
|
294
|
+
id
|
295
|
+
end
|
296
|
+
end
|
297
|
+
|
298
|
+
def index_object_ids
|
299
|
+
@index_object_ids ||= @to_index.each_with_object({}) do |object, result|
|
300
|
+
id = entry_id(object)
|
301
|
+
result[object] = id if id.present?
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
305
|
+
def type_root
|
306
|
+
@type_root ||= @index.root
|
307
|
+
end
|
308
|
+
end
|
309
|
+
end
|
310
|
+
end
|
311
|
+
end
|