chewy 0.10.0 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +240 -0
- data/.rubocop.yml +25 -25
- data/Appraisals +12 -10
- data/CHANGELOG.md +252 -263
- data/Gemfile +5 -1
- data/LICENSE.txt +1 -1
- data/README.md +142 -78
- data/chewy.gemspec +10 -12
- data/gemfiles/{rails.4.2.mongoid.5.1.gemfile → rails.5.2.activerecord.gemfile} +6 -4
- data/gemfiles/{rails.4.2.activerecord.gemfile → rails.5.2.mongoid.6.4.gemfile} +6 -4
- data/gemfiles/{rails.4.0.activerecord.gemfile → rails.6.0.activerecord.gemfile} +6 -3
- data/gemfiles/rails.6.1.activerecord.gemfile +19 -0
- data/gemfiles/sequel.4.45.gemfile +2 -2
- data/lib/chewy.rb +2 -1
- data/lib/chewy/backports/duplicable.rb +1 -1
- data/lib/chewy/config.rb +10 -39
- data/lib/chewy/fields/base.rb +40 -28
- data/lib/chewy/fields/root.rb +18 -11
- data/lib/chewy/index.rb +3 -1
- data/lib/chewy/index/actions.rb +27 -15
- data/lib/chewy/index/settings.rb +2 -0
- data/lib/chewy/index/specification.rb +12 -10
- data/lib/chewy/minitest/helpers.rb +6 -6
- data/lib/chewy/minitest/search_index_receiver.rb +17 -17
- data/lib/chewy/multi_search.rb +62 -0
- data/lib/chewy/railtie.rb +4 -4
- data/lib/chewy/rake_helper.rb +5 -5
- data/lib/chewy/rspec/update_index.rb +3 -5
- data/lib/chewy/search.rb +4 -11
- data/lib/chewy/search/loader.rb +1 -1
- data/lib/chewy/search/pagination/will_paginate.rb +4 -2
- data/lib/chewy/search/parameters.rb +24 -6
- data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
- data/lib/chewy/search/parameters/concerns/query_storage.rb +4 -3
- data/lib/chewy/search/parameters/indices.rb +123 -0
- data/lib/chewy/search/parameters/none.rb +1 -3
- data/lib/chewy/search/request.rb +100 -74
- data/lib/chewy/search/scrolling.rb +7 -6
- data/lib/chewy/stash.rb +30 -21
- data/lib/chewy/strategy/active_job.rb +1 -1
- data/lib/chewy/strategy/atomic.rb +1 -1
- data/lib/chewy/strategy/sidekiq.rb +1 -1
- data/lib/chewy/type.rb +5 -2
- data/lib/chewy/type/adapter/active_record.rb +1 -1
- data/lib/chewy/type/adapter/base.rb +9 -9
- data/lib/chewy/type/adapter/mongoid.rb +2 -4
- data/lib/chewy/type/adapter/orm.rb +7 -4
- data/lib/chewy/type/adapter/sequel.rb +5 -7
- data/lib/chewy/type/crutch.rb +1 -1
- data/lib/chewy/type/import.rb +13 -11
- data/lib/chewy/type/import/bulk_builder.rb +1 -1
- data/lib/chewy/type/import/bulk_request.rb +4 -2
- data/lib/chewy/type/import/journal_builder.rb +3 -3
- data/lib/chewy/type/import/routine.rb +3 -3
- data/lib/chewy/type/mapping.rb +42 -36
- data/lib/chewy/type/observe.rb +16 -12
- data/lib/chewy/type/syncer.rb +15 -14
- data/lib/chewy/type/witchcraft.rb +11 -7
- data/lib/chewy/type/wrapper.rb +14 -4
- data/lib/chewy/version.rb +1 -1
- data/lib/sequel/plugins/chewy_observe.rb +4 -19
- data/migration_guide.md +18 -0
- data/spec/chewy/config_spec.rb +16 -21
- data/spec/chewy/fields/base_spec.rb +70 -70
- data/spec/chewy/fields/root_spec.rb +56 -9
- data/spec/chewy/index/actions_spec.rb +63 -7
- data/spec/chewy/index/specification_spec.rb +25 -16
- data/spec/chewy/index_spec.rb +75 -45
- data/spec/chewy/journal_spec.rb +33 -29
- data/spec/chewy/minitest/search_index_receiver_spec.rb +11 -9
- data/spec/chewy/multi_search_spec.rb +85 -0
- data/spec/chewy/rake_helper_spec.rb +123 -95
- data/spec/chewy/rspec/update_index_spec.rb +47 -46
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/pagination/kaminari_spec.rb +7 -3
- data/spec/chewy/search/pagination/will_paginate_spec.rb +9 -3
- data/spec/chewy/search/parameters/indices_spec.rb +190 -0
- data/spec/chewy/search/parameters/none_spec.rb +1 -1
- data/spec/chewy/search/parameters_spec.rb +21 -4
- data/spec/chewy/search/request_spec.rb +101 -70
- data/spec/chewy/search/response_spec.rb +27 -17
- data/spec/chewy/search/scrolling_spec.rb +25 -16
- data/spec/chewy/search_spec.rb +49 -35
- data/spec/chewy/stash_spec.rb +15 -13
- data/spec/chewy/strategy/active_job_spec.rb +15 -2
- data/spec/chewy/strategy/shoryuken_spec.rb +8 -2
- data/spec/chewy/strategy/sidekiq_spec.rb +6 -2
- data/spec/chewy/type/adapter/active_record_spec.rb +16 -4
- data/spec/chewy/type/import/bulk_builder_spec.rb +9 -94
- data/spec/chewy/type/import/journal_builder_spec.rb +17 -15
- data/spec/chewy/type/import_spec.rb +6 -0
- data/spec/chewy/type/mapping_spec.rb +51 -18
- data/spec/chewy/type/observe_spec.rb +4 -4
- data/spec/chewy/type/witchcraft_spec.rb +31 -0
- data/spec/chewy/type/wrapper_spec.rb +3 -1
- data/spec/chewy_spec.rb +0 -7
- data/spec/spec_helper.rb +5 -1
- data/spec/support/active_record.rb +20 -0
- metadata +46 -116
- data/.travis.yml +0 -53
- data/LEGACY_DSL.md +0 -497
- data/gemfiles/rails.4.1.activerecord.gemfile +0 -14
- data/gemfiles/rails.5.0.activerecord.gemfile +0 -15
- data/gemfiles/rails.5.0.mongoid.6.0.gemfile +0 -15
- data/gemfiles/rails.5.1.activerecord.gemfile +0 -15
- data/gemfiles/rails.5.1.mongoid.6.1.gemfile +0 -15
- data/lib/chewy/query.rb +0 -1098
- data/lib/chewy/query/compose.rb +0 -68
- data/lib/chewy/query/criteria.rb +0 -191
- data/lib/chewy/query/filters.rb +0 -227
- data/lib/chewy/query/loading.rb +0 -111
- data/lib/chewy/query/nodes/and.rb +0 -25
- data/lib/chewy/query/nodes/base.rb +0 -17
- data/lib/chewy/query/nodes/bool.rb +0 -34
- data/lib/chewy/query/nodes/equal.rb +0 -34
- data/lib/chewy/query/nodes/exists.rb +0 -20
- data/lib/chewy/query/nodes/expr.rb +0 -28
- data/lib/chewy/query/nodes/field.rb +0 -110
- data/lib/chewy/query/nodes/has_child.rb +0 -15
- data/lib/chewy/query/nodes/has_parent.rb +0 -15
- data/lib/chewy/query/nodes/has_relation.rb +0 -59
- data/lib/chewy/query/nodes/match_all.rb +0 -11
- data/lib/chewy/query/nodes/missing.rb +0 -20
- data/lib/chewy/query/nodes/not.rb +0 -25
- data/lib/chewy/query/nodes/or.rb +0 -25
- data/lib/chewy/query/nodes/prefix.rb +0 -19
- data/lib/chewy/query/nodes/query.rb +0 -20
- data/lib/chewy/query/nodes/range.rb +0 -63
- data/lib/chewy/query/nodes/raw.rb +0 -15
- data/lib/chewy/query/nodes/regexp.rb +0 -35
- data/lib/chewy/query/nodes/script.rb +0 -20
- data/lib/chewy/query/pagination.rb +0 -25
- data/spec/chewy/query/criteria_spec.rb +0 -700
- data/spec/chewy/query/filters_spec.rb +0 -201
- data/spec/chewy/query/loading_spec.rb +0 -124
- data/spec/chewy/query/nodes/and_spec.rb +0 -12
- data/spec/chewy/query/nodes/bool_spec.rb +0 -14
- data/spec/chewy/query/nodes/equal_spec.rb +0 -32
- data/spec/chewy/query/nodes/exists_spec.rb +0 -18
- data/spec/chewy/query/nodes/has_child_spec.rb +0 -59
- data/spec/chewy/query/nodes/has_parent_spec.rb +0 -59
- data/spec/chewy/query/nodes/match_all_spec.rb +0 -11
- data/spec/chewy/query/nodes/missing_spec.rb +0 -16
- data/spec/chewy/query/nodes/not_spec.rb +0 -13
- data/spec/chewy/query/nodes/or_spec.rb +0 -12
- data/spec/chewy/query/nodes/prefix_spec.rb +0 -16
- data/spec/chewy/query/nodes/query_spec.rb +0 -12
- data/spec/chewy/query/nodes/range_spec.rb +0 -32
- data/spec/chewy/query/nodes/raw_spec.rb +0 -11
- data/spec/chewy/query/nodes/regexp_spec.rb +0 -43
- data/spec/chewy/query/nodes/script_spec.rb +0 -15
- data/spec/chewy/query/pagination/kaminari_spec.rb +0 -5
- data/spec/chewy/query/pagination/will_paginate_spec.rb +0 -5
- data/spec/chewy/query/pagination_spec.rb +0 -39
- data/spec/chewy/query_spec.rb +0 -636
- data/spec/chewy/search/parameters/indices_boost_spec.rb +0 -83
@@ -31,16 +31,19 @@ module Chewy
|
|
31
31
|
total = [raw_limit_value, result.fetch('hits', {}).fetch('total', 0)].compact.min
|
32
32
|
last_batch_size = total % batch_size
|
33
33
|
fetched = 0
|
34
|
+
scroll_id = nil
|
34
35
|
|
35
36
|
loop do
|
36
37
|
hits = result.fetch('hits', {}).fetch('hits', [])
|
37
38
|
fetched += hits.size
|
38
39
|
hits = hits.first(last_batch_size) if last_batch_size != 0 && fetched >= total
|
39
40
|
yield(hits) if hits.present?
|
40
|
-
break if fetched >= total
|
41
41
|
scroll_id = result['_scroll_id']
|
42
|
+
break if fetched >= total
|
42
43
|
result = perform_scroll(scroll: scroll, scroll_id: scroll_id)
|
43
44
|
end
|
45
|
+
ensure
|
46
|
+
Chewy.client.clear_scroll(scroll_id: scroll_id) if scroll_id
|
44
47
|
end
|
45
48
|
|
46
49
|
# @!method scroll_hits(batch_size: 1000, scroll: '1m')
|
@@ -125,11 +128,9 @@ module Chewy
|
|
125
128
|
|
126
129
|
def perform_scroll(body)
|
127
130
|
ActiveSupport::Notifications.instrument 'search_query.chewy',
|
128
|
-
request: body
|
129
|
-
|
130
|
-
|
131
|
-
Chewy.client.scroll(body)
|
132
|
-
end
|
131
|
+
notification_payload(request: body) do
|
132
|
+
Chewy.client.scroll(body)
|
133
|
+
end
|
133
134
|
end
|
134
135
|
end
|
135
136
|
end
|
data/lib/chewy/stash.rb
CHANGED
@@ -1,26 +1,23 @@
|
|
1
1
|
module Chewy
|
2
2
|
# This class is the main storage for Chewy service data,
|
3
|
-
# Now index raw specifications are stored in the `
|
4
|
-
# index.
|
3
|
+
# Now index raw specifications are stored in the `chewy_specifications`
|
4
|
+
# index.
|
5
|
+
# Journal entries are stored in `chewy_journal`
|
5
6
|
#
|
6
7
|
# @see Chewy::Index::Specification
|
7
|
-
|
8
|
-
|
8
|
+
module Stash
|
9
|
+
class Specification < Chewy::Index
|
10
|
+
index_name 'chewy_specifications'
|
9
11
|
|
10
|
-
|
11
|
-
|
12
|
+
define_type :specification do
|
13
|
+
default_import_options journal: false
|
12
14
|
|
13
|
-
|
15
|
+
field :specification, type: 'binary'
|
16
|
+
end
|
14
17
|
end
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
field :index_name, type: 'string', index: 'not_analyzed'
|
20
|
-
field :type_name, type: 'string', index: 'not_analyzed'
|
21
|
-
field :action, type: 'string', index: 'not_analyzed'
|
22
|
-
field :references, type: 'string', index: 'no'
|
23
|
-
field :created_at, type: 'date'
|
19
|
+
class Journal < Chewy::Index
|
20
|
+
index_name 'chewy_journal'
|
24
21
|
|
25
22
|
# Loads all entries since the specified time.
|
26
23
|
#
|
@@ -52,18 +49,30 @@ module Chewy
|
|
52
49
|
types.group_by(&:index).each do |index, index_types|
|
53
50
|
scope = scope.or(
|
54
51
|
filter(term: {index_name: index.derivable_name})
|
55
|
-
|
52
|
+
.filter(terms: {type_name: index_types.map(&:type_name)})
|
56
53
|
)
|
57
54
|
end
|
58
55
|
scope
|
59
56
|
end
|
60
57
|
|
61
|
-
|
62
|
-
|
63
|
-
|
58
|
+
define_type :journal do
|
59
|
+
default_import_options journal: false
|
60
|
+
|
61
|
+
field :index_name, type: 'keyword'
|
62
|
+
field :type_name, type: 'keyword'
|
63
|
+
field :action, type: 'keyword'
|
64
|
+
field :references, type: 'binary'
|
65
|
+
field :created_at, type: 'date'
|
64
66
|
|
65
|
-
|
66
|
-
|
67
|
+
def type
|
68
|
+
@type ||= Chewy.derive_type("#{index_name}##{type_name}")
|
69
|
+
end
|
70
|
+
|
71
|
+
def references
|
72
|
+
@references ||= Array.wrap(@attributes['references']).map do |item|
|
73
|
+
JSON.load(Base64.decode64(item)) # rubocop:disable Security/JSONLoad
|
74
|
+
end
|
75
|
+
end
|
67
76
|
end
|
68
77
|
end
|
69
78
|
end
|
@@ -11,7 +11,7 @@ module Chewy
|
|
11
11
|
#
|
12
12
|
class ActiveJob < Atomic
|
13
13
|
class Worker < ::ActiveJob::Base
|
14
|
-
queue_as :chewy
|
14
|
+
queue_as { Chewy.settings.dig(:active_job, :queue) || 'chewy' }
|
15
15
|
|
16
16
|
def perform(type, ids, options = {})
|
17
17
|
options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
|
@@ -18,7 +18,7 @@ module Chewy
|
|
18
18
|
|
19
19
|
def update(type, objects, _options = {})
|
20
20
|
@stash[type] ||= []
|
21
|
-
@stash[type] |= type.
|
21
|
+
@stash[type] |= type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)
|
22
22
|
end
|
23
23
|
|
24
24
|
def leave
|
data/lib/chewy/type.rb
CHANGED
@@ -14,7 +14,10 @@ require 'chewy/type/witchcraft'
|
|
14
14
|
|
15
15
|
module Chewy
|
16
16
|
class Type
|
17
|
-
IMPORT_OPTIONS_KEYS = %i[
|
17
|
+
IMPORT_OPTIONS_KEYS = %i[
|
18
|
+
batch_size bulk_size consistency direct_import journal
|
19
|
+
pipeline raw_import refresh replication
|
20
|
+
].freeze
|
18
21
|
|
19
22
|
include Search
|
20
23
|
include Mapping
|
@@ -34,7 +37,7 @@ module Chewy
|
|
34
37
|
# Chewy index current type belongs to. Defined inside `Chewy.create_type`
|
35
38
|
#
|
36
39
|
def index
|
37
|
-
raise NotImplementedError, 'Looks like this type
|
40
|
+
raise NotImplementedError, 'Looks like this type was defined outside the index scope and `.index` method is undefined for it'
|
38
41
|
end
|
39
42
|
|
40
43
|
# Current type adapter. Defined inside `Chewy.create_type`, derived from
|
@@ -22,7 +22,7 @@ module Chewy
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def import_scope(scope, options)
|
25
|
-
pluck_in_batches(scope, options.slice(:batch_size)).inject(true) do |result, ids|
|
25
|
+
pluck_in_batches(scope, **options.slice(:batch_size)).inject(true) do |result, ids|
|
26
26
|
objects = if options[:raw_import]
|
27
27
|
raw_default_scope_where_ids_in(ids, options[:raw_import])
|
28
28
|
else
|
@@ -41,19 +41,19 @@ module Chewy
|
|
41
41
|
#
|
42
42
|
# { delete: [object_or_id1, object_or_id2], index: [object3, object4, object5] }
|
43
43
|
#
|
44
|
-
# @
|
44
|
+
# @yieldparam _batch [Array<Object>] each batch of objects
|
45
45
|
# @return [true, false] returns true if all the block call returns true and false otherwise
|
46
|
-
def import(
|
46
|
+
def import(_batch, &_block)
|
47
47
|
raise NotImplementedError
|
48
48
|
end
|
49
49
|
|
50
50
|
# Unlike {#import} fetches only ids (references) to the imported objects,
|
51
51
|
# using the same procedures as {#import}.
|
52
52
|
#
|
53
|
-
# @param
|
54
|
-
# @param
|
55
|
-
# @
|
56
|
-
def import_fields(
|
53
|
+
# @param _fields [Array<Symbol>] additional fields to fetch
|
54
|
+
# @param _batch_size [Integer] batch size, defaults to 1000
|
55
|
+
# @yieldparam batch [Array<Object>] each batch of objects
|
56
|
+
def import_fields(_fields, _batch_size, &_block)
|
57
57
|
raise NotImplementedError
|
58
58
|
end
|
59
59
|
|
@@ -61,9 +61,9 @@ module Chewy
|
|
61
61
|
# an array of references to the passed objects. Returns ids if possible.
|
62
62
|
# Otherwise - and array of objects themselves.
|
63
63
|
#
|
64
|
-
# @param
|
65
|
-
# @
|
66
|
-
def import_references(
|
64
|
+
# @param _batch_size [Integer] batch size, defaults to 1000
|
65
|
+
# @yieldparam batch [Array<Object>] each batch of objects
|
66
|
+
def import_references(_batch_size, &_block)
|
67
67
|
raise NotImplementedError
|
68
68
|
end
|
69
69
|
|
@@ -17,9 +17,7 @@ module Chewy
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def cleanup_default_scope!
|
20
|
-
if Chewy.logger && @default_scope.options.values_at(:sort, :limit, :skip).compact.present?
|
21
|
-
Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified')
|
22
|
-
end
|
20
|
+
Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified') if Chewy.logger && @default_scope.options.values_at(:sort, :limit, :skip).compact.present?
|
23
21
|
|
24
22
|
@default_scope.options.delete(:limit)
|
25
23
|
@default_scope.options.delete(:skip)
|
@@ -27,7 +25,7 @@ module Chewy
|
|
27
25
|
end
|
28
26
|
|
29
27
|
def import_scope(scope, options)
|
30
|
-
pluck_in_batches(scope, options.slice(:batch_size)).map do |ids|
|
28
|
+
pluck_in_batches(scope, **options.slice(:batch_size)).map do |ids|
|
31
29
|
yield grouped_objects(default_scope_where_ids_in(ids))
|
32
30
|
end.all?
|
33
31
|
end
|
@@ -45,6 +45,7 @@ module Chewy
|
|
45
45
|
# Import options:
|
46
46
|
#
|
47
47
|
# <tt>:batch_size</tt> - import batch size, 1000 objects by default
|
48
|
+
# <tt>:direct_import</tt> - import objects without reloading
|
48
49
|
#
|
49
50
|
# Method handles destroyed objects as well. In case of objects ORM scope
|
50
51
|
# or array passed, objects, responding with true to `destroyed?` method will be deleted
|
@@ -75,10 +76,10 @@ module Chewy
|
|
75
76
|
def import(*args, &block)
|
76
77
|
collection, options = import_args(*args)
|
77
78
|
|
78
|
-
if collection.is_a?(relation_class)
|
79
|
-
import_scope(collection, options, &block)
|
80
|
-
else
|
79
|
+
if !collection.is_a?(relation_class) || options[:direct_import]
|
81
80
|
import_objects(collection, options, &block)
|
81
|
+
else
|
82
|
+
import_scope(collection, options, &block)
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
@@ -89,7 +90,7 @@ module Chewy
|
|
89
90
|
|
90
91
|
if options[:fields].present? || collection.is_a?(relation_class)
|
91
92
|
collection = all_scope_where_ids_in(identify(collection)) unless collection.is_a?(relation_class)
|
92
|
-
pluck_in_batches(collection, options.slice(:fields, :batch_size, :typecast), &block)
|
93
|
+
pluck_in_batches(collection, **options.slice(:fields, :batch_size, :typecast), &block)
|
93
94
|
else
|
94
95
|
identify(collection).each_slice(options[:batch_size]) do |batch|
|
95
96
|
yield batch
|
@@ -119,6 +120,8 @@ module Chewy
|
|
119
120
|
indexed = collection_ids.each_slice(options[:batch_size]).map do |ids|
|
120
121
|
batch = if options[:raw_import]
|
121
122
|
raw_default_scope_where_ids_in(ids, options[:raw_import])
|
123
|
+
elsif options[:direct_import]
|
124
|
+
hash.values_at(*ids.map(&:to_s))
|
122
125
|
else
|
123
126
|
default_scope_where_ids_in(ids)
|
124
127
|
end
|
@@ -16,15 +16,13 @@ module Chewy
|
|
16
16
|
private
|
17
17
|
|
18
18
|
def cleanup_default_scope!
|
19
|
-
if Chewy.logger && @default_scope != @default_scope.unordered.unlimited
|
20
|
-
Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified')
|
21
|
-
end
|
19
|
+
Chewy.logger.warn('Default type scope order, limit and offset are ignored and will be nullified') if Chewy.logger && @default_scope != @default_scope.unordered.unlimited
|
22
20
|
|
23
21
|
@default_scope = @default_scope.unordered.unlimited
|
24
22
|
end
|
25
23
|
|
26
24
|
def import_scope(scope, options)
|
27
|
-
pluck_in_batches(scope, options.slice(:batch_size)).inject(true) do |result, ids|
|
25
|
+
pluck_in_batches(scope, **options.slice(:batch_size)).inject(true) do |result, ids|
|
28
26
|
result & yield(grouped_objects(default_scope_where_ids_in(ids).all))
|
29
27
|
end
|
30
28
|
end
|
@@ -34,7 +32,7 @@ module Chewy
|
|
34
32
|
end
|
35
33
|
|
36
34
|
def full_column_name(column)
|
37
|
-
|
35
|
+
::Sequel.qualify(target.table_name, column)
|
38
36
|
end
|
39
37
|
|
40
38
|
def all_scope
|
@@ -55,7 +53,7 @@ module Chewy
|
|
55
53
|
def pluck_in_batches(scope, fields: [], batch_size: nil, **options)
|
56
54
|
return enum_for(:pluck_in_batches, scope, fields: fields, batch_size: batch_size, **options) unless block_given?
|
57
55
|
|
58
|
-
scope = scope.unordered.order(
|
56
|
+
scope = scope.unordered.order(full_column_name(primary_key).asc).limit(batch_size)
|
59
57
|
|
60
58
|
ids = pluck(scope, fields: fields)
|
61
59
|
count = 0
|
@@ -64,7 +62,7 @@ module Chewy
|
|
64
62
|
yield ids
|
65
63
|
break if ids.size < batch_size
|
66
64
|
last_id = ids.last.is_a?(Array) ? ids.last.first : ids.last
|
67
|
-
ids = pluck(scope.where { |
|
65
|
+
ids = pluck(scope.where { |_o| full_column_name(primary_key) > last_id }, fields: fields)
|
68
66
|
end
|
69
67
|
|
70
68
|
count
|
data/lib/chewy/type/crutch.rb
CHANGED
@@ -12,7 +12,7 @@ module Chewy
|
|
12
12
|
def initialize(type, collection)
|
13
13
|
@type = type
|
14
14
|
@collection = collection
|
15
|
-
@type._crutches.
|
15
|
+
@type._crutches.each_key do |name|
|
16
16
|
singleton_class.class_eval <<-METHOD, __FILE__, __LINE__ + 1
|
17
17
|
def #{name}
|
18
18
|
@#{name} ||= @type._crutches[:#{name}].call @collection
|
data/lib/chewy/type/import.rb
CHANGED
@@ -8,18 +8,18 @@ module Chewy
|
|
8
8
|
module Import
|
9
9
|
extend ActiveSupport::Concern
|
10
10
|
|
11
|
-
IMPORT_WORKER = lambda do |type, options, ids|
|
12
|
-
::Process.setproctitle("chewy
|
13
|
-
routine = Routine.new(type, options)
|
11
|
+
IMPORT_WORKER = lambda do |type, options, total, ids, index|
|
12
|
+
::Process.setproctitle("chewy [#{type}]: import data (#{index + 1}/#{total})")
|
13
|
+
routine = Routine.new(type, **options)
|
14
14
|
type.adapter.import(*ids, routine.options) do |action_objects|
|
15
15
|
routine.process(**action_objects)
|
16
16
|
end
|
17
17
|
{errors: routine.errors, import: routine.stats, leftovers: routine.leftovers}
|
18
18
|
end
|
19
19
|
|
20
|
-
LEFTOVERS_WORKER = lambda do |type, options, body|
|
21
|
-
::Process.setproctitle("chewy
|
22
|
-
routine = Routine.new(type, options)
|
20
|
+
LEFTOVERS_WORKER = lambda do |type, options, total, body, index|
|
21
|
+
::Process.setproctitle("chewy [#{type}]: import leftovers (#{index + 1}/#{total})")
|
22
|
+
routine = Routine.new(type, **options)
|
23
23
|
routine.perform_bulk(body)
|
24
24
|
routine.errors
|
25
25
|
end
|
@@ -67,6 +67,7 @@ module Chewy
|
|
67
67
|
# @option options [String] suffix an index name suffix, used for zero-downtime reset mostly, no suffix by default
|
68
68
|
# @option options [Integer] bulk_size bulk API chunk size in bytes; if passed, the request is performed several times for each chunk, empty by default
|
69
69
|
# @option options [Integer] batch_size passed to the adapter import method, used to split imported objects in chunks, 1000 by default
|
70
|
+
# @option options [Boolean] direct_import skips object reloading in ORM adapter, `false` by default
|
70
71
|
# @option options [true, false] journal enables imported objects journaling, false by default
|
71
72
|
# @option options [Array<Symbol, String>] update_fields list of fields for the partial import, empty by default
|
72
73
|
# @option options [true, false] update_failover enables full objects reimport in cases of partial update errors, `true` by default
|
@@ -116,17 +117,18 @@ module Chewy
|
|
116
117
|
def compose(object, crutches = nil, fields: [])
|
117
118
|
crutches ||= Chewy::Type::Crutch::Crutches.new self, [object]
|
118
119
|
|
119
|
-
if witchcraft? &&
|
120
|
+
if witchcraft? && root.children.present?
|
120
121
|
cauldron(fields: fields).brew(object, crutches)
|
121
122
|
else
|
122
|
-
|
123
|
+
root.compose(object, crutches, fields: fields)
|
123
124
|
end
|
124
125
|
end
|
125
126
|
|
126
127
|
private
|
127
128
|
|
128
129
|
def import_routine(*args)
|
129
|
-
|
130
|
+
return if args.first.blank? && !args.first.nil?
|
131
|
+
routine = Routine.new(self, **args.extract_options!)
|
130
132
|
routine.create_indexes!
|
131
133
|
|
132
134
|
if routine.parallel_options
|
@@ -155,13 +157,13 @@ module Chewy
|
|
155
157
|
batches = adapter.import_references(*objects, routine.options.slice(:batch_size)).to_a
|
156
158
|
|
157
159
|
::ActiveRecord::Base.connection.close if defined?(::ActiveRecord::Base)
|
158
|
-
results = ::Parallel.
|
160
|
+
results = ::Parallel.map_with_index(batches, routine.parallel_options, &IMPORT_WORKER.curry[self, routine.options, batches.size])
|
159
161
|
::ActiveRecord::Base.connection.reconnect! if defined?(::ActiveRecord::Base)
|
160
162
|
errors, import, leftovers = process_parallel_import_results(results)
|
161
163
|
|
162
164
|
if leftovers.present?
|
163
165
|
batches = leftovers.each_slice(routine.options[:batch_size])
|
164
|
-
results = ::Parallel.
|
166
|
+
results = ::Parallel.map_with_index(batches, routine.parallel_options, &LEFTOVERS_WORKER.curry[self, routine.options, batches.size])
|
165
167
|
errors.concat(results.flatten(1))
|
166
168
|
end
|
167
169
|
|
@@ -54,15 +54,17 @@ module Chewy
|
|
54
54
|
|
55
55
|
def request_bodies(body)
|
56
56
|
if @bulk_size
|
57
|
+
serializer = ::Elasticsearch::API.serializer
|
57
58
|
pieces = body.each_with_object(['']) do |piece, result|
|
58
59
|
operation, meta = piece.to_a.first
|
59
60
|
data = meta.delete(:data)
|
60
|
-
piece =
|
61
|
+
piece = serializer.dump(operation => meta)
|
62
|
+
piece << "\n" << serializer.dump(data) if data.present?
|
61
63
|
|
62
64
|
if result.last.bytesize + piece.bytesize > @bulk_size
|
63
65
|
result.push(piece)
|
64
66
|
else
|
65
|
-
result[-1]
|
67
|
+
result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
|
66
68
|
end
|
67
69
|
end
|
68
70
|
pieces.each { |piece| piece << "\n" }
|
@@ -10,7 +10,7 @@ module Chewy
|
|
10
10
|
|
11
11
|
def bulk_body
|
12
12
|
Chewy::Type::Import::BulkBuilder.new(
|
13
|
-
Chewy::Stash::Journal,
|
13
|
+
Chewy::Stash::Journal::Journal,
|
14
14
|
index: [
|
15
15
|
entries(:index, @index),
|
16
16
|
entries(:delete, @delete)
|
@@ -18,7 +18,7 @@ module Chewy
|
|
18
18
|
).bulk_body.each do |item|
|
19
19
|
item.values.first.merge!(
|
20
20
|
_index: Chewy::Stash::Journal.index_name,
|
21
|
-
_type: Chewy::Stash::Journal.type_name
|
21
|
+
_type: Chewy::Stash::Journal::Journal.type_name
|
22
22
|
)
|
23
23
|
end
|
24
24
|
end
|
@@ -31,7 +31,7 @@ module Chewy
|
|
31
31
|
index_name: @type.index.derivable_name,
|
32
32
|
type_name: @type.type_name,
|
33
33
|
action: action,
|
34
|
-
references: identify(objects).map(
|
34
|
+
references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
|
35
35
|
created_at: Time.now.utc
|
36
36
|
}
|
37
37
|
end
|