chewy 0.9.0 → 5.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.circleci/config.yml +214 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +41 -19
- data/.rubocop_todo.yml +2 -2
- data/.yardopts +5 -0
- data/Appraisals +58 -28
- data/CHANGELOG.md +153 -12
- data/Gemfile +20 -12
- data/LEGACY_DSL.md +497 -0
- data/LICENSE.txt +1 -1
- data/README.md +338 -528
- data/chewy.gemspec +11 -12
- data/gemfiles/rails.5.2.activerecord.gemfile +17 -0
- data/gemfiles/rails.5.2.mongoid.6.4.gemfile +17 -0
- data/gemfiles/rails.6.0.activerecord.gemfile +17 -0
- data/gemfiles/rails.6.1.activerecord.gemfile +19 -0
- data/gemfiles/ruby3.gemfile +10 -0
- data/gemfiles/sequel.4.45.gemfile +11 -0
- data/lib/chewy.rb +79 -44
- data/lib/chewy/backports/duplicable.rb +1 -1
- data/lib/chewy/config.rb +43 -17
- data/lib/chewy/errors.rb +2 -2
- data/lib/chewy/fields/base.rb +56 -31
- data/lib/chewy/fields/root.rb +44 -11
- data/lib/chewy/index.rb +237 -149
- data/lib/chewy/index/actions.rb +100 -35
- data/lib/chewy/index/aliases.rb +2 -1
- data/lib/chewy/index/settings.rb +11 -5
- data/lib/chewy/index/specification.rb +60 -0
- data/lib/chewy/journal.rb +40 -92
- data/lib/chewy/minitest/helpers.rb +6 -6
- data/lib/chewy/minitest/search_index_receiver.rb +17 -17
- data/lib/chewy/query.rb +182 -122
- data/lib/chewy/query/compose.rb +13 -13
- data/lib/chewy/query/criteria.rb +13 -13
- data/lib/chewy/query/filters.rb +21 -4
- data/lib/chewy/query/loading.rb +1 -2
- data/lib/chewy/query/nodes/and.rb +2 -2
- data/lib/chewy/query/nodes/bool.rb +1 -1
- data/lib/chewy/query/nodes/equal.rb +2 -2
- data/lib/chewy/query/nodes/exists.rb +1 -1
- data/lib/chewy/query/nodes/field.rb +1 -1
- data/lib/chewy/query/nodes/has_relation.rb +2 -2
- data/lib/chewy/query/nodes/match_all.rb +1 -1
- data/lib/chewy/query/nodes/missing.rb +1 -1
- data/lib/chewy/query/nodes/not.rb +2 -2
- data/lib/chewy/query/nodes/or.rb +2 -2
- data/lib/chewy/query/nodes/prefix.rb +1 -1
- data/lib/chewy/query/nodes/query.rb +2 -2
- data/lib/chewy/query/nodes/range.rb +4 -4
- data/lib/chewy/query/nodes/regexp.rb +4 -4
- data/lib/chewy/query/nodes/script.rb +3 -3
- data/lib/chewy/query/pagination.rb +10 -1
- data/lib/chewy/railtie.rb +4 -3
- data/lib/chewy/rake_helper.rb +265 -48
- data/lib/chewy/rspec/update_index.rb +33 -27
- data/lib/chewy/search.rb +79 -26
- data/lib/chewy/search/loader.rb +83 -0
- data/lib/chewy/{query → search}/pagination/kaminari.rb +13 -5
- data/lib/chewy/search/pagination/will_paginate.rb +43 -0
- data/lib/chewy/search/parameters.rb +168 -0
- data/lib/chewy/search/parameters/aggs.rb +16 -0
- data/lib/chewy/search/parameters/allow_partial_search_results.rb +27 -0
- data/lib/chewy/search/parameters/concerns/bool_storage.rb +24 -0
- data/lib/chewy/search/parameters/concerns/hash_storage.rb +23 -0
- data/lib/chewy/search/parameters/concerns/integer_storage.rb +14 -0
- data/lib/chewy/search/parameters/concerns/query_storage.rb +238 -0
- data/lib/chewy/search/parameters/concerns/string_array_storage.rb +23 -0
- data/lib/chewy/search/parameters/concerns/string_storage.rb +14 -0
- data/lib/chewy/search/parameters/docvalue_fields.rb +12 -0
- data/lib/chewy/search/parameters/explain.rb +16 -0
- data/lib/chewy/search/parameters/filter.rb +47 -0
- data/lib/chewy/search/parameters/highlight.rb +16 -0
- data/lib/chewy/search/parameters/indices.rb +123 -0
- data/lib/chewy/search/parameters/indices_boost.rb +52 -0
- data/lib/chewy/search/parameters/limit.rb +17 -0
- data/lib/chewy/search/parameters/load.rb +32 -0
- data/lib/chewy/search/parameters/min_score.rb +16 -0
- data/lib/chewy/search/parameters/none.rb +27 -0
- data/lib/chewy/search/parameters/offset.rb +17 -0
- data/lib/chewy/search/parameters/order.rb +64 -0
- data/lib/chewy/search/parameters/post_filter.rb +19 -0
- data/lib/chewy/search/parameters/preference.rb +16 -0
- data/lib/chewy/search/parameters/profile.rb +16 -0
- data/lib/chewy/search/parameters/query.rb +19 -0
- data/lib/chewy/search/parameters/request_cache.rb +27 -0
- data/lib/chewy/search/parameters/rescore.rb +29 -0
- data/lib/chewy/search/parameters/script_fields.rb +16 -0
- data/lib/chewy/search/parameters/search_after.rb +20 -0
- data/lib/chewy/search/parameters/search_type.rb +16 -0
- data/lib/chewy/search/parameters/source.rb +73 -0
- data/lib/chewy/search/parameters/storage.rb +95 -0
- data/lib/chewy/search/parameters/stored_fields.rb +63 -0
- data/lib/chewy/search/parameters/suggest.rb +16 -0
- data/lib/chewy/search/parameters/terminate_after.rb +16 -0
- data/lib/chewy/search/parameters/timeout.rb +16 -0
- data/lib/chewy/search/parameters/track_scores.rb +16 -0
- data/lib/chewy/search/parameters/types.rb +20 -0
- data/lib/chewy/search/parameters/version.rb +16 -0
- data/lib/chewy/search/query_proxy.rb +257 -0
- data/lib/chewy/search/request.rb +1046 -0
- data/lib/chewy/search/response.rb +119 -0
- data/lib/chewy/search/scoping.rb +50 -0
- data/lib/chewy/search/scrolling.rb +134 -0
- data/lib/chewy/stash.rb +79 -0
- data/lib/chewy/strategy.rb +10 -3
- data/lib/chewy/strategy/active_job.rb +2 -1
- data/lib/chewy/strategy/atomic.rb +2 -4
- data/lib/chewy/strategy/bypass.rb +1 -1
- data/lib/chewy/strategy/resque.rb +1 -0
- data/lib/chewy/strategy/shoryuken.rb +40 -0
- data/lib/chewy/strategy/sidekiq.rb +13 -3
- data/lib/chewy/type.rb +29 -7
- data/lib/chewy/type/actions.rb +26 -2
- data/lib/chewy/type/adapter/active_record.rb +44 -29
- data/lib/chewy/type/adapter/base.rb +27 -7
- data/lib/chewy/type/adapter/mongoid.rb +19 -10
- data/lib/chewy/type/adapter/object.rb +187 -26
- data/lib/chewy/type/adapter/orm.rb +59 -32
- data/lib/chewy/type/adapter/sequel.rb +33 -19
- data/lib/chewy/type/crutch.rb +1 -1
- data/lib/chewy/type/import.rb +146 -191
- data/lib/chewy/type/import/bulk_builder.rb +122 -0
- data/lib/chewy/type/import/bulk_request.rb +78 -0
- data/lib/chewy/type/import/journal_builder.rb +45 -0
- data/lib/chewy/type/import/routine.rb +138 -0
- data/lib/chewy/type/mapping.rb +51 -35
- data/lib/chewy/type/observe.rb +17 -13
- data/lib/chewy/type/syncer.rb +222 -0
- data/lib/chewy/type/witchcraft.rb +32 -16
- data/lib/chewy/type/wrapper.rb +30 -4
- data/lib/chewy/version.rb +1 -1
- data/lib/sequel/plugins/chewy_observe.rb +4 -19
- data/lib/tasks/chewy.rake +84 -26
- data/spec/chewy/config_spec.rb +98 -1
- data/spec/chewy/fields/base_spec.rb +170 -135
- data/spec/chewy/fields/root_spec.rb +124 -20
- data/spec/chewy/fields/time_fields_spec.rb +2 -3
- data/spec/chewy/index/actions_spec.rb +214 -52
- data/spec/chewy/index/aliases_spec.rb +2 -2
- data/spec/chewy/index/settings_spec.rb +67 -38
- data/spec/chewy/index/specification_spec.rb +169 -0
- data/spec/chewy/index_spec.rb +108 -64
- data/spec/chewy/journal_spec.rb +150 -55
- data/spec/chewy/minitest/helpers_spec.rb +4 -4
- data/spec/chewy/minitest/search_index_receiver_spec.rb +1 -1
- data/spec/chewy/query/criteria_spec.rb +179 -179
- data/spec/chewy/query/filters_spec.rb +16 -16
- data/spec/chewy/query/loading_spec.rb +22 -20
- data/spec/chewy/query/nodes/and_spec.rb +2 -2
- data/spec/chewy/query/nodes/bool_spec.rb +4 -4
- data/spec/chewy/query/nodes/equal_spec.rb +19 -19
- data/spec/chewy/query/nodes/exists_spec.rb +6 -6
- data/spec/chewy/query/nodes/has_child_spec.rb +19 -19
- data/spec/chewy/query/nodes/has_parent_spec.rb +19 -19
- data/spec/chewy/query/nodes/missing_spec.rb +5 -5
- data/spec/chewy/query/nodes/not_spec.rb +4 -2
- data/spec/chewy/query/nodes/or_spec.rb +2 -2
- data/spec/chewy/query/nodes/prefix_spec.rb +5 -5
- data/spec/chewy/query/nodes/query_spec.rb +2 -2
- data/spec/chewy/query/nodes/range_spec.rb +18 -18
- data/spec/chewy/query/nodes/raw_spec.rb +1 -1
- data/spec/chewy/query/nodes/regexp_spec.rb +14 -14
- data/spec/chewy/query/nodes/script_spec.rb +4 -4
- data/spec/chewy/query/pagination/kaminari_spec.rb +3 -55
- data/spec/chewy/query/pagination/will_paginate_spec.rb +5 -0
- data/spec/chewy/query/pagination_spec.rb +25 -21
- data/spec/chewy/query_spec.rb +503 -561
- data/spec/chewy/rake_helper_spec.rb +381 -0
- data/spec/chewy/repository_spec.rb +4 -4
- data/spec/chewy/rspec/update_index_spec.rb +89 -56
- data/spec/chewy/runtime_spec.rb +2 -2
- data/spec/chewy/search/loader_spec.rb +117 -0
- data/spec/chewy/search/pagination/kaminari_examples.rb +71 -0
- data/spec/chewy/search/pagination/kaminari_spec.rb +21 -0
- data/spec/chewy/search/pagination/will_paginate_examples.rb +63 -0
- data/spec/chewy/search/pagination/will_paginate_spec.rb +23 -0
- data/spec/chewy/search/parameters/aggs_spec.rb +5 -0
- data/spec/chewy/search/parameters/bool_storage_examples.rb +53 -0
- data/spec/chewy/search/parameters/docvalue_fields_spec.rb +5 -0
- data/spec/chewy/search/parameters/explain_spec.rb +5 -0
- data/spec/chewy/search/parameters/filter_spec.rb +5 -0
- data/spec/chewy/search/parameters/hash_storage_examples.rb +59 -0
- data/spec/chewy/search/parameters/highlight_spec.rb +5 -0
- data/spec/chewy/search/parameters/indices_spec.rb +191 -0
- data/spec/chewy/search/parameters/integer_storage_examples.rb +32 -0
- data/spec/chewy/search/parameters/limit_spec.rb +5 -0
- data/spec/chewy/search/parameters/load_spec.rb +60 -0
- data/spec/chewy/search/parameters/min_score_spec.rb +32 -0
- data/spec/chewy/search/parameters/none_spec.rb +5 -0
- data/spec/chewy/search/parameters/offset_spec.rb +5 -0
- data/spec/chewy/search/parameters/order_spec.rb +65 -0
- data/spec/chewy/search/parameters/post_filter_spec.rb +5 -0
- data/spec/chewy/search/parameters/preference_spec.rb +5 -0
- data/spec/chewy/search/parameters/profile_spec.rb +5 -0
- data/spec/chewy/search/parameters/query_spec.rb +5 -0
- data/spec/chewy/search/parameters/query_storage_examples.rb +388 -0
- data/spec/chewy/search/parameters/request_cache_spec.rb +67 -0
- data/spec/chewy/search/parameters/rescore_spec.rb +62 -0
- data/spec/chewy/search/parameters/script_fields_spec.rb +5 -0
- data/spec/chewy/search/parameters/search_after_spec.rb +32 -0
- data/spec/chewy/search/parameters/search_type_spec.rb +5 -0
- data/spec/chewy/search/parameters/source_spec.rb +156 -0
- data/spec/chewy/search/parameters/storage_spec.rb +60 -0
- data/spec/chewy/search/parameters/stored_fields_spec.rb +126 -0
- data/spec/chewy/search/parameters/string_array_storage_examples.rb +63 -0
- data/spec/chewy/search/parameters/string_storage_examples.rb +32 -0
- data/spec/chewy/search/parameters/suggest_spec.rb +5 -0
- data/spec/chewy/search/parameters/terminate_after_spec.rb +5 -0
- data/spec/chewy/search/parameters/timeout_spec.rb +5 -0
- data/spec/chewy/search/parameters/track_scores_spec.rb +5 -0
- data/spec/chewy/search/parameters/types_spec.rb +5 -0
- data/spec/chewy/search/parameters/version_spec.rb +5 -0
- data/spec/chewy/search/parameters_spec.rb +147 -0
- data/spec/chewy/search/query_proxy_spec.rb +68 -0
- data/spec/chewy/search/request_spec.rb +685 -0
- data/spec/chewy/search/response_spec.rb +198 -0
- data/spec/chewy/search/scrolling_spec.rb +169 -0
- data/spec/chewy/search_spec.rb +33 -16
- data/spec/chewy/stash_spec.rb +95 -0
- data/spec/chewy/strategy/active_job_spec.rb +21 -2
- data/spec/chewy/strategy/resque_spec.rb +6 -0
- data/spec/chewy/strategy/shoryuken_spec.rb +70 -0
- data/spec/chewy/strategy/sidekiq_spec.rb +13 -1
- data/spec/chewy/strategy_spec.rb +6 -6
- data/spec/chewy/type/actions_spec.rb +29 -10
- data/spec/chewy/type/adapter/active_record_spec.rb +203 -91
- data/spec/chewy/type/adapter/mongoid_spec.rb +112 -54
- data/spec/chewy/type/adapter/object_spec.rb +101 -28
- data/spec/chewy/type/adapter/sequel_spec.rb +149 -82
- data/spec/chewy/type/import/bulk_builder_spec.rb +279 -0
- data/spec/chewy/type/import/bulk_request_spec.rb +102 -0
- data/spec/chewy/type/import/journal_builder_spec.rb +95 -0
- data/spec/chewy/type/import/routine_spec.rb +110 -0
- data/spec/chewy/type/import_spec.rb +356 -271
- data/spec/chewy/type/mapping_spec.rb +96 -29
- data/spec/chewy/type/observe_spec.rb +9 -5
- data/spec/chewy/type/syncer_spec.rb +123 -0
- data/spec/chewy/type/witchcraft_spec.rb +61 -29
- data/spec/chewy/type/wrapper_spec.rb +63 -23
- data/spec/chewy/type_spec.rb +28 -7
- data/spec/chewy_spec.rb +75 -7
- data/spec/spec_helper.rb +17 -3
- data/spec/support/active_record.rb +5 -1
- data/spec/support/class_helpers.rb +0 -14
- data/spec/support/mongoid.rb +15 -3
- data/spec/support/sequel.rb +6 -1
- metadata +219 -58
- data/.travis.yml +0 -36
- data/gemfiles/rails.3.2.activerecord.gemfile +0 -16
- data/gemfiles/rails.3.2.activerecord.kaminari.gemfile +0 -15
- data/gemfiles/rails.3.2.activerecord.will_paginate.gemfile +0 -15
- data/gemfiles/rails.4.2.activerecord.gemfile +0 -17
- data/gemfiles/rails.4.2.activerecord.kaminari.gemfile +0 -16
- data/gemfiles/rails.4.2.activerecord.will_paginate.gemfile +0 -16
- data/gemfiles/rails.4.2.mongoid.4.0.gemfile +0 -16
- data/gemfiles/rails.4.2.mongoid.4.0.kaminari.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.4.0.will_paginate.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.5.1.gemfile +0 -16
- data/gemfiles/rails.4.2.mongoid.5.1.kaminari.gemfile +0 -15
- data/gemfiles/rails.4.2.mongoid.5.1.will_paginate.gemfile +0 -15
- data/gemfiles/rails.5.0.activerecord.gemfile +0 -17
- data/gemfiles/rails.5.0.activerecord.kaminari.gemfile +0 -16
- data/gemfiles/rails.5.0.activerecord.will_paginate.gemfile +0 -16
- data/gemfiles/sequel.4.38.gemfile +0 -14
- data/lib/chewy/journal/apply.rb +0 -31
- data/lib/chewy/journal/clean.rb +0 -24
- data/lib/chewy/journal/entry.rb +0 -83
- data/lib/chewy/journal/query.rb +0 -87
- data/lib/chewy/query/pagination/will_paginate.rb +0 -27
- data/lib/chewy/query/scoping.rb +0 -20
- data/spec/chewy/journal/apply_spec.rb +0 -120
- data/spec/chewy/journal/entry_spec.rb +0 -237
- data/spec/chewy/query/pagination/will_paginage_spec.rb +0 -59
@@ -0,0 +1,122 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Type
|
3
|
+
module Import
|
4
|
+
# The purpose of this class is to build an ES client-acceptable bulk
|
5
|
+
# request body from the passed objects for index and deletion.
|
6
|
+
# It handles parent-child relationships as well by fetching
|
7
|
+
# existing documents from ES, taking their `_parent` field and
|
8
|
+
# using it in the bulk body.
|
9
|
+
# If fields are passed - it creates partial update entries except for
|
10
|
+
# the cases when the type has parent and parent_id has been changed.
|
11
|
+
class BulkBuilder
|
12
|
+
# @param type [Chewy::Type] desired type
|
13
|
+
# @param index [Array<Object>] objects to index
|
14
|
+
# @param delete [Array<Object>] objects or ids to delete
|
15
|
+
# @param fields [Array<Symbol, String>] an array of fields for documents update
|
16
|
+
def initialize(type, index: [], delete: [], fields: [])
|
17
|
+
@type = type
|
18
|
+
@index = index
|
19
|
+
@delete = delete
|
20
|
+
@fields = fields.map!(&:to_sym)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Returns ES API-ready bulk request body.
|
24
|
+
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
25
|
+
# @return [Array<Hash>] bulk body
|
26
|
+
def bulk_body
|
27
|
+
@bulk_body ||= @index.flat_map(&method(:index_entry)).concat(
|
28
|
+
@delete.flat_map(&method(:delete_entry))
|
29
|
+
)
|
30
|
+
end
|
31
|
+
|
32
|
+
# The only purpose of this method is to cache document ids for
|
33
|
+
# all the passed objects for index to avoid ids recalculation.
|
34
|
+
#
|
35
|
+
# @return [Hash[String => Object]] an ids-objects index hash
|
36
|
+
def index_objects_by_id
|
37
|
+
@index_objects_by_id ||= index_object_ids.invert.stringify_keys!
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def crutches
|
43
|
+
@crutches ||= Chewy::Type::Crutch::Crutches.new @type, @index
|
44
|
+
end
|
45
|
+
|
46
|
+
def parents
|
47
|
+
return unless type_root.parent_id
|
48
|
+
|
49
|
+
@parents ||= begin
|
50
|
+
ids = @index.map do |object|
|
51
|
+
object.respond_to?(:id) ? object.id : object
|
52
|
+
end
|
53
|
+
ids.concat(@delete.map do |object|
|
54
|
+
object.respond_to?(:id) ? object.id : object
|
55
|
+
end)
|
56
|
+
@type.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def index_entry(object)
|
61
|
+
entry = {}
|
62
|
+
entry[:_id] = index_object_ids[object] if index_object_ids[object]
|
63
|
+
|
64
|
+
if parents
|
65
|
+
entry[:parent] = type_root.compose_parent(object)
|
66
|
+
parent = entry[:_id].present? && parents[entry[:_id].to_s]
|
67
|
+
end
|
68
|
+
|
69
|
+
if parent && entry[:parent].to_s != parent
|
70
|
+
entry[:data] = @type.compose(object, crutches)
|
71
|
+
[{delete: entry.except(:data).merge(parent: parent)}, {index: entry}]
|
72
|
+
elsif @fields.present?
|
73
|
+
return [] unless entry[:_id]
|
74
|
+
entry[:data] = {doc: @type.compose(object, crutches, fields: @fields)}
|
75
|
+
[{update: entry}]
|
76
|
+
else
|
77
|
+
entry[:data] = @type.compose(object, crutches)
|
78
|
+
[{index: entry}]
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def delete_entry(object)
|
83
|
+
entry = {}
|
84
|
+
entry[:_id] = entry_id(object)
|
85
|
+
entry[:_id] ||= object.as_json
|
86
|
+
|
87
|
+
return [] if entry[:_id].blank?
|
88
|
+
|
89
|
+
if parents
|
90
|
+
parent = entry[:_id].present? && parents[entry[:_id].to_s]
|
91
|
+
return [] unless parent
|
92
|
+
entry[:parent] = parent
|
93
|
+
end
|
94
|
+
|
95
|
+
[{delete: entry}]
|
96
|
+
end
|
97
|
+
|
98
|
+
def entry_id(object)
|
99
|
+
if type_root.id
|
100
|
+
type_root.compose_id(object)
|
101
|
+
else
|
102
|
+
id = object.id if object.respond_to?(:id)
|
103
|
+
id ||= object[:id] || object['id'] if object.is_a?(Hash)
|
104
|
+
id = id.to_s if defined?(BSON) && id.is_a?(BSON::ObjectId)
|
105
|
+
id
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def index_object_ids
|
110
|
+
@index_object_ids ||= @index.each_with_object({}) do |object, result|
|
111
|
+
id = entry_id(object)
|
112
|
+
result[object] = id if id.present?
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def type_root
|
117
|
+
@type_root ||= @type.root
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Type
|
3
|
+
module Import
|
4
|
+
# Adds additional features to elasticsearch-api bulk method:
|
5
|
+
# * supports Chewy index suffix if necessary;
|
6
|
+
# * supports bulk_size, divides the passed body into chunks
|
7
|
+
# and performs a separate request for each chunk;
|
8
|
+
# * returns only errored document entries from the response
|
9
|
+
# if any present.
|
10
|
+
#
|
11
|
+
# @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
12
|
+
class BulkRequest
|
13
|
+
# @param type [Chewy::Type] a type for the request
|
14
|
+
# @param suffix [String] an index name optional suffix
|
15
|
+
# @param bulk_size [Integer] bulk size in bytes
|
16
|
+
# @param bulk_options [Hash] options passed to the elasticsearch-api bulk method
|
17
|
+
def initialize(type, suffix: nil, bulk_size: nil, **bulk_options)
|
18
|
+
@type = type
|
19
|
+
@suffix = suffix
|
20
|
+
@bulk_size = bulk_size - 1.kilobyte if bulk_size # 1 kilobyte for request header and newlines
|
21
|
+
@bulk_options = bulk_options
|
22
|
+
|
23
|
+
raise ArgumentError, '`bulk_size` can\'t be less than 1 kilobyte' if @bulk_size && @bulk_size <= 0
|
24
|
+
end
|
25
|
+
|
26
|
+
# Performs a bulk request with the passed body, returns empty
|
27
|
+
# array if everything is fine and array filled with errored
|
28
|
+
# document entries if something went wrong.
|
29
|
+
#
|
30
|
+
# @param body [Array<Hash>] a standard bulk request body
|
31
|
+
# @return [Array<Hash>] an array of bulk errors
|
32
|
+
def perform(body)
|
33
|
+
return [] if body.blank?
|
34
|
+
|
35
|
+
request_bodies(body).each_with_object([]) do |request_body, results|
|
36
|
+
response = @type.client.bulk request_base.merge(body: request_body) if request_body.present?
|
37
|
+
|
38
|
+
next unless response.try(:[], 'errors')
|
39
|
+
|
40
|
+
response_items = (response.try(:[], 'items') || [])
|
41
|
+
.select { |item| item.values.first['error'] }
|
42
|
+
results.concat(response_items)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def request_base
|
49
|
+
@request_base ||= {
|
50
|
+
index: @type.index_name(suffix: @suffix),
|
51
|
+
type: @type.type_name
|
52
|
+
}.merge!(@bulk_options)
|
53
|
+
end
|
54
|
+
|
55
|
+
def request_bodies(body)
|
56
|
+
if @bulk_size
|
57
|
+
serializer = ::Elasticsearch::API.serializer
|
58
|
+
pieces = body.each_with_object(['']) do |piece, result|
|
59
|
+
operation, meta = piece.to_a.first
|
60
|
+
data = meta.delete(:data)
|
61
|
+
piece = serializer.dump(operation => meta)
|
62
|
+
piece << "\n" << serializer.dump(data) if data.present?
|
63
|
+
|
64
|
+
if result.last.bytesize + piece.bytesize > @bulk_size
|
65
|
+
result.push(piece)
|
66
|
+
else
|
67
|
+
result[-1].blank? ? (result[-1] = piece) : (result[-1] << "\n" << piece)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
pieces.each { |piece| piece << "\n" }
|
71
|
+
else
|
72
|
+
[body]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Type
|
3
|
+
module Import
|
4
|
+
class JournalBuilder
|
5
|
+
def initialize(type, index: [], delete: [])
|
6
|
+
@type = type
|
7
|
+
@index = index
|
8
|
+
@delete = delete
|
9
|
+
end
|
10
|
+
|
11
|
+
def bulk_body
|
12
|
+
Chewy::Type::Import::BulkBuilder.new(
|
13
|
+
Chewy::Stash::Journal::Journal,
|
14
|
+
index: [
|
15
|
+
entries(:index, @index),
|
16
|
+
entries(:delete, @delete)
|
17
|
+
].compact
|
18
|
+
).bulk_body.each do |item|
|
19
|
+
item.values.first.merge!(
|
20
|
+
_index: Chewy::Stash::Journal.index_name,
|
21
|
+
_type: Chewy::Stash::Journal::Journal.type_name
|
22
|
+
)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def entries(action, objects)
|
29
|
+
return unless objects.present?
|
30
|
+
{
|
31
|
+
index_name: @type.index.derivable_name,
|
32
|
+
type_name: @type.type_name,
|
33
|
+
action: action,
|
34
|
+
references: identify(objects).map { |item| Base64.encode64(::Elasticsearch::API.serializer.dump(item)) },
|
35
|
+
created_at: Time.now.utc
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def identify(objects)
|
40
|
+
@type.adapter.identify(objects)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Chewy
|
2
|
+
class Type
|
3
|
+
module Import
|
4
|
+
# This class performs the import routine for the options and objects given.
|
5
|
+
#
|
6
|
+
# 0. Create target and journal indexes if needed.
|
7
|
+
# 1. Iterate over all the passed objects in batches.
|
8
|
+
# 2. For each batch {#process} method is called:
|
9
|
+
# * creates a bulk request body;
|
10
|
+
# * appends journal entries for the current batch to the request body;
|
11
|
+
# * prepends a leftovers bulk to the request body, which is calculated
|
12
|
+
# based on the previous iteration errors;
|
13
|
+
# * performs the bulk request;
|
14
|
+
# * composes new leftovers bulk for the next iteration based on the response errors if `update_failover` is true;
|
15
|
+
# * appends the rest of unfixable errors to the instance level errors array.
|
16
|
+
# 3. Perform the request for the last leftovers bulk if present using {#extract_leftovers}.
|
17
|
+
# 4. Return the result errors array.
|
18
|
+
#
|
19
|
+
# At the moment, it tries to restore only from the partial document update errors in cases
|
20
|
+
# when the document doesn't exist only if `update_failover` option is true. In order to
|
21
|
+
# restore, it indexes such objects completely on the next iteration.
|
22
|
+
#
|
23
|
+
# @see Chewy::Type::Import::ClassMethods#import
|
24
|
+
class Routine
|
25
|
+
BULK_OPTIONS = %i[
|
26
|
+
suffix bulk_size
|
27
|
+
refresh timeout fields pipeline
|
28
|
+
consistency replication
|
29
|
+
wait_for_active_shards routing _source _source_exclude _source_include
|
30
|
+
].freeze
|
31
|
+
|
32
|
+
DEFAULT_OPTIONS = {
|
33
|
+
refresh: true,
|
34
|
+
update_fields: [],
|
35
|
+
update_failover: true,
|
36
|
+
batch_size: Chewy::Type::Adapter::Base::BATCH_SIZE
|
37
|
+
}.freeze
|
38
|
+
|
39
|
+
attr_reader :options, :parallel_options, :errors, :stats, :leftovers
|
40
|
+
|
41
|
+
# Basically, processes passed options, extracting bulk request specific options.
|
42
|
+
# @param type [Chewy::Type] chewy type
|
43
|
+
# @param options [Hash] import options, see {Chewy::Type::Import::ClassMethods#import}
|
44
|
+
def initialize(type, **options)
|
45
|
+
@type = type
|
46
|
+
@options = options
|
47
|
+
@options.reverse_merge!(@type._default_import_options)
|
48
|
+
@options.reverse_merge!(journal: Chewy.configuration[:journal])
|
49
|
+
@options.reverse_merge!(DEFAULT_OPTIONS)
|
50
|
+
@bulk_options = @options.slice(*BULK_OPTIONS)
|
51
|
+
@parallel_options = @options.delete(:parallel)
|
52
|
+
if @parallel_options && !@parallel_options.is_a?(Hash)
|
53
|
+
@parallel_options = if @parallel_options.is_a?(Integer)
|
54
|
+
{in_processes: @parallel_options}
|
55
|
+
else
|
56
|
+
{}
|
57
|
+
end
|
58
|
+
end
|
59
|
+
@errors = []
|
60
|
+
@stats = {}
|
61
|
+
@leftovers = []
|
62
|
+
end
|
63
|
+
|
64
|
+
# Creates the journal index and the type corresponding index if necessary.
|
65
|
+
# @return [Object] whatever
|
66
|
+
def create_indexes!
|
67
|
+
Chewy::Stash::Journal.create if @options[:journal]
|
68
|
+
return if Chewy.configuration[:skip_index_creation_on_import]
|
69
|
+
@type.index.create!(**@bulk_options.slice(:suffix)) unless @type.index.exists?
|
70
|
+
end
|
71
|
+
|
72
|
+
# The main process method. Converts passed objects to the bulk request body,
|
73
|
+
# appends journal entries, performs this request and handles errors performing
|
74
|
+
# failover procedures if applicable.
|
75
|
+
#
|
76
|
+
# @param index [Array<Object>] any acceptable objects for indexing
|
77
|
+
# @param delete [Array<Object>] any acceptable objects for deleting
|
78
|
+
# @return [true, false] the result of the request, true if no errors
|
79
|
+
def process(index: [], delete: [])
|
80
|
+
bulk_builder = BulkBuilder.new(@type, index: index, delete: delete, fields: @options[:update_fields])
|
81
|
+
bulk_body = bulk_builder.bulk_body
|
82
|
+
|
83
|
+
if @options[:journal]
|
84
|
+
journal_builder = JournalBuilder.new(@type, index: index, delete: delete)
|
85
|
+
bulk_body.concat(journal_builder.bulk_body)
|
86
|
+
end
|
87
|
+
|
88
|
+
bulk_body.unshift(*flush_leftovers)
|
89
|
+
|
90
|
+
perform_bulk(bulk_body) do |response|
|
91
|
+
@leftovers = extract_leftovers(response, bulk_builder.index_objects_by_id)
|
92
|
+
@stats[:index] = @stats[:index].to_i + index.count if index.present?
|
93
|
+
@stats[:delete] = @stats[:delete].to_i + delete.count if delete.present?
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Performs a bulk request for the passed body.
|
98
|
+
#
|
99
|
+
# @param body [Array<Hash>] a standard bulk request body
|
100
|
+
# @return [true, false] the result of the request, true if no errors
|
101
|
+
def perform_bulk(body)
|
102
|
+
response = bulk.perform(body)
|
103
|
+
yield response if block_given?
|
104
|
+
Chewy.wait_for_status
|
105
|
+
@errors.concat(response)
|
106
|
+
response.blank?
|
107
|
+
end
|
108
|
+
|
109
|
+
private
|
110
|
+
|
111
|
+
def flush_leftovers
|
112
|
+
leftovers = @leftovers
|
113
|
+
@leftovers = []
|
114
|
+
leftovers
|
115
|
+
end
|
116
|
+
|
117
|
+
def extract_leftovers(errors, index_objects_by_id)
|
118
|
+
return [] unless @options[:update_fields].present? && @options[:update_failover] && errors.present?
|
119
|
+
|
120
|
+
failed_partial_updates = errors.select do |item|
|
121
|
+
item.keys.first == 'update' && item.values.first['error']['type'] == 'document_missing_exception'
|
122
|
+
end
|
123
|
+
failed_ids_hash = failed_partial_updates.index_by { |item| item.values.first['_id'].to_s }
|
124
|
+
failed_ids_for_reimport = failed_ids_hash.keys & index_objects_by_id.keys
|
125
|
+
errors_to_cleanup = failed_ids_hash.values_at(*failed_ids_for_reimport)
|
126
|
+
errors_to_cleanup.each { |error| errors.delete(error) }
|
127
|
+
|
128
|
+
failed_objects = index_objects_by_id.values_at(*failed_ids_for_reimport)
|
129
|
+
BulkBuilder.new(@type, index: failed_objects).bulk_body
|
130
|
+
end
|
131
|
+
|
132
|
+
def bulk
|
133
|
+
@bulk ||= BulkRequest.new(@type, **@bulk_options)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
data/lib/chewy/type/mapping.rb
CHANGED
@@ -8,6 +8,8 @@ module Chewy
|
|
8
8
|
class_attribute :_templates
|
9
9
|
class_attribute :_agg_defs
|
10
10
|
self._agg_defs = {}
|
11
|
+
class_attribute :outdated_sync_field
|
12
|
+
self.outdated_sync_field = :updated_at
|
11
13
|
end
|
12
14
|
|
13
15
|
module ClassMethods
|
@@ -15,10 +17,11 @@ module Chewy
|
|
15
17
|
# definition. Use it only if you need to pass options for root
|
16
18
|
# object mapping, such as `date_detection` or `dynamic_date_formats`
|
17
19
|
#
|
20
|
+
# @example
|
18
21
|
# class UsersIndex < Chewy::Index
|
19
22
|
# define_type User do
|
20
23
|
# # root object defined implicitly and optionless for current type
|
21
|
-
# field :full_name, type: '
|
24
|
+
# field :full_name, type: 'keyword'
|
22
25
|
# end
|
23
26
|
# end
|
24
27
|
#
|
@@ -26,32 +29,37 @@ module Chewy
|
|
26
29
|
# define_type Car do
|
27
30
|
# # explicit root definition with additional options
|
28
31
|
# root dynamic_date_formats: ['yyyy-MM-dd'] do
|
29
|
-
# field :model_name, type: '
|
32
|
+
# field :model_name, type: 'keyword'
|
30
33
|
# end
|
31
34
|
# end
|
32
35
|
# end
|
33
36
|
#
|
34
|
-
def root(options
|
35
|
-
|
36
|
-
|
37
|
+
def root(**options)
|
38
|
+
self.root_object ||= Chewy::Fields::Root.new(type_name, **Chewy.default_root_options.merge(options))
|
39
|
+
root_object.update_options!(**options)
|
40
|
+
yield if block_given?
|
41
|
+
root_object
|
37
42
|
end
|
38
43
|
|
39
44
|
# Defines mapping field for current type
|
40
45
|
#
|
46
|
+
# @example
|
41
47
|
# class UsersIndex < Chewy::Index
|
42
48
|
# define_type User do
|
43
49
|
# # passing all the options to field definition:
|
44
|
-
# field :full_name,
|
50
|
+
# field :full_name, analyzer: 'special'
|
45
51
|
# end
|
46
52
|
# end
|
47
53
|
#
|
48
54
|
# The `type` is optional and defaults to `string` if not defined:
|
49
55
|
#
|
56
|
+
# @example
|
50
57
|
# field :full_name
|
51
58
|
#
|
52
59
|
# Also, multiple fields might be defined with one call and
|
53
60
|
# with the same options:
|
54
61
|
#
|
62
|
+
# @example
|
55
63
|
# field :first_name, :last_name, analyzer: 'special'
|
56
64
|
#
|
57
65
|
# The only special option in the field definition
|
@@ -59,31 +67,35 @@ module Chewy
|
|
59
67
|
# method will be called for the indexed object. Also
|
60
68
|
# `:value` might be a proc or indexed object method name:
|
61
69
|
#
|
70
|
+
# @example
|
62
71
|
# class User < ActiveRecord::Base
|
63
72
|
# def user_full_name
|
64
73
|
# [first_name, last_name].join(' ')
|
65
74
|
# end
|
66
75
|
# end
|
67
76
|
#
|
68
|
-
# field :full_name, type: '
|
77
|
+
# field :full_name, type: 'keyword', value: :user_full_name
|
69
78
|
#
|
70
79
|
# The proc evaluates inside the indexed object context if
|
71
80
|
# its arity is 0 and in present contexts if there is an argument:
|
72
81
|
#
|
73
|
-
#
|
82
|
+
# @example
|
83
|
+
# field :full_name, type: 'keyword', value: -> { [first_name, last_name].join(' ') }
|
74
84
|
#
|
75
85
|
# separator = ' '
|
76
|
-
# field :full_name, type: '
|
86
|
+
# field :full_name, type: 'keyword', value: ->(user) { [user.first_name, user.last_name].join(separator) }
|
77
87
|
#
|
78
88
|
# If array was returned as value - it will be put in index as well.
|
79
89
|
#
|
80
|
-
#
|
90
|
+
# @example
|
91
|
+
# field :tags, type: 'keyword', value: -> { tags.map(&:name) }
|
81
92
|
#
|
82
93
|
# Fields support nesting in case of `object` field type. If
|
83
94
|
# `user.quiz` will return an array of objects, then result index content
|
84
95
|
# will be an array of hashes, if `user.quiz` is not a collection association
|
85
96
|
# then just values hash will be put in the index.
|
86
97
|
#
|
98
|
+
# @example
|
87
99
|
# field :quiz do
|
88
100
|
# field :question, :answer
|
89
101
|
# field :score, type: 'integer'
|
@@ -91,6 +103,7 @@ module Chewy
|
|
91
103
|
#
|
92
104
|
# Nested fields are composed from nested objects:
|
93
105
|
#
|
106
|
+
# @example
|
94
107
|
# field :name, value: -> { name_translations } do
|
95
108
|
# field :ru, value: ->(name) { name['ru'] }
|
96
109
|
# field :en, value: ->(name) { name['en'] }
|
@@ -99,32 +112,31 @@ module Chewy
|
|
99
112
|
# Of course it is possible to define object fields contents dynamically
|
100
113
|
# but make sure evaluation proc returns hash:
|
101
114
|
#
|
115
|
+
# @example
|
102
116
|
# field :name, type: 'object', value: -> { name_translations }
|
103
117
|
#
|
104
118
|
# The special case is multi_field. If type options and block are
|
105
119
|
# both present field is treated as a multi-field. In that case field
|
106
120
|
# composition changes to satisfy Elasticsearch rules:
|
107
121
|
#
|
108
|
-
#
|
122
|
+
# @example
|
123
|
+
# field :full_name, type: 'text', analyzer: 'name', value: ->{ full_name.try(:strip) } do
|
109
124
|
# field :sorted, analyzer: 'sorted'
|
110
125
|
# end
|
111
126
|
#
|
112
|
-
def field(*args, &block)
|
113
|
-
options = args.extract_options!
|
114
|
-
build_root
|
115
|
-
|
127
|
+
def field(*args, **options, &block)
|
116
128
|
if args.size > 1
|
117
|
-
args.map { |name| field(name, options) }
|
129
|
+
args.map { |name| field(name, **options) }
|
118
130
|
else
|
119
|
-
expand_nested(Chewy::Fields::Base.new(args.first, options), &block)
|
131
|
+
expand_nested(Chewy::Fields::Base.new(args.first, **options), &block)
|
120
132
|
end
|
121
133
|
end
|
122
134
|
|
123
135
|
# Defines an aggregation that can be bound to a query or filter
|
124
136
|
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
137
|
+
# @example
|
138
|
+
# # Suppose that a user has posts and each post has ratings
|
139
|
+
# # avg_post_rating is the mean of all ratings
|
128
140
|
# class UsersIndex < Chewy::Index
|
129
141
|
# define_type User do
|
130
142
|
# field :posts do
|
@@ -137,64 +149,68 @@ module Chewy
|
|
137
149
|
# end
|
138
150
|
# end
|
139
151
|
def agg(name, &block)
|
140
|
-
build_root
|
141
152
|
self._agg_defs = _agg_defs.merge(name => block)
|
142
153
|
end
|
143
154
|
alias_method :aggregation, :agg
|
144
155
|
|
145
156
|
# Defines dynamic template in mapping root objects
|
146
157
|
#
|
158
|
+
# @example
|
147
159
|
# class CarsIndex < Chewy::Index
|
148
160
|
# define_type Car do
|
149
|
-
# template 'model.*', type: '
|
161
|
+
# template 'model.*', type: 'text', analyzer: 'special'
|
150
162
|
# field 'model', type: 'object' # here we can put { de: 'Der Mercedes', en: 'Mercedes' }
|
151
163
|
# and template will be applied to this field
|
152
164
|
# end
|
153
165
|
# end
|
154
166
|
#
|
155
167
|
# Name for each template is generated with the following
|
156
|
-
# rule:
|
168
|
+
# rule: `template_#!{dynamic_templates.size + 1}`.
|
157
169
|
#
|
170
|
+
# @example Templates
|
158
171
|
# template 'tit*', mapping_hash
|
159
172
|
# template 'title.*', mapping_hash # dot in template causes "path_match" using
|
160
173
|
# template /tit.+/, mapping_hash # using "match_pattern": "regexp"
|
161
174
|
# template /title\..+/, mapping_hash # "\." - escaped dot causes "path_match" using
|
162
|
-
# template /tit.+/, '
|
175
|
+
# template /tit.+/, type: 'text', mapping_hash # "match_mapping_type" as the optional second argument
|
163
176
|
# template template42: {match: 'hello*', mapping: {type: 'object'}} # or even pass a template as is
|
164
177
|
#
|
165
178
|
def template(*args)
|
166
|
-
|
179
|
+
root.dynamic_template(*args)
|
167
180
|
end
|
168
181
|
alias_method :dynamic_template, :template
|
169
182
|
|
170
183
|
# Returns compiled mappings hash for current type
|
171
184
|
#
|
172
185
|
def mappings_hash
|
173
|
-
|
186
|
+
root.mappings_hash[type_name.to_sym].present? ? root.mappings_hash : {}
|
187
|
+
end
|
188
|
+
|
189
|
+
# Check whether the type has outdated_sync_field defined with a simple value.
|
190
|
+
#
|
191
|
+
# @return [true, false]
|
192
|
+
def supports_outdated_sync?
|
193
|
+
updated_at_field = root.child_hash[outdated_sync_field] if outdated_sync_field
|
194
|
+
!!updated_at_field && updated_at_field.value.nil?
|
174
195
|
end
|
175
196
|
|
176
197
|
private
|
177
198
|
|
178
|
-
def expand_nested(field
|
199
|
+
def expand_nested(field)
|
200
|
+
@_current_field ||= root
|
201
|
+
|
179
202
|
if @_current_field
|
180
203
|
field.parent = @_current_field
|
181
204
|
@_current_field.children.push(field)
|
182
205
|
end
|
183
206
|
|
184
|
-
return unless
|
207
|
+
return unless block_given?
|
185
208
|
|
186
209
|
previous_field = @_current_field
|
187
210
|
@_current_field = field
|
188
211
|
yield
|
189
212
|
@_current_field = previous_field
|
190
213
|
end
|
191
|
-
|
192
|
-
def build_root(options = {}, &block)
|
193
|
-
return root_object if root_object
|
194
|
-
self.root_object = Chewy::Fields::Root.new(type_name, options)
|
195
|
-
expand_nested(root_object, &block)
|
196
|
-
@_current_field = root_object
|
197
|
-
end
|
198
214
|
end
|
199
215
|
end
|
200
216
|
end
|