esse 0.2.6 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/esse/cli/index.rb +12 -1
- data/lib/esse/core.rb +2 -0
- data/lib/esse/document.rb +38 -6
- data/lib/esse/document_lazy_attribute.rb +18 -0
- data/lib/esse/import/bulk.rb +26 -5
- data/lib/esse/import/request_body.rb +6 -1
- data/lib/esse/index/documents.rb +25 -7
- data/lib/esse/index/indices.rb +22 -7
- data/lib/esse/lazy_document_header.rb +73 -0
- data/lib/esse/primitives/array_utils.rb +17 -0
- data/lib/esse/primitives.rb +1 -0
- data/lib/esse/repository/documents.rb +37 -0
- data/lib/esse/repository/lazy_document_attributes.rb +66 -0
- data/lib/esse/repository/object_document_mapper.rb +67 -55
- data/lib/esse/repository.rb +1 -0
- data/lib/esse/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46abc9b3347ca852c1f270910e21582c8f17186b554d1a7f667bafee8f84a152
|
4
|
+
data.tar.gz: baf633b7147e8cf48c704c15fc5f4a2a52c8d08f671f05e6c5577f3d92ef2901
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 129c4475225760da6620d990a1065795d6c7f1248b01745105715b8d01f4c11c500b253701f573eac5071abeeeaed83e6be1c5bf8f22b5f2fab819587c96c595
|
7
|
+
data.tar.gz: 93f66e7d04a3857c3daf9b6daaa72a39e6a3027b80e464e53a97325f74666324451f93a914d9daa31864a77a93c7fb37735e12395536f395133135042a752f21
|
data/lib/esse/cli/index.rb
CHANGED
@@ -88,9 +88,20 @@ module Esse
|
|
88
88
|
option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
|
89
89
|
option :context, type: :hash, default: {}, required: true, desc: 'List of options to pass to the index class'
|
90
90
|
option :repo, type: :string, default: nil, alias: '-r', desc: 'Repository to use for import'
|
91
|
+
option :eager_include_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request'
|
92
|
+
option :lazy_update_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request'
|
91
93
|
def import(*index_classes)
|
92
94
|
require_relative 'index/import'
|
93
|
-
|
95
|
+
opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
|
96
|
+
opts.delete(:lazy_update_document_attributes) if opts[:lazy_update_document_attributes] == 'false'
|
97
|
+
opts.delete(:eager_include_document_attributes) if opts[:eager_include_document_attributes] == 'false'
|
98
|
+
if (val = opts[:eager_include_document_attributes])
|
99
|
+
opts[:eager_include_document_attributes] = (val == 'true') ? true : val.split(',')
|
100
|
+
end
|
101
|
+
if (val = opts[:lazy_update_document_attributes])
|
102
|
+
opts[:lazy_update_document_attributes] = (val == 'true') ? true : val.split(',')
|
103
|
+
end
|
104
|
+
Import.new(indices: index_classes, **opts).run
|
94
105
|
end
|
95
106
|
end
|
96
107
|
end
|
data/lib/esse/core.rb
CHANGED
@@ -6,6 +6,8 @@ module Esse
|
|
6
6
|
require_relative 'primitives'
|
7
7
|
require_relative 'collection'
|
8
8
|
require_relative 'document'
|
9
|
+
require_relative 'document_lazy_attribute'
|
10
|
+
require_relative 'lazy_document_header'
|
9
11
|
require_relative 'hash_document'
|
10
12
|
require_relative 'null_document'
|
11
13
|
require_relative 'repository'
|
data/lib/esse/document.rb
CHANGED
@@ -56,7 +56,7 @@ module Esse
|
|
56
56
|
|
57
57
|
# @return [Hash] the document data
|
58
58
|
def to_h
|
59
|
-
|
59
|
+
mutated_source.merge(
|
60
60
|
_id: id,
|
61
61
|
).tap do |hash|
|
62
62
|
hash[:_type] = type if type
|
@@ -65,11 +65,13 @@ module Esse
|
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
68
|
-
def to_bulk(data: true)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
68
|
+
def to_bulk(data: true, operation: nil)
|
69
|
+
doc_header.tap do |h|
|
70
|
+
if data && operation == :update
|
71
|
+
h[:data] = { doc: mutated_source }
|
72
|
+
elsif data
|
73
|
+
h[:data] = mutated_source
|
74
|
+
end
|
73
75
|
h.merge!(meta)
|
74
76
|
end
|
75
77
|
end
|
@@ -87,5 +89,35 @@ module Esse
|
|
87
89
|
id == other.id && type == other.type && routing == other.routing && meta == other.meta && source == other.source
|
88
90
|
)
|
89
91
|
end
|
92
|
+
|
93
|
+
def doc_header
|
94
|
+
{ _id: id }.tap do |h|
|
95
|
+
h[:_type] = type if type
|
96
|
+
h[:routing] = routing if routing?
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def inspect
|
101
|
+
attributes = %i[id routing source].map do |attr|
|
102
|
+
value = send(attr)
|
103
|
+
"#{attr}: #{value.inspect}" if value
|
104
|
+
end.compact.join(', ')
|
105
|
+
attributes << " mutations: #{@__mutations__.inspect}" if @__mutations__
|
106
|
+
"#<#{self.class.name || 'Esse::Document'} #{attributes}>"
|
107
|
+
end
|
108
|
+
|
109
|
+
def mutate(key)
|
110
|
+
@__mutations__ ||= {}
|
111
|
+
@__mutations__[key] = yield
|
112
|
+
instance_variable_set(:@__mutated_source__, nil)
|
113
|
+
end
|
114
|
+
|
115
|
+
protected
|
116
|
+
|
117
|
+
def mutated_source
|
118
|
+
return source unless @__mutations__
|
119
|
+
|
120
|
+
@__mutated_source__ ||= source.merge(@__mutations__)
|
121
|
+
end
|
90
122
|
end
|
91
123
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
class DocumentLazyAttribute
|
5
|
+
attr_reader :options
|
6
|
+
|
7
|
+
def initialize(**kwargs)
|
8
|
+
@options = kwargs
|
9
|
+
end
|
10
|
+
|
11
|
+
# Returns a Hash keyed by document header (or document ID) with the attribute data as value.
|
12
|
+
# @param doc_headers [Array<Esse::LazyDocumentHeader>] the document headers
|
13
|
+
# @return [Hash] A Hash with the document header instance as key and the attribute data as value.
|
14
|
+
def call(doc_headers)
|
15
|
+
raise NotImplementedError, 'Override this method to return the document attribute data'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/esse/import/bulk.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Esse
|
2
2
|
module Import
|
3
3
|
class Bulk
|
4
|
-
def initialize(type: nil, index: nil, delete: nil, create: nil)
|
4
|
+
def initialize(type: nil, index: nil, delete: nil, create: nil, update: nil)
|
5
5
|
@index = Array(index).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
|
6
6
|
value = doc.to_bulk
|
7
7
|
value[:_type] ||= type if type
|
@@ -12,6 +12,11 @@ module Esse
|
|
12
12
|
value[:_type] ||= type if type
|
13
13
|
{ create: value }
|
14
14
|
end
|
15
|
+
@update = Array(update).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
|
16
|
+
value = doc.to_bulk(operation: :update)
|
17
|
+
value[:_type] ||= type if type
|
18
|
+
{ update: value }
|
19
|
+
end
|
15
20
|
@delete = Array(delete).select(&method(:valid_doc?)).reject(&:ignore_on_delete?).map do |doc|
|
16
21
|
value = doc.to_bulk(data: false)
|
17
22
|
value[:_type] ||= type if type
|
@@ -22,12 +27,12 @@ module Esse
|
|
22
27
|
# Return an array of RequestBody instances
|
23
28
|
#
|
24
29
|
# In case of timeout error, will retry with an exponential backoff using the following formula:
|
25
|
-
# wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default
|
30
|
+
# wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times (default: 4).
|
26
31
|
#
|
27
32
|
# Too large bulk requests will be split into multiple requests with only one attempt.
|
28
33
|
#
|
29
34
|
# @yield [RequestBody] A request body instance
|
30
|
-
def each_request(max_retries:
|
35
|
+
def each_request(max_retries: 4, last_retry_in_small_chunks: true)
|
31
36
|
# @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
|
32
37
|
requests = [optimistic_request]
|
33
38
|
retry_count = 0
|
@@ -43,6 +48,8 @@ module Esse
|
|
43
48
|
rescue Faraday::TimeoutError, Esse::Transport::RequestTimeoutError => e
|
44
49
|
retry_count += 1
|
45
50
|
raise Esse::Transport::RequestTimeoutError.new(e.message) if retry_count >= max_retries
|
51
|
+
# Timeout error may be caused by a too large request, so we split the requests in small chunks as a last attempt
|
52
|
+
requests = requests_in_small_chunks if last_retry_in_small_chunks && max_retries > 2 && retry_count == max_retries - 2
|
46
53
|
wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1))
|
47
54
|
Esse.logger.warn "Timeout error, retrying in #{wait_interval} seconds"
|
48
55
|
sleep(wait_interval)
|
@@ -67,16 +74,30 @@ module Esse
|
|
67
74
|
|
68
75
|
def optimistic_request
|
69
76
|
request = Import::RequestBodyAsJson.new
|
70
|
-
request.delete = @delete
|
71
77
|
request.create = @create
|
72
78
|
request.index = @index
|
79
|
+
request.update = @update
|
80
|
+
request.delete = @delete
|
73
81
|
request
|
74
82
|
end
|
75
83
|
|
84
|
+
def requests_in_small_chunks(chunk_size: 1)
|
85
|
+
arr = []
|
86
|
+
@create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
|
87
|
+
@index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
|
88
|
+
@update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
|
89
|
+
@delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
|
90
|
+
Esse.logger.warn <<~MSG
|
91
|
+
Retrying the last request in small chunks of #{chunk_size} documents.
|
92
|
+
This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
|
93
|
+
MSG
|
94
|
+
arr
|
95
|
+
end
|
96
|
+
|
76
97
|
# @return [Array<RequestBody>]
|
77
98
|
def balance_requests_size(err)
|
78
99
|
if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
|
79
|
-
requests = (@
|
100
|
+
requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
|
80
101
|
operation, meta = as_json.to_a.first
|
81
102
|
meta = meta.dup
|
82
103
|
data = meta.delete(:data)
|
@@ -5,7 +5,7 @@ module Esse
|
|
5
5
|
|
6
6
|
def initialize(body:)
|
7
7
|
@body = body # body may be String or Array<Hash>
|
8
|
-
@stats = { index: 0, create: 0, delete: 0 }
|
8
|
+
@stats = { index: 0, create: 0, delete: 0, update: 0 }
|
9
9
|
end
|
10
10
|
|
11
11
|
def body?
|
@@ -46,6 +46,11 @@ module Esse
|
|
46
46
|
@stats[:index] += docs.size
|
47
47
|
end
|
48
48
|
|
49
|
+
def update=(docs)
|
50
|
+
@body += docs
|
51
|
+
@stats[:update] += docs.size
|
52
|
+
end
|
53
|
+
|
49
54
|
def create=(docs)
|
50
55
|
@body += docs
|
51
56
|
@stats[:create] += docs.size
|
data/lib/esse/index/documents.rb
CHANGED
@@ -164,7 +164,7 @@ module Esse
|
|
164
164
|
# @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-bulk.html
|
165
165
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/utils.rb
|
166
166
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
167
|
-
def bulk(
|
167
|
+
def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: nil, **options)
|
168
168
|
definition = {
|
169
169
|
index: index_name(suffix: suffix),
|
170
170
|
type: type,
|
@@ -174,9 +174,10 @@ module Esse
|
|
174
174
|
# @TODO Wrap the return in some other Stats object with more information
|
175
175
|
Esse::Import::Bulk.new(
|
176
176
|
**definition.slice(:type),
|
177
|
-
index: index,
|
178
|
-
delete: delete,
|
179
177
|
create: create,
|
178
|
+
delete: delete,
|
179
|
+
index: index,
|
180
|
+
update: update,
|
180
181
|
).each_request do |request_body|
|
181
182
|
cluster.api.bulk(**definition, body: request_body.body) do |event_payload|
|
182
183
|
event_payload[:body_stats] = request_body.stats
|
@@ -198,20 +199,37 @@ module Esse
|
|
198
199
|
# @option [Hash] :context The collection context. This value will be passed as argument to the collection
|
199
200
|
# May be SQL condition or any other filter you have defined on the collection.
|
200
201
|
# @return [Numeric] The number of documents imported
|
201
|
-
def import(*repo_types, context: {}, suffix: nil, **options)
|
202
|
+
def import(*repo_types, context: {}, eager_include_document_attributes: false, lazy_update_document_attributes: false, suffix: nil, **options)
|
202
203
|
repo_types = repo_hash.keys if repo_types.empty?
|
203
204
|
count = 0
|
205
|
+
|
204
206
|
repo_hash.slice(*repo_types).each do |repo_name, repo|
|
205
|
-
|
207
|
+
doc_attrs = {eager: [], lazy: []}
|
208
|
+
doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_include_document_attributes)
|
209
|
+
doc_attrs[:lazy] = repo.lazy_document_attribute_names(lazy_update_document_attributes)
|
210
|
+
doc_attrs[:lazy] -= doc_attrs[:eager]
|
211
|
+
|
212
|
+
context ||= {}
|
213
|
+
context[:lazy_attributes] = doc_attrs[:eager] if doc_attrs[:eager].any?
|
214
|
+
repo.each_serialized_batch(**context) do |batch|
|
206
215
|
# Elasticsearch 6.x and older have multiple types per index.
|
207
216
|
# This gem supports multiple types per index for backward compatibility, but we recommend to update
|
208
217
|
# your elasticsearch to at least a 7.x version and use a single type per index.
|
209
218
|
#
|
210
219
|
# Note that the repository name will be used as the document type.
|
211
220
|
# mapping_default_type
|
212
|
-
kwargs = {
|
221
|
+
kwargs = { suffix: suffix, type: repo_name, **options }
|
213
222
|
cluster.may_update_type!(kwargs)
|
214
|
-
|
223
|
+
|
224
|
+
bulk(**kwargs, index: batch)
|
225
|
+
|
226
|
+
doc_attrs[:lazy].each do |attr_name|
|
227
|
+
partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?))
|
228
|
+
next if partial_docs.empty?
|
229
|
+
|
230
|
+
bulk(**kwargs, update: partial_docs)
|
231
|
+
end
|
232
|
+
|
215
233
|
count += batch.size
|
216
234
|
end
|
217
235
|
end
|
data/lib/esse/index/indices.rb
CHANGED
@@ -26,10 +26,10 @@ module Esse
|
|
26
26
|
#
|
27
27
|
# @see http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
28
28
|
# @see Esse::Transport#create_index
|
29
|
-
def create_index(suffix: nil, **options)
|
29
|
+
def create_index(suffix: nil, body: nil, **options)
|
30
30
|
options = CREATE_INDEX_RESERVED_KEYWORDS.merge(options)
|
31
31
|
name = build_real_index_name(suffix)
|
32
|
-
definition = [settings_hash, mappings_hash].reduce(&:merge)
|
32
|
+
definition = body || [settings_hash, mappings_hash].reduce(&:merge)
|
33
33
|
|
34
34
|
if options.delete(:alias) && name != index_name
|
35
35
|
definition[:aliases] = { index_name => {} }
|
@@ -48,13 +48,23 @@ module Esse
|
|
48
48
|
# @return [Hash] the elasticsearch response
|
49
49
|
#
|
50
50
|
# @see https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-open-close.html
|
51
|
-
def reset_index(suffix: index_suffix, import: true, reindex: false, **options)
|
51
|
+
def reset_index(suffix: index_suffix, optimize: true, import: true, reindex: false, **options)
|
52
52
|
cluster.throw_error_when_readonly!
|
53
|
-
|
53
|
+
|
54
54
|
suffix ||= Esse.timestamp
|
55
|
-
suffix = Esse.timestamp while index_exist?(suffix: suffix)
|
55
|
+
suffix = Esse.timestamp while index_exist?(suffix: suffix)
|
56
|
+
|
57
|
+
if optimize
|
58
|
+
definition = [settings_hash, mappings_hash].reduce(&:merge)
|
59
|
+
number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas)
|
60
|
+
refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval)
|
61
|
+
new_number_of_replicas = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:number_of_replicas] = 0
|
62
|
+
new_refresh_interval = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:refresh_interval] = '-1'
|
63
|
+
create_index(**options, suffix: suffix, alias: false, body: definition)
|
64
|
+
else
|
65
|
+
create_index(**options, suffix: suffix, alias: false)
|
66
|
+
end
|
56
67
|
|
57
|
-
create_index(**options, suffix: suffix, alias: false)
|
58
68
|
if index_exist? && aliases.none?
|
59
69
|
cluster.api.delete_index(index: index_name)
|
60
70
|
end
|
@@ -63,8 +73,13 @@ module Esse
|
|
63
73
|
elsif reindex && (_from = indices_pointing_to_alias).any?
|
64
74
|
# @TODO: Reindex using the reindex API
|
65
75
|
end
|
76
|
+
|
77
|
+
if optimize && number_of_replicas != new_number_of_replicas || refresh_interval != new_refresh_interval
|
78
|
+
update_settings(suffix: suffix)
|
79
|
+
end
|
80
|
+
|
66
81
|
update_aliases(suffix: suffix)
|
67
|
-
|
82
|
+
|
68
83
|
true
|
69
84
|
end
|
70
85
|
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
class LazyDocumentHeader
|
5
|
+
def self.coerce_each(values)
|
6
|
+
arr = []
|
7
|
+
Esse::ArrayUtils.wrap(values).map do |value|
|
8
|
+
instance = coerce(value)
|
9
|
+
arr << instance if instance&.valid?
|
10
|
+
end
|
11
|
+
arr
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.coerce(value)
|
15
|
+
return unless value
|
16
|
+
|
17
|
+
if value.is_a?(Esse::LazyDocumentHeader)
|
18
|
+
value
|
19
|
+
elsif value.is_a?(Esse::Document)
|
20
|
+
new(value.doc_header)
|
21
|
+
elsif value.is_a?(Hash)
|
22
|
+
resp = value.transform_keys do |key|
|
23
|
+
case key
|
24
|
+
when :_id, :id, '_id', 'id'
|
25
|
+
:_id
|
26
|
+
when :_routing, :routing, '_routing', 'routing'
|
27
|
+
:routing
|
28
|
+
when :_type, :type, '_type', 'type'
|
29
|
+
:_type
|
30
|
+
else
|
31
|
+
key.to_sym
|
32
|
+
end
|
33
|
+
end
|
34
|
+
new(resp)
|
35
|
+
elsif String === value || Integer === value
|
36
|
+
new(_id: value)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def initialize(attributes)
|
41
|
+
@attributes = attributes
|
42
|
+
end
|
43
|
+
|
44
|
+
def valid?
|
45
|
+
!@attributes[:_id].nil?
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_h
|
49
|
+
@attributes
|
50
|
+
end
|
51
|
+
|
52
|
+
def id
|
53
|
+
@attributes.fetch(:_id)
|
54
|
+
end
|
55
|
+
|
56
|
+
def type
|
57
|
+
@attributes[:_type]
|
58
|
+
end
|
59
|
+
|
60
|
+
def routing
|
61
|
+
@attributes[:routing]
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_doc(source = {})
|
65
|
+
HashDocument.new(source.merge(@attributes))
|
66
|
+
end
|
67
|
+
|
68
|
+
def eql?(other)
|
69
|
+
self.class == other.class && @attributes == other.instance_variable_get(:@attributes)
|
70
|
+
end
|
71
|
+
alias_method :==, :eql?
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Esse
|
2
|
+
# The idea here is to add useful methods to the ruby standard objects without
|
3
|
+
# monkey patching them
|
4
|
+
module ArrayUtils
|
5
|
+
module_function
|
6
|
+
|
7
|
+
def wrap(object)
|
8
|
+
if object.nil?
|
9
|
+
[]
|
10
|
+
elsif object.respond_to?(:to_ary)
|
11
|
+
object.to_ary || [object]
|
12
|
+
else
|
13
|
+
[object]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/esse/primitives.rb
CHANGED
@@ -6,6 +6,43 @@ module Esse
|
|
6
6
|
def import(**kwargs)
|
7
7
|
index.import(repo_name, **kwargs)
|
8
8
|
end
|
9
|
+
|
10
|
+
def update_documents_attribute(name, ids_or_doc_headers = [], kwargs = {})
|
11
|
+
batch = documents_for_lazy_attribute(name, ids_or_doc_headers)
|
12
|
+
return if batch.empty?
|
13
|
+
|
14
|
+
index.bulk(**kwargs.transform_keys(&:to_sym), update: batch)
|
15
|
+
end
|
16
|
+
|
17
|
+
def documents_for_lazy_attribute(name, ids_or_doc_headers)
|
18
|
+
retrieve_lazy_attribute_values(name, ids_or_doc_headers).map do |doc_header, datum|
|
19
|
+
doc_header.to_doc(name => datum)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def retrieve_lazy_attribute_values(name, ids_or_doc_headers)
|
24
|
+
unless lazy_document_attribute?(name)
|
25
|
+
raise ArgumentError, <<~MSG
|
26
|
+
The attribute `#{name}` is not defined as a lazy document attribute.
|
27
|
+
|
28
|
+
Define the attribute as a lazy document attribute using the `lazy_document_attribute` method.
|
29
|
+
MSG
|
30
|
+
end
|
31
|
+
|
32
|
+
docs = LazyDocumentHeader.coerce_each(ids_or_doc_headers)
|
33
|
+
return [] if docs.empty?
|
34
|
+
|
35
|
+
result = fetch_lazy_document_attribute(name).call(docs)
|
36
|
+
return [] unless result.is_a?(Hash)
|
37
|
+
|
38
|
+
result.each_with_object({}) do |(key, value), memo|
|
39
|
+
if key.is_a?(LazyDocumentHeader) && (doc = docs.find { |d| d == key || d.id == key.id })
|
40
|
+
memo[doc] = value
|
41
|
+
elsif (doc = docs.find { |d| d.id == key })
|
42
|
+
memo[doc] = value
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
9
46
|
end
|
10
47
|
|
11
48
|
extend ClassMethods
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
# Definition for the lazy document attributes
|
5
|
+
class Repository
|
6
|
+
module ClassMethods
|
7
|
+
def lazy_document_attributes
|
8
|
+
@lazy_document_attributes ||= {}.freeze
|
9
|
+
end
|
10
|
+
|
11
|
+
def lazy_document_attribute_names(all = true)
|
12
|
+
case all
|
13
|
+
when false
|
14
|
+
[]
|
15
|
+
when true
|
16
|
+
lazy_document_attributes.keys
|
17
|
+
else
|
18
|
+
filtered = Array(all).map(&:to_s)
|
19
|
+
lazy_document_attributes.keys.select { |name| filtered.include?(name.to_s) }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def fetch_lazy_document_attribute(attr_name)
|
24
|
+
klass, kwargs = lazy_document_attributes.fetch(attr_name)
|
25
|
+
klass.new(**kwargs)
|
26
|
+
rescue KeyError
|
27
|
+
raise ArgumentError, format('Attribute %<attr>p is not defined as a lazy document attribute', attr: attr_name)
|
28
|
+
end
|
29
|
+
|
30
|
+
def lazy_document_attribute(attr_name, klass = nil, **kwargs, &block)
|
31
|
+
if attr_name.nil?
|
32
|
+
raise ArgumentError, 'Attribute name is required to define a lazy document attribute'
|
33
|
+
end
|
34
|
+
if lazy_document_attribute?(attr_name.to_sym) || lazy_document_attribute?(attr_name.to_s)
|
35
|
+
raise ArgumentError, format('Attribute %<attr>p is already defined as a lazy document attribute', attr: attr_name)
|
36
|
+
end
|
37
|
+
|
38
|
+
@lazy_document_attributes = lazy_document_attributes.dup
|
39
|
+
if block
|
40
|
+
klass = Class.new(Esse::DocumentLazyAttribute) do
|
41
|
+
define_method(:call, &block)
|
42
|
+
end
|
43
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
44
|
+
elsif klass.is_a?(Class) && klass <= Esse::DocumentLazyAttribute
|
45
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
46
|
+
elsif klass.is_a?(Class) && klass.instance_methods.include?(:call)
|
47
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
48
|
+
elsif klass.nil?
|
49
|
+
raise ArgumentError, format('A block or a class that responds to `call` is required to define a lazy document attribute')
|
50
|
+
else
|
51
|
+
raise ArgumentError, format('%<arg>p is not a valid lazy document attribute. Class should inherit from Esse::DocumentLazyAttribute or respond to `call`', arg: klass)
|
52
|
+
end
|
53
|
+
ensure
|
54
|
+
@lazy_document_attributes&.freeze
|
55
|
+
end
|
56
|
+
|
57
|
+
protected
|
58
|
+
|
59
|
+
def lazy_document_attribute?(attr_name)
|
60
|
+
lazy_document_attributes.key?(attr_name)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
extend ClassMethods
|
65
|
+
end
|
66
|
+
end
|
@@ -6,8 +6,8 @@ module Esse
|
|
6
6
|
# @see ObjectDocumentMapper
|
7
7
|
class Repository
|
8
8
|
module ClassMethods
|
9
|
-
#
|
10
|
-
# collection. It's allowed a block or a class with the `to_h` instance method.
|
9
|
+
# Define the document type that will be used to serialize the data.
|
10
|
+
# Arguments will be the same as those passed through the collection. A block or a class with the `to_h` instance method is allowed.
|
11
11
|
# Example with block
|
12
12
|
# document do |model, **context|
|
13
13
|
# {
|
@@ -38,31 +38,6 @@ module Esse
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def coerce_to_document(value)
|
42
|
-
case value
|
43
|
-
when Esse::Document
|
44
|
-
value
|
45
|
-
when Hash
|
46
|
-
Esse::HashDocument.new(value)
|
47
|
-
when NilClass, FalseClass
|
48
|
-
Esse::NullDocument.new
|
49
|
-
else
|
50
|
-
raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Convert ruby object to json by using the document of the given document type.
|
55
|
-
# @param [Object] model The ruby object
|
56
|
-
# @param [Hash] kwargs The context
|
57
|
-
# @return [Esse::Document] The serialized document
|
58
|
-
def serialize(model, **kwargs)
|
59
|
-
if @document_proc.nil?
|
60
|
-
raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
|
61
|
-
end
|
62
|
-
|
63
|
-
@document_proc.call(model, **kwargs)
|
64
|
-
end
|
65
|
-
|
66
41
|
# Used to define the source of data. A block is required. And its
|
67
42
|
# content should yield an array of each object that should be serialized.
|
68
43
|
# The list of arguments will be passed through the document method.
|
@@ -94,6 +69,71 @@ module Esse
|
|
94
69
|
@collection_proc = collection_klass || block
|
95
70
|
end
|
96
71
|
|
72
|
+
# Wrap collection data into serialized batches
|
73
|
+
#
|
74
|
+
# @param [Hash] kwargs The context
|
75
|
+
# @return [Enumerator] The enumerator
|
76
|
+
# @yield [Array, **context] serialized collection and the optional context from the collection
|
77
|
+
def each_serialized_batch(lazy_attributes: false, **kwargs)
|
78
|
+
each_batch(**kwargs) do |*args|
|
79
|
+
batch, collection_context = args
|
80
|
+
collection_context ||= {}
|
81
|
+
entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
|
82
|
+
if lazy_attributes
|
83
|
+
attrs = lazy_attributes.is_a?(Array) ? lazy_attributes : lazy_document_attribute_names(lazy_attributes)
|
84
|
+
attrs.each do |attr_name|
|
85
|
+
retrieve_lazy_attribute_values(attr_name, entries).each do |doc_header, value|
|
86
|
+
doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing }
|
87
|
+
doc&.mutate(attr_name) { value }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
yield entries, **kwargs
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Wrap collection data into serialized documents
|
97
|
+
#
|
98
|
+
# Example:
|
99
|
+
# GeosIndex.documents(id: 1).first
|
100
|
+
#
|
101
|
+
# @return [Enumerator] All serialized entries
|
102
|
+
def documents(**kwargs)
|
103
|
+
Enumerator.new do |yielder|
|
104
|
+
each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
|
105
|
+
docs.each { |document| yielder.yield(document) }
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Convert ruby object to json by using the document of the given document type.
|
111
|
+
# @param [Object] model The ruby object
|
112
|
+
# @param [Hash] kwargs The context
|
113
|
+
# @return [Esse::Document] The serialized document
|
114
|
+
def serialize(model, **kwargs)
|
115
|
+
if @document_proc.nil?
|
116
|
+
raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
|
117
|
+
end
|
118
|
+
|
119
|
+
@document_proc.call(model, **kwargs)
|
120
|
+
end
|
121
|
+
|
122
|
+
protected
|
123
|
+
|
124
|
+
def coerce_to_document(value)
|
125
|
+
case value
|
126
|
+
when Esse::Document
|
127
|
+
value
|
128
|
+
when Hash
|
129
|
+
Esse::HashDocument.new(value)
|
130
|
+
when NilClass, FalseClass
|
131
|
+
Esse::NullDocument.new
|
132
|
+
else
|
133
|
+
raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
97
137
|
# Used to fetch all batches of data defined on the collection model.
|
98
138
|
# Arguments can be anything. They will just be passed through the block.
|
99
139
|
# Useful when the collection depends on scope or any other conditions
|
@@ -122,34 +162,6 @@ module Esse
|
|
122
162
|
rescue LocalJumpError
|
123
163
|
raise(SyntaxError, 'block must be explicitly declared in the collection definition')
|
124
164
|
end
|
125
|
-
|
126
|
-
# Wrap collection data into serialized batches
|
127
|
-
#
|
128
|
-
# @param [Hash] kwargs The context
|
129
|
-
# @return [Enumerator] The enumerator
|
130
|
-
# @yield [Array, **context] serialized collection and the optional context from the collection
|
131
|
-
def each_serialized_batch(**kwargs, &block)
|
132
|
-
each_batch(**kwargs) do |*args|
|
133
|
-
batch, collection_context = args
|
134
|
-
collection_context ||= {}
|
135
|
-
entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
|
136
|
-
block.call(entries, **kwargs)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Wrap collection data into serialized documents
|
141
|
-
#
|
142
|
-
# Example:
|
143
|
-
# GeosIndex.documents(id: 1).first
|
144
|
-
#
|
145
|
-
# @return [Enumerator] All serialized entries
|
146
|
-
def documents(**kwargs)
|
147
|
-
Enumerator.new do |yielder|
|
148
|
-
each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
|
149
|
-
docs.each { |document| yielder.yield(document) }
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
165
|
end
|
154
166
|
|
155
167
|
extend ClassMethods
|
data/lib/esse/repository.rb
CHANGED
data/lib/esse/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: esse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marcos G. Zimmermann
|
8
8
|
autorequire:
|
9
9
|
bindir: exec
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: multi_json
|
@@ -248,6 +248,7 @@ files:
|
|
248
248
|
- lib/esse/deprecations/repository_backend_delegator.rb
|
249
249
|
- lib/esse/deprecations/serializer.rb
|
250
250
|
- lib/esse/document.rb
|
251
|
+
- lib/esse/document_lazy_attribute.rb
|
251
252
|
- lib/esse/dynamic_template.rb
|
252
253
|
- lib/esse/errors.rb
|
253
254
|
- lib/esse/events.rb
|
@@ -274,16 +275,19 @@ files:
|
|
274
275
|
- lib/esse/index/type.rb
|
275
276
|
- lib/esse/index_mapping.rb
|
276
277
|
- lib/esse/index_setting.rb
|
278
|
+
- lib/esse/lazy_document_header.rb
|
277
279
|
- lib/esse/logging.rb
|
278
280
|
- lib/esse/null_document.rb
|
279
281
|
- lib/esse/plugins.rb
|
280
282
|
- lib/esse/primitives.rb
|
283
|
+
- lib/esse/primitives/array_utils.rb
|
281
284
|
- lib/esse/primitives/hash_utils.rb
|
282
285
|
- lib/esse/primitives/hstring.rb
|
283
286
|
- lib/esse/primitives/output.rb
|
284
287
|
- lib/esse/repository.rb
|
285
288
|
- lib/esse/repository/actions.rb
|
286
289
|
- lib/esse/repository/documents.rb
|
290
|
+
- lib/esse/repository/lazy_document_attributes.rb
|
287
291
|
- lib/esse/repository/object_document_mapper.rb
|
288
292
|
- lib/esse/search/query.rb
|
289
293
|
- lib/esse/search/query/dsl.rb
|
@@ -318,7 +322,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
318
322
|
- !ruby/object:Gem::Version
|
319
323
|
version: '0'
|
320
324
|
requirements: []
|
321
|
-
rubygems_version: 3.
|
325
|
+
rubygems_version: 3.0.3.1
|
322
326
|
signing_key:
|
323
327
|
specification_version: 4
|
324
328
|
summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building
|