esse 0.2.6 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/esse/cli/index.rb +12 -1
- data/lib/esse/core.rb +2 -0
- data/lib/esse/document.rb +38 -6
- data/lib/esse/document_lazy_attribute.rb +18 -0
- data/lib/esse/import/bulk.rb +26 -5
- data/lib/esse/import/request_body.rb +6 -1
- data/lib/esse/index/documents.rb +25 -7
- data/lib/esse/index/indices.rb +22 -7
- data/lib/esse/lazy_document_header.rb +73 -0
- data/lib/esse/primitives/array_utils.rb +17 -0
- data/lib/esse/primitives.rb +1 -0
- data/lib/esse/repository/documents.rb +37 -0
- data/lib/esse/repository/lazy_document_attributes.rb +66 -0
- data/lib/esse/repository/object_document_mapper.rb +67 -55
- data/lib/esse/repository.rb +1 -0
- data/lib/esse/version.rb +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46abc9b3347ca852c1f270910e21582c8f17186b554d1a7f667bafee8f84a152
|
4
|
+
data.tar.gz: baf633b7147e8cf48c704c15fc5f4a2a52c8d08f671f05e6c5577f3d92ef2901
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 129c4475225760da6620d990a1065795d6c7f1248b01745105715b8d01f4c11c500b253701f573eac5071abeeeaed83e6be1c5bf8f22b5f2fab819587c96c595
|
7
|
+
data.tar.gz: 93f66e7d04a3857c3daf9b6daaa72a39e6a3027b80e464e53a97325f74666324451f93a914d9daa31864a77a93c7fb37735e12395536f395133135042a752f21
|
data/lib/esse/cli/index.rb
CHANGED
@@ -88,9 +88,20 @@ module Esse
|
|
88
88
|
option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
|
89
89
|
option :context, type: :hash, default: {}, required: true, desc: 'List of options to pass to the index class'
|
90
90
|
option :repo, type: :string, default: nil, alias: '-r', desc: 'Repository to use for import'
|
91
|
+
option :eager_include_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request'
|
92
|
+
option :lazy_update_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request'
|
91
93
|
def import(*index_classes)
|
92
94
|
require_relative 'index/import'
|
93
|
-
|
95
|
+
opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
|
96
|
+
opts.delete(:lazy_update_document_attributes) if opts[:lazy_update_document_attributes] == 'false'
|
97
|
+
opts.delete(:eager_include_document_attributes) if opts[:eager_include_document_attributes] == 'false'
|
98
|
+
if (val = opts[:eager_include_document_attributes])
|
99
|
+
opts[:eager_include_document_attributes] = (val == 'true') ? true : val.split(',')
|
100
|
+
end
|
101
|
+
if (val = opts[:lazy_update_document_attributes])
|
102
|
+
opts[:lazy_update_document_attributes] = (val == 'true') ? true : val.split(',')
|
103
|
+
end
|
104
|
+
Import.new(indices: index_classes, **opts).run
|
94
105
|
end
|
95
106
|
end
|
96
107
|
end
|
data/lib/esse/core.rb
CHANGED
@@ -6,6 +6,8 @@ module Esse
|
|
6
6
|
require_relative 'primitives'
|
7
7
|
require_relative 'collection'
|
8
8
|
require_relative 'document'
|
9
|
+
require_relative 'document_lazy_attribute'
|
10
|
+
require_relative 'lazy_document_header'
|
9
11
|
require_relative 'hash_document'
|
10
12
|
require_relative 'null_document'
|
11
13
|
require_relative 'repository'
|
data/lib/esse/document.rb
CHANGED
@@ -56,7 +56,7 @@ module Esse
|
|
56
56
|
|
57
57
|
# @return [Hash] the document data
|
58
58
|
def to_h
|
59
|
-
|
59
|
+
mutated_source.merge(
|
60
60
|
_id: id,
|
61
61
|
).tap do |hash|
|
62
62
|
hash[:_type] = type if type
|
@@ -65,11 +65,13 @@ module Esse
|
|
65
65
|
end
|
66
66
|
end
|
67
67
|
|
68
|
-
def to_bulk(data: true)
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
68
|
+
def to_bulk(data: true, operation: nil)
|
69
|
+
doc_header.tap do |h|
|
70
|
+
if data && operation == :update
|
71
|
+
h[:data] = { doc: mutated_source }
|
72
|
+
elsif data
|
73
|
+
h[:data] = mutated_source
|
74
|
+
end
|
73
75
|
h.merge!(meta)
|
74
76
|
end
|
75
77
|
end
|
@@ -87,5 +89,35 @@ module Esse
|
|
87
89
|
id == other.id && type == other.type && routing == other.routing && meta == other.meta && source == other.source
|
88
90
|
)
|
89
91
|
end
|
92
|
+
|
93
|
+
def doc_header
|
94
|
+
{ _id: id }.tap do |h|
|
95
|
+
h[:_type] = type if type
|
96
|
+
h[:routing] = routing if routing?
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def inspect
|
101
|
+
attributes = %i[id routing source].map do |attr|
|
102
|
+
value = send(attr)
|
103
|
+
"#{attr}: #{value.inspect}" if value
|
104
|
+
end.compact.join(', ')
|
105
|
+
attributes << " mutations: #{@__mutations__.inspect}" if @__mutations__
|
106
|
+
"#<#{self.class.name || 'Esse::Document'} #{attributes}>"
|
107
|
+
end
|
108
|
+
|
109
|
+
def mutate(key)
|
110
|
+
@__mutations__ ||= {}
|
111
|
+
@__mutations__[key] = yield
|
112
|
+
instance_variable_set(:@__mutated_source__, nil)
|
113
|
+
end
|
114
|
+
|
115
|
+
protected
|
116
|
+
|
117
|
+
def mutated_source
|
118
|
+
return source unless @__mutations__
|
119
|
+
|
120
|
+
@__mutated_source__ ||= source.merge(@__mutations__)
|
121
|
+
end
|
90
122
|
end
|
91
123
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
class DocumentLazyAttribute
|
5
|
+
attr_reader :options
|
6
|
+
|
7
|
+
def initialize(**kwargs)
|
8
|
+
@options = kwargs
|
9
|
+
end
|
10
|
+
|
11
|
+
# Returns an Hash with the document ID as key and attribute data as value.
|
12
|
+
# @param doc_headers [Array<Esse::LazyDocumentHeader>] the document headers
|
13
|
+
# @return [Hash] An Hash with the instance of document header as key and the attribute data as value.
|
14
|
+
def call(doc_headers)
|
15
|
+
raise NotImplementedError, 'Override this method to return the document attribute data'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/esse/import/bulk.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
module Esse
|
2
2
|
module Import
|
3
3
|
class Bulk
|
4
|
-
def initialize(type: nil, index: nil, delete: nil, create: nil)
|
4
|
+
def initialize(type: nil, index: nil, delete: nil, create: nil, update: nil)
|
5
5
|
@index = Array(index).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
|
6
6
|
value = doc.to_bulk
|
7
7
|
value[:_type] ||= type if type
|
@@ -12,6 +12,11 @@ module Esse
|
|
12
12
|
value[:_type] ||= type if type
|
13
13
|
{ create: value }
|
14
14
|
end
|
15
|
+
@update = Array(update).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
|
16
|
+
value = doc.to_bulk(operation: :update)
|
17
|
+
value[:_type] ||= type if type
|
18
|
+
{ update: value }
|
19
|
+
end
|
15
20
|
@delete = Array(delete).select(&method(:valid_doc?)).reject(&:ignore_on_delete?).map do |doc|
|
16
21
|
value = doc.to_bulk(data: false)
|
17
22
|
value[:_type] ||= type if type
|
@@ -22,12 +27,12 @@ module Esse
|
|
22
27
|
# Return an array of RequestBody instances
|
23
28
|
#
|
24
29
|
# In case of timeout error, will retry with an exponential backoff using the following formula:
|
25
|
-
# wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default
|
30
|
+
# wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 4.
|
26
31
|
#
|
27
32
|
# Too large bulk requests will be split into multiple requests with only one attempt.
|
28
33
|
#
|
29
34
|
# @yield [RequestBody] A request body instance
|
30
|
-
def each_request(max_retries:
|
35
|
+
def each_request(max_retries: 4, last_retry_in_small_chunks: true)
|
31
36
|
# @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
|
32
37
|
requests = [optimistic_request]
|
33
38
|
retry_count = 0
|
@@ -43,6 +48,8 @@ module Esse
|
|
43
48
|
rescue Faraday::TimeoutError, Esse::Transport::RequestTimeoutError => e
|
44
49
|
retry_count += 1
|
45
50
|
raise Esse::Transport::RequestTimeoutError.new(e.message) if retry_count >= max_retries
|
51
|
+
# Timeout error may be caused by a too large request, so we split the requests in small chunks as a last attempt
|
52
|
+
requests = requests_in_small_chunks if last_retry_in_small_chunks && max_retries > 2 && retry_count == max_retries - 2
|
46
53
|
wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1))
|
47
54
|
Esse.logger.warn "Timeout error, retrying in #{wait_interval} seconds"
|
48
55
|
sleep(wait_interval)
|
@@ -67,16 +74,30 @@ module Esse
|
|
67
74
|
|
68
75
|
def optimistic_request
|
69
76
|
request = Import::RequestBodyAsJson.new
|
70
|
-
request.delete = @delete
|
71
77
|
request.create = @create
|
72
78
|
request.index = @index
|
79
|
+
request.update = @update
|
80
|
+
request.delete = @delete
|
73
81
|
request
|
74
82
|
end
|
75
83
|
|
84
|
+
def requests_in_small_chunks(chunk_size: 1)
|
85
|
+
arr = []
|
86
|
+
@create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
|
87
|
+
@index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
|
88
|
+
@update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
|
89
|
+
@delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
|
90
|
+
Esse.logger.warn <<~MSG
|
91
|
+
Retrying the last request in small chunks of #{chunk_size} documents.
|
92
|
+
This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
|
93
|
+
MSG
|
94
|
+
arr
|
95
|
+
end
|
96
|
+
|
76
97
|
# @return [Array<RequestBody>]
|
77
98
|
def balance_requests_size(err)
|
78
99
|
if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
|
79
|
-
requests = (@
|
100
|
+
requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
|
80
101
|
operation, meta = as_json.to_a.first
|
81
102
|
meta = meta.dup
|
82
103
|
data = meta.delete(:data)
|
@@ -5,7 +5,7 @@ module Esse
|
|
5
5
|
|
6
6
|
def initialize(body:)
|
7
7
|
@body = body # body may be String or Array<Hash>
|
8
|
-
@stats = { index: 0, create: 0, delete: 0 }
|
8
|
+
@stats = { index: 0, create: 0, delete: 0, update: 0 }
|
9
9
|
end
|
10
10
|
|
11
11
|
def body?
|
@@ -46,6 +46,11 @@ module Esse
|
|
46
46
|
@stats[:index] += docs.size
|
47
47
|
end
|
48
48
|
|
49
|
+
def update=(docs)
|
50
|
+
@body += docs
|
51
|
+
@stats[:update] += docs.size
|
52
|
+
end
|
53
|
+
|
49
54
|
def create=(docs)
|
50
55
|
@body += docs
|
51
56
|
@stats[:create] += docs.size
|
data/lib/esse/index/documents.rb
CHANGED
@@ -164,7 +164,7 @@ module Esse
|
|
164
164
|
# @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-bulk.html
|
165
165
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/utils.rb
|
166
166
|
# @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
|
167
|
-
def bulk(
|
167
|
+
def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: nil, **options)
|
168
168
|
definition = {
|
169
169
|
index: index_name(suffix: suffix),
|
170
170
|
type: type,
|
@@ -174,9 +174,10 @@ module Esse
|
|
174
174
|
# @TODO Wrap the return in a some other Stats object with more information
|
175
175
|
Esse::Import::Bulk.new(
|
176
176
|
**definition.slice(:type),
|
177
|
-
index: index,
|
178
|
-
delete: delete,
|
179
177
|
create: create,
|
178
|
+
delete: delete,
|
179
|
+
index: index,
|
180
|
+
update: update,
|
180
181
|
).each_request do |request_body|
|
181
182
|
cluster.api.bulk(**definition, body: request_body.body) do |event_payload|
|
182
183
|
event_payload[:body_stats] = request_body.stats
|
@@ -198,20 +199,37 @@ module Esse
|
|
198
199
|
# @option [Hash] :context The collection context. This value will be passed as argument to the collection
|
199
200
|
# May be SQL condition or any other filter you have defined on the collection.
|
200
201
|
# @return [Numeric] The number of documents imported
|
201
|
-
def import(*repo_types, context: {}, suffix: nil, **options)
|
202
|
+
def import(*repo_types, context: {}, eager_include_document_attributes: false, lazy_update_document_attributes: false, suffix: nil, **options)
|
202
203
|
repo_types = repo_hash.keys if repo_types.empty?
|
203
204
|
count = 0
|
205
|
+
|
204
206
|
repo_hash.slice(*repo_types).each do |repo_name, repo|
|
205
|
-
|
207
|
+
doc_attrs = {eager: [], lazy: []}
|
208
|
+
doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_include_document_attributes)
|
209
|
+
doc_attrs[:lazy] = repo.lazy_document_attribute_names(lazy_update_document_attributes)
|
210
|
+
doc_attrs[:lazy] -= doc_attrs[:eager]
|
211
|
+
|
212
|
+
context ||= {}
|
213
|
+
context[:lazy_attributes] = doc_attrs[:eager] if doc_attrs[:eager].any?
|
214
|
+
repo.each_serialized_batch(**context) do |batch|
|
206
215
|
# Elasticsearch 6.x and older have multiple types per index.
|
207
216
|
# This gem supports multiple types per index for backward compatibility, but we recommend to update
|
208
217
|
# your elasticsearch to a at least 7.x version and use a single type per index.
|
209
218
|
#
|
210
219
|
# Note that the repository name will be used as the document type.
|
211
220
|
# mapping_default_type
|
212
|
-
kwargs = {
|
221
|
+
kwargs = { suffix: suffix, type: repo_name, **options }
|
213
222
|
cluster.may_update_type!(kwargs)
|
214
|
-
|
223
|
+
|
224
|
+
bulk(**kwargs, index: batch)
|
225
|
+
|
226
|
+
doc_attrs[:lazy].each do |attr_name|
|
227
|
+
partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?))
|
228
|
+
next if partial_docs.empty?
|
229
|
+
|
230
|
+
bulk(**kwargs, update: partial_docs)
|
231
|
+
end
|
232
|
+
|
215
233
|
count += batch.size
|
216
234
|
end
|
217
235
|
end
|
data/lib/esse/index/indices.rb
CHANGED
@@ -26,10 +26,10 @@ module Esse
|
|
26
26
|
#
|
27
27
|
# @see http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
28
28
|
# @see Esse::Transport#create_index
|
29
|
-
def create_index(suffix: nil, **options)
|
29
|
+
def create_index(suffix: nil, body: nil, **options)
|
30
30
|
options = CREATE_INDEX_RESERVED_KEYWORDS.merge(options)
|
31
31
|
name = build_real_index_name(suffix)
|
32
|
-
definition = [settings_hash, mappings_hash].reduce(&:merge)
|
32
|
+
definition = body || [settings_hash, mappings_hash].reduce(&:merge)
|
33
33
|
|
34
34
|
if options.delete(:alias) && name != index_name
|
35
35
|
definition[:aliases] = { index_name => {} }
|
@@ -48,13 +48,23 @@ module Esse
|
|
48
48
|
# @return [Hash] the elasticsearch response
|
49
49
|
#
|
50
50
|
# @see https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-open-close.html
|
51
|
-
def reset_index(suffix: index_suffix, import: true, reindex: false, **options)
|
51
|
+
def reset_index(suffix: index_suffix, optimize: true, import: true, reindex: false, **options)
|
52
52
|
cluster.throw_error_when_readonly!
|
53
|
-
|
53
|
+
|
54
54
|
suffix ||= Esse.timestamp
|
55
|
-
suffix = Esse.timestamp while index_exist?(suffix: suffix)
|
55
|
+
suffix = Esse.timestamp while index_exist?(suffix: suffix)
|
56
|
+
|
57
|
+
if optimize
|
58
|
+
definition = [settings_hash, mappings_hash].reduce(&:merge)
|
59
|
+
number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas)
|
60
|
+
refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval)
|
61
|
+
new_number_of_replicas = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:number_of_replicas] = 0
|
62
|
+
new_refresh_interval = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:refresh_interval] = '-1'
|
63
|
+
create_index(**options, suffix: suffix, alias: false, body: definition)
|
64
|
+
else
|
65
|
+
create_index(**options, suffix: suffix, alias: false)
|
66
|
+
end
|
56
67
|
|
57
|
-
create_index(**options, suffix: suffix, alias: false)
|
58
68
|
if index_exist? && aliases.none?
|
59
69
|
cluster.api.delete_index(index: index_name)
|
60
70
|
end
|
@@ -63,8 +73,13 @@ module Esse
|
|
63
73
|
elsif reindex && (_from = indices_pointing_to_alias).any?
|
64
74
|
# @TODO: Reindex using the reindex API
|
65
75
|
end
|
76
|
+
|
77
|
+
if optimize && number_of_replicas != new_number_of_replicas || refresh_interval != new_refresh_interval
|
78
|
+
update_settings(suffix: suffix)
|
79
|
+
end
|
80
|
+
|
66
81
|
update_aliases(suffix: suffix)
|
67
|
-
|
82
|
+
|
68
83
|
true
|
69
84
|
end
|
70
85
|
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
class LazyDocumentHeader
|
5
|
+
def self.coerce_each(values)
|
6
|
+
arr = []
|
7
|
+
Esse::ArrayUtils.wrap(values).map do |value|
|
8
|
+
instance = coerce(value)
|
9
|
+
arr << instance if instance&.valid?
|
10
|
+
end
|
11
|
+
arr
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.coerce(value)
|
15
|
+
return unless value
|
16
|
+
|
17
|
+
if value.is_a?(Esse::LazyDocumentHeader)
|
18
|
+
value
|
19
|
+
elsif value.is_a?(Esse::Document)
|
20
|
+
new(value.doc_header)
|
21
|
+
elsif value.is_a?(Hash)
|
22
|
+
resp = value.transform_keys do |key|
|
23
|
+
case key
|
24
|
+
when :_id, :id, '_id', 'id'
|
25
|
+
:_id
|
26
|
+
when :_routing, :routing, '_routing', 'routing'
|
27
|
+
:routing
|
28
|
+
when :_type, :type, '_type', 'type'
|
29
|
+
:_type
|
30
|
+
else
|
31
|
+
key.to_sym
|
32
|
+
end
|
33
|
+
end
|
34
|
+
new(resp)
|
35
|
+
elsif String === value || Integer === value
|
36
|
+
new(_id: value)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def initialize(attributes)
|
41
|
+
@attributes = attributes
|
42
|
+
end
|
43
|
+
|
44
|
+
def valid?
|
45
|
+
!@attributes[:_id].nil?
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_h
|
49
|
+
@attributes
|
50
|
+
end
|
51
|
+
|
52
|
+
def id
|
53
|
+
@attributes.fetch(:_id)
|
54
|
+
end
|
55
|
+
|
56
|
+
def type
|
57
|
+
@attributes[:_type]
|
58
|
+
end
|
59
|
+
|
60
|
+
def routing
|
61
|
+
@attributes[:routing]
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_doc(source = {})
|
65
|
+
HashDocument.new(source.merge(@attributes))
|
66
|
+
end
|
67
|
+
|
68
|
+
def eql?(other)
|
69
|
+
self.class == other.class && @attributes == other.instance_variable_get(:@attributes)
|
70
|
+
end
|
71
|
+
alias_method :==, :eql?
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Esse
|
2
|
+
# The idea here is to add useful methods to the ruby standard objects without
|
3
|
+
# monkey patching them
|
4
|
+
module ArrayUtils
|
5
|
+
module_function
|
6
|
+
|
7
|
+
def wrap(object)
|
8
|
+
if object.nil?
|
9
|
+
[]
|
10
|
+
elsif object.respond_to?(:to_ary)
|
11
|
+
object.to_ary || [object]
|
12
|
+
else
|
13
|
+
[object]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/esse/primitives.rb
CHANGED
@@ -6,6 +6,43 @@ module Esse
|
|
6
6
|
def import(**kwargs)
|
7
7
|
index.import(repo_name, **kwargs)
|
8
8
|
end
|
9
|
+
|
10
|
+
def update_documents_attribute(name, ids_or_doc_headers = [], kwargs = {})
|
11
|
+
batch = documents_for_lazy_attribute(name, ids_or_doc_headers)
|
12
|
+
return if batch.empty?
|
13
|
+
|
14
|
+
index.bulk(**kwargs.transform_keys(&:to_sym), update: batch)
|
15
|
+
end
|
16
|
+
|
17
|
+
def documents_for_lazy_attribute(name, ids_or_doc_headers)
|
18
|
+
retrieve_lazy_attribute_values(name, ids_or_doc_headers).map do |doc_header, datum|
|
19
|
+
doc_header.to_doc(name => datum)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def retrieve_lazy_attribute_values(name, ids_or_doc_headers)
|
24
|
+
unless lazy_document_attribute?(name)
|
25
|
+
raise ArgumentError, <<~MSG
|
26
|
+
The attribute `#{name}` is not defined as a lazy document attribute.
|
27
|
+
|
28
|
+
Define the attribute as a lazy document attribute using the `lazy_document_attribute` method.
|
29
|
+
MSG
|
30
|
+
end
|
31
|
+
|
32
|
+
docs = LazyDocumentHeader.coerce_each(ids_or_doc_headers)
|
33
|
+
return [] if docs.empty?
|
34
|
+
|
35
|
+
result = fetch_lazy_document_attribute(name).call(docs)
|
36
|
+
return [] unless result.is_a?(Hash)
|
37
|
+
|
38
|
+
result.each_with_object({}) do |(key, value), memo|
|
39
|
+
if key.is_a?(LazyDocumentHeader) && (doc = docs.find { |d| d == key || d.id == key.id })
|
40
|
+
memo[doc] = value
|
41
|
+
elsif (doc = docs.find { |d| d.id == key })
|
42
|
+
memo[doc] = value
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
9
46
|
end
|
10
47
|
|
11
48
|
extend ClassMethods
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Esse
|
4
|
+
# Definition for the lazy document attributes
|
5
|
+
class Repository
|
6
|
+
module ClassMethods
|
7
|
+
def lazy_document_attributes
|
8
|
+
@lazy_document_attributes ||= {}.freeze
|
9
|
+
end
|
10
|
+
|
11
|
+
def lazy_document_attribute_names(all = true)
|
12
|
+
case all
|
13
|
+
when false
|
14
|
+
[]
|
15
|
+
when true
|
16
|
+
lazy_document_attributes.keys
|
17
|
+
else
|
18
|
+
filtered = Array(all).map(&:to_s)
|
19
|
+
lazy_document_attributes.keys.select { |name| filtered.include?(name.to_s) }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def fetch_lazy_document_attribute(attr_name)
|
24
|
+
klass, kwargs = lazy_document_attributes.fetch(attr_name)
|
25
|
+
klass.new(**kwargs)
|
26
|
+
rescue KeyError
|
27
|
+
raise ArgumentError, format('Attribute %<attr>p is not defined as a lazy document attribute', attr: attr_name)
|
28
|
+
end
|
29
|
+
|
30
|
+
def lazy_document_attribute(attr_name, klass = nil, **kwargs, &block)
|
31
|
+
if attr_name.nil?
|
32
|
+
raise ArgumentError, 'Attribute name is required to define a lazy document attribute'
|
33
|
+
end
|
34
|
+
if lazy_document_attribute?(attr_name.to_sym) || lazy_document_attribute?(attr_name.to_s)
|
35
|
+
raise ArgumentError, format('Attribute %<attr>p is already defined as a lazy document attribute', attr: attr_name)
|
36
|
+
end
|
37
|
+
|
38
|
+
@lazy_document_attributes = lazy_document_attributes.dup
|
39
|
+
if block
|
40
|
+
klass = Class.new(Esse::DocumentLazyAttribute) do
|
41
|
+
define_method(:call, &block)
|
42
|
+
end
|
43
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
44
|
+
elsif klass.is_a?(Class) && klass <= Esse::DocumentLazyAttribute
|
45
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
46
|
+
elsif klass.is_a?(Class) && klass.instance_methods.include?(:call)
|
47
|
+
@lazy_document_attributes[attr_name] = [klass, kwargs]
|
48
|
+
elsif klass.nil?
|
49
|
+
raise ArgumentError, format('A block or a class that responds to `call` is required to define a lazy document attribute')
|
50
|
+
else
|
51
|
+
raise ArgumentError, format('%<arg>p is not a valid lazy document attribute. Class should inherit from Esse::DocumentLazyAttribute or respond to `call`', arg: klass)
|
52
|
+
end
|
53
|
+
ensure
|
54
|
+
@lazy_document_attributes&.freeze
|
55
|
+
end
|
56
|
+
|
57
|
+
protected
|
58
|
+
|
59
|
+
def lazy_document_attribute?(attr_name)
|
60
|
+
lazy_document_attributes.key?(attr_name)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
extend ClassMethods
|
65
|
+
end
|
66
|
+
end
|
@@ -6,8 +6,8 @@ module Esse
|
|
6
6
|
# @see ObjectDocumentMapper
|
7
7
|
class Repository
|
8
8
|
module ClassMethods
|
9
|
-
#
|
10
|
-
# collection. It's allowed a block or a class with the `to_h` instance method.
|
9
|
+
# Define the document type that will be used to serialize the data.
|
10
|
+
# Arguments will be same of passed through the collection. It's allowed a block or a class with the `to_h` instance method.
|
11
11
|
# Example with block
|
12
12
|
# document do |model, **context|
|
13
13
|
# {
|
@@ -38,31 +38,6 @@ module Esse
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def coerce_to_document(value)
|
42
|
-
case value
|
43
|
-
when Esse::Document
|
44
|
-
value
|
45
|
-
when Hash
|
46
|
-
Esse::HashDocument.new(value)
|
47
|
-
when NilClass, FalseClass
|
48
|
-
Esse::NullDocument.new
|
49
|
-
else
|
50
|
-
raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
# Convert ruby object to json by using the document of the given document type.
|
55
|
-
# @param [Object] model The ruby object
|
56
|
-
# @param [Hash] kwargs The context
|
57
|
-
# @return [Esse::Document] The serialized document
|
58
|
-
def serialize(model, **kwargs)
|
59
|
-
if @document_proc.nil?
|
60
|
-
raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
|
61
|
-
end
|
62
|
-
|
63
|
-
@document_proc.call(model, **kwargs)
|
64
|
-
end
|
65
|
-
|
66
41
|
# Used to define the source of data. A block is required. And its
|
67
42
|
# content should yield an array of each object that should be serialized.
|
68
43
|
# The list of arguments will be passed throught the document method.
|
@@ -94,6 +69,71 @@ module Esse
|
|
94
69
|
@collection_proc = collection_klass || block
|
95
70
|
end
|
96
71
|
|
72
|
+
# Wrap collection data into serialized batches
|
73
|
+
#
|
74
|
+
# @param [Hash] kwargs The context
|
75
|
+
# @return [Enumerator] The enumerator
|
76
|
+
# @yield [Array, **context] serialized collection and the optional context from the collection
|
77
|
+
def each_serialized_batch(lazy_attributes: false, **kwargs)
|
78
|
+
each_batch(**kwargs) do |*args|
|
79
|
+
batch, collection_context = args
|
80
|
+
collection_context ||= {}
|
81
|
+
entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
|
82
|
+
if lazy_attributes
|
83
|
+
attrs = lazy_attributes.is_a?(Array) ? lazy_attributes : lazy_document_attribute_names(lazy_attributes)
|
84
|
+
attrs.each do |attr_name|
|
85
|
+
retrieve_lazy_attribute_values(attr_name, entries).each do |doc_header, value|
|
86
|
+
doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing }
|
87
|
+
doc&.mutate(attr_name) { value }
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
yield entries, **kwargs
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Wrap collection data into serialized documents
|
97
|
+
#
|
98
|
+
# Example:
|
99
|
+
# GeosIndex.documents(id: 1).first
|
100
|
+
#
|
101
|
+
# @return [Enumerator] All serialized entries
|
102
|
+
def documents(**kwargs)
|
103
|
+
Enumerator.new do |yielder|
|
104
|
+
each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
|
105
|
+
docs.each { |document| yielder.yield(document) }
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
# Convert ruby object to json by using the document of the given document type.
|
111
|
+
# @param [Object] model The ruby object
|
112
|
+
# @param [Hash] kwargs The context
|
113
|
+
# @return [Esse::Document] The serialized document
|
114
|
+
def serialize(model, **kwargs)
|
115
|
+
if @document_proc.nil?
|
116
|
+
raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
|
117
|
+
end
|
118
|
+
|
119
|
+
@document_proc.call(model, **kwargs)
|
120
|
+
end
|
121
|
+
|
122
|
+
protected
|
123
|
+
|
124
|
+
def coerce_to_document(value)
|
125
|
+
case value
|
126
|
+
when Esse::Document
|
127
|
+
value
|
128
|
+
when Hash
|
129
|
+
Esse::HashDocument.new(value)
|
130
|
+
when NilClass, FalseClass
|
131
|
+
Esse::NullDocument.new
|
132
|
+
else
|
133
|
+
raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
97
137
|
# Used to fetch all batch of data defined on the collection model.
|
98
138
|
# Arguments can be anything. They will just be passed through the block.
|
99
139
|
# Useful when the collection depends on scope or any other conditions
|
@@ -122,34 +162,6 @@ module Esse
|
|
122
162
|
rescue LocalJumpError
|
123
163
|
raise(SyntaxError, 'block must be explicitly declared in the collection definition')
|
124
164
|
end
|
125
|
-
|
126
|
-
# Wrap collection data into serialized batches
|
127
|
-
#
|
128
|
-
# @param [Hash] kwargs The context
|
129
|
-
# @return [Enumerator] The enumerator
|
130
|
-
# @yield [Array, **context] serialized collection and the optional context from the collection
|
131
|
-
def each_serialized_batch(**kwargs, &block)
|
132
|
-
each_batch(**kwargs) do |*args|
|
133
|
-
batch, collection_context = args
|
134
|
-
collection_context ||= {}
|
135
|
-
entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
|
136
|
-
block.call(entries, **kwargs)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
# Wrap collection data into serialized documents
|
141
|
-
#
|
142
|
-
# Example:
|
143
|
-
# GeosIndex.documents(id: 1).first
|
144
|
-
#
|
145
|
-
# @return [Enumerator] All serialized entries
|
146
|
-
def documents(**kwargs)
|
147
|
-
Enumerator.new do |yielder|
|
148
|
-
each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
|
149
|
-
docs.each { |document| yielder.yield(document) }
|
150
|
-
end
|
151
|
-
end
|
152
|
-
end
|
153
165
|
end
|
154
166
|
|
155
167
|
extend ClassMethods
|
data/lib/esse/repository.rb
CHANGED
data/lib/esse/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: esse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marcos G. Zimmermann
|
8
8
|
autorequire:
|
9
9
|
bindir: exec
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: multi_json
|
@@ -248,6 +248,7 @@ files:
|
|
248
248
|
- lib/esse/deprecations/repository_backend_delegator.rb
|
249
249
|
- lib/esse/deprecations/serializer.rb
|
250
250
|
- lib/esse/document.rb
|
251
|
+
- lib/esse/document_lazy_attribute.rb
|
251
252
|
- lib/esse/dynamic_template.rb
|
252
253
|
- lib/esse/errors.rb
|
253
254
|
- lib/esse/events.rb
|
@@ -274,16 +275,19 @@ files:
|
|
274
275
|
- lib/esse/index/type.rb
|
275
276
|
- lib/esse/index_mapping.rb
|
276
277
|
- lib/esse/index_setting.rb
|
278
|
+
- lib/esse/lazy_document_header.rb
|
277
279
|
- lib/esse/logging.rb
|
278
280
|
- lib/esse/null_document.rb
|
279
281
|
- lib/esse/plugins.rb
|
280
282
|
- lib/esse/primitives.rb
|
283
|
+
- lib/esse/primitives/array_utils.rb
|
281
284
|
- lib/esse/primitives/hash_utils.rb
|
282
285
|
- lib/esse/primitives/hstring.rb
|
283
286
|
- lib/esse/primitives/output.rb
|
284
287
|
- lib/esse/repository.rb
|
285
288
|
- lib/esse/repository/actions.rb
|
286
289
|
- lib/esse/repository/documents.rb
|
290
|
+
- lib/esse/repository/lazy_document_attributes.rb
|
287
291
|
- lib/esse/repository/object_document_mapper.rb
|
288
292
|
- lib/esse/search/query.rb
|
289
293
|
- lib/esse/search/query/dsl.rb
|
@@ -318,7 +322,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
318
322
|
- !ruby/object:Gem::Version
|
319
323
|
version: '0'
|
320
324
|
requirements: []
|
321
|
-
rubygems_version: 3.
|
325
|
+
rubygems_version: 3.0.3.1
|
322
326
|
signing_key:
|
323
327
|
specification_version: 4
|
324
328
|
summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building
|