esse 0.2.6 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c4a9e11bda74a05216a06279503d853bccc254354ab41d41080a1b19da2058d
4
- data.tar.gz: ef29051789f406601d0bf0df066f22091cb096403c5acc7eedffcbe3b91a54bf
3
+ metadata.gz: 46abc9b3347ca852c1f270910e21582c8f17186b554d1a7f667bafee8f84a152
4
+ data.tar.gz: baf633b7147e8cf48c704c15fc5f4a2a52c8d08f671f05e6c5577f3d92ef2901
5
5
  SHA512:
6
- metadata.gz: 9b23aa1caa4cd2d9b71d9fca7d4fce63740d02d564ffb27d056bfbf4fd868595e4207fde138476a5837d99a80bb00bcc90198bf1d69d3230b5e24af1565742f9
7
- data.tar.gz: 034e4b02602c55b52ac261ff07b30000b7448eac3ec55531dacf426eefeebd6b9e56bc6381731ff06a864803ddff81e93af4215491a29dacf36eb230f7472f8c
6
+ metadata.gz: 129c4475225760da6620d990a1065795d6c7f1248b01745105715b8d01f4c11c500b253701f573eac5071abeeeaed83e6be1c5bf8f22b5f2fab819587c96c595
7
+ data.tar.gz: 93f66e7d04a3857c3daf9b6daaa72a39e6a3027b80e464e53a97325f74666324451f93a914d9daa31864a77a93c7fb37735e12395536f395133135042a752f21
@@ -88,9 +88,20 @@ module Esse
88
88
  option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
89
89
  option :context, type: :hash, default: {}, required: true, desc: 'List of options to pass to the index class'
90
90
  option :repo, type: :string, default: nil, alias: '-r', desc: 'Repository to use for import'
91
+ option :eager_include_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request'
92
+ option :lazy_update_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request'
91
93
  def import(*index_classes)
92
94
  require_relative 'index/import'
93
- Import.new(indices: index_classes, **HashUtils.deep_transform_keys(options.to_h, &:to_sym)).run
95
+ opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
96
+ opts.delete(:lazy_update_document_attributes) if opts[:lazy_update_document_attributes] == 'false'
97
+ opts.delete(:eager_include_document_attributes) if opts[:eager_include_document_attributes] == 'false'
98
+ if (val = opts[:eager_include_document_attributes])
99
+ opts[:eager_include_document_attributes] = (val == 'true') ? true : val.split(',')
100
+ end
101
+ if (val = opts[:lazy_update_document_attributes])
102
+ opts[:lazy_update_document_attributes] = (val == 'true') ? true : val.split(',')
103
+ end
104
+ Import.new(indices: index_classes, **opts).run
94
105
  end
95
106
  end
96
107
  end
data/lib/esse/core.rb CHANGED
@@ -6,6 +6,8 @@ module Esse
6
6
  require_relative 'primitives'
7
7
  require_relative 'collection'
8
8
  require_relative 'document'
9
+ require_relative 'document_lazy_attribute'
10
+ require_relative 'lazy_document_header'
9
11
  require_relative 'hash_document'
10
12
  require_relative 'null_document'
11
13
  require_relative 'repository'
data/lib/esse/document.rb CHANGED
@@ -56,7 +56,7 @@ module Esse
56
56
 
57
57
  # @return [Hash] the document data
58
58
  def to_h
59
- source.merge(
59
+ mutated_source.merge(
60
60
  _id: id,
61
61
  ).tap do |hash|
62
62
  hash[:_type] = type if type
@@ -65,11 +65,13 @@ module Esse
65
65
  end
66
66
  end
67
67
 
68
- def to_bulk(data: true)
69
- { _id: id }.tap do |h|
70
- h[:data] = source&.to_h if data
71
- h[:_type] = type if type
72
- h[:routing] = routing if routing?
68
+ def to_bulk(data: true, operation: nil)
69
+ doc_header.tap do |h|
70
+ if data && operation == :update
71
+ h[:data] = { doc: mutated_source }
72
+ elsif data
73
+ h[:data] = mutated_source
74
+ end
73
75
  h.merge!(meta)
74
76
  end
75
77
  end
@@ -87,5 +89,35 @@ module Esse
87
89
  id == other.id && type == other.type && routing == other.routing && meta == other.meta && source == other.source
88
90
  )
89
91
  end
92
+
93
+ def doc_header
94
+ { _id: id }.tap do |h|
95
+ h[:_type] = type if type
96
+ h[:routing] = routing if routing?
97
+ end
98
+ end
99
+
100
+ def inspect
101
+ attributes = %i[id routing source].map do |attr|
102
+ value = send(attr)
103
+ "#{attr}: #{value.inspect}" if value
104
+ end.compact.join(', ')
105
+ attributes << " mutations: #{@__mutations__.inspect}" if @__mutations__
106
+ "#<#{self.class.name || 'Esse::Document'} #{attributes}>"
107
+ end
108
+
109
+ def mutate(key)
110
+ @__mutations__ ||= {}
111
+ @__mutations__[key] = yield
112
+ instance_variable_set(:@__mutated_source__, nil)
113
+ end
114
+
115
+ protected
116
+
117
+ def mutated_source
118
+ return source unless @__mutations__
119
+
120
+ @__mutated_source__ ||= source.merge(@__mutations__)
121
+ end
90
122
  end
91
123
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ class DocumentLazyAttribute
5
+ attr_reader :options
6
+
7
+ def initialize(**kwargs)
8
+ @options = kwargs
9
+ end
10
+
11
+ # Returns a Hash with the document ID as key and attribute data as value.
12
+ # @param doc_headers [Array<Esse::LazyDocumentHeader>] the document headers
13
+ # @return [Hash] A Hash with the instance of document header as key and the attribute data as value.
14
+ def call(doc_headers)
15
+ raise NotImplementedError, 'Override this method to return the document attribute data'
16
+ end
17
+ end
18
+ end
@@ -1,7 +1,7 @@
1
1
  module Esse
2
2
  module Import
3
3
  class Bulk
4
- def initialize(type: nil, index: nil, delete: nil, create: nil)
4
+ def initialize(type: nil, index: nil, delete: nil, create: nil, update: nil)
5
5
  @index = Array(index).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
6
6
  value = doc.to_bulk
7
7
  value[:_type] ||= type if type
@@ -12,6 +12,11 @@ module Esse
12
12
  value[:_type] ||= type if type
13
13
  { create: value }
14
14
  end
15
+ @update = Array(update).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
16
+ value = doc.to_bulk(operation: :update)
17
+ value[:_type] ||= type if type
18
+ { update: value }
19
+ end
15
20
  @delete = Array(delete).select(&method(:valid_doc?)).reject(&:ignore_on_delete?).map do |doc|
16
21
  value = doc.to_bulk(data: false)
17
22
  value[:_type] ||= type if type
@@ -22,12 +27,12 @@ module Esse
22
27
  # Return an array of RequestBody instances
23
28
  #
24
29
  # In case of timeout error, will retry with an exponential backoff using the following formula:
25
- # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 3.
30
+ # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times, which defaults to 4.
26
31
  #
27
32
  # Too large bulk requests will be split into multiple requests with only one attempt.
28
33
  #
29
34
  # @yield [RequestBody] A request body instance
30
- def each_request(max_retries: 3)
35
+ def each_request(max_retries: 4, last_retry_in_small_chunks: true)
31
36
  # @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
32
37
  requests = [optimistic_request]
33
38
  retry_count = 0
@@ -43,6 +48,8 @@ module Esse
43
48
  rescue Faraday::TimeoutError, Esse::Transport::RequestTimeoutError => e
44
49
  retry_count += 1
45
50
  raise Esse::Transport::RequestTimeoutError.new(e.message) if retry_count >= max_retries
51
+ # Timeout error may be caused by a too large request, so we split the requests in small chunks as a last attempt
52
+ requests = requests_in_small_chunks if last_retry_in_small_chunks && max_retries > 2 && retry_count == max_retries - 2
46
53
  wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1))
47
54
  Esse.logger.warn "Timeout error, retrying in #{wait_interval} seconds"
48
55
  sleep(wait_interval)
@@ -67,16 +74,30 @@ module Esse
67
74
 
68
75
  def optimistic_request
69
76
  request = Import::RequestBodyAsJson.new
70
- request.delete = @delete
71
77
  request.create = @create
72
78
  request.index = @index
79
+ request.update = @update
80
+ request.delete = @delete
73
81
  request
74
82
  end
75
83
 
84
+ def requests_in_small_chunks(chunk_size: 1)
85
+ arr = []
86
+ @create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
87
+ @index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
88
+ @update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
89
+ @delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
90
+ Esse.logger.warn <<~MSG
91
+ Retrying the last request in small chunks of #{chunk_size} documents.
92
+ This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
93
+ MSG
94
+ arr
95
+ end
96
+
76
97
  # @return [Array<RequestBody>]
77
98
  def balance_requests_size(err)
78
99
  if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
79
- requests = (@delete + @create + @index).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
100
+ requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
80
101
  operation, meta = as_json.to_a.first
81
102
  meta = meta.dup
82
103
  data = meta.delete(:data)
@@ -5,7 +5,7 @@ module Esse
5
5
 
6
6
  def initialize(body:)
7
7
  @body = body # body may be String or Array<Hash>
8
- @stats = { index: 0, create: 0, delete: 0 }
8
+ @stats = { index: 0, create: 0, delete: 0, update: 0 }
9
9
  end
10
10
 
11
11
  def body?
@@ -46,6 +46,11 @@ module Esse
46
46
  @stats[:index] += docs.size
47
47
  end
48
48
 
49
+ def update=(docs)
50
+ @body += docs
51
+ @stats[:update] += docs.size
52
+ end
53
+
49
54
  def create=(docs)
50
55
  @body += docs
51
56
  @stats[:create] += docs.size
@@ -164,7 +164,7 @@ module Esse
164
164
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-bulk.html
165
165
  # @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/utils.rb
166
166
  # @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
167
- def bulk(index: nil, delete: nil, create: nil, type: nil, suffix: nil, **options)
167
+ def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: nil, **options)
168
168
  definition = {
169
169
  index: index_name(suffix: suffix),
170
170
  type: type,
@@ -174,9 +174,10 @@ module Esse
174
174
  # @TODO Wrap the return in some other Stats object with more information
175
175
  Esse::Import::Bulk.new(
176
176
  **definition.slice(:type),
177
- index: index,
178
- delete: delete,
179
177
  create: create,
178
+ delete: delete,
179
+ index: index,
180
+ update: update,
180
181
  ).each_request do |request_body|
181
182
  cluster.api.bulk(**definition, body: request_body.body) do |event_payload|
182
183
  event_payload[:body_stats] = request_body.stats
@@ -198,20 +199,37 @@ module Esse
198
199
  # @option [Hash] :context The collection context. This value will be passed as argument to the collection
199
200
  # May be SQL condition or any other filter you have defined on the collection.
200
201
  # @return [Numeric] The number of documents imported
201
- def import(*repo_types, context: {}, suffix: nil, **options)
202
+ def import(*repo_types, context: {}, eager_include_document_attributes: false, lazy_update_document_attributes: false, suffix: nil, **options)
202
203
  repo_types = repo_hash.keys if repo_types.empty?
203
204
  count = 0
205
+
204
206
  repo_hash.slice(*repo_types).each do |repo_name, repo|
205
- repo.each_serialized_batch(**(context || {})) do |batch|
207
+ doc_attrs = {eager: [], lazy: []}
208
+ doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_include_document_attributes)
209
+ doc_attrs[:lazy] = repo.lazy_document_attribute_names(lazy_update_document_attributes)
210
+ doc_attrs[:lazy] -= doc_attrs[:eager]
211
+
212
+ context ||= {}
213
+ context[:lazy_attributes] = doc_attrs[:eager] if doc_attrs[:eager].any?
214
+ repo.each_serialized_batch(**context) do |batch|
206
215
  # Elasticsearch 6.x and older have multiple types per index.
207
216
  # This gem supports multiple types per index for backward compatibility, but we recommend to update
208
217
  # your elasticsearch to at least a 7.x version and use a single type per index.
209
218
  #
210
219
  # Note that the repository name will be used as the document type.
211
220
  # mapping_default_type
212
- kwargs = { index: batch, suffix: suffix, type: repo_name, **options }
221
+ kwargs = { suffix: suffix, type: repo_name, **options }
213
222
  cluster.may_update_type!(kwargs)
214
- bulk(**kwargs)
223
+
224
+ bulk(**kwargs, index: batch)
225
+
226
+ doc_attrs[:lazy].each do |attr_name|
227
+ partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?))
228
+ next if partial_docs.empty?
229
+
230
+ bulk(**kwargs, update: partial_docs)
231
+ end
232
+
215
233
  count += batch.size
216
234
  end
217
235
  end
@@ -26,10 +26,10 @@ module Esse
26
26
  #
27
27
  # @see http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
28
28
  # @see Esse::Transport#create_index
29
- def create_index(suffix: nil, **options)
29
+ def create_index(suffix: nil, body: nil, **options)
30
30
  options = CREATE_INDEX_RESERVED_KEYWORDS.merge(options)
31
31
  name = build_real_index_name(suffix)
32
- definition = [settings_hash, mappings_hash].reduce(&:merge)
32
+ definition = body || [settings_hash, mappings_hash].reduce(&:merge)
33
33
 
34
34
  if options.delete(:alias) && name != index_name
35
35
  definition[:aliases] = { index_name => {} }
@@ -48,13 +48,23 @@ module Esse
48
48
  # @return [Hash] the elasticsearch response
49
49
  #
50
50
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-open-close.html
51
- def reset_index(suffix: index_suffix, import: true, reindex: false, **options)
51
+ def reset_index(suffix: index_suffix, optimize: true, import: true, reindex: false, **options)
52
52
  cluster.throw_error_when_readonly!
53
- existing = []
53
+
54
54
  suffix ||= Esse.timestamp
55
- suffix = Esse.timestamp while index_exist?(suffix: suffix).tap { |exist| existing << suffix if exist }
55
+ suffix = Esse.timestamp while index_exist?(suffix: suffix)
56
+
57
+ if optimize
58
+ definition = [settings_hash, mappings_hash].reduce(&:merge)
59
+ number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas)
60
+ refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval)
61
+ new_number_of_replicas = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:number_of_replicas] = 0
62
+ new_refresh_interval = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:refresh_interval] = '-1'
63
+ create_index(**options, suffix: suffix, alias: false, body: definition)
64
+ else
65
+ create_index(**options, suffix: suffix, alias: false)
66
+ end
56
67
 
57
- create_index(**options, suffix: suffix, alias: false)
58
68
  if index_exist? && aliases.none?
59
69
  cluster.api.delete_index(index: index_name)
60
70
  end
@@ -63,8 +73,13 @@ module Esse
63
73
  elsif reindex && (_from = indices_pointing_to_alias).any?
64
74
  # @TODO: Reindex using the reindex API
65
75
  end
76
+
77
+ if optimize && number_of_replicas != new_number_of_replicas || refresh_interval != new_refresh_interval
78
+ update_settings(suffix: suffix)
79
+ end
80
+
66
81
  update_aliases(suffix: suffix)
67
- existing.each { |_s| delete_index!(**options, suffix: suffix) }
82
+
68
83
  true
69
84
  end
70
85
 
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ class LazyDocumentHeader
5
+ def self.coerce_each(values)
6
+ arr = []
7
+ Esse::ArrayUtils.wrap(values).map do |value|
8
+ instance = coerce(value)
9
+ arr << instance if instance&.valid?
10
+ end
11
+ arr
12
+ end
13
+
14
+ def self.coerce(value)
15
+ return unless value
16
+
17
+ if value.is_a?(Esse::LazyDocumentHeader)
18
+ value
19
+ elsif value.is_a?(Esse::Document)
20
+ new(value.doc_header)
21
+ elsif value.is_a?(Hash)
22
+ resp = value.transform_keys do |key|
23
+ case key
24
+ when :_id, :id, '_id', 'id'
25
+ :_id
26
+ when :_routing, :routing, '_routing', 'routing'
27
+ :routing
28
+ when :_type, :type, '_type', 'type'
29
+ :_type
30
+ else
31
+ key.to_sym
32
+ end
33
+ end
34
+ new(resp)
35
+ elsif String === value || Integer === value
36
+ new(_id: value)
37
+ end
38
+ end
39
+
40
+ def initialize(attributes)
41
+ @attributes = attributes
42
+ end
43
+
44
+ def valid?
45
+ !@attributes[:_id].nil?
46
+ end
47
+
48
+ def to_h
49
+ @attributes
50
+ end
51
+
52
+ def id
53
+ @attributes.fetch(:_id)
54
+ end
55
+
56
+ def type
57
+ @attributes[:_type]
58
+ end
59
+
60
+ def routing
61
+ @attributes[:routing]
62
+ end
63
+
64
+ def to_doc(source = {})
65
+ HashDocument.new(source.merge(@attributes))
66
+ end
67
+
68
+ def eql?(other)
69
+ self.class == other.class && @attributes == other.instance_variable_get(:@attributes)
70
+ end
71
+ alias_method :==, :eql?
72
+ end
73
+ end
@@ -0,0 +1,17 @@
1
+ module Esse
2
+ # The idea here is to add useful methods to the ruby standard objects without
3
+ # monkey patching them
4
+ module ArrayUtils
5
+ module_function
6
+
7
+ def wrap(object)
8
+ if object.nil?
9
+ []
10
+ elsif object.respond_to?(:to_ary)
11
+ object.to_ary || [object]
12
+ else
13
+ [object]
14
+ end
15
+ end
16
+ end
17
+ end
@@ -2,3 +2,4 @@
2
2
 
3
3
  require_relative 'primitives/hstring'
4
4
  require_relative 'primitives/hash_utils'
5
+ require_relative 'primitives/array_utils'
@@ -6,6 +6,43 @@ module Esse
6
6
  def import(**kwargs)
7
7
  index.import(repo_name, **kwargs)
8
8
  end
9
+
10
+ def update_documents_attribute(name, ids_or_doc_headers = [], kwargs = {})
11
+ batch = documents_for_lazy_attribute(name, ids_or_doc_headers)
12
+ return if batch.empty?
13
+
14
+ index.bulk(**kwargs.transform_keys(&:to_sym), update: batch)
15
+ end
16
+
17
+ def documents_for_lazy_attribute(name, ids_or_doc_headers)
18
+ retrieve_lazy_attribute_values(name, ids_or_doc_headers).map do |doc_header, datum|
19
+ doc_header.to_doc(name => datum)
20
+ end
21
+ end
22
+
23
+ def retrieve_lazy_attribute_values(name, ids_or_doc_headers)
24
+ unless lazy_document_attribute?(name)
25
+ raise ArgumentError, <<~MSG
26
+ The attribute `#{name}` is not defined as a lazy document attribute.
27
+
28
+ Define the attribute as a lazy document attribute using the `lazy_document_attribute` method.
29
+ MSG
30
+ end
31
+
32
+ docs = LazyDocumentHeader.coerce_each(ids_or_doc_headers)
33
+ return [] if docs.empty?
34
+
35
+ result = fetch_lazy_document_attribute(name).call(docs)
36
+ return [] unless result.is_a?(Hash)
37
+
38
+ result.each_with_object({}) do |(key, value), memo|
39
+ if key.is_a?(LazyDocumentHeader) && (doc = docs.find { |d| d == key || d.id == key.id })
40
+ memo[doc] = value
41
+ elsif (doc = docs.find { |d| d.id == key })
42
+ memo[doc] = value
43
+ end
44
+ end
45
+ end
9
46
  end
10
47
 
11
48
  extend ClassMethods
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ # Definition for the lazy document attributes
5
+ class Repository
6
+ module ClassMethods
7
+ def lazy_document_attributes
8
+ @lazy_document_attributes ||= {}.freeze
9
+ end
10
+
11
+ def lazy_document_attribute_names(all = true)
12
+ case all
13
+ when false
14
+ []
15
+ when true
16
+ lazy_document_attributes.keys
17
+ else
18
+ filtered = Array(all).map(&:to_s)
19
+ lazy_document_attributes.keys.select { |name| filtered.include?(name.to_s) }
20
+ end
21
+ end
22
+
23
+ def fetch_lazy_document_attribute(attr_name)
24
+ klass, kwargs = lazy_document_attributes.fetch(attr_name)
25
+ klass.new(**kwargs)
26
+ rescue KeyError
27
+ raise ArgumentError, format('Attribute %<attr>p is not defined as a lazy document attribute', attr: attr_name)
28
+ end
29
+
30
+ def lazy_document_attribute(attr_name, klass = nil, **kwargs, &block)
31
+ if attr_name.nil?
32
+ raise ArgumentError, 'Attribute name is required to define a lazy document attribute'
33
+ end
34
+ if lazy_document_attribute?(attr_name.to_sym) || lazy_document_attribute?(attr_name.to_s)
35
+ raise ArgumentError, format('Attribute %<attr>p is already defined as a lazy document attribute', attr: attr_name)
36
+ end
37
+
38
+ @lazy_document_attributes = lazy_document_attributes.dup
39
+ if block
40
+ klass = Class.new(Esse::DocumentLazyAttribute) do
41
+ define_method(:call, &block)
42
+ end
43
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
44
+ elsif klass.is_a?(Class) && klass <= Esse::DocumentLazyAttribute
45
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
46
+ elsif klass.is_a?(Class) && klass.instance_methods.include?(:call)
47
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
48
+ elsif klass.nil?
49
+ raise ArgumentError, format('A block or a class that responds to `call` is required to define a lazy document attribute')
50
+ else
51
+ raise ArgumentError, format('%<arg>p is not a valid lazy document attribute. Class should inherit from Esse::DocumentLazyAttribute or respond to `call`', arg: klass)
52
+ end
53
+ ensure
54
+ @lazy_document_attributes&.freeze
55
+ end
56
+
57
+ protected
58
+
59
+ def lazy_document_attribute?(attr_name)
60
+ lazy_document_attributes.key?(attr_name)
61
+ end
62
+ end
63
+
64
+ extend ClassMethods
65
+ end
66
+ end
@@ -6,8 +6,8 @@ module Esse
6
6
  # @see ObjectDocumentMapper
7
7
  class Repository
8
8
  module ClassMethods
9
- # Convert ruby object to json. Arguments will be same of passed through the
10
- # collection. It's allowed a block or a class with the `to_h` instance method.
9
+ # Define the document type that will be used to serialize the data.
10
+ # Arguments will be the same as those passed through the collection. A block or a class with the `to_h` instance method is allowed.
11
11
  # Example with block
12
12
  # document do |model, **context|
13
13
  # {
@@ -38,31 +38,6 @@ module Esse
38
38
  end
39
39
  end
40
40
 
41
- def coerce_to_document(value)
42
- case value
43
- when Esse::Document
44
- value
45
- when Hash
46
- Esse::HashDocument.new(value)
47
- when NilClass, FalseClass
48
- Esse::NullDocument.new
49
- else
50
- raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
51
- end
52
- end
53
-
54
- # Convert ruby object to json by using the document of the given document type.
55
- # @param [Object] model The ruby object
56
- # @param [Hash] kwargs The context
57
- # @return [Esse::Document] The serialized document
58
- def serialize(model, **kwargs)
59
- if @document_proc.nil?
60
- raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
61
- end
62
-
63
- @document_proc.call(model, **kwargs)
64
- end
65
-
66
41
  # Used to define the source of data. A block is required. And its
67
42
  # content should yield an array of each object that should be serialized.
68
43
  # The list of arguments will be passed through the document method.
@@ -94,6 +69,71 @@ module Esse
94
69
  @collection_proc = collection_klass || block
95
70
  end
96
71
 
72
+ # Wrap collection data into serialized batches
73
+ #
74
+ # @param [Hash] kwargs The context
75
+ # @return [Enumerator] The enumerator
76
+ # @yield [Array, **context] serialized collection and the optional context from the collection
77
+ def each_serialized_batch(lazy_attributes: false, **kwargs)
78
+ each_batch(**kwargs) do |*args|
79
+ batch, collection_context = args
80
+ collection_context ||= {}
81
+ entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
82
+ if lazy_attributes
83
+ attrs = lazy_attributes.is_a?(Array) ? lazy_attributes : lazy_document_attribute_names(lazy_attributes)
84
+ attrs.each do |attr_name|
85
+ retrieve_lazy_attribute_values(attr_name, entries).each do |doc_header, value|
86
+ doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing }
87
+ doc&.mutate(attr_name) { value }
88
+ end
89
+ end
90
+ end
91
+
92
+ yield entries, **kwargs
93
+ end
94
+ end
95
+
96
+ # Wrap collection data into serialized documents
97
+ #
98
+ # Example:
99
+ # GeosIndex.documents(id: 1).first
100
+ #
101
+ # @return [Enumerator] All serialized entries
102
+ def documents(**kwargs)
103
+ Enumerator.new do |yielder|
104
+ each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
105
+ docs.each { |document| yielder.yield(document) }
106
+ end
107
+ end
108
+ end
109
+
110
+ # Convert ruby object to json by using the document of the given document type.
111
+ # @param [Object] model The ruby object
112
+ # @param [Hash] kwargs The context
113
+ # @return [Esse::Document] The serialized document
114
+ def serialize(model, **kwargs)
115
+ if @document_proc.nil?
116
+ raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
117
+ end
118
+
119
+ @document_proc.call(model, **kwargs)
120
+ end
121
+
122
+ protected
123
+
124
+ def coerce_to_document(value)
125
+ case value
126
+ when Esse::Document
127
+ value
128
+ when Hash
129
+ Esse::HashDocument.new(value)
130
+ when NilClass, FalseClass
131
+ Esse::NullDocument.new
132
+ else
133
+ raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
134
+ end
135
+ end
136
+
97
137
  # Used to fetch all batch of data defined on the collection model.
98
138
  # Arguments can be anything. They will just be passed through the block.
99
139
  # Useful when the collection depends on scope or any other conditions
@@ -122,34 +162,6 @@ module Esse
122
162
  rescue LocalJumpError
123
163
  raise(SyntaxError, 'block must be explicitly declared in the collection definition')
124
164
  end
125
-
126
- # Wrap collection data into serialized batches
127
- #
128
- # @param [Hash] kwargs The context
129
- # @return [Enumerator] The enumerator
130
- # @yield [Array, **context] serialized collection and the optional context from the collection
131
- def each_serialized_batch(**kwargs, &block)
132
- each_batch(**kwargs) do |*args|
133
- batch, collection_context = args
134
- collection_context ||= {}
135
- entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
136
- block.call(entries, **kwargs)
137
- end
138
- end
139
-
140
- # Wrap collection data into serialized documents
141
- #
142
- # Example:
143
- # GeosIndex.documents(id: 1).first
144
- #
145
- # @return [Enumerator] All serialized entries
146
- def documents(**kwargs)
147
- Enumerator.new do |yielder|
148
- each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
149
- docs.each { |document| yielder.yield(document) }
150
- end
151
- end
152
- end
153
165
  end
154
166
 
155
167
  extend ClassMethods
@@ -13,5 +13,6 @@ module Esse
13
13
  require_relative 'repository/actions'
14
14
  require_relative 'repository/documents'
15
15
  require_relative 'repository/object_document_mapper'
16
+ require_relative 'repository/lazy_document_attributes'
16
17
  end
17
18
  end
data/lib/esse/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Esse
4
- VERSION = '0.2.6'
4
+ VERSION = '0.3.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
8
  autorequire:
9
9
  bindir: exec
10
10
  cert_chain: []
11
- date: 2023-11-27 00:00:00.000000000 Z
11
+ date: 2024-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multi_json
@@ -248,6 +248,7 @@ files:
248
248
  - lib/esse/deprecations/repository_backend_delegator.rb
249
249
  - lib/esse/deprecations/serializer.rb
250
250
  - lib/esse/document.rb
251
+ - lib/esse/document_lazy_attribute.rb
251
252
  - lib/esse/dynamic_template.rb
252
253
  - lib/esse/errors.rb
253
254
  - lib/esse/events.rb
@@ -274,16 +275,19 @@ files:
274
275
  - lib/esse/index/type.rb
275
276
  - lib/esse/index_mapping.rb
276
277
  - lib/esse/index_setting.rb
278
+ - lib/esse/lazy_document_header.rb
277
279
  - lib/esse/logging.rb
278
280
  - lib/esse/null_document.rb
279
281
  - lib/esse/plugins.rb
280
282
  - lib/esse/primitives.rb
283
+ - lib/esse/primitives/array_utils.rb
281
284
  - lib/esse/primitives/hash_utils.rb
282
285
  - lib/esse/primitives/hstring.rb
283
286
  - lib/esse/primitives/output.rb
284
287
  - lib/esse/repository.rb
285
288
  - lib/esse/repository/actions.rb
286
289
  - lib/esse/repository/documents.rb
290
+ - lib/esse/repository/lazy_document_attributes.rb
287
291
  - lib/esse/repository/object_document_mapper.rb
288
292
  - lib/esse/search/query.rb
289
293
  - lib/esse/search/query/dsl.rb
@@ -318,7 +322,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
318
322
  - !ruby/object:Gem::Version
319
323
  version: '0'
320
324
  requirements: []
321
- rubygems_version: 3.2.32
325
+ rubygems_version: 3.0.3.1
322
326
  signing_key:
323
327
  specification_version: 4
324
328
  summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building