esse 0.2.6 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c4a9e11bda74a05216a06279503d853bccc254354ab41d41080a1b19da2058d
4
- data.tar.gz: ef29051789f406601d0bf0df066f22091cb096403c5acc7eedffcbe3b91a54bf
3
+ metadata.gz: 46abc9b3347ca852c1f270910e21582c8f17186b554d1a7f667bafee8f84a152
4
+ data.tar.gz: baf633b7147e8cf48c704c15fc5f4a2a52c8d08f671f05e6c5577f3d92ef2901
5
5
  SHA512:
6
- metadata.gz: 9b23aa1caa4cd2d9b71d9fca7d4fce63740d02d564ffb27d056bfbf4fd868595e4207fde138476a5837d99a80bb00bcc90198bf1d69d3230b5e24af1565742f9
7
- data.tar.gz: 034e4b02602c55b52ac261ff07b30000b7448eac3ec55531dacf426eefeebd6b9e56bc6381731ff06a864803ddff81e93af4215491a29dacf36eb230f7472f8c
6
+ metadata.gz: 129c4475225760da6620d990a1065795d6c7f1248b01745105715b8d01f4c11c500b253701f573eac5071abeeeaed83e6be1c5bf8f22b5f2fab819587c96c595
7
+ data.tar.gz: 93f66e7d04a3857c3daf9b6daaa72a39e6a3027b80e464e53a97325f74666324451f93a914d9daa31864a77a93c7fb37735e12395536f395133135042a752f21
@@ -88,9 +88,20 @@ module Esse
88
88
  option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
89
89
  option :context, type: :hash, default: {}, required: true, desc: 'List of options to pass to the index class'
90
90
  option :repo, type: :string, default: nil, alias: '-r', desc: 'Repository to use for import'
91
+ option :eager_include_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request'
92
+ option :lazy_update_document_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request'
91
93
  def import(*index_classes)
92
94
  require_relative 'index/import'
93
- Import.new(indices: index_classes, **HashUtils.deep_transform_keys(options.to_h, &:to_sym)).run
95
+ opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
96
+ opts.delete(:lazy_update_document_attributes) if opts[:lazy_update_document_attributes] == 'false'
97
+ opts.delete(:eager_include_document_attributes) if opts[:eager_include_document_attributes] == 'false'
98
+ if (val = opts[:eager_include_document_attributes])
99
+ opts[:eager_include_document_attributes] = (val == 'true') ? true : val.split(',')
100
+ end
101
+ if (val = opts[:lazy_update_document_attributes])
102
+ opts[:lazy_update_document_attributes] = (val == 'true') ? true : val.split(',')
103
+ end
104
+ Import.new(indices: index_classes, **opts).run
94
105
  end
95
106
  end
96
107
  end
data/lib/esse/core.rb CHANGED
@@ -6,6 +6,8 @@ module Esse
6
6
  require_relative 'primitives'
7
7
  require_relative 'collection'
8
8
  require_relative 'document'
9
+ require_relative 'document_lazy_attribute'
10
+ require_relative 'lazy_document_header'
9
11
  require_relative 'hash_document'
10
12
  require_relative 'null_document'
11
13
  require_relative 'repository'
data/lib/esse/document.rb CHANGED
@@ -56,7 +56,7 @@ module Esse
56
56
 
57
57
  # @return [Hash] the document data
58
58
  def to_h
59
- source.merge(
59
+ mutated_source.merge(
60
60
  _id: id,
61
61
  ).tap do |hash|
62
62
  hash[:_type] = type if type
@@ -65,11 +65,13 @@ module Esse
65
65
  end
66
66
  end
67
67
 
68
- def to_bulk(data: true)
69
- { _id: id }.tap do |h|
70
- h[:data] = source&.to_h if data
71
- h[:_type] = type if type
72
- h[:routing] = routing if routing?
68
+ def to_bulk(data: true, operation: nil)
69
+ doc_header.tap do |h|
70
+ if data && operation == :update
71
+ h[:data] = { doc: mutated_source }
72
+ elsif data
73
+ h[:data] = mutated_source
74
+ end
73
75
  h.merge!(meta)
74
76
  end
75
77
  end
@@ -87,5 +89,35 @@ module Esse
87
89
  id == other.id && type == other.type && routing == other.routing && meta == other.meta && source == other.source
88
90
  )
89
91
  end
92
+
93
+ def doc_header
94
+ { _id: id }.tap do |h|
95
+ h[:_type] = type if type
96
+ h[:routing] = routing if routing?
97
+ end
98
+ end
99
+
100
+ def inspect
101
+ attributes = %i[id routing source].map do |attr|
102
+ value = send(attr)
103
+ "#{attr}: #{value.inspect}" if value
104
+ end.compact.join(', ')
105
+ attributes << " mutations: #{@__mutations__.inspect}" if @__mutations__
106
+ "#<#{self.class.name || 'Esse::Document'} #{attributes}>"
107
+ end
108
+
109
+ def mutate(key)
110
+ @__mutations__ ||= {}
111
+ @__mutations__[key] = yield
112
+ instance_variable_set(:@__mutated_source__, nil)
113
+ end
114
+
115
+ protected
116
+
117
+ def mutated_source
118
+ return source unless @__mutations__
119
+
120
+ @__mutated_source__ ||= source.merge(@__mutations__)
121
+ end
90
122
  end
91
123
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ class DocumentLazyAttribute
5
+ attr_reader :options
6
+
7
+ def initialize(**kwargs)
8
+ @options = kwargs
9
+ end
10
+
11
+ # Returns an Hash with the document ID as key and attribute data as value.
12
+ # @param doc_headers [Array<Esse::LazyDocumentHeader>] the document headers
13
+ # @return [Hash] An Hash with the instance of document header as key and the attribute data as value.
14
+ def call(doc_headers)
15
+ raise NotImplementedError, 'Override this method to return the document attribute data'
16
+ end
17
+ end
18
+ end
@@ -1,7 +1,7 @@
1
1
  module Esse
2
2
  module Import
3
3
  class Bulk
4
- def initialize(type: nil, index: nil, delete: nil, create: nil)
4
+ def initialize(type: nil, index: nil, delete: nil, create: nil, update: nil)
5
5
  @index = Array(index).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
6
6
  value = doc.to_bulk
7
7
  value[:_type] ||= type if type
@@ -12,6 +12,11 @@ module Esse
12
12
  value[:_type] ||= type if type
13
13
  { create: value }
14
14
  end
15
+ @update = Array(update).select(&method(:valid_doc?)).reject(&:ignore_on_index?).map do |doc|
16
+ value = doc.to_bulk(operation: :update)
17
+ value[:_type] ||= type if type
18
+ { update: value }
19
+ end
15
20
  @delete = Array(delete).select(&method(:valid_doc?)).reject(&:ignore_on_delete?).map do |doc|
16
21
  value = doc.to_bulk(data: false)
17
22
  value[:_type] ||= type if type
@@ -22,12 +27,12 @@ module Esse
22
27
  # Return an array of RequestBody instances
23
28
  #
24
29
  # In case of timeout error, will retry with an exponential backoff using the following formula:
25
- # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 3.
30
+ # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 4.
26
31
  #
27
32
  # Too large bulk requests will be split into multiple requests with only one attempt.
28
33
  #
29
34
  # @yield [RequestBody] A request body instance
30
- def each_request(max_retries: 3)
35
+ def each_request(max_retries: 4, last_retry_in_small_chunks: true)
31
36
  # @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
32
37
  requests = [optimistic_request]
33
38
  retry_count = 0
@@ -43,6 +48,8 @@ module Esse
43
48
  rescue Faraday::TimeoutError, Esse::Transport::RequestTimeoutError => e
44
49
  retry_count += 1
45
50
  raise Esse::Transport::RequestTimeoutError.new(e.message) if retry_count >= max_retries
51
+ # Timeout error may be caused by a too large request, so we split the requests in small chunks as a last attempt
52
+ requests = requests_in_small_chunks if last_retry_in_small_chunks && max_retries > 2 && retry_count == max_retries - 2
46
53
  wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1))
47
54
  Esse.logger.warn "Timeout error, retrying in #{wait_interval} seconds"
48
55
  sleep(wait_interval)
@@ -67,16 +74,30 @@ module Esse
67
74
 
68
75
  def optimistic_request
69
76
  request = Import::RequestBodyAsJson.new
70
- request.delete = @delete
71
77
  request.create = @create
72
78
  request.index = @index
79
+ request.update = @update
80
+ request.delete = @delete
73
81
  request
74
82
  end
75
83
 
84
+ def requests_in_small_chunks(chunk_size: 1)
85
+ arr = []
86
+ @create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
87
+ @index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
88
+ @update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
89
+ @delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
90
+ Esse.logger.warn <<~MSG
91
+ Retrying the last request in small chunks of #{chunk_size} documents.
92
+ This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
93
+ MSG
94
+ arr
95
+ end
96
+
76
97
  # @return [Array<RequestBody>]
77
98
  def balance_requests_size(err)
78
99
  if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
79
- requests = (@delete + @create + @index).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
100
+ requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
80
101
  operation, meta = as_json.to_a.first
81
102
  meta = meta.dup
82
103
  data = meta.delete(:data)
@@ -5,7 +5,7 @@ module Esse
5
5
 
6
6
  def initialize(body:)
7
7
  @body = body # body may be String or Array<Hash>
8
- @stats = { index: 0, create: 0, delete: 0 }
8
+ @stats = { index: 0, create: 0, delete: 0, update: 0 }
9
9
  end
10
10
 
11
11
  def body?
@@ -46,6 +46,11 @@ module Esse
46
46
  @stats[:index] += docs.size
47
47
  end
48
48
 
49
+ def update=(docs)
50
+ @body += docs
51
+ @stats[:update] += docs.size
52
+ end
53
+
49
54
  def create=(docs)
50
55
  @body += docs
51
56
  @stats[:create] += docs.size
@@ -164,7 +164,7 @@ module Esse
164
164
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-bulk.html
165
165
  # @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/utils.rb
166
166
  # @see https://github.com/elastic/elasticsearch-ruby/blob/main/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb
167
- def bulk(index: nil, delete: nil, create: nil, type: nil, suffix: nil, **options)
167
+ def bulk(create: nil, delete: nil, index: nil, update: nil, type: nil, suffix: nil, **options)
168
168
  definition = {
169
169
  index: index_name(suffix: suffix),
170
170
  type: type,
@@ -174,9 +174,10 @@ module Esse
174
174
  # @TODO Wrap the return in a some other Stats object with more information
175
175
  Esse::Import::Bulk.new(
176
176
  **definition.slice(:type),
177
- index: index,
178
- delete: delete,
179
177
  create: create,
178
+ delete: delete,
179
+ index: index,
180
+ update: update,
180
181
  ).each_request do |request_body|
181
182
  cluster.api.bulk(**definition, body: request_body.body) do |event_payload|
182
183
  event_payload[:body_stats] = request_body.stats
@@ -198,20 +199,37 @@ module Esse
198
199
  # @option [Hash] :context The collection context. This value will be passed as argument to the collection
199
200
  # May be SQL condition or any other filter you have defined on the collection.
200
201
  # @return [Numeric] The number of documents imported
201
- def import(*repo_types, context: {}, suffix: nil, **options)
202
+ def import(*repo_types, context: {}, eager_include_document_attributes: false, lazy_update_document_attributes: false, suffix: nil, **options)
202
203
  repo_types = repo_hash.keys if repo_types.empty?
203
204
  count = 0
205
+
204
206
  repo_hash.slice(*repo_types).each do |repo_name, repo|
205
- repo.each_serialized_batch(**(context || {})) do |batch|
207
+ doc_attrs = {eager: [], lazy: []}
208
+ doc_attrs[:eager] = repo.lazy_document_attribute_names(eager_include_document_attributes)
209
+ doc_attrs[:lazy] = repo.lazy_document_attribute_names(lazy_update_document_attributes)
210
+ doc_attrs[:lazy] -= doc_attrs[:eager]
211
+
212
+ context ||= {}
213
+ context[:lazy_attributes] = doc_attrs[:eager] if doc_attrs[:eager].any?
214
+ repo.each_serialized_batch(**context) do |batch|
206
215
  # Elasticsearch 6.x and older have multiple types per index.
207
216
  # This gem supports multiple types per index for backward compatibility, but we recommend to update
208
217
  # your elasticsearch to a at least 7.x version and use a single type per index.
209
218
  #
210
219
  # Note that the repository name will be used as the document type.
211
220
  # mapping_default_type
212
- kwargs = { index: batch, suffix: suffix, type: repo_name, **options }
221
+ kwargs = { suffix: suffix, type: repo_name, **options }
213
222
  cluster.may_update_type!(kwargs)
214
- bulk(**kwargs)
223
+
224
+ bulk(**kwargs, index: batch)
225
+
226
+ doc_attrs[:lazy].each do |attr_name|
227
+ partial_docs = repo.documents_for_lazy_attribute(attr_name, batch.reject(&:ignore_on_index?))
228
+ next if partial_docs.empty?
229
+
230
+ bulk(**kwargs, update: partial_docs)
231
+ end
232
+
215
233
  count += batch.size
216
234
  end
217
235
  end
@@ -26,10 +26,10 @@ module Esse
26
26
  #
27
27
  # @see http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
28
28
  # @see Esse::Transport#create_index
29
- def create_index(suffix: nil, **options)
29
+ def create_index(suffix: nil, body: nil, **options)
30
30
  options = CREATE_INDEX_RESERVED_KEYWORDS.merge(options)
31
31
  name = build_real_index_name(suffix)
32
- definition = [settings_hash, mappings_hash].reduce(&:merge)
32
+ definition = body || [settings_hash, mappings_hash].reduce(&:merge)
33
33
 
34
34
  if options.delete(:alias) && name != index_name
35
35
  definition[:aliases] = { index_name => {} }
@@ -48,13 +48,23 @@ module Esse
48
48
  # @return [Hash] the elasticsearch response
49
49
  #
50
50
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-open-close.html
51
- def reset_index(suffix: index_suffix, import: true, reindex: false, **options)
51
+ def reset_index(suffix: index_suffix, optimize: true, import: true, reindex: false, **options)
52
52
  cluster.throw_error_when_readonly!
53
- existing = []
53
+
54
54
  suffix ||= Esse.timestamp
55
- suffix = Esse.timestamp while index_exist?(suffix: suffix).tap { |exist| existing << suffix if exist }
55
+ suffix = Esse.timestamp while index_exist?(suffix: suffix)
56
+
57
+ if optimize
58
+ definition = [settings_hash, mappings_hash].reduce(&:merge)
59
+ number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas)
60
+ refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval)
61
+ new_number_of_replicas = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:number_of_replicas] = 0
62
+ new_refresh_interval = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:refresh_interval] = '-1'
63
+ create_index(**options, suffix: suffix, alias: false, body: definition)
64
+ else
65
+ create_index(**options, suffix: suffix, alias: false)
66
+ end
56
67
 
57
- create_index(**options, suffix: suffix, alias: false)
58
68
  if index_exist? && aliases.none?
59
69
  cluster.api.delete_index(index: index_name)
60
70
  end
@@ -63,8 +73,13 @@ module Esse
63
73
  elsif reindex && (_from = indices_pointing_to_alias).any?
64
74
  # @TODO: Reindex using the reindex API
65
75
  end
76
+
77
+ if optimize && number_of_replicas != new_number_of_replicas || refresh_interval != new_refresh_interval
78
+ update_settings(suffix: suffix)
79
+ end
80
+
66
81
  update_aliases(suffix: suffix)
67
- existing.each { |_s| delete_index!(**options, suffix: suffix) }
82
+
68
83
  true
69
84
  end
70
85
 
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ class LazyDocumentHeader
5
+ def self.coerce_each(values)
6
+ arr = []
7
+ Esse::ArrayUtils.wrap(values).map do |value|
8
+ instance = coerce(value)
9
+ arr << instance if instance&.valid?
10
+ end
11
+ arr
12
+ end
13
+
14
+ def self.coerce(value)
15
+ return unless value
16
+
17
+ if value.is_a?(Esse::LazyDocumentHeader)
18
+ value
19
+ elsif value.is_a?(Esse::Document)
20
+ new(value.doc_header)
21
+ elsif value.is_a?(Hash)
22
+ resp = value.transform_keys do |key|
23
+ case key
24
+ when :_id, :id, '_id', 'id'
25
+ :_id
26
+ when :_routing, :routing, '_routing', 'routing'
27
+ :routing
28
+ when :_type, :type, '_type', 'type'
29
+ :_type
30
+ else
31
+ key.to_sym
32
+ end
33
+ end
34
+ new(resp)
35
+ elsif String === value || Integer === value
36
+ new(_id: value)
37
+ end
38
+ end
39
+
40
+ def initialize(attributes)
41
+ @attributes = attributes
42
+ end
43
+
44
+ def valid?
45
+ !@attributes[:_id].nil?
46
+ end
47
+
48
+ def to_h
49
+ @attributes
50
+ end
51
+
52
+ def id
53
+ @attributes.fetch(:_id)
54
+ end
55
+
56
+ def type
57
+ @attributes[:_type]
58
+ end
59
+
60
+ def routing
61
+ @attributes[:routing]
62
+ end
63
+
64
+ def to_doc(source = {})
65
+ HashDocument.new(source.merge(@attributes))
66
+ end
67
+
68
+ def eql?(other)
69
+ self.class == other.class && @attributes == other.instance_variable_get(:@attributes)
70
+ end
71
+ alias_method :==, :eql?
72
+ end
73
+ end
@@ -0,0 +1,17 @@
1
+ module Esse
2
+ # The idea here is to add useful methods to the ruby standard objects without
3
+ # monkey patching them
4
+ module ArrayUtils
5
+ module_function
6
+
7
+ def wrap(object)
8
+ if object.nil?
9
+ []
10
+ elsif object.respond_to?(:to_ary)
11
+ object.to_ary || [object]
12
+ else
13
+ [object]
14
+ end
15
+ end
16
+ end
17
+ end
@@ -2,3 +2,4 @@
2
2
 
3
3
  require_relative 'primitives/hstring'
4
4
  require_relative 'primitives/hash_utils'
5
+ require_relative 'primitives/array_utils'
@@ -6,6 +6,43 @@ module Esse
6
6
  def import(**kwargs)
7
7
  index.import(repo_name, **kwargs)
8
8
  end
9
+
10
+ def update_documents_attribute(name, ids_or_doc_headers = [], kwargs = {})
11
+ batch = documents_for_lazy_attribute(name, ids_or_doc_headers)
12
+ return if batch.empty?
13
+
14
+ index.bulk(**kwargs.transform_keys(&:to_sym), update: batch)
15
+ end
16
+
17
+ def documents_for_lazy_attribute(name, ids_or_doc_headers)
18
+ retrieve_lazy_attribute_values(name, ids_or_doc_headers).map do |doc_header, datum|
19
+ doc_header.to_doc(name => datum)
20
+ end
21
+ end
22
+
23
+ def retrieve_lazy_attribute_values(name, ids_or_doc_headers)
24
+ unless lazy_document_attribute?(name)
25
+ raise ArgumentError, <<~MSG
26
+ The attribute `#{name}` is not defined as a lazy document attribute.
27
+
28
+ Define the attribute as a lazy document attribute using the `lazy_document_attribute` method.
29
+ MSG
30
+ end
31
+
32
+ docs = LazyDocumentHeader.coerce_each(ids_or_doc_headers)
33
+ return [] if docs.empty?
34
+
35
+ result = fetch_lazy_document_attribute(name).call(docs)
36
+ return [] unless result.is_a?(Hash)
37
+
38
+ result.each_with_object({}) do |(key, value), memo|
39
+ if key.is_a?(LazyDocumentHeader) && (doc = docs.find { |d| d == key || d.id == key.id })
40
+ memo[doc] = value
41
+ elsif (doc = docs.find { |d| d.id == key })
42
+ memo[doc] = value
43
+ end
44
+ end
45
+ end
9
46
  end
10
47
 
11
48
  extend ClassMethods
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ # Definition for the lazy document attributes
5
+ class Repository
6
+ module ClassMethods
7
+ def lazy_document_attributes
8
+ @lazy_document_attributes ||= {}.freeze
9
+ end
10
+
11
+ def lazy_document_attribute_names(all = true)
12
+ case all
13
+ when false
14
+ []
15
+ when true
16
+ lazy_document_attributes.keys
17
+ else
18
+ filtered = Array(all).map(&:to_s)
19
+ lazy_document_attributes.keys.select { |name| filtered.include?(name.to_s) }
20
+ end
21
+ end
22
+
23
+ def fetch_lazy_document_attribute(attr_name)
24
+ klass, kwargs = lazy_document_attributes.fetch(attr_name)
25
+ klass.new(**kwargs)
26
+ rescue KeyError
27
+ raise ArgumentError, format('Attribute %<attr>p is not defined as a lazy document attribute', attr: attr_name)
28
+ end
29
+
30
+ def lazy_document_attribute(attr_name, klass = nil, **kwargs, &block)
31
+ if attr_name.nil?
32
+ raise ArgumentError, 'Attribute name is required to define a lazy document attribute'
33
+ end
34
+ if lazy_document_attribute?(attr_name.to_sym) || lazy_document_attribute?(attr_name.to_s)
35
+ raise ArgumentError, format('Attribute %<attr>p is already defined as a lazy document attribute', attr: attr_name)
36
+ end
37
+
38
+ @lazy_document_attributes = lazy_document_attributes.dup
39
+ if block
40
+ klass = Class.new(Esse::DocumentLazyAttribute) do
41
+ define_method(:call, &block)
42
+ end
43
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
44
+ elsif klass.is_a?(Class) && klass <= Esse::DocumentLazyAttribute
45
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
46
+ elsif klass.is_a?(Class) && klass.instance_methods.include?(:call)
47
+ @lazy_document_attributes[attr_name] = [klass, kwargs]
48
+ elsif klass.nil?
49
+ raise ArgumentError, format('A block or a class that responds to `call` is required to define a lazy document attribute')
50
+ else
51
+ raise ArgumentError, format('%<arg>p is not a valid lazy document attribute. Class should inherit from Esse::DocumentLazyAttribute or respond to `call`', arg: klass)
52
+ end
53
+ ensure
54
+ @lazy_document_attributes&.freeze
55
+ end
56
+
57
+ protected
58
+
59
+ def lazy_document_attribute?(attr_name)
60
+ lazy_document_attributes.key?(attr_name)
61
+ end
62
+ end
63
+
64
+ extend ClassMethods
65
+ end
66
+ end
@@ -6,8 +6,8 @@ module Esse
6
6
  # @see ObjectDocumentMapper
7
7
  class Repository
8
8
  module ClassMethods
9
- # Convert ruby object to json. Arguments will be same of passed through the
10
- # collection. It's allowed a block or a class with the `to_h` instance method.
9
+ # Define the document type that will be used to serialize the data.
10
+ # Arguments will be same of passed through the collection. It's allowed a block or a class with the `to_h` instance method.
11
11
  # Example with block
12
12
  # document do |model, **context|
13
13
  # {
@@ -38,31 +38,6 @@ module Esse
38
38
  end
39
39
  end
40
40
 
41
- def coerce_to_document(value)
42
- case value
43
- when Esse::Document
44
- value
45
- when Hash
46
- Esse::HashDocument.new(value)
47
- when NilClass, FalseClass
48
- Esse::NullDocument.new
49
- else
50
- raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
51
- end
52
- end
53
-
54
- # Convert ruby object to json by using the document of the given document type.
55
- # @param [Object] model The ruby object
56
- # @param [Hash] kwargs The context
57
- # @return [Esse::Document] The serialized document
58
- def serialize(model, **kwargs)
59
- if @document_proc.nil?
60
- raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
61
- end
62
-
63
- @document_proc.call(model, **kwargs)
64
- end
65
-
66
41
  # Used to define the source of data. A block is required. And its
67
42
  # content should yield an array of each object that should be serialized.
68
43
  # The list of arguments will be passed throught the document method.
@@ -94,6 +69,71 @@ module Esse
94
69
  @collection_proc = collection_klass || block
95
70
  end
96
71
 
72
+ # Wrap collection data into serialized batches
73
+ #
74
+ # @param [Hash] kwargs The context
75
+ # @return [Enumerator] The enumerator
76
+ # @yield [Array, **context] serialized collection and the optional context from the collection
77
+ def each_serialized_batch(lazy_attributes: false, **kwargs)
78
+ each_batch(**kwargs) do |*args|
79
+ batch, collection_context = args
80
+ collection_context ||= {}
81
+ entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
82
+ if lazy_attributes
83
+ attrs = lazy_attributes.is_a?(Array) ? lazy_attributes : lazy_document_attribute_names(lazy_attributes)
84
+ attrs.each do |attr_name|
85
+ retrieve_lazy_attribute_values(attr_name, entries).each do |doc_header, value|
86
+ doc = entries.find { |d| doc_header.id.to_s == d.id.to_s && doc_header.type == d.type && doc_header.routing == d.routing }
87
+ doc&.mutate(attr_name) { value }
88
+ end
89
+ end
90
+ end
91
+
92
+ yield entries, **kwargs
93
+ end
94
+ end
95
+
96
+ # Wrap collection data into serialized documents
97
+ #
98
+ # Example:
99
+ # GeosIndex.documents(id: 1).first
100
+ #
101
+ # @return [Enumerator] All serialized entries
102
+ def documents(**kwargs)
103
+ Enumerator.new do |yielder|
104
+ each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
105
+ docs.each { |document| yielder.yield(document) }
106
+ end
107
+ end
108
+ end
109
+
110
+ # Convert ruby object to json by using the document of the given document type.
111
+ # @param [Object] model The ruby object
112
+ # @param [Hash] kwargs The context
113
+ # @return [Esse::Document] The serialized document
114
+ def serialize(model, **kwargs)
115
+ if @document_proc.nil?
116
+ raise NotImplementedError, format('there is no %<t>p document defined for the %<k>p index', t: repo_name, k: index.to_s)
117
+ end
118
+
119
+ @document_proc.call(model, **kwargs)
120
+ end
121
+
122
+ protected
123
+
124
+ def coerce_to_document(value)
125
+ case value
126
+ when Esse::Document
127
+ value
128
+ when Hash
129
+ Esse::HashDocument.new(value)
130
+ when NilClass, FalseClass
131
+ Esse::NullDocument.new
132
+ else
133
+ raise ArgumentError, format('%<arg>p is not a valid document. The document should be a hash or an instance of Esse::Document', arg: value)
134
+ end
135
+ end
136
+
97
137
  # Used to fetch all batch of data defined on the collection model.
98
138
  # Arguments can be anything. They will just be passed through the block.
99
139
  # Useful when the collection depends on scope or any other conditions
@@ -122,34 +162,6 @@ module Esse
122
162
  rescue LocalJumpError
123
163
  raise(SyntaxError, 'block must be explicitly declared in the collection definition')
124
164
  end
125
-
126
- # Wrap collection data into serialized batches
127
- #
128
- # @param [Hash] kwargs The context
129
- # @return [Enumerator] The enumerator
130
- # @yield [Array, **context] serialized collection and the optional context from the collection
131
- def each_serialized_batch(**kwargs, &block)
132
- each_batch(**kwargs) do |*args|
133
- batch, collection_context = args
134
- collection_context ||= {}
135
- entries = [*batch].map { |entry| serialize(entry, **collection_context) }.compact
136
- block.call(entries, **kwargs)
137
- end
138
- end
139
-
140
- # Wrap collection data into serialized documents
141
- #
142
- # Example:
143
- # GeosIndex.documents(id: 1).first
144
- #
145
- # @return [Enumerator] All serialized entries
146
- def documents(**kwargs)
147
- Enumerator.new do |yielder|
148
- each_serialized_batch(**kwargs) do |docs, **_collection_kargs|
149
- docs.each { |document| yielder.yield(document) }
150
- end
151
- end
152
- end
153
165
  end
154
166
 
155
167
  extend ClassMethods
@@ -13,5 +13,6 @@ module Esse
13
13
  require_relative 'repository/actions'
14
14
  require_relative 'repository/documents'
15
15
  require_relative 'repository/object_document_mapper'
16
+ require_relative 'repository/lazy_document_attributes'
16
17
  end
17
18
  end
data/lib/esse/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Esse
4
- VERSION = '0.2.6'
4
+ VERSION = '0.3.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
8
  autorequire:
9
9
  bindir: exec
10
10
  cert_chain: []
11
- date: 2023-11-27 00:00:00.000000000 Z
11
+ date: 2024-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multi_json
@@ -248,6 +248,7 @@ files:
248
248
  - lib/esse/deprecations/repository_backend_delegator.rb
249
249
  - lib/esse/deprecations/serializer.rb
250
250
  - lib/esse/document.rb
251
+ - lib/esse/document_lazy_attribute.rb
251
252
  - lib/esse/dynamic_template.rb
252
253
  - lib/esse/errors.rb
253
254
  - lib/esse/events.rb
@@ -274,16 +275,19 @@ files:
274
275
  - lib/esse/index/type.rb
275
276
  - lib/esse/index_mapping.rb
276
277
  - lib/esse/index_setting.rb
278
+ - lib/esse/lazy_document_header.rb
277
279
  - lib/esse/logging.rb
278
280
  - lib/esse/null_document.rb
279
281
  - lib/esse/plugins.rb
280
282
  - lib/esse/primitives.rb
283
+ - lib/esse/primitives/array_utils.rb
281
284
  - lib/esse/primitives/hash_utils.rb
282
285
  - lib/esse/primitives/hstring.rb
283
286
  - lib/esse/primitives/output.rb
284
287
  - lib/esse/repository.rb
285
288
  - lib/esse/repository/actions.rb
286
289
  - lib/esse/repository/documents.rb
290
+ - lib/esse/repository/lazy_document_attributes.rb
287
291
  - lib/esse/repository/object_document_mapper.rb
288
292
  - lib/esse/search/query.rb
289
293
  - lib/esse/search/query/dsl.rb
@@ -318,7 +322,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
318
322
  - !ruby/object:Gem::Version
319
323
  version: '0'
320
324
  requirements: []
321
- rubygems_version: 3.2.32
325
+ rubygems_version: 3.0.3.1
322
326
  signing_key:
323
327
  specification_version: 4
324
328
  summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building