esse 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fc6f63594da661b35bc22ab0c83655ded02b8ebe2e20ec5612a0927934156e9
4
- data.tar.gz: c46f9a70e262d40a9c60c7ce72be3e3b816d18c536f6e3fe770bd06c06c5f57b
3
+ metadata.gz: 15b531b7c6876d665c12fd773d25fa68acda30c4c92cf7999790315f4fe35b2d
4
+ data.tar.gz: 9520b76a128e752bed3db6d2a1a96e38e2107ca790c6d0c0327c2690d91b21d7
5
5
  SHA512:
6
- metadata.gz: 8525a1ba74d9452163e87e1acf1bf621654f2bb8b9a7811e6eca94fc452c47f9255808b2c45be701112be085f8d3176243a685fcfb6647a8e9845699a727911d
7
- data.tar.gz: 3d0be64e631a47c07758341b34fc86eedffd76bbd12c4734016d9303c256d9c8470929fc564bdace28bb21f072e44d0193cbee06e8e808675a695b46aac92c38
6
+ metadata.gz: 865aa67f404a82e573009e73186dd60ee9c3f978ba3c93b39337184c2b8c9c9415bc116573c5eb4f21cb7e4ffc7e19aae4957e54fe7b8aa99a8804e86469b13f
7
+ data.tar.gz: bcacb23ae7b1b1a073298a59090455e30d6aa9531b93a815871762ed9e3364de873795c47ca0b2d495d8d9394110fd623011d3e4e3054a34ddf89b5e35947ded
@@ -13,13 +13,16 @@ module Esse
13
13
  # In case of timeout error, will retry with an exponential backoff using the following formula:
14
14
  # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times (default: 4).
15
15
  #
16
- # Too large bulk requests will be split into multiple requests with only one attempt.
16
+ # Too large bulk requests will first be split into multiple size-balanced requests; if that still
17
+ # returns 413, the bulk is retried one document per request as a last resort. Only after a single
18
+ # document still returns 413 does the error bubble up.
17
19
  #
18
20
  # @yield [RequestBody] A request body instance
19
- def each_request(max_retries: 4, last_retry_in_small_chunks: true)
21
+ def each_request(max_retries: 4, last_retry_in_small_chunks: true, last_retry_per_document: true)
20
22
  # @TODO create indexes by checking all the index suffixes (if mapping is not empty)
21
23
  requests = [optimistic_request]
22
24
  retry_count = 0
25
+ too_large_retry_count = 0
23
26
 
24
27
  begin
25
28
  requests.each do |request|
@@ -37,12 +40,28 @@ module Esse
37
40
  sleep(wait_interval)
38
41
  retry
39
42
  rescue Esse::Transport::RequestEntityTooLargeError => e
40
- retry_count += 1
41
- raise e if retry_count > 1 # only retry once on this error
42
- requests = balance_requests_size(e)
43
+ too_large_retry_count += 1
44
+ raise e if too_large_retry_count > 2
45
+
46
+ if too_large_retry_count == 1
47
+ balanced = balance_requests_size(e)
48
+ if balanced && !balanced.empty?
49
+ requests = balanced
50
+ Esse.logger.warn <<~MSG
51
+ Request entity too large, retrying with a bulk with: #{requests.map(&:bytesize).join(' + ')}.
52
+ Note that this cause performance degradation, consider adjusting the batch_size of the index or increasing the bulk size.
53
+ MSG
54
+ retry
55
+ end
56
+ raise e unless last_retry_per_document
57
+ too_large_retry_count = 2
58
+ end
59
+
60
+ raise e unless last_retry_per_document
61
+ requests = requests_per_document
43
62
  Esse.logger.warn <<~MSG
44
- Request entity too large, retrying with a bulk with: #{requests.map(&:bytesize).join(' + ')}.
45
- Note that this cause performance degradation, consider adjusting the batch_size of the index or increasing the bulk size.
63
+ Request entity too large after balancing, retrying one document per request as a last resort.
64
+ If a single document still exceeds the bulk size, the error will be raised.
46
65
  MSG
47
66
  retry
48
67
  end
@@ -60,45 +79,57 @@ module Esse
60
79
  end
61
80
 
62
81
  def requests_in_small_chunks(chunk_size: 1)
82
+ arr = build_per_document_requests(chunk_size: chunk_size)
83
+ Esse.logger.warn <<~MSG
84
+ Retrying the last request in small chunks of #{chunk_size} documents.
85
+ This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
86
+ MSG
87
+ arr
88
+ end
89
+
90
+ def requests_per_document
91
+ build_per_document_requests(chunk_size: 1)
92
+ end
93
+
94
+ def build_per_document_requests(chunk_size: 1)
63
95
  arr = []
64
96
  @create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
65
97
  @index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
66
98
  @update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
67
99
  @delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
68
- Esse.logger.warn <<~MSG
69
- Retrying the last request in small chunks of #{chunk_size} documents.
70
- This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
71
- MSG
72
100
  arr
73
101
  end
74
102
 
75
- # @return [Array<RequestBody>]
103
+ # @return [Array<RequestBody>, nil] balanced requests, or nil when the error message has no parseable byte limit
76
104
  def balance_requests_size(err)
77
- if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
78
- requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
79
- operation, meta = as_json.to_a.first
80
- meta = meta.dup
81
- data = meta.delete(:data)
82
- piece = MultiJson.dump(operation => meta)
83
- piece << "\n" << MultiJson.dump(data) if data
84
- if piece.bytesize > bulk_size
85
- Esse.logger.warn <<~MSG
86
- The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
87
- Consider increasing the bulk size or reducing the document size. The document will be ignored during this import.
88
- MSG
89
- next
90
- end
105
+ bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i
106
+ return nil unless bulk_size > 0
91
107
 
92
- if result.last.body.bytesize + piece.bytesize > bulk_size
93
- result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
94
- else
95
- result[-1].add(operation, piece)
96
- end
108
+ requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
109
+ operation, meta = as_json.to_a.first
110
+ meta = meta.dup
111
+ data = meta.delete(:data)
112
+ piece = MultiJson.dump(operation => meta)
113
+ piece << "\n" << MultiJson.dump(data) if data
114
+
115
+ if piece.bytesize > bulk_size
116
+ Esse.logger.warn <<~MSG
117
+ The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
118
+ It will be sent in its own request; if the cluster rejects it, the error will be raised.
119
+ MSG
120
+ result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
121
+ result.push(Import::RequestBodyRaw.new)
122
+ next
123
+ end
124
+
125
+ if result.last.body.bytesize + piece.bytesize > bulk_size
126
+ result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
127
+ else
128
+ result[-1].add(operation, piece)
97
129
  end
98
- requests.each(&:finalize)
99
- else
100
- raise err
101
130
  end
131
+ requests.reject! { |r| r.body.empty? }
132
+ requests.each(&:finalize)
102
133
  end
103
134
  end
104
135
  end
data/lib/esse/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Esse
4
- VERSION = '0.4.1'
4
+ VERSION = '0.5.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
8
  autorequire:
9
9
  bindir: exec
10
10
  cert_chain: []
11
- date: 2026-04-29 00:00:00.000000000 Z
11
+ date: 2026-05-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multi_json