esse 0.4.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fc6f63594da661b35bc22ab0c83655ded02b8ebe2e20ec5612a0927934156e9
4
- data.tar.gz: c46f9a70e262d40a9c60c7ce72be3e3b816d18c536f6e3fe770bd06c06c5f57b
3
+ metadata.gz: d5aa2b7107d8bc92810414bacbca1c821024e8771b50be5f5c0afb4b66349874
4
+ data.tar.gz: 7ea6f01e96dc3e7a260f4b2cc8198f88231920dbd3e96a1d2493df34eb1bfeaa
5
5
  SHA512:
6
- metadata.gz: 8525a1ba74d9452163e87e1acf1bf621654f2bb8b9a7811e6eca94fc452c47f9255808b2c45be701112be085f8d3176243a685fcfb6647a8e9845699a727911d
7
- data.tar.gz: 3d0be64e631a47c07758341b34fc86eedffd76bbd12c4734016d9303c256d9c8470929fc564bdace28bb21f072e44d0193cbee06e8e808675a695b46aac92c38
6
+ metadata.gz: bce16d5dd68a36e0f75215994a96746982d84529fddd0f667660ca644cf8ca3c89c79211db91499161323c025528a48e2b371bddf743a4e19de4a95988121196
7
+ data.tar.gz: e87a03b8fc0c10fcbbfb7945483c740af2958a2991a1ea5330503f41ca8390988110dbddce59a62cb61987cf020b9f24bb4a02c3770d5dc2bed56bf124163226
data/lib/esse/config.rb CHANGED
@@ -43,7 +43,7 @@ module Esse
43
43
  # end
44
44
  class Config
45
45
  DEFAULT_CLUSTER_ID = :default
46
- ATTRIBUTES = %i[indices_directory bulk_wait_interval].freeze
46
+ ATTRIBUTES = %i[indices_directory bulk_wait_interval bulk_retry_on_failure_max_retries bulk_retry_on_failure_wait].freeze
47
47
 
48
48
  # The location of the indices. Defaults to the `app/indices`
49
49
  attr_reader :indices_directory
@@ -51,9 +51,17 @@ module Esse
51
51
  # wait a given period between posting pages to give Elasticsearch time to catch up.
52
52
  attr_reader :bulk_wait_interval
53
53
 
54
+ # number of retries on transient server errors (502, 503, 504, 429) before raising
55
+ attr_reader :bulk_retry_on_failure_max_retries
56
+
57
+ # base wait in seconds between transient-error retries (doubles each attempt)
58
+ attr_reader :bulk_retry_on_failure_wait
59
+
54
60
  def initialize
55
61
  self.indices_directory = 'app/indices'
56
62
  self.bulk_wait_interval = 0.1
63
+ self.bulk_retry_on_failure_max_retries = 3
64
+ self.bulk_retry_on_failure_wait = 2.0
57
65
  @clusters = {}
58
66
  cluster(DEFAULT_CLUSTER_ID) # initialize the :default client
59
67
  end
@@ -81,6 +89,14 @@ module Esse
81
89
  @bulk_wait_interval = value.to_f
82
90
  end
83
91
 
92
+ def bulk_retry_on_failure_max_retries=(value)
93
+ @bulk_retry_on_failure_max_retries = value.to_i
94
+ end
95
+
96
+ def bulk_retry_on_failure_wait=(value)
97
+ @bulk_retry_on_failure_wait = value.to_f
98
+ end
99
+
84
100
  def load(arg)
85
101
  case arg
86
102
  when Hash
data/lib/esse/errors.rb CHANGED
@@ -57,6 +57,7 @@ module Esse
57
57
  'ExpectationFailed' => 'ExpectationFailedError', # 417
58
58
  'ImATeapot' => 'ImATeapotError', # 418
59
59
  'TooManyConnectionsFromThisIP' => 'TooManyConnectionsFromThisIPError', # 421
60
+ 'TooManyRequests' => 'TooManyRequestsError', # 429
60
61
  'UpgradeRequired' => 'UpgradeRequiredError', # 426
61
62
  'BlockedByWindowsParentalControls' => 'BlockedByWindowsParentalControlsError', # 450
62
63
  'RequestHeaderTooLarge' => 'RequestHeaderTooLargeError', # 494
@@ -13,13 +13,18 @@ module Esse
13
13
  # In case of timeout error, will retry with an exponential backoff using the following formula:
14
14
  # wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 4.
15
15
  #
16
- # Too large bulk requests will be split into multiple requests with only one attempt.
16
+ # Too large bulk requests will first be split into multiple size-balanced requests; if that still
17
+ # returns 413, the bulk is retried one document per request as a last resort. Only after a single
18
+ # document still returns 413 does the error bubble up.
17
19
  #
18
20
  # @yield [RequestBody] A request body instance
19
- def each_request(max_retries: 4, last_retry_in_small_chunks: true)
21
+ def each_request(max_retries: 4, last_retry_in_small_chunks: true, last_retry_per_document: true,
22
+ retry_on_failure_max_retries: 3, retry_on_failure_wait: 2.0)
20
23
  # @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
21
24
  requests = [optimistic_request]
22
25
  retry_count = 0
26
+ too_large_retry_count = 0
27
+ transient_failure_count = 0
23
28
 
24
29
  begin
25
30
  requests.each do |request|
@@ -37,14 +42,40 @@ module Esse
37
42
  sleep(wait_interval)
38
43
  retry
39
44
  rescue Esse::Transport::RequestEntityTooLargeError => e
40
- retry_count += 1
41
- raise e if retry_count > 1 # only retry once on this error
42
- requests = balance_requests_size(e)
45
+ too_large_retry_count += 1
46
+ raise e if too_large_retry_count > 2
47
+
48
+ if too_large_retry_count == 1
49
+ balanced = balance_requests_size(e)
50
+ if balanced && !balanced.empty?
51
+ requests = balanced
52
+ Esse.logger.warn <<~MSG
53
+ Request entity too large, retrying with a bulk with: #{requests.map(&:bytesize).join(' + ')}.
54
+ Note that this cause performance degradation, consider adjusting the batch_size of the index or increasing the bulk size.
55
+ MSG
56
+ retry
57
+ end
58
+ raise e unless last_retry_per_document
59
+ too_large_retry_count = 2
60
+ end
61
+
62
+ raise e unless last_retry_per_document
63
+ requests = requests_per_document
43
64
  Esse.logger.warn <<~MSG
44
- Request entity too large, retrying with a bulk with: #{requests.map(&:bytesize).join(' + ')}.
45
- Note that this cause performance degradation, consider adjusting the batch_size of the index or increasing the bulk size.
65
+ Request entity too large after balancing, retrying one document per request as a last resort.
66
+ If a single document still exceeds the bulk size, the error will be raised.
46
67
  MSG
47
68
  retry
69
+ rescue Esse::Transport::BadGatewayError,
70
+ Esse::Transport::ServiceUnavailableError,
71
+ Esse::Transport::GatewayTimeoutError,
72
+ Esse::Transport::TooManyRequestsError => e
73
+ transient_failure_count += 1
74
+ raise e if transient_failure_count >= retry_on_failure_max_retries
75
+ wait = retry_on_failure_wait * (2**(transient_failure_count - 1))
76
+ Esse.logger.warn "#{e.class} error, retrying in #{wait}s (attempt #{transient_failure_count}/#{retry_on_failure_max_retries})"
77
+ sleep(wait)
78
+ retry
48
79
  end
49
80
  end
50
81
 
@@ -60,45 +91,57 @@ module Esse
60
91
  end
61
92
 
62
93
  def requests_in_small_chunks(chunk_size: 1)
94
+ arr = build_per_document_requests(chunk_size: chunk_size)
95
+ Esse.logger.warn <<~MSG
96
+ Retrying the last request in small chunks of #{chunk_size} documents.
97
+ This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
98
+ MSG
99
+ arr
100
+ end
101
+
102
+ def requests_per_document
103
+ build_per_document_requests(chunk_size: 1)
104
+ end
105
+
106
+ def build_per_document_requests(chunk_size: 1)
63
107
  arr = []
64
108
  @create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
65
109
  @index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
66
110
  @update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
67
111
  @delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
68
- Esse.logger.warn <<~MSG
69
- Retrying the last request in small chunks of #{chunk_size} documents.
70
- This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
71
- MSG
72
112
  arr
73
113
  end
74
114
 
75
- # @return [Array<RequestBody>]
115
+ # @return [Array<RequestBody>, nil] balanced requests, or nil when the error message has no parseable byte limit
76
116
  def balance_requests_size(err)
77
- if (bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i) > 0
78
- requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
79
- operation, meta = as_json.to_a.first
80
- meta = meta.dup
81
- data = meta.delete(:data)
82
- piece = MultiJson.dump(operation => meta)
83
- piece << "\n" << MultiJson.dump(data) if data
84
- if piece.bytesize > bulk_size
85
- Esse.logger.warn <<~MSG
86
- The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
87
- Consider increasing the bulk size or reducing the document size. The document will be ignored during this import.
88
- MSG
89
- next
90
- end
117
+ bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i
118
+ return nil unless bulk_size > 0
91
119
 
92
- if result.last.body.bytesize + piece.bytesize > bulk_size
93
- result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
94
- else
95
- result[-1].add(operation, piece)
96
- end
120
+ requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
121
+ operation, meta = as_json.to_a.first
122
+ meta = meta.dup
123
+ data = meta.delete(:data)
124
+ piece = MultiJson.dump(operation => meta)
125
+ piece << "\n" << MultiJson.dump(data) if data
126
+
127
+ if piece.bytesize > bulk_size
128
+ Esse.logger.warn <<~MSG
129
+ The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
130
+ It will be sent in its own request; if the cluster rejects it, the error will be raised.
131
+ MSG
132
+ result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
133
+ result.push(Import::RequestBodyRaw.new)
134
+ next
135
+ end
136
+
137
+ if result.last.body.bytesize + piece.bytesize > bulk_size
138
+ result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
139
+ else
140
+ result[-1].add(operation, piece)
97
141
  end
98
- requests.each(&:finalize)
99
- else
100
- raise err
101
142
  end
143
+ requests.reject! { |r| r.body.empty? }
144
+ requests.each(&:finalize)
102
145
  end
103
146
  end
104
147
  end
@@ -72,6 +72,22 @@ module Esse
72
72
  @bulk_wait_interval = value.to_f
73
73
  end
74
74
 
75
+ def bulk_retry_on_failure_max_retries
76
+ @bulk_retry_on_failure_max_retries || Esse.config.bulk_retry_on_failure_max_retries
77
+ end
78
+
79
+ def bulk_retry_on_failure_max_retries=(value)
80
+ @bulk_retry_on_failure_max_retries = value.to_i
81
+ end
82
+
83
+ def bulk_retry_on_failure_wait
84
+ @bulk_retry_on_failure_wait || Esse.config.bulk_retry_on_failure_wait
85
+ end
86
+
87
+ def bulk_retry_on_failure_wait=(value)
88
+ @bulk_retry_on_failure_wait = value.to_f
89
+ end
90
+
75
91
  def mapping_single_type=(value)
76
92
  @mapping_single_type = !!value
77
93
  end
@@ -256,7 +256,10 @@ module Esse
256
256
  delete: to_delete,
257
257
  index: to_index,
258
258
  update: to_update,
259
- ).each_request do |request_body|
259
+ ).each_request(
260
+ retry_on_failure_max_retries: bulk_retry_on_failure_max_retries,
261
+ retry_on_failure_wait: bulk_retry_on_failure_wait,
262
+ ) do |request_body|
260
263
  cluster.api.bulk(**definition, body: request_body.body) do |event_payload|
261
264
  event_payload[:body_stats] = request_body.stats
262
265
  if bulk_wait_interval > 0
data/lib/esse/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Esse
4
- VERSION = '0.4.1'
4
+ VERSION = '0.5.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
8
  autorequire:
9
9
  bindir: exec
10
10
  cert_chain: []
11
- date: 2026-04-29 00:00:00.000000000 Z
11
+ date: 2026-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multi_json