esse 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/esse/import/bulk.rb +65 -34
- data/lib/esse/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 15b531b7c6876d665c12fd773d25fa68acda30c4c92cf7999790315f4fe35b2d
|
|
4
|
+
data.tar.gz: 9520b76a128e752bed3db6d2a1a96e38e2107ca790c6d0c0327c2690d91b21d7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 865aa67f404a82e573009e73186dd60ee9c3f978ba3c93b39337184c2b8c9c9415bc116573c5eb4f21cb7e4ffc7e19aae4957e54fe7b8aa99a8804e86469b13f
|
|
7
|
+
data.tar.gz: bcacb23ae7b1b1a073298a59090455e30d6aa9531b93a815871762ed9e3364de873795c47ca0b2d495d8d9394110fd623011d3e4e3054a34ddf89b5e35947ded
|
data/lib/esse/import/bulk.rb
CHANGED
|
@@ -13,13 +13,16 @@ module Esse
|
|
|
13
13
|
# In case of timeout error, will retry with an exponential backoff using the following formula:
|
|
14
14
|
# wait_interval = (retry_count**4) + 15 + (rand(10) * (retry_count + 1)) seconds. It will retry up to max_retries times that is default 4.
|
|
15
15
|
#
|
|
16
|
-
# Too large bulk requests will be split into multiple requests
|
|
16
|
+
# Too large bulk requests will first be split into multiple size-balanced requests; if that still
|
|
17
|
+
# returns 413, the bulk is retried one document per request as a last resort. Only after a single
|
|
18
|
+
# document still returns 413 does the error bubble up.
|
|
17
19
|
#
|
|
18
20
|
# @yield [RequestBody] A request body instance
|
|
19
|
-
def each_request(max_retries: 4, last_retry_in_small_chunks: true)
|
|
21
|
+
def each_request(max_retries: 4, last_retry_in_small_chunks: true, last_retry_per_document: true)
|
|
20
22
|
# @TODO create indexes when by checking all the index suffixes (if mapping is not empty)
|
|
21
23
|
requests = [optimistic_request]
|
|
22
24
|
retry_count = 0
|
|
25
|
+
too_large_retry_count = 0
|
|
23
26
|
|
|
24
27
|
begin
|
|
25
28
|
requests.each do |request|
|
|
@@ -37,12 +40,28 @@ module Esse
|
|
|
37
40
|
sleep(wait_interval)
|
|
38
41
|
retry
|
|
39
42
|
rescue Esse::Transport::RequestEntityTooLargeError => e
|
|
40
|
-
|
|
41
|
-
raise e if
|
|
42
|
-
|
|
43
|
+
too_large_retry_count += 1
|
|
44
|
+
raise e if too_large_retry_count > 2
|
|
45
|
+
|
|
46
|
+
if too_large_retry_count == 1
|
|
47
|
+
balanced = balance_requests_size(e)
|
|
48
|
+
if balanced && !balanced.empty?
|
|
49
|
+
requests = balanced
|
|
50
|
+
Esse.logger.warn <<~MSG
|
|
51
|
+
Request entity too large, retrying with a bulk with: #{requests.map(&:bytesize).join(' + ')}.
|
|
52
|
+
Note that this cause performance degradation, consider adjusting the batch_size of the index or increasing the bulk size.
|
|
53
|
+
MSG
|
|
54
|
+
retry
|
|
55
|
+
end
|
|
56
|
+
raise e unless last_retry_per_document
|
|
57
|
+
too_large_retry_count = 2
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
raise e unless last_retry_per_document
|
|
61
|
+
requests = requests_per_document
|
|
43
62
|
Esse.logger.warn <<~MSG
|
|
44
|
-
Request entity too large, retrying
|
|
45
|
-
|
|
63
|
+
Request entity too large after balancing, retrying one document per request as a last resort.
|
|
64
|
+
If a single document still exceeds the bulk size, the error will be raised.
|
|
46
65
|
MSG
|
|
47
66
|
retry
|
|
48
67
|
end
|
|
@@ -60,45 +79,57 @@ module Esse
|
|
|
60
79
|
end
|
|
61
80
|
|
|
62
81
|
def requests_in_small_chunks(chunk_size: 1)
|
|
82
|
+
arr = build_per_document_requests(chunk_size: chunk_size)
|
|
83
|
+
Esse.logger.warn <<~MSG
|
|
84
|
+
Retrying the last request in small chunks of #{chunk_size} documents.
|
|
85
|
+
This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
|
|
86
|
+
MSG
|
|
87
|
+
arr
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def requests_per_document
|
|
91
|
+
build_per_document_requests(chunk_size: 1)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def build_per_document_requests(chunk_size: 1)
|
|
63
95
|
arr = []
|
|
64
96
|
@create.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.create = slice } }
|
|
65
97
|
@index.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.index = slice } }
|
|
66
98
|
@update.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.update = slice } }
|
|
67
99
|
@delete.each_slice(chunk_size) { |slice| arr << Import::RequestBodyAsJson.new.tap { |r| r.delete = slice } }
|
|
68
|
-
Esse.logger.warn <<~MSG
|
|
69
|
-
Retrying the last request in small chunks of #{chunk_size} documents.
|
|
70
|
-
This is a last resort to avoid timeout errors, consider increasing the bulk size or reducing the batch size.
|
|
71
|
-
MSG
|
|
72
100
|
arr
|
|
73
101
|
end
|
|
74
102
|
|
|
75
|
-
# @return [Array<RequestBody
|
|
103
|
+
# @return [Array<RequestBody>, nil] balanced requests, or nil when the error message has no parseable byte limit
|
|
76
104
|
def balance_requests_size(err)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
operation, meta = as_json.to_a.first
|
|
80
|
-
meta = meta.dup
|
|
81
|
-
data = meta.delete(:data)
|
|
82
|
-
piece = MultiJson.dump(operation => meta)
|
|
83
|
-
piece << "\n" << MultiJson.dump(data) if data
|
|
84
|
-
if piece.bytesize > bulk_size
|
|
85
|
-
Esse.logger.warn <<~MSG
|
|
86
|
-
The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
|
|
87
|
-
Consider increasing the bulk size or reducing the document size. The document will be ignored during this import.
|
|
88
|
-
MSG
|
|
89
|
-
next
|
|
90
|
-
end
|
|
105
|
+
bulk_size = err.message.scan(/exceeded.(\d+).bytes/).dig(0, 0).to_i
|
|
106
|
+
return nil unless bulk_size > 0
|
|
91
107
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
108
|
+
requests = (@create + @index + @update + @delete).each_with_object([Import::RequestBodyRaw.new]) do |as_json, result|
|
|
109
|
+
operation, meta = as_json.to_a.first
|
|
110
|
+
meta = meta.dup
|
|
111
|
+
data = meta.delete(:data)
|
|
112
|
+
piece = MultiJson.dump(operation => meta)
|
|
113
|
+
piece << "\n" << MultiJson.dump(data) if data
|
|
114
|
+
|
|
115
|
+
if piece.bytesize > bulk_size
|
|
116
|
+
Esse.logger.warn <<~MSG
|
|
117
|
+
The document #{meta.inspect} size is #{piece.bytesize} bytes, which exceeds the maximum bulk size of #{bulk_size} bytes.
|
|
118
|
+
It will be sent in its own request; if the cluster rejects it, the error will be raised.
|
|
119
|
+
MSG
|
|
120
|
+
result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
|
|
121
|
+
result.push(Import::RequestBodyRaw.new)
|
|
122
|
+
next
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
if result.last.body.bytesize + piece.bytesize > bulk_size
|
|
126
|
+
result.push(Import::RequestBodyRaw.new.tap { |r| r.add(operation, piece) })
|
|
127
|
+
else
|
|
128
|
+
result[-1].add(operation, piece)
|
|
97
129
|
end
|
|
98
|
-
requests.each(&:finalize)
|
|
99
|
-
else
|
|
100
|
-
raise err
|
|
101
130
|
end
|
|
131
|
+
requests.reject! { |r| r.body.empty? }
|
|
132
|
+
requests.each(&:finalize)
|
|
102
133
|
end
|
|
103
134
|
end
|
|
104
135
|
end
|
data/lib/esse/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: esse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.1
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Marcos G. Zimmermann
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exec
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-05-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: multi_json
|