work_shaper 0.1.3.1rc1 → 0.1.3.1rc3
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/lib/work_shaper/manager.rb +63 -35
- data/lib/work_shaper/offset_holder.rb +54 -0
- data/lib/work_shaper/version.rb +1 -1
- data/lib/work_shaper/worker.rb +6 -2
- data/lib/work_shaper.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 19828916c7e0f7d9006c2b508566311381edcb8e6a50522b4bffe1ddc79e5a61
+  data.tar.gz: 28d670fd0d3fad8e6705b2392ca00c0c720ac29649fddc04702cdd924910fdc5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ca66783166d22a048cdfbf02e1a9a6b36246c60caa42e6612f3008bb8af8029621df2d85ce152315288f8c40ffb1ca28efe4024e3582af7cd5c77957e8a0722e
+  data.tar.gz: 73cdaa9db8bcf924983fc0a6bbfec46bd257fa7c9938a7ed795c77ebbcdb34a1f95e762949876c57618276bf0f00dec9f38daacd2ecb708ea701e3322eb6cc8c
data/Gemfile
CHANGED
data/lib/work_shaper/manager.rb
CHANGED
@@ -3,7 +3,7 @@ module WorkShaper
   # for each offset in monotonically increasing order (independent of the execution order), and gracefully
   # cleaning up when `#shutdown` is called.
   class Manager
-    attr_reader :total_acked, :total_enqueued
+    attr_reader :total_acked, :total_enqueued, :shutting_down
 
     # Several of the parameters here are Lambdas (not Proc). Note you can pass a method using
     # `method(:some_method)` or a lambda directly `->{ puts 'Hello'}`.
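A minimal usage sketch for context. The constructor keyword names below are assumptions for illustration; only the call shapes `work.call(message, partition, offset)` and `ack.call(partition, offset)` are visible in this diff.

require 'work_shaper'

# Hypothetical wiring; keyword names are illustrative, not the gem's
# documented signature.
manager = WorkShaper::Manager.new(
  work: ->(message, partition, offset) { puts "working #{partition}:#{offset} #{message}" },
  ack:  ->(partition, offset) { true },  # falsy or an Exception signals a failed commit
  max_in_queue: 1_000,
  heartbeat_period_sec: 60
)

manager.enqueue('key-a', 'payload', 0, 100)
manager.shutdown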
@@ -28,14 +28,14 @@ module WorkShaper
       @completed_offsets = {}
       @max_in_queue = max_in_queue
       @semaphore = Mutex.new
-      @
+      @shutting_down = false
 
       @total_enqueued = 0
       @total_acked = 0
 
       @heartbeat = Thread.new do
         while true
-          report(detailed:
+          report(detailed: true)
           sleep heartbeat_period_sec
         end
       rescue => e
@@ -58,14 +58,16 @@ module WorkShaper
 
     # Enqueue a message to be worked on the given `sub_key`, `partition`, and `offset`.
     def enqueue(sub_key, message, partition, offset)
-      raise StandardError, 'Shutting down' if @
+      raise StandardError, 'Shutting down' if @shutting_down
       pause_on_overrun
-
+
+      offset_holder = OffsetHolder.new(partition, offset)
+      WorkShaper.logger.debug "Enqueue: #{sub_key}/#{offset_holder}"
 
       worker = nil
       @semaphore.synchronize do
         @total_enqueued += 1
-        (@received_offsets[partition] ||= Array.new) <<
+        (@received_offsets[partition] ||= Array.new) << offset_holder
 
         worker =
           @workers[sub_key] ||=
@@ -81,7 +83,7 @@ module WorkShaper
           )
       end
 
-      worker.enqueue(message,
+      worker.enqueue(message, offset_holder)
     end
 
     # Flush any offsets for which work has been completed. Only lowest continuous run of
@@ -108,7 +110,7 @@ module WorkShaper
       if detailed
         WorkShaper.logger.info(
           {
-
+            message: 'Reporting - Extra Detail',
             pending_ack: @completed_offsets,
             received_offsets: @received_offsets
           })
@@ -118,10 +120,11 @@ module WorkShaper
 
     # Stop the underlying threads
     def shutdown
-      @
-
+      @shutting_down = true
+      WorkShaper.logger.warn({ message: 'Shutting Down' })
       Thread.kill(@heartbeat)
       Thread.kill(@offset_manager)
+      report(detailed: true)
       @workers.each_value(&:shutdown)
     end
 
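In a host application this shutdown path is typically driven from the consumer loop's cleanup. A hedged sketch: `consumer.each_message` is a stand-in for whatever poll loop feeds `#enqueue`, not part of this gem.

begin
  consumer.each_message do |m|
    manager.enqueue(m.key, m, m.partition, m.offset)
  end
ensure
  # Flags @shutting_down (so #enqueue raises), kills the heartbeat and
  # offset threads, emits a final detailed report, then stops each worker.
  manager.shutdown
end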
@@ -137,21 +140,35 @@ module WorkShaper
       completed = @completed_offsets[partition].sort!
       received = @received_offsets[partition].sort!
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      begin
+        offset = completed.first
+        while received.any? && received.first == offset
+          # We observed Kafka sending the same message twice, even after
+          # having committed the offset. Here we skip this offset if we
+          # know it has already been committed.
+          last_offset = @last_ack[partition]
+          if last_offset && offset <= last_offset
+            WorkShaper.logger.warn(
+              { message: 'Received Duplicate Offset',
+                offset: "#{partition}:#{offset}",
+                last_acked: last_offset,
+              })
+          end
+
+          result =
+            begin
+              @ack.call(partition, offset)
+            rescue => e
+              # We expect @ack to handle its own errors and return the error or false if it
+              # is safe to continue. Otherwise @ack should raise an error and we will
+              # shutdown.
+              WorkShaper.logger.error({ message: 'Error in ack', error: e })
+              WorkShaper.logger.error(e.backtrace.join("\n"))
+              shutdown
+              break
+            end
+
+          if result.is_a?(Exception) || !result
             WorkShaper.logger.warn(
               { message: 'Failed to Ack Offset, likely re-balance',
                 offset: "#{partition}:#{offset}",
@@ -159,18 +176,21 @@ module WorkShaper
                 received: @received_offsets[partition].to_a[0..10].join(',')
               })
           else
-            @last_ack[partition] = offset
+            @last_ack[partition] = [@last_ack[partition] || offset, offset].max
           end
-        end
 
-
-
-
-
-
-
+          @total_acked += 1
+          WorkShaper.logger.debug "@total_acked: #{@total_acked}"
+          WorkShaper.logger.debug "completed: [#{completed.join(', ')}]"
+          WorkShaper.logger.debug "received: [#{received.join(', ')}]"
+          completed.delete(offset)
+          received.delete(offset)
 
-
+          offset = completed.first
+        end
+      rescue => e
+        WorkShaper.logger.error({ message: 'Error in offset_ack', error: e })
+        WorkShaper.logger.error(e.backtrace.join("\n"))
       end
     end
 
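The loop above implements the "lowest continuous run" rule referenced in the `#enqueue` section: an offset is acked only while it heads both the completed and received lists, so a gap (an offset still in flight) halts the advance. A simplified standalone illustration with plain integers in place of `OffsetHolder`s; this is not the gem's code.

# Commit only the lowest continuous run of completed offsets.
def lowest_continuous_run(completed, received)
  committed = []
  offset = completed.min
  while offset && received.min == offset
    committed << offset
    completed.delete(offset)
    received.delete(offset)
    offset = completed.min
  end
  committed
end

received  = [4, 5, 6, 7, 8]
completed = [4, 5, 7]                         # 6 is still in flight
p lowest_continuous_run(completed, received)  # => [4, 5]; 7 waits for 6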
@@ -179,11 +199,19 @@ module WorkShaper
         @total_enqueued.to_i - @total_acked.to_i > @max_in_queue
       end
 
+      pause_cycles = 0
       # We have to be careful here to avoid a deadlock. Another thread may be waiting
       # for the mutex to ack and remove offsets. If we wrap enqueue in a synchronize
       # block, that would lead to a deadlock. Here the sleep allows other threads
       # to wrap up.
-
+      while @semaphore.synchronize { overrun.call } do
+        if pause_cycles % 12000 == 0
+          WorkShaper.logger.warn 'Paused on Overrun'
+          report(detailed: true)
+        end
+        pause_cycles += 1
+        sleep 0.005
+      end
     end
   end
 end
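The design choice in `pause_on_overrun`: hold the mutex only while evaluating the predicate, and sleep unlocked, so worker threads can still acquire it to ack offsets and shrink the backlog. A self-contained sketch of the same shape, with illustrative names rather than the gem's API:

mutex   = Mutex.new
backlog = 10

consumer = Thread.new do
  10.times do
    sleep 0.001
    mutex.synchronize { backlog -= 1 }  # acks drain the backlog
  end
end

# Producer-side wait: lock to check, never while sleeping. Wrapping the
# sleep itself in synchronize would starve the consumer above: deadlock.
sleep 0.005 while mutex.synchronize { backlog > 0 }
consumer.join
puts 'backlog drained'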
data/lib/work_shaper/offset_holder.rb
ADDED
@@ -0,0 +1,54 @@
+module WorkShaper
+  class OffsetHolder
+    attr_reader :partition, :offset, :state
+
+    STATES = {enqueued: 2, acked: 1, completed: 0}
+    def initialize(partition, offset, at: Time.now.to_f)
+      @partition = partition
+      @offset = offset
+      @at = at
+
+      @state = :enqueued
+    end
+
+    def <=(other)
+      self.<=>(other) <= 0
+    end
+
+    def <(other)
+      self.<=>(other) == -1
+    end
+
+    def <=>(other)
+      r = offset <=> other.offset
+
+      if r == 0
+        r = STATES[state] <=> STATES[other.state]
+        puts "States: #{r} | #{STATES[state]} #{STATES[other.state]}"
+      end
+
+      if r == 0
+        r = @at <=> other.instance_variable_get(:@at)
+        puts "At: #{r}"
+      end
+      puts "Final: #{r}"
+      r
+    end
+
+    def ack!
+      @state = :acked
+    end
+
+    def complete!
+      @state = :completed
+    end
+
+    def to_i
+      offset
+    end
+
+    def to_s
+      "#{partition}/#{offset}:#{STATES[state]}"
+    end
+  end
+end
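Because Kafka may redeliver an offset, `<=>` orders equal offsets by state weight (`completed` 0 before `acked` 1 before `enqueued` 2), then by creation time. A quick illustration using only the API shown above; note the debug `puts` calls left in `<=>` will also print when comparisons run:

a = WorkShaper::OffsetHolder.new(0, 42)
b = WorkShaper::OffsetHolder.new(0, 42, at: Time.now.to_f + 1)
a.complete!

a < b                    # => true; same offset, :completed outranks :enqueued
[b, a].sort.map(&:to_s)  # => ["0/42:0", "0/42:2"]
a.to_i                   # => 42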
data/lib/work_shaper/version.rb
CHANGED
data/lib/work_shaper/worker.rb
CHANGED
@@ -22,7 +22,10 @@ module WorkShaper
     # rubocop:enable Metrics/ParameterLists
     # rubocop:enable Layout/LineLength
 
-    def enqueue(message,
+    def enqueue(message, offset_holder)
+      partition = offset_holder.partition
+      offset = offset_holder.offset
+
       # rubocop:disable Style/RescueStandardError
       @thread_pool.post do
         @work.call(message, partition, offset)
@@ -34,7 +37,8 @@ module WorkShaper
         ensure
           @semaphore.synchronize do
             WorkShaper.logger.debug "Completed: #{partition}:#{offset}"
-
+            offset_holder.complete!
+            (@completed_offsets[partition] ||= Array.new) << offset_holder
           end
         end
       # rubocop:enable Style/RescueStandardError
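Worth noting: the completion bookkeeping sits in an `ensure` inside the pooled task, so the offset is recorded even when `@work` raises. The same shape in isolation, using concurrent-ruby (the gem's declared dependency) directly, with illustrative names:

require 'concurrent'

pool      = Concurrent::FixedThreadPool.new(4)
mutex     = Mutex.new
completed = []

pool.post do
  raise 'work blew up'                   # stand-in for @work.call failing
ensure
  mutex.synchronize { completed << 42 }  # bookkeeping still runs
end

pool.shutdown
pool.wait_for_termination
p completed                              # => [42] despite the raise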
data/lib/work_shaper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: work_shaper
 version: !ruby/object:Gem::Version
-  version: 0.1.3.
+  version: 0.1.3.1rc3
 platform: ruby
 authors:
 - Jerry Fernholz
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-03-
+date: 2024-03-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -40,6 +40,7 @@ files:
 - Rakefile
 - lib/work_shaper.rb
 - lib/work_shaper/manager.rb
+- lib/work_shaper/offset_holder.rb
 - lib/work_shaper/version.rb
 - lib/work_shaper/worker.rb
 - work_shaper.gemspec