work_shaper 0.1.3.1rc2 → 0.1.3.1rc3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/lib/work_shaper/manager.rb +62 -34
- data/lib/work_shaper/offset_holder.rb +54 -0
- data/lib/work_shaper/version.rb +1 -1
- data/lib/work_shaper/worker.rb +6 -2
- data/lib/work_shaper.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 19828916c7e0f7d9006c2b508566311381edcb8e6a50522b4bffe1ddc79e5a61
+  data.tar.gz: 28d670fd0d3fad8e6705b2392ca00c0c720ac29649fddc04702cdd924910fdc5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ca66783166d22a048cdfbf02e1a9a6b36246c60caa42e6612f3008bb8af8029621df2d85ce152315288f8c40ffb1ca28efe4024e3582af7cd5c77957e8a0722e
+  data.tar.gz: 73cdaa9db8bcf924983fc0a6bbfec46bd257fa7c9938a7ed795c77ebbcdb34a1f95e762949876c57618276bf0f00dec9f38daacd2ecb708ea701e3322eb6cc8c
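These digests cover the two members of the packaged .gem archive. A quick local check with Ruby's standard Digest library, assuming metadata.gz and data.tar.gz have been unpacked from the .gem into the current directory:

    require 'digest'

    # Compare the printed digests with the checksums.yaml values above.
    %w[metadata.gz data.tar.gz].each do |name|
      puts "#{name} SHA256: #{Digest::SHA256.file(name).hexdigest}"
      puts "#{name} SHA512: #{Digest::SHA512.file(name).hexdigest}"
    end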
data/Gemfile
CHANGED
data/lib/work_shaper/manager.rb
CHANGED
@@ -3,7 +3,7 @@ module WorkShaper
   # for each offset in monotonically increasing order (independent of the execution order), and gracefully
   # cleaning up when `#shutdown` is called.
   class Manager
-    attr_reader :total_acked, :total_enqueued
+    attr_reader :total_acked, :total_enqueued, :shutting_down

     # Several of the parameters here are Lambdas (not Proc). Note you can pass a method using
     # `method(:some_method)` or a lambda directly `->{ puts 'Hello'}`.
@@ -28,7 +28,7 @@ module WorkShaper
       @completed_offsets = {}
       @max_in_queue = max_in_queue
       @semaphore = Mutex.new
-      @
+      @shutting_down = false

       @total_enqueued = 0
       @total_acked = 0
@@ -58,14 +58,16 @@ module WorkShaper

     # Enqueue a message to be worked on the given `sub_key`, `partition`, and `offset`.
     def enqueue(sub_key, message, partition, offset)
-      raise StandardError, 'Shutting down' if @
+      raise StandardError, 'Shutting down' if @shutting_down
       pause_on_overrun
-
+
+      offset_holder = OffsetHolder.new(partition, offset)
+      WorkShaper.logger.debug "Enqueue: #{sub_key}/#{offset_holder}"

       worker = nil
       @semaphore.synchronize do
         @total_enqueued += 1
-        (@received_offsets[partition] ||= Array.new) <<
+        (@received_offsets[partition] ||= Array.new) << offset_holder

         worker =
           @workers[sub_key] ||=
@@ -81,7 +83,7 @@ module WorkShaper
           )
       end

-      worker.enqueue(message,
+      worker.enqueue(message, offset_holder)
     end

     # Flush any offsets for which work has been completed. Only lowest continuous run of
@@ -108,7 +110,7 @@ module WorkShaper
       if detailed
         WorkShaper.logger.info(
           {
-
+            message: 'Reporting - Extra Detail',
            pending_ack: @completed_offsets,
            received_offsets: @received_offsets
          })
@@ -118,10 +120,11 @@ module WorkShaper

     # Stop the underlying threads
     def shutdown
-      @
-
+      @shutting_down = true
+      WorkShaper.logger.warn({ message: 'Shutting Down' })
       Thread.kill(@heartbeat)
       Thread.kill(@offset_manager)
+      report(detailed: true)
       @workers.each_value(&:shutdown)
     end

@@ -137,21 +140,35 @@ module WorkShaper
       completed = @completed_offsets[partition].sort!
       received = @received_offsets[partition].sort!

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      begin
+        offset = completed.first
+        while received.any? && received.first == offset
+          # We observed Kafka sending the same message twice, even after
+          # having committed the offset. Here we skip this offset if we
+          # know it has already been committed.
+          last_offset = @last_ack[partition]
+          if last_offset && offset <= last_offset
+            WorkShaper.logger.warn(
+              { message: 'Received Duplicate Offset',
+                offset: "#{partition}:#{offset}",
+                last_acked: last_offset,
+              })
+          end
+
+          result =
+            begin
+              @ack.call(partition, offset)
+            rescue => e
+              # We expect @ack to handle it's own errors and return the error or false if it
+              # is safe to continue. Otherwise @ack should raise an error and we will
+              # shutdown.
+              WorkShaper.logger.error({ message: 'Error in ack', error: e })
+              WorkShaper.logger.error(e.backtrace.join("\n"))
+              shutdown
+              break
+            end
+
+          if result.is_a? Exception || !result
             WorkShaper.logger.warn(
               { message: 'Failed to Ack Offset, likely re-balance',
                 offset: "#{partition}:#{offset}",
@@ -159,18 +176,21 @@ module WorkShaper
                received: @received_offsets[partition].to_a[0..10].join(',')
              })
          else
-           @last_ack[partition] = offset
+           @last_ack[partition] = [@last_ack[partition] || offset, offset].max
          end
-        end

-
-
-
-
-
-
+          @total_acked += 1
+          WorkShaper.logger.debug "@total_acked: #{@total_acked}"
+          WorkShaper.logger.debug "completed: [#{completed.join(', ')}]"
+          WorkShaper.logger.debug "received: [#{received.join(', ')}]"
+          completed.delete(offset)
+          received.delete(offset)

-
+          offset = completed.first
+        end
+      rescue => e
+        WorkShaper.logger.error({ message: 'Error in offset_ack', error: e })
+        WorkShaper.logger.error(e.backtrace.join("\n"))
       end
     end

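The rewritten ack loop above commits only the lowest contiguous run of completed offsets, never moves @last_ack backwards, and treats a falsy or Exception result from @ack as a likely re-balance. A minimal standalone sketch of the contiguity rule, with plain integers in place of OffsetHolder and simplified names (not the gem's API):

    # Ack only while the lowest completed offset is also the lowest still
    # outstanding, so the commit point advances without skipping gaps.
    def ack_contiguous(completed, received, last_ack)
      offset = completed.min
      while offset && received.min == offset
        last_ack = last_ack.nil? ? offset : [last_ack, offset].max  # never move backwards
        completed.delete(offset)
        received.delete(offset)
        offset = completed.min
      end
      last_ack
    end

    puts ack_contiguous([5, 3, 4], [3, 4, 5, 6], 2)  # => 5 (offset 6 is still in flight)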
@@ -179,11 +199,19 @@ module WorkShaper
         @total_enqueued.to_i - @total_acked.to_i > @max_in_queue
       end

+      pause_cycles = 0
       # We have to be careful here to avoid a deadlock. Another thread may be waiting
       # for the mutex to ack and remove offsets. If we wrap enqueue in a synchronize
       # block, that would lead to a deadlock. Here the sleep allows other threads
       # to wrap up.
-
+      while @semaphore.synchronize { overrun.call } do
+        if pause_cycles % 12000 == 0
+          WorkShaper.logger.warn 'Paused on Overrun'
+          report(detailed: true)
+        end
+        pause_cycles += 1
+        sleep 0.005
+      end
     end
   end
 end
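#pause_on_overrun now evaluates the overrun check under the mutex but sleeps outside it, which is what avoids the deadlock described in the comment; at 5 ms per cycle, the 'Paused on Overrun' warning repeats roughly once a minute (12000 * 0.005 s = 60 s). A self-contained sketch of the same backpressure pattern, with simplified names (not the gem's API):

    semaphore = Mutex.new
    in_flight = 10
    max_in_queue = 3

    # A consumer thread drains work, standing in for the ack path.
    drainer = Thread.new do
      10.times do
        semaphore.synchronize { in_flight -= 1 }
        sleep 0.01
      end
    end

    # Producer side: check the predicate under the lock, sleep outside it.
    pause_cycles = 0
    while semaphore.synchronize { in_flight > max_in_queue }
      warn 'Paused on Overrun' if pause_cycles % 12_000 == 0
      pause_cycles += 1
      sleep 0.005
    end
    puts "resumed with #{semaphore.synchronize { in_flight }} in flight"
    drainer.join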
data/lib/work_shaper/offset_holder.rb
ADDED
@@ -0,0 +1,54 @@
+module WorkShaper
+  class OffsetHolder
+    attr_reader :partition, :offset, :state
+
+    STATES = {enqueued: 2, acked: 1, completed: 0}
+    def initialize(partition, offset, at: Time.now.to_f)
+      @partition = partition
+      @offset = offset
+      @at = at
+
+      @state = :enqueued
+    end
+
+    def <=(other)
+      self.<=>(other) <= 0
+    end
+
+    def <(other)
+      self.<=>(other) == -1
+    end
+
+    def <=>(other)
+      r = offset <=> other.offset
+
+      if r == 0
+        r = STATES[state] <=> STATES[other.state]
+        puts "States: #{r} | #{STATES[state]} #{STATES[other.state]}"
+      end
+
+      if r == 0
+        r = @at <=> other.instance_variable_get(:@at)
+        puts "At: #{r}"
+      end
+      puts "Final: #{r}"
+      r
+    end
+
+    def ack!
+      @state = :acked
+    end
+
+    def complete!
+      @state = :completed
+    end
+
+    def to_i
+      offset
+    end
+
+    def to_s
+      "#{partition}/#{offset}:#{STATES[state]}"
+    end
+  end
+end
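OffsetHolder compares by offset first, then by state (completed maps to 0 in STATES and sorts lowest), then by creation time, so an already-completed duplicate orders ahead of a freshly enqueued copy of the same offset. A small usage sketch; note that `<=>` as written also prints debug lines via `puts`:

    a = WorkShaper::OffsetHolder.new(0, 42, at: 1.0)  # state :enqueued  => STATES value 2
    b = WorkShaper::OffsetHolder.new(0, 42, at: 2.0)
    b.complete!                                       # state :completed => STATES value 0

    b < a                    # => true (same offset; completed outranks enqueued)
    [a, b].sort.map(&:to_s)  # => ["0/42:0", "0/42:2"]
    b.to_i                   # => 42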
data/lib/work_shaper/version.rb
CHANGED
data/lib/work_shaper/worker.rb
CHANGED
@@ -22,7 +22,10 @@ module WorkShaper
     # rubocop:enable Metrics/ParameterLists
     # rubocop:enable Layout/LineLength

-    def enqueue(message,
+    def enqueue(message, offset_holder)
+      partition = offset_holder.partition
+      offset = offset_holder.offset
+
       # rubocop:disable Style/RescueStandardError
       @thread_pool.post do
         @work.call(message, partition, offset)
@@ -34,7 +37,8 @@ module WorkShaper
       ensure
         @semaphore.synchronize do
           WorkShaper.logger.debug "Completed: #{partition}:#{offset}"
-
+          offset_holder.complete!
+          (@completed_offsets[partition] ||= Array.new) << offset_holder
         end
       end
       # rubocop:enable Style/RescueStandardError
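Worker#enqueue now threads the OffsetHolder through the pool so the ensure block can mark it complete under the shared mutex even when the work raises. A hedged sketch of that pattern using concurrent-ruby (listed as a dependency in the gem's metadata); the SingleThreadExecutor choice and the names here are illustrative assumptions, not the gem's internals:

    require 'concurrent'

    # One executor per sub_key preserves per-key ordering; the mutex guards
    # the shared completed-offsets map, as in Worker#enqueue above.
    pool = Concurrent::SingleThreadExecutor.new
    semaphore = Mutex.new
    completed = Hash.new { |h, k| h[k] = [] }

    [3, 4, 5].each do |offset|
      pool.post do
        begin
          # ... the actual work for (partition 0, offset) would run here ...
          raise 'boom' if offset == 4  # a failure still records completion
        rescue
          # swallowed for the sketch; the gem's rescue/ensure handling is richer
        ensure
          semaphore.synchronize { completed[0] << offset }
        end
      end
    end

    pool.shutdown
    pool.wait_for_termination
    puts completed[0].inspect  # => [3, 4, 5]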
data/lib/work_shaper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: work_shaper
 version: !ruby/object:Gem::Version
-  version: 0.1.3.
+  version: 0.1.3.1rc3
 platform: ruby
 authors:
 - Jerry Fernholz
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-03-
+date: 2024-03-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -40,6 +40,7 @@ files:
 - Rakefile
 - lib/work_shaper.rb
 - lib/work_shaper/manager.rb
+- lib/work_shaper/offset_holder.rb
 - lib/work_shaper/version.rb
 - lib/work_shaper/worker.rb
 - work_shaper.gemspec