work_shaper 0.1.3.1rc1 → 0.1.3.1rc3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffaaf1badd8ced4836dd5e1ae9f3b60f6f811e249fa617d5278b86cf667d3ba3
4
- data.tar.gz: 065b69e44362a8c1bea9aedcbfaa70cd7b36ae6ee310cd780f33905b9036903b
3
+ metadata.gz: 19828916c7e0f7d9006c2b508566311381edcb8e6a50522b4bffe1ddc79e5a61
4
+ data.tar.gz: 28d670fd0d3fad8e6705b2392ca00c0c720ac29649fddc04702cdd924910fdc5
5
5
  SHA512:
6
- metadata.gz: 22e06e944e8f6fa7b062ac8842abad05414d653af9d7d8d68050469a40aa0b2e6bfa1c9dcc490f55e561da56afadb5c336093d7301af62fac8a68d299372bd06
7
- data.tar.gz: 8c2187099bac137a99ed48e53a57c5235ea0b63227fc5238d780ec5e3adc668c57c9419020103e22cf0ad786cd10f8efb4e360c035008e18373aa9dae43c1ebb
6
+ metadata.gz: ca66783166d22a048cdfbf02e1a9a6b36246c60caa42e6612f3008bb8af8029621df2d85ce152315288f8c40ffb1ca28efe4024e3582af7cd5c77957e8a0722e
7
+ data.tar.gz: 73cdaa9db8bcf924983fc0a6bbfec46bd257fa7c9938a7ed795c77ebbcdb34a1f95e762949876c57618276bf0f00dec9f38daacd2ecb708ea701e3322eb6cc8c
data/Gemfile CHANGED
@@ -14,3 +14,5 @@ gem "rubocop", "~> 1.21"
14
14
  gem "logger", "~> 1.4"
15
15
 
16
16
  gem "concurrent-ruby", "~> 1.2"
17
+
18
+ gem 'simplecov', require: false, group: :test
@@ -3,7 +3,7 @@ module WorkShaper
3
3
  # for each offset in monotonically increasing order (independent of the execution order), and gracefully
4
4
  # cleaning up when `#shutdown` is called.
5
5
  class Manager
6
- attr_reader :total_acked, :total_enqueued
6
+ attr_reader :total_acked, :total_enqueued, :shutting_down
7
7
 
8
8
  # Several of the parameters here are Lambdas (not Proc). Note you can pass a method using
9
9
  # `method(:some_method)` or a lambda directly `->{ puts 'Hello'}`.
@@ -28,14 +28,14 @@ module WorkShaper
28
28
  @completed_offsets = {}
29
29
  @max_in_queue = max_in_queue
30
30
  @semaphore = Mutex.new
31
- @shutdown = false
31
+ @shutting_down = false
32
32
 
33
33
  @total_enqueued = 0
34
34
  @total_acked = 0
35
35
 
36
36
  @heartbeat = Thread.new do
37
37
  while true
38
- report(detailed: false)
38
+ report(detailed: true)
39
39
  sleep heartbeat_period_sec
40
40
  end
41
41
  rescue => e
@@ -58,14 +58,16 @@ module WorkShaper
58
58
 
59
59
  # Enqueue a message to be worked on the given `sub_key`, `partition`, and `offset`.
60
60
  def enqueue(sub_key, message, partition, offset)
61
- raise StandardError, 'Shutting down' if @shutdown
61
+ raise StandardError, 'Shutting down' if @shutting_down
62
62
  pause_on_overrun
63
- WorkShaper.logger.debug "Enqueue: #{sub_key}:#{partition}:#{offset}"
63
+
64
+ offset_holder = OffsetHolder.new(partition, offset)
65
+ WorkShaper.logger.debug "Enqueue: #{sub_key}/#{offset_holder}"
64
66
 
65
67
  worker = nil
66
68
  @semaphore.synchronize do
67
69
  @total_enqueued += 1
68
- (@received_offsets[partition] ||= Array.new) << offset
70
+ (@received_offsets[partition] ||= Array.new) << offset_holder
69
71
 
70
72
  worker =
71
73
  @workers[sub_key] ||=
@@ -81,7 +83,7 @@ module WorkShaper
81
83
  )
82
84
  end
83
85
 
84
- worker.enqueue(message, partition, offset)
86
+ worker.enqueue(message, offset_holder)
85
87
  end
86
88
 
87
89
  # Flush any offsets for which work has been completed. Only lowest continuous run of
@@ -108,7 +110,7 @@ module WorkShaper
108
110
  if detailed
109
111
  WorkShaper.logger.info(
110
112
  {
111
- messaage: 'Reporting - Extra Detail',
113
+ message: 'Reporting - Extra Detail',
112
114
  pending_ack: @completed_offsets,
113
115
  received_offsets: @received_offsets
114
116
  })
@@ -118,10 +120,11 @@ module WorkShaper
118
120
 
119
121
  # Stop the underlying threads
120
122
  def shutdown
121
- @shutdown = true
122
- report(detailed: true)
123
+ @shutting_down = true
124
+ WorkShaper.logger.warn({ message: 'Shutting Down' })
123
125
  Thread.kill(@heartbeat)
124
126
  Thread.kill(@offset_manager)
127
+ report(detailed: true)
125
128
  @workers.each_value(&:shutdown)
126
129
  end
127
130
 
@@ -137,21 +140,35 @@ module WorkShaper
137
140
  completed = @completed_offsets[partition].sort!
138
141
  received = @received_offsets[partition].sort!
139
142
 
140
- offset = completed.first
141
- while received.any? && received.first == offset
142
- # We observed Kafka sending the same message twice, even after
143
- # having committed the offset. Here we skip this offset if we
144
- # know it has already been committed.
145
- last_offset = @last_ack[partition]
146
- if last_offset && offset <= last_offset
147
- WorkShaper.logger.warn(
148
- { message: 'Received Duplicate Offset',
149
- offset: "#{partition}:#{offset}",
150
- last_acked: last_offset,
151
- })
152
- else
153
- result = @ack.call(partition, offset)
154
- if result.is_a? Exception
143
+ begin
144
+ offset = completed.first
145
+ while received.any? && received.first == offset
146
+ # We observed Kafka sending the same message twice, even after
147
+ # having committed the offset. Here we skip this offset if we
148
+ # know it has already been committed.
149
+ last_offset = @last_ack[partition]
150
+ if last_offset && offset <= last_offset
151
+ WorkShaper.logger.warn(
152
+ { message: 'Received Duplicate Offset',
153
+ offset: "#{partition}:#{offset}",
154
+ last_acked: last_offset,
155
+ })
156
+ end
157
+
158
+ result =
159
+ begin
160
+ @ack.call(partition, offset)
161
+ rescue => e
162
+ # We expect @ack to handle its own errors and return the error or false if it
163
+ # is safe to continue. Otherwise @ack should raise an error and we will
164
+ # shutdown.
165
+ WorkShaper.logger.error({ message: 'Error in ack', error: e })
166
+ WorkShaper.logger.error(e.backtrace.join("\n"))
167
+ shutdown
168
+ break
169
+ end
170
+
171
+ if result.is_a? Exception || !result
155
172
  WorkShaper.logger.warn(
156
173
  { message: 'Failed to Ack Offset, likely re-balance',
157
174
  offset: "#{partition}:#{offset}",
@@ -159,18 +176,21 @@ module WorkShaper
159
176
  received: @received_offsets[partition].to_a[0..10].join(',')
160
177
  })
161
178
  else
162
- @last_ack[partition] = offset
179
+ @last_ack[partition] = [@last_ack[partition] || offset, offset].max
163
180
  end
164
- end
165
181
 
166
- @total_acked += 1
167
- WorkShaper.logger.debug "@total_acked: #{@total_acked}"
168
- WorkShaper.logger.debug "completed: [#{completed.join(', ')}]"
169
- WorkShaper.logger.debug "received: [#{received.join(', ')}]"
170
- completed.shift
171
- received.shift
182
+ @total_acked += 1
183
+ WorkShaper.logger.debug "@total_acked: #{@total_acked}"
184
+ WorkShaper.logger.debug "completed: [#{completed.join(', ')}]"
185
+ WorkShaper.logger.debug "received: [#{received.join(', ')}]"
186
+ completed.delete(offset)
187
+ received.delete(offset)
172
188
 
173
- offset = completed.first
189
+ offset = completed.first
190
+ end
191
+ rescue => e
192
+ WorkShaper.logger.error({ message: 'Error in offset_ack', error: e })
193
+ WorkShaper.logger.error(e.backtrace.join("\n"))
174
194
  end
175
195
  end
176
196
 
@@ -179,11 +199,19 @@ module WorkShaper
179
199
  @total_enqueued.to_i - @total_acked.to_i > @max_in_queue
180
200
  end
181
201
 
202
+ pause_cycles = 0
182
203
  # We have to be careful here to avoid a deadlock. Another thread may be waiting
183
204
  # for the mutex to ack and remove offsets. If we wrap enqueue in a synchronize
184
205
  # block, that would lead to a deadlock. Here the sleep allows other threads
185
206
  # to wrap up.
186
- sleep 0.005 while @semaphore.synchronize { overrun.call }
207
+ while @semaphore.synchronize { overrun.call } do
208
+ if pause_cycles % 12000 == 0
209
+ WorkShaper.logger.warn 'Paused on Overrun'
210
+ report(detailed: true)
211
+ end
212
+ pause_cycles += 1
213
+ sleep 0.005
214
+ end
187
215
  end
188
216
  end
189
217
  end
@@ -0,0 +1,54 @@
1
+ module WorkShaper
2
+ class OffsetHolder
3
+ attr_reader :partition, :offset, :state
4
+
5
+ STATES = {enqueued: 2, acked: 1, completed: 0}
6
+ def initialize(partition, offset, at: Time.now.to_f)
7
+ @partition = partition
8
+ @offset = offset
9
+ @at = at
10
+
11
+ @state = :enqueued
12
+ end
13
+
14
+ def <=(other)
15
+ self.<=>(other) <= 0
16
+ end
17
+
18
+ def <(other)
19
+ self.<=>(other) == -1
20
+ end
21
+
22
+ def <=>(other)
23
+ r = offset <=> other.offset
24
+
25
+ if r == 0
26
+ r = STATES[state] <=> STATES[other.state]
27
+ puts "States: #{r} | #{STATES[state]} #{STATES[other.state]}"
28
+ end
29
+
30
+ if r == 0
31
+ r = @at <=> other.instance_variable_get(:@at)
32
+ puts "At: #{r}"
33
+ end
34
+ puts "Final: #{r}"
35
+ r
36
+ end
37
+
38
+ def ack!
39
+ @state = :acked
40
+ end
41
+
42
+ def complete!
43
+ @state = :completed
44
+ end
45
+
46
+ def to_i
47
+ offset
48
+ end
49
+
50
+ def to_s
51
+ "#{partition}/#{offset}:#{STATES[state]}"
52
+ end
53
+ end
54
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WorkShaper
4
- VERSION = "0.1.3.1rc1"
4
+ VERSION = "0.1.3.1rc3"
5
5
  end
@@ -22,7 +22,10 @@ module WorkShaper
22
22
  # rubocop:enable Metrics/ParameterLists
23
23
  # rubocop:enable Layout/LineLength
24
24
 
25
- def enqueue(message, partition, offset)
25
+ def enqueue(message, offset_holder)
26
+ partition = offset_holder.partition
27
+ offset = offset_holder.offset
28
+
26
29
  # rubocop:disable Style/RescueStandardError
27
30
  @thread_pool.post do
28
31
  @work.call(message, partition, offset)
@@ -34,7 +37,8 @@ module WorkShaper
34
37
  ensure
35
38
  @semaphore.synchronize do
36
39
  WorkShaper.logger.debug "Completed: #{partition}:#{offset}"
37
- (@completed_offsets[partition] ||= Array.new) << offset
40
+ offset_holder.complete!
41
+ (@completed_offsets[partition] ||= Array.new) << offset_holder
38
42
  end
39
43
  end
40
44
  # rubocop:enable Style/RescueStandardError
data/lib/work_shaper.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'logger'
4
4
  require 'concurrent-ruby'
5
5
  require_relative "work_shaper/version"
6
+ require_relative "work_shaper/offset_holder"
6
7
  require_relative "work_shaper/manager"
7
8
  require_relative "work_shaper/worker"
8
9
  require 'json'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: work_shaper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3.1rc1
4
+ version: 0.1.3.1rc3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jerry Fernholz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-05 00:00:00.000000000 Z
11
+ date: 2024-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -40,6 +40,7 @@ files:
40
40
  - Rakefile
41
41
  - lib/work_shaper.rb
42
42
  - lib/work_shaper/manager.rb
43
+ - lib/work_shaper/offset_holder.rb
43
44
  - lib/work_shaper/version.rb
44
45
  - lib/work_shaper/worker.rb
45
46
  - work_shaper.gemspec