symphony 0.11.1 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,17 @@
1
+ == v0.12.0 [2019-06-26] Michael Granger <ged@FaerieMUD.org>
2
+
3
+ Enhancements:
4
+
5
+ - Set a proctitle for all tasks, not just those that include the
6
+ Metrics mixin.
7
+
8
+ Bugfixes:
9
+
10
+ - Fix a bunch of bugs with scaling, idle timeout, and message-
11
+ counting.
12
+ - Clean up unused @pids variable in longlived task group
13
+
14
+
1
15
  == v0.11.1 [2017-01-31] Mahlon E. Smith <mahlon@martini.nu>
2
16
 
3
17
  Housekeeping:
@@ -12,10 +12,10 @@ module Symphony
12
12
  Configurability
13
13
 
14
14
  # Library version constant
15
- VERSION = '0.11.1'
15
+ VERSION = '0.12.0'
16
16
 
17
17
  # Version-control revision constant
18
- REVISION = %q$Revision: 5347e2bfccb1 $
18
+ REVISION = %q$Revision$
19
19
 
20
20
 
21
21
  # The name of the environment variable to check for config file overrides
@@ -69,7 +69,7 @@ class Symphony::Daemon
69
69
  def initialize
70
70
  @task_pids = {}
71
71
  @task_groups = {}
72
- @running = false
72
+ @running = false
73
73
 
74
74
  self.set_up_signal_handling
75
75
  end
@@ -233,7 +233,7 @@ class Symphony::Daemon
233
233
  self.log.info "%p no longer configured; stopping its task group." % [ task_class ]
234
234
  self.stop_task_group( group )
235
235
  end
236
- end
236
+ end
237
237
 
238
238
 
239
239
  ### Start a new task group for the given +task_class+ and +max+ number of workers.
@@ -264,7 +264,7 @@ class Symphony::Daemon
264
264
  new_pids = group.adjust_workers or next
265
265
  new_pids.each do |pid|
266
266
  self.task_pids[ pid ] = group
267
- end
267
+ end
268
268
  end
269
269
  end
270
270
 
@@ -341,7 +341,9 @@ class Symphony::Daemon
341
341
  ### Notify the task group the specified +pid+ belongs to that its child exited
342
342
  ### with the specified +status+.
343
343
  def notify_group( pid, status )
344
+ self.log.debug "Notifying group of reaped child %d: %p" % [ pid, status ]
344
345
  return unless self.running?
346
+
345
347
  group = self.task_pids[ pid ]
346
348
  group.on_child_exit( pid, status )
347
349
  end
@@ -31,7 +31,7 @@ module Symphony::Metrics
31
31
  registry: @metriks_registry,
32
32
  prefix: self.class.name )
33
33
  @proc_reporter = Metriks::Reporter::ProcTitle.new(
34
- prefix: self.class.name,
34
+ prefix: self.procname,
35
35
  registry: @metriks_registry,
36
36
  on_error: lambda {|ex| self.log.error(ex) } )
37
37
 
@@ -235,7 +235,7 @@ class Symphony::Queue
235
235
  cons = Bunny::Consumer.new( amqp_queue.channel, amqp_queue, tag, !ackmode, false, CONSUMER_ARGS )
236
236
 
237
237
  cons.on_delivery do |delivery_info, properties, payload|
238
- rval = self.handle_message( delivery_info, properties, payload, &work_callback )
238
+ self.handle_message( delivery_info, properties, payload, &work_callback )
239
239
  self.log.debug "Done with message %s. Session is %s" %
240
240
  [ delivery_info.delivery_tag, self.class.amqp_session.closed? ? "closed" : "open" ]
241
241
  cons.cancel if self.shutting_down?
@@ -278,8 +278,8 @@ class Symphony::Queue
278
278
  end
279
279
  end
280
280
 
281
- return queue
282
- end
281
+ return queue
282
+ end
283
283
 
284
284
 
285
285
  ### Handle each subscribed message.
@@ -36,7 +36,7 @@ module Symphony::Statistics
36
36
  ### Add the specified +value+ as a sample for the current time.
37
37
  def add_sample( value )
38
38
  @samples << [ Time.now.to_f, value ]
39
- @samples.pop( @samples.size - self.sample_size ) if @samples.size > self.sample_size
39
+ @samples.shift( @samples.size - self.sample_size ) if @samples.size > self.sample_size
40
40
  @counter = ( @counter + 1 ) % 3
41
41
  end
42
42
 
@@ -292,6 +292,8 @@ class Symphony::Task
292
292
  def start
293
293
  rval = nil
294
294
 
295
+ Process.setproctitle( self.procname )
296
+
295
297
  begin
296
298
  self.restarting = false
297
299
  rval = self.with_signal_handler( *SIGNALS ) do
@@ -349,16 +351,19 @@ class Symphony::Task
349
351
 
350
352
  rval = nil
351
353
  self.queue.wait_for_message( oneshot ) do |payload, metadata|
352
- self.last_worked = nil
353
- work_payload = self.preprocess_payload( payload, metadata )
354
-
355
- rval = if self.class.timeout
356
- self.work_with_timeout( work_payload, metadata )
357
- else
358
- self.work( work_payload, metadata )
354
+ begin
355
+ self.last_worked = nil
356
+ work_payload = self.preprocess_payload( payload, metadata )
357
+
358
+ rval = if self.class.timeout
359
+ self.work_with_timeout( work_payload, metadata )
360
+ else
361
+ self.work( work_payload, metadata )
362
+ end
363
+ ensure
364
+ self.last_worked = Time.now
359
365
  end
360
366
 
361
- self.last_worked = Time.now
362
367
  rval
363
368
  end
364
369
 
@@ -390,7 +395,10 @@ class Symphony::Task
390
395
  # If it's unset, it means it's running now
391
396
  return unless self.last_worked && self.exit_on_idle?
392
397
 
393
- if (Time.now - self.last_worked) > self.class.idle_timeout
398
+ seconds_idle = Time.now - self.last_worked
399
+ self.log.debug "%p: idle %0.2fs" % [ self.class, seconds_idle ]
400
+
401
+ if seconds_idle > self.class.idle_timeout
394
402
  self.log.debug "Sending stop signal due to idle timeout"
395
403
  self.stop_gracefully
396
404
  end
@@ -470,6 +478,18 @@ class Symphony::Task
470
478
  end
471
479
 
472
480
 
481
+ ### Return a string for setting the proc title
482
+ def procname
483
+ return "%s %s: Symphony: %p (%s) -> %s" % [
484
+ RUBY_ENGINE,
485
+ RUBY_VERSION,
486
+ self.class,
487
+ self.class.work_model,
488
+ self.class.queue_name
489
+ ]
490
+ end
491
+
492
+
473
493
  ### Handle a hangup signal by re-reading the config and restarting.
474
494
  def on_hangup
475
495
  self.log.info "Hangup signal."
@@ -64,7 +64,7 @@ class Symphony::TaskGroup
64
64
 
65
65
  Process.setpgid( pid, 0 )
66
66
 
67
- self.log.info "Adding worker %p" % [ pid ]
67
+ self.log.info "Adding %p worker %p" % [ task_class, pid ]
68
68
  self.workers << pid
69
69
  @last_child_started = Time.now
70
70
 
@@ -15,10 +15,7 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
15
15
  ### run a maximum of +max_workers+.
16
16
  def initialize( task_class, max_workers )
17
17
  super
18
-
19
- @queue = nil
20
- @pids = Set.new
21
- @started_one_worker = false
18
+ @queue = nil
22
19
  end
23
20
 
24
21
 
@@ -26,33 +23,6 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
26
23
  public
27
24
  ######
28
25
 
29
- # The PIDs of the child this task group manages
30
- attr_reader :pids
31
-
32
-
33
- ### Return +true+ if the task group should scale up by one.
34
- def needs_a_worker?
35
- return true unless self.started_one_worker?
36
- return false unless @queue
37
- if ( cc = @queue.consumer_count ) >= self.max_workers
38
- self.log.debug "Already at max workers (%d)" % [ self.max_workers ]
39
- return false
40
- else
41
- self.log.debug "Not yet at max workers (have %d)" % [ cc ]
42
- end
43
- self.log.debug "Mean jobcount is %0.2f" % [ self.mean_jobcount ]
44
- return self.mean_jobcount > 1 && !self.sample_values_decreasing?
45
- end
46
-
47
-
48
- ### Returns +true+ if the group has started at least one worker. Used to avoid
49
- ### racing to start workers when one worker has started, but we haven't yet connected
50
- ### to AMQP to get consumer count yet.
51
- def started_one_worker?
52
- return @started_one_worker
53
- end
54
-
55
-
56
26
  ### If the number of workers is not at the maximum, start some.
57
27
  def adjust_workers
58
28
  self.sample_queue_status
@@ -61,44 +31,68 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
61
31
 
62
32
  if self.needs_a_worker?
63
33
  self.log.info "Too few workers for (%s); spinning one up." % [ self.task_class.name ]
64
- pid = self.start_worker( @started_one_worker )
65
- self.pids.add( pid )
34
+ pid = self.start_worker( !self.workers.empty? )
66
35
  return [ pid ]
67
36
  end
68
37
 
38
+ @queue ||= self.get_message_counting_queue
39
+
69
40
  return nil
70
41
  end
71
42
 
72
43
 
44
+ ### Return +true+ if the task group should scale up by one.
45
+ def needs_a_worker?
46
+ return true if self.workers.empty?
47
+ return false unless @queue
48
+
49
+
50
+ # Calculate the number of workers across the whole broker
51
+ if ( cc = @queue.consumer_count ) >= self.max_workers
52
+ self.log.debug "%p: Already at max workers (%d)" % [ self.task_class, self.max_workers ]
53
+ return false
54
+ else
55
+ self.log.debug "%p: Not yet at max workers (have %d)" % [ self.task_class, cc ]
56
+ end
57
+
58
+ self.log.debug "Mean jobcount is %0.2f" % [ self.mean_jobcount ]
59
+ return self.mean_jobcount > 1 && !self.sample_values_decreasing?
60
+ end
61
+
62
+
73
63
  ### Add the current number of messages in the queue to the samples.
74
64
  def sample_queue_status
75
65
  return unless @queue
76
- self.add_sample( @queue.message_count )
66
+
67
+ count = @queue.message_count
68
+ self.add_sample( count )
77
69
  end
78
70
 
79
71
 
80
72
  ### Overridden to grab a Bunny::Queue for monitoring when the first
81
73
  ### worker starts.
82
74
  def start_worker( exit_on_idle=false )
83
- @started_one_worker = true
84
-
85
75
  pid = super
86
76
  self.log.info "Start a new worker at pid %d" % [ pid ]
87
77
 
88
- unless @queue
89
- begin
90
- channel = Symphony::Queue.amqp_channel
91
- @queue = channel.queue( self.task_class.queue_name, passive: true, prefetch: 0 )
92
- self.log.debug " got the 0-prefetch queue"
93
- rescue Bunny::NotFound => err
94
- self.log.info "Child hasn't created the queue yet; deferring"
95
- Symphony::Queue.reset
96
- end
97
- end
98
-
99
78
  return pid
100
79
  end
101
80
 
81
+
82
+ ### Get a queue for counting the number of messages in the queue for this
83
+ ### worker.
84
+ def get_message_counting_queue
85
+ channel = Symphony::Queue.amqp_channel
86
+ queue = channel.queue( self.task_class.queue_name, passive: true, prefetch: 0 )
87
+
88
+ return queue
89
+ rescue Bunny::NotFound => err
90
+ self.log.info "Child hasn't created the queue yet; deferring"
91
+ Symphony::Queue.reset
92
+
93
+ return nil
94
+ end
95
+
102
96
  end # class Symphony::TaskGroup::LongLived
103
97
 
104
98
 
@@ -17,7 +17,7 @@ class Auditor < Symphony::Task
17
17
 
18
18
 
19
19
  ### Create a new Auditor task.
20
- def initialize( queue )
20
+ def initialize( * )
21
21
  super
22
22
  @logdir = Pathname.pwd
23
23
  @logfile = @logdir + 'events.log'
@@ -40,13 +40,13 @@ class OneshotSimulator < Symphony::Task
40
40
 
41
41
  val = Random.rand
42
42
  case
43
- when val < 0.33
43
+ when val < 0.1
44
44
  $stderr.puts "Simulating an error in the task (reject)."
45
45
  raise "OOOOOPS!"
46
- when val < 0.66
46
+ when val < 0.15
47
47
  $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
48
48
  return false
49
- when val < 0.88
49
+ when val < 0.20
50
50
  $stderr.puts "Simulating a timeout case"
51
51
  sleep( self.class.timeout + 1 )
52
52
  else
@@ -15,6 +15,9 @@ class Simulator < Symphony::Task
15
15
  # Fetch 10 events at a time
16
16
  prefetch 10
17
17
 
18
+ # Keep the queue around even when the task isn't running
19
+ persistent true
20
+
18
21
  # Only allow 2 seconds for work to complete before rejecting or retrying.
19
22
  # timeout 2.0, action: :retry
20
23
 
@@ -36,22 +39,22 @@ class Simulator < Symphony::Task
36
39
 
37
40
  sleep rand( 0.0 .. 2.0 )
38
41
 
39
- # val = Random.rand
40
- # case
41
- # when val < 0.33
42
- # $stderr.puts "Simulating an error in the task (reject)."
43
- # raise "OOOOOPS!"
44
- # when val < 0.66
45
- # $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
46
- # return false
47
- # when val < 0.88
48
- # $stderr.puts "Simulating a timeout case"
49
- # sleep( self.class.timeout + 1 )
50
- # else
51
- # $stderr.puts "Simulating a successful task run (accept)"
52
- # puts( payload.inspect )
53
- # return true
54
- # end
42
+ val = Random.rand
43
+ case
44
+ when val < 0.05
45
+ $stderr.puts "Simulating an error in the task (reject)."
46
+ raise "OOOOOPS!"
47
+ when val < 0.10
48
+ $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
49
+ return false
50
+ when val < 0.15
51
+ $stderr.puts "Simulating a timeout case"
52
+ sleep( self.class.timeout + 1 ) if self.class.timeout
53
+ else
54
+ $stderr.puts "Simulating a successful task run (accept)"
55
+ puts( payload.inspect )
56
+ return true
57
+ end
55
58
 
56
59
  true
57
60
  end
@@ -90,7 +90,7 @@ describe Symphony::Daemon do
90
90
  it "adjusts its tasks when its config is reloaded" do
91
91
  config = Configurability.default_config
92
92
  config.symphony.tasks = [ 'test1', 'test2' ]
93
- # config.logging.__default__ = 'debug'
93
+ config.logging.__default__ = 'fatal'
94
94
  config.install
95
95
 
96
96
  allow( Symphony::Task ).to receive( :exit )
@@ -6,6 +6,8 @@ require 'symphony/task_group/longlived'
6
6
 
7
7
  describe Symphony::TaskGroup::LongLived do
8
8
 
9
+ FIRST_PID = 414
10
+
9
11
  let( :task ) do
10
12
  Class.new( Symphony::Task ) do
11
13
  extend Symphony::MethodUtilities
@@ -30,7 +32,7 @@ describe Symphony::TaskGroup::LongLived do
30
32
 
31
33
  let( :pid_generator ) do
32
34
  Enumerator.new do |generator|
33
- i = 414
35
+ i = FIRST_PID
34
36
  loop do
35
37
  generator.yield( i )
36
38
  i += rand( 3 ) + 1
@@ -38,9 +40,6 @@ describe Symphony::TaskGroup::LongLived do
38
40
  end
39
41
  end
40
42
 
41
- # not enough samples
42
- # trending up
43
-
44
43
 
45
44
 
46
45
  it "doesn't start anything if it's throttled" do
@@ -61,20 +60,12 @@ describe Symphony::TaskGroup::LongLived do
61
60
 
62
61
 
63
62
  it "starts an initial worker if it doesn't have any" do
64
- allow( Process ).to receive( :setpgid ).with( 414, 0 )
65
-
66
- channel = double( Bunny::Channel )
67
- queue = double( Bunny::Queue )
68
- expect( Symphony::Queue ).to receive( :amqp_channel ).
69
- and_return( channel )
70
- expect( channel ).to receive( :queue ).
71
- with( task.queue_name, passive: true, prefetch: 0 ).
72
- and_return( queue )
63
+ allow( Process ).to receive( :setpgid ).with( FIRST_PID, 0 )
73
64
 
74
65
  task_group.adjust_workers
75
66
 
76
- expect( task_group.started_one_worker? ).to be_truthy
77
- expect( task_group.pids ).to include( 414 )
67
+ expect( task_group.workers ).to_not be_empty
68
+ expect( task_group.workers ).to contain_exactly( FIRST_PID )
78
69
  end
79
70
 
80
71
 
@@ -93,20 +84,19 @@ describe Symphony::TaskGroup::LongLived do
93
84
  and_return( queue )
94
85
 
95
86
  expect( queue ).to receive( :consumer_count ) do
96
- task_group.pids.size
87
+ task_group.workers.size
97
88
  end.at_least( :once )
98
89
  expect( queue ).to receive( :message_count ).and_return( *samples )
99
90
 
100
91
  start = 1414002605
101
- start.upto( start + samples.size ) do |time|
92
+ start.upto( start + samples.size + 1 ) do |time|
102
93
  Timecop.freeze( time ) do
103
94
  task_group.adjust_workers
104
95
  end
105
96
  end
106
97
 
107
- expect( task_group.started_one_worker? ).to be_truthy
108
- expect( task_group.pids ).to include( 414 )
109
- expect( task_group.pids.length ).to eq( 2 )
98
+ expect( task_group.workers ).to include( FIRST_PID )
99
+ expect( task_group.workers.length ).to eq( 2 )
110
100
  end
111
101
 
112
102
 
@@ -125,18 +115,18 @@ describe Symphony::TaskGroup::LongLived do
125
115
  and_return( queue )
126
116
 
127
117
  expect( queue ).to receive( :consumer_count ) do
128
- task_group.pids.size
118
+ task_group.workers.size
129
119
  end.at_least( :once )
130
120
  expect( queue ).to receive( :message_count ).and_return( *samples )
131
121
 
132
122
  start = 1414002605
133
- start.upto( start + samples.size ) do |time|
123
+ start.upto( start + samples.size + 1 ) do |time|
134
124
  Timecop.freeze( time ) do
135
125
  task_group.adjust_workers
136
126
  end
137
127
  end
138
128
 
139
- expect( task_group.pids.size ).to eq( 2 )
129
+ expect( task_group.workers.size ).to eq( 2 )
140
130
  end
141
131
 
142
132
 
@@ -155,18 +145,18 @@ describe Symphony::TaskGroup::LongLived do
155
145
  and_return( queue )
156
146
 
157
147
  expect( queue ).to receive( :consumer_count ) do
158
- task_group.pids.size
148
+ task_group.workers.size
159
149
  end.at_least( :once )
160
150
  expect( queue ).to receive( :message_count ).and_return( *samples )
161
151
 
162
152
  start = 1414002605
163
- start.upto( start + samples.size ) do |time|
153
+ start.upto( start + samples.size + 1 ) do |time|
164
154
  Timecop.freeze( time ) do
165
155
  task_group.adjust_workers
166
156
  end
167
157
  end
168
158
 
169
- expect( task_group.pids.size ).to eq( 2 )
159
+ expect( task_group.workers.size ).to eq( 2 )
170
160
  end
171
161
 
172
162
 
@@ -188,13 +178,13 @@ describe Symphony::TaskGroup::LongLived do
188
178
  expect( queue ).to receive( :message_count ).and_return( *samples )
189
179
 
190
180
  start = 1414002605
191
- start.upto( start + samples.size ) do |time|
181
+ start.upto( start + samples.size + 1 ) do |time|
192
182
  Timecop.freeze( time ) do
193
183
  task_group.adjust_workers
194
184
  end
195
185
  end
196
186
 
197
- expect( task_group.pids.size ).to eq( 1 )
187
+ expect( task_group.workers.size ).to eq( 1 )
198
188
  end
199
189
 
200
190
 
@@ -213,19 +203,18 @@ describe Symphony::TaskGroup::LongLived do
213
203
  and_return( queue )
214
204
 
215
205
  expect( queue ).to receive( :consumer_count ) do
216
- task_group.pids.size
206
+ task_group.workers.size
217
207
  end.at_least( :once )
218
208
  expect( queue ).to receive( :message_count ).and_return( *samples )
219
209
 
220
210
  start = 1414002605
221
- start.upto( start + samples.size ) do |time|
211
+ start.upto( start + samples.size + 1 ) do |time|
222
212
  Timecop.freeze( time ) do
223
213
  task_group.adjust_workers
224
214
  end
225
215
  end
226
216
 
227
- expect( task_group.started_one_worker? ).to be_truthy
228
- expect( task_group.pids.size ).to eq( 1 )
217
+ expect( task_group.workers.size ).to eq( 1 )
229
218
  end
230
219
 
231
220
  end