symphony 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,17 @@
1
+ == v0.12.0 [2019-06-26] Michael Granger <ged@FaerieMUD.org>
2
+
3
+ Enhancements:
4
+
5
+ - Set a proctitle for all tasks, not just those that include the
6
+ Metrics mixin.
7
+
8
+ Bugfixes:
9
+
10
+ - Fix a bunch of bugs with scaling, idle timeout, and message-
11
+ counting.
12
+ - Clean up unused @pids variable in longlived task group.
13
+
14
+
1
15
  == v0.11.1 [2017-01-31] Mahlon E. Smith <mahlon@martini.nu>
2
16
 
3
17
  Housekeeping:
@@ -12,10 +12,10 @@ module Symphony
12
12
  Configurability
13
13
 
14
14
  # Library version constant
15
- VERSION = '0.11.1'
15
+ VERSION = '0.12.0'
16
16
 
17
17
  # Version-control revision constant
18
- REVISION = %q$Revision: 5347e2bfccb1 $
18
+ REVISION = %q$Revision$
19
19
 
20
20
 
21
21
  # The name of the environment variable to check for config file overrides
@@ -69,7 +69,7 @@ class Symphony::Daemon
69
69
  def initialize
70
70
  @task_pids = {}
71
71
  @task_groups = {}
72
- @running = false
72
+ @running = false
73
73
 
74
74
  self.set_up_signal_handling
75
75
  end
@@ -233,7 +233,7 @@ class Symphony::Daemon
233
233
  self.log.info "%p no longer configured; stopping its task group." % [ task_class ]
234
234
  self.stop_task_group( group )
235
235
  end
236
- end
236
+ end
237
237
 
238
238
 
239
239
  ### Start a new task group for the given +task_class+ and +max+ number of workers.
@@ -264,7 +264,7 @@ class Symphony::Daemon
264
264
  new_pids = group.adjust_workers or next
265
265
  new_pids.each do |pid|
266
266
  self.task_pids[ pid ] = group
267
- end
267
+ end
268
268
  end
269
269
  end
270
270
 
@@ -341,7 +341,9 @@ class Symphony::Daemon
341
341
  ### Notify the task group the specified +pid+ belongs to that its child exited
342
342
  ### with the specified +status+.
343
343
  def notify_group( pid, status )
344
+ self.log.debug "Notifying group of reaped child %d: %p" % [ pid, status ]
344
345
  return unless self.running?
346
+
345
347
  group = self.task_pids[ pid ]
346
348
  group.on_child_exit( pid, status )
347
349
  end
@@ -31,7 +31,7 @@ module Symphony::Metrics
31
31
  registry: @metriks_registry,
32
32
  prefix: self.class.name )
33
33
  @proc_reporter = Metriks::Reporter::ProcTitle.new(
34
- prefix: self.class.name,
34
+ prefix: self.procname,
35
35
  registry: @metriks_registry,
36
36
  on_error: lambda {|ex| self.log.error(ex) } )
37
37
 
@@ -235,7 +235,7 @@ class Symphony::Queue
235
235
  cons = Bunny::Consumer.new( amqp_queue.channel, amqp_queue, tag, !ackmode, false, CONSUMER_ARGS )
236
236
 
237
237
  cons.on_delivery do |delivery_info, properties, payload|
238
- rval = self.handle_message( delivery_info, properties, payload, &work_callback )
238
+ self.handle_message( delivery_info, properties, payload, &work_callback )
239
239
  self.log.debug "Done with message %s. Session is %s" %
240
240
  [ delivery_info.delivery_tag, self.class.amqp_session.closed? ? "closed" : "open" ]
241
241
  cons.cancel if self.shutting_down?
@@ -278,8 +278,8 @@ class Symphony::Queue
278
278
  end
279
279
  end
280
280
 
281
- return queue
282
- end
281
+ return queue
282
+ end
283
283
 
284
284
 
285
285
  ### Handle each subscribed message.
@@ -36,7 +36,7 @@ module Symphony::Statistics
36
36
  ### Add the specified +value+ as a sample for the current time.
37
37
  def add_sample( value )
38
38
  @samples << [ Time.now.to_f, value ]
39
- @samples.pop( @samples.size - self.sample_size ) if @samples.size > self.sample_size
39
+ @samples.shift( @samples.size - self.sample_size ) if @samples.size > self.sample_size
40
40
  @counter = ( @counter + 1 ) % 3
41
41
  end
42
42
 
@@ -292,6 +292,8 @@ class Symphony::Task
292
292
  def start
293
293
  rval = nil
294
294
 
295
+ Process.setproctitle( self.procname )
296
+
295
297
  begin
296
298
  self.restarting = false
297
299
  rval = self.with_signal_handler( *SIGNALS ) do
@@ -349,16 +351,19 @@ class Symphony::Task
349
351
 
350
352
  rval = nil
351
353
  self.queue.wait_for_message( oneshot ) do |payload, metadata|
352
- self.last_worked = nil
353
- work_payload = self.preprocess_payload( payload, metadata )
354
-
355
- rval = if self.class.timeout
356
- self.work_with_timeout( work_payload, metadata )
357
- else
358
- self.work( work_payload, metadata )
354
+ begin
355
+ self.last_worked = nil
356
+ work_payload = self.preprocess_payload( payload, metadata )
357
+
358
+ rval = if self.class.timeout
359
+ self.work_with_timeout( work_payload, metadata )
360
+ else
361
+ self.work( work_payload, metadata )
362
+ end
363
+ ensure
364
+ self.last_worked = Time.now
359
365
  end
360
366
 
361
- self.last_worked = Time.now
362
367
  rval
363
368
  end
364
369
 
@@ -390,7 +395,10 @@ class Symphony::Task
390
395
  # If it's unset, it means it's running now
391
396
  return unless self.last_worked && self.exit_on_idle?
392
397
 
393
- if (Time.now - self.last_worked) > self.class.idle_timeout
398
+ seconds_idle = Time.now - self.last_worked
399
+ self.log.debug "%p: idle %0.2fs" % [ self.class, seconds_idle ]
400
+
401
+ if seconds_idle > self.class.idle_timeout
394
402
  self.log.debug "Sending stop signal due to idle timeout"
395
403
  self.stop_gracefully
396
404
  end
@@ -470,6 +478,18 @@ class Symphony::Task
470
478
  end
471
479
 
472
480
 
481
+ ### Return a string for setting the proc title
482
+ def procname
483
+ return "%s %s: Symphony: %p (%s) -> %s" % [
484
+ RUBY_ENGINE,
485
+ RUBY_VERSION,
486
+ self.class,
487
+ self.class.work_model,
488
+ self.class.queue_name
489
+ ]
490
+ end
491
+
492
+
473
493
  ### Handle a hangup signal by re-reading the config and restarting.
474
494
  def on_hangup
475
495
  self.log.info "Hangup signal."
@@ -64,7 +64,7 @@ class Symphony::TaskGroup
64
64
 
65
65
  Process.setpgid( pid, 0 )
66
66
 
67
- self.log.info "Adding worker %p" % [ pid ]
67
+ self.log.info "Adding %p worker %p" % [ task_class, pid ]
68
68
  self.workers << pid
69
69
  @last_child_started = Time.now
70
70
 
@@ -15,10 +15,7 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
15
15
  ### run a maximum of +max_workers+.
16
16
  def initialize( task_class, max_workers )
17
17
  super
18
-
19
- @queue = nil
20
- @pids = Set.new
21
- @started_one_worker = false
18
+ @queue = nil
22
19
  end
23
20
 
24
21
 
@@ -26,33 +23,6 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
26
23
  public
27
24
  ######
28
25
 
29
- # The PIDs of the child this task group manages
30
- attr_reader :pids
31
-
32
-
33
- ### Return +true+ if the task group should scale up by one.
34
- def needs_a_worker?
35
- return true unless self.started_one_worker?
36
- return false unless @queue
37
- if ( cc = @queue.consumer_count ) >= self.max_workers
38
- self.log.debug "Already at max workers (%d)" % [ self.max_workers ]
39
- return false
40
- else
41
- self.log.debug "Not yet at max workers (have %d)" % [ cc ]
42
- end
43
- self.log.debug "Mean jobcount is %0.2f" % [ self.mean_jobcount ]
44
- return self.mean_jobcount > 1 && !self.sample_values_decreasing?
45
- end
46
-
47
-
48
- ### Returns +true+ if the group has started at least one worker. Used to avoid
49
- ### racing to start workers when one worker has started, but we haven't yet connected
50
- ### to AMQP to get consumer count yet.
51
- def started_one_worker?
52
- return @started_one_worker
53
- end
54
-
55
-
56
26
  ### If the number of workers is not at the maximum, start some.
57
27
  def adjust_workers
58
28
  self.sample_queue_status
@@ -61,44 +31,68 @@ class Symphony::TaskGroup::LongLived < Symphony::TaskGroup
61
31
 
62
32
  if self.needs_a_worker?
63
33
  self.log.info "Too few workers for (%s); spinning one up." % [ self.task_class.name ]
64
- pid = self.start_worker( @started_one_worker )
65
- self.pids.add( pid )
34
+ pid = self.start_worker( !self.workers.empty? )
66
35
  return [ pid ]
67
36
  end
68
37
 
38
+ @queue ||= self.get_message_counting_queue
39
+
69
40
  return nil
70
41
  end
71
42
 
72
43
 
44
+ ### Return +true+ if the task group should scale up by one.
45
+ def needs_a_worker?
46
+ return true if self.workers.empty?
47
+ return false unless @queue
48
+
49
+
50
+ # Calculate the number of workers across the whole broker
51
+ if ( cc = @queue.consumer_count ) >= self.max_workers
52
+ self.log.debug "%p: Already at max workers (%d)" % [ self.task_class, self.max_workers ]
53
+ return false
54
+ else
55
+ self.log.debug "%p: Not yet at max workers (have %d)" % [ self.task_class, cc ]
56
+ end
57
+
58
+ self.log.debug "Mean jobcount is %0.2f" % [ self.mean_jobcount ]
59
+ return self.mean_jobcount > 1 && !self.sample_values_decreasing?
60
+ end
61
+
62
+
73
63
  ### Add the current number of messages in the queue to the samples.
74
64
  def sample_queue_status
75
65
  return unless @queue
76
- self.add_sample( @queue.message_count )
66
+
67
+ count = @queue.message_count
68
+ self.add_sample( count )
77
69
  end
78
70
 
79
71
 
80
72
  ### Overridden to grab a Bunny::Queue for monitoring when the first
81
73
  ### worker starts.
82
74
  def start_worker( exit_on_idle=false )
83
- @started_one_worker = true
84
-
85
75
  pid = super
86
76
  self.log.info "Start a new worker at pid %d" % [ pid ]
87
77
 
88
- unless @queue
89
- begin
90
- channel = Symphony::Queue.amqp_channel
91
- @queue = channel.queue( self.task_class.queue_name, passive: true, prefetch: 0 )
92
- self.log.debug " got the 0-prefetch queue"
93
- rescue Bunny::NotFound => err
94
- self.log.info "Child hasn't created the queue yet; deferring"
95
- Symphony::Queue.reset
96
- end
97
- end
98
-
99
78
  return pid
100
79
  end
101
80
 
81
+
82
+ ### Get a queue for counting the number of messages in the queue for this
83
+ ### worker.
84
+ def get_message_counting_queue
85
+ channel = Symphony::Queue.amqp_channel
86
+ queue = channel.queue( self.task_class.queue_name, passive: true, prefetch: 0 )
87
+
88
+ return queue
89
+ rescue Bunny::NotFound => err
90
+ self.log.info "Child hasn't created the queue yet; deferring"
91
+ Symphony::Queue.reset
92
+
93
+ return nil
94
+ end
95
+
102
96
  end # class Symphony::TaskGroup::LongLived
103
97
 
104
98
 
@@ -17,7 +17,7 @@ class Auditor < Symphony::Task
17
17
 
18
18
 
19
19
  ### Create a new Auditor task.
20
- def initialize( queue )
20
+ def initialize( * )
21
21
  super
22
22
  @logdir = Pathname.pwd
23
23
  @logfile = @logdir + 'events.log'
@@ -40,13 +40,13 @@ class OneshotSimulator < Symphony::Task
40
40
 
41
41
  val = Random.rand
42
42
  case
43
- when val < 0.33
43
+ when val < 0.1
44
44
  $stderr.puts "Simulating an error in the task (reject)."
45
45
  raise "OOOOOPS!"
46
- when val < 0.66
46
+ when val < 0.15
47
47
  $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
48
48
  return false
49
- when val < 0.88
49
+ when val < 0.20
50
50
  $stderr.puts "Simulating a timeout case"
51
51
  sleep( self.class.timeout + 1 )
52
52
  else
@@ -15,6 +15,9 @@ class Simulator < Symphony::Task
15
15
  # Fetch 100 events at a time
16
16
  prefetch 10
17
17
 
18
+ # Keep the queue around even when the task isn't running
19
+ persistent true
20
+
18
21
  # Only allow 2 seconds for work to complete before rejecting or retrying.
19
22
  # timeout 2.0, action: :retry
20
23
 
@@ -36,22 +39,22 @@ class Simulator < Symphony::Task
36
39
 
37
40
  sleep rand( 0.0 .. 2.0 )
38
41
 
39
- # val = Random.rand
40
- # case
41
- # when val < 0.33
42
- # $stderr.puts "Simulating an error in the task (reject)."
43
- # raise "OOOOOPS!"
44
- # when val < 0.66
45
- # $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
46
- # return false
47
- # when val < 0.88
48
- # $stderr.puts "Simulating a timeout case"
49
- # sleep( self.class.timeout + 1 )
50
- # else
51
- # $stderr.puts "Simulating a successful task run (accept)"
52
- # puts( payload.inspect )
53
- # return true
54
- # end
42
+ val = Random.rand
43
+ case
44
+ when val < 0.05
45
+ $stderr.puts "Simulating an error in the task (reject)."
46
+ raise "OOOOOPS!"
47
+ when val < 0.10
48
+ $stderr.puts "Simulating a soft failure in the task (reject+requeue)."
49
+ return false
50
+ when val < 0.15
51
+ $stderr.puts "Simulating a timeout case"
52
+ sleep( self.class.timeout + 1 ) if self.class.timeout
53
+ else
54
+ $stderr.puts "Simulating a successful task run (accept)"
55
+ puts( payload.inspect )
56
+ return true
57
+ end
55
58
 
56
59
  true
57
60
  end
@@ -90,7 +90,7 @@ describe Symphony::Daemon do
90
90
  it "adjusts its tasks when its config is reloaded" do
91
91
  config = Configurability.default_config
92
92
  config.symphony.tasks = [ 'test1', 'test2' ]
93
- # config.logging.__default__ = 'debug'
93
+ config.logging.__default__ = 'fatal'
94
94
  config.install
95
95
 
96
96
  allow( Symphony::Task ).to receive( :exit )
@@ -6,6 +6,8 @@ require 'symphony/task_group/longlived'
6
6
 
7
7
  describe Symphony::TaskGroup::LongLived do
8
8
 
9
+ FIRST_PID = 414
10
+
9
11
  let( :task ) do
10
12
  Class.new( Symphony::Task ) do
11
13
  extend Symphony::MethodUtilities
@@ -30,7 +32,7 @@ describe Symphony::TaskGroup::LongLived do
30
32
 
31
33
  let( :pid_generator ) do
32
34
  Enumerator.new do |generator|
33
- i = 414
35
+ i = FIRST_PID
34
36
  loop do
35
37
  generator.yield( i )
36
38
  i += rand( 3 ) + 1
@@ -38,9 +40,6 @@ describe Symphony::TaskGroup::LongLived do
38
40
  end
39
41
  end
40
42
 
41
- # not enough samples
42
- # trending up
43
-
44
43
 
45
44
 
46
45
  it "doesn't start anything if it's throttled" do
@@ -61,20 +60,12 @@ describe Symphony::TaskGroup::LongLived do
61
60
 
62
61
 
63
62
  it "starts an initial worker if it doesn't have any" do
64
- allow( Process ).to receive( :setpgid ).with( 414, 0 )
65
-
66
- channel = double( Bunny::Channel )
67
- queue = double( Bunny::Queue )
68
- expect( Symphony::Queue ).to receive( :amqp_channel ).
69
- and_return( channel )
70
- expect( channel ).to receive( :queue ).
71
- with( task.queue_name, passive: true, prefetch: 0 ).
72
- and_return( queue )
63
+ allow( Process ).to receive( :setpgid ).with( FIRST_PID, 0 )
73
64
 
74
65
  task_group.adjust_workers
75
66
 
76
- expect( task_group.started_one_worker? ).to be_truthy
77
- expect( task_group.pids ).to include( 414 )
67
+ expect( task_group.workers ).to_not be_empty
68
+ expect( task_group.workers ).to contain_exactly( FIRST_PID )
78
69
  end
79
70
 
80
71
 
@@ -93,20 +84,19 @@ describe Symphony::TaskGroup::LongLived do
93
84
  and_return( queue )
94
85
 
95
86
  expect( queue ).to receive( :consumer_count ) do
96
- task_group.pids.size
87
+ task_group.workers.size
97
88
  end.at_least( :once )
98
89
  expect( queue ).to receive( :message_count ).and_return( *samples )
99
90
 
100
91
  start = 1414002605
101
- start.upto( start + samples.size ) do |time|
92
+ start.upto( start + samples.size + 1 ) do |time|
102
93
  Timecop.freeze( time ) do
103
94
  task_group.adjust_workers
104
95
  end
105
96
  end
106
97
 
107
- expect( task_group.started_one_worker? ).to be_truthy
108
- expect( task_group.pids ).to include( 414 )
109
- expect( task_group.pids.length ).to eq( 2 )
98
+ expect( task_group.workers ).to include( FIRST_PID )
99
+ expect( task_group.workers.length ).to eq( 2 )
110
100
  end
111
101
 
112
102
 
@@ -125,18 +115,18 @@ describe Symphony::TaskGroup::LongLived do
125
115
  and_return( queue )
126
116
 
127
117
  expect( queue ).to receive( :consumer_count ) do
128
- task_group.pids.size
118
+ task_group.workers.size
129
119
  end.at_least( :once )
130
120
  expect( queue ).to receive( :message_count ).and_return( *samples )
131
121
 
132
122
  start = 1414002605
133
- start.upto( start + samples.size ) do |time|
123
+ start.upto( start + samples.size + 1 ) do |time|
134
124
  Timecop.freeze( time ) do
135
125
  task_group.adjust_workers
136
126
  end
137
127
  end
138
128
 
139
- expect( task_group.pids.size ).to eq( 2 )
129
+ expect( task_group.workers.size ).to eq( 2 )
140
130
  end
141
131
 
142
132
 
@@ -155,18 +145,18 @@ describe Symphony::TaskGroup::LongLived do
155
145
  and_return( queue )
156
146
 
157
147
  expect( queue ).to receive( :consumer_count ) do
158
- task_group.pids.size
148
+ task_group.workers.size
159
149
  end.at_least( :once )
160
150
  expect( queue ).to receive( :message_count ).and_return( *samples )
161
151
 
162
152
  start = 1414002605
163
- start.upto( start + samples.size ) do |time|
153
+ start.upto( start + samples.size + 1 ) do |time|
164
154
  Timecop.freeze( time ) do
165
155
  task_group.adjust_workers
166
156
  end
167
157
  end
168
158
 
169
- expect( task_group.pids.size ).to eq( 2 )
159
+ expect( task_group.workers.size ).to eq( 2 )
170
160
  end
171
161
 
172
162
 
@@ -188,13 +178,13 @@ describe Symphony::TaskGroup::LongLived do
188
178
  expect( queue ).to receive( :message_count ).and_return( *samples )
189
179
 
190
180
  start = 1414002605
191
- start.upto( start + samples.size ) do |time|
181
+ start.upto( start + samples.size + 1 ) do |time|
192
182
  Timecop.freeze( time ) do
193
183
  task_group.adjust_workers
194
184
  end
195
185
  end
196
186
 
197
- expect( task_group.pids.size ).to eq( 1 )
187
+ expect( task_group.workers.size ).to eq( 1 )
198
188
  end
199
189
 
200
190
 
@@ -213,19 +203,18 @@ describe Symphony::TaskGroup::LongLived do
213
203
  and_return( queue )
214
204
 
215
205
  expect( queue ).to receive( :consumer_count ) do
216
- task_group.pids.size
206
+ task_group.workers.size
217
207
  end.at_least( :once )
218
208
  expect( queue ).to receive( :message_count ).and_return( *samples )
219
209
 
220
210
  start = 1414002605
221
- start.upto( start + samples.size ) do |time|
211
+ start.upto( start + samples.size + 1 ) do |time|
222
212
  Timecop.freeze( time ) do
223
213
  task_group.adjust_workers
224
214
  end
225
215
  end
226
216
 
227
- expect( task_group.started_one_worker? ).to be_truthy
228
- expect( task_group.pids.size ).to eq( 1 )
217
+ expect( task_group.workers.size ).to eq( 1 )
229
218
  end
230
219
 
231
220
  end