updater 0.2.2 → 0.3.0

data/Rakefile CHANGED
@@ -39,6 +39,20 @@ end
 
 Spec::Rake::SpecTask.new do |t|
   t.warning = false
+   t.rcov = false
+ end
+
+ Spec::Rake::SpecTask.new do |t|
+   t.name = "failing"
+   #TODO: Make this run only failing specs
+   t.warning = false
+   t.rcov = false
+ end
+
+ Spec::Rake::SpecTask.new do |t|
+   t.name = "rcov"
+   t.warning = false
+   t.rcov = true
 end
 
 desc "run all tests"
data/VERSION CHANGED
@@ -1 +1 @@
- 0.2.2
+ 0.3.0
@@ -0,0 +1,427 @@
+ require 'updater/util'
+ # The content of this file is based on code from the Unicorn web server.
+
+ module Updater
+
+   # This class repeatedly searches the database for active jobs and runs them
+   class ForkWorker
+     class WorkerMonitor < Struct.new(:number, :heartbeat)
+
+       def ==(other_number)
+         self.number == other_number
+       end
+     end
+
+     #######
+     # BEGIN Class Methods
+     #######
+
+     class << self
+       QUEUE_SIGS = [:QUIT, :INT, :TERM, :USR1, :USR2, :HUP,
+                     :TTIN, :TTOU ]
+
+       attr_accessor :logger
+       attr_reader :timeout, :pipe
+
+       def initial_setup(options)
+         unless logger
+           require 'logger'
+           @logger = Logger.new(STDOUT)
+           @logger.level = Logger::WARN
+         end
+         logger.info "***Setting Up Master Process***"
+         @max_workers = options[:workers] || 3
+         logger.info "Max Workers set to #{@max_workers}"
+         @timeout = options[:timeout] || 60
+         logger.info "Timeout set to #{@timeout} sec."
+         @current_workers = 1
+         @workers = {} # key is pid, value is a WorkerMonitor
+         @uptime = Time.now
+         @downtime = Time.now
+         # Used to wake up the master process
+         if @self_pipe != nil
+           @self_pipe.each {|io| io.close}
+         end
+         @self_pipe = IO.pipe
+         @wakeup_set = [@self_pipe.first]
+         @wakeup_set += [options[:sockets]].flatten.compact
+
+         # Communicate with workers
+         if @pipe != nil
+           @pipe.each {|io| io.close}
+         end
+         @pipe = IO.pipe
+
+         @signal_queue = []
+       end
+
+       def handle_signal_queue
+         logger.debug { "Handling Signal Queue: queue first = #{@signal_queue.first}" }
+         case @signal_queue.shift
+         when nil # routine maintenance
+           logger.debug "Running Routine Maintenance"
+           murder_lazy_workers
+           antisipate_workload
+           maintain_worker_count
+           master_sleep
+           true
+         when :QUIT, :INT
+           stop(true)
+           false
+         when :TERM
+           stop(false)
+           false
+         when :USR2, :DATA # wake up a child and get to work
+           @pipe.last.write_nonblock('.')
+           true
+         when :TTIN
+           @max_workers += 1
+           logger.warn "Maximum workers: #{@max_workers}"
+         when :TTOU
+           (@max_workers -= 1) < 1 and @max_workers = 1
+           logger.warn "Maximum workers: #{@max_workers}"
+           true
+         else
+           :noop
+         end
+       end
+
+       # Options:
+       # * :workers : the maximum number of worker processes
+       # * :timeout : how long a worker can be inactive before being killed
+       # * :sockets : 0 or more IO objects that should wake the master to alert it that new data is available
+
+       def start(stream,options = {})
+         logger.info "=== ForkWorker Start ==="
+         logger.info " Pid = #{Process.pid}"
+         initial_setup(options) # need this for logger
+         logger.info "*** Starting Master Process***"
+         @stream = stream
+         logger.info "* Adding the first round of workers *"
+         maintain_worker_count
+         QUEUE_SIGS.each { |sig| trap_deferred(sig) }
+         trap(:CHLD) { |sig_nr| awaken_master }
+         logger.info "** Signal Traps Ready **"
+         logger.info "** master process ready **"
+         begin
+           error_count = 0
+           continue = true
+           while continue do
+             logger.debug "Master Process Awake"
+             reap_all_workers
+             continue = handle_signal_queue
+             error_count = 0
+           end
+         rescue Errno::EINTR
+           retry
+         rescue Object => e
+           logger.error "Unhandled master loop exception #{e.inspect}. (#{error_count})"
+           logger.error e.backtrace.join("\n")
+           error_count += 1
+           sleep 10 and retry unless error_count > 10
+           logger.fatal "10 consecutive errors! Abandoning Master process"
+         end
+         stop # gracefully shut down all workers on our way out
+         logger.info "master process Exiting"
+       end
+
+       def stop(graceful = true)
+         trap(:USR2,"IGNORE")
+         [:INT,:TERM].each {|signal| trap(signal,"DEFAULT") }
+         puts "Quitting. I need 30 seconds to stop my workers..."
+         limit = Time.now + 30
+         signal_each_worker(graceful ? :QUIT : :TERM)
+         until @workers.empty? || Time.now > limit
+           sleep(0.1)
+           reap_all_workers
+         end
+         signal_each_worker(:KILL)
+       end
+
+       def master_sleep
+         begin
+           timeout = calc_timeout
+           logger.debug { "Sleeping for #{timeout}" } #TODO return to debug
+           ready, _1, _2 = IO.select(@wakeup_set, nil, nil, timeout)
+           return unless ready && ready.first # just wake up and run maintenance
+           @signal_queue << :DATA unless ready.first == @self_pipe.first # somebody wants our attention
+           loop {ready.first.read_nonblock(16 * 1024)}
+         rescue Errno::EAGAIN, Errno::EINTR
+         end
+       end
+
+       def calc_timeout
+         Time.now - [@uptime, @downtime].max < @timeout ? @timeout / 8 : 2*@timeout
+       end
+
+       def awaken_master
+         begin
+           @self_pipe.last.write_nonblock('.') # wake up master process from select
+         rescue Errno::EAGAIN, Errno::EINTR
+           # pipe is full, master should wake up anyway
+           retry
+         end
+       end
+
+       def queue_signal(signal)
+         if @signal_queue.size < 7
+           @signal_queue << signal
+           awaken_master
+         else
+           logger.error "ignoring SIG#{signal}, queue=#{@signal_queue.inspect}"
+         end
+       end
+
+       def trap_deferred(signal)
+         trap(signal) do |sig|
+           queue_signal(signal)
+         end
+       end
+
+       # This method determines how many workers should exist based on the known future load
+       # and sets @current_workers accordingly
+       def antisipate_workload
+         load = Update.load
+         antisipated = Update.future(2*@timeout)
+         if (load > @current_workers &&
+             @current_workers < @max_workers &&
+             (Time.now - (@downtime || 0)).to_i > 5 &&
+             (Time.now-(@uptime||0.0)).to_i > 1)
+           @current_workers += 1
+           @uptime = Time.now
+         end
+
+         if (load + antisipated + 1 < @current_workers &&
+             (Time.now-(@uptime||0.0)).to_i > 60 &&
+             (Time.now - (@downtime || 0)).to_i > 5)
+           @current_workers -= 1
+           @downtime = Time.now
+         end
+
+         if @current_workers > @max_workers
+           @current_workers = @max_workers
+         end
+       end
+
+       def maintain_worker_count
+         (off = @workers.size - @current_workers) == 0 and return
+         off < 0 and return spawn_missing_workers
+         @workers.dup.each_pair { |wpid,w|
+           w.number >= @current_workers and signal_worker(:QUIT, wpid) rescue nil
+         }
+       end
+
+       def spawn_missing_workers
+         (0...@current_workers).each do |worker_number|
+           @workers.values.include?(worker_number) and next
+           add_worker(worker_number)
+         end
+       end
+
+       def add_worker(worker_number)
+         worker = WorkerMonitor.new(worker_number,Updater::Util.tempio)
+         pid = Process.fork do
+           fork_cleanup
+           self.new(@pipe,worker).run
+         end
+         @workers[pid] = worker
+         logger.info "Added Worker #{worker.number}: pid=>#{pid}"
+       end
+
+       def fork_cleanup
+         QUEUE_SIGS.each { |signal| trap(signal,"IGNORE") }
+         if @self_pipe != nil
+           @self_pipe.each {|io| io.close}
+         end
+         @workers = nil
+         @worker_set = nil
+         @signal_queue = nil
+       end
+
+       def signal_each_worker(signal)
+         @workers.keys.each { |wpid| signal_worker(signal, wpid)}
+       end
+
+       def signal_worker(signal, wpid)
+         Process.kill(signal,wpid)
+       rescue Errno::ESRCH
+         remove_worker(wpid)
+       end
+
+       def murder_lazy_workers
+         diff = stat = nil
+         @workers.dup.each_pair do |wpid, worker|
+           stat = begin
+             worker.heartbeat.stat
+           rescue => e
+             logger.warn "worker=#{worker.number} PID:#{wpid} stat error: #{e.inspect}"
+             signal_worker(:QUIT, wpid)
+             next
+           end
+           (diff = (Time.now - stat.ctime)) <= @timeout and next
+           logger.error "worker=#{worker.number} PID:#{wpid} timeout " \
+             "(#{diff}s > #{@timeout}s), killing"
+           signal_worker(:KILL, wpid) # take no prisoners for timeout violations
+         end
+       end
+
+       def remove_worker(wpid)
+         worker = @workers.delete(wpid) and worker.heartbeat.close rescue nil
+         logger.debug { "removing dead worker #{worker.number}" }
+       end
+
+       def reap_all_workers
+         loop do
+           wpid, status = Process.waitpid2(-1, Process::WNOHANG)
+           wpid or break
+           remove_worker(wpid)
+         end
+       rescue Errno::ECHILD
+       end
+
+       # A convenient method for testing. It builds a dummy worker without forking or registering it.
+       def build
+         new(@pipe,WorkerMonitor.new(-1,Updater::Util.tempio))
+       end
+
+     end #class << self
+
+     #
+     #
+     ##################################################
+     # BEGIN Instance methods
+     ##################################################
+     #
+     #
+     #
+
+     attr_accessor :logger
+     attr_reader :number
+
+     def initialize(pipe,worker)
+       @stream = pipe.first
+       @pipe = pipe # keep this so signals will wake things up
+       @heartbeat = worker.heartbeat
+       @number = worker.number
+       @timeout = self.class.timeout
+       @logger = self.class.logger
+       @m = 0 # used for heartbeat
+     end
+
+     # loop "forever" working off jobs from the queue
+     def run
+       @continue = true
+       heartbeat
+       trap(:QUIT) do
+         say "#{name} caught QUIT signal. Dying gracefully"
+         @continue = false
+         @pipe.last.write '.'
+         trap(:QUIT,"IGNORE")
+       end
+       trap(:TERM) { Update.clear_locks(self); exit }
+       while @continue do
+         heartbeat
+         begin
+           delay = Update.work_off(self)
+           heartbeat
+           wait_for(delay) if @continue
+         rescue Exception => e
+           say "Caught exception in Job Loop"
+           say e.message
+           say "||=========\n|| Backtrace\n|| " + e.backtrace.join("\n|| ") + "\n||========="
+           Update.clear_locks(self)
+           exit # die and be replaced by the master process
+         end
+       end
+       Update.clear_locks(self)
+     end
+
+     def say(text)
+       puts text unless @quiet || logger
+       logger.info text if logger
+     end
+
+     # we need this because logger may be set to nil
+     def debug(text = nil)
+       text = yield if block_given? && logger && logger.level == 0
+       logger.debug text if logger
+     end
+
+     def name
+       "Fork Worker #{@number}"
+     end
+
+     # Let's talk. This method was refactored out of #run because it is the most complex piece of functionality
+     # in the loop and needed to be tested; #run is difficult to test because it never returns. There is a great
+     # deal of strategy here. This method's ultimate job is to suspend the worker process for as long as possible,
+     # thereby saving system resources. Waiting too long will cause catastrophic, cascading failure under
+     # even moderate load, while not waiting long enough will waste system resources under light load, reducing
+     # the ability to use the system for other things.
+     #
+     # There are a number of factors that determine the amount of time to wait. The simplest is this: if there are
+     # still jobs in the queue that can be run, then this method needs to be as close to a NOOP as possible. Every
+     # delay invites more jobs to pile up before they can be run. The job-running code returns the number of
+     # seconds until the next job is available. When it returns 0 the system is under active load and jobs need to
+     # be worked without delay.
+     #
+     # On the other hand, when the next job is some positive number of seconds away, the ideal behavior
+     # would be to sleep until it is ready, then wake and run it. There are two difficulties here.
+     # The first is the need to let the master process know that the worker is alive and has not hung. We use a
+     # heartbeat file descriptor whose ctime we periodically change by toggling its access mode. This is
+     # modeled on the technique used in the Unicorn web server. Our difficulty is that we must be prepared for a
+     # much less consistent load than a web server. Within a single application there may be periods where jobs
+     # pile up and others where the queue is completely empty for hours or days. There is also the issue of
+     # how long a job may take to run. Jobs should generally be kept on the order of +timeout+ seconds;
+     # a job that is likely to significantly exceed that will need to be broken up into smaller pieces. This
+     # method, on the other hand, deals with no jobs being present. It must wake up the worker every +timeout+
+     # seconds in order to execute +heartbeat+ and keep itself from being killed.
+     #
+     # The other consideration is a new job coming in while all workers are asleep. When this happens, the
+     # master process will write to the shared pipe and one of the workers will be awoken by the system. To
+     # minimize the number of queue hits, it is necessary to try to remove a character representing a new job from
+     # the pipe every time one is present. The +smoke_pipe+ method handles this by attempting to remove a
+     # character from the pipe when it is called.
+     def wait_for(delay)
+       return unless @continue
+       delay ||= 356*24*60*60 # delay will be nil if there are no jobs. Wait a really long time in that case.
+       if delay <= 0 # more jobs are immediately available
+         smoke_pipe(@stream)
+         return
+       end
+
+       # need to wait for another job
+       t = Time.now + delay
+       while Time.now < t && @continue
+         delay = [@timeout,t-Time.now].min
+         debug "No Jobs; #{name} sleeping for #{delay}: [#{@timeout},#{t - Time.now}].min"
+         wakeup,_1,_2 = select([@stream],nil,nil,delay)
+         heartbeat
+         if wakeup
+           return if smoke_pipe(wakeup.first)
+         end
+       end
+     end
+
+     # Tries to pull a single character from the pipe (representing accepting one new job).
+     # Returns true if it succeeds, false otherwise.
+     def smoke_pipe(pipe)
+       debug { "#{name} smoking pipe (#{ts})" }
+       pipe.read_nonblock(1) # each char in the string represents a new job
+       debug { " done smoking (#{ts})" }
+       true
+     rescue Errno::EAGAIN, Errno::EINTR
+       false
+     end
+
+     def heartbeat
+       return unless @continue
+       debug "Heartbeat for worker #{name}"
+       @heartbeat.chmod(@m = 0 == @m ? 1 : 0)
+     end
+
+     def ts
+       Time.now.strftime("%H:%M:%S")
+     end
+   end
+
+ end
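
The file above (new in 0.3.0) adds a preforking master/worker runner modeled on Unicorn. A minimal usage sketch follows; it is not part of the diff, and the require 'updater' entry point and the pipe wiring are assumptions based only on the code shown above.

    require 'logger'
    require 'updater'   # assumed entry point for the gem

    # An IO pair the application writes to when new jobs are queued;
    # IOs passed in :sockets are added to the master's select() wake-up set.
    reader, writer = IO.pipe

    Updater::ForkWorker.logger = Logger.new(STDOUT)
    Updater::ForkWorker.start(reader,
                              :workers => 5,        # upper bound on forked worker processes
                              :timeout => 60,       # seconds before a silent worker is killed
                              :sockets => [reader]) # extra IOs that wake the sleeping master
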
@@ -0,0 +1,172 @@
+ require "dm-core"
+ require "dm-types"
+
+ module Updater
+   module ORM
+     class DMChained
+       include ::DataMapper::Resource
+       storage_names[:default] = "update_chains"
+       property :id, Serial
+     end
+
+     class DataMapper
+
+       FINDER = :get
+       ID = :id
+
+       include ::DataMapper::Resource
+
+       storage_names[:default] = "updates"
+
+       property :id, Serial
+       property :time, Integer
+       property :target, Class
+       property :finder, String
+       property :finder_args, Yaml
+       property :method, String
+       property :method_args, Object, :lazy=>false
+       property :name, String
+       property :lock_name, String
+       property :persistant, Boolean
+
+       has n, :chains, :model=>'Updater::ORM::DMChained', :child_key=>[:caller_id]
+
+       # attempt to lock this record for the worker
+       def lock(worker)
+         return true if locked? && locked_by == worker.name
+         # all this to make sure the check and the lock are simultaneous:
+         cnt = repository.update({properties[:lock_name]=>worker.name},self.class.all(:id=>self.id,:lock_name=>nil))
+         if 0 != cnt
+           @lock_name = worker.name
+           true
+         else
+           worker.say( "Worker #{worker.name} failed to acquire lock on job #{id}" )
+           false
+         end
+       end
+
+       #def failure
+       #def failure=
+       #def success
+       #def success=
+       #def ensure
+       #def ensure=
+       %w{failure success ensure}.each do |mode|
+         define_method "#{mode}=" do |chain|
+           case chain
+           when self.class
+             chains.new(:target=>chain,:occasion=>mode)
+           when Updater::Update
+             chains.new(:target=>chain.orm,:occasion=>mode)
+           when Hash
+             chain.each do |target, params|
+               target = target.orm if target.kind_of? Updater::Update
+               chains.new(:target=>target,:params=>params, :occasion=>mode)
+             end
+           when Array
+             chain.each do |target|
+               target = target.orm if target.kind_of? Updater::Update
+               chains.new(:target=>target,:occasion=>mode)
+             end
+           end
+         end
+
+         define_method mode do
+           chains.all(:occasion=>mode)
+         end
+       end
+
+       # Useful, but not in API
+       def locked?
+         not @lock_name.nil?
+       end
+
+       # Useful, but not in API
+       def locked_by
+         @lock_name
+       end
+
+       class << self
+         def current
+           all(:time.lte=>tnow, :lock_name=>nil)
+         end
+
+         def current_load; current.count; end
+
+         def delayed
+           all(:time.gt=>tnow).count
+         end
+
+         def future(start, finish)
+           all(:time.gt=>start+tnow,:time.lt=>finish+tnow).count
+         end
+
+         def queue_time
+           nxt = self.first(:time.not=>nil,:lock_name=>nil, :order=>[:time.asc])
+           return nil unless nxt
+           return 0 if nxt.time <= tnow
+           return nxt.time - tnow
+         end
+
+         def lock_next(worker)
+           updates = worker_set
+           unless updates.empty?
+             # Concept copied from delayed_job. If there are a number of
+             # different processes working on the queue, the naive approach
+             # would result in every instance trying to lock the same record.
+             # By shuffling our results we greatly reduce the chances that
+             # multiple workers try to lock the same record.
+             updates = updates.to_a.sort_by{rand()}
+             updates.each do |u|
+               return u if u.lock(worker)
+             end
+           end
+         rescue DataObjects::ConnectionError
+           sleep 0.1
+           retry
+         end
+
+         def clear_locks(worker)
+           all(:lock_name=>worker.name).update(:lock_name=>nil)
+         end
+
+         def clear_all
+           all.destroy!
+           DMChained.all.destroy!
+         end
+
+         def for(mytarget, myfinder, myfinder_args, myname)
+           #TODO
+         end
+
+         private
+         # This returns a set of update requests.
+         # The first parameter is the maximum number to return (a few, since other workers may be in competition).
+         # The second, optional parameter is a hash of options to be passed to DataMapper.
+         def worker_set(limit = 5, options={})
+           #TODO: add priority to this.
+           options = {:lock_name=>nil,:limit=>limit, :order=>[:time.asc]}.merge(options)
+           current.all(options)
+         end
+
+         def lock
+
+         end
+
+         def tnow
+           Updater::Update.time.now.to_i
+         end
+
+       end
+     end
+
+     class DMChained
+       belongs_to :caller, :model=>Updater::ORM::DataMapper, :child_key=>[:caller_id]
+       belongs_to :target, :model=>Updater::ORM::DataMapper, :child_key=>[:target_id]
+
+       property :params, Yaml, :nullable=>true
+       property :occasion, String, :nullable=>false
+     end
+
+   end #ORM
+ end #Updater
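
The ORM adapter above centralizes job locking. The sketch below (not part of the diff) shows how a worker object might claim and release jobs; the adapter only requires the worker to respond to #name and #say, and the stand-in worker here is purely illustrative.

    # Hypothetical stand-in worker; any object with #name and #say will do.
    worker = Struct.new(:name) do
      def say(text); puts text; end
    end.new("worker-1")

    job = Updater::ORM::DataMapper.lock_next(worker) # shuffles candidates, then tries #lock on each
    if job
      # job.lock_name is now worker.name; run the job, then release any locks still held
      Updater::ORM::DataMapper.clear_locks(worker)
    end
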
@@ -6,7 +6,7 @@ require 'benchmark'
 module Updater
 
 #This class repeatedly searches the database for active jobs and runs them
- class Worker
+ class ThreadWorker
 cattr_accessor :logger
 attr_accessor :pid
 attr_accessor :name
@@ -39,11 +39,7 @@ module Updater
 puts text unless @quiet
 logger.info text if logger
 end
-
- def clear_locks
- Update.all(:lock_name=>@name).update(:lock_name=>nil)
- end
-
+
 def stop
 raise RuntimeError unless @t
 terminate_with @t
@@ -79,7 +75,7 @@ module Updater
 end
 end
 say "Worker thread exiting!"
- clear_locks
+ Update.clear_locks(self)
 end
 end
 
@@ -88,7 +84,7 @@ module Updater
 $exit = true
 t.run if t.alive?
 say "Forcing Shutdown" unless status = t.join(15) #Nasty inline assignment
- clear_locks
+ Update.clear_locks(self)
 exit status ? 0 : 1
 end
 end
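
The hunks above replace the worker's private clear_locks with a call to Update.clear_locks(self). The Update facade itself is not shown in this diff; the sketch below is only a guess at the delegation it implies, wired to the ORM adapter introduced earlier.

    module Updater
      class Update
        # Assumed wiring: release every lock held by the given worker by
        # delegating to the new ORM layer rather than querying Update directly.
        def self.clear_locks(worker)
          ORM::DataMapper.clear_locks(worker)
        end
      end
    end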