updater 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -39,6 +39,20 @@ end
 
  Spec::Rake::SpecTask.new do |t|
  t.warning = false
+ t.rcov = false
+ end
+
+ Spec::Rake::SpecTask.new do |t|
+ t.name="failing"
+ #todo Make this run only failing specs
+ t.warning = false
+ t.rcov = false
+ end
+
+ Spec::Rake::SpecTask.new do |t|
+ t.name="rcov"
+ t.warning = false
+ t.rcov = true
  end
 
  desc "run all tests"
data/VERSION CHANGED
@@ -1 +1 @@
- 0.2.2
+ 0.3.0
@@ -0,0 +1,427 @@
+ require 'updater/util'
+ #The content of this file is based on code from Unicorn by
+
+ module Updater
+
+ #This class repeatedly searches the database for active jobs and runs them
+ class ForkWorker
+ class WorkerMonitor < Struct.new(:number, :heartbeat)
+
+ def ==(other_number)
+ self.number == other_number
+ end
+ end
+
+ #######
+ # BEGIN Class Methods
+ #######
+
+ class <<self
+ QUEUE_SIGS = [:QUIT, :INT, :TERM, :USR1, :USR2, :HUP,
+ :TTIN, :TTOU ]
+
+ attr_accessor :logger
+ attr_reader :timeout, :pipe
+
+ def initial_setup(options)
+ unless logger
+ require 'logger'
+ @logger = Logger.new(STDOUT)
+ @logger.level = Logger::WARN
+ end
+ logger.info "***Setting Up Master Process***"
+ @max_workers = options[:workers] || 3
+ logger.info "Max Workers set to #{@max_workers}"
+ @timeout = options[:timeout] || 60
+ logger.info "Timeout set to #{@timeout} sec."
+ @current_workers = 1
+ @workers = {} #key is pid value is worker class
+ @uptime = Time.now
+ @downtime = Time.now
+ # Used to wakeup master process
+ if @self_pipe !=nil
+ @self_pipe.each {|io| io.close}
+ end
+ @self_pipe = IO.pipe
+ @wakeup_set = [@self_pipe.first]
+ @wakeup_set += [options[:sockets]].flatten.compact
+
+ #Communicate with Workers
+ if @pipe != nil
+ @pipe.each {|io| io.close}
+ end
+ @pipe = IO.pipe
+
+ @signal_queue = []
+ end
+
+ def handle_signal_queue
+ logger.debug { "Handeling Signal Queue: queue first = #{@signal_queue.first}" }
+ case @signal_queue.shift
+ when nil #routeen maintance
+ logger.debug "Running Routeen Maintance"
+ murder_lazy_workers
+ antisipate_workload
+ maintain_worker_count
+ master_sleep
+ true
+ when :QUIT, :INT
+ stop(true)
+ false
+ when :TERM
+ stop(false)
+ false
+ when :USR2, :DATA #wake up a child and get to work
+ @pipe.last.write_nonblock('.')
+ true
+ when :TTIN
+ @max_workers += 1
+ logger.warn "Maximum workers: #{@max_workers}"
+ when :TTOU
+ (@max_workers -= 1) < 1 and @max_workers = 1
+ logger.warn "Maximum workers: #{@max_workers}"
+ true
+ else
+ :noop
+ end
+ end
+
+ # Options:
+ # * :workers : the maximum number of worker threads
+ # * :timeout : how long can a worker be inactive before being killed
+ # * :sockets: 0 or more IO objects that should wake up master to alert it that new data is availible
+
+ def start(stream,options = {})
+ logger.info "=== ForkWorker Start ==="
+ logger.info " Pid = #{Process.pid}"
+ initial_setup(options) #need this for logger
+ logger.info "*** Starting Master Process***"
+ @stream = stream
+ logger.info "* Adding the first round of workers *"
+ maintain_worker_count
+ QUEUE_SIGS.each { |sig| trap_deferred(sig) }
+ trap(:CHLD) { |sig_nr| awaken_master }
+ logger.info "** Signal Traps Ready **"
+ logger.info "** master process ready **"
+ begin
+ error_count = 0
+ continue = true
+ while continue do
+ logger.debug "Master Process Awake"
+ reap_all_workers
+ continue = handle_signal_queue
+ error_count = 0
+ end
+ rescue Errno::EINTR
+ retry
+ rescue Object => e
+ logger.error "Unhandled master loop exception #{e.inspect}. (#{error_count})"
+ logger.error e.backtrace.join("\n")
+ error_count += 1
+ sleep 10 and retry unless error_count > 10
+ logger.fatal "10 consecutive errors! Abandoning Master process"
+ end
+ stop # gracefully shutdown all workers on our way out
+ logger.info "master process Exiting"
+ end
+
+ def stop(graceful = true)
+ trap(:USR2,"IGNORE")
+ [:INT,:TERM].each {|signal| trap(signal,"DEFAULT") }
+ puts "Quitting. I need 30 seconds to stop my workers..."
+ limit = Time.now + 30
+ signal_each_worker(graceful ? :QUIT : :TERM)
+ until @workers.empty? || Time.now > limit
+ sleep(0.1)
+ reap_all_workers
+ end
+ signal_each_worker(:KILL)
+ end
+
+ def master_sleep
+ begin
+ timeout = calc_timeout
+ logger.debug { "Sleeping for #{timeout}" } #TODO return to debug
+ ready, _1, _2 = IO.select(@wakeup_set, nil, nil, timeout)
+ return unless ready && ready.first #just wakeup and run maintance
+ @signal_queue << :DATA unless ready.first == @self_pipe.first #somebody wants our attention
+ loop {ready.first.read_nonblock(16 * 1024)}
+ rescue Errno::EAGAIN, Errno::EINTR
+ end
+ end
+
+ def calc_timeout
+ Time.now - [@uptime, @downtime].max < @timeout ? @timeout / 8 : 2*@timeout
+ end
+
+ def awaken_master
+ begin
+ @self_pipe.last.write_nonblock('.') # wakeup master process from select
+ rescue Errno::EAGAIN, Errno::EINTR
+ # pipe is full, master should wake up anyways
+ retry
+ end
+ end
+
+ def queue_signal(signal)
+ if @signal_queue.size < 7
+ @signal_queue << signal
+ awaken_master
+ else
+ logger.error "ignoring SIG#{signal}, queue=#{@signal_queue.inspect}"
+ end
+ end
+
+ def trap_deferred(signal)
+ trap(signal) do |sig|
+ queue_signal(signal)
+ end
+ end
+
+ # this method determins how many workers should exist based on the known future load
+ # and sets @current_workers accordingly
+ def antisipate_workload
+ load = Update.load
+ antisipated = Update.future(2*@timeout)
+ if (load > @current_workers &&
+ @current_workers < @max_workers &&
+ (Time.now - (@downtime || 0)).to_i > 5 &&
+ (Time.now-(@uptime||0.0)).to_i > 1)
+ @current_workers += 1
+ @uptime = Time.now
+ end
+
+ if (load + antisipated + 1 < @current_workers &&
+ (Time.now-(@uptime||0.0)).to_i > 60 &&
+ (Time.now - (@downtime || 0)).to_i > 5)
+ @current_workers -= 1
+ @downtime = Time.now
+ end
+
+ if @current_workers > @max_workers
+ @current_workers = @max_workers
+ end
+ end
+
+ def maintain_worker_count
+ (off = @workers.size - @current_workers) == 0 and return
+ off < 0 and return spawn_missing_workers
+ @workers.dup.each_pair { |wpid,w|
+ w.number >= @current_workers and signal_worker(:QUIT, wpid) rescue nil
+ }
+ end
+
+ def spawn_missing_workers
+ (0...@current_workers).each do |worker_number|
+ @workers.values.include?(worker_number) and next
+ add_worker(worker_number)
+ end
+ end
+
+ def add_worker(worker_number)
+ worker = WorkerMonitor.new(worker_number,Updater::Util.tempio)
+ pid = Process.fork do
+ fork_cleanup
+ self.new(@pipe,worker).run
+ end
+ @workers[pid] = worker
+ logger.info "Added Worker #{worker.number}: pid=>#{pid}"
+ end
+
+ def fork_cleanup
+ QUEUE_SIGS.each { |signal| trap(signal,"IGNORE") }
+ if @self_pipe !=nil
+ @self_pipe.each {|io| io.close}
+ end
+ @workers = nil
+ @worker_set = nil
+ @signal_queue = nil
+ end
+
+ def signal_each_worker(signal)
+ @workers.keys.each { |wpid| signal_worker(signal, wpid)}
+ end
+
+ def signal_worker(signal, wpid)
+ Process.kill(signal,wpid)
+ rescue Errno::ESRCH
+ remove_worker(wpid)
+ end
+
+ def murder_lazy_workers
+ diff = stat = nil
+ @workers.dup.each_pair do |wpid, worker|
+ stat = begin
+ worker.heartbeat.stat
+ rescue => e
+ logger.warn "worker=#{worker.number} PID:#{wpid} stat error: #{e.inspect}"
+ signal_worker(:QUIT, wpid)
+ next
+ end
+ (diff = (Time.now - stat.ctime)) <= @timeout and next
+ logger.error "worker=#{worker.number} PID:#{wpid} timeout " \
+ "(#{diff}s > #{@timeout}s), killing"
+ signal_worker(:KILL, wpid) # take no prisoners for timeout violations
+ end
+ end
+
+ def remove_worker(wpid)
+ worker = @workers.delete(wpid) and worker.heartbeat.close rescue nil
+ logger.debug { "removing dead worker #{worker.number}" }
+ end
+
+ def reap_all_workers
+ loop do
+ wpid, status = Process.waitpid2(-1, Process::WNOHANG)
+ wpid or break
+ remove_worker(wpid)
+ end
+ rescue Errno::ECHILD
+ end
+
+ #A convinient method for testing. It builds a dummy workier without forking or regertering it.
+ def build
+ new(@pipe,WorkerMonitor.new(-1,Updater::Util.tempio))
+ end
+
+ end #class << self
+
+ #
+ #
+ ##################################################
+ # BEGIN Instacne methods
+ ##################################################
+ #
+ #
+ #
+
+ attr_accessor :logger
+ attr_reader :number
+
+ def initialize(pipe,worker)
+ @stream = pipe.first
+ @pipe = pipe #keep this so signals will wake things up
+ @heartbeat = worker.heartbeat
+ @number = worker.number
+ @timeout = self.class.timeout
+ @logger = self.class.logger
+ @m = 0 #uesd for heartbeat
+ end
+
+ #loop "forever" working off jobs from the queue
+ def run
+ @continue = true
+ heartbeat
+ trap(:QUIT) do
+ say "#{name} caught QUIT signal. Dieing gracefully"
+ @continue = false
+ @pipe.last.write '.'
+ trap(:QUIT,"IGNORE")
+ end
+ trap(:TERM) { Update.clear_locks(self); exit }
+ while @continue do
+ heartbeat
+ begin
+ delay = Update.work_off(self)
+ heartbeat
+ wait_for(delay) if @continue
+ rescue Exception=> e
+ say "Caught exception in Job Loop"
+ say e.message
+ say "||=========\n|| Backtrace\n|| " + e.backtrace.join("\n|| ") + "\n||========="
+ Update.clear_locks(self)
+ exit; #die and be replaced by the master process
+ end
+ end
+ Update.clear_locks(self)
+ end
+
+ def say(text)
+ puts text unless @quiet || logger
+ logger.info text if logger
+ end
+
+ #we need this because logger may be set to nil
+ def debug(text = nil)
+ text = yield if block_given? && logger && logger.level == 0
+ logger.debug text if logger
+ end
+
+ def name
+ "Fork Worker #{@number}"
+ end
+
+ # Let's Talk. This function was refactored out of #run because it is the most complex piece of functionality
+ # in the loop and needed to be tested. #run is difficult to test because it never returns. There is a great
+ # deal of straitagity here. This function ultimate job is to suspend the worker process for as long as possible.
+ # In doing so it saves the system resources. Waiting too long will cause catistrophic, cascading failure under
+ # even moderate load, while not waiting long enough will waist system resources under light load, reducing
+ # the ability to use the system for other things.
+ #
+ # There are a number of factors that determin the amount of time to wait. The simplest is this: if there are
+ # still jobs in the queue that can be run then this function needs to be as close to a NOOP as possible. Every
+ # delay is inviting more jobs to pile up before they can be run. The Job running code returns the number of
+ # seconds until the next job is availible. When it retruns 0 the system is under active load and jobs need to
+ # be worked without delay.
+ #
+ # On the other hand when the next job is some non-negitive number of seconds away the ideal behavior
+ # would be to wait until it is ready then run the next job the wake and run it. There are two difficulties here
+ # the first is the need to let the master process know that the worker is alive and has not hung. We use a
+ # heartbeat file discriptor which we periodically change ctimes on by changing its access mode. This is
+ # modeled the technique used in the Unicorn web server. Our difficult is that we must be prepaired for a
+ # much less consistant load then a web server. Within a single application there may be periods where jobs
+ # pile up and others where there is a compleatly empty queue for hours or days. There is also the issue of
+ # how long a job may take to run. Jobs should generally be kept on the order of +timeout+ seconds.
+ # a Job that is likely to significantly exceed that will need to be broken up into smaller pieces. This
+ # function on the other hand deals with no jobs being present. It must wake up the worker every timeout
+ # seconds inorder to exicute +heartbeat+ and keep it's self from being killed.
+ #
+ # The other consideration is a new job coming in while all workers are asleep. When this happens, the
+ # Master process will write to the shared pipe and one of the workers will be awoken by the system. To
+ # minimize the number of queue hits, it is necessary to try to remove a char representing a new job from
+ # the pipe every time one is present. The +smoke_pipe+ method handles this by attempting to remove a
+ # charactor from the pipe when it is called.
+ def wait_for(delay)
+ return unless @continue
+ delay ||= 356*24*60*60 #delay will be nil if there are no jobs. Wait a really long time in that case.
+ if delay <= 0 #more jobs are immidiatly availible
+ smoke_pipe(@stream)
+ return
+ end
+
+ #need to wait for another job
+ t = Time.now + delay
+ while Time.now < t && @continue
+ delay = [@timeout,t-Time.now].min
+ debug "No Jobs; #{name} sleeping for #{delay}: [#{@timeout},#{t - Time.now}].min"
+ wakeup,_1,_2 = select([@stream],nil,nil,delay)
+ heartbeat
+ if wakeup
+ return if smoke_pipe(wakeup.first)
+ end
+ end
+ end
+
+ # tries to pull a single charictor from the pipe (representing accepting one new job)
+ # returns true if it succeeds, false otherwise
+ def smoke_pipe(pipe)
+ debug { "#{name} smoking pipe (#{ts})" }
+ pipe.read_nonblock(1) #each char in the string represents a new job
+ debug { " done smoking (#{ts})" }
+ true
+ rescue Errno::EAGAIN, Errno::EINTR
+ false
+ end
+
+ def heartbeat
+ return unless @continue
+ debug "Heartbeat for worker #{name}"
+ @heartbeat.chmod(@m = 0 == @m ? 1 : 0)
+ end
+
+ def ts
+ Time.now.strftime("%H:%M:%S")
+ end
+ end
+
+ end
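
The file above introduces a preforking master/worker model adapted from Unicorn: the master traps its signals, forks up to :workers children, and kills any child whose heartbeat file has not changed within :timeout seconds; QUIT/INT stop gracefully, TERM stops immediately, and TTIN/TTOU raise or lower the worker cap. A minimal sketch of how a host script might start the master is below; the require path, socket path, and option values are assumptions for illustration and are not taken from this diff (the stream argument is simply stored by the master, so the same socket is reused here purely for illustration).

    require 'logger'
    require 'socket'
    require 'updater/fork_worker'   # assumed load path for the file shown above

    # Optional wake-up IO: anything passed via :sockets that becomes readable
    # makes the master queue a :DATA signal and nudge a sleeping child.
    wakeup = UNIXServer.new('/tmp/updater.sock')   # path is an assumption

    Updater::ForkWorker.logger = Logger.new(STDOUT)

    # :workers caps forked children (initial_setup defaults to 3),
    # :timeout is the heartbeat window in seconds (default 60).
    Updater::ForkWorker.start(wakeup, :workers => 5, :timeout => 60, :sockets => [wakeup])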
@@ -0,0 +1,172 @@
+ require "dm-core"
+ require "dm-types"
+
+ module Updater
+ module ORM
+ class DMChained
+ include ::DataMapper::Resource
+ storage_names[:default] = "update_chains"
+ property :id, Serial
+ end
+
+ class DataMapper
+
+ FINDER = :get
+ ID = :id
+
+ include ::DataMapper::Resource
+
+ storage_names[:default] = "updates"
+
+ property :id, Serial
+ property :time, Integer
+ property :target, Class
+ property :finder, String
+ property :finder_args, Yaml
+ property :method, String
+ property :method_args, Object, :lazy=>false
+ property :name, String
+ property :lock_name, String
+ property :persistant, Boolean
+
+ has n, :chains, :model=>'Updater::ORM::DMChained', :child_key=>[:caller_id]
+
+ #attempt to lock this record for the worker
+ def lock(worker)
+ return true if locked? && locked_by == worker.name
+ #all this to make sure the check and the lock are simultanious:
+ cnt = repository.update({properties[:lock_name]=>worker.name},self.class.all(:id=>self.id,:lock_name=>nil))
+ if 0 != cnt
+ @lock_name = worker.name
+ true
+ else
+ worker.say( "Worker #{worker.name} Failed to aquire lock on job #{id}" )
+ false
+ end
+ end
+
+ #def failure
+ #def failure=
+ #def success
+ #def success=
+ #def ensure
+ #def ensure=
+ %w{failure success ensure}.each do |mode|
+ define_method "#{mode}=" do |chain|
+ case chain
+ when self.class
+ chains.new(:target=>chain,:occasion=>mode)
+ when Updater::Update
+ chains.new(:target=>chain.orm,:occasion=>mode)
+ when Hash
+ chain.each do |target, params|
+ target = target.orm if target.kind_of? Updater::Update
+ chains.new(:target=>target,:params=>params, :occasion=>mode)
+ end
+ when Array
+ chain.each do |target|
+ target = target.orm if target.kind_of? Updater::Update
+ chains.new(:target=>target,:occasion=>mode)
+ end
+ end
+ end
+
+ define_method mode do
+ chains.all(:occasion=>mode)
+ end
+ end
+
+ #Useful, but not in API
+ def locked?
+ not @lock_name.nil?
+ end
+
+ #Useful, but not in API
+ def locked_by
+ @lock_name
+ end
+
+ class << self
+ def current
+ all(:time.lte=>tnow, :lock_name=>nil)
+ end
+
+ def current_load;current.count;end
+
+ def delayed
+ all(:time.gt=>tnow).count
+ end
+
+ def future(start, finish)
+ all(:time.gt=>start+tnow,:time.lt=>finish+tnow).count
+ end
+
+ def queue_time
+ nxt = self.first(:time.not=>nil,:lock_name=>nil, :order=>[:time.asc])
+ return nil unless nxt
+ return 0 if nxt.time <= tnow
+ return nxt.time - tnow
+ end
+
+ def lock_next(worker)
+ updates = worker_set
+ unless updates.empty?
+ #concept copied form delayed_job. If there are a number of
+ #different processes working on the queue, the niave approch
+ #would result in every instance trying to lock the same record.
+ #by shuffleing our results we greatly reduce the chances that
+ #multilpe workers try to lock the same process
+ updates = updates.to_a.sort_by{rand()}
+ updates.each do |u|
+ return u if u.lock(worker)
+ end
+ end
+ rescue DataObjects::ConnectionError
+ sleep 0.1
+ retry
+ end
+
+ def clear_locks(worker)
+ all(:lock_name=>worker.name).update(:lock_name=>nil)
+ end
+
+ def clear_all
+ all.destroy!
+ DMChained.all.destroy!
+ end
+
+ def for(mytarget, myfinder, myfinder_args, myname)
+ #TODO
+ end
+
+ private
+ #This returns a set of update requests.
+ #The first parameter is the maximum number to return (get a few other workers may be in compitition)
+ #The second optional parameter is a list of options to be past to DataMapper.
+ def worker_set(limit = 5, options={})
+ #TODO: add priority to this.
+ options = {:lock_name=>nil,:limit=>limit, :order=>[:time.asc]}.merge(options)
+ current.all(options)
+ end
+
+ def lock
+
+ end
+
+ def tnow
+ Updater::Update.time.now.to_i
+ end
+
+ end
+ end
+
+ class DMChained
+ belongs_to :caller, :model=>Updater::ORM::DataMapper, :child_key=>[:caller_id]
+ belongs_to :target, :model=>Updater::ORM::DataMapper, :child_key=>[:target_id]
+
+ property :params, Yaml, :nullable=>true
+ property :occasion, String, :nullable=>false
+ end
+
+ end#ORM
+ end#Updater
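
The DataMapper backend above expects very little from a worker object when handing out jobs: lock_next shuffles the unlocked, due rows (so competing processes do not all chase the same record) and calls #lock on each candidate, which only needs the worker's #name and #say. A rough sketch of that handshake follows; the stand-in worker struct is illustrative and not part of the gem.

    # Stand-in for a real worker; #lock and #clear_locks only use #name, and #lock calls #say on failure.
    DummyWorker = Struct.new(:name) do
      def say(text); puts text; end
    end

    worker = DummyWorker.new("worker-0")

    job = Updater::ORM::DataMapper.lock_next(worker)   # nil when nothing is due
    if job
      # ... run the job here ...
      Updater::ORM::DataMapper.clear_locks(worker)     # release anything still held under this name
    end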
@@ -6,7 +6,7 @@ require 'benchmark'
  module Updater
 
  #This class repeatedly searches the database for active jobs and runs them
- class Worker
+ class ThreadWorker
  cattr_accessor :logger
  attr_accessor :pid
  attr_accessor :name
@@ -39,11 +39,7 @@ module Updater
  puts text unless @quiet
  logger.info text if logger
  end
-
- def clear_locks
- Update.all(:lock_name=>@name).update(:lock_name=>nil)
- end
-
+
  def stop
  raise RuntimeError unless @t
  terminate_with @t
@@ -79,7 +75,7 @@ module Updater
  end
  end
  say "Worker thread exiting!"
- clear_locks
+ Update.clear_locks(self)
  end
  end
 
@@ -88,7 +84,7 @@ module Updater
  $exit = true
  t.run if t.alive?
  say "Forcing Shutdown" unless status = t.join(15) #Nasty inline assignment
- clear_locks
+ Update.clear_locks(self)
  exit status ? 0 : 1
  end
  end
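
These hunks rename Worker to ThreadWorker and drop its instance-level clear_locks in favour of Update.clear_locks(self), the same call the new ForkWorker makes, so lock release now lives with the ORM layer. One plausible wiring, assuming Updater::Update simply forwards to the backend shown earlier (this delegation is an assumption and is not part of the diff):

    module Updater
      class Update
        # Assumed delegation: release every lock held under this worker's name.
        def self.clear_locks(worker)
          ORM::DataMapper.clear_locks(worker)
        end
      end
    end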