right_chimp 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGES +88 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +49 -0
- data/INSTALL +25 -0
- data/LICENSE +20 -0
- data/README +80 -0
- data/Rakefile +18 -0
- data/bin/chimp +12 -0
- data/bin/chimpd +12 -0
- data/chimp.gemspec +27 -0
- data/lib/right_chimp/Chimp.rb +1181 -0
- data/lib/right_chimp/IDManager.rb +18 -0
- data/lib/right_chimp/Log.rb +34 -0
- data/lib/right_chimp/daemon/ChimpDaemon.rb +416 -0
- data/lib/right_chimp/daemon/ChimpDaemonClient.rb +74 -0
- data/lib/right_chimp/exec/ExecArray.rb +43 -0
- data/lib/right_chimp/exec/ExecCallback.rb +15 -0
- data/lib/right_chimp/exec/ExecNoop.rb +9 -0
- data/lib/right_chimp/exec/ExecReport.rb +57 -0
- data/lib/right_chimp/exec/ExecRightScript.rb +35 -0
- data/lib/right_chimp/exec/ExecSSH.rb +45 -0
- data/lib/right_chimp/exec/Executor.rb +180 -0
- data/lib/right_chimp/queue/ChimpQueue.rb +187 -0
- data/lib/right_chimp/queue/ExecutionGroup.rb +277 -0
- data/lib/right_chimp/queue/QueueWorker.rb +42 -0
- data/lib/right_chimp/templates/all_jobs.erb +214 -0
- data/lib/right_chimp/version.rb +3 -0
- data/lib/right_chimp.rb +32 -0
- data/spec/spec_chimp.rb +20 -0
- data/spec/spec_chimp_commandline.rb +55 -0
- data/spec/spec_chimpd.rb +33 -0
- data/spec/spec_chimpd_client.rb +44 -0
- data/spec/spec_execution_group.rb +135 -0
- data/spec/spec_queue.rb +37 -0
- data/spec/spec_queue_worker.rb +30 -0
- data/spec/spec_selection.rb +33 -0
- metadata +159 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Run an SSH script
|
|
3
|
+
#
|
|
4
|
+
module Chimp
|
|
5
|
+
class ExecSSH < Executor
  # Remote account used for the SSH login. When left nil, #run sets it to "root".
  attr_accessor :ssh_user

  #
  # h is the options hash handed to Executor#initialize; this class
  # additionally reads h[:ssh_user].
  #
  def initialize(h={})
    super(h)
    @ssh_user = h[:ssh_user]
  end

  #
  # Run @exec on the target server through the local "ssh" binary.
  #
  # The address is read from @server['ip_address'] or @server['ip-address'].
  # When neither is present, @server.settings is called once and the lookup
  # is repeated (presumably settings refreshes the server record -- TODO confirm).
  #
  # The work itself happens inside run_with_retry, so failures raised here
  # are handled by the superclass. A RuntimeError is raised when no address
  # can be found or when ssh does not exit successfully.
  #
  def run
    @ssh_user ||= "root"
    host = server_address

    if host.nil?
      @server.settings
      host = server_address
    end

    run_with_retry do
      raise "No IP address available for server \"#{@server['nickname']}\"" if host.nil?

      Log.debug "ssh #{@ssh_user}@#{host} \"#{@exec}\""

      # Pass the arguments as a list so that no local shell is involved:
      # the command text reaches ssh verbatim instead of being re-parsed
      # (and possibly expanded or broken by embedded quotes) locally.
      success = system("ssh", "-q",
                       "-o", "UserKnownHostsFile=/dev/null",
                       "-o", "StrictHostKeyChecking=no",
                       "#{@ssh_user}@#{host}", @exec.to_s)

      raise "SSH failed with status: #{$?}" unless success
    end
  end

  #
  # One-line description of this job, used in log output.
  #
  def describe_work
    "ExecSSH job_id=#{@job_id} command=\"#{@exec}\" server=\"#{@server['nickname']}\""
  end

  #
  # The command this executor runs, as a string.
  #
  def info
    @exec.to_s
  end

  #
  # Nickname of the server this executor runs against.
  #
  def target
    @server['nickname']
  end

  private

  #
  # Address of the target server, or nil when the record has none.
  #
  def server_address
    @server['ip_address'] || @server['ip-address']
  end
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Superclass for Executors-- objects that run things on servers
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
module Chimp
|
|
6
|
+
class Executor
  attr_accessor :server, :array, :exec, :inputs, :template, :owner, :group,
                :job_id, :status, :dry_run, :verbose, :quiet, :timeout,
                :retry_count, :retry_sleep, :time_start, :time_end

  attr_reader :error, :results

  # Life-cycle states stored in @status.
  STATUS_NONE = :none
  STATUS_RUNNING = :running
  STATUS_RETRYING = :retrying
  STATUS_ERROR = :error
  STATUS_DONE = :done

  #
  # Build an executor from an options hash.
  #
  # Keys read: :server, :array, :template, :job_id, :group, :exec, :inputs,
  # :verbose, :retry_count (default 0), :retry_sleep (default 30) and
  # :timeout (default 3600). The numeric options are converted with to_i.
  #
  def initialize(h={})
    @server = h[:server] || nil
    @array = h[:array] || nil
    @template = h[:template] || nil

    @job_id = h[:job_id] || nil
    @group = h[:group] || nil
    @exec = h[:exec] || nil
    @inputs = h[:inputs] || nil

    @verbose = h[:verbose] || false

    # Apply the default BEFORE converting: nil.to_i is 0 and 0 is truthy,
    # so "value.to_i || default" can never fall through to the default.
    @retry_count = (h[:retry_count] || 0).to_i
    @retry_sleep = (h[:retry_sleep] || 30).to_i
    @timeout = (h[:timeout] || 3600).to_i

    @error = nil
    @status = STATUS_NONE
    @owner = nil
    @dry_run = false
    @quiet = false

    @time_start = nil
    @time_end = nil
    @results = nil
  end

  #
  # Return total execution time (real) of a job, in whole seconds.
  # 0 when the job has not started; time elapsed so far when it has
  # started but not finished.
  #
  def get_total_exec_time
    return 0 if @time_start.nil?

    finished_at = @time_end.nil? ? Time.now : @time_end
    finished_at.to_i - @time_start.to_i
  end

  #
  # Convenience method to requeue this job through its group
  #
  def requeue
    @group.requeue(self.job_id)
  end

  #
  # Convenience method to cancel this job through its group
  #
  def cancel
    @group.cancel(self.job_id)
  end

  #
  # Perform the work. Subclasses must override this.
  #
  def run
    raise "run method must be overridden"
  end

  #
  # return info on what this executor does -- eg name of script or command
  #
  def info
    raise "unimplemented"
  end

  #
  # Name of what this executor runs against; subclasses override.
  #
  def target
    "UNKNOWN"
  end

  protected

  #
  # Run a unit of work with retries
  # This is called from the subclass with a code block to yield to
  #
  # The block is skipped entirely when @dry_run is set. On success the
  # status becomes STATUS_DONE and the group is notified, provided the job
  # still has an owner. On failure the block is re-run up to @retry_count
  # times, sleeping @retry_sleep seconds between attempts; after that the
  # status becomes STATUS_ERROR and the exception is kept in @error.
  #
  def run_with_retry(&block)
    @status = STATUS_RUNNING
    @time_start = Time.now
    Log.info self.describe_work_start unless @quiet

    #
    # The inner level of exception handling here tries to catch anything
    # that can be easily retried or failed-- normal exceptions.
    #
    # The outer level of exception handling handles weird stuff; for example,
    # sometimes rest_connection raises RuntimeError exceptions...
    #
    # This fixes acu75562.
    #
    begin
      begin
        yield if not @dry_run

        if @owner != nil
          @status = STATUS_DONE
          @group.job_completed
        else
          Log.warn "Ownership of job_id #{job_id} lost. User cancelled operation?"
        end

      rescue SystemExit, Interrupt => ex
        # Shutdown requests are never retried; pass them on.
        $stderr.puts "Exiting!"
        raise ex

      rescue StandardError => ex
        # This clause must not name Interrupt: the clause above already
        # takes it, which would leave the retry logic unreachable.
        name = @array['name'] if @array
        name = @server['name'] || @server['nickname'] if @server
        Log.error self.describe_work_error

        if @retry_count > 0
          @status = STATUS_RETRYING
          Log.error "Error executing on \"#{name}\". Retrying in #{@retry_sleep} seconds..."
          @retry_count -= 1
          sleep @retry_sleep
          retry
        end

        @status = STATUS_ERROR
        @error = ex
        Log.error "Error executing on \"#{name}\": #{ex}"

      ensure
        @time_end = Time.now
        Log.info self.describe_work_done unless @quiet
      end

    rescue RuntimeError => ex
      # Reached only for a RuntimeError raised by the handlers above.
      Log.error "Caught RuntimeError: #{ex}. Aborting job."
      Log.error ex.inspect
      Log.error ex.backtrace
      @status = STATUS_ERROR
      @error = ex
    end
  end

  #
  # This method should be overridden on Executor subclasses
  # to provide a human readable description of the work
  # being performed.
  #
  def describe_work
    "#{self.class.name} job_id=#{@job_id}"
  end

  # Log line emitted when work starts.
  def describe_work_start
    "#{self.describe_work} status=START"
  end

  # Log line emitted when work ends, with the elapsed seconds.
  def describe_work_done
    "#{self.describe_work} status=END time=#{@time_end.to_i-@time_start.to_i}s"
  end

  # Longer variant of describe_work_done including both timestamps.
  def describe_work_done_long
    "#{self.describe_work} status=END time_start=#{@time_start.to_i} time_end=#{@time_end.to_i} time_total=#{@time_end.to_i-@time_start.to_i}"
  end

  # Log line emitted when work fails.
  def describe_work_error
    "#{self.describe_work} status=ERROR"
  end
end
|
|
180
|
+
end
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
module Chimp
|
|
2
|
+
#
|
|
3
|
+
# The ChimpQueue is a singleton that contains the
|
|
4
|
+
# chimp work queue
|
|
5
|
+
#
|
|
6
|
+
class ChimpQueue
  include Singleton

  attr_accessor :delay, :retry_count, :max_threads, :group

  #
  # Set the defaults (no delay, no retries, 10 worker threads) and
  # create the :default group.
  #
  def initialize
    @delay = 0
    @retry_count = 0
    @max_threads = 10
    @workers_never_exit = true
    @threads = []

    self.reset!
  end

  #
  # Reset the queue and the :default group
  #
  # This doesn't do anything to the groups's jobs
  #
  def reset!
    @group = {}
    @group[:default] = ParallelExecutionGroup.new(:default)
  end

  #
  # Start up queue runners: sort every group's queue, then spawn
  # max_threads threads that each run a QueueWorker configured with this
  # queue's delay and retry_count.
  #
  def start
    self.sort_queues!

    (1..max_threads).each do |i|
      @threads << Thread.new(i) do
        worker = QueueWorker.new
        worker.delay = @delay
        worker.retry_count = @retry_count
        worker.run
      end
    end
  end

  #
  # Push a task into the queue
  #
  # g is the group name (created on demand), w the work item.
  # Raises a RuntimeError when g is nil or false.
  #
  def push(g, w)
    raise "no group specified" unless g
    create_group(g) unless ChimpQueue[g]
    ChimpQueue[g].push(w)
  end

  #
  # Create an execution group and register it under name.
  #
  # NOTE(review): the type argument is overwritten with :parallel below, so
  # callers cannot currently obtain a serial group through this method.
  # Kept as found -- confirm whether that is deliberate.
  #
  def create_group(name, type = :parallel, concurrency = 1)
    type = :parallel
    Log.debug "Creating new execution group #{name} type=#{type} concurrency=#{concurrency}"
    new_group = ExecutionGroupFactory.from_type(type)
    new_group.group_id = name
    new_group.concurrency = concurrency
    ChimpQueue[name] = new_group
  end

  #
  # Grab the next work item from the first group that reports ready?
  # Returns nil when no group is ready.
  #
  def shift
    ready_group = @group.values.find { |group| group.ready? }
    ready_group ? ready_group.shift : nil
  end

  #
  # Wait until a group is done
  #
  # While group g is running, every worker thread is joined for up to one
  # second and the given block is yielded to after each join. A "break" in
  # the caller's block ends the wait early.
  #
  def wait_until_done(g, &block)
    while @group[g].running?
      @threads.each do |t|
        t.join(1)
        yield
      end
    end
  end

  #
  # Quit - empty the queue and wait for remaining jobs to complete
  #
  # At most 30 one-second ticks are spent waiting, counted across all
  # groups; after that the worker threads are killed.
  #
  def quit
    i = 0
    @group.keys.each do |group|
      wait_until_done(group) do
        if i < 30
          sleep 1
          i += 1
          print "."
        else
          break
        end
      end
    end

    @threads.each { |t| t.kill }
    puts " done."
  end

  #
  # Run all threads forever (used by chimpd)
  #
  # A single call joins each worker thread for up to five seconds.
  #
  def run_threads
    @threads.each do |t|
      t.join(5)
    end
  end

  #
  # return the total number of queued (non-executing) objects
  #
  def size
    @group.values.inject(0) { |total, group| total + group.size }
  end

  #
  # Allow the groups to be accessed as ChimpQueue[:foo]
  #
  def self.[](group)
    ChimpQueue.instance.group[group]
  end

  def self.[]=(k,v)
    ChimpQueue.instance.group[k] = v
  end

  #
  # Return an array of all jobs with the requested
  # status.
  #
  def get_jobs_by_status(status)
    @group.values.inject([]) do |matches, group|
      found = group.get_jobs_by_status(status)
      (found.nil? || found == []) ? matches : matches + found
    end
  end

  #
  # Return the job whose job_id equals id, or nil when there is none.
  #
  def get_job(id)
    # find (rather than each + return) so an unknown id yields nil instead
    # of the whole job list.
    get_jobs.find { |j| j.job_id == id }
  end

  #
  # Return an array with the jobs of every group.
  #
  def get_jobs
    @group.values.each_with_object([]) do |group, all|
      group.get_jobs.each { |job| all << job }
    end
  end

  #############################################################
  protected

  #
  # Sort all the things, er, queues
  #
  def sort_queues!
    @group.values.each { |group| group.sort! }
  end

end
|
|
187
|
+
end
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
module Chimp
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Factory
|
|
5
|
+
#
|
|
6
|
+
class ExecutionGroupFactory
  #
  # Build an execution group, without a group id, for the given type.
  #
  # :serial yields a SerialExecutionGroup and :parallel a
  # ParallelExecutionGroup; anything else raises a RuntimeError.
  #
  def self.from_type(type)
    case type
    when :serial
      SerialExecutionGroup.new(nil)
    when :parallel
      ParallelExecutionGroup.new(nil)
    else
      raise "invalid execution group type specified"
    end
  end
end
|
|
17
|
+
|
|
18
|
+
#
|
|
19
|
+
# An ExecutionGroup contains a set of Executors to be processed
|
|
20
|
+
#
|
|
21
|
+
# Only the subclasses SerialExecutionGroup and ParallelExecutionGroup
|
|
22
|
+
# should be used directly.
|
|
23
|
+
#
|
|
24
|
+
class ExecutionGroup
  attr_accessor :group_id, :description, :concurrency
  attr_reader :time_start, :time_end

  #
  # Create an empty group. @queue holds the jobs still waiting to be
  # taken; @jobs_by_id remembers every job ever pushed, keyed by job_id.
  #
  def initialize(new_group_id=nil)
    @group_id = new_group_id
    @queue = []
    @jobs_by_id = {}
    @log = nil
    @time_start = nil
    @time_end = nil
    @concurrency = 1
  end

  #
  # Add something to the work queue
  #
  # A job without a job_id gets one from IDManager. The job's group is set
  # to this group. Raises a RuntimeError when j is nil.
  #
  def push(j)
    raise "invalid work" if j.nil?
    j.job_id = IDManager.get if j.job_id.nil?
    j.group = self
    @queue.push(j)
    @jobs_by_id[j.job_id] = j
  end

  #
  # Take something from the queue
  #
  # The first call also records the group's start time.
  #
  def shift
    x = @queue.shift
    @time_start = Time.now if @time_start.nil?
    x
  end

  #
  # Return one result hash per job
  #
  # Jobs that are nil or have no server produce a nil entry, so the
  # returned array always has one element per known job.
  #
  def results
    get_jobs.map do |task|
      next if task.nil? || task.server.nil?

      {
        :job_id => task.job_id,
        :name   => task.info,
        :host   => task.server['nickname'] || task.server['name'],
        :status => task.status,
        :error  => task.error,
        :total  => self.get_total_execution_time(task.status, task.time_start, task.time_end),
        :start  => task.time_start,
        :end    => task.time_end,
        :worker => task
      }
    end
  end

  #
  # Size of the active queue
  #
  def size
    @queue.size
  end

  #
  # Sort queue by server nickname
  #
  # Jobs without a server or without a nickname sort first instead of
  # aborting the sort.
  #
  def sort!
    return if @queue.nil?

    @queue.sort! { |a, b| nickname_sort_key(a) <=> nickname_sort_key(b) }
  end

  #
  # Reset the queue
  #
  # Only the waiting queue is emptied; the job index is left untouched.
  #
  def reset!
    @queue = []
  end

  #
  # Get all jobs
  #
  def get_jobs
    @jobs_by_id.values
  end

  #
  # Get all job ids
  #
  def get_job_ids
    @jobs_by_id.keys
  end

  #
  # Get a particular job
  #
  def get_job(i)
    @jobs_by_id[i]
  end

  #
  # Get jobs by status
  #
  # status may be a symbol or a string; :all selects every job.
  #
  def get_jobs_by_status(status)
    wanted = status.to_sym
    @jobs_by_id.values.select { |job| wanted == :all || job.status == wanted }
  end

  #
  # Record the completion time of the most recently finished job.
  #
  def job_completed
    @time_end = Time.now
  end

  #
  # Reset the queue and bulk push the given jobs
  #
  def set_jobs(jobs=[])
    self.reset!
    jobs.each { |job| self.push(job) }
  end

  #
  # An execution group is "ready" if it has work that can be done;
  # see implementation in child classes.
  #
  def ready?
    raise "unimplemented"
  end

  #
  # An execution group is "done" if nothing is queued, running or
  # waiting for a retry -- the exact opposite of running?
  #
  def done?
    !running?
  end

  #
  # Is this execution group running anything?
  #
  # Jobs not yet started, currently running, or waiting to retry all count.
  #
  def running?
    total_jobs_running = get_jobs_by_status(Executor::STATUS_NONE).size +
                         get_jobs_by_status(Executor::STATUS_RUNNING).size +
                         get_jobs_by_status(Executor::STATUS_RETRYING).size
    total_jobs_running > 0
  end

  #
  # Requeue all failed jobs
  #
  def requeue_failed_jobs!
    get_jobs_by_status(Executor::STATUS_ERROR).each do |job|
      requeue(job.job_id)
    end
  end

  #
  # Requeue a job by id
  #
  # The job's status, owner and timestamps are reset before it is pushed
  # back onto the queue.
  #
  def requeue(id)
    job = @jobs_by_id[id]
    job.status = Executor::STATUS_NONE
    job.owner = nil
    job.time_start = Time.now
    job.time_end = nil
    self.push(job)
  end

  #
  # Cancel a job by id
  #
  # The job is marked as failed, loses its owner and is removed from the
  # waiting queue.
  #
  def cancel(id)
    Log.warn "Cancelling job id #{id}"
    job = @jobs_by_id[id]
    job.status = Executor::STATUS_ERROR
    job.owner = nil
    job.time_end = Time.now
    @queue.delete(job)
  end

  #
  # Return total execution time of the group in whole seconds
  #
  def get_total_exec_time
    return 0 if @time_start.nil?

    finished_at = @time_end.nil? ? Time.now : @time_end
    finished_at.to_i - @time_start.to_i
  end

  #
  # Print out ExecutionGroup information
  #
  def to_s
    "#{self.class}[#{group_id}]: ready=#{self.ready?} total_jobs=#{@jobs_by_id.size} queued_jobs=#{self.size}"
  end

  ###################################
  protected
  ###################################

  #
  # Return total execution time or -1 for errors
  #
  # A job that has not started counts as 0 seconds; a job that has started
  # but not finished is measured up to now, rather than against a missing
  # end time.
  #
  def get_total_execution_time(status, time_begin, time_end)
    return -1 if status == :error
    return 0 if time_begin.nil?

    (time_end || Time.now).to_i - time_begin.to_i
  end

  #
  # Nickname used for ordering a job; empty string when unavailable.
  #
  def nickname_sort_key(job)
    server = job.server
    server.nil? ? "" : server['nickname'].to_s
  end

end
|
|
241
|
+
|
|
242
|
+
#
|
|
243
|
+
# SerialExecutionGroup: run only one job at a time
|
|
244
|
+
#
|
|
245
|
+
class SerialExecutionGroup < ExecutionGroup
  #
  # Ready only when no job is running and at least one job has not
  # been started yet.
  #
  def ready?
    nothing_running = get_jobs_by_status(Executor::STATUS_RUNNING).empty?
    nothing_running && !get_jobs_by_status(Executor::STATUS_NONE).empty?
  end

  #
  # One-letter tag identifying this group type.
  #
  def short_name
    "S"
  end
end
|
|
254
|
+
|
|
255
|
+
#
|
|
256
|
+
# ParallelExecutionGroup: run multiple jobs at once
|
|
257
|
+
#
|
|
258
|
+
class ParallelExecutionGroup < ExecutionGroup
  #
  # Same as the parent constructor, but the concurrency starts at 25.
  #
  def initialize(new_group_id)
    super
    @concurrency = 25
  end

  #
  # Ready whenever at least one job has not been started yet.
  #
  # FIXME - we're not currently using the @concurrency setting to limit execution
  # due to an unknown bug...
  # (the disabled extra condition was:
  #  get_jobs_by_status(Executor::STATUS_RUNNING).size < @concurrency)
  #
  def ready?
    !get_jobs_by_status(Executor::STATUS_NONE).empty?
  end

  #
  # One-letter tag identifying this group type.
  #
  def short_name
    "P"
  end
end
|
|
276
|
+
|
|
277
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#
|
|
2
|
+
# QueueWorker objects take work from the Queue and process it
|
|
3
|
+
# Each QueueWorker runs in its own thread... nothing fancy going on here
|
|
4
|
+
#
|
|
5
|
+
module Chimp
|
|
6
|
+
class QueueWorker
  attr_accessor :delay, :retry_count, :never_exit

  #
  # Defaults: no delay between jobs, no retries, loop until told to stop.
  #
  def initialize
    @delay = 0
    @retry_count = 0
    @never_exit = true
  end

  #
  # Grab work items from the ChimpQueue and process them
  # Only stop if @never_exit is false
  #
  # Each item receives this worker's retry_count and the current thread's
  # object_id as owner before it is run; the worker then sleeps @delay
  # seconds. When the queue has nothing to hand out the worker sleeps one
  # second. Any StandardError -- including one raised while fetching the
  # item -- is reported and the loop carries on.
  #
  def run
    while @never_exit
      begin
        # Fetching sits inside the begin block so that a failure in the
        # queue does not terminate this worker.
        work_item = ChimpQueue.instance.shift

        if work_item.nil?
          sleep 1
        else
          work_item.retry_count = @retry_count
          work_item.owner = Thread.current.object_id
          work_item.run
          sleep @delay
        end

      rescue StandardError => ex
        $stderr.puts "Exception in QueueWorker.run: #{ex}"
        puts ex.inspect
        puts ex.backtrace
      end
    end
  end

end
|
|
42
|
+
end
|