right_chimp 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ #
2
+ # Run an SSH script
3
+ #
4
+ module Chimp
5
class ExecSSH < Executor
  #
  # Executor that runs a single command (@exec) on a server over SSH.
  #
  attr_accessor :ssh_user

  # Options passed to every ssh invocation: quiet mode, no known_hosts
  # file and no host key checking.
  SSH_OPTIONS = %w[-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no].freeze

  #
  # h accepts everything Executor#initialize accepts, plus:
  #   :ssh_user - remote login name; "root" is used at run time when unset
  #
  def initialize(h={})
    super(h)
    @ssh_user = h[:ssh_user]
  end

  #
  # Run @exec on the server via ssh, inside run_with_retry.
  #
  # The command is handed to ssh as a separate argument (no local shell is
  # involved), so quotes, "$" and backticks in @exec reach the remote side
  # untouched instead of being expanded or mangled locally.
  #
  # Raises (inside run_with_retry) when the server has no address or when
  # ssh does not exit successfully.
  #
  def run
    @ssh_user ||= "root"

    host = server_address
    if host.nil?
      # NOTE(review): presumably this reloads the server's details so the
      # address becomes available -- confirm against the server class.
      @server.settings
      host = server_address
    end

    run_with_retry do
      if host.nil?
        raise "No IP address available for server \"#{@server['nickname']}\""
      end

      Log.debug "ssh #{@ssh_user}@#{host} \"#{@exec}\""

      # system returns true only for a zero exit status; false/nil cover both
      # a failing command and a failure to start ssh at all.
      unless system("ssh", *SSH_OPTIONS, "#{@ssh_user}@#{host}", @exec.to_s)
        raise "SSH failed with status: #{$?}"
      end
    end
  end

  # Human readable description of this job, used in log lines.
  def describe_work
    return "ExecSSH job_id=#{@job_id} command=\"#{@exec}\" server=\"#{@server['nickname']}\""
  end

  # The command this executor runs, as a string.
  def info
    return @exec.to_s
  end

  # Nickname of the server this executor targets.
  def target
    return @server['nickname']
  end

  private

  # Address of the server under either key spelling, or nil if neither is set.
  def server_address
    @server['ip_address'] || @server['ip-address']
  end
end
45
+ end
@@ -0,0 +1,180 @@
1
+ #
2
+ # Superclass for Executors-- objects that run things on servers
3
+ #
4
+
5
+ module Chimp
6
class Executor
  #
  # Base class for executors: objects that carry one unit of work (a job)
  # and its bookkeeping -- status, owner, timing, retry settings and the
  # error that ended it, if any. Subclasses implement #run and #info and
  # call #run_with_retry with the actual work.
  #
  attr_accessor :server, :array, :exec, :inputs, :template, :owner, :group,
                :job_id, :status, :dry_run, :verbose, :quiet, :timeout,
                :retry_count, :retry_sleep, :time_start, :time_end

  attr_reader :error, :results

  STATUS_NONE     = :none
  STATUS_RUNNING  = :running
  STATUS_RETRYING = :retrying
  STATUS_ERROR    = :error
  STATUS_DONE     = :done

  # Values used when the corresponding option is not supplied.
  DEFAULT_RETRY_COUNT = 0
  DEFAULT_RETRY_SLEEP = 30
  DEFAULT_TIMEOUT     = 3600

  #
  # Options (all optional): :server, :array, :template, :job_id, :group,
  # :exec, :inputs, :verbose, :retry_count, :retry_sleep, :timeout.
  #
  # The numeric options are converted with to_i. The default is applied
  # before the conversion, because nil.to_i is 0 and would otherwise hide it.
  #
  def initialize(h={})
    @server   = h[:server]
    @array    = h[:array]
    @template = h[:template]

    @job_id   = h[:job_id]
    @group    = h[:group]
    @exec     = h[:exec]
    @inputs   = h[:inputs]

    @verbose  = h[:verbose] || false

    @retry_count = (h[:retry_count] || DEFAULT_RETRY_COUNT).to_i
    @retry_sleep = (h[:retry_sleep] || DEFAULT_RETRY_SLEEP).to_i
    @timeout     = (h[:timeout]     || DEFAULT_TIMEOUT).to_i

    @error   = nil
    @status  = STATUS_NONE
    @owner   = nil
    @dry_run = false
    @quiet   = false

    @time_start = nil
    @time_end   = nil
    @results    = nil
  end

  #
  # Return total execution time (real) of a job in whole seconds:
  # 0 if it never started, time elapsed so far if it has not ended.
  #
  def get_total_exec_time
    return 0 if @time_start.nil?

    finish = @time_end.nil? ? Time.now : @time_end
    finish.to_i - @time_start.to_i
  end

  #
  # Convenience method to requeue this job in its group
  #
  def requeue
    @group.requeue(self.job_id)
  end

  #
  # Convenience method to cancel this job in its group
  #
  def cancel
    @group.cancel(self.job_id)
  end

  # Perform the work; subclasses must override.
  def run
    raise "run method must be overridden"
  end

  #
  # return info on what this executor does -- eg name of script or command
  #
  def info
    raise "unimplemented"
  end

  # Name of the thing this executor acts on; subclasses override.
  def target
    return "UNKNOWN"
  end

  protected

  #
  # Run a unit of work with retries.
  # This is called from the subclass with a code block to yield to.
  # The block is skipped entirely when @dry_run is set.
  #
  # On success the job is marked STATUS_DONE and the group is notified,
  # but only while the job still has an owner. A failing block is retried
  # up to @retry_count times with @retry_sleep seconds between attempts;
  # after that the job is marked STATUS_ERROR and the exception is kept
  # in @error. SystemExit and Interrupt are re-raised.
  #
  def run_with_retry(&block)
    @status = STATUS_RUNNING
    @time_start = Time.now
    Log.info self.describe_work_start unless @quiet

    #
    # The inner level of exception handling catches ordinary failures of
    # the work itself and either retries or fails the job.
    #
    # The outer level is a safety net for RuntimeErrors raised while
    # handling a failure or while finishing up.
    #
    begin
      begin
        yield if not @dry_run

        if @owner != nil
          @status = STATUS_DONE
          @group.job_completed
        else
          Log.warn "Ownership of job_id #{job_id} lost. User cancelled operation?"
        end

      rescue SystemExit, Interrupt => ex
        $stderr.puts "Exiting!"
        raise ex

      # This clause used to name Interrupt a second time, which made it
      # unreachable and left the retry logic below as dead code.
      rescue StandardError => ex
        name = @array['name'] if @array
        name = @server['name'] || @server['nickname'] if @server
        Log.error self.describe_work_error

        if @retry_count > 0
          @status = STATUS_RETRYING
          Log.error "Error executing on \"#{name}\". Retrying in #{@retry_sleep} seconds..."
          @retry_count -= 1
          sleep @retry_sleep
          retry
        end

        @status = STATUS_ERROR
        @error = ex
        Log.error "Error executing on \"#{name}\": #{ex}"

      ensure
        @time_end = Time.now
        Log.info self.describe_work_done unless @quiet
      end

    rescue RuntimeError => ex
      Log.error "Caught RuntimeError: #{ex}. Aborting job."
      Log.error ex.inspect
      Log.error ex.backtrace
      @status = STATUS_ERROR
      @error = ex
    end
  end

  #
  # This method should be overridden on Executor subclasses
  # to provide a human readable description of the work
  # being performed.
  #
  def describe_work
    return "#{self.class.name} job_id=#{@job_id}"
  end

  # Log line announcing the start of the work.
  def describe_work_start
    return("#{self.describe_work} status=START")
  end

  # Log line announcing the end of the work, with elapsed seconds.
  def describe_work_done
    return("#{self.describe_work} status=END time=#{@time_end.to_i-@time_start.to_i}s")
  end

  # Like describe_work_done, with start and end timestamps included.
  def describe_work_done_long
    return("#{self.describe_work} status=END time_start=#{@time_start.to_i} time_end=#{@time_end.to_i} time_total=#{@time_end.to_i-@time_start.to_i}")
  end

  # Log line announcing a failure of the work.
  def describe_work_error
    return("#{self.describe_work} status=ERROR")
  end
end
180
+ end
@@ -0,0 +1,187 @@
1
+ module Chimp
2
+ #
3
+ # The ChimpQueue is a singleton that contains the
4
+ # chimp work queue
5
+ #
6
class ChimpQueue
  #
  # Singleton holding the execution groups (by name) and the worker
  # threads that drain them.
  #
  include Singleton

  attr_accessor :delay, :retry_count, :max_threads, :group

  def initialize
    @delay = 0
    @retry_count = 0
    @max_threads = 10
    @workers_never_exit = true
    @threads = []

    self.reset!
  end

  #
  # Reset the queue and the :default group
  #
  # All existing groups are dropped and a fresh, empty :default
  # ParallelExecutionGroup is installed.
  #
  def reset!
    @group = {}
    @group[:default] = ParallelExecutionGroup.new(:default)
  end

  #
  # Start up queue runners: sort every group's queue, then spawn
  # max_threads threads, each running one QueueWorker configured with
  # this queue's delay and retry_count.
  #
  def start
    self.sort_queues!

    (1..max_threads).each do
      @threads << Thread.new do
        worker = QueueWorker.new
        worker.delay = @delay
        worker.retry_count = @retry_count
        worker.run
      end
    end
  end

  #
  # Push a task w into group g, creating the group if it does not exist.
  # Raises if no group is given.
  #
  def push(g, w)
    raise "no group specified" unless g
    create_group(g) if not ChimpQueue[g]
    ChimpQueue[g].push(w)
  end

  #
  # Create and register a new execution group under +name+.
  #
  # NOTE(review): the requested +type+ is overwritten below, so every
  # group is created as :parallel regardless of the argument. This looks
  # deliberate; confirm before relying on :serial groups.
  #
  def create_group(name, type = :parallel, concurrency = 1)
    type = :parallel
    Log.debug "Creating new execution group #{name} type=#{type} concurrency=#{concurrency}"
    new_group = ExecutionGroupFactory.from_type(type)
    new_group.group_id = name
    new_group.concurrency = concurrency
    ChimpQueue[name] = new_group
  end

  #
  # Take the next work item from the first group that reports ready?,
  # or return nil when no group is ready.
  #
  def shift
    ready_group = @group.values.find { |candidate| candidate.ready? }
    ready_group ? ready_group.shift : nil
  end

  #
  # Wait until group g is no longer running. Each worker thread is joined
  # for up to one second in turn, yielding to the caller's block after each
  # join; the block may break to stop waiting.
  #
  def wait_until_done(g, &block)
    while @group[g].running?
      @threads.each do |t|
        t.join(1)
        yield
      end
    end
  end

  #
  # Quit - wait for remaining jobs to complete, then kill the workers.
  # At most 30 one-second waits are spent in total across all groups.
  #
  def quit
    i = 0
    @group.keys.each do |group|
      wait_until_done(group) do
        if i < 30
          sleep 1
          i += 1
          print "."
        else
          break
        end
      end
    end

    @threads.each { |t| t.kill }
    puts " done."
  end

  #
  # Join every worker thread for up to five seconds (used by chimpd)
  #
  def run_threads
    @threads.each do |t|
      t.join(5)
    end
  end

  #
  # return the total number of queued (non-executing) objects
  #
  def size
    @group.values.inject(0) { |total, grp| total + grp.size }
  end

  #
  # Allow the groups to be accessed as ChimpQueue[:foo]
  #
  def self.[](group)
    return ChimpQueue.instance.group[group]
  end

  def self.[]=(k,v)
    ChimpQueue.instance.group[k] = v
  end

  #
  # Return an array of all jobs, across all groups, with the requested
  # status.
  #
  def get_jobs_by_status(status)
    @group.values.inject([]) do |found, grp|
      matches = grp.get_jobs_by_status(status)
      (matches.nil? || matches == []) ? found : found + matches
    end
  end

  #
  # Return the job with the given id, or nil if no group holds it.
  # (A miss used to return the whole job list, which is truthy.)
  #
  def get_job(id)
    get_jobs.find { |job| job.job_id == id }
  end

  #
  # Return an array of every job known to every group.
  #
  def get_jobs
    all_jobs = []
    @group.values.each { |grp| all_jobs.concat(grp.get_jobs.to_a) }
    all_jobs
  end

  #############################################################
  protected

  #
  # Sort the queue of every group
  #
  def sort_queues!
    return @group.values.each { |group| group.sort! }
  end

end
187
+ end
@@ -0,0 +1,277 @@
1
+ module Chimp
2
+
3
+ #
4
+ # Factory
5
+ #
6
class ExecutionGroupFactory
  #
  # Build a new execution group, with no group id, for the given type:
  # :serial or :parallel. Any other type raises.
  #
  def self.from_type(type)
    case type
    when :serial
      SerialExecutionGroup.new(nil)
    when :parallel
      ParallelExecutionGroup.new(nil)
    else
      raise "invalid execution group type specified"
    end
  end
end
17
+
18
+ #
19
+ # An ExecutionGroup contains a set of Executors to be processed
20
+ #
21
+ # Only the subclasses SerialExecutionGroup and ParallelExecutionGroup
22
+ # should be used directly.
23
+ #
24
class ExecutionGroup
  #
  # Holds a queue of jobs waiting to run (@queue) plus an index of every
  # job ever pushed, keyed by job id (@jobs_by_id).
  #
  attr_accessor :group_id, :description, :concurrency
  attr_reader :time_start, :time_end

  def initialize(new_group_id=nil)
    @group_id = new_group_id
    @queue = []
    @jobs_by_id = {}
    @log = nil
    @time_start = nil
    @time_end = nil
    @concurrency = 1
  end

  #
  # Add something to the work queue. The job gets an id from IDManager if
  # it has none, and is pointed back at this group. Raises on nil.
  #
  def push(j)
    raise "invalid work" if j == nil
    j.job_id = IDManager.get if j.job_id == nil
    j.group = self
    @queue.push(j)
    @jobs_by_id[j.job_id] = j
  end

  #
  # Take something from the queue (nil when empty). The first call also
  # records the group's start time.
  #
  def shift
    x = @queue.shift
    @time_start = Time.now if @time_start == nil
    return x
  end

  #
  # Return an array with one entry per known job: a hash describing the
  # job, or nil for a job that is nil or has no server.
  #
  def results
    return self.get_jobs.map do |task|
      next if task == nil
      next if task.server == nil

      {
        :job_id => task.job_id,
        :name   => task.info,
        :host   => task.server['nickname'] || task.server['name'],
        :status => task.status,
        :error  => task.error,
        :total  => self.get_total_execution_time(task.status, task.time_start, task.time_end),
        :start  => task.time_start,
        :end    => task.time_end,
        :worker => task
      }
    end
  end

  #
  # Size of the active queue
  #
  def size
    return @queue.size
  end

  #
  # Sort queue by server nickname. Jobs without a server or without a
  # nickname sort as the empty string instead of raising.
  #
  def sort!
    return nil if @queue.nil?

    @queue.sort_by! do |job|
      srv = job.server
      (srv && srv['nickname']).to_s
    end
  end

  #
  # Reset the queue. Only pending work is dropped; the index of known
  # jobs is left untouched.
  #
  def reset!
    @queue = []
  end

  #
  # Get all jobs
  #
  def get_jobs
    @jobs_by_id.values
  end

  #
  # Get all job ids
  #
  def get_job_ids
    @jobs_by_id.keys
  end

  #
  # Get a particular job
  #
  def get_job(i)
    @jobs_by_id[i]
  end

  #
  # Get jobs by status; the status :all matches every job.
  #
  def get_jobs_by_status(status)
    wanted = status.to_sym
    @jobs_by_id.values.select { |job| wanted == :all || job.status == wanted }
  end

  # Record the current time as the group's end time.
  def job_completed
    @time_end = Time.now
  end

  #
  # Clear the queue and push each of the given jobs
  #
  def set_jobs(jobs=[])
    self.reset!
    jobs.each do |job|
      self.push(job)
    end
  end

  #
  # An execution group is "ready" if it has work that can be done;
  # see implementation in child classes.
  #
  def ready?
    raise "unimplemented"
  end

  #
  # An execution group is "done" if nothing is queued, running or waiting
  # to be retried -- the exact opposite of running?.
  #
  def done?
    return !running?
  end

  #
  # Is this execution group running anything? Counts jobs that have not
  # started, are running, or are waiting to be retried.
  #
  def running?
    active = [Executor::STATUS_NONE, Executor::STATUS_RUNNING, Executor::STATUS_RETRYING]
    return active.any? { |st| get_jobs_by_status(st).size > 0 }
  end

  #
  # Requeue all failed jobs
  #
  def requeue_failed_jobs!
    get_jobs_by_status(Executor::STATUS_ERROR).each do |job|
      requeue(job.job_id)
    end
  end

  #
  # Requeue a job by id: clear its status and owner, restart its clock
  # and push it onto the queue again.
  #
  def requeue(id)
    job = @jobs_by_id[id]
    job.status = Executor::STATUS_NONE
    job.owner = nil
    job.time_start = Time.now
    job.time_end = nil
    self.push(job)
  end

  #
  # Cancel a job by id: mark it as failed, drop its owner and remove it
  # from the queue.
  #
  def cancel(id)
    Log.warn "Cancelling job id #{id}"
    job = @jobs_by_id[id]
    job.status = Executor::STATUS_ERROR
    job.owner = nil
    job.time_end = Time.now
    @queue.delete(job)
  end

  #
  # Return total execution time of the group in whole seconds: 0 if it
  # never started, time elapsed so far if it has not ended.
  #
  def get_total_exec_time
    return 0 if @time_start.nil?

    finish = @time_end.nil? ? Time.now : @time_end
    return finish.to_i - @time_start.to_i
  end

  #
  # Print out ExecutionGroup information
  #
  def to_s
    return "#{self.class}[#{group_id}]: ready=#{self.ready?} total_jobs=#{@jobs_by_id.size} queued_jobs=#{self.size}"
  end

  ###################################
  protected
  ###################################

  #
  # Return a job's execution time in whole seconds, or -1 for errors.
  # A job that never started counts as 0; one still in progress is
  # measured up to now rather than producing a negative number.
  #
  def get_total_execution_time(status, time_begin, time_end)
    return -1 if status == :error
    return 0 if time_begin.nil?

    finish = time_end.nil? ? Time.now : time_end
    return finish.to_i - time_begin.to_i
  end

end
241
+
242
+ #
243
+ # SerialExecutionGroup: run only one job at a time
244
+ #
245
class SerialExecutionGroup < ExecutionGroup
  #
  # Ready when no job is currently running and at least one job has not
  # been started yet.
  #
  def ready?
    nothing_running = get_jobs_by_status(Executor::STATUS_RUNNING).empty?
    return nothing_running && !get_jobs_by_status(Executor::STATUS_NONE).empty?
  end

  # One-letter tag identifying the group type.
  def short_name
    return "S"
  end
end
254
+
255
+ #
256
+ # ParallelExecutionGroup: run multiple jobs at once
257
+ #
258
class ParallelExecutionGroup < ExecutionGroup
  # Same as the parent constructor, but concurrency starts at 25.
  def initialize(new_group_id)
    super
    @concurrency = 25
  end

  #
  # Ready whenever at least one job has not been started yet.
  #
  # FIXME - @concurrency is not consulted here, so the number of jobs
  # running at once is not limited by this group (disabled because of an
  # unknown bug).
  #
  def ready?
    waiting = get_jobs_by_status(Executor::STATUS_NONE)
    return !waiting.empty?
  end

  # One-letter tag identifying the group type.
  def short_name
    return "P"
  end
end
276
+
277
+ end
@@ -0,0 +1,42 @@
1
+ #
2
+ # QueueWorker objects take work from the Queue and process it
3
+ # Each QueueWorker runs in its own thread... nothing fancy going on here
4
+ #
5
+ module Chimp
6
class QueueWorker
  attr_accessor :delay, :retry_count, :never_exit

  def initialize
    @never_exit  = true
    @retry_count = 0
    @delay       = 0
  end

  #
  # Grab work items from the ChimpQueue and process them.
  # Only stop if @never_exit is false.
  #
  # Each item is given this worker's retry count and the current thread's
  # object id as owner before it is run; the worker then sleeps @delay
  # seconds. With nothing to do it sleeps one second. Errors raised while
  # handling an item are printed and the loop carries on.
  #
  def run
    while @never_exit
      job = ChimpQueue.instance.shift

      begin
        if job.nil?
          sleep 1
        else
          job.retry_count = @retry_count
          job.owner = Thread.current.object_id
          job.run
          sleep @delay
        end
      rescue StandardError => err
        $stderr.puts "Exception in QueueWorker.run: #{err}"
        puts err.inspect
        puts err.backtrace
      end
    end
  end

end
42
+ end