mssh 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. data/LICENSE.md +13 -0
  2. data/README.md +25 -0
  3. data/bin/mcmd +97 -0
  4. data/bin/mssh +86 -0
  5. data/lib/mcmd.rb +281 -0
  6. metadata +114 -0
@@ -0,0 +1,13 @@
1
+ Copyright 2011 Square Inc.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,25 @@
1
+ mssh, mcmd
2
+ ==========
3
+
4
+
5
+ Tools for running multiple commands and ssh jobs in parallel and easily collecting the results.
6
+
7
+ Usage
8
+ -----
9
+
10
+
11
+ <code>mssh -r host01,host02,host03 "uname -r" -c</code>
12
+
13
+ BUGS/TODO
14
+ ---------
15
+
16
+
17
+ * optionally incorporate stderr into -c, along with $?
18
+ * allow commandline manipulation of ssh args
19
+ * factor out redundancy between bin/mssh and bin/mcmd (cli module?)
20
+ * incorporate range / foundation lookup syntax for -r
21
+ * json output mode
22
+ * to-file output mode
23
+ * lots of rough spots, not super slick yet
24
+ * badly needs testing (target: 0.1 release)
25
+
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pp'
4
+
5
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
+ require 'mcmd'
7
+
8
+ require 'optparse'
9
# mcmd: run one local command per target, in parallel, via MultipleCmd.
#
# Every occurrence of the literal text HOSTNAME in the command arguments is
# replaced with the target name. By default the arguments are joined with
# spaces and handed to `/bin/sh -c`; with --noshell they are exec'd verbatim.

# Defaults; each may be overridden on the command line.
options = {
  :maxflight => 200,
  :timeout => 60,
  :global_timeout => 0,
}

optparse = OptionParser.new do |opts|
  opts.on('-r', '--range RANGE', 'currently takes a CSV list') do |arg|
    options[:range] = arg
  end
  # Integer coercion rejects non-numeric values up front instead of letting
  # String#to_i silently turn them into 0.
  opts.on('-m', '--maxflight N', Integer, "How many subprocesses? #{options[:maxflight]} by default") do |arg|
    options[:maxflight] = arg
  end
  opts.on('-t', '--timeout SECONDS', Integer, 'How many seconds may each individual process take? 0 for no timeout') do |arg|
    options[:timeout] = arg
  end
  opts.on('-g', '--global_timeout SECONDS', Integer, 'How many seconds for the whole shebang 0 for no timeout') do |arg|
    options[:global_timeout] = arg
  end
  opts.on('--noshell', "Don't invoke a shell. Args will be passed to exec verbatim ") do |arg|
    options[:noshell] = arg
  end
  opts.on('-c', '--collapse', "Collapse similar output ") do |arg|
    options[:collapse] = arg
  end
  opts.on('-v', '--verbose', "Verbose output") do |arg|
    options[:verbose] = arg
  end
  opts.on('-d', '--debug', "Debug output") do |arg|
    options[:debug] = arg
  end
  # option to merge stdin/stdout into one buf? how should this work?
  # option to ignore as-we-go yield output - this is off by default now except for success/fail
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  abort "Error, #{e.message}\n#{optparse}"
end

abort "Error, need -r argument" if options[:range].nil? || options[:range].empty?
abort "Error, need command to run" if ARGV.empty?
# A maxflight below 1 would never start a process, so the run could not finish.
abort "Error, maxflight must be at least 1" if options[:maxflight].to_i < 1

m = MultipleCmd.new

targets = options[:range].split(",")

# One argv array per target.
m.commands = targets.map do |target|
  argv = ARGV.map { |arg| arg.gsub('HOSTNAME', target) }
  options[:noshell] ? argv : ["/bin/sh", "-c", argv.join(" ")]
end

# Results carry back the very same command array objects, so the object id of
# each command is used to find the target it was built for.
command_to_target = {}
targets.zip(m.commands) do |target, command|
  command_to_target[command.object_id] = target
end

m.yield_startcmd = lambda { |p| puts "#{command_to_target[p.command.object_id]}: starting" } if options[:verbose]
m.yield_wait = lambda { |p| puts "#{p.success? ? 'SUCCESS' : 'FAILURE'} #{command_to_target[p.command.object_id]}: '#{p.stdout_buf}'" }
# m.yield_proc_timeout = lambda { |p| puts "am killing #{p.inspect}"}

m.perchild_timeout = options[:timeout].to_i
m.global_timeout = options[:global_timeout].to_i
m.maxflight = options[:maxflight].to_i
m.verbose = options[:verbose]
m.debug = options[:debug]

result = m.run

if options[:collapse]
  # Collapsed summary: group targets by identical stdout, keeping successes
  # and failures apart.  output => [targets ...]
  stdout_matches_success = Hash.new { |hash, key| hash[key] = [] }
  stdout_matches_failure = Hash.new { |hash, key| hash[key] = [] }
  result.each do |r|
    bucket = r[:retval].success? ? stdout_matches_success : stdout_matches_failure
    bucket[r[:stdout_buf]] << command_to_target[r[:command].object_id]
  end
  stdout_matches_success.each_pair do |output, hosts|
    puts "SUCCESS: #{hosts.join ','}: #{output}"
  end
  stdout_matches_failure.each_pair do |output, hosts|
    puts "FAILURE: #{hosts.join ','}: #{output}"
  end
end
# Without --collapse nothing more is printed here: the yield_wait callback
# above already reported each target as it finished.
97
+
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'pp'
5
+ require 'mcmd'
6
+ require 'rangeclient'
7
+
8
+ require 'optparse'
9
# mssh: run one command over ssh on every host of a range, in parallel, via
# MultipleCmd. Exactly one command argument is accepted; it is passed to ssh
# as the remote command.

# Defaults; each may be overridden on the command line.
options = {
  :maxflight => 50,
  :timeout => 60,
  :global_timeout => 600,
}

optparse = OptionParser.new do |opts|
  opts.on('-r', '--range RANGE', 'Range expression naming the target hosts (expanded via Range::Client)') do |arg|
    options[:range] = arg
  end
  # Integer coercion rejects non-numeric values up front instead of letting
  # String#to_i silently turn them into 0.
  opts.on('-m', '--maxflight N', Integer, "How many subprocesses? #{options[:maxflight]} by default") do |arg|
    options[:maxflight] = arg
  end
  opts.on('-t', '--timeout SECONDS', Integer, 'How many seconds may each individual process take? 0 for no timeout') do |arg|
    options[:timeout] = arg
  end
  opts.on('-g', '--global_timeout SECONDS', Integer, 'How many seconds for the whole shebang 0 for no timeout') do |arg|
    options[:global_timeout] = arg
  end
  opts.on('-c', '--collapse', "Collapse similar output ") do |arg|
    options[:collapse] = arg
  end
  opts.on('-v', '--verbose', "verbose ") do |arg|
    options[:verbose] = arg
  end
  opts.on('-d', '--debug', "Debug output") do |arg|
    options[:debug] = arg
  end
  # option to merge stdin/stdout into one buf?
  # option to ignore as-we-go yield output
end

begin
  optparse.parse!
rescue OptionParser::ParseError => e
  abort "Error, #{e.message}\n#{optparse}"
end

abort "Error, need -r argument" if options[:range].nil? || options[:range].empty?
abort "Error, need command to run" if ARGV.empty?
abort "Error, too many arguments" if ARGV.size != 1
# A maxflight below 1 would never start a process, so the run could not finish.
abort "Error, maxflight must be at least 1" if options[:maxflight].to_i < 1

range = Range::Client.new

m = MultipleCmd.new

targets = range.expand(options[:range])

# One ssh argv per target; the single command argument is passed through as is.
remote_command = ARGV.first
m.commands = targets.map do |target|
  ["/usr/bin/ssh", "-2", "-oPasswordAuthentication=no", "-A", target, remote_command]
end

# Results carry back the very same command array objects, so the object id of
# each command is used to find the host it was built for.
command_to_target = {}
targets.zip(m.commands) do |target, command|
  command_to_target[command.object_id] = target
end

m.yield_startcmd = lambda { |p| puts "#{command_to_target[p.command.object_id]}: starting" } if options[:verbose]
m.yield_wait = lambda { |p| puts "#{command_to_target[p.command.object_id]}: finished" } if options[:verbose]

m.perchild_timeout = options[:timeout].to_i
m.global_timeout = options[:global_timeout].to_i
m.maxflight = options[:maxflight].to_i
m.verbose = options[:verbose]
m.debug = options[:debug]

result = m.run

if options[:collapse]
  # Collapsed summary: group hosts by identical stdout.  output => [targets ...]
  stdout_matches = Hash.new { |hash, key| hash[key] = [] }
  result.each do |r|
    stdout_matches[r[:stdout_buf]] << command_to_target[r[:command].object_id]
  end
  stdout_matches.each_pair do |output, matched|
    hosts = range.compress(matched)
    puts "#{hosts}: '#{output.chomp}'"
  end
else
  # not collapse, print one per host
  result.each do |r|
    target = command_to_target[r[:command].object_id]
    # Process::Status#success? is the same test bin/mcmd and SubProc#success? use.
    verdict = r[:retval].success? ? 'SUCCESS:' : 'FAILURE:'
    puts "#{target}: #{verdict} '#{r[:stdout_buf].chomp}'\n"
  end
end
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'pp'
4
+
5
# Runs a list of external commands as forked child processes, at most
# +maxflight+ at a time, capturing each child's stdout and stderr through
# pipes and reporting one result hash per finished command.
#
# Settings (all plain accessors):
# commands::           array of argv arrays; #run consumes it with shift
# maxflight::          maximum number of children alive at once (must be >= 1)
# perchild_timeout::   seconds a child may run before it is killed;
#                      nil or <= 0 disables the per-child timeout
# global_timeout::     stored only; nothing in this class enforces it yet
# poll_period::        seconds IO.select may block per loop iteration
# max_read_size::      stored only; the size check is still a FIXME
# verbose::            stored only; not read by this class
# debug::              when true, progress lines are printed with puts
# yield_startcmd::     optional callable, called with the SubProc after fork
# yield_wait::         optional callable, called with the SubProc after reaping
# yield_proc_timeout:: optional callable, called with the SubProc before a
#                      timed-out child is killed
class MultipleCmd

  attr_accessor :global_timeout, :maxflight, :perchild_timeout, :commands
  attr_accessor :yield_wait, :yield_startcmd, :debug, :yield_proc_timeout
  attr_accessor :verbose, :poll_period, :max_read_size

  # Bytes requested per read from a child's pipe.
  READ_CHUNK_SIZE = 4096

  def initialize
    reset_run_state

    self.commands = []
    self.perchild_timeout = 60
    self.global_timeout = 0
    self.maxflight = 200
    self.debug = false
    self.poll_period = 0.5 # shouldn't need adjusting
    self.max_read_size = 2 ** 19 # 512k
  end

  # Replaces the current process with +cmd+ (an argv array) without going
  # through a shell. The [path, argv0] pair form makes Kernel.exec treat a
  # single-element command as a program path rather than a shell line.
  def noshell_exec(cmd)
    Kernel.exec([cmd[0], cmd[0]], *cmd[1..-1])
  end

  # Forks a child that execs +cmd+ with its stdin, stdout and stderr attached
  # to fresh pipes, and registers the resulting SubProc by pid and by the
  # parent's pipe ends.
  #
  # I should probably move this whole method
  # into SubProc and make the subproc_by_* into
  # class variables
  def add_subprocess(cmd)
    stdin_rd, stdin_wr = IO.pipe
    stdout_rd, stdout_wr = IO.pipe
    stderr_rd, stderr_wr = IO.pipe
    subproc = MultipleCmd::SubProc.new
    subproc.stdin_fd = stdin_wr
    subproc.stdout_fd = stdout_rd
    subproc.stderr_fd = stderr_rd
    subproc.command = cmd

    pid = fork
    if pid.nil?
      # child: wire up stdin, out, err, then exec
      begin
        STDIN.reopen(stdin_rd)
        STDOUT.reopen(stdout_wr)
        STDERR.reopen(stderr_wr)
        [stdin_wr, stdout_rd, stderr_rd].each { |io| io.close }
        noshell_exec(cmd)
      rescue StandardError => ex
        # STDERR is the pipe by now, so the parent collects this message.
        STDERR.puts "exec failed: #{ex.message}"
        STDERR.flush
      ensure
        # Only reached when exec did not replace the process. Leave at once so
        # the child never unwinds into a copy of the parent's code or runs its
        # at_exit handlers.
        exit!(127)
      end
    end

    # parent: drop the child's pipe ends. While the parent held the write
    # ends open, stdout/stderr could never reach EOF and descriptors
    # accumulated with every command.
    [stdin_rd, stdout_wr, stderr_wr].each { |io| io.close }

    # for mapping to subproc by pid
    subproc.pid = pid
    @subproc_by_pid[pid] = subproc
    # for mapping to subproc by i/o handle (returned from select)
    @subproc_by_fd[stdin_wr] = subproc
    @subproc_by_fd[stdout_rd] = subproc
    @subproc_by_fd[stderr_rd] = subproc

    self.yield_startcmd.call(subproc) unless self.yield_startcmd.nil?
  end

  # Reads one chunk from every readable pipe in +read_fds+ and appends it to
  # the owning SubProc's stdout or stderr buffer. On EOF or a read error the
  # pipe is closed and the SubProc's reference to it is cleared.
  # Raises RuntimeError for a descriptor that is not registered.
  def process_read_fds(read_fds)
    read_fds.each do |fd|
      if not @subproc_by_fd.has_key?(fd)
        raise "Select returned a fd which I have not seen! fd: #{fd.inspect}"
      end
      subproc = @subproc_by_fd[fd]
      begin
        buf = fd.sysread(READ_CHUNK_SIZE)

        if buf.nil?
          raise " Impossible result from sysread()"
        end
        # no exception? bytes were read. append them.
        if fd == subproc.stdout_fd
          subproc.stdout_buf << buf
          # FIXME if we've read > maxbuf, allow closing/ignoring the fd instead of hard kill
          # (max_read_size is not enforced yet; killing here needs a mark-to-kill mechanism)
        elsif fd == subproc.stderr_fd
          subproc.stderr_buf << buf
          # FIXME same as above for stderr
        end
      rescue SystemCallError, EOFError => ex
        puts "DEBUG: saw read exception #{ex}" if self.debug
        # finalize read i/o for this pipe;
        # if we're reading, it was the process's stdout or stderr
        if fd == subproc.stdout_fd
          subproc.stdout_fd = nil
        elsif fd == subproc.stderr_fd
          subproc.stderr_fd = nil
        else
          raise "impossible: operating on a subproc where the fd isn't found, even though it's mapped"
        end
        @subproc_by_fd.delete(fd)
        fd.close rescue true
      end
    end
  end # process_read_fds()

  # Placeholder for feeding data to children's stdin; it only validates that
  # each descriptor is registered.
  def process_write_fds(write_fds)
    write_fds.each do |fd|
      raise "working on an unknown fd #{fd}" unless @subproc_by_fd.has_key?(fd)
      # add writing here, todo. not core feature
    end
  end

  # Placeholder; intentionally does nothing.
  def process_err_fds(err_fds)
  end

  # Waits up to poll_period for output from the running children and collects
  # whatever is available.
  def service_subprocess_io
    running = @subproc_by_pid.values.reject { |sp| sp.terminated }
    read_fds = running.map { |sp| [sp.stdout_fd, sp.stderr_fd] }.flatten.compact

    # The stdin pipes are deliberately left out of the select: nothing is
    # ever written to them, and an idle pipe is always writable, which made
    # select return at once and turned the run loop into a busy spin.
    ready_read, ready_write, ready_err = IO.select(read_fds, nil, nil, self.poll_period)

    self.process_read_fds(ready_read) unless ready_read.nil?
    self.process_write_fds(ready_write) unless ready_write.nil?
    self.process_err_fds(ready_err) unless ready_err.nil?
  end

  # Kills every running child that has exceeded perchild_timeout, calling
  # yield_proc_timeout first. A nil or non-positive perchild_timeout disables
  # the check; children already killed are not killed (or reported) again.
  def process_timeouts
    limit = self.perchild_timeout
    return if limit.nil? || limit <= 0

    now = Time.now.to_i
    @subproc_by_pid.values.each do |p|
      next if p.terminated
      next unless (now - p.time_start) > limit

      self.yield_proc_timeout.call(p) unless self.yield_proc_timeout.nil?
      self.kill_process(p)
    end
  end

  # Unregisters +p+'s pipes and kills it. The pid entry stays until the child
  # has been waited on.
  def kill_process(p)
    @subproc_by_fd.delete(p.stdin_fd)
    @subproc_by_fd.delete(p.stdout_fd)
    @subproc_by_fd.delete(p.stderr_fd)
    # must kill after deleting from maps
    # kill closes fds
    p.kill
  end

  # Runs every queued command to completion and returns an array of result
  # hashes (see #return_rundata) in the order the children were reaped.
  # Internal bookkeeping is reset afterwards so the object can be reused.
  #
  # Raises ArgumentError when commands are queued but maxflight is below 1,
  # because no process could ever be started and the loop would never end.
  def run
    @global_time_start = Time.now.to_i
    if !@commands.empty? && self.maxflight.to_i < 1
      raise ArgumentError, "maxflight must be at least 1 (got #{self.maxflight.inspect})"
    end

    # done once nothing is in flight and nothing is pending
    until @subproc_by_pid.empty? && @commands.empty?
      # start up as many as maxflight processes
      while @subproc_by_pid.length < self.maxflight and not @commands.empty?
        self.add_subprocess(@commands.shift)
      end
      # service running processes
      self.service_subprocess_io
      # timeout overdue processes
      self.process_timeouts
      # service process cleanup
      self.wait
      puts "have #{@subproc_by_pid.length} left to go" if self.debug
    end

    data = self.return_rundata
    reset_run_state
    return data
  end

  # Builds one hash per processed command with the keys :pid,
  # :write_buf_position, :stdout_buf, :stderr_buf, :command, :time_start,
  # :time_end and :retval.
  def return_rundata
    #FIXME pass through the process object
    @processed_commands.map do |c|
      {
        :pid => c.pid,
        :write_buf_position => c.write_buf_position,
        :stdout_buf => c.stdout_buf,
        :stderr_buf => c.stderr_buf,
        :command => c.command,
        :time_start => c.time_start,
        :time_end => c.time_end,
        :retval => c.retval,
      }
    end
  end

  # Reaps every child that has exited, without blocking. For each reaped
  # child this records its end time and exit status, collects output still
  # sitting in its pipes, closes its pipes, and calls yield_wait.
  def wait
    just_reaped = []
    loop do
      begin
        pid = Process.waitpid(-1, Process::WNOHANG)
      rescue Errno::ECHILD
        # no children at all
        break
      end
      break if pid.nil?

      status = $?
      # pid is now gone. remove from subproc_by_pid and
      # add to the processed commands list
      p = @subproc_by_pid.delete(pid)
      if p.nil?
        # waitpid(-1) can return a child this object did not start
        puts "DEBUG: reaped unknown child pid #{pid}" if self.debug
        next
      end
      p.time_end = Time.now.to_i
      p.retval = status
      @processed_commands << p
      just_reaped << p
    end

    just_reaped.each do |p|
      # We may have waited on a child before reading all its output.
      # Collect those missing bits. No blocking.
      drain_output(p) unless p.terminated
      release_fds(p)
      self.yield_wait.call(p) unless self.yield_wait.nil?
    end
  end

  private

  # (Re)initializes the bookkeeping that is rebuilt for every run.
  def reset_run_state
    @subproc_by_pid = Hash.new
    @subproc_by_fd = Hash.new
    @processed_commands = []
  end

  # Appends everything still readable from +subproc+'s stdout and stderr to
  # its buffers. Stops at EOF, or as soon as a read would block (for example
  # when some other process still holds the pipe open), so it never waits.
  def drain_output(subproc)
    [[subproc.stdout_fd, subproc.stdout_buf],
     [subproc.stderr_fd, subproc.stderr_buf]].each do |fd, buf|
      next if fd.nil?
      begin
        loop { buf << fd.read_nonblock(READ_CHUNK_SIZE) }
      rescue SystemCallError, IOError
        # EOFError (an IOError), EAGAIN, or a closed stream: nothing more to take.
      end
    end
  end

  # Closes and unregisters whatever pipes +subproc+ still has open.
  def release_fds(subproc)
    [subproc.stdin_fd, subproc.stdout_fd, subproc.stderr_fd].compact.each do |fd|
      @subproc_by_fd.delete(fd)
      begin
        fd.close unless fd.closed?
      rescue SystemCallError, IOError
        # best effort
      end
    end
  end

end

# Bookkeeping for one child process started by MultipleCmd: the parent's pipe
# ends, the captured output, timing, and the exit status once reaped.
class MultipleCmd::SubProc
  attr_accessor :stdin_fd, :stdout_fd, :stderr_fd, :write_buf_position
  attr_accessor :time_start, :time_end, :pid, :retval, :stdout_buf, :stderr_buf, :command, :terminated

  def initialize
    self.write_buf_position = 0
    self.time_start = Time.now.to_i
    # dup so the buffers are always mutable strings
    self.stdout_buf = "".dup
    self.stderr_buf = "".dup
    self.terminated = false
  end

  # when a process has out-stayed its welcome:
  # closes the parent's pipe ends, sends SIGKILL and marks it terminated.
  def kill
    [self.stdin_fd, self.stdout_fd, self.stderr_fd].each do |fd|
      next if fd.nil? || fd.closed?
      begin
        fd.close
      rescue SystemCallError, IOError
        # best effort
      end
    end
    #TODO configurable sig?
    begin
      Process.kill("KILL", self.pid)
    rescue Errno::ESRCH
      # already gone; nothing left to kill
    end
    self.terminated = true
  end

  # some heuristic to determine if this job was successful
  # for now, trust retval. Also check stderr?
  # Returns true only for a recorded exit status that reports success; false
  # when the child has not been reaped yet or was ended by a signal.
  def success?
    !self.retval.nil? && self.retval.success? == true
  end
end
281
+
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mssh
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Evan Miller
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-05-16 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: json
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rangeclient
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: rdoc
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :runtime
61
+ version_requirements: *id003
62
+ description: Simple library for running jobs and sshing to many hosts at once.
63
+ email:
64
+ - github@squareup.com
65
+ executables:
66
+ - mssh
67
+ - mcmd
68
+ extensions: []
69
+
70
+ extra_rdoc_files:
71
+ - LICENSE.md
72
+ files:
73
+ - lib/mcmd.rb
74
+ - bin/mcmd
75
+ - bin/mssh
76
+ - README.md
77
+ - LICENSE.md
78
+ homepage: http://github.com/square/prodeng
79
+ licenses: []
80
+
81
+ post_install_message:
82
+ rdoc_options:
83
+ - --charset=UTF-8
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ none: false
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ hash: 23
101
+ segments:
102
+ - 1
103
+ - 3
104
+ - 6
105
+ version: 1.3.6
106
+ requirements: []
107
+
108
+ rubyforge_project:
109
+ rubygems_version: 1.8.24
110
+ signing_key:
111
+ specification_version: 3
112
+ summary: Parallel ssh and command execution.
113
+ test_files: []
114
+