jetpants 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,396 @@
1
+ require 'net/ssh'
2
+ require 'socket'
3
+
4
+ module Jetpants
5
+
6
+ # Encapsulates a UNIX server that we can SSH to as root. Maintains a pool of SSH
7
+ # connections to the host as needed.
8
+ class Host
9
+ include CallbackHandler
10
+
11
+ @@all_hosts = {}
12
+ @@all_hosts_mutex = Mutex.new
13
+
14
+ # IP address of the Host, as a string.
15
+ attr_reader :ip
16
+
17
# Host.new is overridden so that every IP maps to one shared Host object:
# asking for a Host whose IP is already registered returns the existing
# instance rather than building a duplicate. The registry lookup and insert
# happen while holding the class-level mutex.
def self.new(ip)
  @@all_hosts_mutex.synchronize do
    cached = @@all_hosts[ip]
    if cached.is_a?(self)
      cached
    else
      # Drop whatever was stored (nothing, or an object that is not an
      # instance of the receiving class) before building the replacement.
      @@all_hosts[ip] = nil
      @@all_hosts[ip] = super
    end
  end
end
26
+
27
# Stores the IP and prepares the per-host SSH bookkeeping: an empty pool of
# idle connections, the mutex that guards it, and an availability flag that
# starts out unknown (nil).
def initialize(ip)
  @ip              = ip
  @lock            = Mutex.new
  @connection_pool = [] # array of idle Net::SSH::Connection::Session objects
  @available       = nil
end
33
+
34
# Returns a Host object for the machine Jetpants is running on, identified by
# the IPv4 address bound to the given network interface (default 'bond0').
# Any error raised by the socket or ioctl call (for example when the
# interface does not exist) propagates to the caller; the socket is closed
# either way.
def self.local(interface='bond0')
  # This technique is adapted from Sergio Rubio Gracia's, described at
  # http://blog.frameos.org/2006/12/09/getting-network-interface-addresses-using-ioctl-pure-ruby-2/
  sock = nil
  begin
    sock = Socket.new(Socket::AF_INET, Socket::SOCK_DGRAM, 0)
    # 16 bytes of interface name followed by 8 bytes of scratch space that
    # the ioctl fills in.
    buf = [interface, ""].pack('a16h16')
    sock.ioctl(0x8915, buf) # SIOCGIFADDR
  ensure
    # Close the descriptor even when the ioctl raises.
    sock.close if sock
  end
  # Bytes 20..23 of the buffer are read as the four address octets.
  ip_string = buf[20...24].unpack('C*').join '.'
  self.new(ip_string)
end
45
+
46
# Returns a Net::SSH::Connection::Session for the host. Verifies that the
# connection is working before returning it.
#
# Makes up to 5 attempts. Each attempt takes an idle connection from the pool
# if one exists, otherwise opens a new one as root, and then runs 'echo ping'
# over it. Sets @available to true on success. After 5 failed attempts sets
# @available to false and raises a RuntimeError.
def get_ssh_connection
  5.times do |attempt|
    conn = @lock.synchronize { @connection_pool.shift }

    unless conn
      params = {
        :paranoid => false,
        :user_known_hosts_file => '/dev/null',
        :timeout => 5,
      }
      params[:keys] = Jetpants.ssh_keys if Jetpants.ssh_keys
      begin
        # New connections are opened while holding the host lock, exactly as
        # before, so only one thread at a time dials this host.
        conn = @lock.synchronize { Net::SSH.start(@ip, 'root', params) }
      rescue => ex
        output "Unable to SSH on attempt #{attempt + 1}: #{ex.to_s}"
        next
      end
    end
    next unless conn

    # Confirm that the connection works
    begin
      result = conn.exec!('echo ping').strip
      raise "Unexpected result" unless result == 'ping'
      @available = true
      return conn
    rescue
      output "Discarding nonfunctional SSH connection"
      # Best-effort hard shutdown so the broken session's socket is released
      # now instead of lingering until garbage collection.
      begin
        conn.shutdown!
      rescue StandardError
        # Nothing more can be done with a connection that is already dead.
      end
    end
  end
  @available = false
  raise "Unable to obtain working SSH connection to #{self} after 5 attempts"
end
91
+
92
# Returns a Net::SSH::Connection::Session to the pool of idle persistent
# connections. Runs 'cd ~' over it first; if that (or the pool insert) raises,
# the connection is not pooled and a message is logged instead.
def save_ssh_connection(conn)
  begin
    conn.exec!('cd ~')
    @lock.synchronize { @connection_pool.push(conn) }
  rescue
    output "Discarding nonfunctional SSH connection"
  end
end
101
+
102
# Execute the given UNIX command string (or array of strings) as root via SSH.
# By default, if something is wrong with the SSH connection, the command
# will be attempted up to 3 times before an exception is thrown. Be sure
# to set this to 1 or false for commands that are not idempotent.
# Each retry waits (1s, then 2s, ...) and runs on a freshly obtained
# connection, since the one that just failed is presumably broken.
# Returns the result of the command executed. If cmd was an array of strings,
# returns the result of the LAST command executed.
# Raises the command's own exception once attempts are exhausted, or whatever
# get_ssh_connection raises if no replacement connection can be obtained.
def ssh_cmd(cmd, attempts=3)
  attempts ||= 1 # nil/false means "do not retry"
  conn = get_ssh_connection
  cmd = [cmd] unless cmd.is_a? Array
  result = nil
  cmd.each do |c|
    failures = 0
    begin
      result = conn.exec! c
    rescue
      failures += 1
      raise if failures >= attempts
      output "Command \"#{c}\" failed, re-trying after delay"
      sleep(failures)
      # Retrying on the connection that just failed would most likely fail
      # again, so tear it down (best effort) and get a verified one.
      begin
        conn.shutdown!
      rescue StandardError
        # The connection is already unusable; ignore cleanup errors.
      end
      conn = get_ssh_connection
      retry
    end
  end
  save_ssh_connection conn
  return result
end
128
+
129
# Runs cmd through ssh_cmd with retries turned off. Intended for commands that
# are not idempotent and therefore must not be re-run if the SSH connection
# misbehaves.
def ssh_cmd!(cmd)
  ssh_cmd(cmd, false)
end
134
+
135
# Confirm that something is listening on the given port. The timeout param
# indicates how long to wait (in seconds) for a process to be listening.
# Returns true once a listener shows up; raises a RuntimeError if none appears
# within the timeout.
def confirm_listening_on_port(port, timeout=10)
  # Match ":PORT " rather than the bare digits, so that checking for 3306 is
  # not satisfied by a listener on 33060 or by any other line that merely
  # contains those digits.
  checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep ':#{port} ' | wc -l` -lt 1 ]] ; do sleep 1; done" }
  unless checker_th.join(timeout)
    # Do not leave the polling thread running after we have given up.
    checker_th.kill
    raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds"
  end
  true
end
142
+
143
# Returns true if the host is accessible via SSH, false otherwise.
# @available stays nil until an SSH command has been attempted, so in that
# case a no-op command is run first (errors ignored) to populate it.
def available?
  return @available unless @available.nil?
  begin
    ssh_cmd 'echo ping'
  rescue
    nil
  end
  @available
end
152
+
153
+ ###### ini file manipulation ###############################################
154
+
155
# Comments-out every line of the given ini file that begins with any of the
# supplied prefixes.
def comment_out_ini(file, *prefixes)
  toggle_ini file, prefixes, false
end
159
+
160
# Un-comments-out every line of the given ini file that begins with any of
# the supplied prefixes. Pass the prefixes WITHOUT the leading # comment
# character -- that is, exactly as you would pass them to comment_out_ini.
def uncomment_out_ini(file, *prefixes)
  toggle_ini file, prefixes, true
end
166
+
167
# Un-comments-out (if enable is true) or comments-out (if enable is false)
# lines of an ini file whose setting name matches one of the prefixes.
# file::     path of the ini file on this host
# prefixes:: a setting name, or an (optionally nested) array of them. Each is
#            inserted into a regular expression as-is.
# enable::   true to strip the leading '#', false to add one
# Runs one in-place ruby substitution per prefix in a single ssh_cmd call and
# returns that call's result. The caller's array is not modified.
def toggle_ini(file, prefixes, enable)
  commands = Array(prefixes).flatten.map do |setting|
    if enable
      search = '^#(\s*%s\s*(?:=.*)?)$' % setting
      replace = '\1'
    else
      search = '^(\s*%s\s*(?:=.*)?)$' % setting
      replace = '#\1'
    end
    "ruby -i -pe 'sub(%r[#{search}], %q[#{replace}])' #{file}"
  end
  ssh_cmd commands.join('; ')
end
184
+
185
+
186
+ ###### Directory Copying / Listing / Comparison methods ####################
187
+
188
# Quickly and efficiently recursively copies a directory to one or more target hosts.
# Requires that pigz is installed on source (self) and all targets.
# base_dir:: is base directory to copy from the source (self). Also the default destination base
#            directory on the targets, if not supplied via next param.
# targets:: is one of the following:
#           * Host object, or any object that delegates method_missing to a Host (such as DB)
#           * array of Host objects (or delegates)
#           * hash mapping Host objects (or delegates) to destination base directory overrides (as string)
#           A hash passed here is not modified.
# options:: is a hash that can contain --
#           * :files => only copy these filenames instead of entire base_dir. String, or Array of Strings.
#           * :port => port number to use for netcat. defaults to 7000 if omitted.
#           * :overwrite => if true, don't raise an exception if the base_dir is non-empty or :files exist. default false.
# Raises a RuntimeError when no targets are given, when a destination
# directory looks suspicious, when a destination already holds non-empty
# files (unless :overwrite), or when a chained FIFO does not appear in time.
# After the copy, the result is verified with compare_dir.
def fast_copy_chain(base_dir, targets, options={})
  # Normalize the filenames param so it is an array
  filenames = options[:files] || ['.']
  filenames = [filenames] unless filenames.respond_to?(:each)

  # Normalize the targets param, so that targets is an array of Hosts and
  # destinations is a NEW hash of hosts => dirs, each dir ending in a slash.
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  destinations = {}
  if targets.is_a? Hash
    targets.each {|t, d| destinations[t] = (d[-1] == '/' ? d : d + '/')}
    targets = targets.keys
  else
    targets.each {|t| destinations[t] = base_dir}
  end
  raise "No target hosts supplied" if targets.count < 1

  file_list = filenames.join ' '
  port = (options[:port] || 7000).to_i

  # On each destination host, do any initial setup (and optional validation/erasing),
  # and then listen for new files. If there are multiple destination hosts, all of them
  # except the last will use tee to "chain" the copy along to the next machine.
  # Listeners are started from the end of the chain backwards, so each host's
  # downstream neighbour is already listening when it connects.
  workers = []
  reversed = targets.reverse
  reversed.each_with_index do |t, i|
    dir = destinations[t]
    raise "Directory #{t}:#{dir} looks suspicious" if dir.include?('..') || dir.include?('./') || dir == '/' || dir == ''

    t.confirm_installed 'pigz'
    t.ssh_cmd "mkdir -p #{dir}"

    # Check if contents already exist / non-empty.
    # Note: doesn't do recursive scan of subdirectories
    unless options[:overwrite]
      all_paths = filenames.map {|f| dir + f}.join ' '
      dirlist = t.dir_list(all_paths)
      dirlist.each {|name, size| raise "File #{name} exists on destination and has nonzero size!" if size.to_i > 0}
    end

    if i == 0
      # Last host in the chain: just receive, decompress and unpack.
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat."
    else
      # Intermediate host: forward the incoming stream to the next host (tt)
      # through a FIFO while also unpacking it locally.
      tt = reversed[i-1]
      fifo = "fifo#{port}"
      workers << Thread.new { t.ssh_cmd "cd #{dir} && mkfifo #{fifo} && nc #{tt.ip} #{port} <#{fifo} && rm #{fifo}" }
      checker_th = Thread.new { t.ssh_cmd "while [ ! -p #{dir}/#{fifo} ] ; do sleep 1; done" }
      unless checker_th.join(10)
        # Do not leave the polling thread behind once we give up.
        checker_th.kill
        raise "FIFO not found on #{t} after 10 tries"
      end
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | tee #{fifo} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat, and chaining to #{tt}."
    end
  end

  # Start the copy chain.
  confirm_installed 'pigz'
  output "Sending files over to #{targets[0]}: #{file_list}"
  ssh_cmd "cd #{base_dir} && tar vc #{file_list} | pigz | nc #{targets[0].ip} #{port}"
  workers.each {|th| th.join}
  output "File copy complete."

  # Verify
  output "Verifying file sizes and types on all destinations."
  compare_dir base_dir, destinations, options
  output "Verification successful."
end
269
+
270
# Given the name of a directory or single file, returns a hash of filename => size of each file present.
# Subdirectories will be returned with a size of '/', so you can process these differently as needed.
# Only the final path component of each listed name is used as the key.
# Lines of ls output that do not look like a file entry are ignored, and a
# command that produced no output at all yields an empty hash.
# WARNING: This is brittle. It parses output of "ls". If anyone has a gem to do better remote file
# management via ssh, then please by all means send us a pull request!
def dir_list(dir)
  ls_out = ssh_cmd "ls --color=never -1AgGF #{dir}" # disable color, 1 file per line, all but . and .., hide owner+group, include type suffix
  result = {}
  ls_out.to_s.split("\n").each do |line|
    # The mode column may carry a trailing "." or "+" (GNU ls marks files that
    # have an SELinux context or ACL this way); accept it so those entries are
    # not silently skipped.
    next unless matches = line.match(/^[\w-]+[.+]?\s+\d+\s+(?<size>\d+).*(?:\d\d:\d\d|\d{4})\s+(?<name>.*)$/)
    file_name = matches[:name]
    # Drop the type indicator that -F appends (*, /, =, >, @ or |).
    file_name = file_name[0...-1] if file_name =~ %r![*/=>@|]$!
    result[file_name.split('/')[-1]] = (matches[:name][-1] == '/' ? '/' : matches[:size].to_i)
  end
  result
end
285
+
286
# Compares file existence and size between hosts. Param format identical to
# the first three params of Host#fast_copy_chain, except only supported option
# is :files.
# Walks the source listing recursively: every entry found on the source must
# be present with the same size (or also be a directory) on each target.
# Raises a RuntimeError on the first mismatch, or when no targets are given;
# otherwise returns true. A hash passed as targets is not modified.
def compare_dir(base_dir, targets, options={})
  # Normalize the filenames param so it is an array
  filenames = options[:files] || ['.']
  filenames = [filenames] unless filenames.respond_to?(:each)

  # Normalize the targets param, so that targets is an array of Hosts and
  # destinations is a NEW hash of hosts => dirs, each dir ending in a slash.
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  destinations = {}
  if targets.is_a? Hash
    targets.each {|t, d| destinations[t] = (d[-1] == '/' ? d : d + '/')}
    targets = targets.keys
  else
    targets.each {|t| destinations[t] = base_dir}
  end
  raise "No target hosts supplied" if targets.count < 1

  queue = filenames.map {|f| ['', f]}  # array of [subdir, filename] pairs
  while (tuple = queue.shift)
    subdir, filename = tuple
    source_dirlist = dir_list(base_dir + subdir + filename)
    destinations.each do |target, path|
      target_dirlist = target.dir_list(path + subdir + filename)
      source_dirlist.each do |name, size|
        target_size = target_dirlist[name] || 'MISSING'
        raise "Directory listing mismatch when comparing #{self}:#{base_dir}#{subdir}#{filename}/#{name} to #{target}:#{path}#{subdir}#{filename}/#{name} (size: #{size} vs #{target_size})" unless size == target_size
      end
    end
    # Subdirectories (size '/') are queued so they get compared as well.
    queue.concat(source_dirlist.map {|name, size| size == '/' ? [subdir + '/' + name, '/'] : nil}.compact)
  end
  true
end
323
+
324
# Recursively computes the total size of the files in dir, as reported by
# dir_list. Entries listed with a size of '/' are subdirectories and are
# descended into.
def dir_size(dir)
  dir_list(dir).inject(0) do |running_total, (name, size)|
    running_total + (size == '/' ? dir_size(dir + '/' + name) : size.to_i)
  end
end
332
+
333
+
334
+ ###### Misc methods ########################################################
335
+
336
# Performs the given operation ('start', 'stop', 'restart') on the specified
# service. Default implementation assumes RedHat/CentOS style /sbin/service.
# If you're using a distribution or OS that does not support /sbin/service,
# override this method with a plugin.
# Returns the output of the service command.
def service(operation, name)
  ssh_cmd("/sbin/service #{name} #{operation}")
end
343
+
344
# Changes the I/O scheduler to name (such as 'deadline', 'noop', 'cfq')
# for the specified device (default 'sda') by writing the name into the
# device's scheduler file under /sys/block.
def set_io_scheduler(name, device='sda')
  output "Setting I/O scheduler for #{device} to #{name}."
  scheduler_file = "/sys/block/#{device}/queue/scheduler"
  ssh_cmd "echo '#{name}' >#{scheduler_file}"
end
350
+
351
# Confirms that the specified binary is installed and on the shell path.
# Runs `which` on the host and raises a RuntimeError if it reports
# "no <program> in ..." or prints nothing at all; otherwise returns true.
def confirm_installed(program_name)
  out = ssh_cmd("which #{program_name}").to_s
  # Escape the name so programs such as "g++" are matched literally rather
  # than being interpreted as regular-expression syntax.
  not_found = out =~ /no #{Regexp.escape(program_name.to_s)} in /
  # Some `which` implementations print nothing when the program is missing.
  raise "#{program_name} not installed, or missing from path" if not_found || out.strip.empty?
  true
end
357
+
358
# Returns number of cores on machine, counted from the processor entries in
# /proc/cpuinfo. (reflects virtual cores if hyperthreading enabled, so might
# be 2x real value in that case.) Falls back to 1 when the command returns
# nothing. The value is cached after the first call.
# Not currently used by anything in Jetpants base, but might be useful for plugins
# that want to tailor the concurrency level to the machine's capabilities.
def cores
  @cores ||= begin
    count = ssh_cmd %q{cat /proc/cpuinfo|grep 'processor\s*:' | wc -l}
    count ? count.to_i : 1
  end
end
367
+
368
# Returns the machine's hostname, as printed by the `hostname` command on the
# host (trailing newline removed). Cached after the first call.
def hostname
  return @hostname if @hostname
  @hostname = ssh_cmd('hostname').chomp
end
372
+
373
# Prints the provided text on its own line, prefixed with the current time
# (HH:MM:SS) and self (the IP of this Host) in square brackets. Blank or nil
# text is replaced with "Completed (no output)". Returns the printed line
# without its trailing newline.
def output(str)
  message = str.to_s.strip
  message = "Completed (no output)" if message.empty?
  line = Time.now.strftime("%H:%M:%S") + " [#{self}] "
  line << message
  print line + "\n"
  line
end
384
+
385
# String form of a Host: its IP address.
def to_s
  @ip
end
389
+
390
# A Host is already a Host, so conversion simply hands back the receiver.
def to_host
  self
end
394
+
395
+ end
396
+ end
@@ -0,0 +1,74 @@
1
+ # This file contains any methods we're adding to core Ruby modules
2
+
3
# Reopen Enumerable to add some concurrent iterators
module Enumerable
  # Like each, but every item gets its own thread running the block. Waits
  # for all of the threads to finish, then returns self.
  def concurrent_each
    workers = collect { |*item| Thread.new { yield(*item) } }
    workers.each(&:join)
    self
  end

  # Like map, but every item gets its own thread running the block. Returns
  # the block results in the same order as the items.
  def concurrent_map
    workers = collect { |*item| Thread.new { yield(*item) } }
    workers.collect(&:value)
  end

  # Like each_with_index, but every item gets its own thread running the block.
  def concurrent_each_with_index(&block)
    indexed = each_with_index
    indexed.concurrent_each(&block)
  end
end
21
+
22
# Add Jetpants-specific conversion methods to Object.
class Object
  # Builds a Jetpants::Host from the receiver's to_s value. Only really
  # useful for Strings containing IP addresses, or Objects whose to_s method
  # returns an IP address as a string.
  def to_host
    Jetpants::Host.new(to_s)
  end

  # Builds a Jetpants::DB from the receiver's to_s value. Only really useful
  # for Strings containing IP addresses, or Objects whose to_s method returns
  # an IP address as a string.
  def to_db
    Jetpants::DB.new(to_s)
  end
end
38
+
39
class Range
  # Supply a block taking |chunk_min_id, chunk_max_id|. This will execute the block in
  # parallel chunks, supplying the min and max id (inclusive) of the current
  # chunk. Note that thread_limit is capped at the value of Jetpants.max_concurrency.
  # chunks::        desired number of chunks; the range is split into at most this many
  # thread_limit::  maximum number of worker threads (nil means no extra limit)
  # min_per_chunk:: smallest allowed chunk size
  # Returns an array of the block's results, in completion order (not
  # necessarily range order). An empty range returns []. An exception raised
  # by the block in any worker thread is re-raised here.
  def in_chunks(chunks, thread_limit=40, min_per_chunk=1)
    return [] if min.nil? # empty range, nothing to do

    # Round the chunk size UP, so that the range is covered by no more than
    # the requested number of chunks.
    per_chunk = Rational(max - min + 1, chunks).ceil
    per_chunk = min_per_chunk if per_chunk < min_per_chunk

    min_id_queue = []
    results = []
    min.step(max, per_chunk) {|n| min_id_queue << n}
    min_id_queue.reverse! # so that pop hands out the lowest ids first

    lock = Mutex.new
    thread_count = Jetpants.max_concurrency
    thread_count = thread_limit if thread_limit && thread_limit < Jetpants.max_concurrency
    thread_count = min_id_queue.length if min_id_queue.length < thread_count

    # Keep our own references to the workers so that every one of them is
    # joined, including those that have already finished or died.
    threads = Array.new(thread_count) do
      Thread.new do
        while (my_min = lock.synchronize { min_id_queue.pop })
          my_max = my_min + per_chunk - 1
          my_max = max if my_max > max
          result = yield my_min, my_max
          lock.synchronize {results << result}
        end
      end
    end
    threads.each {|th| th.join}
    results
  end
end