jetpants 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,396 @@
1
+ require 'net/ssh'
2
+ require 'socket'
3
+
4
+ module Jetpants
5
+
6
+ # Encapsulates a UNIX server that we can SSH to as root. Maintains a pool of SSH
7
+ # connections to the host as needed.
8
+ class Host
9
+ include CallbackHandler
10
+
11
+ @@all_hosts = {}
12
+ @@all_hosts_mutex = Mutex.new
13
+
14
+ # IP address of the Host, as a string.
15
+ attr_reader :ip
16
+
17
+ # We override Host.new so that attempting to create a duplicate Host object
18
+ # (that is, one with the same IP as an existing Host object) returns the
19
+ # original object.
20
def self.new(ip)
  # Registry access is serialized so that concurrent callers asking for the
  # same IP cannot each construct their own instance.
  @@all_hosts_mutex.synchronize do
    known = @@all_hosts[ip]
    if known.is_a?(self)
      known
    else
      # Whatever was cached is not an instance of the receiving class:
      # clear the slot first, then build and remember a fresh object.
      @@all_hosts[ip] = nil
      @@all_hosts[ip] = super
    end
  end
end
26
+
27
def initialize(ip)
  @available = nil       # nil means "not probed yet"; set by get_ssh_connection
  @lock = Mutex.new      # guards @connection_pool
  @connection_pool = []  # idle Net::SSH::Connection::Session objects
  @ip = ip
end
33
+
34
+ # Returns a Host object for the machine Jetpants is running on.
35
def self.local(interface='bond0')
  # This technique is adapted from Sergio Rubio Gracia's, described at
  # http://blog.frameos.org/2006/12/09/getting-network-interface-addresses-using-ioctl-pure-ruby-2/
  #
  # interface:: name of the network interface whose address is looked up.
  # Returns the Host for that address (via Host.new).
  # Raises whatever Socket#ioctl raises if the lookup fails.
  sock = Socket.new(Socket::AF_INET, Socket::SOCK_DGRAM, 0)
  begin
    # 16-byte interface name followed by 8 zeroed bytes for the kernel to
    # fill in.
    buf = [interface, ""].pack('a16h16')
    sock.ioctl(0x8915, buf) # SIOCGIFADDR
  ensure
    # Always release the descriptor, even when the ioctl raises.
    sock.close
  end
  # Bytes 20..23 of the returned buffer are read as the four address octets.
  ip_string = buf[20...24].unpack('C*').join '.'
  self.new(ip_string)
end
45
+
46
+ # Returns a Net::SSH::Connection::Session for the host. Verifies that the
47
+ # connection is working before returning it.
48
def get_ssh_connection
  # Up to five rounds: reuse an idle pooled session if there is one,
  # otherwise open a new one, then prove it works with a ping.
  5.times do |attempt|
    session = @lock.synchronize do
      @connection_pool.shift if @connection_pool.count > 0
    end

    unless session
      ssh_options = {
        :paranoid => false,
        :user_known_hosts_file => '/dev/null',
        :timeout => 5,
      }
      ssh_options[:keys] = Jetpants.ssh_keys if Jetpants.ssh_keys
      begin
        # New sessions are opened while holding the pool lock.
        session = @lock.synchronize { Net::SSH.start(@ip, 'root', ssh_options) }
      rescue => ex
        output "Unable to SSH on attempt #{attempt + 1}: #{ex.to_s}"
        next
      end
    end

    next unless session

    # Confirm that the connection works before handing it out.
    begin
      reply = session.exec!('echo ping').strip
      raise "Unexpected result" unless reply == 'ping'
      @available = true
      return session
    rescue
      # The session is simply dropped; the next round tries another one.
      output "Discarding nonfunctional SSH connection"
    end
  end

  @available = false
  raise "Unable to obtain working SSH connection to #{self} after 5 attempts"
end
91
+
92
+ # Adds a Net::SSH::Connection::Session to a pool of idle persistent connections.
93
def save_ssh_connection(conn)
  begin
    # A command is run first; if it raises, the session is never pooled.
    conn.exec! 'cd ~'
    @lock.synchronize { @connection_pool.push(conn) }
  rescue
    output "Discarding nonfunctional SSH connection"
  end
end
101
+
102
+ # Execute the given UNIX command string (or array of strings) as root via SSH.
103
+ # By default, if something is wrong with the SSH connection, the command
104
+ # will be attempted up to 3 times before an exception is thrown. Be sure
105
+ # to set this to 1 or false for commands that are not idempotent.
106
+ # Returns the result of the command executed. If cmd was an array of strings,
107
+ # returns the result of the LAST command executed.
108
def ssh_cmd(cmd, attempts=3)
  # nil/false means "do not retry": treat it as a single attempt.
  max_tries = attempts || 1
  session = get_ssh_connection
  commands = cmd.is_a?(Array) ? cmd : [cmd]

  last_output = nil
  commands.each do |command|
    failed = 0
    begin
      last_output = session.exec!(command)
    rescue
      failed += 1
      raise if failed >= max_tries
      output "Command \"#{command}\" failed, re-trying after delay"
      sleep(failed) # delay grows with each failure
      retry
    end
  end

  # Only reached when every command succeeded; a failed session is not pooled.
  save_ssh_connection session
  last_output
end
128
+
129
+ # Shortcut for use when a command is not idempotent and therefore
130
+ # isn't safe to retry if something goes wonky with the SSH connection.
131
def ssh_cmd!(cmd)
  # false disables retries in ssh_cmd, so the command runs at most once.
  ssh_cmd(cmd, false)
end
134
+
135
+ # Confirm that something is listening on the given port. The timeout param
136
+ # indicates how long to wait (in seconds) for a process to be listening.
137
def confirm_listening_on_port(port, timeout=10)
  # port::    port number expected to have a listener.
  # timeout:: seconds to wait before giving up.
  # Returns true once a listener is seen; raises RuntimeError on timeout.
  #
  # The pattern is anchored as ':PORT ' so that, for example, port 70 does
  # not match a listener on 7000.
  # NOTE(review): assumes netstat prints the local address as 'addr:PORT'
  # followed by whitespace (Linux net-tools format) -- confirm for other OSes.
  probe = "while [[ `netstat -ln | grep ':#{port} ' | wc -l` -lt 1 ]] ; do sleep 1; done"
  checker_th = Thread.new { ssh_cmd probe }
  unless checker_th.join(timeout)
    # Do not leave the polling thread running after we have given up.
    checker_th.kill
    raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds"
  end
  true
end
142
+
143
+ # Returns true if the host is accessible via SSH, false otherwise
144
def available?
  # Already probed: report the cached answer.
  return @available unless @available.nil?

  # Not probed yet: run a no-op command and ignore any failure. Whatever
  # value @available holds afterwards is the answer.
  begin
    ssh_cmd 'echo ping'
  rescue
    nil
  end
  @available
end
152
+
153
+ ###### ini file manipulation ###############################################
154
+
155
+ # Comments-out lines of an ini file beginning with any of the supplied prefixes
156
def comment_out_ini(file, *prefixes)
  enable = false # false makes toggle_ini prepend '#' to matching lines
  toggle_ini(file, prefixes, enable)
end
159
+
160
+ # Un-comments-out lines of an ini file beginning with any of the supplied prefixes
161
+ # The prefixes should NOT include the # comment-out character -- ie, pass them
162
+ # the same as you would to Host#comment_out_ini
163
def uncomment_out_ini(file, *prefixes)
  enable = true # true makes toggle_ini strip the leading '#' from matching lines
  toggle_ini(file, prefixes, enable)
end
166
+
167
+ # Un-comments-out (if enable is true) or comments-out (if enable is false) lines of an ini file.
168
def toggle_ini(file, prefixes, enable)
  # file::     path of the ini file on the remote host.
  # prefixes:: setting name, or (possibly nested) array of setting names.
  # enable::   true strips a leading '#' from matching lines (un-comment);
  #            false prepends '#' to matching lines (comment out).
  # Returns the result of the ssh_cmd call.
  #
  # Work on a flattened copy so the caller's array is never modified.
  settings = Array(prefixes).flatten

  commands = settings.map do |setting|
    if enable
      search  = '^#(\s*%s\s*(?:=.*)?)$' % setting
      replace = '\1'
    else
      search  = '^(\s*%s\s*(?:=.*)?)$' % setting
      replace = '#\1'
    end
    # One in-place substitution per setting, run with the remote host's ruby.
    # NOTE(review): setting and file are interpolated unescaped into both a
    # regexp and a shell command line; callers must pass trusted values.
    "ruby -i -pe 'sub(%r[#{search}], %q[#{replace}])' #{file}"
  end

  ssh_cmd commands.join('; ')
end
184
+
185
+
186
+ ###### Directory Copying / Listing / Comparison methods ####################
187
+
188
+ # Quickly and efficiently recursively copies a directory to one or more target hosts.
189
+ # Requires that pigz is installed on source (self) and all targets.
190
+ # base_dir:: is base directory to copy from the source (self). Also the default destination base
191
+ # directory on the targets, if not supplied via next param.
192
+ # targets:: is one of the following:
193
+ # * Host object, or any object that delegates method_missing to a Host (such as DB)
194
+ # * array of Host objects (or delegates)
195
+ # * hash mapping Host objects (or delegates) to destination base directory overrides (as string)
196
+ # options:: is a hash that can contain --
197
+ # * :files => only copy these filenames instead of entire base_dir. String, or Array of Strings.
198
+ # * :port => port number to use for netcat. defaults to 7000 if omitted.
199
+ # * :overwrite => if true, don't raise an exception if the base_dir is non-empty or :files exist. default false.
200
def fast_copy_chain(base_dir, targets, options={})
  # Returns the value of the final output call; raises RuntimeError when no
  # targets are given, a destination looks unsafe or already holds data
  # (unless :overwrite), a FIFO never appears, or verification fails.
  # NOTE(review): directory and file names are interpolated unescaped into
  # shell command lines; callers must pass trusted values.

  # Normalize the filenames param so it is an array
  filenames = options[:files] || ['.']
  filenames = [filenames] unless filenames.respond_to?(:each)

  # Normalize the targets param, so that targets is an array of Hosts and
  # destinations is a hash of hosts => dirs (each dir ending in '/').
  # destinations is always a fresh hash: the caller's hash is left untouched.
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  destinations = {}
  if targets.is_a? Hash
    targets.each {|t, d| destinations[t] = (d[-1] == '/' ? d : d + '/')}
    targets = targets.keys
  else
    targets.each {|t| destinations[t] = base_dir}
  end
  raise "No target hosts supplied" if targets.count < 1

  file_list = filenames.join ' '
  port = (options[:port] || 7000).to_i

  # On each destination host, do any initial setup (and optional validation/erasing),
  # and then listen for new files. If there are multiple destination hosts, all of them
  # except the last will use tee to "chain" the copy along to the next machine.
  # Listeners are started from the end of the chain backwards so that every
  # host's downstream neighbour is already listening.
  workers = []
  chain = targets.reverse
  chain.each_with_index do |t, i|
    dir = destinations[t]
    raise "Directory #{t}:#{dir} looks suspicious" if dir.include?('..') || dir.include?('./') || dir == '/' || dir == ''

    t.confirm_installed 'pigz'
    t.ssh_cmd "mkdir -p #{dir}"

    # Check if contents already exist / non-empty.
    # Note: doesn't do recursive scan of subdirectories
    unless options[:overwrite]
      all_paths = filenames.map {|f| dir + f}.join ' '
      dirlist = t.dir_list(all_paths)
      dirlist.each {|name, size| raise "File #{name} exists on destination and has nonzero size!" if size.to_i > 0}
    end

    if i == 0
      # Last host in the chain: just receive and unpack.
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat."
    else
      # Intermediate host: unpack locally and forward the raw stream to the
      # next host (tt) through a named pipe.
      tt = chain[i-1]
      fifo = "fifo#{port}"
      workers << Thread.new { t.ssh_cmd "cd #{dir} && mkfifo #{fifo} && nc #{tt.ip} #{port} <#{fifo} && rm #{fifo}" }
      checker_th = Thread.new { t.ssh_cmd "while [ ! -p #{dir}/#{fifo} ] ; do sleep 1; done" }
      unless checker_th.join(10)
        # Do not leave the polling thread running once we have given up.
        checker_th.kill
        raise "FIFO not found on #{t} after 10 tries"
      end
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | tee #{fifo} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat, and chaining to #{tt}."
    end
  end

  # Start the copy chain.
  confirm_installed 'pigz'
  output "Sending files over to #{targets[0]}: #{file_list}"
  ssh_cmd "cd #{base_dir} && tar vc #{file_list} | pigz | nc #{targets[0].ip} #{port}"
  workers.each {|th| th.join}
  output "File copy complete."

  # Verify
  output "Verifying file sizes and types on all destinations."
  compare_dir base_dir, destinations, options
  output "Verification successful."
end
269
+
270
+ # Given the name of a directory or single file, returns a hash of filename => size of each file present.
271
+ # Subdirectories will be returned with a size of '/', so you can process these differently as needed.
272
+ # WARNING: This is brittle. It parses output of "ls". If anyone has a gem to do better remote file
273
+ # management via ssh, then please by all means send us a pull request!
274
def dir_list(dir)
  # dir:: path (or space-separated paths) handed to ls on the remote host.
  # Returns a hash of base file name => size in bytes (Integer), or '/' for
  # subdirectories. Lines that do not look like a long-format entry (such as
  # the "total N" header or error text) are ignored.
  ls_out = ssh_cmd "ls --color=never -1AgGF #{dir}" # disable color, 1 file per line, all but . and .., hide owner+group, include type suffix
  result = {}
  # to_s guards against ssh_cmd returning nil when there was no output.
  ls_out.to_s.split("\n").each do |line|
    # The mode string may carry a trailing '.' (SELinux context) or '+'
    # (ACL); without allowing for it every entry on such hosts is skipped.
    next unless matches = line.match(/^[\w-]+[.+]?\s+\d+\s+(?<size>\d+).*(?:\d\d:\d\d|\d{4})\s+(?<name>.*)$/)
    file_name = matches[:name]
    # Strip the type indicator appended by ls -F.
    file_name = file_name[0...-1] if file_name =~ %r![*/=>@|]$!
    result[file_name.split('/')[-1]] = (matches[:name][-1] == '/' ? '/' : matches[:size].to_i)
  end
  result
end
285
+
286
+ # Compares file existence and size between hosts. Param format identical to
287
+ # the first three params of Host#fast_copy_chain, except only supported option
288
+ # is :files.
289
+ # Raises an exception if the files don't exactly match, otherwise returns true.
290
def compare_dir(base_dir, targets, options={})
  # Returns true when every entry listed on the source exists on every
  # target with the same size (or is a directory on both); raises
  # RuntimeError on the first mismatch or when no targets are supplied.
  # Entries that exist only on a target are not detected.

  # Normalize the filenames param so it is an array
  filenames = options[:files] || ['.']
  filenames = [filenames] unless filenames.respond_to?(:each)

  # Normalize the targets param, so that targets is an array of Hosts and
  # destinations is a hash of hosts => dirs (each dir ending in '/').
  # destinations is always a fresh hash: the caller's hash is left untouched.
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  destinations = {}
  if targets.is_a? Hash
    targets.each {|t, d| destinations[t] = (d[-1] == '/' ? d : d + '/')}
    targets = targets.keys
  else
    targets.each {|t| destinations[t] = base_dir}
  end
  raise "No target hosts supplied" if targets.count < 1

  queue = filenames.map {|f| ['', f]}  # array of [subdir, filename] pairs
  while (tuple = queue.shift)
    subdir, filename = tuple
    source_dirlist = dir_list(base_dir + subdir + filename)
    destinations.each do |target, path|
      target_dirlist = target.dir_list(path + subdir + filename)
      source_dirlist.each do |name, size|
        target_size = target_dirlist[name] || 'MISSING'
        raise "Directory listing mismatch when comparing #{self}:#{base_dir}#{subdir}#{filename}/#{name} to #{target}:#{path}#{subdir}#{filename}/#{name} (size: #{size} vs #{target_size})" unless size == target_size
      end
    end

    # Queue each subdirectory for its own pass. The child path is built from
    # the directory just listed, so recursion below a named :files directory
    # keeps that directory in the path. '.' and a trailing '/' contribute
    # nothing and are dropped.
    parent = (subdir + filename).chomp('/')
    parent = '' if parent == '.'
    source_dirlist.each do |name, size|
      queue << [parent + '/' + name, '/'] if size == '/'
    end
  end
  true
end
323
+
324
+ # Recursively computes size of files in dir
325
def dir_size(dir)
  # Fold over the listing: plain files add their size, subdirectories
  # (marked '/' by dir_list) add the recursive size of their contents.
  dir_list(dir).inject(0) do |bytes, (name, size)|
    bytes + (size == '/' ? dir_size(dir + '/' + name) : size.to_i)
  end
end
332
+
333
+
334
+ ###### Misc methods ########################################################
335
+
336
+ # Performs the given operation ('start', 'stop', 'restart') on the specified
337
+ # service. Default implementation assumes RedHat/CentOS style /sbin/service.
338
+ # If you're using a distribution or OS that does not support /sbin/service,
339
+ # override this method with a plugin.
340
def service(operation, name)
  # Note the argument order on the command line: service name, then operation.
  command = format('/sbin/service %s %s', name, operation)
  ssh_cmd command
end
343
+
344
+ # Changes the I/O scheduler to name (such as 'deadline', 'noop', 'cfq')
345
+ # for the specified device.
346
def set_io_scheduler(name, device='sda')
  output "Setting I/O scheduler for #{device} to #{name}."
  # The scheduler is switched by writing its name to the device's sysfs file.
  sysfs_path = "/sys/block/#{device}/queue/scheduler"
  ssh_cmd "echo '#{name}' >#{sysfs_path}"
end
350
+
351
+ # Confirms that the specified binary is installed and on the shell path.
352
def confirm_installed(program_name)
  # program_name:: name of the binary to look up with `which`.
  # Returns true when the binary is found; raises RuntimeError otherwise.
  out = ssh_cmd("which #{program_name}").to_s

  # Two shapes of "not found" are recognised: the "no X in (...)" message,
  # and no output at all. The name is escaped so that programs such as
  # "g++" are matched literally instead of being read as regexp syntax.
  not_found = out.strip.empty? || out =~ /no #{Regexp.escape(program_name.to_s)} in /
  raise "#{program_name} not installed, or missing from path" if not_found
  true
end
357
+
358
+ # Returns number of cores on machine. (reflects virtual cores if hyperthreading
359
+ # enabled, so might be 2x real value in that case.)
360
+ # Not currently used by anything in Jetpants base, but might be useful for plugins
361
+ # that want to tailor the concurrency level to the machine's capabilities.
362
def cores
  # Memoized: the remote count is fetched at most once per Host.
  @cores ||= begin
    raw = ssh_cmd %q{cat /proc/cpuinfo|grep 'processor\s*:' | wc -l}
    # Fall back to 1 when the command produced nothing.
    raw ? raw.to_i : 1
  end
end
367
+
368
+ # Returns the machine's hostname
369
def hostname
  # Memoized: only the first call runs the remote command.
  return @hostname if @hostname
  @hostname = ssh_cmd('hostname').chomp
end
372
+
373
+ # Displays the provided output, along with information about the current time,
374
+ # and self (the IP of this Host)
375
def output(str)
  message = str.to_s.strip
  # Blank (or nil) input is reported with a placeholder instead.
  message = "Completed (no output)" if message.empty?
  line = Time.now.strftime("%H:%M:%S") + " [#{self}] " + message
  print line + "\n"
  # The formatted line (without the newline) is also handed back to the caller.
  line
end
384
+
385
+ # Returns the host's IP address as a string.
386
+ def to_s
387
+ return @ip
388
+ end
389
+
390
+ # Returns self, since this object is already a Host.
391
def to_host
  # Already a Host: no conversion required.
  self
end
394
+
395
+ end
396
+ end
@@ -0,0 +1,74 @@
1
+ # This file contains any methods we're adding to core Ruby modules
2
+
3
+ # Reopen Enumerable to add some concurrent iterators
4
module Enumerable
  # Like each, but every item's block invocation runs in its own thread.
  # Waits for all threads to finish, then returns the receiver.
  def concurrent_each
    workers = map { |*item| Thread.new { yield(*item) } }
    workers.each(&:join)
    self
  end

  # Like map, but every item's block invocation runs in its own thread.
  # Results are returned in the receiver's iteration order.
  def concurrent_map
    workers = map { |*item| Thread.new { yield(*item) } }
    workers.map(&:value)
  end

  # Like each_with_index, but every (item, index) pair is handled in its own
  # thread.
  def concurrent_each_with_index(&block)
    each_with_index.concurrent_each(&block)
  end
end
21
+
22
+ # Add Jetpants-specific conversion methods to Object.
23
class Object
  # Builds a Jetpants::Host from this object's to_s value. Intended for
  # Strings holding an IP address, or objects whose to_s yields one.
  def to_host
    Jetpants::Host.new(to_s)
  end

  # Builds a Jetpants::DB from this object's to_s value. Intended for
  # Strings holding an IP address, or objects whose to_s yields one.
  def to_db
    Jetpants::DB.new(to_s)
  end
end
38
+
39
class Range
  # Supply a block taking |chunk_min_id, chunk_max_id|. This will execute the block in
  # parallel chunks, supplying the min and max id (inclusive) of the current
  # chunk. Note that thread_limit is capped at the value of Jetpants.max_concurrency.
  #
  # chunks::        desired number of chunks. The chunk size is the range
  #                 size divided by this, rounded UP, so no more than this
  #                 many chunks are produced.
  # thread_limit::  upper bound on worker threads (nil for no extra bound).
  # min_per_chunk:: smallest allowed chunk size.
  #
  # Returns an array of the block's return values, in completion order.
  # An exception raised by the block in any worker is re-raised here.
  def in_chunks(chunks, thread_limit=40, min_per_chunk=1)
    range_min = min
    range_max = max

    # Exact ceiling division; plain Integer#/ would round down first.
    per_chunk = ((range_max - range_min + 1) / chunks.to_r).ceil
    per_chunk = min_per_chunk if per_chunk < min_per_chunk

    # Starting ids of every chunk, reversed so pop hands them out ascending.
    min_id_queue = []
    results = []
    range_min.step(range_max, per_chunk) {|n| min_id_queue << n}
    min_id_queue.reverse!

    lock = Mutex.new
    thread_count = Jetpants.max_concurrency
    thread_count = thread_limit if thread_limit && thread_limit < thread_count
    thread_count = min_id_queue.length if min_id_queue.length < thread_count

    # Keep the threads in an array (not a ThreadGroup) so that threads which
    # have already died are still joined and their exceptions surface.
    workers = Array.new(thread_count) do
      Thread.new do
        loop do
          my_min = lock.synchronize { min_id_queue.pop }
          break unless my_min
          my_max = my_min + per_chunk - 1
          my_max = range_max if my_max > range_max
          result = yield my_min, my_max
          lock.synchronize { results << result }
        end
      end
    end
    workers.each {|th| th.join}
    results
  end
end