jetpants 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -0
- data/README.rdoc +88 -0
- data/bin/jetpants +442 -0
- data/doc/commands.rdoc +119 -0
- data/doc/configuration.rdoc +27 -0
- data/doc/plugins.rdoc +120 -0
- data/doc/requirements.rdoc +54 -0
- data/etc/jetpants.yaml.sample +58 -0
- data/lib/jetpants.rb +100 -0
- data/lib/jetpants/callback.rb +131 -0
- data/lib/jetpants/db.rb +122 -0
- data/lib/jetpants/db/client.rb +103 -0
- data/lib/jetpants/db/import_export.rb +330 -0
- data/lib/jetpants/db/privileges.rb +89 -0
- data/lib/jetpants/db/replication.rb +226 -0
- data/lib/jetpants/db/server.rb +79 -0
- data/lib/jetpants/db/state.rb +212 -0
- data/lib/jetpants/host.rb +396 -0
- data/lib/jetpants/monkeypatch.rb +74 -0
- data/lib/jetpants/pool.rb +272 -0
- data/lib/jetpants/shard.rb +311 -0
- data/lib/jetpants/table.rb +146 -0
- data/lib/jetpants/topology.rb +144 -0
- data/plugins/simple_tracker/db.rb +23 -0
- data/plugins/simple_tracker/pool.rb +70 -0
- data/plugins/simple_tracker/shard.rb +76 -0
- data/plugins/simple_tracker/simple_tracker.rb +74 -0
- data/plugins/simple_tracker/topology.rb +66 -0
- data/tasks/promotion.rb +260 -0
- metadata +191 -0
@@ -0,0 +1,396 @@
|
|
1
|
+
require 'net/ssh'
|
2
|
+
require 'socket'
|
3
|
+
|
4
|
+
module Jetpants
|
5
|
+
|
6
|
+
# Encapsulates a UNIX server that we can SSH to as root. Maintains a pool of SSH
|
7
|
+
# connections to the host as needed.
|
8
|
+
class Host
|
9
|
+
include CallbackHandler
|
10
|
+
|
11
|
+
@@all_hosts = {}
|
12
|
+
@@all_hosts_mutex = Mutex.new
|
13
|
+
|
14
|
+
# IP address of the Host, as a string.
|
15
|
+
attr_reader :ip
|
16
|
+
|
17
|
+
# We override Host.new so that attempting to create a duplicate Host object
|
18
|
+
# (that is, one with the same IP as an existing Host object) returns the
|
19
|
+
# original object.
|
20
|
+
# Returns the Host registered for +ip+, building and registering one on first
# use. The registry lookup and insert happen inside the class-level mutex.
def self.new(ip)
  @@all_hosts_mutex.synchronize do
    # Drop any registry entry that is not an instance of this class.
    cached = @@all_hosts[ip]
    @@all_hosts[ip] = nil unless cached.is_a?(self)
    @@all_hosts[ip] || (@@all_hosts[ip] = super(ip))
  end
end
|
26
|
+
|
27
|
+
# Sets up a Host for the given IP address string. No SSH connection is opened
# here.
def initialize(ip)
  @available       = nil        # stays nil until an SSH attempt sets it
  @lock            = Mutex.new  # guards @connection_pool
  @connection_pool = []         # idle Net::SSH::Connection::Session objects
  @ip              = ip
end
|
33
|
+
|
34
|
+
# Returns a Host object for the machine Jetpants is running on.
|
35
|
+
# Returns a Host object for the machine Jetpants is running on.
#
# interface:: name of the network interface whose IPv4 address identifies
#             this machine. Defaults to 'bond0'.
#
# Raises whatever Socket#ioctl raises if the interface cannot be queried.
def self.local(interface='bond0')
  # This technique is adapted from Sergio Rubio Gracia's, described at
  # http://blog.frameos.org/2006/12/09/getting-network-interface-addresses-using-ioctl-pure-ruby-2/
  sock = Socket.new(Socket::AF_INET, Socket::SOCK_DGRAM, 0)
  begin
    buf = [interface, ""].pack('a16h16')
    sock.ioctl(0x8915, buf) # SIOCGIFADDR
  ensure
    # Close the socket even when ioctl raises (e.g. unknown interface).
    sock.close
  end
  # Bytes 20..23 of the buffer are read as the four address octets.
  ip_string = buf[20...24].unpack('C*').join('.')
  self.new(ip_string)
end
|
45
|
+
|
46
|
+
# Returns a Net::SSH::Connection::Session for the host. Verifies that the
|
47
|
+
# connection is working before returning it.
|
48
|
+
# Returns a working Net::SSH::Connection::Session for this host.
#
# Up to five attempts are made. Each attempt takes an idle session from the
# pool if one exists, otherwise opens a new session as root. The session is
# only returned after it answers an 'echo ping' round trip.
#
# Sets @available to true on success and to false on failure.
# Raises RuntimeError if no working session could be obtained.
def get_ssh_connection
  max_attempts = 5
  max_attempts.times do |attempt|
    conn = nil
    @lock.synchronize do
      conn = @connection_pool.shift if @connection_pool.count > 0
    end

    unless conn
      params = {
        :paranoid => false,
        :user_known_hosts_file => '/dev/null',
        :timeout => 5,
      }
      params[:keys] = Jetpants.ssh_keys if Jetpants.ssh_keys
      begin
        @lock.synchronize do
          conn = Net::SSH.start(@ip, 'root', params)
        end
      rescue => ex
        output "Unable to SSH on attempt #{attempt + 1}: #{ex.to_s}"
        next
      end
    end

    # Confirm that the connection works
    begin
      result = conn.exec!('echo ping').strip
      raise "Unexpected result" unless result == 'ping'
      @available = true
      return conn
    rescue
      output "Discarding nonfunctional SSH connection"
      # Hard-close the broken session so its socket is not left open; errors
      # during shutdown are irrelevant because the session is being dropped.
      begin
        conn.shutdown! if conn.respond_to?(:shutdown!)
      rescue
        nil
      end
    end
  end
  @available = false
  raise "Unable to obtain working SSH connection to #{self} after #{max_attempts} attempts"
end
|
91
|
+
|
92
|
+
# Adds a Net::SSH::Connection::Session to a pool of idle persistent connections.
|
93
|
+
# Returns +conn+ to the pool of idle sessions for later reuse. A trivial
# command is run first; if it raises, the session is reported as
# nonfunctional and is not pooled.
def save_ssh_connection(conn)
  begin
    conn.exec!('cd ~')
    @lock.synchronize { @connection_pool.push(conn) }
  rescue
    output("Discarding nonfunctional SSH connection")
  end
end
|
101
|
+
|
102
|
+
# Execute the given UNIX command string (or array of strings) as root via SSH.
|
103
|
+
# By default, if something is wrong with the SSH connection, the command
|
104
|
+
# will be attempted up to 3 times before an exception is thrown. Be sure
|
105
|
+
# to set this to 1 or false for commands that are not idempotent.
|
106
|
+
# Returns the result of the command executed. If cmd was an array of strings,
|
107
|
+
# returns the result of the LAST command executed.
|
108
|
+
# Executes a UNIX command string, or an array of them, as root via SSH.
#
# cmd::      a String, or an Array of Strings run in order.
# attempts:: how many times each command may be tried before its exception
#            is re-raised. nil or false is treated as 1. Use 1 or false for
#            commands that are not idempotent.
#
# Returns the result of the last command executed.
# Raises the last exception from the session once attempts are exhausted, or
# whatever get_ssh_connection raises if a replacement session cannot be
# obtained.
def ssh_cmd(cmd, attempts=3)
  attempts ||= 1
  conn = get_ssh_connection
  commands = cmd.is_a?(Array) ? cmd : [cmd]
  result = nil
  commands.each do |c|
    failures = 0
    begin
      result = conn.exec! c
    rescue
      failures += 1
      raise if failures >= attempts
      output "Command \"#{c}\" failed, re-trying after delay"
      sleep(failures)
      # Retrying on the session that just raised would most likely fail the
      # same way, so drop it and continue on a freshly verified one.
      begin
        conn.shutdown! if conn.respond_to?(:shutdown!)
      rescue
        nil
      end
      conn = get_ssh_connection
      retry
    end
  end
  save_ssh_connection conn
  return result
end
|
128
|
+
|
129
|
+
# Shortcut for use when a command is not idempotent and therefore
|
130
|
+
# isn't safe to retry if something goes wonky with the SSH connection.
|
131
|
+
# Runs +cmd+ through ssh_cmd with retries disabled, for commands that are not
# safe to repeat.
def ssh_cmd!(cmd)
  ssh_cmd(cmd, false)
end
|
134
|
+
|
135
|
+
# Confirm that something is listening on the given port. The timeout param
|
136
|
+
# indicates how long to wait (in seconds) for a process to be listening.
|
137
|
+
# Confirms that something is listening on the given port of this host.
#
# port::    port number to look for in `netstat -ln` output.
# timeout:: how long to wait (in seconds) for a listener to appear.
#
# Returns true once a listener is seen.
# Raises RuntimeError if none appears within +timeout+ seconds.
def confirm_listening_on_port(port, timeout=10)
  # Match ":<port>" followed by whitespace, so that a bare substring match
  # (another port such as 33060 for 3306, or a socket inode number) does not
  # count as a listener.
  checker_th = Thread.new do
    ssh_cmd "while [[ `netstat -ln | grep -E ':#{port}[[:space:]]' | wc -l` -lt 1 ]] ; do sleep 1; done"
  end
  unless checker_th.join(timeout)
    # Stop the local polling thread instead of leaving it running forever.
    checker_th.kill
    raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds"
  end
  true
end
|
142
|
+
|
143
|
+
# Returns true if the host is accessible via SSH, false otherwise
|
144
|
+
# Reports the value of @available. When no SSH attempt has set it yet (it is
# still nil), a no-op command is run first; any error from that command is
# swallowed.
def available?
  if @available.nil?
    begin
      ssh_cmd 'echo ping'
    rescue
      nil
    end
  end
  @available
end
|
152
|
+
|
153
|
+
###### ini file manipulation ###############################################
|
154
|
+
|
155
|
+
# Comments-out lines of an ini file beginning with any of the supplied prefixes
|
156
|
+
# Comments out the lines of +file+ that begin with any of +prefixes+, by
# delegating to toggle_ini with enable = false.
def comment_out_ini(file, *prefixes)
  enable = false
  toggle_ini(file, prefixes, enable)
end
|
159
|
+
|
160
|
+
# Un-comments-out lines of an ini file beginning with any of the supplied prefixes
|
161
|
+
# The prefixes should NOT include the # comment-out character -- ie, pass them
|
162
|
+
# the same as you would to DB#comment_out_ini
|
163
|
+
# Removes the leading '#' from lines of +file+ that begin with any of
# +prefixes+, by delegating to toggle_ini with enable = true.
def uncomment_out_ini(file, *prefixes)
  enable = true
  toggle_ini(file, prefixes, enable)
end
|
166
|
+
|
167
|
+
# Un-comments-out (if enable is true) or comments-out (if enable is false) lines of an ini file.
|
168
|
+
# Rewrites +file+ on this host in place, one `ruby -i -pe` command per prefix.
#
# file::     path of the ini file on the host.
# prefixes:: a setting name or a (possibly nested) array of setting names.
#            Each name is inserted into a regular expression as-is. The
#            caller's array is not modified.
# enable::   when true, a leading '#' is stripped from matching lines; when
#            false, a '#' is prepended to matching lines.
#
# Returns the result of ssh_cmd for the combined command line.
def toggle_ini(file, prefixes, enable)
  settings = Array(prefixes).flatten
  commands = settings.map do |setting|
    if enable
      search  = '^#(\s*%s\s*(?:=.*)?)$' % setting
      replace = '\1'
    else
      search  = '^(\s*%s\s*(?:=.*)?)$' % setting
      replace = '#\1'
    end
    "ruby -i -pe 'sub(%r[#{search}], %q[#{replace}])' #{file}"
  end
  ssh_cmd commands.join('; ')
end
|
184
|
+
|
185
|
+
|
186
|
+
###### Directory Copying / Listing / Comparison methods ####################
|
187
|
+
|
188
|
+
# Quickly and efficiently recursively copies a directory to one or more target hosts.
|
189
|
+
# Requires that pigz is installed on source (self) and all targets.
|
190
|
+
# base_dir:: is base directory to copy from the source (self). Also the default destination base
|
191
|
+
# directory on the targets, if not supplied via next param.
|
192
|
+
# targets:: is one of the following:
|
193
|
+
# * Host object, or any object that delegates method_missing to a Host (such as DB)
|
194
|
+
# * array of Host objects (or delegates)
|
195
|
+
# * hash mapping Host objects (or delegates) to destination base directory overrides (as string)
|
196
|
+
# options:: is a hash that can contain --
|
197
|
+
# * :files => only copy these filenames instead of entire base_dir. String, or Array of Strings.
|
198
|
+
# * :port => port number to use for netcat. defaults to 7000 if omitted.
|
199
|
+
# * :overwrite => if true, don't raise an exception if the base_dir is non-empty or :files exist. default false.
|
200
|
+
# Streams base_dir (or just options[:files]) from this host to every target
# through a tar | pigz | nc pipeline. The first target receives from this
# host; each target except the last also forwards the stream to the next one
# through a FIFO. Finishes by calling compare_dir on the destinations.
#
# See the parameter description in the comment block preceding this method.
# Raises RuntimeError for an empty target list, a suspicious destination
# directory, pre-existing non-empty destination files (unless :overwrite),
# or a FIFO that does not appear within 10 seconds.
def fast_copy_chain(base_dir, targets, options={})
  # Normalize the :files option into an array of names
  file_names = options[:files] || ['.']
  file_names = [file_names] unless file_names.respond_to?(:each)

  # Normalize targets into an array of hosts, plus a host => directory hash
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  if targets.is_a?(Hash)
    destinations = targets
    destinations.each { |host, path| destinations[host] += '/' unless path[-1] == '/' }
    targets = targets.keys
  else
    destinations = {}
    targets.each { |host| destinations[host] = base_dir }
  end
  raise "No target hosts supplied" if targets.count < 1

  file_list = file_names.join(' ')
  port = (options[:port] || 7000).to_i

  # Set up listeners starting from the END of the chain, so that every host
  # already has its downstream neighbor listening when it starts forwarding.
  workers = []
  chain = targets.reverse
  chain.each_with_index do |t, i|
    dir = destinations[t]
    suspicious = dir.include?('..') || dir.include?('./') || dir == '/' || dir == ''
    raise "Directory #{t}:#{dir} looks suspicious" if suspicious

    t.confirm_installed 'pigz'
    t.ssh_cmd "mkdir -p #{dir}"

    # Refuse to clobber existing non-empty files (top level only, no
    # recursive scan of subdirectories).
    unless options[:overwrite]
      all_paths = file_names.map { |f| dir + f }.join(' ')
      t.dir_list(all_paths).each do |name, size|
        raise "File #{name} exists on destination and has nonzero size!" if size.to_i > 0
      end
    end

    if i == 0
      # Last host in the chain: just receive and unpack.
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat."
    else
      # Intermediate host: receive, unpack, and tee the raw stream into a
      # FIFO that a second netcat forwards to the next host.
      tt = chain[i-1]
      fifo = "fifo#{port}"
      workers << Thread.new { t.ssh_cmd "cd #{dir} && mkfifo #{fifo} && nc #{tt.ip} #{port} <#{fifo} && rm #{fifo}" }
      checker_th = Thread.new { t.ssh_cmd "while [ ! -p #{dir}/#{fifo} ] ; do sleep 1; done" }
      raise "FIFO not found on #{t} after 10 tries" unless checker_th.join(10)
      workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | tee #{fifo} | pigz -d | tar xvf -" }
      t.confirm_listening_on_port port
      t.output "Listening with netcat, and chaining to #{tt}."
    end
  end

  # Start the copy chain.
  confirm_installed 'pigz'
  first_target = targets[0]
  output "Sending files over to #{first_target}: #{file_list}"
  ssh_cmd "cd #{base_dir} && tar vc #{file_list} | pigz | nc #{first_target.ip} #{port}"
  workers.each { |th| th.join }
  output "File copy complete."

  # Verify
  output "Verifying file sizes and types on all destinations."
  compare_dir base_dir, destinations, options
  output "Verification successful."
end
|
269
|
+
|
270
|
+
# Given the name of a directory or single file, returns a hash of filename => size of each file present.
|
271
|
+
# Subdirectories will be returned with a size of '/', so you can process these differently as needed.
|
272
|
+
# WARNING: This is brittle. It parses output of "ls". If anyone has a gem to do better remote file
|
273
|
+
# management via ssh, then please by all means send us a pull request!
|
274
|
+
# Lists a directory (or a single file) on this host by parsing `ls` output.
#
# dir:: path, or space-separated paths, passed to ls unquoted.
#
# Returns a hash of file name => size in bytes (Integer). Subdirectories map
# to the string '/' instead of a size. Lines that do not look like a long
# listing entry (such as the "total N" header) are ignored. Returns an empty
# hash when the command produced no output.
#
# WARNING: brittle, since it depends on the column layout of ls.
def dir_list(dir)
  # disable color, 1 file per line, all but . and .., hide owner+group, include type suffix
  ls_out = ssh_cmd "ls --color=never -1AgGF #{dir}"
  result = {}
  ls_out.to_s.split("\n").each do |line|
    # The permission column may carry a trailing '.', '+' or '@' (SELinux
    # context / ACL / extended attribute markers). Without allowing for it,
    # every entry on such a system would be skipped.
    matches = line.match(/^[\w-]+[.+@]?\s+\d+\s+(?<size>\d+).*(?:\d\d:\d\d|\d{4})\s+(?<name>.*)$/)
    next unless matches
    raw_name = matches[:name]
    file_name = raw_name
    # Strip the single type-indicator character appended by ls -F
    file_name = file_name[0...-1] if file_name =~ %r![*/=>@|]$!
    result[file_name.split('/')[-1]] = (raw_name[-1] == '/' ? '/' : matches[:size].to_i)
  end
  result
end
|
285
|
+
|
286
|
+
# Compares file existence and size between hosts. Param format identical to
|
287
|
+
# the first three params of Host#fast_copy_chain, except only supported option
|
288
|
+
# is :files.
|
289
|
+
# Raises an exception if the files don't exactly match, otherwise returns true.
|
290
|
+
# Compares file existence and size between this host and one or more targets.
#
# base_dir:: directory on this host to compare from; also the directory used
#            on each target unless overridden.
# targets::  a host, an array of hosts, or a hash of host => directory. A
#            hash passed in is not modified.
# options::  only :files is used (a name or array of names relative to the
#            base directory; defaults to the whole directory).
#
# Subdirectories are compared recursively. Returns true when everything
# matches. Raises RuntimeError on the first missing file or size mismatch,
# or when no targets are supplied.
def compare_dir(base_dir, targets, options={})
  # Normalize the :files option so it is an array
  filenames = options[:files] || ['.']
  filenames = [filenames] unless filenames.respond_to?(:each)

  # Build a fresh host => directory hash, each directory ending in '/'
  targets = [targets] unless targets.respond_to?(:each)
  base_dir += '/' unless base_dir[-1] == '/'
  destinations = {}
  if targets.is_a? Hash
    targets.each { |t, d| destinations[t] = (d[-1] == '/' ? d : d + '/') }
  else
    targets.each { |t| destinations[t] = base_dir }
  end
  raise "No target hosts supplied" if destinations.empty?

  queue = filenames.map {|f| ['', f]} # array of [subdir, filename] pairs
  while (tuple = queue.shift)
    subdir, filename = tuple
    source_dirlist = dir_list(base_dir + subdir + filename)
    destinations.each do |target, path|
      target_dirlist = target.dir_list(path + subdir + filename)
      source_dirlist.each do |name, size|
        target_size = target_dirlist[name] || 'MISSING'
        raise "Directory listing mismatch when comparing #{self}:#{base_dir}#{subdir}#{filename}/#{name} to #{target}:#{path}#{subdir}#{filename}/#{name} (size: #{size} vs #{target_size})" unless size == target_size
      end
    end
    # Queue each subdirectory using the path of the entry just listed, so a
    # directory named in :files keeps its own name in the nested path.
    source_dirlist.each do |name, size|
      queue << [File.join(subdir + filename, name), '/'] if size == '/'
    end
  end
  true
end
|
323
|
+
|
324
|
+
# Recursively computes size of files in dir
|
325
|
+
# Adds up the sizes reported by dir_list for +dir+, descending into every
# entry that dir_list marks as a subdirectory ('/').
def dir_size(dir)
  dir_list(dir).inject(0) do |sum, (name, size)|
    sum + (size == '/' ? dir_size(dir + '/' + name) : size.to_i)
  end
end
|
332
|
+
|
333
|
+
|
334
|
+
###### Misc methods ########################################################
|
335
|
+
|
336
|
+
# Performs the given operation ('start', 'stop', 'restart') on the specified
|
337
|
+
# service. Default implementation assumes RedHat/CentOS style /sbin/service.
|
338
|
+
# If you're using a distribution or OS that does not support /sbin/service,
|
339
|
+
# override this method with a plugin.
|
340
|
+
# Runs `/sbin/service <name> <operation>` on this host and returns the
# result of ssh_cmd.
def service(operation, name)
  command = "/sbin/service #{name} #{operation}"
  ssh_cmd(command)
end
|
343
|
+
|
344
|
+
# Changes the I/O scheduler to name (such as 'deadline', 'noop', 'cfq')
|
345
|
+
# for the specified device.
|
346
|
+
# Logs the change, then writes the scheduler +name+ into
# /sys/block/<device>/queue/scheduler on this host. Returns the result of
# ssh_cmd.
def set_io_scheduler(name, device='sda')
  output("Setting I/O scheduler for #{device} to #{name}.")
  scheduler_file = "/sys/block/#{device}/queue/scheduler"
  ssh_cmd("echo '#{name}' >#{scheduler_file}")
end
|
350
|
+
|
351
|
+
# Confirms that the specified binary is installed and on the shell path.
|
352
|
+
# Confirms that the specified binary is installed and on the shell path, by
# running `which` on this host.
#
# program_name:: name of the binary to look for.
#
# Returns true if `which` reported a location.
# Raises RuntimeError if `which` reported "no <program_name> in ..." or
# produced no output at all.
def confirm_installed(program_name)
  out = ssh_cmd("which #{program_name}").to_s
  # Escape the name so characters such as '+' or '.' are matched literally.
  reported_missing = out =~ /no #{Regexp.escape(program_name.to_s)} in /
  # No output at all means `which` did not report any location either.
  if reported_missing || out.strip.empty?
    raise "#{program_name} not installed, or missing from path"
  end
  true
end
|
357
|
+
|
358
|
+
# Returns number of cores on machine. (reflects virtual cores if hyperthreading
|
359
|
+
# enabled, so might be 2x real value in that case.)
|
360
|
+
# Not currently used by anything in Jetpants base, but might be useful for plugins
|
361
|
+
# that want to tailor the concurrency level to the machine's capabilities.
|
362
|
+
# Counts the 'processor' entries in /proc/cpuinfo on this host and memoizes
# the answer in @cores. Falls back to 1 when the command returns nil.
def cores
  @cores ||= begin
    count = ssh_cmd(%q{cat /proc/cpuinfo|grep 'processor\s*:' | wc -l})
    count ? count.to_i : 1
  end
end
|
367
|
+
|
368
|
+
# Returns the machine's hostname
|
369
|
+
# Returns the output of `hostname` on this host with the trailing newline
# removed. The value is memoized in @hostname.
def hostname
  return @hostname if @hostname
  @hostname = ssh_cmd('hostname').chomp
end
|
372
|
+
|
373
|
+
# Displays the provided output, along with information about the current time,
|
374
|
+
# and self (the IP of this Host)
|
375
|
+
# Prints +str+ prefixed with the current time (HH:MM:SS) and this object's
# string form in brackets. Blank input is replaced by a placeholder message.
# Returns the printed line without its trailing newline.
def output(str)
  message = str.to_s.strip
  message = "Completed (no output)" if message.empty?
  line = "#{Time.now.strftime("%H:%M:%S")} [#{self}] "
  line << message
  print line + "\n"
  line
end
|
384
|
+
|
385
|
+
# Returns the host's IP address as a string.
|
386
|
+
# String form of a Host is its IP address (the value of @ip).
def to_s
  @ip
end
|
389
|
+
|
390
|
+
# Returns self, since this object is already a Host.
|
391
|
+
# A Host converts to itself.
def to_host
  return self
end
|
394
|
+
|
395
|
+
end
|
396
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# This file contains any methods we're adding to core Ruby modules
|
2
|
+
|
3
|
+
# Reopen Enumerable to add some concurrent iterators
|
4
|
+
module Enumerable
  # Like each, but every item's block call runs in its own thread. Waits for
  # all threads to finish, then returns self.
  def concurrent_each
    workers = map { |*item| Thread.new { yield(*item) } }
    workers.each(&:join)
    self
  end

  # Like map, but every item's block call runs in its own thread. Results are
  # returned in the original item order.
  def concurrent_map
    workers = map { |*item| Thread.new { yield(*item) } }
    workers.map(&:value)
  end

  # Like each_with_index, but every item's block call runs in its own thread.
  def concurrent_each_with_index(&block)
    each_with_index.concurrent_each(&block)
  end
end
|
21
|
+
|
22
|
+
# Add Jetpants-specific conversion methods to Object.
|
23
|
+
class Object
  # Builds a Jetpants::Host from this object's to_s value. Only really useful
  # for Strings containing IP addresses, or objects whose to_s returns an IP
  # address as a string.
  def to_host
    Jetpants::Host.new(to_s)
  end

  # Builds a Jetpants::DB from this object's to_s value. Only really useful
  # for Strings containing IP addresses, or objects whose to_s returns an IP
  # address as a string.
  def to_db
    Jetpants::DB.new(to_s)
  end
end
|
38
|
+
|
39
|
+
class Range
  # Splits the range into consecutive chunks and yields each chunk's
  # |chunk_min_id, chunk_max_id| (both inclusive) to the block, running the
  # chunks on a pool of threads.
  #
  # chunks::        desired number of chunks. The chunk size is the range
  #                 size divided by this, rounded up.
  # thread_limit::  maximum number of threads. Always capped at
  #                 Jetpants.max_concurrency; nil means use that cap.
  # min_per_chunk:: lower bound on the chunk size.
  #
  # Returns an array of the block's results, in completion order (not range
  # order). Returns an empty array for an empty range. An exception raised
  # by the block in any thread is re-raised here.
  def in_chunks(chunks, thread_limit=40, min_per_chunk=1)
    return [] if min.nil? # empty range: nothing to do

    # Divide exactly (Rational) before rounding up. Plain Integer division
    # has already rounded down by the time ceil is applied.
    per_chunk = ((max - min + 1) / chunks.to_r).ceil
    per_chunk = min_per_chunk if per_chunk < min_per_chunk

    min_id_queue = []
    results = []
    min.step(max, per_chunk) {|n| min_id_queue << n}
    min_id_queue.reverse! # so that pop hands out the lowest ids first

    lock = Mutex.new
    thread_count = Jetpants.max_concurrency
    thread_count = thread_limit if thread_limit && thread_limit < Jetpants.max_concurrency
    thread_count = min_id_queue.length if min_id_queue.length < thread_count

    threads = Array.new(thread_count) do
      Thread.new do
        loop do
          my_min = lock.synchronize { min_id_queue.pop }
          break unless my_min
          my_max = my_min + per_chunk - 1
          my_max = max if my_max > max
          result = yield my_min, my_max
          lock.synchronize { results << result }
        end
      end
    end
    # Join every thread that was started, including ones that have already
    # finished, so a failure in any of them is surfaced.
    threads.each {|th| th.join}
    results
  end
end
|