jetpants 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -0
- data/README.rdoc +88 -0
- data/bin/jetpants +442 -0
- data/doc/commands.rdoc +119 -0
- data/doc/configuration.rdoc +27 -0
- data/doc/plugins.rdoc +120 -0
- data/doc/requirements.rdoc +54 -0
- data/etc/jetpants.yaml.sample +58 -0
- data/lib/jetpants.rb +100 -0
- data/lib/jetpants/callback.rb +131 -0
- data/lib/jetpants/db.rb +122 -0
- data/lib/jetpants/db/client.rb +103 -0
- data/lib/jetpants/db/import_export.rb +330 -0
- data/lib/jetpants/db/privileges.rb +89 -0
- data/lib/jetpants/db/replication.rb +226 -0
- data/lib/jetpants/db/server.rb +79 -0
- data/lib/jetpants/db/state.rb +212 -0
- data/lib/jetpants/host.rb +396 -0
- data/lib/jetpants/monkeypatch.rb +74 -0
- data/lib/jetpants/pool.rb +272 -0
- data/lib/jetpants/shard.rb +311 -0
- data/lib/jetpants/table.rb +146 -0
- data/lib/jetpants/topology.rb +144 -0
- data/plugins/simple_tracker/db.rb +23 -0
- data/plugins/simple_tracker/pool.rb +70 -0
- data/plugins/simple_tracker/shard.rb +76 -0
- data/plugins/simple_tracker/simple_tracker.rb +74 -0
- data/plugins/simple_tracker/topology.rb +66 -0
- data/tasks/promotion.rb +260 -0
- metadata +191 -0
data/lib/jetpants/host.rb
@@ -0,0 +1,396 @@
+require 'net/ssh'
+require 'socket'
+
+module Jetpants
+
+  # Encapsulates a UNIX server that we can SSH to as root. Maintains a pool of SSH
+  # connections to the host as needed.
+  class Host
+    include CallbackHandler
+
+    @@all_hosts = {}
+    @@all_hosts_mutex = Mutex.new
+
+    # IP address of the Host, as a string.
+    attr_reader :ip
+
+    # We override Host.new so that attempting to create a duplicate Host object
+    # (that is, one with the same IP as an existing Host object) returns the
+    # original object.
+    def self.new(ip)
+      @@all_hosts_mutex.synchronize do
+        @@all_hosts[ip] = nil unless @@all_hosts[ip].is_a? self
+        @@all_hosts[ip] ||= super
+      end
+    end
+
+    def initialize(ip)
+      @ip = ip
+      @connection_pool = [] # array of idle Net::SSH::Connection::Session objects
+      @lock = Mutex.new
+      @available = nil
+    end
+
+    # Returns a Host object for the machine Jetpants is running on.
+    def self.local(interface='bond0')
+      # This technique is adapted from Sergio Rubio Gracia's, described at
+      # http://blog.frameos.org/2006/12/09/getting-network-interface-addresses-using-ioctl-pure-ruby-2/
+      sock = Socket.new(Socket::AF_INET, Socket::SOCK_DGRAM, 0)
+      buf = [interface, ""].pack('a16h16')
+      sock.ioctl(0x8915, buf) # SIOCGIFADDR
+      sock.close
+      ip_string = buf[20...24].unpack('C*').join '.'
+      self.new(ip_string)
+    end
+
+    # Returns a Net::SSH::Connection::Session for the host. Verifies that the
+    # connection is working before returning it.
+    def get_ssh_connection
+      conn = nil
+      attempts = 0
+      5.times do |attempt|
+        @lock.synchronize do
+          if @connection_pool.count > 0
+            conn = @connection_pool.shift
+          end
+        end
+        unless conn
+          params = {
+            :paranoid => false,
+            :user_known_hosts_file => '/dev/null',
+            :timeout => 5,
+          }
+          params[:keys] = Jetpants.ssh_keys if Jetpants.ssh_keys
+          begin
+            @lock.synchronize do
+              conn = Net::SSH.start(@ip, 'root', params)
+            end
+          rescue => ex
+            output "Unable to SSH on attempt #{attempt + 1}: #{ex.to_s}"
+            conn = nil
+            next
+          end
+        end
+
+        # Confirm that the connection works
+        if conn
+          begin
+            result = conn.exec!('echo ping').strip
+            raise "Unexpected result" unless result == 'ping'
+            @available = true
+            return conn
+          rescue
+            output "Discarding nonfunctional SSH connection"
+            conn = nil
+          end
+        end
+      end
+      @available = false
+      raise "Unable to obtain working SSH connection to #{self} after 5 attempts"
+    end
+
+    # Adds a Net::SSH::Connection::Session to a pool of idle persistent connections.
+    def save_ssh_connection(conn)
+      conn.exec! 'cd ~'
+      @lock.synchronize do
+        @connection_pool << conn
+      end
+    rescue
+      output "Discarding nonfunctional SSH connection"
+    end
+
+    # Execute the given UNIX command string (or array of strings) as root via SSH.
+    # By default, if something is wrong with the SSH connection, the command
+    # will be attempted up to 3 times before an exception is thrown. Be sure
+    # to set this to 1 or false for commands that are not idempotent.
+    # Returns the result of the command executed. If cmd was an array of strings,
+    # returns the result of the LAST command executed.
+    def ssh_cmd(cmd, attempts=3)
+      attempts ||= 1
+      conn = get_ssh_connection
+      cmd = [cmd] unless cmd.is_a? Array
+      result = nil
+      cmd.each do |c|
+        failures = 0
+        begin
+          result = conn.exec! c
+        rescue
+          failures += 1
+          raise if failures >= attempts
+          output "Command \"#{c}\" failed, re-trying after delay"
+          sleep(failures)
+          retry
+        end
+      end
+      save_ssh_connection conn
+      return result
+    end
+
+    # Shortcut for use when a command is not idempotent and therefore
+    # isn't safe to retry if something goes wonky with the SSH connection.
+    def ssh_cmd!(cmd)
+      ssh_cmd cmd, false
+    end
+
+    # Confirm that something is listening on the given port. The timeout param
+    # indicates how long to wait (in seconds) for a process to be listening.
+    def confirm_listening_on_port(port, timeout=10)
+      checker_th = Thread.new { ssh_cmd "while [[ `netstat -ln | grep #{port} | wc -l` -lt 1 ]] ; do sleep 1; done" }
+      raise "Nothing is listening on #{@ip}:#{port} after #{timeout} seconds" unless checker_th.join(timeout)
+      true
+    end
+
+    # Returns true if the host is accessible via SSH, false otherwise
+    def available?
+      # If we haven't tried an ssh command yet, @available will be nil. Running
+      # a first no-op command will populate it to true or false.
+      if @available.nil?
+        ssh_cmd 'echo ping' rescue nil
+      end
+      @available
+    end
+
+    ###### ini file manipulation ###############################################
+
+    # Comments-out lines of an ini file beginning with any of the supplied prefixes
+    def comment_out_ini(file, *prefixes)
+      toggle_ini(file, prefixes, false)
+    end
+
+    # Un-comments-out lines of an ini file beginning with any of the supplied prefixes
+    # The prefixes should NOT include the # comment-out character -- ie, pass them
+    # the same as you would to DB#comment_out_ini
+    def uncomment_out_ini(file, *prefixes)
+      toggle_ini(file, prefixes, true)
+    end
+
+    # Un-comments-out (if enable is true) or comments-out (if enable is false) lines of an ini file.
+    def toggle_ini(file, prefixes, enable)
+      prefixes.flatten!
+      commands = []
+      prefixes.each do |setting|
+        if enable
+          search = '^#(\s*%s\s*(?:=.*)?)$' % setting
+          replace = '\1'
+        else
+          search = '^(\s*%s\s*(?:=.*)?)$' % setting
+          replace = '#\1'
+        end
+        commands << "ruby -i -pe 'sub(%r[#{search}], %q[#{replace}])' #{file}"
+      end
+      cmd_line = commands.join '; '
+      ssh_cmd cmd_line
+    end
+
+
+    ###### Directory Copying / Listing / Comparison methods ####################
+
+    # Quickly and efficiently recursively copies a directory to one or more target hosts.
+    # Requires that pigz is installed on source (self) and all targets.
+    # base_dir::  is base directory to copy from the source (self). Also the default destination base
+    #             directory on the targets, if not supplied via next param.
+    # targets::   is one of the following:
+    #             * Host object, or any object that delegates method_missing to a Host (such as DB)
+    #             * array of Host objects (or delegates)
+    #             * hash mapping Host objects (or delegates) to destination base directory overrides (as string)
+    # options::   is a hash that can contain --
+    #             * :files     => only copy these filenames instead of entire base_dir. String, or Array of Strings.
+    #             * :port      => port number to use for netcat. defaults to 7000 if omitted.
+    #             * :overwrite => if true, don't raise an exception if the base_dir is non-empty or :files exist. default false.
+    def fast_copy_chain(base_dir, targets, options={})
+      # Normalize the filenames param so it is an array
+      filenames = options[:files] || ['.']
+      filenames = [filenames] unless filenames.respond_to?(:each)
+
+      # Normalize the targets param, so that targets is an array of Hosts and
+      # destinations is a hash of hosts => dirs
+      destinations = {}
+      targets = [targets] unless targets.respond_to?(:each)
+      base_dir += '/' unless base_dir[-1] == '/'
+      if targets.is_a? Hash
+        destinations = targets
+        destinations.each {|t, d| destinations[t] += '/' unless d[-1] == '/'}
+        targets = targets.keys
+      else
+        destinations = targets.inject({}) {|memo, target| memo[target] = base_dir; memo}
+      end
+      raise "No target hosts supplied" if targets.count < 1
+
+      file_list = filenames.join ' '
+      port = (options[:port] || 7000).to_i
+
+      # On each destination host, do any initial setup (and optional validation/erasing),
+      # and then listen for new files. If there are multiple destination hosts, all of them
+      # except the last will use tee to "chain" the copy along to the next machine.
+      workers = []
+      targets.reverse.each_with_index do |t, i|
+        dir = destinations[t]
+        raise "Directory #{t}:#{dir} looks suspicious" if dir.include?('..') || dir.include?('./') || dir == '/' || dir == ''
+
+        t.confirm_installed 'pigz'
+        t.ssh_cmd "mkdir -p #{dir}"
+
+        # Check if contents already exist / non-empty.
+        # Note: doesn't do recursive scan of subdirectories
+        unless options[:overwrite]
+          all_paths = filenames.map {|f| dir + f}.join ' '
+          dirlist = t.dir_list(all_paths)
+          dirlist.each {|name, size| raise "File #{name} exists on destination and has nonzero size!" if size.to_i > 0}
+        end
+
+        if i == 0
+          workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | pigz -d | tar xvf -" }
+          t.confirm_listening_on_port port
+          t.output "Listening with netcat."
+        else
+          tt = targets.reverse[i-1]
+          fifo = "fifo#{port}"
+          workers << Thread.new { t.ssh_cmd "cd #{dir} && mkfifo #{fifo} && nc #{tt.ip} #{port} <#{fifo} && rm #{fifo}" }
+          checker_th = Thread.new { t.ssh_cmd "while [ ! -p #{dir}/#{fifo} ] ; do sleep 1; done" }
+          raise "FIFO not found on #{t} after 10 tries" unless checker_th.join(10)
+          workers << Thread.new { t.ssh_cmd "cd #{dir} && nc -l #{port} | tee #{fifo} | pigz -d | tar xvf -" }
+          t.confirm_listening_on_port port
+          t.output "Listening with netcat, and chaining to #{tt}."
+        end
+      end
+
+      # Start the copy chain.
+      confirm_installed 'pigz'
+      output "Sending files over to #{targets[0]}: #{file_list}"
+      ssh_cmd "cd #{base_dir} && tar vc #{file_list} | pigz | nc #{targets[0].ip} #{port}"
+      workers.each {|th| th.join}
+      output "File copy complete."
+
+      # Verify
+      output "Verifying file sizes and types on all destinations."
+      compare_dir base_dir, destinations, options
+      output "Verification successful."
+    end
+
+    # Given the name of a directory or single file, returns a hash of filename => size of each file present.
+    # Subdirectories will be returned with a size of '/', so you can process these differently as needed.
+    # WARNING: This is brittle. It parses output of "ls". If anyone has a gem to do better remote file
+    # management via ssh, then please by all means send us a pull request!
+    def dir_list(dir)
+      ls_out = ssh_cmd "ls --color=never -1AgGF #{dir}" # disable color, 1 file per line, all but . and .., hide owner+group, include type suffix
+      result = {}
+      ls_out.split("\n").each do |line|
+        next unless matches = line.match(/^[\w-]+\s+\d+\s+(?<size>\d+).*(?:\d\d:\d\d|\d{4})\s+(?<name>.*)$/)
+        file_name = matches[:name]
+        file_name = file_name[0...-1] if file_name =~ %r![*/=>@|]$!
+        result[file_name.split('/')[-1]] = (matches[:name][-1] == '/' ? '/' : matches[:size].to_i)
+      end
+      result
+    end
+
+    # Compares file existence and size between hosts. Param format identical to
+    # the first three params of Host#fast_copy_chain, except the only supported option
+    # is :files.
+    # Raises an exception if the files don't exactly match, otherwise returns true.
+    def compare_dir(base_dir, targets, options={})
+      # Normalize the filenames param so it is an array
+      filenames = options[:files] || ['.']
+      filenames = [filenames] unless filenames.respond_to?(:each)
+
+      # Normalize the targets param, so that targets is an array of Hosts and
+      # destinations is a hash of hosts => dirs
+      destinations = {}
+      targets = [targets] unless targets.respond_to?(:each)
+      base_dir += '/' unless base_dir[-1] == '/'
+      if targets.is_a? Hash
+        destinations = targets
+        destinations.each {|t, d| destinations[t] += '/' unless d[-1] == '/'}
+        targets = targets.keys
+      else
+        destinations = targets.inject({}) {|memo, target| memo[target] = base_dir; memo}
+      end
+      raise "No target hosts supplied" if targets.count < 1
+
+      queue = filenames.map {|f| ['', f]} # array of [subdir, filename] pairs
+      while (tuple = queue.shift)
+        subdir, filename = tuple
+        source_dirlist = dir_list(base_dir + subdir + filename)
+        destinations.each do |target, path|
+          target_dirlist = target.dir_list(path + subdir + filename)
+          source_dirlist.each do |name, size|
+            target_size = target_dirlist[name] || 'MISSING'
+            raise "Directory listing mismatch when comparing #{self}:#{base_dir}#{subdir}#{filename}/#{name} to #{target}:#{path}#{subdir}#{filename}/#{name} (size: #{size} vs #{target_size})" unless size == target_size
+          end
+        end
+        queue.concat(source_dirlist.map {|name, size| size == '/' ? [subdir + '/' + name, '/'] : nil}.compact)
+      end
+    end
+
+    # Recursively computes size of files in dir
+    def dir_size(dir)
+      total_size = 0
+      dir_list(dir).each do |name, size|
+        total_size += (size == '/' ? dir_size(dir + '/' + name) : size.to_i)
+      end
+      total_size
+    end
+
+
+    ###### Misc methods ########################################################
+
+    # Performs the given operation ('start', 'stop', 'restart') on the specified
+    # service. Default implementation assumes RedHat/CentOS style /sbin/service.
+    # If you're using a distribution or OS that does not support /sbin/service,
+    # override this method with a plugin.
+    def service(operation, name)
+      ssh_cmd "/sbin/service #{name} #{operation.to_s}"
+    end
+
+    # Changes the I/O scheduler to name (such as 'deadline', 'noop', 'cfq')
+    # for the specified device.
+    def set_io_scheduler(name, device='sda')
+      output "Setting I/O scheduler for #{device} to #{name}."
+      ssh_cmd "echo '#{name}' >/sys/block/#{device}/queue/scheduler"
+    end
+
+    # Confirms that the specified binary is installed and on the shell path.
+    def confirm_installed(program_name)
+      out = ssh_cmd "which #{program_name}"
+      raise "#{program_name} not installed, or missing from path" if out =~ /no #{program_name} in /
+      true
+    end
+
+    # Returns number of cores on machine. (reflects virtual cores if hyperthreading
+    # enabled, so might be 2x real value in that case.)
+    # Not currently used by anything in Jetpants base, but might be useful for plugins
+    # that want to tailor the concurrency level to the machine's capabilities.
+    def cores
+      return @cores if @cores
+      count = ssh_cmd %q{cat /proc/cpuinfo|grep 'processor\s*:' | wc -l}
+      @cores = (count ? count.to_i : 1)
+    end
+
+    # Returns the machine's hostname
+    def hostname
+      @hostname ||= ssh_cmd('hostname').chomp
+    end
+
+    # Displays the provided output, along with information about the current time,
+    # and self (the IP of this Host)
+    def output(str)
+      str = str.to_s.strip
+      str = nil if str && str.length == 0
+      str ||= "Completed (no output)"
+      output = Time.now.strftime("%H:%M:%S") + " [#{self}] "
+      output << str
+      print output + "\n"
+      output
+    end
+
+    # Returns the host's IP address as a string.
+    def to_s
+      return @ip
+    end
+
+    # Returns self, since this object is already a Host.
+    def to_host
+      self
+    end
+
+  end
+end
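The Host class above does all of its work over root SSH, pooling one set of connections per IP. Purely as an illustration (this is not part of the gem's files), a minimal sketch of how these methods might be driven from a Jetpants console or plugin, assuming Jetpants is already configured with working root SSH keys and that the IP addresses, paths, and port are placeholders:

    require 'jetpants'

    host = Jetpants::Host.new('10.42.0.10')   # duplicate IPs return the same cached object
    host.ssh_cmd 'uptime'                     # idempotent; retried up to 3 times on SSH trouble
    host.ssh_cmd! 'mysqladmin shutdown'       # non-idempotent shortcut; never retried

    # Chain-copy a directory to two targets: the first target tees the stream
    # on to the second, so the source only sends the data once.
    # Requires pigz and nc on every machine involved.
    host.fast_copy_chain('/var/lib/mysql',
                         [Jetpants::Host.new('10.42.0.11'), Jetpants::Host.new('10.42.0.12')],
                         :port => 7002, :overwrite => false)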
data/lib/jetpants/monkeypatch.rb
@@ -0,0 +1,74 @@
+# This file contains any methods we're adding to core Ruby modules
+
+# Reopen Enumerable to add some concurrent iterators
+module Enumerable
+  # Works like each but runs the block in a separate thread per item.
+  def concurrent_each
+    collect {|*item| Thread.new {yield *item}}.each {|th| th.join}
+    self
+  end
+
+  # Works like map but runs the block in a separate thread per item.
+  def concurrent_map
+    collect {|*item| Thread.new {yield *item}}.collect {|th| th.value}
+  end
+
+  # Works like each_with_index but runs the block in a separate thread per item.
+  def concurrent_each_with_index(&block)
+    each_with_index.concurrent_each(&block)
+  end
+end
+
+# Add Jetpants-specific conversion methods to Object.
+class Object
+  # Converts self to a Jetpants::Host by way of to_s. Only really useful for
+  # Strings containing IP addresses, or Objects whose to_string method returns
+  # an IP address as a string.
+  def to_host
+    Jetpants::Host.new(self.to_s)
+  end
+
+  # Converts self to a Jetpants::DB by way of to_s. Only really useful for
+  # Strings containing IP addresses, or Objects whose to_string method returns
+  # an IP address as a string.
+  def to_db
+    Jetpants::DB.new(self.to_s)
+  end
+end
+
+class Range
+  # Supply a block taking |chunk_min_id, chunk_max_id|. This will execute the block in
+  # parallel chunks, supplying the min and max id (inclusive) of the current
+  # chunk. Note that thread_limit is capped at the value of Jetpants.max_concurrency.
+  def in_chunks(chunks, thread_limit=40, min_per_chunk=1)
+    per_chunk = ((max - min + 1) / chunks).ceil
+    per_chunk = min_per_chunk if per_chunk < min_per_chunk
+
+    min_id_queue = []
+    results = []
+    min.step(max, per_chunk) {|n| min_id_queue << n}
+    min_id_queue.reverse!
+
+    lock = Mutex.new
+    group = ThreadGroup.new
+    thread_count = Jetpants.max_concurrency
+    thread_count = thread_limit if thread_limit && thread_limit < Jetpants.max_concurrency
+    thread_count = min_id_queue.length if min_id_queue.length < thread_count
+    thread_count.times do
+      th = Thread.new do
+        while min_id_queue.length > 0 do
+          my_min = nil
+          lock.synchronize {my_min = min_id_queue.pop}
+          break unless my_min
+          my_max = my_min + per_chunk - 1
+          my_max = max if my_max > max
+          result = yield my_min, my_max
+          lock.synchronize {results << result}
+        end
+      end
+      group.add(th)
+    end
+    group.list.each {|th| th.join}
+    results
+  end
+end
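These core-class additions are what let Jetpants fan work out across hosts and ID ranges. Again as an illustration only (not part of the gem's files), a sketch of how they combine, assuming Jetpants.max_concurrency is set in the configuration and the IP addresses and range are placeholders:

    require 'jetpants'

    # Object#to_host / Object#to_db let plain IP strings stand in for Host/DB objects.
    hosts = ['10.42.0.11', '10.42.0.12'].map(&:to_host)

    # Enumerable#concurrent_map runs the block in one thread per element.
    uptimes = hosts.concurrent_map {|h| h.ssh_cmd 'uptime'}

    # Range#in_chunks splits the ID range into 10 chunks and yields each chunk's
    # inclusive bounds, running at most 4 chunks at a time here.
    chunk_sizes = (1..1_000_000).in_chunks(10, 4) do |min_id, max_id|
      max_id - min_id + 1   # stand-in for real per-chunk work, e.g. exporting rows
    end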