pwrake 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,30 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .config
19
+ *~
20
+ */*~
21
+ */*/*~
22
+ *.bak
23
+ */*.bak
24
+ */*/*.bak
25
+ .#*
26
+ */.#*
27
+ */*/.#*
28
+ rhosts
29
+ spec/*/*.dat
30
+ spec/*/*.csv
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pwrake.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009-2012 Masahiro TANAKA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ # Pwrake
2
+
3
+ Parallel workflow extension for Rake
4
+ * Author: Masahiro Tanaka
5
+
6
+ ## Features
7
+
8
+ * Parallelize all tasks; no need to modify Rakefile, no need to use `multitask`.
9
+ * Tasks are executed in the given number of worker threads.
10
+ * Remote execution using SSH.
11
+ * Pwrake is an extension to Rake, not a patch to Rake: Rake and Pwrake coexist.
12
+ * High parallel I/O performance using Gfarm file system.
13
+
14
+ ## Installation
15
+
16
+ Download source tgz/zip and expand, cd to subdir and install:
17
+
18
+ $ ruby setup.rb
19
+
20
+ Or, gem install:
21
+
22
+ $ gem install pwrake
23
+
24
+ ## Usage
25
+
26
+ ### Parallel execution using 4 cores at localhost:
27
+
28
+ $ pwrake -j 4
29
+
30
+ ### Parallel execution using all cores at localhost:
31
+
32
+ $ pwrake -j
33
+
34
+ ### Parallel execution using a total of 2*2 cores on 2 remote hosts:
35
+
36
+ 1. Share your directory among the remote hosts via a distributed file system such as NFS or Gfarm.
37
+ 2. Allow passphrase-less access via SSH in either of the following ways:
38
+ * Add passphrase-less key generated by `ssh-keygen`. (Be careful)
39
+ * Add passphrase using `ssh-add`.
40
+ 3. Make `hosts` file in which remote host names and the number of cores are listed:
41
+
42
+ $ cat hosts
43
+ host1 2
44
+ host2 2
45
+
46
+ 4. Run `pwrake` with an option `--hostfile` or `-F`:
47
+
48
+ $ pwrake --hostfile=hosts
49
+
50
+ ## Tested Platform
51
+
52
+ * Ruby 1.9.3, 2.0.0-preview1
53
+ * Rake 0.9.2.2
54
+ * Fedora 16 / Debian 5.0.7
55
+
56
+ ## Workflow demo
57
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/pwrake ADDED
@@ -0,0 +1,36 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2009-2012 Masahiro TANAKA
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to
8
+ # deal in the Software without restriction, including without limitation the
9
+ # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ # sell copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ # IN THE SOFTWARE.
23
+ #++
24
+
25
# RubyGems is optional: load it when present, continue silently when it is not.
begin
  require 'rubygems'
rescue LoadError
end

require 'rake'

# Put the lib/ directory that sits beside this script's directory at the
# front of the load path, then load Pwrake from it.
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)) + "/../lib")
require "pwrake"

# Hand control to the application object returned by Rake.application.
Rake.application.run
@@ -0,0 +1,187 @@
1
# Reopens Rake so that Rake.application resolves to the Pwrake application.
module Rake
  # Returns the memoized application object; on the first call it is obtained
  # from Pwrake.application and cached in @application.
  def self.application
    @application ||= Pwrake.application
  end
end
8
+
9
+
10
module Pwrake

  class << self
    # Returns the shared Pwrake::Application instance, creating it on first use.
    def application
      @application ||= Application.new
    end
  end

  # Rake::Application subclass that drives a Master object and registers the
  # additional "[Pw]" command-line options.
  class Application < ::Rake::Application

    # Delegates to the master's task queue (@master is created in #run).
    def task_queue
      @master.task_queue
    end

    # Delegates to Master#thread_loop with the given arguments.
    def thread_loop(*args)
      @master.thread_loop(*args)
    end

    # Delegates to Master#pwrake_options.
    def pwrake_options
      @master.pwrake_options
    end


    # Run the Pwrake application.
    # The master is created before option parsing (init) and set up after the
    # Rakefile has been loaded; the statement order is significant.
    def run
      standard_exception_handling do
        @master = Master.new
        init("pwrake")
        @master.init
        load_rakefile
        @master.setup
        top_level
      end
    end

    # Run the top level tasks of a Rake application.
    # When tasks are actually invoked, the master is started first and is
    # always finished afterwards, even if a task raises.
    def top_level
      standard_exception_handling do
        if options.show_tasks
          display_tasks_and_comments
        elsif options.show_prereqs
          display_prerequisites
        else
          begin
            @master.start
            top_level_tasks.each { |task_name| invoke_task(task_name) }
          ensure
            @master.finish
          end
        end
      end
    end


    # Returns Rake's standard option table with the --version handler replaced
    # (so that it also prints the Pwrake version) and the Pwrake-specific
    # options appended.
    def standard_rake_options
      opts = super
      opts.each do |a|
        if a[0] == '--version'
          a[3] = lambda { |value|
            puts "rake, version #{RAKEVERSION}"
            puts "pwrake, version #{Pwrake::VERSION}"
            exit
          }
        end
      end

      opts.concat(
      [
       ['-F', '--hostfile FILE',
        "[Pw] Read hostnames from FILE",
        lambda { |value|
          options.hostfile = value
        }
       ],
       ['-j', '--jobs [N]',
        "[Pw] Number of threads at localhost (default: # of processors)",
        lambda { |value|
          if value
            value = value.to_i
            if value > 0
              options.num_threads = value
            else
              # Zero or a negative N is taken relative to the processor count.
              options.num_threads = x = processor_count + value
              raise "negative/zero number of threads (#{x})" if x <= 0
            end
          else
            options.num_threads = processor_count
          end
        }
       ],
       ['-L', '--logfile [FILE]', "[Pw] Write log to FILE",
        lambda { |value|
          if value.kind_of? String
            options.logfile = value
          else
            # Option given without an argument: recorded as an empty string.
            options.logfile = ""
          end
        }
       ],
       ['--ssh-opt', '--ssh-option OPTION', "[Pw] Option passed to SSH",
        lambda { |value|
          options.ssh_option = value
        }
       ],
       ['--filesystem FILESYSTEM', "[Pw] Specify FILESYSTEM (nfs|gfarm)",
        lambda { |value|
          options.filesystem = value
        }
       ],
       ['--gfarm', "[Pw] FILESYSTEM=gfarm",
        lambda { |value|
          options.filesystem = "gfarm"
        }
       ],
       ['-A', '--disable-affinity', "[Pw] Turn OFF affinity (AFFINITY=off)",
        lambda { |value|
          options.disable_affinity = true
        }
       ],
       ['-S', '--disable-steal', "[Pw] Turn OFF task steal",
        lambda { |value|
          options.disable_steal = true
        }
       ],
       ['-d', '--debug',
        "[Pw] Output Debug messages",
        lambda { |value|
          options.debug = true
        }
       ],
       ['--pwrake-conf [FILE]',
        "[Pw] Pwrake configuration file in YAML",
        lambda {|value| options.pwrake_conf = value}
       ],
       ['--show-conf','--show-config',
        "[Pw] Show Pwrake configuration options",
        lambda {|value| options.show_conf = true }
       ]
      ])
      opts
    end


    # Forwards a (host_list, host) observation to the master's counter.
    def count(host_list, host)
      @master.counter.count( host_list, host )
    end

    # from Michael Grosser's parallel
    # https://github.com/grosser/parallel
    #
    # Returns the number of processors reported by the host OS.
    # Raises RuntimeError for an OS that is not recognised.
    def processor_count
      host_os = RbConfig::CONFIG['host_os']
      case host_os
      when /linux|cygwin/
        ncpu = 0
        # File.foreach closes the file when done (the bare open() did not).
        File.foreach("/proc/cpuinfo") do |l|
          ncpu += 1 if /^processor\s+: \d+/=~l
        end
        ncpu
      when /darwin9/
        `hwprefs cpu_count`.to_i
      when /darwin/
        (hwprefs_available? ? `hwprefs thread_count` : `sysctl -n hw.ncpu`).to_i
      when /(open|free)bsd/
        `sysctl -n hw.ncpu`.to_i
      when /mswin|mingw/
        require 'win32ole'
        wmi = WIN32OLE.connect("winmgmts://")
        cpu = wmi.ExecQuery("select NumberOfLogicalProcessors from Win32_Processor")
        cpu.to_enum.first.NumberOfLogicalProcessors
      when /solaris2/
        `psrinfo -p`.to_i # physical cpus
      else
        raise "Unknown architecture: #{host_os}"
      end
    end

    # True when `which hwprefs` prints a path. processor_count calls this on
    # darwin, but no definition was present in this file.
    def hwprefs_available?
      `which hwprefs` != ''
    end
    private :hwprefs_available?

  end
end
@@ -0,0 +1,54 @@
1
module Pwrake

  # Keeps running tallies: how often a host was (or was not) contained in a
  # given host list, plus three independent queue-event counters.
  class Counter

    def initialize
      @total = @same = @diff = 0
      @no_queue = @found_queue = @empty_queue = 0
      @same_hosts = {}
      @diff_hosts = {}
    end

    # Records one observation. It is a "same" hit when host_list is non-nil
    # and includes host, otherwise a "diff" hit.
    # Returns the updated per-host count in the bucket that was incremented.
    def count(host_list, host)
      @total += 1
      if host_list && host_list.include?(host)
        @same += 1
        bucket = @same_hosts
      else
        @diff += 1
        bucket = @diff_hosts
      end
      bucket[host] = (bucket[host] || 0) + 1
    end

    # Builds a multi-line summary of the tallies (hosts sorted by key) and
    # passes it to Log.info.
    def print
      report = ["same=#{@same}, diff=#{@diff}, total=#{@total}", "same_hosts = {"]
      @same_hosts.keys.sort.each { |name| report << " #{name}: #{@same_hosts[name]}" }
      report << "}" << "different_hosts = {"
      @diff_hosts.keys.sort.each { |name| report << " #{name}: #{@diff_hosts[name]}" }
      report << "}"
      Log.info report.join("\n")
    end

    # Increments and returns the no-queue counter.
    def no_queue
      @no_queue += 1
    end

    # Increments and returns the found-queue counter.
    def found_queue
      @found_queue += 1
    end

    # Increments and returns the empty-queue counter.
    def empty_queue
      @empty_queue += 1
    end
  end
end
@@ -0,0 +1,70 @@
1
# Reopens FileUtils so that `sh` runs commands through the current Pwrake
# shell (see pwrake_system) instead of always running them locally.
module FileUtils

  # Keep the previous implementation reachable under the name sh_orig.
  alias sh_orig :sh

  # Runs a command. Trailing Hash argument supplies :verbose / :noop options.
  # Without a block, a failing command raises with a truncated copy of the
  # command line; with a block, the block receives (ok, status).
  def sh(*cmd, &block)
    options = (Hash === cmd.last) ? cmd.pop : {}
    unless block_given?
      show_command = cmd.join(" ")
      show_command = show_command[0,42] + "..."
      block = lambda { |ok, status|
        ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
      }
    end
    if RakeFileUtils.verbose_flag == :default
      options[:verbose] = true
    else
      options[:verbose] ||= RakeFileUtils.verbose_flag
    end
    options[:noop] ||= RakeFileUtils.nowrite_flag
    rake_check_options options, :noop, :verbose
    Pwrake::Log.output_message cmd.join(" ") if options[:verbose]
    unless options[:noop]
      res,status = pwrake_system(*cmd)
      block.call(res, status)
    end
  end

  # Executes cmd through the current Pwrake shell when one is active,
  # otherwise through Kernel#system. Returns [result, status] where status
  # responds to #exitstatus. The call is timed with Pwrake::Timer.
  def pwrake_system(*cmd)
    cmd_log = cmd.join(" ").inspect
    tm = Pwrake::Timer.new("sh",cmd_log)

    conn = Pwrake.current_shell
    if conn.kind_of?(Pwrake::Shell)
      res    = conn.system(*cmd)
      status = Rake::PseudoStatus.new(conn.status)
    else
      res    = system(*cmd)
      status = $?
      # system failed and left no process status: report exit status 1.
      status = Rake::PseudoStatus.new(1) if !res && status.nil?
    end

    tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
    [res,status]
  end
  private :pwrake_system


  # Pwrake version of backquote command.
  # Returns the command's output; the exit status is only logged via the timer.
  def pwrake_backquote(cmd)
    cmd_log = cmd.inspect
    tm = Pwrake::Timer.new("bq",cmd_log)

    conn = Pwrake.current_shell
    if conn.kind_of?(Pwrake::Shell)
      res    = conn.backquote(*cmd)
      status = conn.status
    else
      res    = `#{cmd}`
      # Backticks always return a String, so the old `!res` test could never
      # fire; fall back to 1 only when no process status was recorded.
      status = $?.nil? ? 1 : $?.exitstatus
    end

    tm.finish("status=%s cmd=%s"%[status,cmd_log])
    res
  end

end # module FileUtils
@@ -0,0 +1,229 @@
1
+ module Pwrake
2
+
3
# Helpers for translating between local paths (under a mountpoint) and paths
# inside the file system (under a sub-directory), plus a wrapper around the
# external `gfwhere` command.
module GfarmPath

  module_function

  # Walks up from the current directory until a mountpoint is reached.
  def mountpoint_of_cwd
    path = Pathname.pwd
    while !path.mountpoint?
      path = path.parent
    end
    path
  end

  # Module-wide state: the local mountpoint defaults to the mountpoint that
  # contains the directory the process was started in.
  @@local_mountpoint = mountpoint_of_cwd
  @@fs_subdir = Pathname.new('/')

  def mountpoint=(d)
    @@local_mountpoint = Pathname.new(d)
  end

  def mountpoint
    @@local_mountpoint
  end

  # Sets the file-system sub-directory; a relative value is made absolute by
  # prefixing '/'. A nil/false argument leaves the current value untouched.
  def subdir=(d)
    if d
      @@fs_subdir = Pathname.new(d)
      if @@fs_subdir.relative?
        @@fs_subdir = Pathname.new('/') + @@fs_subdir
      end
    end
  end

  def subdir
    @@fs_subdir.to_s
  end

  # Current directory relative to the local mountpoint.
  def pwd
    Pathname.pwd.relative_path_from(@@local_mountpoint)
  end

  # Looks up directory d (default: the local mountpoint) in the mount table
  # and returns a truthy match position when its file-system type contains
  # "gfarm2fs", nil otherwise.
  #
  # d    - mountpoint to look for (String or Pathname).
  # mtab - mount table file to read; defaults to /etc/mtab.
  def gfarm2fs?(d=nil, mtab='/etc/mtab')
    d ||= @@local_mountpoint
    # Built per call (no /o, which would freeze the first d forever), with d
    # escaped and required to start a whitespace-separated field so that a
    # device name ending in the same path is not mistaken for the mountpoint.
    pattern = /\s#{Regexp.escape(d.to_s)} (?:type )?(\S+)/
    mount_type = nil
    File.foreach(mtab) do |l|
      if pattern =~ l
        mount_type = $1
        break
      end
    end
    /gfarm2fs/ =~ mount_type
  end

  # Local path (absolute, or relative to the cwd) -> path relative to the
  # local mountpoint.
  def from_local(x)
    pn = Pathname(x)
    if pn.absolute?
      pn.relative_path_from(@@local_mountpoint)
    else
      Pathname.pwd.relative_path_from(@@local_mountpoint) + pn
    end
  end

  # File-system path -> path relative to the file-system sub-directory.
  def from_fs(x)
    Pathname(x).relative_path_from(@@fs_subdir)
  end

  # Relative path -> path under the file-system sub-directory.
  def to_fs(x)
    @@fs_subdir + Pathname(x)
  end

  # Relative path -> path under the local mountpoint.
  def to_local(x)
    @@local_mountpoint + Pathname(x)
  end

  # Local path -> file-system path, as a String.
  def local_to_fs(x)
    x = from_local(x)
    x = to_fs(x)
    x.to_s
  end

  # File-system path -> local path, as a String.
  def fs_to_local(x)
    x = from_fs(x)
    x = to_local(x)
    x.to_s
  end

  # Runs `gfwhere` on the given local paths (nil entries are skipped) and
  # returns a Hash mapping file-system path => Array of host names.
  # Paths are batched so that one command line stays below 20480 characters.
  # NOTE(review): paths are interpolated into a shell command unquoted, as in
  # the original; confirm callers never pass names with shell metacharacters.
  def gfwhere(list)
    result = {}
    count = 0
    cmd = "gfwhere".dup
    parse_proc = proc{|x|
      if count==1
        # Single path: the whole output is taken as that path's host list.
        # cmd[8..-1] is the path following the 8-character "gfwhere " prefix.
        result[cmd[8..-1]] = x.split
      else
        # Several paths: output is scanned for "file:\nhosts" pairs.
        x.scan(/^([^\n]+):\n([^\n]*)$/m) do |file,hosts|
          h = hosts.split
          result[file] = h if !h.empty?
        end
      end
    }

    list.each do |a|
      if a
        path = local_to_fs(a)
        # Flush the pending batch before it grows too long; never flush an
        # empty batch (that would run `gfwhere` without arguments).
        if count > 0 && cmd.size + path.size + 1 > 20480 # 131000
          x = `#{cmd}`
          parse_proc.call(x)
          cmd = "gfwhere".dup
          count = 0
        end
        cmd << " "
        cmd << path
        count += 1
      end
    end
    if count > 0
      x = `#{cmd}`
      parse_proc.call(x)
    end
    result
  end

end
127
+
128
+
129
# Shell that mounts the file system with gfarm2fs on the remote side before
# use and unmounts it again on close.
class GfarmShell < Shell

  # Per-host counter used to give each shell on a host its own index.
  @@core_id = {}
  @@prefix = "pwrake_#{ENV['USER']}"

  # host - passed through to Shell#initialize.
  # opt  - passed through to Shell#initialize; this class reads :single_mp,
  #        :basedir, :prefix and :work_dir from @option afterwards.
  def initialize(host,opt={})
    super(host,opt)
    @single_mp = @option[:single_mp]
    @basedir   = @option[:basedir]
    @prefix    = @option[:prefix] || @@prefix
    @work_dir  = @option[:work_dir]

    @core_id = @@core_id[host] || 0
    @@core_id[host] = @core_id + 1

    # With :single_mp every shell shares the "_00" mountpoint; otherwise the
    # mountpoint name carries this shell's per-host index.
    if @single_mp
      @remote_mountpoint = "#{@basedir}/#{@prefix}_00"
    else
      @remote_mountpoint = "#{@basedir}/#{@prefix}_%02d" % @core_id
    end
  end

  # Opens the shell, mounts gfarm2fs at the remote mountpoint if that
  # directory does not exist yet, rewrites PATH, and changes into the
  # working directory.
  def start
    Log.debug "--- mountpoint=#{@remote_mountpoint}"
    open(system_cmd)
    cd
    if not _system "test -d #{@remote_mountpoint}"
      _system "mkdir -p #{@remote_mountpoint}"
      subdir = GfarmPath.subdir
      if ["/","",nil].include?(subdir)
        _system "gfarm2fs #{@remote_mountpoint}"
      else
        _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
      end
    end
    # Replace the local mountpoint inside PATH with the remote one. A plain
    # String pattern with a block is used so that regexp metacharacters in
    # the mountpoint (and backslashes in the replacement) are taken literally.
    path = ENV['PATH'].gsub(GfarmPath.mountpoint.to_s) { @remote_mountpoint }
    _system "export PATH=#{path}"
    cd_work_dir
  end

  # Unmounts and removes the remote mountpoint, then closes the shell.
  # Returns self.
  def close
    if @remote_mountpoint
      cd
      _system "fusermount -u #{@remote_mountpoint}"
      _system "rmdir #{@remote_mountpoint}"
    end
    super
    self
  end

  def cd_work_dir
    # modify local work_dir -> remote work_dir
    dir = Pathname.new(@remote_mountpoint) + GfarmPath.pwd
    cd dir
  end

end
186
+
187
+
188
# Queue that tags file tasks with the hosts reported by GfarmPath.gfwhere for
# their first prerequisite.
class GfarmQueue < LocalityAwareQueue

  # Abbreviated description of array a: its size and at most the first six
  # elements, with the closing bracket replaced by ",..." when more exist.
  def abr_msg(a)
    shown = a[0..5].map{|x| x}.inspect
    shown.sub!(/]$/,",...") if a.size > 6
    "size=#{a.size} #{shown}"
  end

  # Sets t.location on every Rake::FileTask in tasks that has a first
  # prerequisite, using one batched gfwhere lookup, and returns tasks.
  # Does nothing when the dryrun or disable_affinity option is set.
  def where(tasks)
    opts = Pwrake.application.options
    return tasks if opts.dryrun || opts.disable_affinity

    start_time = Time.now

    # Collect the distinct first prerequisites of all file tasks.
    filenames = []
    tasks.each do |t|
      next unless t.kind_of?(Rake::FileTask)
      name = t.prerequisites[0]
      next unless name
      filenames << name unless filenames.include?(name)
    end

    unless filenames.empty?
      located = GfarmPath.gfwhere(filenames)
      tasks.each do |t|
        next unless t.kind_of?(Rake::FileTask)
        prereq_name = t.prerequisites[0]
        next unless prereq_name
        t.location = located[GfarmPath.local_to_fs(prereq_name)]
      end
    end

    Log.info "-- GfarmQueue#where %.6fs %s" % [Time.now-start_time,abr_msg(filenames)]
    tasks
  end

end
228
+
229
+ end