pwrake 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,30 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .config
19
+ *~
20
+ */*~
21
+ */*/*~
22
+ *.bak
23
+ */*.bak
24
+ */*/*.bak
25
+ .#*
26
+ */.#*
27
+ */*/.#*
28
+ rhosts
29
+ spec/*/*.dat
30
+ spec/*/*.csv
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pwrake.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2009-2012 Masahiro TANAKA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ # Pwrake
2
+
3
+ Parallel workflow extension for Rake
4
+ * Author: Masahiro Tanaka
5
+
6
+ ## Features
7
+
8
+ * Parallelize all tasks; no need to modify Rakefile, no need to use `multitask`.
9
+ * Tasks are executed in the given number of worker threads.
10
+ * Remote execution using SSH.
11
+ * Pwrake is an extension to Rake, not a patch to Rake: Rake and Pwrake coexist.
12
+ * High parallel I/O performance using the Gfarm file system.
13
+
14
+ ## Installation
15
+
16
+ Download source tgz/zip and expand, cd to subdir and install:
17
+
18
+ $ ruby setup.rb
19
+
20
+ Or, gem install:
21
+
22
+ $ gem install pwrake
23
+
24
+ ## Usage
25
+
26
+ ### Parallel execution using 4 cores at localhost:
27
+
28
+ $ pwrake -j 4
29
+
30
+ ### Parallel execution using all cores at localhost:
31
+
32
+ $ pwrake -j
33
+
34
+ ### Parallel execution using a total of 2*2 cores on 2 remote hosts:
35
+
36
+ 1. Share your directory among remote hosts via a distributed file system such as NFS or Gfarm.
37
+ 2. Allow passphrase-less access via SSH in either of the following ways:
38
+ * Add passphrase-less key generated by `ssh-keygen`. (Be careful)
39
+ * Add passphrase using `ssh-add`.
40
+ 3. Make a `hosts` file in which remote host names and their numbers of cores are listed:
41
+
42
+ $ cat hosts
43
+ host1 2
44
+ host2 2
45
+
46
+ 4. Run `pwrake` with an option `--hostfile` or `-F`:
47
+
48
+ $ pwrake --hostfile=hosts
49
+
50
+ ## Tested Platform
51
+
52
+ * Ruby 1.9.3, 2.0.0-preview1
53
+ * Rake 0.9.2.2
54
+ * Fedora 16 / Debian 5.0.7
55
+
56
+ ## Workflow demo
57
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/pwrake ADDED
@@ -0,0 +1,36 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ #--
4
+ # Copyright (c) 2009-2012 Masahiro TANAKA
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to
8
+ # deal in the Software without restriction, including without limitation the
9
+ # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ # sell copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ # IN THE SOFTWARE.
23
+ #++
24
+
25
+ begin
26
+ require 'rubygems'
27
+ rescue LoadError
28
+ end
29
+
30
+ require 'rake'
31
+
32
+ libpath = File.expand_path(File.dirname(__FILE__))+"/../lib"
33
+ $LOAD_PATH.unshift libpath
34
+ require "pwrake"
35
+
36
+ Rake.application.run
@@ -0,0 +1,187 @@
1
+ module Rake
2
+ class << self
3
+ def application
4
+ @application ||= Pwrake.application
5
+ end
6
+ end
7
+ end
8
+
9
+
10
+ module Pwrake
11
+
12
+ class << self
13
+ def application
14
+ @application ||= Application.new
15
+ end
16
+ end
17
+
18
+ class Application < ::Rake::Application
19
+
20
+ def task_queue
21
+ @master.task_queue
22
+ end
23
+
24
+ def thread_loop(*args)
25
+ @master.thread_loop(*args)
26
+ end
27
+
28
+ def pwrake_options
29
+ @master.pwrake_options
30
+ end
31
+
32
+
33
+ # Run the Pwrake application.
34
+ def run
35
+ standard_exception_handling do
36
+ @master = Master.new
37
+ init("pwrake")
38
+ @master.init
39
+ load_rakefile
40
+ @master.setup
41
+ top_level
42
+ end
43
+ end
44
+
45
+ # Run the top level tasks of a Rake application.
46
+ def top_level
47
+ standard_exception_handling do
48
+ if options.show_tasks
49
+ display_tasks_and_comments
50
+ elsif options.show_prereqs
51
+ display_prerequisites
52
+ else
53
+ begin
54
+ @master.start
55
+ top_level_tasks.each { |task_name| invoke_task(task_name) }
56
+ ensure
57
+ @master.finish
58
+ end
59
+ end
60
+ end
61
+ end
62
+
63
+
64
+ def standard_rake_options
65
+ opts = super
66
+ opts.each_with_index do |a,i|
67
+ if a[0] == '--version'
68
+ a[3] = lambda { |value|
69
+ puts "rake, version #{RAKEVERSION}"
70
+ puts "pwrake, version #{Pwrake::VERSION}"
71
+ exit
72
+ }
73
+ end
74
+ end
75
+
76
+ opts.concat(
77
+ [
78
+ ['-F', '--hostfile FILE',
79
+ "[Pw] Read hostnames from FILE",
80
+ lambda { |value|
81
+ options.hostfile = value
82
+ }
83
+ ],
84
+ ['-j', '--jobs [N]',
85
+ "[Pw] Number of threads at localhost (default: # of processors)",
86
+ lambda { |value|
87
+ if value
88
+ value = value.to_i
89
+ if value > 0
90
+ options.num_threads = value
91
+ else
92
+ options.num_threads = x = processor_count + value
93
+ raise "negative/zero number of threads (#{x})" if x <= 0
94
+ end
95
+ else
96
+ options.num_threads = processor_count
97
+ end
98
+ }
99
+ ],
100
+ ['-L', '--logfile [FILE]', "[Pw] Write log to FILE",
101
+ lambda { |value|
102
+ if value.kind_of? String
103
+ options.logfile = value
104
+ else
105
+ options.logfile = ""
106
+ end
107
+ }
108
+ ],
109
+ ['--ssh-opt', '--ssh-option OPTION', "[Pw] Option passed to SSH",
110
+ lambda { |value|
111
+ options.ssh_option = value
112
+ }
113
+ ],
114
+ ['--filesystem FILESYSTEM', "[Pw] Specify FILESYSTEM (nfs|gfarm)",
115
+ lambda { |value|
116
+ options.filesystem = value
117
+ }
118
+ ],
119
+ ['--gfarm', "[Pw] FILESYSTEM=gfarm",
120
+ lambda { |value|
121
+ options.filesystem = "gfarm"
122
+ }
123
+ ],
124
+ ['-A', '--disable-affinity', "[Pw] Turn OFF affinity (AFFINITY=off)",
125
+ lambda { |value|
126
+ options.disable_affinity = true
127
+ }
128
+ ],
129
+ ['-S', '--disable-steal', "[Pw] Turn OFF task steal",
130
+ lambda { |value|
131
+ options.disable_steal = true
132
+ }
133
+ ],
134
+ ['-d', '--debug',
135
+ "[Pw] Output Debug messages",
136
+ lambda { |value|
137
+ options.debug = true
138
+ }
139
+ ],
140
+ ['--pwrake-conf [FILE]',
141
+ "[Pw] Pwrake configuation file in YAML",
142
+ lambda {|value| options.pwrake_conf = value}
143
+ ],
144
+ ['--show-conf','--show-config',
145
+ "[Pw] Show Pwrake configuration options",
146
+ lambda {|value| options.show_conf = true }
147
+ ]
148
+ ])
149
+ opts
150
+ end
151
+
152
+
153
+ def count(host_list, host)
154
+ @master.counter.count( host_list, host )
155
+ end
156
+
157
+ # from Michael Grosser's parallel
158
+ # https://github.com/grosser/parallel
159
+ def processor_count
160
+ host_os = RbConfig::CONFIG['host_os']
161
+ case host_os
162
+ when /linux|cygwin/
163
+ ncpu = 0
164
+ open("/proc/cpuinfo").each do |l|
165
+ ncpu += 1 if /^processor\s+: \d+/=~l
166
+ end
167
+ ncpu
168
+ when /darwin9/
169
+ `hwprefs cpu_count`.to_i
170
+ when /darwin/
171
+ (hwprefs_available? ? `hwprefs thread_count` : `sysctl -n hw.ncpu`).to_i
172
+ when /(open|free)bsd/
173
+ `sysctl -n hw.ncpu`.to_i
174
+ when /mswin|mingw/
175
+ require 'win32ole'
176
+ wmi = WIN32OLE.connect("winmgmts://")
177
+ cpu = wmi.ExecQuery("select NumberOfLogicalProcessors from Win32_Processor")
178
+ cpu.to_enum.first.NumberOfLogicalProcessors
179
+ when /solaris2/
180
+ `psrinfo -p`.to_i # physical cpus
181
+ else
182
+ raise "Unknown architecture: #{host_os}"
183
+ end
184
+ end
185
+
186
+ end
187
+ end
@@ -0,0 +1,54 @@
1
+ module Pwrake
2
+
3
+ class Counter
4
+
5
+ def initialize
6
+ @same = 0
7
+ @diff = 0
8
+ @total = 0
9
+ @same_hosts = {}
10
+ @diff_hosts = {}
11
+ @no_queue = 0
12
+ @found_queue = 0
13
+ @empty_queue = 0
14
+ end
15
+
16
+ def count(host_list, host)
17
+ @total += 1
18
+ if host_list and host_list.include?(host)
19
+ @same += 1
20
+ @same_hosts[host] = (@same_hosts[host]||0) + 1
21
+ else
22
+ @diff += 1
23
+ @diff_hosts[host] = (@diff_hosts[host]||0) + 1
24
+ end
25
+ end
26
+
27
+ def print
28
+ s = "same=#{@same}, diff=#{@diff}, total=#{@total}\n"
29
+ s << "same_hosts = {\n"
30
+ @same_hosts.keys.sort.each do |k|
31
+ s << " #{k}: #{@same_hosts[k]}\n"
32
+ end
33
+ s << "}\n"
34
+ s << "different_hosts = {\n"
35
+ @diff_hosts.keys.sort.each do |k|
36
+ s << " #{k}: #{@diff_hosts[k]}\n"
37
+ end
38
+ s << "}"
39
+ Log.info s
40
+ end
41
+
42
+ def no_queue
43
+ @no_queue += 1
44
+ end
45
+
46
+ def found_queue
47
+ @found_queue += 1
48
+ end
49
+
50
+ def empty_queue
51
+ @empty_queue += 1
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,70 @@
1
+ module FileUtils
2
+
3
+ alias sh_orig :sh
4
+
5
+ def sh(*cmd, &block)
6
+ options = (Hash === cmd.last) ? cmd.pop : {}
7
+ unless block_given?
8
+ show_command = cmd.join(" ")
9
+ show_command = show_command[0,42] + "..."
10
+ block = lambda { |ok, status|
11
+ ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
12
+ }
13
+ end
14
+ if RakeFileUtils.verbose_flag == :default
15
+ options[:verbose] = true
16
+ else
17
+ options[:verbose] ||= RakeFileUtils.verbose_flag
18
+ end
19
+ options[:noop] ||= RakeFileUtils.nowrite_flag
20
+ rake_check_options options, :noop, :verbose
21
+ Pwrake::Log.output_message cmd.join(" ") if options[:verbose]
22
+ unless options[:noop]
23
+ res,status = pwrake_system(*cmd)
24
+ block.call(res, status)
25
+ end
26
+ end
27
+
28
+ def pwrake_system(*cmd)
29
+ cmd_log = cmd.join(" ").inspect
30
+ tm = Pwrake::Timer.new("sh",cmd_log)
31
+
32
+ conn = Pwrake.current_shell
33
+ if conn.kind_of?(Pwrake::Shell)
34
+ res = conn.system(*cmd)
35
+ status = Rake::PseudoStatus.new(conn.status)
36
+ else
37
+ res = system(*cmd)
38
+ status = $?
39
+ status = Rake::PseudoStatus.new(1) if !res && status.nil?
40
+ end
41
+
42
+ tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
43
+ [res,status]
44
+ end
45
+ private :pwrake_system
46
+
47
+
48
+ # Pwrake version of backquote command
49
+ def pwrake_backquote(cmd)
50
+ cmd_log = cmd.inspect
51
+ tm = Pwrake::Timer.new("bq",cmd_log)
52
+
53
+ conn = Pwrake.current_shell
54
+ if conn.kind_of?(Pwrake::Shell)
55
+ res = conn.backquote(*cmd)
56
+ status = conn.status
57
+ else
58
+ res = `#{cmd}`
59
+ if !res && status.nil?
60
+ status = 1
61
+ else
62
+ status = $?.exitstatus
63
+ end
64
+ end
65
+
66
+ tm.finish("status=%s cmd=%s"%[status,cmd_log])
67
+ res
68
+ end
69
+
70
+ end # module FileUtils
@@ -0,0 +1,229 @@
1
+ module Pwrake
2
+
3
+ module GfarmPath
4
+
5
+ module_function
6
+
7
+ def mountpoint_of_cwd
8
+ path = Pathname.pwd
9
+ while !path.mountpoint?
10
+ path = path.parent
11
+ end
12
+ path
13
+ end
14
+
15
+ @@local_mountpoint = mountpoint_of_cwd
16
+ @@fs_subdir = Pathname.new('/')
17
+
18
+ def mountpoint=(d)
19
+ @@local_mountpoint = Pathname.new(d)
20
+ end
21
+
22
+ def mountpoint
23
+ @@local_mountpoint
24
+ end
25
+
26
+ def subdir=(d)
27
+ if d
28
+ @@fs_subdir = Pathname.new(d)
29
+ if @@fs_subdir.relative?
30
+ @@fs_subdir = Pathname.new('/') + @@fs_subdir
31
+ end
32
+ end
33
+ end
34
+
35
+ def subdir
36
+ @@fs_subdir.to_s
37
+ end
38
+
39
+ def pwd
40
+ Pathname.pwd.relative_path_from(@@local_mountpoint)
41
+ end
42
+
43
+ def gfarm2fs?(d=nil)
44
+ d ||= @@local_mountpoint
45
+ mount_type = nil
46
+ open('/etc/mtab','r') do |f|
47
+ f.each_line do |l|
48
+ if /#{d} (?:type )?(\S+)/o =~ l
49
+ mount_type = $1
50
+ break
51
+ end
52
+ end
53
+ end
54
+ /gfarm2fs/ =~ mount_type
55
+ end
56
+
57
+ def from_local(x)
58
+ pn = Pathname(x)
59
+ if pn.absolute?
60
+ pn.relative_path_from(@@local_mountpoint)
61
+ else
62
+ Pathname.pwd.relative_path_from(@@local_mountpoint) + pn
63
+ end
64
+ end
65
+
66
+ def from_fs(x)
67
+ Pathname(x).relative_path_from(@@fs_subdir)
68
+ end
69
+
70
+ def to_fs(x)
71
+ @@fs_subdir + Pathname(x)
72
+ end
73
+
74
+ def to_local(x)
75
+ @@local_mountpoint + Pathname(x)
76
+ end
77
+
78
+ def local_to_fs(x)
79
+ x = from_local(x)
80
+ x = to_fs(x)
81
+ x.to_s
82
+ end
83
+
84
+ def fs_to_local(x)
85
+ x = from_fs(x)
86
+ x = to_local(x)
87
+ x.to_s
88
+ end
89
+
90
+ def gfwhere(list)
91
+ result = {}
92
+ count = 0
93
+ cmd = "gfwhere"
94
+ parse_proc = proc{|x|
95
+ if count==1
96
+ result[cmd[8..-1]] = x.split
97
+ else
98
+ x.scan(/^([^\n]+):\n([^\n]*)$/m) do |file,hosts|
99
+ h = hosts.split
100
+ result[file] = h if !h.empty?
101
+ end
102
+ end
103
+ }
104
+
105
+ list.each do |a|
106
+ if a
107
+ path = local_to_fs(a)
108
+ if cmd.size + path.size + 1 > 20480 # 131000
109
+ x = `#{cmd}`
110
+ parse_proc.call(x)
111
+ cmd = "gfwhere"
112
+ count = 0
113
+ end
114
+ cmd << " "
115
+ cmd << path
116
+ count += 1
117
+ end
118
+ end
119
+ if count > 0
120
+ x = `#{cmd}`
121
+ parse_proc.call(x)
122
+ end
123
+ result
124
+ end
125
+
126
+ end
127
+
128
+
129
+ class GfarmShell < Shell
130
+
131
+ @@core_id = {}
132
+ @@prefix = "pwrake_#{ENV['USER']}"
133
+
134
+ def initialize(host,opt={})
135
+ super(host,opt)
136
+ @single_mp = @option[:single_mp]
137
+ @basedir = @option[:basedir]
138
+ @prefix = @option[:prefix] || @@prefix
139
+ @work_dir = @option[:work_dir]
140
+
141
+ @core_id = @@core_id[host] || 0
142
+ @@core_id[host] = @core_id + 1
143
+
144
+ if @single_mp
145
+ @remote_mountpoint = "#{@basedir}/#{@prefix}_00"
146
+ else
147
+ @remote_mountpoint = "#{@basedir}/#{@prefix}_%02d" % @core_id
148
+ end
149
+ end
150
+
151
+ def start
152
+ Log.debug "--- mountpoint=#{@remote_mountpoint}"
153
+ open(system_cmd)
154
+ cd
155
+ if not _system "test -d #{@remote_mountpoint}"
156
+ _system "mkdir -p #{@remote_mountpoint}"
157
+ subdir = GfarmPath.subdir
158
+ if ["/","",nil].include?(subdir)
159
+ _system "gfarm2fs #{@remote_mountpoint}"
160
+ else
161
+ _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
162
+ end
163
+ end
164
+ path = ENV['PATH'].gsub( /#{GfarmPath.mountpoint}/, @remote_mountpoint )
165
+ _system "export PATH=#{path}"
166
+ cd_work_dir
167
+ end
168
+
169
+ def close
170
+ if @remote_mountpoint
171
+ cd
172
+ _system "fusermount -u #{@remote_mountpoint}"
173
+ _system "rmdir #{@remote_mountpoint}"
174
+ end
175
+ super
176
+ self
177
+ end
178
+
179
+ def cd_work_dir
180
+ # modify local work_dir -> remote work_dir
181
+ dir = Pathname.new(@remote_mountpoint) + GfarmPath.pwd
182
+ cd dir
183
+ end
184
+
185
+ end
186
+
187
+
188
+ class GfarmQueue < LocalityAwareQueue
189
+
190
+ def abr_msg(a)
191
+ m = a[0..5].map{|x| x}.inspect
192
+ m.sub!(/]$/,",...") if a.size > 6
193
+ "size=#{a.size} #{m}"
194
+ end
195
+
196
+ def where(tasks)
197
+ if Pwrake.application.options.dryrun ||
198
+ Pwrake.application.options.disable_affinity
199
+ return tasks
200
+ end
201
+
202
+ start_time = Time.now
203
+ #Log.debug "--- GfarmQueue#where #{tasks.inspect}"
204
+ #if Pwrake.manager.gfarm and Pwrake.manager.affinity
205
+ gfwhere_result = {}
206
+ filenames = []
207
+ tasks.each do |t|
208
+ if t.kind_of?(Rake::FileTask) and
209
+ name = t.prerequisites[0] and
210
+ !filenames.include?(name)
211
+ filenames << name
212
+ end
213
+ end
214
+
215
+ if !filenames.empty?
216
+ gfwhere_result = GfarmPath.gfwhere(filenames)
217
+ tasks.each do |t|
218
+ if t.kind_of? Rake::FileTask and prereq_name = t.prerequisites[0]
219
+ t.location = gfwhere_result[GfarmPath.local_to_fs(prereq_name)]
220
+ end
221
+ end
222
+ end
223
+ Log.info "-- GfarmQueue#where %.6fs %s" % [Time.now-start_time,abr_msg(filenames)]
224
+ tasks
225
+ end
226
+
227
+ end
228
+
229
+ end