pwrake 0.9.5 → 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6779de017cf5e05367aecc0f5fef0f12bfd4c54d
4
+ data.tar.gz: f9aa0b823cf6d147ae595c86643859ebb283992c
5
+ SHA512:
6
+ metadata.gz: df46550084799df1303f17a4dafbe5ee7ed2e2589a165bb2c340f37d4972b37a195da8d3f03922a79cca1c21f9ddb794a4042fa085437820d4093a24cfccd8d3
7
+ data.tar.gz: 992425a4aef9202594598b66c18f42dbd3d7d06a50f77315fa4252f72e0e6c9e483965f28ad1c166c3475956758d2e17bbbd2700d7837423682e862a0f73e008
data/README.md CHANGED
@@ -3,6 +3,9 @@
3
3
  Parallel workflow extension for Rake
4
4
  * Author: Masahiro Tanaka
5
5
 
6
+ ([日本語README](https://github.com/masa16/pwrake/wiki/Pwrake.ja)),
7
+ ([GitHub Repository](https://github.com/masa16/pwrake))
8
+
6
9
  ## Features
7
10
 
8
11
  * Parallelize all tasks; no need to modify Rakefile, no need to use `multitask`.
@@ -47,11 +50,87 @@ Or, gem install:
47
50
 
48
51
  $ pwrake --hostfile=hosts
49
52
 
53
+ ## Options
54
+
55
+ ### Command line option
56
+
57
+ -F, --hostfile FILE [Pw] Read hostnames from FILE
58
+ -j, --jobs [N] [Pw] Number of threads at localhost (default: # of processors)
59
+ -L, --logfile [FILE] [Pw] Write log to FILE
60
+ --ssh-opt, --ssh-option OPTION
61
+ [Pw] Option passed to SSH
62
+ --filesystem FILESYSTEM [Pw] Specify FILESYSTEM (nfs|gfarm)
63
+ --gfarm [Pw] FILESYSTEM=gfarm
64
+ -A, --disable-affinity [Pw] Turn OFF affinity (AFFINITY=off)
65
+ -S, --disable-steal [Pw] Turn OFF task steal
66
+ -d, --debug [Pw] Output Debug messages
67
+ --pwrake-conf [FILE] [Pw] Pwrake configuration file in YAML
68
+ --show-conf, --show-config [Pw] Show Pwrake configuration options
69
+ --report LOG [Pw] Report profile HTML from LOG and exit.
70
+
71
+ ### pwrake_conf.yaml
72
+
73
+ * If `pwrake_conf.yaml` exists in the current directory, Pwrake reads options from it.
74
+ * Example (in YAML form):
75
+
76
+ HOSTFILE : hosts
77
+ LOGFILE : true
78
+ TASKLOG : true
79
+ PROFILE : true
80
+ GNU_TIME : true
81
+ PLOT_PARALLELISM : true
82
+ DISABLE_AFFINITY: true
83
+ DISABLE_STEAL: true
84
+ FAILED_TARGET : delete
85
+ PASS_ENV :
86
+ - ENV1
87
+ - ENV2
88
+
89
+ * Option list:
90
+
91
+ HOSTFILE, HOSTS default=false
92
+ LOGFILE, LOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.log"
93
+ TASKLOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.task"
94
+ PROFILE default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.csv"
95
+ WORK_DIR default=$PWD
96
+ FILESYSTEM default=nil (autodetect)
97
+ SSH_OPTION (String) SSH option
98
+ PASS_ENV (Array) Environment variables passed to SSH
99
+ GNU_TIME If true, obtains PROFILEs using GNU time
100
+ PLOT_PARALLELISM If true, plot parallelism using GNUPLOT
101
+ FAILED_TARGET ( rename(default) | delete | leave ) failed files
102
+ QUEUE_PRIORITY ( DFS(default) | FIFO )
103
+
104
+ for Gfarm system:
105
+
106
+ DISABLE_AFFINITY default=false
107
+ DISABLE_STEAL default=false
108
+ STEAL_WAIT default=0 (sec)
109
+ STEAL_WAIT_MAX default=10 (sec)
110
+ : Wait min(STEAL_WAIT*2**n, STEAL_WAIT_MAX) sec for task steal.
111
+ GFARM_BASEDIR default="/tmp"
112
+ GFARM_PREFIX default="pwrake_$USER"
113
+ GFARM_SUBDIR default='/'
114
+
115
+ ## Note for Gfarm
116
+
117
+ * `gfwhere-pipe` command is required for file-affinity scheduling.
118
+
119
+ wget https://gist.github.com/masa16/5787473/raw/6df5deeb80a4cea6b9d1d1ce01f390f65d650717/gfwhere-pipe.patch
120
+ cd gfarm-2.5.8.1
121
+ patch -p1 < ../gfwhere-pipe.patch
122
+ ./configure --prefix=...
123
+ make
124
+ make install
125
+
50
126
  ## Tested Platform
51
127
 
52
- * Ruby 1.9.3, 2.0.0-preview1
53
- * Rake 0.9.2.2
54
- * Fedora 16 / Debian 5.0.7
128
+ * Ruby 2.0.0
129
+ * Rake 0.9.6
130
+ * CentOS 6.4
55
131
 
56
- ## Workflow demo
132
+ ## Acknowledgment
57
133
 
134
+ This work is supported by
135
+ * JST CREST, research area: "Development of System Software Technologies for Post-Peta Scale High Performance Computing," and
136
+ * MEXT Promotion of Research for Next Generation IT Infrastructure "Resources Linkage for e-Science (RENKEI)."
@@ -17,10 +17,24 @@ module Pwrake
17
17
 
18
18
  class Application < ::Rake::Application
19
19
 
20
+ def filesystem
21
+ @master.filesystem
22
+ end
23
+
20
24
  def task_queue
21
25
  @master.task_queue
22
26
  end
23
27
 
28
+ def finish_queue
29
+ @master.finish_queue
30
+ end
31
+
32
+ def postprocess(t)
33
+ if @master.postprocess
34
+ @master.postprocess.postprocess(t)
35
+ end
36
+ end
37
+
24
38
  def thread_loop(*args)
25
39
  @master.thread_loop(*args)
26
40
  end
@@ -33,6 +47,18 @@ module Pwrake
33
47
  @master.start
34
48
  end
35
49
 
50
+ def core_list
51
+ @master.core_list
52
+ end
53
+
54
+ def task_logger
55
+ @master.task_logger
56
+ end
57
+
58
+ def task_id_counter
59
+ @master.task_id_counter
60
+ end
61
+
36
62
  # Run the Pwrake application.
37
63
  def run
38
64
  standard_exception_handling do
@@ -144,7 +170,15 @@ module Pwrake
144
170
  ['--show-conf','--show-config',
145
171
  "[Pw] Show Pwrake configuration options",
146
172
  lambda {|value| options.show_conf = true }
173
+ ],
174
+ ['--report LOG', "[Pw] Report profile HTML from LOG and exit.",
175
+ lambda { |value|
176
+ require 'pwrake/report'
177
+ Report.new(File.basename(value.sub(/\.[^.]+$/,"")),[]).report_html
178
+ exit
179
+ }
147
180
  ]
181
+
148
182
  ])
149
183
  opts
150
184
  end
@@ -1,6 +1,6 @@
1
+ module Pwrake
1
2
  module FileUtils
2
-
3
- alias sh_orig :sh
3
+ module_function
4
4
 
5
5
  def sh(*cmd, &block)
6
6
  options = (Hash === cmd.last) ? cmd.pop : {}
@@ -17,14 +17,38 @@ module FileUtils
17
17
  options[:verbose] ||= RakeFileUtils.verbose_flag
18
18
  end
19
19
  options[:noop] ||= RakeFileUtils.nowrite_flag
20
- rake_check_options options, :noop, :verbose
21
- Pwrake::Log.output_message cmd.join(" ") if options[:verbose]
20
+ Rake.rake_check_options options, :noop, :verbose
21
+ Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
22
22
  unless options[:noop]
23
- res,status = pwrake_system(*cmd)
23
+ res,status = Pwrake::FileUtils.pwrake_system(*cmd)
24
24
  block.call(res, status)
25
25
  end
26
26
  end
27
27
 
28
+ def bq(*cmd, &block)
29
+ options = (Hash === cmd.last) ? cmd.pop : {}
30
+ unless block_given?
31
+ show_command = cmd.join(" ")
32
+ show_command = show_command[0,42] + "..."
33
+ block = lambda { |ok, status|
34
+ ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
35
+ }
36
+ end
37
+ if RakeFileUtils.verbose_flag == :default
38
+ options[:verbose] = true
39
+ else
40
+ options[:verbose] ||= RakeFileUtils.verbose_flag
41
+ end
42
+ options[:noop] ||= RakeFileUtils.nowrite_flag
43
+ Rake.rake_check_options options, :noop, :verbose
44
+ Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
45
+ unless options[:noop]
46
+ res,status = Pwrake::FileUtils.pwrake_backquote(*cmd)
47
+ block.call(res, status)
48
+ end
49
+ res
50
+ end
51
+
28
52
  def pwrake_system(*cmd)
29
53
  cmd_log = cmd.join(" ").inspect
30
54
  tm = Pwrake::Timer.new("sh",cmd_log)
@@ -42,8 +66,6 @@ module FileUtils
42
66
  tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
43
67
  [res,status]
44
68
  end
45
- private :pwrake_system
46
-
47
69
 
48
70
  # Pwrake version of backquote command
49
71
  def pwrake_backquote(cmd)
@@ -53,18 +75,23 @@ module FileUtils
53
75
  conn = Pwrake.current_shell
54
76
  if conn.kind_of?(Pwrake::Shell)
55
77
  res = conn.backquote(*cmd)
56
- status = conn.status
78
+ status = Rake::PseudoStatus.new(conn.status)
57
79
  else
58
80
  res = `#{cmd}`
59
- if !res && status.nil?
60
- status = 1
61
- else
62
- status = $?.exitstatus
63
- end
81
+ status = $?
82
+ status = Rake::PseudoStatus.new(1) if status.nil?
64
83
  end
65
84
 
66
- tm.finish("status=%s cmd=%s"%[status,cmd_log])
67
- res
85
+ tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
86
+ [res,status]
68
87
  end
69
88
 
70
- end # module FileUtils
89
+ end # module Pwrake::FileUtils
90
+ end
91
+
92
+ module Rake
93
+ module DSL
94
+ include Pwrake::FileUtils
95
+ private(*Pwrake::FileUtils.instance_methods(false))
96
+ end
97
+ end
@@ -87,7 +87,20 @@ module Pwrake
87
87
  x.to_s
88
88
  end
89
89
 
90
+ def gfpath(file='.')
91
+ begin
92
+ IO.popen("gfstat '#{file}'") do |f|
93
+ if /File: "([^"]+)"/ =~ f.gets #"
94
+ return $1
95
+ end
96
+ end
97
+ rescue
98
+ end
99
+ nil
100
+ end
101
+
90
102
  def gfwhere(list)
103
+ system "sync"
91
104
  result = {}
92
105
  count = 0
93
106
  cmd = "gfwhere"
@@ -95,7 +108,7 @@ module Pwrake
95
108
  if count==1
96
109
  result[cmd[8..-1]] = x.split
97
110
  else
98
- x.scan(/^([^\n]+):\n([^\n]*)$/m) do |file,hosts|
111
+ x.scan(/^(?:gfarm:\/\/[^\/]+)?([^\n]+):\n([^\n]*)$/m) do |file,hosts|
99
112
  h = hosts.split
100
113
  result[file] = h if !h.empty?
101
114
  end
@@ -106,7 +119,7 @@ module Pwrake
106
119
  if a
107
120
  path = local_to_fs(a)
108
121
  if cmd.size + path.size + 1 > 20480 # 131000
109
- x = `#{cmd}`
122
+ x = `#{cmd} 2> /dev/null`
110
123
  parse_proc.call(x)
111
124
  cmd = "gfwhere"
112
125
  count = 0
@@ -117,7 +130,7 @@ module Pwrake
117
130
  end
118
131
  end
119
132
  if count > 0
120
- x = `#{cmd}`
133
+ x = `#{cmd} 2> /dev/null`
121
134
  parse_proc.call(x)
122
135
  end
123
136
  result
@@ -153,16 +166,22 @@ module Pwrake
153
166
  open(system_cmd)
154
167
  cd
155
168
  if not _system "test -d #{@remote_mountpoint}"
156
- _system "mkdir -p #{@remote_mountpoint}"
157
- subdir = GfarmPath.subdir
158
- if ["/","",nil].include?(subdir)
159
- _system "gfarm2fs #{@remote_mountpoint}"
160
- else
161
- _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
169
+ _system "mkdir -p #{@remote_mountpoint}" or die
170
+ else
171
+ lines = _backquote("sync; mount")
172
+ if /#{@remote_mountpoint} (?:type )?(\S+)/om =~ lines
173
+ _system "sync; fusermount -u #{@remote_mountpoint}"
174
+ _system "sync"
162
175
  end
163
176
  end
177
+ subdir = GfarmPath.subdir
178
+ if ["/","",nil].include?(subdir)
179
+ _system "gfarm2fs #{@remote_mountpoint}"
180
+ else
181
+ _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
182
+ end
164
183
  path = ENV['PATH'].gsub( /#{GfarmPath.mountpoint}/, @remote_mountpoint )
165
- _system "export PATH=#{path}"
184
+ _system "export PATH=#{path}" or die
166
185
  cd_work_dir
167
186
  end
168
187
 
@@ -185,45 +204,65 @@ module Pwrake
185
204
  end
186
205
 
187
206
 
188
- class GfarmQueue < LocalityAwareQueue
207
+ class GfarmPostprocess
189
208
 
190
- def abr_msg(a)
191
- m = a[0..5].map{|x| x}.inspect
192
- m.sub!(/]$/,",...") if a.size > 6
193
- "size=#{a.size} #{m}"
209
+ def initialize
210
+ @lock = Mutex.new
211
+ @io = IO.popen('gfwhere-pipe','r+')
212
+ @io.sync = true
213
+ end
214
+
215
+ def gfwhere(file)
216
+ return [] if file==''
217
+ @lock.synchronize do
218
+ @io.puts(file)
219
+ @io.flush
220
+ s = @io.gets
221
+ if s.nil?
222
+ raise "gfwhere: unexpected end"
223
+ end
224
+ s.chomp!
225
+ if s != file
226
+ raise "gfwhere: file=#{file}, result=#{s}"
227
+ end
228
+ while s = @io.gets
229
+ s.chomp!
230
+ case s
231
+ when ""
232
+ next
233
+ when /^gfarm:\/\//
234
+ next
235
+ when /^Error:/
236
+ return []
237
+ else
238
+ return s.split(/\s+/)
239
+ end
240
+ end
241
+ end
194
242
  end
195
243
 
196
- def where(tasks)
197
- if Pwrake.application.options.dryrun ||
198
- Pwrake.application.options.disable_affinity
199
- return tasks
244
+ def postprocess(t)
245
+ if t.kind_of? Rake::FileTask
246
+ t.location = gfwhere(t.name)
200
247
  end
248
+ end
201
249
 
202
- start_time = Time.now
203
- #Log.debug "--- GfarmQueue#where #{tasks.inspect}"
204
- #if Pwrake.manager.gfarm and Pwrake.manager.affinity
205
- gfwhere_result = {}
206
- filenames = []
250
+ def postprocess_bulk(tasks)
251
+ list = []
207
252
  tasks.each do |t|
208
- if t.kind_of?(Rake::FileTask) and
209
- name = t.prerequisites[0] and
210
- !filenames.include?(name)
211
- filenames << name
212
- end
253
+ list << t.name if t.kind_of? Rake::FileTask
213
254
  end
214
-
215
- if !filenames.empty?
216
- gfwhere_result = GfarmPath.gfwhere(filenames)
217
- tasks.each do |t|
218
- if t.kind_of? Rake::FileTask and prereq_name = t.prerequisites[0]
219
- t.location = gfwhere_result[GfarmPath.local_to_fs(prereq_name)]
220
- end
221
- end
255
+ if !list.empty?
256
+ Log.info "-- after_check: size=#{list.size} #{list.inspect}"
257
+ gfwhere_result = GfarmPath.gfwhere(list)
258
+ tasks.each do |t|
259
+ if t.kind_of? Rake::FileTask
260
+ t.location = gfwhere_result[GfarmPath.local_to_fs(t.name)]
261
+ end
262
+ end
263
+ #puts "'#{self.name}' exist? => #{File.exist?(self.name)} loc => #{loc}"
222
264
  end
223
- Log.info "-- GfarmQueue#where %.6fs %s" % [Time.now-start_time,abr_msg(filenames)]
224
- tasks
225
265
  end
226
266
 
227
267
  end
228
-
229
268
  end