pwrake 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6779de017cf5e05367aecc0f5fef0f12bfd4c54d
4
+ data.tar.gz: f9aa0b823cf6d147ae595c86643859ebb283992c
5
+ SHA512:
6
+ metadata.gz: df46550084799df1303f17a4dafbe5ee7ed2e2589a165bb2c340f37d4972b37a195da8d3f03922a79cca1c21f9ddb794a4042fa085437820d4093a24cfccd8d3
7
+ data.tar.gz: 992425a4aef9202594598b66c18f42dbd3d7d06a50f77315fa4252f72e0e6c9e483965f28ad1c166c3475956758d2e17bbbd2700d7837423682e862a0f73e008
data/README.md CHANGED
@@ -3,6 +3,9 @@
3
3
  Parallel workflow extension for Rake
4
4
  * Author: Masahiro Tanaka
5
5
 
6
+ ([日本語README](https://github.com/masa16/pwrake/wiki/Pwrake.ja)),
7
+ ([GitHub Repository](https://github.com/masa16/pwrake))
8
+
6
9
  ## Features
7
10
 
8
11
  * Parallelize all tasks; no need to modify Rakefile, no need to use `multitask`.
@@ -47,11 +50,87 @@ Or, gem install:
47
50
 
48
51
  $ pwrake --hostfile=hosts
49
52
 
53
+ ## Options
54
+
55
+ ### Command line option
56
+
57
+ -F, --hostfile FILE [Pw] Read hostnames from FILE
58
+ -j, --jobs [N] [Pw] Number of threads at localhost (default: # of processors)
59
+ -L, --logfile [FILE] [Pw] Write log to FILE
60
+ --ssh-opt, --ssh-option OPTION
61
+ [Pw] Option passed to SSH
62
+ --filesystem FILESYSTEM [Pw] Specify FILESYSTEM (nfs|gfarm)
63
+ --gfarm [Pw] FILESYSTEM=gfarm
64
+ -A, --disable-affinity [Pw] Turn OFF affinity (AFFINITY=off)
65
+ -S, --disable-steal [Pw] Turn OFF task steal
66
+ -d, --debug [Pw] Output Debug messages
67
+ --pwrake-conf [FILE] [Pw] Pwrake configuration file in YAML
68
+ --show-conf, --show-config [Pw] Show Pwrake configuration options
69
+ --report LOG [Pw] Report profile HTML from LOG and exit.
70
+
71
+ ### pwrake_conf.yaml
72
+
73
+ * If `pwrake_conf.yaml` exists in the current directory, Pwrake reads options from it.
74
+ * Example (in YAML form):
75
+
76
+ HOSTFILE : hosts
77
+ LOGFILE : true
78
+ TASKLOG : true
79
+ PROFILE : true
80
+ GNU_TIME : true
81
+ PLOT_PARALLELISM : true
82
+ DISABLE_AFFINITY: true
83
+ DISABLE_STEAL: true
84
+ FAILED_TARGET : delete
85
+ PASS_ENV :
86
+ - ENV1
87
+ - ENV2
88
+
89
+ * Option list:
90
+
91
+ HOSTFILE, HOSTS default=false
92
+ LOGFILE, LOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.log"
93
+ TASKLOG default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.task"
94
+ PROFILE default=none, string=filename, true="Pwrake%Y%m%d-%H%M%S_%$.csv"
95
+ WORK_DIR default=$PWD
96
+ FILESYSTEM default=nil (autodetect)
97
+ SSH_OPTION (String) SSH option
98
+ PASS_ENV (Array) Environment variables passed to SSH
99
+ GNU_TIME If true, obtains PROFILEs using GNU time
100
+ PLOT_PARALLELISM If true, plot parallelism using GNUPLOT
101
+ FAILED_TARGET ( rename(default) | delete | leave ) failed files
102
+ QUEUE_PRIORITY ( DFS(default) | FIFO )
103
+
104
+ for Gfarm system:
105
+
106
+ DISABLE_AFFINITY default=false
107
+ DISABLE_STEAL default=false
108
+ STEAL_WAIT default=0 (sec)
109
+ STEAL_WAIT_MAX default=10 (sec)
110
+ : Wait min(STEAL_WAIT*2**n, STEAL_WAIT_MAX) sec for task steal.
111
+ GFARM_BASEDIR default="/tmp"
112
+ GFARM_PREFIX default="pwrake_$USER"
113
+ GFARM_SUBDIR default='/'
114
+
115
+ ## Note for Gfarm
116
+
117
+ * `gfwhere-pipe` command is required for file-affinity scheduling.
118
+
119
+ wget https://gist.github.com/masa16/5787473/raw/6df5deeb80a4cea6b9d1d1ce01f390f65d650717/gfwhere-pipe.patch
120
+ cd gfarm-2.5.8.1
121
+ patch -p1 < ../gfwhere-pipe.patch
122
+ ./configure --prefix=...
123
+ make
124
+ make install
125
+
50
126
  ## Tested Platform
51
127
 
52
- * Ruby 1.9.3, 2.0.0-preview1
53
- * Rake 0.9.2.2
54
- * Fedora 16 / Debian 5.0.7
128
+ * Ruby 2.0.0
129
+ * Rake 0.9.6
130
+ * CentOS 6.4
55
131
 
56
- ## Workflow demo
132
+ ## Acknowledgment
57
133
 
134
+ This work is supported by
135
+ * JST CREST, research area: "Development of System Software Technologies for Post-Peta Scale High Performance Computing," and
136
+ * MEXT Promotion of Research for Next Generation IT Infrastructure "Resources Linkage for e-Science (RENKEI)."
@@ -17,10 +17,24 @@ module Pwrake
17
17
 
18
18
  class Application < ::Rake::Application
19
19
 
20
+ def filesystem
21
+ @master.filesystem
22
+ end
23
+
20
24
  def task_queue
21
25
  @master.task_queue
22
26
  end
23
27
 
28
+ def finish_queue
29
+ @master.finish_queue
30
+ end
31
+
32
+ def postprocess(t)
33
+ if @master.postprocess
34
+ @master.postprocess.postprocess(t)
35
+ end
36
+ end
37
+
24
38
  def thread_loop(*args)
25
39
  @master.thread_loop(*args)
26
40
  end
@@ -33,6 +47,18 @@ module Pwrake
33
47
  @master.start
34
48
  end
35
49
 
50
+ def core_list
51
+ @master.core_list
52
+ end
53
+
54
+ def task_logger
55
+ @master.task_logger
56
+ end
57
+
58
+ def task_id_counter
59
+ @master.task_id_counter
60
+ end
61
+
36
62
  # Run the Pwrake application.
37
63
  def run
38
64
  standard_exception_handling do
@@ -144,7 +170,15 @@ module Pwrake
144
170
  ['--show-conf','--show-config',
145
171
  "[Pw] Show Pwrake configuration options",
146
172
  lambda {|value| options.show_conf = true }
173
+ ],
174
+ ['--report LOG', "[Pw] Report profile HTML from LOG and exit.",
175
+ lambda { |value|
176
+ require 'pwrake/report'
177
+ Report.new(File.basename(value.sub(/\.[^.]+$/,"")),[]).report_html
178
+ exit
179
+ }
147
180
  ]
181
+
148
182
  ])
149
183
  opts
150
184
  end
@@ -1,6 +1,6 @@
1
+ module Pwrake
1
2
  module FileUtils
2
-
3
- alias sh_orig :sh
3
+ module_function
4
4
 
5
5
  def sh(*cmd, &block)
6
6
  options = (Hash === cmd.last) ? cmd.pop : {}
@@ -17,14 +17,38 @@ module FileUtils
17
17
  options[:verbose] ||= RakeFileUtils.verbose_flag
18
18
  end
19
19
  options[:noop] ||= RakeFileUtils.nowrite_flag
20
- rake_check_options options, :noop, :verbose
21
- Pwrake::Log.output_message cmd.join(" ") if options[:verbose]
20
+ Rake.rake_check_options options, :noop, :verbose
21
+ Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
22
22
  unless options[:noop]
23
- res,status = pwrake_system(*cmd)
23
+ res,status = Pwrake::FileUtils.pwrake_system(*cmd)
24
24
  block.call(res, status)
25
25
  end
26
26
  end
27
27
 
28
+ def bq(*cmd, &block)
29
+ options = (Hash === cmd.last) ? cmd.pop : {}
30
+ unless block_given?
31
+ show_command = cmd.join(" ")
32
+ show_command = show_command[0,42] + "..."
33
+ block = lambda { |ok, status|
34
+ ok or fail "Command failed with status (#{status.exitstatus}): [#{show_command}]"
35
+ }
36
+ end
37
+ if RakeFileUtils.verbose_flag == :default
38
+ options[:verbose] = true
39
+ else
40
+ options[:verbose] ||= RakeFileUtils.verbose_flag
41
+ end
42
+ options[:noop] ||= RakeFileUtils.nowrite_flag
43
+ Rake.rake_check_options options, :noop, :verbose
44
+ Pwrake::Log.stderr_puts cmd.join(" ") if options[:verbose]
45
+ unless options[:noop]
46
+ res,status = Pwrake::FileUtils.pwrake_backquote(*cmd)
47
+ block.call(res, status)
48
+ end
49
+ res
50
+ end
51
+
28
52
  def pwrake_system(*cmd)
29
53
  cmd_log = cmd.join(" ").inspect
30
54
  tm = Pwrake::Timer.new("sh",cmd_log)
@@ -42,8 +66,6 @@ module FileUtils
42
66
  tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
43
67
  [res,status]
44
68
  end
45
- private :pwrake_system
46
-
47
69
 
48
70
  # Pwrake version of backquote command
49
71
  def pwrake_backquote(cmd)
@@ -53,18 +75,23 @@ module FileUtils
53
75
  conn = Pwrake.current_shell
54
76
  if conn.kind_of?(Pwrake::Shell)
55
77
  res = conn.backquote(*cmd)
56
- status = conn.status
78
+ status = Rake::PseudoStatus.new(conn.status)
57
79
  else
58
80
  res = `#{cmd}`
59
- if !res && status.nil?
60
- status = 1
61
- else
62
- status = $?.exitstatus
63
- end
81
+ status = $?
82
+ status = Rake::PseudoStatus.new(1) if status.nil?
64
83
  end
65
84
 
66
- tm.finish("status=%s cmd=%s"%[status,cmd_log])
67
- res
85
+ tm.finish("status=%s cmd=%s"%[status.exitstatus,cmd_log])
86
+ [res,status]
68
87
  end
69
88
 
70
- end # module FileUtils
89
+ end # module Pwrake::FileUtils
90
+ end
91
+
92
+ module Rake
93
+ module DSL
94
+ include Pwrake::FileUtils
95
+ private(*Pwrake::FileUtils.instance_methods(false))
96
+ end
97
+ end
@@ -87,7 +87,20 @@ module Pwrake
87
87
  x.to_s
88
88
  end
89
89
 
90
+ def gfpath(file='.')
91
+ begin
92
+ IO.popen("gfstat '#{file}'") do |f|
93
+ if /File: "([^"]+)"/ =~ f.gets #"
94
+ return $1
95
+ end
96
+ end
97
+ rescue
98
+ end
99
+ nil
100
+ end
101
+
90
102
  def gfwhere(list)
103
+ system "sync"
91
104
  result = {}
92
105
  count = 0
93
106
  cmd = "gfwhere"
@@ -95,7 +108,7 @@ module Pwrake
95
108
  if count==1
96
109
  result[cmd[8..-1]] = x.split
97
110
  else
98
- x.scan(/^([^\n]+):\n([^\n]*)$/m) do |file,hosts|
111
+ x.scan(/^(?:gfarm:\/\/[^\/]+)?([^\n]+):\n([^\n]*)$/m) do |file,hosts|
99
112
  h = hosts.split
100
113
  result[file] = h if !h.empty?
101
114
  end
@@ -106,7 +119,7 @@ module Pwrake
106
119
  if a
107
120
  path = local_to_fs(a)
108
121
  if cmd.size + path.size + 1 > 20480 # 131000
109
- x = `#{cmd}`
122
+ x = `#{cmd} 2> /dev/null`
110
123
  parse_proc.call(x)
111
124
  cmd = "gfwhere"
112
125
  count = 0
@@ -117,7 +130,7 @@ module Pwrake
117
130
  end
118
131
  end
119
132
  if count > 0
120
- x = `#{cmd}`
133
+ x = `#{cmd} 2> /dev/null`
121
134
  parse_proc.call(x)
122
135
  end
123
136
  result
@@ -153,16 +166,22 @@ module Pwrake
153
166
  open(system_cmd)
154
167
  cd
155
168
  if not _system "test -d #{@remote_mountpoint}"
156
- _system "mkdir -p #{@remote_mountpoint}"
157
- subdir = GfarmPath.subdir
158
- if ["/","",nil].include?(subdir)
159
- _system "gfarm2fs #{@remote_mountpoint}"
160
- else
161
- _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
169
+ _system "mkdir -p #{@remote_mountpoint}" or die
170
+ else
171
+ lines = _backquote("sync; mount")
172
+ if /#{@remote_mountpoint} (?:type )?(\S+)/om =~ lines
173
+ _system "sync; fusermount -u #{@remote_mountpoint}"
174
+ _system "sync"
162
175
  end
163
176
  end
177
+ subdir = GfarmPath.subdir
178
+ if ["/","",nil].include?(subdir)
179
+ _system "gfarm2fs #{@remote_mountpoint}"
180
+ else
181
+ _system "gfarm2fs -o modules=subdir,subdir=#{subdir} #{@remote_mountpoint}"
182
+ end
164
183
  path = ENV['PATH'].gsub( /#{GfarmPath.mountpoint}/, @remote_mountpoint )
165
- _system "export PATH=#{path}"
184
+ _system "export PATH=#{path}" or die
166
185
  cd_work_dir
167
186
  end
168
187
 
@@ -185,45 +204,65 @@ module Pwrake
185
204
  end
186
205
 
187
206
 
188
- class GfarmQueue < LocalityAwareQueue
207
+ class GfarmPostprocess
189
208
 
190
- def abr_msg(a)
191
- m = a[0..5].map{|x| x}.inspect
192
- m.sub!(/]$/,",...") if a.size > 6
193
- "size=#{a.size} #{m}"
209
+ def initialize
210
+ @lock = Mutex.new
211
+ @io = IO.popen('gfwhere-pipe','r+')
212
+ @io.sync = true
213
+ end
214
+
215
+ def gfwhere(file)
216
+ return [] if file==''
217
+ @lock.synchronize do
218
+ @io.puts(file)
219
+ @io.flush
220
+ s = @io.gets
221
+ if s.nil?
222
+ raise "gfwhere: unexpected end"
223
+ end
224
+ s.chomp!
225
+ if s != file
226
+ raise "gfwhere: file=#{file}, result=#{s}"
227
+ end
228
+ while s = @io.gets
229
+ s.chomp!
230
+ case s
231
+ when ""
232
+ next
233
+ when /^gfarm:\/\//
234
+ next
235
+ when /^Error:/
236
+ return []
237
+ else
238
+ return s.split(/\s+/)
239
+ end
240
+ end
241
+ end
194
242
  end
195
243
 
196
- def where(tasks)
197
- if Pwrake.application.options.dryrun ||
198
- Pwrake.application.options.disable_affinity
199
- return tasks
244
+ def postprocess(t)
245
+ if t.kind_of? Rake::FileTask
246
+ t.location = gfwhere(t.name)
200
247
  end
248
+ end
201
249
 
202
- start_time = Time.now
203
- #Log.debug "--- GfarmQueue#where #{tasks.inspect}"
204
- #if Pwrake.manager.gfarm and Pwrake.manager.affinity
205
- gfwhere_result = {}
206
- filenames = []
250
+ def postprocess_bulk(tasks)
251
+ list = []
207
252
  tasks.each do |t|
208
- if t.kind_of?(Rake::FileTask) and
209
- name = t.prerequisites[0] and
210
- !filenames.include?(name)
211
- filenames << name
212
- end
253
+ list << t.name if t.kind_of? Rake::FileTask
213
254
  end
214
-
215
- if !filenames.empty?
216
- gfwhere_result = GfarmPath.gfwhere(filenames)
217
- tasks.each do |t|
218
- if t.kind_of? Rake::FileTask and prereq_name = t.prerequisites[0]
219
- t.location = gfwhere_result[GfarmPath.local_to_fs(prereq_name)]
220
- end
221
- end
255
+ if !list.empty?
256
+ Log.info "-- after_check: size=#{list.size} #{list.inspect}"
257
+ gfwhere_result = GfarmPath.gfwhere(list)
258
+ tasks.each do |t|
259
+ if t.kind_of? Rake::FileTask
260
+ t.location = gfwhere_result[GfarmPath.local_to_fs(t.name)]
261
+ end
262
+ end
263
+ #puts "'#{self.name}' exist? => #{File.exist?(self.name)} loc => #{loc}"
222
264
  end
223
- Log.info "-- GfarmQueue#where %.6fs %s" % [Time.now-start_time,abr_msg(filenames)]
224
- tasks
225
265
  end
226
266
 
227
267
  end
228
-
229
268
  end