pwrake 0.9.9.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/CHANGES_V2.md +90 -0
  4. data/{LICENSE.txt → MIT-LICENSE} +2 -3
  5. data/README +12 -0
  6. data/README.md +75 -52
  7. data/bin/gfwhere-pipe +23 -12
  8. data/bin/pwrake +22 -29
  9. data/bin/pwrake_branch +24 -0
  10. data/lib/pwrake/branch.rb +22 -0
  11. data/lib/pwrake/branch/branch.rb +213 -0
  12. data/lib/pwrake/branch/branch_application.rb +53 -0
  13. data/lib/pwrake/branch/fiber_queue.rb +36 -0
  14. data/lib/pwrake/branch/file_utils.rb +101 -0
  15. data/lib/pwrake/branch/shell.rb +231 -0
  16. data/lib/pwrake/{profiler.rb → branch/shell_profiler.rb} +28 -27
  17. data/lib/pwrake/branch/worker_communicator.rb +104 -0
  18. data/lib/pwrake/{gfarm_feature.rb → gfarm/gfarm_path.rb} +2 -100
  19. data/lib/pwrake/gfarm/gfarm_postprocess.rb +53 -0
  20. data/lib/pwrake/iomux/channel.rb +70 -0
  21. data/lib/pwrake/iomux/handler.rb +124 -0
  22. data/lib/pwrake/iomux/handler_set.rb +35 -0
  23. data/lib/pwrake/iomux/runner.rb +62 -0
  24. data/lib/pwrake/logger.rb +3 -150
  25. data/lib/pwrake/master.rb +30 -137
  26. data/lib/pwrake/master/fiber_pool.rb +69 -0
  27. data/lib/pwrake/master/idle_cores.rb +30 -0
  28. data/lib/pwrake/master/master.rb +345 -0
  29. data/lib/pwrake/master/master_application.rb +150 -0
  30. data/lib/pwrake/master/postprocess.rb +16 -0
  31. data/lib/pwrake/{graphviz.rb → misc/graphviz.rb} +0 -0
  32. data/lib/pwrake/{mcgp.rb → misc/mcgp.rb} +63 -42
  33. data/lib/pwrake/option/host_map.rb +158 -0
  34. data/lib/pwrake/option/option.rb +357 -0
  35. data/lib/pwrake/option/option_filesystem.rb +112 -0
  36. data/lib/pwrake/queue/locality_aware_queue.rb +158 -0
  37. data/lib/pwrake/queue/no_action_queue.rb +67 -0
  38. data/lib/pwrake/queue/queue_array.rb +366 -0
  39. data/lib/pwrake/queue/task_queue.rb +164 -0
  40. data/lib/pwrake/report.rb +1 -0
  41. data/lib/pwrake/report/parallelism.rb +9 -3
  42. data/lib/pwrake/report/report.rb +50 -103
  43. data/lib/pwrake/report/task_stat.rb +83 -0
  44. data/lib/pwrake/task/task_algorithm.rb +107 -0
  45. data/lib/pwrake/task/task_manager.rb +32 -0
  46. data/lib/pwrake/task/task_property.rb +98 -0
  47. data/lib/pwrake/task/task_rank.rb +48 -0
  48. data/lib/pwrake/task/task_wrapper.rb +296 -0
  49. data/lib/pwrake/version.rb +1 -1
  50. data/lib/pwrake/worker/executor.rb +169 -0
  51. data/lib/pwrake/worker/gfarm_directory.rb +90 -0
  52. data/lib/pwrake/worker/invoker.rb +199 -0
  53. data/lib/pwrake/worker/load.rb +14 -0
  54. data/lib/pwrake/worker/log_executor.rb +73 -0
  55. data/lib/pwrake/worker/shared_directory.rb +74 -0
  56. data/lib/pwrake/worker/worker_main.rb +14 -0
  57. data/lib/pwrake/worker/writer.rb +59 -0
  58. data/setup.rb +1212 -1502
  59. data/spec/003/Rakefile +2 -2
  60. data/spec/008/Rakefile +2 -1
  61. data/spec/009/Rakefile +1 -1
  62. data/spec/009/pwrake_conf.yaml +1 -3
  63. data/spec/hosts +0 -2
  64. data/spec/pwrake_spec.rb +9 -8
  65. metadata +50 -21
  66. data/lib/pwrake.rb +0 -19
  67. data/lib/pwrake/application.rb +0 -232
  68. data/lib/pwrake/counter.rb +0 -54
  69. data/lib/pwrake/file_utils.rb +0 -98
  70. data/lib/pwrake/gfwhere_pool.rb +0 -109
  71. data/lib/pwrake/host_list.rb +0 -88
  72. data/lib/pwrake/locality_aware_queue.rb +0 -413
  73. data/lib/pwrake/option.rb +0 -400
  74. data/lib/pwrake/rake_modify.rb +0 -14
  75. data/lib/pwrake/shell.rb +0 -186
  76. data/lib/pwrake/task_algorithm.rb +0 -475
  77. data/lib/pwrake/task_queue.rb +0 -633
  78. data/lib/pwrake/timer.rb +0 -22
@@ -0,0 +1,164 @@
1
module Pwrake

  # Queue of task wrappers waiting to be handed to hosts with idle cores.
  #
  # Three internal queues are maintained:
  # * +@q_no_action+ - entries that are nil or whose +actions+ list is empty,
  # * +@q_input+     - entries for which +has_input_file?+ is true,
  # * +@q_no_input+  - all remaining entries.
  #
  # The class of +@q_input+ is chosen from the QUEUE_PRIORITY option.
  class TaskQueue

    # @param host_map  object whose +by_id+ yields host-info objects; each
    #   host-info is used through +idle_cores+, +decrease+, +increase+, +id+
    # @param group_map forwarded to #init_queue (not used by this class)
    # @raise [RuntimeError] when QUEUE_PRIORITY matches no known queue type
    def initialize(host_map, group_map=nil)
      @q = []
      @empty = []

      @enable_steal = true
      @q_no_action = NoActionQueue.new

      @host_map = host_map

      pri = Rake.application.pwrake_options['QUEUE_PRIORITY'] || "LIHR"
      case pri
      when /prio/i
        @array_class = PriorityQueueArray
      when /fifo/i
        @array_class = FifoQueueArray # Array # Fifo
      when /lifo/i
        @array_class = LifoQueueArray
      when /lihr/i
        @array_class = LifoHrfQueueArray
      when /prhr/i
        @array_class = PriorityHrfQueueArray
      when /rank/i
        @array_class = RankQueueArray
      else
        # Interpolation instead of String#+ so that a non-String option value
        # (for example an Integer) still produces this RuntimeError rather
        # than a TypeError raised while building the message.
        raise RuntimeError, "unknown option for QUEUE_PRIORITY: #{pri}"
      end
      Log.debug "@array_class=#{@array_class.inspect}"
      init_queue(group_map)
    end

    # Creates the input / no-input queues and sets the number of turns to 1.
    #
    # @param group_map accepted for interface compatibility; unused here
    def init_queue(group_map=nil)
      @q_input = @array_class.new(0)
      @q_no_input = FifoQueueArray.new
      @n_turn = 1
    end

    attr_accessor :enable_steal

    # Enqueues +tw+. A nil entry, or one without actions, goes to the
    # no-action queue; anything else is passed to #enq_body.
    def enq(tw)
      if tw.nil? || tw.actions.empty?
        @q_no_action.push(tw)
      else
        enq_body(tw)
      end
    end

    # Delegates to #enq_impl.
    def enq_body(tw)
      enq_impl(tw)
    end

    # Stores +tw+ in the input queue when it has an input file, otherwise in
    # the no-input queue.
    def enq_impl(tw)
      if tw.has_input_file?
        @q_input.push(tw)
      else
        @q_no_input.push(tw)
      end
    end

    # Drains the no-action queue, yielding each entry with a nil host id.
    #
    # @yield [tw, nil]
    def deq_noaction_task(&block)
      Log.debug "deq_task:"+(empty? ? " empty" : "\n#{inspect_q}")
      while tw = @q_no_action.shift
        Log.debug "deq_noaction: #{tw.name}"
        yield(tw,nil)
      end
    end

    # Dequeues tasks for every turn that is not empty and yields each of them
    # together with the id of the host it was assigned to.
    #
    # @yield [tw, host_id]
    def deq_task(&block) # locality version
      Log.debug "deq_task:"+(empty? ? " empty" : "\n#{inspect_q}")
      queued = 0
      @n_turn.times do |turn|
        next if turn_empty?(turn)
        queued += deq_turn(turn,&block)
      end
      if queued>0
        Log.debug "queued:#{queued}"
      end
    end

    # Repeatedly sweeps over all hosts, assigning one task per host with idle
    # cores on each sweep, until a full sweep assigns nothing or the turn
    # becomes empty.
    #
    # @return [Integer] number of tasks yielded
    # @raise [RuntimeError] when a dequeued task needs more cores than the
    #   host currently has idle
    def deq_turn(turn,&block)
      queued = 0
      while true
        count = 0
        @host_map.by_id.each do |host_info|
          #Log.debug "TaskQueue#deq_turn host_info=#{host_info.name}"
          if host_info.idle_cores > 0
            if turn_empty?(turn)
              return queued
            elsif tw = deq_impl(host_info,turn)
              n_task_cores = tw.n_used_cores(host_info)
              Log.debug "deq: #{tw.name} n_task_cores=#{n_task_cores}"
              if host_info.idle_cores < n_task_cores
                m = "task.n_used_cores=#{n_task_cores} must be "+
                  "<= host_info.idle_cores=#{host_info.idle_cores}"
                Log.fatal m
                raise RuntimeError,m
              else
                # Reserve the cores before handing the task to the caller.
                host_info.decrease(n_task_cores)
                yield(tw,host_info.id)
                count += 1
                queued += 1
              end
            end
          end
        end
        # A sweep that assigned nothing means no host can take more work.
        break if count == 0
      end
      queued
    end

    # In this class every turn is empty exactly when the whole queue is.
    def turn_empty?(turn)
      empty?
    end

    # Takes the next entry, trying the no-action, input and no-input queues
    # in that order.
    def deq_impl(host_info=nil, turn=nil)
      @q_no_action.shift ||
        @q_input.shift(host_info) ||
        @q_no_input.shift(host_info)
    end

    # Empties all three internal queues.
    def clear
      @q_no_action.clear
      @q_input.clear
      @q_no_input.clear
    end

    # @return [Boolean] true when all three internal queues are empty
    def empty?
      @q_no_action.empty? &&
        @q_input.empty? &&
        @q_no_input.empty?
    end

    # Gives back to host +hid+ the cores that +tw+ was using.
    def task_end(tw, hid)
      host_info = @host_map.by_id[hid]
      host_info.increase(tw.n_used_cores(host_info))
    end

    # Formats one queue for debug output: label, size and the names of the
    # first and last entries.
    def _qstr(h,q)
      s = " #{h}: size=#{q.size} "
      case q.size
      when 0
        s << "[]\n"
      when 1
        s << "[#{q.first.name}]\n"
      when 2
        s << "[#{q.first.name}, #{q.last.name}]\n"
      else
        s << "[#{q.first.name},..,#{q.last.name}]\n"
      end
      s
    end

    # @return [String] debug summary of the three internal queues
    def inspect_q
      _qstr("noaction",@q_no_action) +
        _qstr("input",   @q_input) +
        _qstr("no_input",@q_no_input)
    end

  end
end
@@ -4,3 +4,4 @@ require 'pwrake/report/parallelism.rb'
4
4
  require 'pwrake/report/report.rb'
5
5
  require 'pwrake/report/report_multi.rb'
6
6
  require 'pwrake/report/stat.rb'
7
+ require 'pwrake/report/task_stat.rb'
@@ -125,7 +125,7 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
125
125
 
126
126
  density = exec_density(a)
127
127
 
128
- fimg = base+'.png'
128
+ fimg = base+'/parallelism.png'
129
129
 
130
130
  n = a.size
131
131
  i = 0
@@ -152,6 +152,7 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
152
152
 
153
153
  t_end = (a.last)[0]
154
154
 
155
+ if system("which gnuplot >/dev/null 2>&1")
155
156
  IO.popen("gnuplot","r+") do |f|
156
157
  f.print "
157
158
  set terminal png
@@ -178,6 +179,7 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
178
179
  f.puts "#{t} #{d}"
179
180
  end
180
181
  end
182
+ end
181
183
 
182
184
  #puts "Parallelism plot: #{fimg}"
183
185
  fimg
@@ -288,8 +290,9 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
288
290
  end
289
291
  end
290
292
 
291
- fimg = base+'_para_cmd.png'
293
+ fimg = base+'/para_cmd.png'
292
294
 
295
+ if system("which gnuplot >/dev/null 2>&1")
293
296
  IO.popen("gnuplot","r+") do |f|
294
297
  #begin f = $stdout
295
298
  f.print "
@@ -308,6 +311,7 @@ set ylabel 'parallelism'
308
311
  f.puts "e"
309
312
  end
310
313
  end
314
+ end
311
315
 
312
316
  #puts "Parallelism plot: #{fimg}"
313
317
  fimg
@@ -335,7 +339,7 @@ set ylabel 'parallelism'
335
339
  end
336
340
 
337
341
  def plot_parallelism_by_host(csvtable,base)
338
- fpng = base+"_para_host.png"
342
+ fpng = base+"/para_host.png"
339
343
  data = read_time_by_host_from_csv(csvtable)
340
344
  return fpng if data.size == 0
341
345
 
@@ -346,6 +350,7 @@ set ylabel 'parallelism'
346
350
  grid << a
347
351
  end
348
352
 
353
+ if system("which gnuplot >/dev/null 2>&1")
349
354
  IO.popen("gnuplot","r+") do |f|
350
355
  f.puts "
351
356
  set terminal png
@@ -379,6 +384,7 @@ set format y ''
379
384
  end
380
385
  f.printf "e\n"
381
386
  end
387
+ end
382
388
  fpng
383
389
  end
384
390
 
@@ -30,28 +30,25 @@ EOL
30
30
  @@id = 0
31
31
  @@id_fmt = nil
32
32
 
33
- def initialize(base,pattern)
34
- @base = base
33
+ def initialize(option,pattern)
34
+ @dir = option['REPORT_DIR']
35
+ if !File.directory?(@dir)
36
+ raise ArgumentError,"Could not find log directory: #{@dir}"
37
+ end
35
38
  @pattern = pattern
36
39
 
37
40
  @@id = @@id.succ
38
41
  @id = @@id
42
+ @base = @dir
39
43
 
40
- @csv_file = base+'.csv'
41
- @task_file = base+'.task'
42
- @html_file = base+'.html'
43
-
44
- open(@base+".log","r").each do |s|
45
- if /num_cores=(\d+)/ =~ s
46
- @ncore = $1.to_i
47
- break
48
- end
49
- end
44
+ @cmd_file = File.join(@dir,option['COMMAND_CSV_FILE'])
45
+ @task_file = File.join(@dir,option['TASK_CSV_FILE'])
46
+ @html_file = File.join(@dir,'report.html')
50
47
 
51
48
  begin
52
- @sh_table = CSV.read(@csv_file,:headers=>true)
49
+ @sh_table = CSV.read(@cmd_file,:headers=>true,:skip_lines=>/\A#/)
53
50
  rescue
54
- $stderr.puts "error in reading "+@csv_file
51
+ $stderr.puts "error in reading "+@cmd_file
55
52
  $stderr.puts $!, $@
56
53
  exit
57
54
  end
@@ -72,14 +69,27 @@ EOL
72
69
  make_cmd_stat
73
70
 
74
71
  @stat = TaskStat.new(@task_file,@sh_table)
72
+ @ncore = @stat.ncore
75
73
  end
76
74
 
77
75
  attr_reader :base, :ncore, :elap
78
- attr_reader :csv_file, :html_file
76
+ attr_reader :cmd_file, :html_file
79
77
  attr_reader :cmd_elap, :cmd_stat
80
78
  attr_reader :sh_table, :task_table
81
79
  attr_reader :id
82
80
 
81
# Returns the one path under @dir that matches the glob +pattern+.
#
# @param pattern [String] glob pattern, joined onto @dir
# @return [String] the single matching path
# @raise [ArgumentError] when no path or more than one path matches
def find_single_file(pattern)
  matches = Dir.glob(File.join(@dir,pattern))
  if matches.size == 0
    raise ArgumentError, "Could not find any file with '#{pattern}' in #{@dir}"
  elsif matches.size != 1
    raise ArgumentError, "Found multiple files '#{pattern}' in #{@dir}"
  end
  matches[0]
end
92
+
83
93
  def id_str
84
94
  if @@id_fmt.nil?
85
95
  id_len = Math.log10(@@id).floor + 1
@@ -151,13 +161,18 @@ EOL
151
161
 
152
162
  def tr_count(x,y)
153
163
  sum = x+y
154
- xp = x*100.0/sum
155
- yp = y*100.0/sum
164
+ if sum==0
165
+ xp = "--%"
166
+ yp = "--%"
167
+ else
168
+ xp = "%.2f%%"%(x*100.0/sum)
169
+ yp = "%.2f%%"%(y*100.0/sum)
170
+ end
156
171
  td = "<td align='right' valign='top'>"
157
- m = td + '%s<br/>''%.2f%%</td>' % [format_comma(x),xp]
158
- m << td + '%s<br/>''%.2f%%</td>' % [format_comma(y),yp]
159
- m << td + "%s</td>" % format_comma(sum)
160
- m
172
+ return \
173
+ td + '%s<br/>%s</td>' % [format_comma(x),xp] +
174
+ td + '%s<br/>%s</td>' % [format_comma(y),yp] +
175
+ td + "%s</td>" % format_comma(sum)
161
176
  end
162
177
 
163
178
  def report_html
@@ -227,15 +242,17 @@ EOL
227
242
  html << "<th>local</th><th>remote</th><th>total</th>"
228
243
  html << "</tr>\n"
229
244
  @stat.exec_hosts.each do |h|
230
- html << "<tr><td>#{h}</td>"
231
- html << "<td align='right'>%.3f</td>" % @stat[h,nil,:elap]
232
- html << "<td></td>"
233
- html << tr_count(@stat[h,true,:in_num],@stat[h,false,:in_num])
234
- html << tr_count(@stat[h,true,:in_size],@stat[h,false,:in_size])
235
- html << "<td></td>"
236
- html << tr_count(@stat[h,true,:out_num],@stat[h,false,:out_num])
237
- html << tr_count(@stat[h,true,:out_size],@stat[h,false,:out_size])
238
- html << "</tr>\n"
245
+ if h.to_s!="" || @stat[h,nil,:elap]!=0
246
+ html << "<tr><td>#{h}</td>"
247
+ html << "<td align='right'>%.3f</td>" % @stat[h,nil,:elap]
248
+ html << "<td></td>"
249
+ html << tr_count(@stat[h,true,:in_num],@stat[h,false,:in_num])
250
+ html << tr_count(@stat[h,true,:in_size],@stat[h,false,:in_size])
251
+ html << "<td></td>"
252
+ html << tr_count(@stat[h,true,:out_num],@stat[h,false,:out_num])
253
+ html << tr_count(@stat[h,true,:out_size],@stat[h,false,:out_size])
254
+ html << "</tr>\n"
255
+ end
239
256
  end
240
257
  html << "<tr><td>total</td>"
241
258
  html << "<td align='right'>%.3f</td>" % @stat.total(nil,:elap)
@@ -266,7 +283,8 @@ EOL
266
283
  command_list << cmd
267
284
  end
268
285
  end
269
- hist_image = @base+"_hist.png"
286
+ hist_image = @base+"/hist.png"
287
+ if system("which gnuplot >/dev/null 2>&1")
270
288
  IO.popen("gnuplot","r+") do |f|
271
289
  f.puts "
272
290
  set terminal png # size 480,360
@@ -293,80 +311,9 @@ set title 'histogram of elapsed time'"
293
311
  f.puts "e"
294
312
  end
295
313
  end
296
- hist_image
297
- end
298
- end
299
- end
300
-
301
- class TaskStat
302
-
303
- def initialize(task_file, sh_table)
304
- begin
305
- @task_table = CSV.read(task_file,:headers=>true)
306
- rescue
307
- $stderr.puts "error in reading "+task_file
308
- $stderr.puts $!, $@
309
- exit
310
314
  end
311
- @count = Hash.new(0)
312
- task_locality
313
- stat_sh_table(sh_table)
314
- end
315
-
316
- attr_reader :exec_hosts
317
-
318
- def count(exec_host, loc, key, val)
319
- @count[[exec_host,loc,key]] += val
320
- @count[[loc,key]] += val
321
- end
322
-
323
- def total(loc,key)
324
- @count[[loc,key]]
325
- end
326
-
327
- def [](exec_host,loc,key)
328
- @count[[exec_host,loc,key]]
329
- end
330
-
331
- def task_locality
332
- file_size = {}
333
- file_host = {}
334
- h = {}
335
- @task_table.each do |row|
336
- name = row['task_name']
337
- file_size[name] = row['file_size'].to_i
338
- file_host[name] = (row['file_host']||'').split('|')
339
- h[row['exec_host']] = true
340
- end
341
- @exec_hosts = h.keys.sort
342
-
343
- @task_table.each do |row|
344
- if row['executed']=='1'
345
- name = row['task_name']
346
- exec_host = row['exec_host']
347
- loc = file_host[name].include?(exec_host)
348
- count(exec_host, loc, :out_num, 1)
349
- count(exec_host, loc, :out_size, file_size[name])
350
-
351
- preq_files = (row['preq']||'').split('|')
352
- preq_files.each do |preq|
353
- sz = file_size[preq]
354
- if sz && sz > 0
355
- loc = file_host[preq].include?(exec_host)
356
- count(exec_host, loc, :in_num, 1)
357
- count(exec_host, loc, :in_size, sz)
358
- end
359
- end
360
- end
361
- end
362
- end
363
-
364
- def stat_sh_table(sh_table)
365
- sh_table.each do |row|
366
- if (h = row['host']) && (t = row['elap_time'])
367
- count(h, nil, :elap, t.to_f)
368
- end
369
315
  end
316
+ hist_image
370
317
  end
371
318
 
372
319
  end
@@ -0,0 +1,83 @@
1
module Pwrake

  # Accumulates per-host and overall counters from the task table (a CSV
  # file) and the shell-command table.
  #
  # Counters are addressed by +[exec_host, loc, key]+ for one host and by
  # +[loc, key]+ for the total over all hosts. +loc+ is true/false for the
  # file counters (:in_num, :in_size, :out_num, :out_size) and nil for :elap.
  class TaskStat

    # @param task_file [String] path of the task CSV; lines starting with
    #   '#' are skipped
    # @param sh_table  enumerable of rows answering to +['host']+ and
    #   +['elap_time']+
    def initialize(task_file, sh_table)
      begin
        @task_table = CSV.read(task_file,:headers=>true,:skip_lines=>/\A#/)
      rescue
        $stderr.puts "error in reading "+task_file
        $stderr.puts $!, $@
        # NOTE(review): Kernel#exit without an argument reports success to
        # the calling process - confirm that is intended on a read error.
        exit
      end
      # The core count is taken as the number of distinct shell ids seen.
      shell_id = {}
      @task_table.each do |row|
        if id=row['shell_id']
          shell_id[id.to_i] = true
        end
      end
      @ncore = shell_id.size
      @count = Hash.new(0)
      task_locality
      stat_sh_table(sh_table)
    end

    attr_reader :exec_hosts, :ncore

    # Adds +val+ to the counter of +exec_host+ and to the overall counter.
    def count(exec_host, loc, key, val)
      @count[[exec_host,loc,key]] += val
      @count[[loc,key]] += val
    end

    # @return the counter for +loc+/+key+ summed over all hosts (0 if unset)
    def total(loc,key)
      @count[[loc,key]]
    end

    # @return the counter for +exec_host+/+loc+/+key+ (0 if unset)
    def [](exec_host,loc,key)
      @count[[exec_host,loc,key]]
    end

    # Builds +@exec_hosts+ and counts, for every executed task, its output
    # file and each prerequisite file with a positive recorded size. A file
    # is counted under loc=true when the executing host appears in that
    # file's 'file_host' list, otherwise under loc=false.
    def task_locality
      file_size = {}
      file_host = {}
      h = {}
      @task_table.each do |row|
        name = row['task_name']
        file_size[name] = row['file_size'].to_i
        file_host[name] = (row['file_host']||'').split('|')
        exec_host = row['exec_host'] || ""
        h[exec_host] = true
      end
      @exec_hosts = h.keys.sort

      @task_table.each do |row|
        if row['executed']=='1'
          name = row['task_name']
          # Normalise a missing host exactly as above, so that counters are
          # stored under the same key that is listed in @exec_hosts.
          exec_host = row['exec_host'] || ""
          loc = file_host[name].include?(exec_host)
          count(exec_host, loc, :out_num, 1)
          count(exec_host, loc, :out_size, file_size[name])

          preq_files = (row['preq']||'').split('|')
          preq_files.each do |preq|
            sz = file_size[preq]
            # Prerequisites absent from the table or of size 0 are ignored.
            if sz && sz > 0
              loc = file_host[preq].include?(exec_host)
              count(exec_host, loc, :in_num, 1)
              count(exec_host, loc, :in_size, sz)
            end
          end
        end
      end
    end

    # Sums 'elap_time' per host over the rows of +sh_table+ that have both a
    # 'host' and an 'elap_time' value.
    def stat_sh_table(sh_table)
      sh_table.each do |row|
        if (h = row['host']) && (t = row['elap_time'])
          count(h, nil, :elap, t.to_f)
        end
      end
    end

  end
end