pwrake 0.9.9.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/CHANGES_V2.md +90 -0
  4. data/{LICENSE.txt → MIT-LICENSE} +2 -3
  5. data/README +12 -0
  6. data/README.md +75 -52
  7. data/bin/gfwhere-pipe +23 -12
  8. data/bin/pwrake +22 -29
  9. data/bin/pwrake_branch +24 -0
  10. data/lib/pwrake/branch.rb +22 -0
  11. data/lib/pwrake/branch/branch.rb +213 -0
  12. data/lib/pwrake/branch/branch_application.rb +53 -0
  13. data/lib/pwrake/branch/fiber_queue.rb +36 -0
  14. data/lib/pwrake/branch/file_utils.rb +101 -0
  15. data/lib/pwrake/branch/shell.rb +231 -0
  16. data/lib/pwrake/{profiler.rb → branch/shell_profiler.rb} +28 -27
  17. data/lib/pwrake/branch/worker_communicator.rb +104 -0
  18. data/lib/pwrake/{gfarm_feature.rb → gfarm/gfarm_path.rb} +2 -100
  19. data/lib/pwrake/gfarm/gfarm_postprocess.rb +53 -0
  20. data/lib/pwrake/iomux/channel.rb +70 -0
  21. data/lib/pwrake/iomux/handler.rb +124 -0
  22. data/lib/pwrake/iomux/handler_set.rb +35 -0
  23. data/lib/pwrake/iomux/runner.rb +62 -0
  24. data/lib/pwrake/logger.rb +3 -150
  25. data/lib/pwrake/master.rb +30 -137
  26. data/lib/pwrake/master/fiber_pool.rb +69 -0
  27. data/lib/pwrake/master/idle_cores.rb +30 -0
  28. data/lib/pwrake/master/master.rb +345 -0
  29. data/lib/pwrake/master/master_application.rb +150 -0
  30. data/lib/pwrake/master/postprocess.rb +16 -0
  31. data/lib/pwrake/{graphviz.rb → misc/graphviz.rb} +0 -0
  32. data/lib/pwrake/{mcgp.rb → misc/mcgp.rb} +63 -42
  33. data/lib/pwrake/option/host_map.rb +158 -0
  34. data/lib/pwrake/option/option.rb +357 -0
  35. data/lib/pwrake/option/option_filesystem.rb +112 -0
  36. data/lib/pwrake/queue/locality_aware_queue.rb +158 -0
  37. data/lib/pwrake/queue/no_action_queue.rb +67 -0
  38. data/lib/pwrake/queue/queue_array.rb +366 -0
  39. data/lib/pwrake/queue/task_queue.rb +164 -0
  40. data/lib/pwrake/report.rb +1 -0
  41. data/lib/pwrake/report/parallelism.rb +9 -3
  42. data/lib/pwrake/report/report.rb +50 -103
  43. data/lib/pwrake/report/task_stat.rb +83 -0
  44. data/lib/pwrake/task/task_algorithm.rb +107 -0
  45. data/lib/pwrake/task/task_manager.rb +32 -0
  46. data/lib/pwrake/task/task_property.rb +98 -0
  47. data/lib/pwrake/task/task_rank.rb +48 -0
  48. data/lib/pwrake/task/task_wrapper.rb +296 -0
  49. data/lib/pwrake/version.rb +1 -1
  50. data/lib/pwrake/worker/executor.rb +169 -0
  51. data/lib/pwrake/worker/gfarm_directory.rb +90 -0
  52. data/lib/pwrake/worker/invoker.rb +199 -0
  53. data/lib/pwrake/worker/load.rb +14 -0
  54. data/lib/pwrake/worker/log_executor.rb +73 -0
  55. data/lib/pwrake/worker/shared_directory.rb +74 -0
  56. data/lib/pwrake/worker/worker_main.rb +14 -0
  57. data/lib/pwrake/worker/writer.rb +59 -0
  58. data/setup.rb +1212 -1502
  59. data/spec/003/Rakefile +2 -2
  60. data/spec/008/Rakefile +2 -1
  61. data/spec/009/Rakefile +1 -1
  62. data/spec/009/pwrake_conf.yaml +1 -3
  63. data/spec/hosts +0 -2
  64. data/spec/pwrake_spec.rb +9 -8
  65. metadata +50 -21
  66. data/lib/pwrake.rb +0 -19
  67. data/lib/pwrake/application.rb +0 -232
  68. data/lib/pwrake/counter.rb +0 -54
  69. data/lib/pwrake/file_utils.rb +0 -98
  70. data/lib/pwrake/gfwhere_pool.rb +0 -109
  71. data/lib/pwrake/host_list.rb +0 -88
  72. data/lib/pwrake/locality_aware_queue.rb +0 -413
  73. data/lib/pwrake/option.rb +0 -400
  74. data/lib/pwrake/rake_modify.rb +0 -14
  75. data/lib/pwrake/shell.rb +0 -186
  76. data/lib/pwrake/task_algorithm.rb +0 -475
  77. data/lib/pwrake/task_queue.rb +0 -633
  78. data/lib/pwrake/timer.rb +0 -22
@@ -0,0 +1,164 @@
1
module Pwrake

  # Holds task wrappers that are waiting to be executed and hands them out
  # to hosts that currently have idle cores.
  #
  # Three internal queues are used:
  #   @q_no_action :: tasks that are nil or whose +actions+ list is empty
  #   @q_input     :: tasks for which +has_input_file?+ is true
  #   @q_no_input  :: every other task
  class TaskQueue

    # host_map  :: object whose +by_id+ collection holds the host-info
    #              objects (each responding to +id+, +idle_cores+,
    #              +decrease+ and +increase+).
    # group_map :: passed through to #init_queue.
    #
    # The queue-array class is chosen from the 'QUEUE_PRIORITY' entry of
    # Rake.application.pwrake_options (default "LIHR"); the first matching
    # pattern below wins.
    #
    # Raises RuntimeError when the option matches none of the known names.
    def initialize(host_map, group_map=nil)
      # NOTE(review): @q and @empty are not read anywhere in this class;
      # kept in case subclasses rely on them.
      @q = []
      @empty = []

      @enable_steal = true
      @q_no_action = NoActionQueue.new

      @host_map = host_map

      pri = Rake.application.pwrake_options['QUEUE_PRIORITY'] || "LIHR"
      case pri
      when /prio/i
        @array_class = PriorityQueueArray
      when /fifo/i
        @array_class = FifoQueueArray # Array # Fifo
      when /lifo/i
        @array_class = LifoQueueArray
      when /lihr/i
        @array_class = LifoHrfQueueArray
      when /prhr/i
        @array_class = PriorityHrfQueueArray
      when /rank/i
        @array_class = RankQueueArray
      else
        # Interpolate rather than concatenate: a non-String option value
        # must still produce this RuntimeError, not a TypeError from String#+.
        raise RuntimeError,"unknown option for QUEUE_PRIORITY: #{pri}"
      end
      Log.debug "@array_class=#{@array_class.inspect}"
      init_queue(group_map)
    end

    # Creates the input / no-input queues and sets the number of turns to 1.
    # +group_map+ is accepted but not used by this implementation.
    def init_queue(group_map=nil)
      @q_input = @array_class.new(0)
      @q_no_input = FifoQueueArray.new
      @n_turn = 1
    end

    # Flag initialised to true in #initialize; not consulted inside this class.
    attr_accessor :enable_steal

    # enq
    # Adds +tw+ to the queue. A nil task, or one with no actions, goes to
    # the no-action queue; anything else is handed to #enq_body.
    def enq(tw)
      if tw.nil? || tw.actions.empty?
        @q_no_action.push(tw)
      else
        enq_body(tw)
      end
    end

    # Delegates directly to #enq_impl.
    # NOTE(review): presumably an override point for subclasses - confirm.
    def enq_body(tw)
      enq_impl(tw)
    end

    # Stores +tw+ in @q_input when it has an input file, otherwise in
    # @q_no_input.
    def enq_impl(tw)
      if tw.has_input_file?
        @q_input.push(tw)
      else
        @q_no_input.push(tw)
      end
    end

    # Drains the no-action queue, yielding (task, nil) for every task;
    # nil stands in for the host id.
    def deq_noaction_task(&block)
      Log.debug "deq_noaction_task:"+(empty? ? " empty" : "\n#{inspect_q}")
      while tw = @q_no_action.shift
        Log.debug "deq_noaction: #{tw.name}"
        yield(tw,nil)
      end
    end

    # Runs #deq_turn for each of the @n_turn turns that is not empty,
    # yielding (task, host_id) for every dispatched task, and logs the
    # number of dispatched tasks when it is positive.
    def deq_task(&block) # locality version
      Log.debug "deq_task:"+(empty? ? " empty" : "\n#{inspect_q}")
      queued = 0
      @n_turn.times do |turn|
        next if turn_empty?(turn)
        queued += deq_turn(turn,&block)
      end
      if queued>0
        Log.debug "queued:#{queued}"
      end
    end

    # Repeatedly sweeps over all hosts, giving one task per sweep to every
    # host that has idle cores, until the turn is empty or a full sweep
    # dispatches nothing. Yields (task, host_id) per dispatched task.
    #
    # Returns the number of tasks dispatched.
    # Raises RuntimeError when a dequeued task needs more cores than the
    # host has idle.
    def deq_turn(turn,&block)
      queued = 0
      while true
        count = 0
        @host_map.by_id.each do |host_info|
          #Log.debug "TaskQueue#deq_turn host_info=#{host_info.name}"
          next unless host_info.idle_cores > 0
          # Nothing left to hand out: leave the method, not just the sweep.
          return queued if turn_empty?(turn)
          tw = deq_impl(host_info,turn)
          next unless tw
          n_task_cores = tw.n_used_cores(host_info)
          Log.debug "deq: #{tw.name} n_task_cores=#{n_task_cores}"
          if host_info.idle_cores < n_task_cores
            m = "task.n_used_cores=#{n_task_cores} must be "+
              "<= host_info.idle_cores=#{host_info.idle_cores}"
            Log.fatal m
            raise RuntimeError,m
          end
          host_info.decrease(n_task_cores)
          yield(tw,host_info.id)
          count += 1
          queued += 1
        end
        # A sweep that dispatched nothing means no host can take more work.
        break if count == 0
      end
      queued
    end

    # True when there is nothing to dequeue; +turn+ is ignored here.
    def turn_empty?(turn)
      empty?
    end

    # Takes the next task, trying the no-action queue first, then the input
    # queue, then the no-input queue. +turn+ is ignored here.
    # Returns the task, or a falsy value when nothing was available.
    def deq_impl(host_info=nil, turn=nil)
      @q_no_action.shift ||
        @q_input.shift(host_info) ||
        @q_no_input.shift(host_info)
    end

    # Empties all three internal queues.
    def clear
      @q_no_action.clear
      @q_input.clear
      @q_no_input.clear
    end

    # True only when all three internal queues are empty.
    def empty?
      @q_no_action.empty? &&
        @q_input.empty? &&
        @q_no_input.empty?
    end

    # Gives back to host +hid+ the cores that +tw+ was using.
    def task_end(tw, hid)
      host_info = @host_map.by_id[hid]
      host_info.increase(tw.n_used_cores(host_info))
    end

    # Builds a one-line summary of queue +q+ labelled +h+: its size plus the
    # names of its first and last tasks.
    def _qstr(h,q)
      s = " #{h}: size=#{q.size} "
      case q.size
      when 0
        s << "[]\n"
      when 1
        s << "[#{q.first.name}]\n"
      when 2
        s << "[#{q.first.name}, #{q.last.name}]\n"
      else
        s << "[#{q.first.name},..,#{q.last.name}]\n"
      end
      s
    end

    # Multi-line summary of the three internal queues, used in debug logs.
    def inspect_q
      _qstr("noaction",@q_no_action) +
        _qstr("input",   @q_input) +
        _qstr("no_input",@q_no_input)
    end

  end
end
@@ -4,3 +4,4 @@ require 'pwrake/report/parallelism.rb'
4
4
  require 'pwrake/report/report.rb'
5
5
  require 'pwrake/report/report_multi.rb'
6
6
  require 'pwrake/report/stat.rb'
7
+ require 'pwrake/report/task_stat.rb'
@@ -125,7 +125,7 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
125
125
 
126
126
  density = exec_density(a)
127
127
 
128
- fimg = base+'.png'
128
+ fimg = base+'/parallelism.png'
129
129
 
130
130
  n = a.size
131
131
  i = 0
@@ -152,6 +152,7 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
152
152
 
153
153
  t_end = (a.last)[0]
154
154
 
155
+ if system("which gnuplot >/dev/null 2>&1")
155
156
  IO.popen("gnuplot","r+") do |f|
156
157
  f.print "
157
158
  set terminal png
@@ -178,6 +179,7 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
178
179
  f.puts "#{t} #{d}"
179
180
  end
180
181
  end
182
+ end
181
183
 
182
184
  #puts "Parallelism plot: #{fimg}"
183
185
  fimg
@@ -288,8 +290,9 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
288
290
  end
289
291
  end
290
292
 
291
- fimg = base+'_para_cmd.png'
293
+ fimg = base+'/para_cmd.png'
292
294
 
295
+ if system("which gnuplot >/dev/null 2>&1")
293
296
  IO.popen("gnuplot","r+") do |f|
294
297
  #begin f = $stdout
295
298
  f.print "
@@ -308,6 +311,7 @@ set ylabel 'parallelism'
308
311
  f.puts "e"
309
312
  end
310
313
  end
314
+ end
311
315
 
312
316
  #puts "Parallelism plot: #{fimg}"
313
317
  fimg
@@ -335,7 +339,7 @@ set ylabel 'parallelism'
335
339
  end
336
340
 
337
341
  def plot_parallelism_by_host(csvtable,base)
338
- fpng = base+"_para_host.png"
342
+ fpng = base+"/para_host.png"
339
343
  data = read_time_by_host_from_csv(csvtable)
340
344
  return fpng if data.size == 0
341
345
 
@@ -346,6 +350,7 @@ set ylabel 'parallelism'
346
350
  grid << a
347
351
  end
348
352
 
353
+ if system("which gnuplot >/dev/null 2>&1")
349
354
  IO.popen("gnuplot","r+") do |f|
350
355
  f.puts "
351
356
  set terminal png
@@ -379,6 +384,7 @@ set format y ''
379
384
  end
380
385
  f.printf "e\n"
381
386
  end
387
+ end
382
388
  fpng
383
389
  end
384
390
 
@@ -30,28 +30,25 @@ EOL
30
30
  @@id = 0
31
31
  @@id_fmt = nil
32
32
 
33
- def initialize(base,pattern)
34
- @base = base
33
+ def initialize(option,pattern)
34
+ @dir = option['REPORT_DIR']
35
+ if !File.directory?(@dir)
36
+ raise ArgumentError,"Could not find log directory: #{@dir}"
37
+ end
35
38
  @pattern = pattern
36
39
 
37
40
  @@id = @@id.succ
38
41
  @id = @@id
42
+ @base = @dir
39
43
 
40
- @csv_file = base+'.csv'
41
- @task_file = base+'.task'
42
- @html_file = base+'.html'
43
-
44
- open(@base+".log","r").each do |s|
45
- if /num_cores=(\d+)/ =~ s
46
- @ncore = $1.to_i
47
- break
48
- end
49
- end
44
+ @cmd_file = File.join(@dir,option['COMMAND_CSV_FILE'])
45
+ @task_file = File.join(@dir,option['TASK_CSV_FILE'])
46
+ @html_file = File.join(@dir,'report.html')
50
47
 
51
48
  begin
52
- @sh_table = CSV.read(@csv_file,:headers=>true)
49
+ @sh_table = CSV.read(@cmd_file,:headers=>true,:skip_lines=>/\A#/)
53
50
  rescue
54
- $stderr.puts "error in reading "+@csv_file
51
+ $stderr.puts "error in reading "+@cmd_file
55
52
  $stderr.puts $!, $@
56
53
  exit
57
54
  end
@@ -72,14 +69,27 @@ EOL
72
69
  make_cmd_stat
73
70
 
74
71
  @stat = TaskStat.new(@task_file,@sh_table)
72
+ @ncore = @stat.ncore
75
73
  end
76
74
 
77
75
  attr_reader :base, :ncore, :elap
78
- attr_reader :csv_file, :html_file
76
+ attr_reader :cmd_file, :html_file
79
77
  attr_reader :cmd_elap, :cmd_stat
80
78
  attr_reader :sh_table, :task_table
81
79
  attr_reader :id
82
80
 
81
+ def find_single_file(pattern)
82
+ g = Dir.glob(File.join(@dir,pattern))
83
+ case g.size
84
+ when 0
85
+ raise ArgumentError, "Could not find any file with '#{pattern}' in #{@dir}"
86
+ when 1
87
+ else
88
+ raise ArgumentError, "Found multiple files '#{pattern}' in #{@dir}"
89
+ end
90
+ g[0]
91
+ end
92
+
83
93
  def id_str
84
94
  if @@id_fmt.nil?
85
95
  id_len = Math.log10(@@id).floor + 1
@@ -151,13 +161,18 @@ EOL
151
161
 
152
162
  def tr_count(x,y)
153
163
  sum = x+y
154
- xp = x*100.0/sum
155
- yp = y*100.0/sum
164
+ if sum==0
165
+ xp = "--%"
166
+ yp = "--%"
167
+ else
168
+ xp = "%.2f%%"%(x*100.0/sum)
169
+ yp = "%.2f%%"%(y*100.0/sum)
170
+ end
156
171
  td = "<td align='right' valign='top'>"
157
- m = td + '%s<br/>''%.2f%%</td>' % [format_comma(x),xp]
158
- m << td + '%s<br/>''%.2f%%</td>' % [format_comma(y),yp]
159
- m << td + "%s</td>" % format_comma(sum)
160
- m
172
+ return \
173
+ td + '%s<br/>%s</td>' % [format_comma(x),xp] +
174
+ td + '%s<br/>%s</td>' % [format_comma(y),yp] +
175
+ td + "%s</td>" % format_comma(sum)
161
176
  end
162
177
 
163
178
  def report_html
@@ -227,15 +242,17 @@ EOL
227
242
  html << "<th>local</th><th>remote</th><th>total</th>"
228
243
  html << "</tr>\n"
229
244
  @stat.exec_hosts.each do |h|
230
- html << "<tr><td>#{h}</td>"
231
- html << "<td align='right'>%.3f</td>" % @stat[h,nil,:elap]
232
- html << "<td></td>"
233
- html << tr_count(@stat[h,true,:in_num],@stat[h,false,:in_num])
234
- html << tr_count(@stat[h,true,:in_size],@stat[h,false,:in_size])
235
- html << "<td></td>"
236
- html << tr_count(@stat[h,true,:out_num],@stat[h,false,:out_num])
237
- html << tr_count(@stat[h,true,:out_size],@stat[h,false,:out_size])
238
- html << "</tr>\n"
245
+ if h.to_s!="" || @stat[h,nil,:elap]!=0
246
+ html << "<tr><td>#{h}</td>"
247
+ html << "<td align='right'>%.3f</td>" % @stat[h,nil,:elap]
248
+ html << "<td></td>"
249
+ html << tr_count(@stat[h,true,:in_num],@stat[h,false,:in_num])
250
+ html << tr_count(@stat[h,true,:in_size],@stat[h,false,:in_size])
251
+ html << "<td></td>"
252
+ html << tr_count(@stat[h,true,:out_num],@stat[h,false,:out_num])
253
+ html << tr_count(@stat[h,true,:out_size],@stat[h,false,:out_size])
254
+ html << "</tr>\n"
255
+ end
239
256
  end
240
257
  html << "<tr><td>total</td>"
241
258
  html << "<td align='right'>%.3f</td>" % @stat.total(nil,:elap)
@@ -266,7 +283,8 @@ EOL
266
283
  command_list << cmd
267
284
  end
268
285
  end
269
- hist_image = @base+"_hist.png"
286
+ hist_image = @base+"/hist.png"
287
+ if system("which gnuplot >/dev/null 2>&1")
270
288
  IO.popen("gnuplot","r+") do |f|
271
289
  f.puts "
272
290
  set terminal png # size 480,360
@@ -293,80 +311,9 @@ set title 'histogram of elapsed time'"
293
311
  f.puts "e"
294
312
  end
295
313
  end
296
- hist_image
297
- end
298
- end
299
- end
300
-
301
- class TaskStat
302
-
303
- def initialize(task_file, sh_table)
304
- begin
305
- @task_table = CSV.read(task_file,:headers=>true)
306
- rescue
307
- $stderr.puts "error in reading "+task_file
308
- $stderr.puts $!, $@
309
- exit
310
314
  end
311
- @count = Hash.new(0)
312
- task_locality
313
- stat_sh_table(sh_table)
314
- end
315
-
316
- attr_reader :exec_hosts
317
-
318
- def count(exec_host, loc, key, val)
319
- @count[[exec_host,loc,key]] += val
320
- @count[[loc,key]] += val
321
- end
322
-
323
- def total(loc,key)
324
- @count[[loc,key]]
325
- end
326
-
327
- def [](exec_host,loc,key)
328
- @count[[exec_host,loc,key]]
329
- end
330
-
331
- def task_locality
332
- file_size = {}
333
- file_host = {}
334
- h = {}
335
- @task_table.each do |row|
336
- name = row['task_name']
337
- file_size[name] = row['file_size'].to_i
338
- file_host[name] = (row['file_host']||'').split('|')
339
- h[row['exec_host']] = true
340
- end
341
- @exec_hosts = h.keys.sort
342
-
343
- @task_table.each do |row|
344
- if row['executed']=='1'
345
- name = row['task_name']
346
- exec_host = row['exec_host']
347
- loc = file_host[name].include?(exec_host)
348
- count(exec_host, loc, :out_num, 1)
349
- count(exec_host, loc, :out_size, file_size[name])
350
-
351
- preq_files = (row['preq']||'').split('|')
352
- preq_files.each do |preq|
353
- sz = file_size[preq]
354
- if sz && sz > 0
355
- loc = file_host[preq].include?(exec_host)
356
- count(exec_host, loc, :in_num, 1)
357
- count(exec_host, loc, :in_size, sz)
358
- end
359
- end
360
- end
361
- end
362
- end
363
-
364
- def stat_sh_table(sh_table)
365
- sh_table.each do |row|
366
- if (h = row['host']) && (t = row['elap_time'])
367
- count(h, nil, :elap, t.to_f)
368
- end
369
315
  end
316
+ hist_image
370
317
  end
371
318
 
372
319
  end
@@ -0,0 +1,83 @@
1
module Pwrake

  # Aggregates per-host statistics from a task CSV file and a table of
  # executed shell commands: number/size of local and remote input and
  # output files, and elapsed command time.
  #
  # Counters are addressed as [exec_host, loc, key] where +loc+ is true
  # (local), false (remote) or nil (not locality related, used for :elap).
  class TaskStat

    # task_file :: path of the task CSV file; it is read with a header row
    #              and lines starting with '#' are skipped.
    # sh_table  :: enumerable of rows answering to ['host'] and
    #              ['elap_time'].
    #
    # When the CSV cannot be read, the error and backtrace are written to
    # $stderr and the process is terminated with Kernel#exit.
    def initialize(task_file, sh_table)
      begin
        @task_table = CSV.read(task_file,:headers=>true,:skip_lines=>/\A#/)
      rescue
        # Interpolation keeps this report from raising when task_file is nil.
        $stderr.puts "error in reading #{task_file}"
        $stderr.puts $!, $@
        exit
      end
      # ncore is the number of distinct shell ids found in the task table.
      @ncore = @task_table.map{|row| row['shell_id']}.compact.map(&:to_i).uniq.size
      @count = Hash.new(0)
      task_locality
      stat_sh_table(sh_table)
    end

    attr_reader :exec_hosts, :ncore

    # Adds +val+ to the counter of (exec_host, loc, key) and to the
    # all-hosts total of (loc, key).
    def count(exec_host, loc, key, val)
      @count[[exec_host,loc,key]] += val
      @count[[loc,key]] += val
    end

    # Total over all hosts for (loc, key); 0 when nothing was counted.
    def total(loc,key)
      @count[[loc,key]]
    end

    # Counter for one host; 0 when nothing was counted.
    def [](exec_host,loc,key)
      @count[[exec_host,loc,key]]
    end

    # Counts input/output files per execution host, split into local and
    # remote, and fills @exec_hosts with the sorted host names.
    #
    # A row without an exec_host is filed under the host name "" in both
    # passes, so that the counters agree with the entries of @exec_hosts.
    def task_locality
      file_size = {}
      file_host = {}
      hosts = {}
      # First pass: size and location of every file must be known before
      # prerequisites can be looked up in the second pass.
      @task_table.each do |row|
        name = row['task_name']
        file_size[name] = row['file_size'].to_i
        file_host[name] = (row['file_host']||'').split('|')
        hosts[row['exec_host'] || ""] = true
      end
      @exec_hosts = hosts.keys.sort

      @task_table.each do |row|
        next unless row['executed']=='1'
        name = row['task_name']
        exec_host = row['exec_host'] || ""
        loc = local_to?(file_host[name], exec_host)
        count(exec_host, loc, :out_num, 1)
        count(exec_host, loc, :out_size, file_size[name])

        (row['preq']||'').split('|').each do |preq|
          sz = file_size[preq]
          # Prerequisites that are not in the table, or have zero size,
          # are not counted as input.
          next unless sz && sz > 0
          loc = local_to?(file_host[preq], exec_host)
          count(exec_host, loc, :in_num, 1)
          count(exec_host, loc, :in_size, sz)
        end
      end
    end

    # Sums the elapsed time of every command row that has both a host and
    # an elapsed time, keyed by host with loc = nil.
    def stat_sh_table(sh_table)
      sh_table.each do |row|
        if (h = row['host']) && (t = row['elap_time'])
          count(h, nil, :elap, t.to_f)
        end
      end
    end

    private

    # True when +exec_host+ is one of +hosts+; a task without a recorded
    # execution host is never considered local.
    def local_to?(hosts, exec_host)
      !exec_host.empty? && hosts.include?(exec_host)
    end

  end
end