pwrake 2.1.2 → 2.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c955ecbb671677c131979dcc11b35985fee3aa58
4
- data.tar.gz: aa0ebbcc4161b6e78a07656534e97528487b82d2
3
+ metadata.gz: db880d8483f79d6812bd1920557350a7b4311378
4
+ data.tar.gz: a1d2779a59eb21dd4df2b8ee5e9a00054a50654e
5
5
  SHA512:
6
- metadata.gz: a45cd6523db363a562225cb79438de77ca57f96a6cdaba59ddf67cf15147919b814167d2b07e0169b3164704b8ce548de49f67ca7befd5cff07b90e4de870b2f
7
- data.tar.gz: 7752375bc140f07f00cae72333d1378375358543646fd42f16e9c5835177b16be21049aa3c1fc52e147dbd5d54df0aa8003e688bb1410938fea2b7fb3ebfe818
6
+ metadata.gz: 4bc21c0d7a5ef317d5d03883ca918e2c2218e0ec28e13167079127820e0c4800923f4d23c6891e4eff5dcdfb010db4b2d6214a6d9eedf5c6c5597a1353467d7a
7
+ data.tar.gz: b851a5ffb98ddccf77b52bbd5c4339a900d617ce8018e9c68ee665999ac9a70a8b5295be21c88832a8d9f20cd257da81b9b4c91715f487fde0711a60db4e9931
data/README.md CHANGED
@@ -86,7 +86,8 @@ In this case, you need the rehash of command paths:
86
86
  -d, --debug [Pw] Output Debug messages
87
87
  --pwrake-conf [FILE] [Pw] Pwrake configuration file in YAML
88
88
  --show-conf, --show-config [Pw] Show Pwrake configuration options
89
- --report LOGDIR [Pw] Report workflow statistics from LOGDIR to HTML and exit.
89
+ --report LOGDIR [Pw] Generate `report.html' (Report of workflow statistics) in LOGDIR and exit.
90
+ --report-image IMAGE_TYPE [Pw] Gnuplot output format (png,jpg,svg etc.) in report.html.
90
91
  --clear-gfarm2fs [Pw] Clear gfarm2fs mountpoints left after failure.
91
92
 
92
93
  ### pwrake_conf.yaml
@@ -125,6 +126,7 @@ In this case, you need the rehash of command paths:
125
126
  NOACTION_QUEUE_PRIORITY FIFO(default)|LIFO|RAND
126
127
  SHELL_START_INTERVAL default=0.012 (sec)
127
128
  GRAPH_PARTITION false(default)|true
129
+ REPORT_IMAGE default=png
128
130
 
129
131
  * Options for Gfarm system:
130
132
 
@@ -108,7 +108,6 @@ class Communicator
108
108
  case s
109
109
  when /^heartbeat$/
110
110
  Log.debug "#{x}: #{s.inspect}"
111
- @selector.heartbeat(@reader.io)
112
111
  when /^exited$/
113
112
  Log.debug "#{x}: #{s.inspect}"
114
113
  return false
@@ -193,12 +193,15 @@ module Pwrake
193
193
  msg = "Shell#io_read_loop: exit"
194
194
  $stderr.puts(msg)
195
195
  Log.error(msg)
196
+ @exited = true
196
197
  @chan.halt
197
198
  return "exit"
198
199
  when IOError
200
+ @exited = true
199
201
  @chan.halt
200
202
  return "ioerror"
201
203
  when NBIO::TimeoutError
204
+ @exited = true
202
205
  @chan.halt
203
206
  return "timeout"
204
207
  end
@@ -236,6 +239,7 @@ module Pwrake
236
239
  Rake.application.display_error_message(e)
237
240
  Log.error e
238
241
  result = "taskfail:#{@id}:#{task.name}"
242
+ break if @exited
239
243
  ensure
240
244
  master_w.put_line result
241
245
  end
@@ -143,6 +143,7 @@ module Pwrake
143
143
  @selector.run
144
144
 
145
145
  Log.info "num_cores=#{sum_ncore}"
146
+ @option.total_cores = sum_ncore
146
147
  @hostinfo_by_id.each do |id,host|
147
148
  if ncore = @hostinfo_by_id[id].idle_cores
148
149
  Log.info "#{host} id=#{id} ncore=#{ncore}"
@@ -436,10 +437,10 @@ module Pwrake
436
437
  Log.debug "Master#finish @selector.run end"
437
438
  if !@exited
438
439
  @exited = true
439
- Log.debug "Master#finish Hander.exit begin"
440
+ Log.debug "Master#finish Handler.exit begin"
440
441
  NBIO::Handler.exit(@hdl_set)
441
442
  @selector.run(60)
442
- Log.debug "Master#finish Hander.exit end"
443
+ Log.debug "Master#finish Handler.exit end"
443
444
  end
444
445
  TaskWrapper.close_task_logger
445
446
  Log.debug "Master#finish end"
@@ -26,10 +26,10 @@ module Pwrake
26
26
  standard_exception_handling do
27
27
  init("pwrake") # <- parse options here
28
28
  @role = @master = Master.new
29
- load_rakefile
30
29
  t = Time.now
31
30
  @master.init
32
31
  @master.setup_branches
32
+ load_rakefile
33
33
  begin
34
34
  Log.debug "init: #{Time.now-t} sec"
35
35
  t = Time.now
@@ -62,7 +62,11 @@ module Pwrake
62
62
  opts.each_with_index do |a,i|
63
63
  if a[0] == '--version'
64
64
  a[3] = lambda { |value|
65
- puts "rake, version #{RAKEVERSION}"
65
+ if defined? RAKEVERSION
66
+ puts "rake, version #{RAKEVERSION}"
67
+ elsif defined? Rake::VERSION
68
+ puts "rake, version #{Rake::VERSION}"
69
+ end
66
70
  puts "pwrake, version #{Pwrake::VERSION}"
67
71
  exit
68
72
  }
@@ -139,9 +143,12 @@ module Pwrake
139
143
  "[Pw] Show Pwrake configuration options",
140
144
  lambda {|value| options.show_conf = true }
141
145
  ],
142
- ['--report LOGDIR',"[Pw] Report workflow statistics from LOGDIR to HTML and exit.",
146
+ ['--report LOGDIR',"[Pw] Generate `report.html' (Report of workflow statistics) in LOGDIR and exit.",
143
147
  lambda {|value| options.report_dir = value }
144
148
  ],
149
+ ['--report-image IMAGE_TYPE',"[Pw] Gnuplot output format (png,jpg,svg etc.) in report.html.",
150
+ lambda {|value| options.report_image = value }
151
+ ],
145
152
  ['--clear-gfarm2fs',"[Pw] Clear gfarm2fs mountpoints left after failure.",
146
153
  lambda { |value|
147
154
  Option.new.clear_gfarm2fs
data/lib/pwrake/nbio.rb CHANGED
@@ -3,7 +3,7 @@ require "fiber"
3
3
  module Pwrake
4
4
  module NBIO
5
5
 
6
- class TimeoutError < IOError
6
+ class TimeoutError < StandardError
7
7
  end
8
8
 
9
9
  class Selector
@@ -11,7 +11,6 @@ module NBIO
11
11
  def initialize
12
12
  @reader = {}
13
13
  @writer = {}
14
- @hb_time = {}
15
14
  @running = false
16
15
  end
17
16
 
@@ -19,12 +18,10 @@ module NBIO
19
18
 
20
19
  def add_reader(hdl)
21
20
  @reader[hdl.io] = hdl
22
- heartbeat(hdl.io) if hdl.timeout
23
21
  end
24
22
 
25
23
  def delete_reader(hdl)
26
24
  @reader.delete(hdl.io)
27
- delete_heartbeat(hdl.io)
28
25
  end
29
26
 
30
27
  def add_writer(hdl)
@@ -51,19 +48,9 @@ module NBIO
51
48
  hdl.respond_to?(:host) ? hdl.host : nil
52
49
  end
53
50
 
54
- # called when IO start and receive heartbeat
55
- def heartbeat(io)
56
- @hb_time[io] = Time.now
57
- @hb_earliest = @hb_time.values.min
58
- end
59
-
60
- def delete_heartbeat(io)
61
- @hb_time.delete(io)
62
- @hb_earliest = @hb_time.values.min
63
- end
64
-
65
51
  def run(timeout=nil)
66
52
  @running = true
53
+ init_heartbeat if timeout
67
54
  while @running && !empty?
68
55
  if $debug
69
56
  Log.debug "Selector#run: "+caller[0..1].join(", ")+
@@ -74,21 +61,16 @@ module NBIO
74
61
  end
75
62
  ensure
76
63
  @running = false
64
+ @hb_time = nil
77
65
  end
78
66
 
79
67
  private
80
68
  def run_select(timeout)
81
- r, w = IO.select(@reader.keys,@writer.keys,[],timeout)
69
+ to = (timeout) ? timeout*0.75 : nil
70
+ r, w, = IO.select(@reader.keys,@writer.keys,[],to)
71
+ check_heartbeat(r,timeout) if timeout
82
72
  r.each{|io| @reader[io].call} if r
83
73
  w.each{|io| @writer[io].call} if w
84
- while timeout && @hb_earliest && Time.now - @hb_earliest > timeout
85
- io = @hb_time.key(@hb_earliest)
86
- if hdl = @reader[io]
87
- e = TimeoutError.new("HB Timeout (#{timeout}s) #<Reader:%x> #{io.inspect}"%hdl.__id__)
88
- hdl.error(e)
89
- end
90
- delete_heartbeat(io)
91
- end
92
74
  rescue IOError => e
93
75
  em = "#{e.class.name}: #{e.message}"
94
76
  @reader.keys.each do |io|
@@ -112,6 +94,33 @@ module NBIO
112
94
  #raise e
113
95
  end
114
96
 
97
+ def init_heartbeat
98
+ t = Time.now
99
+ @hb_time = {}
100
+ @reader.each_key{|io| @hb_time[io] = t}
101
+ end
102
+
103
+ def check_heartbeat(ios,timeout)
104
+ t = Time.now
105
+ rds = @reader.dup
106
+ if ios
107
+ ios.each do |io|
108
+ @hb_time[io] = t
109
+ rds.delete(io)
110
+ end
111
+ end
112
+ rds.each do |io,hdl|
113
+ if hdl.check_timeout
114
+ tdif = t - @hb_time[io]
115
+ if tdif > timeout
116
+ m = "Heartbeat Timeout: no response during #{tdif}s "+
117
+ "> timeout #{timeout}s from host=#{get_host(io)}"
118
+ hdl.error(TimeoutError.new(m))
119
+ end
120
+ end
121
+ end
122
+ end
123
+
115
124
  end
116
125
 
117
126
  #------------------------------------------------------------------
@@ -216,7 +225,7 @@ module NBIO
216
225
  @chunk_size = 8192
217
226
  end
218
227
  attr_reader :io
219
- attr_accessor :timeout
228
+ attr_accessor :check_timeout
220
229
 
221
230
  # call from Selector#run
222
231
  def call
@@ -316,10 +325,9 @@ module NBIO
316
325
  @n_chan = n_chan
317
326
  @queue = @n_chan.times.map{|i| FiberReaderQueue.new(self)}
318
327
  @default_queue = FiberReaderQueue.new(self)
319
- @timeout = true
328
+ @check_timeout = true
320
329
  end
321
330
  attr_reader :queue
322
- attr_accessor :timeout
323
331
  attr_accessor :default_queue
324
332
 
325
333
  def [](ch)
@@ -39,6 +39,7 @@ module Pwrake
39
39
 
40
40
  attr_reader :counter
41
41
  attr_reader :logger
42
+ attr_accessor :total_cores
42
43
 
43
44
  DEFAULT_CONFFILES = ["pwrake_conf.yaml","PwrakeConf.yaml"]
44
45
 
@@ -121,6 +122,7 @@ module Pwrake
121
122
  'PLOT_PARALLELISM',
122
123
  'SHOW_CONF',
123
124
  ['REPORT_DIR','REPORT'],
125
+ 'REPORT_IMAGE',
124
126
  'FAILED_TARGET', # rename(default), delete, leave
125
127
  'FAILURE_TERMINATION', # wait, kill, continue
126
128
  'QUEUE_PRIORITY', # RANK(default), FIFO, LIFO, DFS
@@ -12,7 +12,8 @@ module Pwrake
12
12
  def setup_filesystem
13
13
 
14
14
  @worker_progs = %w[
15
- parallel/processor_count
15
+ parallel/processor_count.rb
16
+ pwrake/worker/reader
16
17
  pwrake/worker/writer
17
18
  pwrake/worker/log_executor
18
19
  pwrake/worker/executor
@@ -64,7 +64,7 @@ module Pwrake
64
64
  d
65
65
  end
66
66
 
67
- def plot_parallelism(file)
67
+ def plot_parallelism(file,fmt)
68
68
  a = count_start_end_from_csv(file)
69
69
  return if a.size < 4
70
70
 
@@ -101,8 +101,8 @@ module Pwrake
101
101
 
102
102
  IO.popen("gnuplot","r+") do |f|
103
103
  f.puts "
104
- set terminal png
105
- set output '#{base}.png'
104
+ set terminal #{fmt}
105
+ set output '#{base}.#{fmt}'
106
106
  #set rmargin 10
107
107
  set title '#{base}'
108
108
  set xlabel 'time (sec)'
@@ -115,17 +115,17 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
115
115
  "
116
116
  end
117
117
 
118
- #puts "Parallelism plot: #{base}.png"
118
+ #puts "Parallelism plot: #{base}.#{fmt}"
119
119
  end
120
120
 
121
121
 
122
- def plot_parallelism2(csvtable, base)
122
+ def plot_parallelism2(csvtable, base, fmt)
123
123
  a = count_start_end_from_csv_table(csvtable)
124
124
  return if a.size < 4
125
125
 
126
126
  density = exec_density(a)
127
127
 
128
- fimg = base+'/parallelism.png'
128
+ fimg = base+'/parallelism.'+fmt
129
129
 
130
130
  n = a.size
131
131
  i = 0
@@ -155,7 +155,7 @@ plot '#{fpara}' w l axis x1y1 title 'parallelism'
155
155
  if system("which gnuplot >/dev/null 2>&1")
156
156
  IO.popen("gnuplot","r+") do |f|
157
157
  f.print "
158
- set terminal png
158
+ set terminal #{fmt}
159
159
  set output '#{fimg}'
160
160
  #set rmargin 10
161
161
  set title '#{base}'
@@ -259,7 +259,7 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
259
259
  h
260
260
  end
261
261
 
262
- def plot_parallelism_by_pattern(csvtable, base, pattern)
262
+ def plot_parallelism_by_pattern(csvtable, base, pattern, fmt)
263
263
  y_max = 0
264
264
  t_end = 0
265
265
  para = {}
@@ -290,13 +290,13 @@ plot '-' w l axis x1y1 title 'parallelism', '-' w l axis x1y2 title 'exec/sec'
290
290
  end
291
291
  end
292
292
 
293
- fimg = base+'/para_cmd.png'
293
+ fimg = base+'/para_cmd.'+fmt
294
294
 
295
295
  if system("which gnuplot >/dev/null 2>&1")
296
296
  IO.popen("gnuplot","r+") do |f|
297
297
  #begin f = $stdout
298
298
  f.print "
299
- set terminal png
299
+ set terminal #{fmt}
300
300
  set output '#{fimg}'
301
301
  set title '#{base}'
302
302
  set xlabel 'time (sec)'
@@ -338,10 +338,10 @@ set ylabel 'parallelism'
338
338
  return grid
339
339
  end
340
340
 
341
- def plot_parallelism_by_host(csvtable,base)
342
- fpng = base+"/para_host.png"
341
+ def plot_parallelism_by_host(csvtable,base,fmt)
342
+ fimg = base+"/para_host."+fmt
343
343
  data = read_time_by_host_from_csv(csvtable)
344
- return fpng if data.size == 0
344
+ return fimg if data.size == 0
345
345
 
346
346
  grid = []
347
347
  hosts = data.keys.sort
@@ -353,8 +353,8 @@ set ylabel 'parallelism'
353
353
  if system("which gnuplot >/dev/null 2>&1")
354
354
  IO.popen("gnuplot","r+") do |f|
355
355
  f.puts "
356
- set terminal png
357
- set output '#{fpng}'
356
+ set terminal #{fmt}
357
+ set output '#{fimg}'
358
358
  #set rmargin 7
359
359
  set lmargin 16
360
360
  set pm3d map
@@ -385,7 +385,7 @@ set format y ''
385
385
  f.printf "e\n"
386
386
  end
387
387
  end
388
- fpng
388
+ fimg
389
389
  end
390
390
 
391
391
  end
@@ -43,6 +43,8 @@ EOL
43
43
  @id = @@id
44
44
  @base = @dir
45
45
 
46
+ @img_fmt = option['REPORT_IMAGE'] || 'png'
47
+
46
48
  @cmd_file = File.join(@dir,option['COMMAND_CSV_FILE'])
47
49
  @task_file = File.join(@dir,option['TASK_CSV_FILE'])
48
50
  @html_file = File.join(@dir,'report.html')
@@ -196,15 +198,15 @@ EOL
196
198
  end
197
199
  html << "</table>\n"
198
200
  html << "<h2>Parallelism</h2>\n"
199
- fimg = Parallelism.plot_parallelism2(@sh_table,@base)
201
+ fimg = Parallelism.plot_parallelism2(@sh_table,@base,@img_fmt)
200
202
  html << "<img src='./#{File.basename(fimg)}' align='top'/></br>\n"
201
203
 
202
204
  html << "<h2>Parallelism by command</h2>\n"
203
- fimg3 = Parallelism.plot_parallelism_by_pattern(@sh_table,@base,@pattern)
205
+ fimg3 = Parallelism.plot_parallelism_by_pattern(@sh_table,@base,@pattern,@img_fmt)
204
206
  html << "<img src='./#{File.basename(fimg3)}' align='top'/></br>\n"
205
207
 
206
208
  html << "<h2>Parallelism by host</h2>\n"
207
- fimg2 = Parallelism.plot_parallelism_by_host(@sh_table,@base)
209
+ fimg2 = Parallelism.plot_parallelism_by_host(@sh_table,@base,@img_fmt)
208
210
  html << "<img src='./#{File.basename(fimg2)}' align='top'/></br>\n"
209
211
 
210
212
  html << "<h2>Command statistics</h2>\n"
@@ -285,11 +287,11 @@ EOL
285
287
  command_list << cmd
286
288
  end
287
289
  end
288
- hist_image = @base+"/hist.png"
290
+ hist_image = @base+"/hist."+@img_fmt
289
291
  if system("which gnuplot >/dev/null 2>&1")
290
292
  IO.popen("gnuplot","r+") do |f|
291
293
  f.puts "
292
- set terminal png # size 480,360
294
+ set terminal #{@img_fmt} # size 480,360
293
295
  set output '#{hist_image}'
294
296
  set ylabel 'histogram'
295
297
  set xlabel 'Execution time (sec)'
@@ -2,14 +2,15 @@ module Pwrake
2
2
 
3
3
  class ReportMulti
4
4
 
5
- def initialize(list,pattern)
5
+ def initialize(list,pattern,options={})
6
6
  @reports = list.map do |base|
7
7
  r = Report.new(base,pattern)
8
8
  puts r.base+" elap=#{r.elap}"
9
9
  r
10
10
  end
11
11
  @pattern = pattern
12
- @elap_png = 'elap.png'
12
+ @img_fmt = options['REPORT_IMAGE'] || 'png'
13
+ @elap_img = 'elap.#{@img_fmt}'
13
14
  end
14
15
 
15
16
  def report(stat_html)
@@ -29,7 +30,7 @@ module Pwrake
29
30
  end
30
31
  html << "</table>\n"
31
32
  html << "<h2>Elapsed time</h2>\n"
32
- html << "<img src='./#{File.basename(@elap_png)}' align='top'/></br>\n"
33
+ html << "<img src='./#{File.basename(@elap_img)}' align='top'/></br>\n"
33
34
 
34
35
  html << "<h2>Histogram of Execution time</h2>\n"
35
36
  html << report_histogram()
@@ -51,8 +52,8 @@ module Pwrake
51
52
  ymax = 10**(mid+wid)
52
53
  IO.popen("gnuplot","r+") do |f|
53
54
  f.puts "
54
- set terminal png size 640,480
55
- set output '#{@elap_png}'
55
+ set terminal #{@img_fmt} size 640,480
56
+ set output '#{@elap_img}'
56
57
  set xlabel 'ncore'
57
58
  set ylabel 'time (sec)'
58
59
  set yrange [#{ymin}:#{ymax}]
@@ -64,7 +65,7 @@ plot #{a}/x,'-' w lp lw 2 ps 2 title 'elapsed time'
64
65
  end
65
66
  f.puts "e"
66
67
  end
67
- puts "Ncore-time plot: "+@elap_png
68
+ puts "Ncore-time plot: "+@elap_img
68
69
  end
69
70
 
70
71
  def report_histogram
@@ -81,7 +82,7 @@ plot #{a}/x,'-' w lp lw 2 ps 2 title 'elapsed time'
81
82
  end
82
83
 
83
84
  @cmd_rep.each_key do |cmd|
84
- @images[cmd] = 'hist_'+cmd.gsub(/[\/.]/,'_')+'.png'
85
+ @images[cmd] = 'hist_'+cmd.gsub(/[\/.]/,'_')+'.'+@img_fmt
85
86
  end
86
87
  histogram_plot
87
88
  histogram_html
@@ -106,7 +107,7 @@ plot #{a}/x,'-' w lp lw 2 ps 2 title 'elapsed time'
106
107
  @cmd_rep.each do |cmd,cmd_rep|
107
108
  IO.popen("gnuplot","r+") do |f|
108
109
  f.puts "
109
- set terminal png # size 480,360
110
+ set terminal #{@img_fmt} # size 480,360
110
111
  set output '#{@images[cmd]}'
111
112
  set ylabel 'histogram'
112
113
  set xlabel 'Execution time (sec)'
@@ -139,7 +140,7 @@ set title '#{cmd}'"
139
140
  @cmd_rep.each do |cmd,cmd_rep|
140
141
  IO.popen("gnuplot","r+") do |f|
141
142
  f.puts "
142
- set terminal png # size 480,360
143
+ set terminal #{@img_fmt} # size 480,360
143
144
  set output '#{@images[cmd]}'
144
145
  set nohidden3d
145
146
  set palette rgb 33,13,10
@@ -193,4 +194,3 @@ set title '#{cmd}'"
193
194
 
194
195
  end
195
196
  end
196
-
@@ -1,3 +1,3 @@
1
1
  module Pwrake
2
- VERSION = "2.1.2"
2
+ VERSION = "2.1.3"
3
3
  end
@@ -2,168 +2,147 @@ module Pwrake
2
2
 
3
3
  class Executor
4
4
 
5
- LIST = {}
6
- CHARS='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
7
- TLEN=32
8
-
9
- def initialize(dir_class,id,shell_cmd,shell_rc)
5
+ def initialize(selector,dir_class,id)
6
+ @selector = selector
10
7
  @id = id
11
- @shell_rc = shell_rc
12
- @shell_cmd = shell_cmd || ENV['SHELL'] || '/bin/sh'
13
- @terminator = ""
14
- TLEN.times{ @terminator << CHARS[rand(CHARS.length)] }
15
8
  @out = Writer.instance
16
9
  @log = LogExecutor.instance
17
- @queue = Queue.new
10
+ @queue = []
11
+ @rd_list = []
18
12
  @dir = dir_class.new
19
- @spawn_in, @sh_in = IO.pipe
20
- @sh_out, @spawn_out = IO.pipe
21
- @sh_err, @spawn_err = IO.pipe
22
- LIST[@id] = self
23
- @exec_thread = start_exec_thread
13
+ @dir.open
14
+ @dir.open_messages.each{|m| @log.info(m)}
15
+ @out.puts "#{@id}:open"
24
16
  end
25
17
 
26
- def execute(cmd)
27
- @queue.enq(cmd)
18
+ def stop
19
+ @stopped = true
20
+ @queue.clear
28
21
  end
29
22
 
30
- def start_exec_thread
31
- Thread.new do
32
- begin
33
- @dir.open
34
- @dir.open_messages.each{|m| @log.info(m)}
35
- @pid = Kernel.spawn(@shell_cmd,
36
- :out=>@spawn_out,
37
- :err=>@spawn_err,
38
- :in=>@spawn_in,
39
- :chdir=>@dir.current)
40
- begin
41
- @out.puts "#{@id}:open"
42
- @shell_rc.each do |cmd|
43
- run_rc(cmd)
44
- end
45
- while cmd = @queue.deq
46
- run(cmd)
47
- end
48
- @sh_in.puts("exit")
49
- @sh_in.flush
50
- ensure
51
- status = nil
52
- begin
53
- Timeout.timeout(5){
54
- pid,status = Process.waitpid2(@pid)
55
- }
56
- rescue => exc
57
- @log.error(([exc.to_s]+exc.backtrace).join("\n"))
58
- @log.info("#{@id}:kill INT sh @pid=#{@pid}")
59
- Process.kill("INT",@pid)
60
- pid,status = Process.waitpid2(@pid)
61
- end
62
- @log.info("shell exit status: "+status.inspect)
63
- end
64
- rescue => exc
65
- @out.puts "#{@id}:exc:#{exc}"
66
- @log.error(([exc.to_s]+exc.backtrace).join("\n"))
67
- ensure
68
- @dir.close_messages.each{|m| @log.info(m)}
69
- @dir.close
70
- end
23
+ def close
24
+ if @thread
25
+ @thread.join(15)
26
+ sleep 0.1
71
27
  end
28
+ @thread = Thread.new do
29
+ @dir.close_messages.each{|m| @log.info(m)}
30
+ @dir.close
31
+ end
32
+ rescue => exc
33
+ @log.error(([exc.to_s]+exc.backtrace).join("\n"))
72
34
  end
73
35
 
74
- def run(cmd)
75
- case cmd
76
- when Proc
77
- cmd.call
78
- when "cd"
79
- @dir.cd
80
- run_command("cd "+@dir.current)
81
- #
82
- when /^cd\s+(.*)$/
83
- @dir.cd($1)
84
- run_command("cd "+@dir.current)
85
- #
86
- when /^exit\b/
87
- close
88
- @out.puts "#{@id}:exit"
89
- #
90
- when String
91
- run_command(cmd)
92
- #
93
- else
94
- raise RuntimeError,"invalid cmd: #{cmd.inspect}"
36
+ def join
37
+ if @thread
38
+ @thread.join(15)
95
39
  end
40
+ rescue => exc
41
+ @log.error(([exc.to_s]+exc.backtrace).join("\n"))
96
42
  end
97
43
 
98
- def run_rc(cmd)
99
- run_command_main(cmd){|s| @log.info "<"+s if @log}
44
+ def execute(cmd)
45
+ return if @stopped
46
+ @queue.push(cmd)
47
+ start_process
100
48
  end
101
49
 
102
- def run_command(cmd)
103
- run_command_main(cmd){|s| @out.puts s}
50
+ def start_process
51
+ return if @thread # running
52
+ command = @queue.shift
53
+ return if !command # empty queue
54
+ @spawn_in, @sh_in = IO.pipe
55
+ @sh_out, @spawn_out = IO.pipe
56
+ @sh_err, @spawn_err = IO.pipe
57
+
58
+ @pid = Kernel.spawn(command,
59
+ :in=>@spawn_in,
60
+ :out=>@spawn_out,
61
+ :err=>@spawn_err,
62
+ :chdir=>@dir.current,
63
+ :pgroup=>true
64
+ )
65
+ @log.info "pid=#{@pid} started. command=#{command.inspect}"
66
+
67
+ @thread = Thread.new do
68
+ @pid2,@status = Process.waitpid2(@pid)
69
+ @spawn_in.close
70
+ @spawn_out.close
71
+ @spawn_err.close
72
+ end
73
+
74
+ @rd_out = Reader.new(@sh_out,"o")
75
+ @rd_err = Reader.new(@sh_err,"e")
76
+ @rd_list = [@rd_out,@rd_err]
77
+
78
+ @selector.add_reader(@sh_out){callback(@rd_out)}
79
+ @selector.add_reader(@sh_err){callback(@rd_err)}
104
80
  end
105
81
 
106
- def run_command_main(cmd)
107
- if /\\$/ =~ cmd # command line continues
108
- @sh_in.puts(cmd)
109
- @sh_in.flush
110
- return
82
+ def callback(rd)
83
+ while s = rd.gets
84
+ @out.puts "#{@id}:#{rd.mode}:#{s.chomp}"
111
85
  end
112
- term = "\necho '#{@terminator}':$? \necho '#{@terminator}' 1>&2"
113
- @sh_in.puts(cmd+term)
114
- @sh_in.flush
115
- status = ""
116
- io_set = [@sh_out,@sh_err]
117
- loop do
118
- io_sel, = IO.select(io_set,nil,nil)
119
- for io in io_sel
120
- s = io.gets.chomp
121
- case io
122
- when @sh_out
123
- if s[0,TLEN] == @terminator
124
- status = s[TLEN+1..-1]
125
- io_set.delete(@sh_out)
126
- else
127
- yield "#{@id}:o:"+s
128
- end
129
- when @sh_err
130
- if s[0,TLEN] == @terminator
131
- io_set.delete(@sh_err)
132
- else
133
- yield "#{@id}:e:"+s
134
- end
135
- end
86
+ if rd.eof?
87
+ @selector.delete_reader(rd.io)
88
+ @rd_list.delete(rd)
89
+ if @rd_list.empty? # process_end
90
+ @thread = @pid = nil
91
+ @log.info inspect_status
92
+ @out.puts "#{@id}:z:#{exit_status}"
93
+ @sh_in.close
94
+ @sh_out.close
95
+ @sh_err.close
96
+ start_process # next process
136
97
  end
137
- break if io_set.empty?
138
98
  end
139
- yield "#{@id}:z:#{status}"
99
+ rescue => exc
100
+ @log.error(([exc.to_s]+exc.backtrace).join("\n"))
101
+ stop
140
102
  end
141
103
 
142
- def close
143
- execute(nil) # threads end
104
+ def inspect_status
105
+ s = @status
106
+ case
107
+ when s.signaled?
108
+ if s.coredump?
109
+ "pid=#{s.pid} dumped core."
110
+ else
111
+ "pid=#{s.pid} was killed by signal #{s.termsig}"
112
+ end
113
+ when s.stopped?
114
+ "pid=#{s.pid} was stopped by signal #{s.stopsig}"
115
+ when s.exited?
116
+ "pid=#{s.pid} exited normally. status=#{s.exitstatus}"
117
+ else
118
+ "unknown status %#x" % s.to_i
119
+ end
144
120
  end
145
121
 
146
- def join
147
- LIST.delete(@id)
148
- @exec_thread.join(15) if @exec_thread
122
+ def exit_status
123
+ s = @status
124
+ case
125
+ when s.signaled?
126
+ if s.coredump?
127
+ "core_dumped"
128
+ else
129
+ "killed:#{s.termsig}"
130
+ end
131
+ when s.stopped?
132
+ "stopped:#{s.stopsig}"
133
+ when s.exited?
134
+ "#{s.exitstatus}"
135
+ else
136
+ "unknown:%#x" % s.to_i
137
+ end
149
138
  end
150
139
 
151
140
  def kill(sig)
152
- @queue.clear
141
+ stop
153
142
  if @pid
154
- # kill process group
155
- s = `ps ho pid --ppid=#{@pid}`
156
- s.each_line do |x|
157
- pid = x.to_i
158
- Process.kill(sig,pid)
159
- @log.warn "Executor(id=#{@id})#kill pid=#{pid} sig=#{sig}"
160
- end
161
- if s.empty?
162
- @log.warn "Executor(id=#{@id})#kill nothing killed"
163
- end
143
+ Process.kill(sig,-@pid)
144
+ @log.warn "Executor(id=#{@id})#kill pid=#{@pid} sig=#{sig}"
164
145
  end
165
- @spawn_out.flush
166
- @spawn_err.flush
167
146
  end
168
147
 
169
148
  end
@@ -1,24 +1,32 @@
1
1
  module Pwrake
2
2
 
3
3
  class Invoker
4
- # using Michael Grosser's parallel
5
- # https://github.com/grosser/parallel
6
- include Parallel::ProcessorCount
4
+ begin
5
+ # use Michael Grosser's Parallel module
6
+ # https://github.com/grosser/parallel
7
+ include Parallel::ProcessorCount
8
+ rescue
9
+ def processor_count
10
+ # only for Linux
11
+ IO.read("/proc/cpuinfo").scan(/^processor/).size
12
+ end
13
+ end
7
14
 
8
15
  def initialize(dir_class, ncore, option)
9
16
  @dir_class = dir_class
10
17
  @option = option
18
+ @selector = Selector.new
19
+ @ex_list = {}
11
20
  @out = Writer.instance # firstly replace $stderr
12
21
  @log = LogExecutor.instance
13
22
  @log.init(@option)
14
23
  @log.open(@dir_class)
15
24
  @out.add_logger(@log)
16
- ncore_max = processor_count()
17
25
  if ncore.kind_of?(Integer)
18
26
  if ncore > 0
19
27
  @ncore = ncore
20
28
  else
21
- @ncore = ncore_max + ncore
29
+ @ncore = processor_count() + ncore
22
30
  end
23
31
  if @ncore <= 0
24
32
  m = "Out of range: ncore=#{ncore.inspect}"
@@ -26,7 +34,7 @@ module Pwrake
26
34
  raise ArgumentError,m
27
35
  end
28
36
  elsif ncore.nil?
29
- @ncore = ncore_max
37
+ @ncore = processor_count()
30
38
  else
31
39
  m = "Invalid argument: ncore=#{ncore.inspect}"
32
40
  @out.puts "ncore:"+m
@@ -37,32 +45,31 @@ module Pwrake
37
45
  Signal.trap("PIPE", "SIG_IGN")
38
46
  end
39
47
 
40
- def get_line
41
- begin
42
- line = $stdin.gets
43
- exit if !line
48
+ def get_line(io)
49
+ line = io.gets
50
+ if line
44
51
  line.chomp!
45
52
  line.strip!
46
53
  @log.info ">#{line}"
47
- return line
48
- rescue
49
- exit
50
54
  end
55
+ return line
51
56
  end
52
57
 
53
58
  def run
54
59
  setup_option
55
- if setup_loop
56
- start_heartbeat
57
- command_loop
58
- end
60
+ setup_loop
61
+ @rd = Reader.new($stdin)
62
+ @selector.add_reader($stdin){command_callback(@rd)}
63
+ @selector.loop
64
+ rescue => exc
65
+ @log.error(([exc.to_s]+exc.backtrace).join("\n"))
59
66
  ensure
60
67
  close_all
61
68
  end
62
69
 
63
70
  def setup_option
64
71
  @log.info @option.inspect
65
- @heartbeat_interval = @option[:heartbeat]
72
+ @out.heartbeat = @option[:heartbeat]
66
73
  @shell_cmd = @option[:shell_command]
67
74
  @shell_rc = @option[:shell_rc] || []
68
75
  (@option[:pass_env]||{}).each do |k,v|
@@ -71,64 +78,54 @@ module Pwrake
71
78
  end
72
79
 
73
80
  def setup_loop
74
- while line = get_line
81
+ while line = get_line($stdin)
75
82
  case line
76
83
  when /^(\d+):open$/o
77
84
  $1.split.each do |id|
78
- Executor.new(@dir_class,id,@shell_cmd,@shell_rc)
85
+ @ex_list[id] = Executor.new(@selector,@dir_class,id)
79
86
  end
80
87
  when "setup_end"
81
- return true
88
+ return
82
89
  else
83
- return false if common_line(line)
84
- end
85
- end
86
- false
87
- end
88
-
89
- def start_heartbeat
90
- if @heartbeat_interval
91
- @heartbeat_thread = Thread.new do
92
- while true
93
- @out.puts "heartbeat"
94
- sleep @heartbeat_interval
90
+ if common_line(line)
91
+ raise RuntimeError,"exit during setup_loop"
95
92
  end
96
93
  end
97
94
  end
95
+ raise RuntimeError,"incomplete setup_loop"
98
96
  end
99
97
 
100
- def command_loop
101
- while line = get_line
98
+ def command_callback(rd)
99
+ while line = get_line(rd) # rd returns nil if line is incomplete
102
100
  case line
103
101
  when /^(\d+):(.*)$/o
104
102
  id,cmd = $1,$2
105
- ex = Executor::LIST[id]
106
- if ex.nil?
107
- if cmd=="exit"
108
- @out.puts "#{id}:end"
109
- next
110
- else
111
- ex = Executor.new(@dir_class,id,@shell_cmd,@shell_rc)
112
- end
113
- end
114
- ex.execute(cmd)
103
+ @ex_list[id].execute(cmd.chomp)
115
104
  else
116
105
  break if common_line(line)
117
106
  end
118
107
  end
108
+ if rd.eof?
109
+ # connection lost
110
+ raise RuntimeError,"lost connection to master"
111
+ end
119
112
  end
120
113
 
121
114
  def common_line(line)
122
115
  case line
123
116
  when /^exit$/o
117
+ @selector.delete_reader($stdin)
124
118
  return true
125
119
  #
126
120
  when /^kill:(.*)$/o
127
- kill_all($1)
121
+ sig = $1
122
+ sig = sig.to_i if /^\d+$/o =~ sig
123
+ @log.warn "killing worker, signal=#{sig}"
124
+ @ex_list.each{|id,ex| ex.kill(sig)}
128
125
  return false
129
126
  #
130
127
  when /^p$/o
131
- puts "Executor::LIST = #{Executor::LIST.inspect}"
128
+ $stderr.puts "@ex_list = #{@ex_list.inspect}"
132
129
  return false
133
130
  #
134
131
  else
@@ -138,26 +135,13 @@ module Pwrake
138
135
  end
139
136
  end
140
137
 
141
- def kill_all(sig)
142
- sig = sig.to_i if /^\d+$/o =~ sig
143
- @log.warn "worker_killed:signal=#{sig}"
144
- Executor::LIST.each{|id,exc| exc.kill(sig)}
145
- end
146
-
147
138
  def close_all
148
139
  @log.info "close_all"
149
140
  @heartbeat_thread.kill if @heartbeat_thread
150
141
  Dir.chdir
151
- id_list = Executor::LIST.keys
152
- ex_list = Executor::LIST.values
153
- ex_list.each{|ex| ex.close}
154
- begin
155
- ex_list.each{|ex| ex.join}
156
- rescue => e
157
- @log.error e
158
- @log.error e.backtrace.join("\n")
159
- end
160
- @log.info "worker:end:#{id_list.inspect}"
142
+ @ex_list.each_value{|ex| ex.close}
143
+ @ex_list.each_value{|ex| ex.join}
144
+ @log.info "worker:end:#{@ex_list.keys.inspect}"
161
145
  begin
162
146
  Timeout.timeout(20){@log.close}
163
147
  rescue => e
@@ -4,8 +4,17 @@ require "logger"
4
4
 
5
5
  module Pwrake
6
6
 
7
+ DELEGATE_METHODS = [
8
+ :debug, :info, :error, :fatal, :warn, :unknown,
9
+ :debug?, :info?, :error?, :fatal?, :warn?, :unknown?,
10
+ :level, :level=,
11
+ :formatter, :formatter=,
12
+ :datetime_format, :datetime_format=
13
+ ]
14
+
7
15
  class DummyLogger
8
- def method_missing(id,*args)
16
+ DELEGATE_METHODS.each do |m|
17
+ define_method(m){|*a|}
9
18
  end
10
19
  end
11
20
 
@@ -13,15 +22,10 @@ module Pwrake
13
22
  include Singleton
14
23
  extend Forwardable
15
24
 
16
- def_delegators :@logger, :debug, :info, :error, :fatal, :warn, :unknown
17
- def_delegators :@logger, :debug?, :info?, :error?, :fatal?, :warn?, :unknown?
18
- def_delegators :@logger, :level, :level=
19
- def_delegators :@logger, :formatter, :formatter=
20
- def_delegators :@logger, :datetime_format, :datetime_format=
25
+ def_delegators :@logger, *DELEGATE_METHODS
21
26
 
22
27
  def initialize
23
28
  @level = ::Logger::DEBUG
24
- #@logger = @logger_stderr = ::Logger.new($stderr)
25
29
  @logger = @logger_stderr = DummyLogger.new
26
30
  @logger.level = @level
27
31
  end
@@ -0,0 +1,73 @@
1
+ module Pwrake
2
+
3
+ class Reader
4
+
5
+ def initialize(io,mode="")
6
+ @io = io
7
+ @buf = ''
8
+ @eof = false
9
+ @mode = mode
10
+ end
11
+
12
+ attr_reader :io, :mode
13
+
14
+ def eof?
15
+ @eof && @buf.empty?
16
+ end
17
+
18
+ def gets
19
+ read_until("\n")
20
+ end
21
+
22
+ def read_until(sep="\r\n", chunk_size=8192)
23
+ until i = @buf.index(sep)
24
+ if s = _read(chunk_size)
25
+ @buf += s
26
+ else
27
+ if !@buf.empty? && @eof
28
+ buf = @buf; @buf = ''
29
+ return buf
30
+ else
31
+ return nil
32
+ end
33
+ end
34
+ end
35
+ @buf.slice!(0, i+sep.bytesize)
36
+ end
37
+
38
+ def _read(sz)
39
+ @io.read_nonblock(sz)
40
+ rescue EOFError
41
+ @eof = true
42
+ nil
43
+ rescue IO::WaitReadable
44
+ nil
45
+ end
46
+
47
+ end
48
+
49
+
50
+ class Selector
51
+
52
+ def initialize
53
+ @readers = {}
54
+ end
55
+
56
+ def add_reader(io,&callback)
57
+ @readers[io] = callback
58
+ end
59
+
60
+ def delete_reader(io)
61
+ @readers.delete(io)
62
+ end
63
+
64
+ def loop
65
+ while !@readers.empty?
66
+ r, = IO.select(@readers.keys,nil,nil)
67
+ r.each{|io| @readers[io].call} if r
68
+ end
69
+ end
70
+
71
+ end
72
+
73
+ end
@@ -1,4 +1,5 @@
1
1
  require "singleton"
2
+ require "timeout"
2
3
 
3
4
  module Pwrake
4
5
 
@@ -8,14 +9,26 @@ module Pwrake
8
9
  def initialize
9
10
  @out = $stdout
10
11
  @mutex = Mutex.new
11
- pipe_in, pipe_out = IO.pipe
12
- Thread.new(pipe_in,"log:") do |pin,pre|
13
- while s = pin.gets
14
- s.chomp!
15
- @out.puts pre+s
12
+ @queue = Queue.new
13
+ @heartbeat = 0
14
+ @thread = Thread.new do
15
+ loop do
16
+ begin
17
+ Timeout.timeout(@heartbeat) do
18
+ if s = @queue.deq
19
+ _puts s
20
+ end
21
+ end
22
+ rescue Timeout::Error
23
+ _puts "heartbeat"
24
+ end
16
25
  end
17
26
  end
18
- $stderr = pipe_out
27
+ end
28
+
29
+ def heartbeat=(heartbeat)
30
+ @heartbeat = heartbeat
31
+ @queue.enq(nil)
19
32
  end
20
33
 
21
34
  def add_logger(log)
@@ -23,20 +36,13 @@ module Pwrake
23
36
  end
24
37
 
25
38
  def puts(s)
26
- begin
27
- @mutex.synchronize do
28
- @out.print s+"\n"
29
- end
30
- @out.flush
31
- rescue Errno::EPIPE
32
- end
33
- @log.info "<#{s}" if @log
39
+ @queue.enq(s)
34
40
  end
35
41
 
36
- def print(s)
42
+ def _puts(s)
37
43
  begin
38
44
  @mutex.synchronize do
39
- @out.print s
45
+ @out.print s+"\n"
40
46
  end
41
47
  @out.flush
42
48
  rescue Errno::EPIPE
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pwrake
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masahiro TANAKA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-10 00:00:00.000000000 Z
11
+ date: 2016-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: parallel
@@ -87,6 +87,7 @@ files:
87
87
  - lib/pwrake/worker/invoker.rb
88
88
  - lib/pwrake/worker/load.rb
89
89
  - lib/pwrake/worker/log_executor.rb
90
+ - lib/pwrake/worker/reader.rb
90
91
  - lib/pwrake/worker/shared_directory.rb
91
92
  - lib/pwrake/worker/worker_main.rb
92
93
  - lib/pwrake/worker/writer.rb
@@ -129,7 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
130
  version: '0'
130
131
  requirements: []
131
132
  rubyforge_project:
132
- rubygems_version: 2.5.1
133
+ rubygems_version: 2.6.8
133
134
  signing_key:
134
135
  specification_version: 4
135
136
  summary: Parallel Workflow engine based on Rake
@@ -151,4 +152,3 @@ test_files:
151
152
  - spec/helper.rb
152
153
  - spec/hosts
153
154
  - spec/pwrake_spec.rb
154
- has_rdoc: