pwrake 2.1.3 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,9 +6,7 @@ module Pwrake
6
6
  # group_map = {gid1=>[hid1,hid2,...], ...}
7
7
  @size_q = 0
8
8
  @q = {}
9
- @hostinfo_by_name = {}
10
9
  @hostinfo_by_id.each do |id,h|
11
- @hostinfo_by_name[h.name] = h
12
10
  @q[id] = @array_class.new(h.ncore)
13
11
  end
14
12
  @q_group = {}
@@ -26,23 +24,21 @@ module Pwrake
26
24
  @n_turn = @disable_steal ? 1 : 2
27
25
  end
28
26
 
29
-
30
27
  def enq_impl(t)
31
28
  hints = t && t.suggest_location
32
29
  Log.debug "enq #{t.name} hints=#{hints.inspect}"
33
30
  if hints.nil? || hints.empty?
34
31
  @q_remote.push(t)
35
32
  else
36
- stored = false
33
+ kv = {}
37
34
  hints.each do |h|
38
- host_info = @hostinfo_by_name[h]
39
- if host_info && q = @q[host_info.id]
40
- t.assigned.push(host_info.id)
41
- q.push(t)
42
- stored = true
43
- end
35
+ HostMap.ipmatch_for_name(h).each{|id| kv[id] = true}
44
36
  end
45
- if stored
37
+ if !kv.empty?
38
+ kv.each_key do |id|
39
+ t.assigned.push(id)
40
+ @q[id].push(t)
41
+ end
46
42
  @size_q += 1
47
43
  else
48
44
  @q_remote.push(t)
@@ -39,12 +39,10 @@ module Pwrake
39
39
 
40
40
  def task_locality
41
41
  file_size = {}
42
- file_host = {}
43
42
  h = {}
44
43
  @task_table.each do |row|
45
44
  name = row['task_name']
46
45
  file_size[name] = row['file_size'].to_i
47
- file_host[name] = (row['file_host']||'').split('|')
48
46
  exec_host = row['exec_host'] || ""
49
47
  h[exec_host] = true
50
48
  end
@@ -54,15 +52,16 @@ module Pwrake
54
52
  if row['executed']=='1'
55
53
  name = row['task_name']
56
54
  exec_host = row['exec_host']
57
- loc = file_host[name].include?(exec_host)
55
+ loc = (row['write_loc'] == "L")
58
56
  count(exec_host, loc, :out_num, 1)
59
57
  count(exec_host, loc, :out_size, file_size[name])
60
58
 
61
59
  preq_files = (row['preq']||'').split('|')
62
- preq_files.each do |preq|
60
+ preq_loc = row['preq_loc']||''
61
+ preq_files.each_with_index do |preq,i|
63
62
  sz = file_size[preq]
64
63
  if sz && sz > 0
65
- loc = file_host[preq].include?(exec_host)
64
+ loc = (preq_loc[i] == "L")
66
65
  count(exec_host, loc, :in_num, 1)
67
66
  count(exec_host, loc, :in_size, sz)
68
67
  end
@@ -25,10 +25,10 @@ module Pwrake
25
25
  @input_file_mtime = nil
26
26
  @rank = nil
27
27
  @priority = nil
28
- @lock_rank = Monitor.new
29
28
  @executed = false
30
29
  @assigned = []
31
30
  @exec_host = nil
31
+ @exec_host_id = nil
32
32
  @tried_hosts = []
33
33
  @n_retry = @property.retry || Rake.application.pwrake_options["RETRY"] || 1
34
34
  end
@@ -41,7 +41,7 @@ module Pwrake
41
41
  attr_reader :assigned
42
42
  attr_reader :tried_hosts
43
43
  attr_accessor :executed
44
- attr_accessor :exec_host
44
+ attr_accessor :exec_host, :exec_host_id
45
45
  attr_accessor :shell_id, :status
46
46
 
47
47
  def self.format_time(t)
@@ -53,8 +53,8 @@ module Pwrake
53
53
  fn = File.join(dir,option['TASK_CSV_FILE'])
54
54
  @@task_logger = CSV.open(fn,'w')
55
55
  @@task_logger.puts %w[
56
- task_id task_name start_time end_time elap_time preq preq_host
57
- exec_host shell_id has_action executed file_size file_mtime file_host
56
+ task_id task_name start_time end_time elap_time preq preq_host preq_loc
57
+ exec_host shell_id has_action executed file_size file_mtime file_host write_loc
58
58
  ]
59
59
  end
60
60
  end
@@ -78,14 +78,14 @@ module Pwrake
78
78
  @n_retry == 0
79
79
  end
80
80
 
81
- def postprocess(location)
81
+ def postprocess(postproc)
82
82
  @executed = true if !@task.actions.empty?
83
83
  #tm_taskend = Time.now
84
84
  if is_file_task?
85
85
  #t = Time.now
86
86
  if File.exist?(name)
87
87
  @file_stat = File::Stat.new(name)
88
- @location = location
88
+ @location = postproc.run(self)
89
89
  end
90
90
  end
91
91
  #Log.debug "postprocess time=#{Time.now-tm_taskend}"
@@ -114,11 +114,11 @@ module Pwrake
114
114
  def log_task
115
115
  @time_end = Time.now
116
116
  #
117
- loc = suggest_location()
117
+ sug_host = suggest_location()
118
118
  shell = Pwrake::Shell.current
119
119
  #
120
- if loc && !loc.empty? && shell && !actions.empty?
121
- Rake.application.count( loc, shell.host )
120
+ if sug_host && !sug_host.empty? && shell && !actions.empty?
121
+ Rake.application.count( sug_host, shell.host )
122
122
  end
123
123
  return if !@@task_logger
124
124
  #
@@ -127,20 +127,34 @@ module Pwrake
127
127
  RANK_STAT.add_sample(rank,elap)
128
128
  end
129
129
  #
130
+ # locality check
131
+ loc_na = true
132
+ preq_loc = prerequisites.map do |preq|
133
+ locs = Rake.application[preq].wrapper.location
134
+ if loc = file_locality(locs)
135
+ loc_na = false
136
+ loc
137
+ else
138
+ "n"
139
+ end
140
+ end.join("")
141
+ preq_loc = nil if loc_na
142
+ write_loc = file_locality(@location)
143
+ #
130
144
  if @file_stat
131
- fstat = [@file_stat.size, @file_stat.mtime, self.location.join('|')]
145
+ fstat = [@file_stat.size, @file_stat.mtime, self.location.join('|'), write_loc]
132
146
  else
133
- fstat = [nil]*3
147
+ fstat = [nil]*4
134
148
  end
135
149
  #
136
- # task_id task_name start_time end_time elap_time preq preq_host
137
- # exec_host shell_id has_action executed file_size file_mtime file_host
150
+ # task_id task_name start_time end_time elap_time preq preq_host preq_loc
151
+ # exec_host shell_id has_action executed file_size file_mtime file_host write_loc
138
152
  #
139
153
  row = [ @task_id, name, @time_start, @time_end, elap,
140
- prerequisites, loc, @exec_host, @shell_id,
154
+ prerequisites, sug_host, preq_loc, @exec_host, @shell_id,
141
155
  (actions.empty?) ? 0 : 1,
142
156
  (@executed) ? 1 : 0,
143
- *fstat ]
157
+ ] + fstat
144
158
  row.map!{|x|
145
159
  if x.kind_of?(Time)
146
160
  TaskWrapper.format_time(x)
@@ -166,6 +180,17 @@ module Pwrake
166
180
  end
167
181
  end
168
182
 
183
+ def file_locality(nodes)
184
+ if nodes.empty? || !@exec_host_id
185
+ nil # not available
186
+ elsif nodes.any?{|node|
187
+ HostMap.ipmatch_for_name(node).include?(@exec_host_id)}
188
+ "L" # Local
189
+ else
190
+ "R" # Remote
191
+ end
192
+ end
193
+
169
194
  def is_file_task?
170
195
  @task.kind_of?(Rake::FileTask)
171
196
  end
@@ -223,28 +248,26 @@ module Pwrake
223
248
  end
224
249
 
225
250
  def rank
226
- #@lock_rank.synchronize do
227
- if @rank.nil?
228
- if subsequents.nil? || subsequents.empty?
229
- @rank = 0
230
- else
231
- max_rank = 0
232
- subsequents.each do |subsq|
233
- r = subsq.wrapper.rank
234
- if max_rank < r
235
- max_rank = r
236
- end
237
- end
238
- if has_output_file?
239
- step = 1
240
- else
241
- step = 0
251
+ if @rank.nil?
252
+ if subsequents.nil? || subsequents.empty?
253
+ @rank = 0
254
+ else
255
+ max_rank = 0
256
+ subsequents.each do |subsq|
257
+ r = subsq.wrapper.rank
258
+ if max_rank < r
259
+ max_rank = r
242
260
  end
243
- @rank = max_rank + step
244
261
  end
245
- Log.debug "Task[#{name}] rank=#{@rank.inspect}"
262
+ if has_output_file?
263
+ step = 1
264
+ else
265
+ step = 0
266
+ end
267
+ @rank = max_rank + step
246
268
  end
247
- #end
269
+ Log.debug "Task[#{name}] rank=#{@rank.inspect}"
270
+ end
248
271
  @rank
249
272
  end
250
273
 
@@ -1,3 +1,3 @@
1
1
  module Pwrake
2
- VERSION = "2.1.3"
2
+ VERSION = "2.2.0"
3
3
  end
@@ -7,17 +7,37 @@ module Pwrake
7
7
  @id = id
8
8
  @out = Writer.instance
9
9
  @log = LogExecutor.instance
10
- @queue = []
10
+ @queue = FiberQueue.new
11
11
  @rd_list = []
12
12
  @dir = dir_class.new
13
13
  @dir.open
14
14
  @dir.open_messages.each{|m| @log.info(m)}
15
15
  @out.puts "#{@id}:open"
16
+
17
+ r,w = IO.pipe
18
+ @command_pipe_r = NBIO::Reader.new(@selector,r)
19
+ @command_pipe_w = NBIO::Writer.new(@selector,w)
20
+ @start_process_fiber = Fiber.new do
21
+ while line = @queue.deq
22
+ cmd = line
23
+ while /\\$/ =~ line # line continues
24
+ line = @queue.deq
25
+ break if !line
26
+ cmd += line
27
+ end
28
+ break if @stopped
29
+ cmd.chomp!
30
+ if !cmd.empty?
31
+ start_process(cmd)
32
+ end
33
+ Fiber.yield
34
+ end
35
+ end
16
36
  end
17
37
 
18
38
  def stop
19
39
  @stopped = true
20
- @queue.clear
40
+ @queue.finish
21
41
  end
22
42
 
23
43
  def close
@@ -43,13 +63,12 @@ module Pwrake
43
63
 
44
64
  def execute(cmd)
45
65
  return if @stopped
46
- @queue.push(cmd)
47
- start_process
66
+ @queue.enq(cmd)
67
+ @start_process_fiber.resume
48
68
  end
49
69
 
50
- def start_process
70
+ def start_process(command)
51
71
  return if @thread # running
52
- command = @queue.shift
53
72
  return if !command # empty queue
54
73
  @spawn_in, @sh_in = IO.pipe
55
74
  @sh_out, @spawn_out = IO.pipe
@@ -71,20 +90,19 @@ module Pwrake
71
90
  @spawn_err.close
72
91
  end
73
92
 
74
- @rd_out = Reader.new(@sh_out,"o")
75
- @rd_err = Reader.new(@sh_err,"e")
93
+ @rd_out = NBIO::Reader.new(@selector,@sh_out)
94
+ @rd_err = NBIO::Reader.new(@selector,@sh_err)
76
95
  @rd_list = [@rd_out,@rd_err]
77
96
 
78
- @selector.add_reader(@sh_out){callback(@rd_out)}
79
- @selector.add_reader(@sh_err){callback(@rd_err)}
97
+ Fiber.new{callback(@rd_err,"e")}.resume
98
+ Fiber.new{callback(@rd_out,"o")}.resume
80
99
  end
81
100
 
82
- def callback(rd)
101
+ def callback(rd,mode)
83
102
  while s = rd.gets
84
- @out.puts "#{@id}:#{rd.mode}:#{s.chomp}"
103
+ @out.puts "#{@id}:#{mode}:#{s.chomp}"
85
104
  end
86
105
  if rd.eof?
87
- @selector.delete_reader(rd.io)
88
106
  @rd_list.delete(rd)
89
107
  if @rd_list.empty? # process_end
90
108
  @thread = @pid = nil
@@ -93,7 +111,7 @@ module Pwrake
93
111
  @sh_in.close
94
112
  @sh_out.close
95
113
  @sh_err.close
96
- start_process # next process
114
+ @start_process_fiber.resume # next process
97
115
  end
98
116
  end
99
117
  rescue => exc
@@ -1,3 +1,5 @@
1
+ require "socket"
2
+
1
3
  module Pwrake
2
4
 
3
5
  class Invoker
@@ -12,37 +14,65 @@ module Pwrake
12
14
  end
13
15
  end
14
16
 
15
- def initialize(dir_class, ncore, option)
16
- @dir_class = dir_class
17
- @option = option
18
- @selector = Selector.new
17
+ def get_io
18
+ [IO, $stdin, $stdout]
19
+ end
20
+
21
+ def setup_connection
22
+ ioc, ior, iow = get_io()
23
+ # read @ncore and @option
24
+ @ncore,len = ior.read(8).unpack("V2")
25
+ @option = Marshal.load(ior.read(len))
26
+ # set pipe to branch-master
27
+ @selector = NBIO::Selector.new(ioc)
28
+ @rd = NBIO::Reader.new(@selector,ior)
29
+ @out = Writer.instance
30
+ @out.out = iow
31
+ end
32
+
33
+ def initialize
34
+ setup_connection
35
+ @dir_class = Pwrake.const_get(@option[:shared_directory])
36
+ @dir_class.init(@option)
19
37
  @ex_list = {}
20
- @out = Writer.instance # firstly replace $stderr
21
38
  @log = LogExecutor.instance
22
39
  @log.init(@option)
23
40
  @log.open(@dir_class)
24
41
  @out.add_logger(@log)
25
- if ncore.kind_of?(Integer)
26
- if ncore > 0
27
- @ncore = ncore
28
- else
29
- @ncore = processor_count() + ncore
42
+ send_ipaddr
43
+ send_ncore
44
+ # does NOT exit when writing to broken pipe
45
+ Signal.trap("PIPE", "SIG_IGN")
46
+ end
47
+
48
+ def send_ipaddr
49
+ # get IP addresses
50
+ v = Socket.getifaddrs.
51
+ select{|a| a.addr.ip? && (a.flags & Socket::IFF_MULTICAST != 0)}
52
+ # write IP addresses
53
+ v.each do |a|
54
+ @out.puts "ip:#{a.addr.ip_address}"
55
+ end
56
+ end
57
+
58
+ def send_ncore
59
+ if @ncore.kind_of?(Integer)
60
+ if @ncore <= 0
61
+ @ncore += processor_count()
30
62
  end
31
63
  if @ncore <= 0
32
- m = "Out of range: ncore=#{ncore.inspect}"
64
+ m = "Out of range: ncore=#{@ncore.inspect}"
33
65
  @out.puts "ncore:"+m
34
66
  raise ArgumentError,m
35
67
  end
36
- elsif ncore.nil?
68
+ elsif @ncore.nil?
37
69
  @ncore = processor_count()
38
70
  else
39
- m = "Invalid argument: ncore=#{ncore.inspect}"
71
+ m = "Invalid argument: ncore=#{@ncore.inspect}"
40
72
  @out.puts "ncore:"+m
41
73
  raise ArgumentError,m
42
74
  end
43
75
  @out.puts "ncore:#{@ncore}"
44
- # does NOT exit when writing to broken pipe
45
- Signal.trap("PIPE", "SIG_IGN")
46
76
  end
47
77
 
48
78
  def get_line(io)
@@ -57,10 +87,10 @@ module Pwrake
57
87
 
58
88
  def run
59
89
  setup_option
60
- setup_loop
61
- @rd = Reader.new($stdin)
62
- @selector.add_reader($stdin){command_callback(@rd)}
63
- @selector.loop
90
+ Fiber.new{setup_loop}.resume
91
+ @selector.run
92
+ Fiber.new{command_callback}.resume
93
+ @selector.run
64
94
  rescue => exc
65
95
  @log.error(([exc.to_s]+exc.backtrace).join("\n"))
66
96
  ensure
@@ -70,15 +100,14 @@ module Pwrake
70
100
  def setup_option
71
101
  @log.info @option.inspect
72
102
  @out.heartbeat = @option[:heartbeat]
73
- @shell_cmd = @option[:shell_command]
74
- @shell_rc = @option[:shell_rc] || []
75
103
  (@option[:pass_env]||{}).each do |k,v|
76
104
  ENV[k] = v
77
105
  end
78
106
  end
79
107
 
80
108
  def setup_loop
81
- while line = get_line($stdin)
109
+ loop do
110
+ line = get_line(@rd)
82
111
  case line
83
112
  when /^(\d+):open$/o
84
113
  $1.split.each do |id|
@@ -92,12 +121,16 @@ module Pwrake
92
121
  end
93
122
  end
94
123
  end
95
- raise RuntimeError,"incomplete setup_loop"
96
124
  end
97
125
 
98
- def command_callback(rd)
99
- while line = get_line(rd) # rd returns nil if line is incomplete
126
+ def command_callback
127
+ while line = get_line(@rd)
100
128
  case line
129
+ when /^(\d+):exit$/o
130
+ id = $1
131
+ ex = @ex_list.delete(id)
132
+ ex.close
133
+ ex.join
101
134
  when /^(\d+):(.*)$/o
102
135
  id,cmd = $1,$2
103
136
  @ex_list[id].execute(cmd.chomp)
@@ -105,7 +138,7 @@ module Pwrake
105
138
  break if common_line(line)
106
139
  end
107
140
  end
108
- if rd.eof?
141
+ if @rd.eof?
109
142
  # connection lost
110
143
  raise RuntimeError,"lost connection to master"
111
144
  end
@@ -114,7 +147,6 @@ module Pwrake
114
147
  def common_line(line)
115
148
  case line
116
149
  when /^exit$/o
117
- @selector.delete_reader($stdin)
118
150
  return true
119
151
  #
120
152
  when /^kill:(.*)$/o
@@ -129,7 +161,7 @@ module Pwrake
129
161
  return false
130
162
  #
131
163
  else
132
- msg = "invalid line: #{line}"
164
+ msg = "invalid line: #{line.inspect}"
133
165
  @log.fatal msg
134
166
  raise RuntimeError,msg
135
167
  end
@@ -142,12 +174,7 @@ module Pwrake
142
174
  @ex_list.each_value{|ex| ex.close}
143
175
  @ex_list.each_value{|ex| ex.join}
144
176
  @log.info "worker:end:#{@ex_list.keys.inspect}"
145
- begin
146
- Timeout.timeout(20){@log.close}
147
- rescue => e
148
- $stdout.puts e
149
- $stdout.puts e.backtrace.join("\n")
150
- end
177
+ Timeout.timeout(20){@log.close}
151
178
  ensure
152
179
  @out.puts "exited"
153
180
  end