pwrake 2.1.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,9 +6,7 @@ module Pwrake
6
6
  # group_map = {gid1=>[hid1,hid2,...], ...}
7
7
  @size_q = 0
8
8
  @q = {}
9
- @hostinfo_by_name = {}
10
9
  @hostinfo_by_id.each do |id,h|
11
- @hostinfo_by_name[h.name] = h
12
10
  @q[id] = @array_class.new(h.ncore)
13
11
  end
14
12
  @q_group = {}
@@ -26,23 +24,21 @@ module Pwrake
26
24
  @n_turn = @disable_steal ? 1 : 2
27
25
  end
28
26
 
29
-
30
27
  def enq_impl(t)
31
28
  hints = t && t.suggest_location
32
29
  Log.debug "enq #{t.name} hints=#{hints.inspect}"
33
30
  if hints.nil? || hints.empty?
34
31
  @q_remote.push(t)
35
32
  else
36
- stored = false
33
+ kv = {}
37
34
  hints.each do |h|
38
- host_info = @hostinfo_by_name[h]
39
- if host_info && q = @q[host_info.id]
40
- t.assigned.push(host_info.id)
41
- q.push(t)
42
- stored = true
43
- end
35
+ HostMap.ipmatch_for_name(h).each{|id| kv[id] = true}
44
36
  end
45
- if stored
37
+ if !kv.empty?
38
+ kv.each_key do |id|
39
+ t.assigned.push(id)
40
+ @q[id].push(t)
41
+ end
46
42
  @size_q += 1
47
43
  else
48
44
  @q_remote.push(t)
@@ -39,12 +39,10 @@ module Pwrake
39
39
 
40
40
  def task_locality
41
41
  file_size = {}
42
- file_host = {}
43
42
  h = {}
44
43
  @task_table.each do |row|
45
44
  name = row['task_name']
46
45
  file_size[name] = row['file_size'].to_i
47
- file_host[name] = (row['file_host']||'').split('|')
48
46
  exec_host = row['exec_host'] || ""
49
47
  h[exec_host] = true
50
48
  end
@@ -54,15 +52,16 @@ module Pwrake
54
52
  if row['executed']=='1'
55
53
  name = row['task_name']
56
54
  exec_host = row['exec_host']
57
- loc = file_host[name].include?(exec_host)
55
+ loc = (row['write_loc'] == "L")
58
56
  count(exec_host, loc, :out_num, 1)
59
57
  count(exec_host, loc, :out_size, file_size[name])
60
58
 
61
59
  preq_files = (row['preq']||'').split('|')
62
- preq_files.each do |preq|
60
+ preq_loc = row['preq_loc']||''
61
+ preq_files.each_with_index do |preq,i|
63
62
  sz = file_size[preq]
64
63
  if sz && sz > 0
65
- loc = file_host[preq].include?(exec_host)
64
+ loc = (preq_loc[i] == "L")
66
65
  count(exec_host, loc, :in_num, 1)
67
66
  count(exec_host, loc, :in_size, sz)
68
67
  end
@@ -25,10 +25,10 @@ module Pwrake
25
25
  @input_file_mtime = nil
26
26
  @rank = nil
27
27
  @priority = nil
28
- @lock_rank = Monitor.new
29
28
  @executed = false
30
29
  @assigned = []
31
30
  @exec_host = nil
31
+ @exec_host_id = nil
32
32
  @tried_hosts = []
33
33
  @n_retry = @property.retry || Rake.application.pwrake_options["RETRY"] || 1
34
34
  end
@@ -41,7 +41,7 @@ module Pwrake
41
41
  attr_reader :assigned
42
42
  attr_reader :tried_hosts
43
43
  attr_accessor :executed
44
- attr_accessor :exec_host
44
+ attr_accessor :exec_host, :exec_host_id
45
45
  attr_accessor :shell_id, :status
46
46
 
47
47
  def self.format_time(t)
@@ -53,8 +53,8 @@ module Pwrake
53
53
  fn = File.join(dir,option['TASK_CSV_FILE'])
54
54
  @@task_logger = CSV.open(fn,'w')
55
55
  @@task_logger.puts %w[
56
- task_id task_name start_time end_time elap_time preq preq_host
57
- exec_host shell_id has_action executed file_size file_mtime file_host
56
+ task_id task_name start_time end_time elap_time preq preq_host preq_loc
57
+ exec_host shell_id has_action executed file_size file_mtime file_host write_loc
58
58
  ]
59
59
  end
60
60
  end
@@ -78,14 +78,14 @@ module Pwrake
78
78
  @n_retry == 0
79
79
  end
80
80
 
81
- def postprocess(location)
81
+ def postprocess(postproc)
82
82
  @executed = true if !@task.actions.empty?
83
83
  #tm_taskend = Time.now
84
84
  if is_file_task?
85
85
  #t = Time.now
86
86
  if File.exist?(name)
87
87
  @file_stat = File::Stat.new(name)
88
- @location = location
88
+ @location = postproc.run(self)
89
89
  end
90
90
  end
91
91
  #Log.debug "postprocess time=#{Time.now-tm_taskend}"
@@ -114,11 +114,11 @@ module Pwrake
114
114
  def log_task
115
115
  @time_end = Time.now
116
116
  #
117
- loc = suggest_location()
117
+ sug_host = suggest_location()
118
118
  shell = Pwrake::Shell.current
119
119
  #
120
- if loc && !loc.empty? && shell && !actions.empty?
121
- Rake.application.count( loc, shell.host )
120
+ if sug_host && !sug_host.empty? && shell && !actions.empty?
121
+ Rake.application.count( sug_host, shell.host )
122
122
  end
123
123
  return if !@@task_logger
124
124
  #
@@ -127,20 +127,34 @@ module Pwrake
127
127
  RANK_STAT.add_sample(rank,elap)
128
128
  end
129
129
  #
130
+ # locality check
131
+ loc_na = true
132
+ preq_loc = prerequisites.map do |preq|
133
+ locs = Rake.application[preq].wrapper.location
134
+ if loc = file_locality(locs)
135
+ loc_na = false
136
+ loc
137
+ else
138
+ "n"
139
+ end
140
+ end.join("")
141
+ preq_loc = nil if loc_na
142
+ write_loc = file_locality(@location)
143
+ #
130
144
  if @file_stat
131
- fstat = [@file_stat.size, @file_stat.mtime, self.location.join('|')]
145
+ fstat = [@file_stat.size, @file_stat.mtime, self.location.join('|'), write_loc]
132
146
  else
133
- fstat = [nil]*3
147
+ fstat = [nil]*4
134
148
  end
135
149
  #
136
- # task_id task_name start_time end_time elap_time preq preq_host
137
- # exec_host shell_id has_action executed file_size file_mtime file_host
150
+ # task_id task_name start_time end_time elap_time preq preq_host preq_loc
151
+ # exec_host shell_id has_action executed file_size file_mtime file_host write_loc
138
152
  #
139
153
  row = [ @task_id, name, @time_start, @time_end, elap,
140
- prerequisites, loc, @exec_host, @shell_id,
154
+ prerequisites, sug_host, preq_loc, @exec_host, @shell_id,
141
155
  (actions.empty?) ? 0 : 1,
142
156
  (@executed) ? 1 : 0,
143
- *fstat ]
157
+ ] + fstat
144
158
  row.map!{|x|
145
159
  if x.kind_of?(Time)
146
160
  TaskWrapper.format_time(x)
@@ -166,6 +180,17 @@ module Pwrake
166
180
  end
167
181
  end
168
182
 
183
+ def file_locality(nodes)
184
+ if nodes.empty? || !@exec_host_id
185
+ nil # not available
186
+ elsif nodes.any?{|node|
187
+ HostMap.ipmatch_for_name(node).include?(@exec_host_id)}
188
+ "L" # Local
189
+ else
190
+ "R" # Remote
191
+ end
192
+ end
193
+
169
194
  def is_file_task?
170
195
  @task.kind_of?(Rake::FileTask)
171
196
  end
@@ -223,28 +248,26 @@ module Pwrake
223
248
  end
224
249
 
225
250
  def rank
226
- #@lock_rank.synchronize do
227
- if @rank.nil?
228
- if subsequents.nil? || subsequents.empty?
229
- @rank = 0
230
- else
231
- max_rank = 0
232
- subsequents.each do |subsq|
233
- r = subsq.wrapper.rank
234
- if max_rank < r
235
- max_rank = r
236
- end
237
- end
238
- if has_output_file?
239
- step = 1
240
- else
241
- step = 0
251
+ if @rank.nil?
252
+ if subsequents.nil? || subsequents.empty?
253
+ @rank = 0
254
+ else
255
+ max_rank = 0
256
+ subsequents.each do |subsq|
257
+ r = subsq.wrapper.rank
258
+ if max_rank < r
259
+ max_rank = r
242
260
  end
243
- @rank = max_rank + step
244
261
  end
245
- Log.debug "Task[#{name}] rank=#{@rank.inspect}"
262
+ if has_output_file?
263
+ step = 1
264
+ else
265
+ step = 0
266
+ end
267
+ @rank = max_rank + step
246
268
  end
247
- #end
269
+ Log.debug "Task[#{name}] rank=#{@rank.inspect}"
270
+ end
248
271
  @rank
249
272
  end
250
273
 
@@ -1,3 +1,3 @@
1
1
  module Pwrake
2
- VERSION = "2.1.3"
2
+ VERSION = "2.2.0"
3
3
  end
@@ -7,17 +7,37 @@ module Pwrake
7
7
  @id = id
8
8
  @out = Writer.instance
9
9
  @log = LogExecutor.instance
10
- @queue = []
10
+ @queue = FiberQueue.new
11
11
  @rd_list = []
12
12
  @dir = dir_class.new
13
13
  @dir.open
14
14
  @dir.open_messages.each{|m| @log.info(m)}
15
15
  @out.puts "#{@id}:open"
16
+
17
+ r,w = IO.pipe
18
+ @command_pipe_r = NBIO::Reader.new(@selector,r)
19
+ @command_pipe_w = NBIO::Writer.new(@selector,w)
20
+ @start_process_fiber = Fiber.new do
21
+ while line = @queue.deq
22
+ cmd = line
23
+ while /\\$/ =~ line # line continues
24
+ line = @queue.deq
25
+ break if !line
26
+ cmd += line
27
+ end
28
+ break if @stopped
29
+ cmd.chomp!
30
+ if !cmd.empty?
31
+ start_process(cmd)
32
+ end
33
+ Fiber.yield
34
+ end
35
+ end
16
36
  end
17
37
 
18
38
  def stop
19
39
  @stopped = true
20
- @queue.clear
40
+ @queue.finish
21
41
  end
22
42
 
23
43
  def close
@@ -43,13 +63,12 @@ module Pwrake
43
63
 
44
64
  def execute(cmd)
45
65
  return if @stopped
46
- @queue.push(cmd)
47
- start_process
66
+ @queue.enq(cmd)
67
+ @start_process_fiber.resume
48
68
  end
49
69
 
50
- def start_process
70
+ def start_process(command)
51
71
  return if @thread # running
52
- command = @queue.shift
53
72
  return if !command # empty queue
54
73
  @spawn_in, @sh_in = IO.pipe
55
74
  @sh_out, @spawn_out = IO.pipe
@@ -71,20 +90,19 @@ module Pwrake
71
90
  @spawn_err.close
72
91
  end
73
92
 
74
- @rd_out = Reader.new(@sh_out,"o")
75
- @rd_err = Reader.new(@sh_err,"e")
93
+ @rd_out = NBIO::Reader.new(@selector,@sh_out)
94
+ @rd_err = NBIO::Reader.new(@selector,@sh_err)
76
95
  @rd_list = [@rd_out,@rd_err]
77
96
 
78
- @selector.add_reader(@sh_out){callback(@rd_out)}
79
- @selector.add_reader(@sh_err){callback(@rd_err)}
97
+ Fiber.new{callback(@rd_err,"e")}.resume
98
+ Fiber.new{callback(@rd_out,"o")}.resume
80
99
  end
81
100
 
82
- def callback(rd)
101
+ def callback(rd,mode)
83
102
  while s = rd.gets
84
- @out.puts "#{@id}:#{rd.mode}:#{s.chomp}"
103
+ @out.puts "#{@id}:#{mode}:#{s.chomp}"
85
104
  end
86
105
  if rd.eof?
87
- @selector.delete_reader(rd.io)
88
106
  @rd_list.delete(rd)
89
107
  if @rd_list.empty? # process_end
90
108
  @thread = @pid = nil
@@ -93,7 +111,7 @@ module Pwrake
93
111
  @sh_in.close
94
112
  @sh_out.close
95
113
  @sh_err.close
96
- start_process # next process
114
+ @start_process_fiber.resume # next process
97
115
  end
98
116
  end
99
117
  rescue => exc
@@ -1,3 +1,5 @@
1
+ require "socket"
2
+
1
3
  module Pwrake
2
4
 
3
5
  class Invoker
@@ -12,37 +14,65 @@ module Pwrake
12
14
  end
13
15
  end
14
16
 
15
- def initialize(dir_class, ncore, option)
16
- @dir_class = dir_class
17
- @option = option
18
- @selector = Selector.new
17
+ def get_io
18
+ [IO, $stdin, $stdout]
19
+ end
20
+
21
+ def setup_connection
22
+ ioc, ior, iow = get_io()
23
+ # read @ncore and @option
24
+ @ncore,len = ior.read(8).unpack("V2")
25
+ @option = Marshal.load(ior.read(len))
26
+ # set pipe to branch-master
27
+ @selector = NBIO::Selector.new(ioc)
28
+ @rd = NBIO::Reader.new(@selector,ior)
29
+ @out = Writer.instance
30
+ @out.out = iow
31
+ end
32
+
33
+ def initialize
34
+ setup_connection
35
+ @dir_class = Pwrake.const_get(@option[:shared_directory])
36
+ @dir_class.init(@option)
19
37
  @ex_list = {}
20
- @out = Writer.instance # firstly replace $stderr
21
38
  @log = LogExecutor.instance
22
39
  @log.init(@option)
23
40
  @log.open(@dir_class)
24
41
  @out.add_logger(@log)
25
- if ncore.kind_of?(Integer)
26
- if ncore > 0
27
- @ncore = ncore
28
- else
29
- @ncore = processor_count() + ncore
42
+ send_ipaddr
43
+ send_ncore
44
+ # does NOT exit when writing to broken pipe
45
+ Signal.trap("PIPE", "SIG_IGN")
46
+ end
47
+
48
+ def send_ipaddr
49
+ # get IP addresses
50
+ v = Socket.getifaddrs.
51
+ select{|a| a.addr.ip? && (a.flags & Socket::IFF_MULTICAST != 0)}
52
+ # write IP addresses
53
+ v.each do |a|
54
+ @out.puts "ip:#{a.addr.ip_address}"
55
+ end
56
+ end
57
+
58
+ def send_ncore
59
+ if @ncore.kind_of?(Integer)
60
+ if @ncore <= 0
61
+ @ncore += processor_count()
30
62
  end
31
63
  if @ncore <= 0
32
- m = "Out of range: ncore=#{ncore.inspect}"
64
+ m = "Out of range: ncore=#{@ncore.inspect}"
33
65
  @out.puts "ncore:"+m
34
66
  raise ArgumentError,m
35
67
  end
36
- elsif ncore.nil?
68
+ elsif @ncore.nil?
37
69
  @ncore = processor_count()
38
70
  else
39
- m = "Invalid argument: ncore=#{ncore.inspect}"
71
+ m = "Invalid argument: ncore=#{@ncore.inspect}"
40
72
  @out.puts "ncore:"+m
41
73
  raise ArgumentError,m
42
74
  end
43
75
  @out.puts "ncore:#{@ncore}"
44
- # does NOT exit when writing to broken pipe
45
- Signal.trap("PIPE", "SIG_IGN")
46
76
  end
47
77
 
48
78
  def get_line(io)
@@ -57,10 +87,10 @@ module Pwrake
57
87
 
58
88
  def run
59
89
  setup_option
60
- setup_loop
61
- @rd = Reader.new($stdin)
62
- @selector.add_reader($stdin){command_callback(@rd)}
63
- @selector.loop
90
+ Fiber.new{setup_loop}.resume
91
+ @selector.run
92
+ Fiber.new{command_callback}.resume
93
+ @selector.run
64
94
  rescue => exc
65
95
  @log.error(([exc.to_s]+exc.backtrace).join("\n"))
66
96
  ensure
@@ -70,15 +100,14 @@ module Pwrake
70
100
  def setup_option
71
101
  @log.info @option.inspect
72
102
  @out.heartbeat = @option[:heartbeat]
73
- @shell_cmd = @option[:shell_command]
74
- @shell_rc = @option[:shell_rc] || []
75
103
  (@option[:pass_env]||{}).each do |k,v|
76
104
  ENV[k] = v
77
105
  end
78
106
  end
79
107
 
80
108
  def setup_loop
81
- while line = get_line($stdin)
109
+ loop do
110
+ line = get_line(@rd)
82
111
  case line
83
112
  when /^(\d+):open$/o
84
113
  $1.split.each do |id|
@@ -92,12 +121,16 @@ module Pwrake
92
121
  end
93
122
  end
94
123
  end
95
- raise RuntimeError,"incomplete setup_loop"
96
124
  end
97
125
 
98
- def command_callback(rd)
99
- while line = get_line(rd) # rd returns nil if line is incomplete
126
+ def command_callback
127
+ while line = get_line(@rd)
100
128
  case line
129
+ when /^(\d+):exit$/o
130
+ id = $1
131
+ ex = @ex_list.delete(id)
132
+ ex.close
133
+ ex.join
101
134
  when /^(\d+):(.*)$/o
102
135
  id,cmd = $1,$2
103
136
  @ex_list[id].execute(cmd.chomp)
@@ -105,7 +138,7 @@ module Pwrake
105
138
  break if common_line(line)
106
139
  end
107
140
  end
108
- if rd.eof?
141
+ if @rd.eof?
109
142
  # connection lost
110
143
  raise RuntimeError,"lost connection to master"
111
144
  end
@@ -114,7 +147,6 @@ module Pwrake
114
147
  def common_line(line)
115
148
  case line
116
149
  when /^exit$/o
117
- @selector.delete_reader($stdin)
118
150
  return true
119
151
  #
120
152
  when /^kill:(.*)$/o
@@ -129,7 +161,7 @@ module Pwrake
129
161
  return false
130
162
  #
131
163
  else
132
- msg = "invalid line: #{line}"
164
+ msg = "invalid line: #{line.inspect}"
133
165
  @log.fatal msg
134
166
  raise RuntimeError,msg
135
167
  end
@@ -142,12 +174,7 @@ module Pwrake
142
174
  @ex_list.each_value{|ex| ex.close}
143
175
  @ex_list.each_value{|ex| ex.join}
144
176
  @log.info "worker:end:#{@ex_list.keys.inspect}"
145
- begin
146
- Timeout.timeout(20){@log.close}
147
- rescue => e
148
- $stdout.puts e
149
- $stdout.puts e.backtrace.join("\n")
150
- end
177
+ Timeout.timeout(20){@log.close}
151
178
  ensure
152
179
  @out.puts "exited"
153
180
  end