pwrake 2.0.1 → 2.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/README.md +22 -9
  3. data/bin/gfwhere-pipe +33 -9
  4. data/bin/pwrake +5 -2
  5. data/bin/pwrake_branch +5 -3
  6. data/lib/pwrake/branch/branch.rb +95 -86
  7. data/lib/pwrake/branch/branch_application.rb +4 -0
  8. data/lib/pwrake/branch/communicator.rb +173 -0
  9. data/lib/pwrake/branch/communicator_set.rb +100 -0
  10. data/lib/pwrake/branch/fiber_queue.rb +10 -0
  11. data/lib/pwrake/branch/shell.rb +68 -24
  12. data/lib/pwrake/branch/shell_profiler.rb +2 -0
  13. data/lib/pwrake/gfarm/gfarm_postprocess.rb +8 -7
  14. data/lib/pwrake/logger.rb +5 -0
  15. data/lib/pwrake/master/master.rb +190 -87
  16. data/lib/pwrake/master/master_application.rb +8 -0
  17. data/lib/pwrake/nbio.rb +525 -0
  18. data/lib/pwrake/option/host_map.rb +36 -4
  19. data/lib/pwrake/option/option.rb +7 -1
  20. data/lib/pwrake/option/option_filesystem.rb +13 -3
  21. data/lib/pwrake/queue/locality_aware_queue.rb +41 -6
  22. data/lib/pwrake/queue/queue_array.rb +31 -11
  23. data/lib/pwrake/queue/task_queue.rb +15 -18
  24. data/lib/pwrake/report/report.rb +2 -0
  25. data/lib/pwrake/task/task_algorithm.rb +4 -1
  26. data/lib/pwrake/task/task_manager.rb +2 -0
  27. data/lib/pwrake/task/task_property.rb +1 -0
  28. data/lib/pwrake/task/task_wrapper.rb +40 -21
  29. data/lib/pwrake/version.rb +1 -1
  30. data/lib/pwrake/worker/invoker.rb +4 -29
  31. data/pwrake.gemspec +3 -2
  32. metadata +24 -12
  33. data/lib/pwrake/branch.rb +0 -22
  34. data/lib/pwrake/branch/worker_communicator.rb +0 -104
  35. data/lib/pwrake/iomux/channel.rb +0 -70
  36. data/lib/pwrake/iomux/handler.rb +0 -124
  37. data/lib/pwrake/iomux/handler_set.rb +0 -35
  38. data/lib/pwrake/iomux/runner.rb +0 -62
  39. data/lib/pwrake/master.rb +0 -30
@@ -8,24 +8,37 @@ module Pwrake
8
8
  @weight = weight || 1.0
9
9
  @group = group || 0
10
10
  @id = id
11
+ @continuous_fail = 0
12
+ @total_fail = 0
13
+ @count_task = 0
11
14
  end
12
15
 
13
16
  attr_reader :name, :ncore, :weight, :group, :id, :steal_flag
14
17
  attr_accessor :idle_cores
15
18
 
16
19
  def set_ncore(n)
20
+ @retire = 0
21
+ @busy_cores = 0
17
22
  @ncore = @idle_cores = n
18
23
  end
19
24
 
20
- def increase(n)
25
+ def idle(n)
26
+ @busy_cores -= n
21
27
  @idle_cores += n
28
+ @idle_cores -= @retire
29
+ @retire = 0
30
+ @idle_cores + @busy_cores < 1 # all retired
31
+ end
32
+
33
+ def busy(n)
34
+ @busy_cores += n
35
+ @idle_cores -= n
36
+ @idle_cores + @busy_cores < 1 # all retired
22
37
  end
23
38
 
24
39
  def decrease(n)
25
40
  @idle_cores -= n
26
- if @idle_cores < 0
27
- raise RuntimeError,"# of cores must be non-negative"
28
- end
41
+ @idle_cores + @busy_cores < 1 # all retired
29
42
  end
30
43
 
31
44
  def steal_phase
@@ -34,6 +47,25 @@ module Pwrake
34
47
  @steal_flag = false
35
48
  t
36
49
  end
50
+
51
+ def retire(n)
52
+ @retire += n
53
+ Log.debug "retire n=#{n}, host=#{@name}"
54
+ end
55
+
56
+ def task_result(result)
57
+ @count_task += 1
58
+ case result
59
+ when "end"
60
+ @continuous_fail = 0
61
+ when "fail"
62
+ @continuous_fail += 1
63
+ @total_fail += 1
64
+ else
65
+ raise "unknown result: #{result}"
66
+ end
67
+ @continuous_fail
68
+ end
37
69
  end
38
70
 
39
71
  class HostMap < Hash
@@ -1,3 +1,7 @@
1
+ require "pathname"
2
+ require "yaml"
3
+ require "pwrake/option/host_map"
4
+
1
5
  module Pwrake
2
6
 
3
7
  START_TIME = Time.now
@@ -178,7 +182,7 @@ module Pwrake
178
182
  ['NUM_THREADS', proc{|v| v && v.to_i}],
179
183
  ['SHELL_START_INTERVAL', proc{|v| (v || 0.012).to_f}],
180
184
  ['HEARTBEAT', proc{|v| (v || 240).to_i}],
181
- ['RETRY', proc{|v| (v || 0).to_i}],
185
+ ['RETRY', proc{|v| (v || 1).to_i}],
182
186
  ['DISABLE_AFFINITY', proc{|v| v || ENV['AFFINITY']=='off'}],
183
187
  ['DISABLE_STEAL', proc{|v| v || ENV['STEAL']=='off'}],
184
188
  ['GFARM_BASEDIR', proc{|v| v || '/tmp'}],
@@ -356,3 +360,5 @@ done
356
360
 
357
361
  end
358
362
  end
363
+
364
+ require "pwrake/option/option_filesystem"
@@ -1,3 +1,6 @@
1
+ require "pwrake/option/option_filesystem"
2
+ require "parallel"
3
+
1
4
  module Pwrake
2
5
 
3
6
  class Option
@@ -8,7 +11,14 @@ module Pwrake
8
11
 
9
12
  def setup_filesystem
10
13
 
11
- @worker_progs = %w[ writer log_executor executor invoker shared_directory ]
14
+ @worker_progs = %w[
15
+ parallel/processor_count
16
+ pwrake/worker/writer
17
+ pwrake/worker/log_executor
18
+ pwrake/worker/executor
19
+ pwrake/worker/invoker
20
+ pwrake/worker/shared_directory
21
+ ]
12
22
  @worker_option = {
13
23
  :base_dir => "",
14
24
  :work_dir => self['WORK_DIR'],
@@ -47,7 +57,7 @@ module Pwrake
47
57
  :gfarm2fs_debug_wait => self['GFARM2FS_DEBUG_WAIT'],
48
58
  :single_mp => self['GFARM_SINGLE_MP']
49
59
  })
50
- @worker_progs << "gfarm_directory"
60
+ @worker_progs.push "pwrake/worker/gfarm_directory"
51
61
 
52
62
  if self['DISABLE_AFFINITY']
53
63
  @queue_class = "TaskQueue"
@@ -61,7 +71,7 @@ module Pwrake
61
71
  #@num_noaction_threads = (n_noaction_th || 1).to_i
62
72
  @worker_option[:shared_directory] = "SharedDirectory"
63
73
  end
64
- @worker_progs << "worker_main"
74
+ @worker_progs.push "pwrake/worker/worker_main"
65
75
  Log.debug "@queue_class=#{@queue_class}"
66
76
  end
67
77
 
@@ -6,9 +6,13 @@ module Pwrake
6
6
  # group_map = {gid1=>[hid1,hid2,...], ...}
7
7
  @size_q = 0
8
8
  @q = {}
9
- @host_map.by_id.each{|h| @q[h.id] = @array_class.new(h.ncore)}
9
+ @hostinfo_by_name = {}
10
+ @hostinfo_by_id.each do |id,h|
11
+ @hostinfo_by_name[h.name] = h
12
+ @q[id] = @array_class.new(h.ncore)
13
+ end
10
14
  @q_group = {}
11
- group_map ||= {1=>@host_map.by_id.map{|h| h.id}}
15
+ group_map ||= {1=>@hostinfo_by_id.map{|id,h| id}}
12
16
  group_map.each do |gid,ary|
13
17
  q1 = {} # same group
14
18
  q2 = @q.dup # other groups
@@ -31,7 +35,7 @@ module Pwrake
31
35
  else
32
36
  stored = false
33
37
  hints.each do |h|
34
- host_info = @host_map.by_name[h]
38
+ host_info = @hostinfo_by_name[h]
35
39
  if host_info && q = @q[host_info.id]
36
40
  t.assigned.push(host_info.id)
37
41
  q.push(t)
@@ -88,7 +92,9 @@ module Pwrake
88
92
  t = q.shift(run_host)
89
93
  if t
90
94
  t.assigned.each do |h|
91
- @q[h].delete(t)
95
+ if q_h = @q[h]
96
+ q_h.delete(t)
97
+ end
92
98
  end
93
99
  @size_q -= 1
94
100
  end
@@ -113,7 +119,7 @@ module Pwrake
113
119
  end
114
120
  end
115
121
  if max_num > 0
116
- max_info = @host_map.by_id[max_host]
122
+ max_info = @hostinfo_by_id[max_host]
117
123
  Log.debug "deq_steal max_host=#{max_info.name} max_num=#{max_num}"
118
124
  t = host_info.steal_phase{|h| deq_locate(max_info,h)}
119
125
  #Log.debug "deq_steal task=#{t.inspect}"
@@ -131,7 +137,12 @@ module Pwrake
131
137
  n = 0
132
138
  @q.each do |h,q|
133
139
  if q.size > 0
134
- s << _qstr(@host_map.by_id[h].name,q)
140
+ hinfo = @hostinfo_by_id[h]
141
+ if hinfo
142
+ s << _qstr(hinfo.name,q)
143
+ else
144
+ s << _qstr("(#{hinfo.inspect})",q)
145
+ end
135
146
  else
136
147
  n += 1
137
148
  end
@@ -139,6 +150,7 @@ module Pwrake
139
150
  end
140
151
  s << _qstr("local*#{n}",[]) if n > 0
141
152
  s << _qstr("remote",@q_remote)
153
+ s << "@size_q=#{@size_q}"
142
154
  s
143
155
  end
144
156
 
@@ -154,5 +166,28 @@ module Pwrake
154
166
  @q_remote.empty?
155
167
  end
156
168
 
169
+ def drop_host(host_info)
170
+ hid = host_info.id
171
+ if q_drop = @q.delete(hid)
172
+ n_move = 0
173
+ q_size = q_drop.size
174
+ while t = q_drop.shift
175
+ assigned_other = false
176
+ t.assigned.each do |h|
177
+ if h != hid && @q[h]
178
+ assigned_other = true
179
+ break
180
+ end
181
+ end
182
+ if !assigned_other
183
+ @size_q -= 1
184
+ @q_remote.push(t)
185
+ n_move += 1
186
+ end
187
+ end
188
+ Log.debug "LAQ#drop_host: host=#{host_info.name} q.size=#{q_size} n_move=#{n_move}"
189
+ end
190
+ end
191
+
157
192
  end
158
193
  end
@@ -1,4 +1,5 @@
1
1
  require "forwardable"
2
+ require "pwrake/task/task_rank"
2
3
 
3
4
  module Pwrake
4
5
 
@@ -65,13 +66,20 @@ module Pwrake
65
66
  super()
66
67
  end
67
68
 
68
- def shift(host_info)
69
+ def shift(host_info=nil)
70
+ return super() unless host_info
71
+ tw_found = nil
69
72
  (size-1).downto(0) do |i|
70
- if at(i).acceptable_for(host_info)
71
- return delete_at(i)
73
+ tw = at(i)
74
+ if tw.acceptable_for(host_info)
75
+ if tw.untried_host?(host_info)
76
+ return delete_at(i)
77
+ else
78
+ tw_found ||= tw
79
+ end
72
80
  end
73
81
  end
74
- nil
82
+ tw_found
75
83
  end
76
84
  end
77
85
 
@@ -80,13 +88,20 @@ module Pwrake
80
88
  super()
81
89
  end
82
90
 
83
- def shift(host_info)
91
+ def shift(host_info=nil)
92
+ return super() unless host_info
93
+ tw_found = nil
84
94
  size.times do |i|
85
- if at(i).acceptable_for(host_info)
86
- return delete_at(i)
95
+ tw = at(i)
96
+ if tw.acceptable_for(host_info)
97
+ if tw.untried_host?(host_info)
98
+ return delete_at(i)
99
+ else
100
+ tw_found ||= tw
101
+ end
87
102
  end
88
103
  end
89
- nil
104
+ tw_found
90
105
  end
91
106
  end
92
107
 
@@ -157,13 +172,18 @@ module Pwrake
157
172
  end
158
173
 
159
174
  def pop_last_rank(r,host_info)
175
+ tw_found = nil
160
176
  (size-1).downto(0) do |i|
161
177
  tw = at(i)
162
178
  if tw.rank == r && tw.acceptable_for(host_info)
163
- return delete_at(i)
179
+ if tw.untried_host?(host_info)
180
+ return delete_at(i)
181
+ else
182
+ tw_found ||= tw
183
+ end
164
184
  end
165
185
  end
166
- nil
186
+ tw_found
167
187
  end
168
188
 
169
189
  def hrf_delete(t)
@@ -197,7 +217,7 @@ module Pwrake
197
217
  hrf_push(t)
198
218
  end
199
219
 
200
- def shift(host_info)
220
+ def shift(host_info=nil)
201
221
  return nil if empty?
202
222
  hrf_get(host_info)
203
223
  end
@@ -1,15 +1,15 @@
1
+ require "pwrake/queue/queue_array"
2
+ require "pwrake/queue/no_action_queue"
3
+
1
4
  module Pwrake
2
5
 
3
6
  class TaskQueue
4
7
 
5
- def initialize(host_map, group_map=nil)
6
- @q = []
7
- @empty = []
8
-
8
+ def initialize(hostinfo_by_id, group_map=nil)
9
9
  @enable_steal = true
10
10
  @q_no_action = NoActionQueue.new
11
11
 
12
- @host_map = host_map
12
+ @hostinfo_by_id = hostinfo_by_id
13
13
 
14
14
  pri = Rake.application.pwrake_options['QUEUE_PRIORITY'] || "LIHR"
15
15
  case pri
@@ -62,22 +62,22 @@ module Pwrake
62
62
  end
63
63
 
64
64
  def deq_noaction_task(&block)
65
- Log.debug "deq_task:"+(empty? ? " empty" : "\n#{inspect_q}")
65
+ Log.debug "deq_noaction_task:"+(empty? ? " (empty)" : "\n#{inspect_q}")
66
66
  while tw = @q_no_action.shift
67
67
  Log.debug "deq_noaction: #{tw.name}"
68
- yield(tw,nil)
68
+ yield(tw)
69
69
  end
70
70
  end
71
71
 
72
72
  def deq_task(&block) # locality version
73
- Log.debug "deq_task:"+(empty? ? " empty" : "\n#{inspect_q}")
73
+ Log.debug "deq_task:"+(empty? ? " (empty)" : "\n#{inspect_q}")
74
74
  queued = 0
75
75
  @n_turn.times do |turn|
76
76
  next if turn_empty?(turn)
77
77
  queued += deq_turn(turn,&block)
78
78
  end
79
79
  if queued>0
80
- Log.debug "queued:#{queued}"
80
+ Log.debug "deq_task: queued=#{queued}"
81
81
  end
82
82
  end
83
83
 
@@ -85,9 +85,9 @@ module Pwrake
85
85
  queued = 0
86
86
  while true
87
87
  count = 0
88
- @host_map.by_id.each do |host_info|
88
+ @hostinfo_by_id.each_value do |host_info|
89
89
  #Log.debug "TaskQueue#deq_turn host_info=#{host_info.name}"
90
- if host_info.idle_cores > 0
90
+ if (n = host_info.idle_cores) && n > 0
91
91
  if turn_empty?(turn)
92
92
  return queued
93
93
  elsif tw = deq_impl(host_info,turn)
@@ -99,8 +99,7 @@ module Pwrake
99
99
  Log.fatal m
100
100
  raise RuntimeError,m
101
101
  else
102
- host_info.decrease(n_task_cores)
103
- yield(tw,host_info.id)
102
+ yield(tw,host_info,n_task_cores)
104
103
  count += 1
105
104
  queued += 1
106
105
  end
@@ -134,11 +133,6 @@ module Pwrake
134
133
  @q_no_input.empty?
135
134
  end
136
135
 
137
- def task_end(tw, hid)
138
- host_info = @host_map.by_id[hid]
139
- host_info.increase(tw.n_used_cores(host_info))
140
- end
141
-
142
136
  def _qstr(h,q)
143
137
  s = " #{h}: size=#{q.size} "
144
138
  case q.size
@@ -160,5 +154,8 @@ module Pwrake
160
154
  _qstr("no_input",@q_no_input)
161
155
  end
162
156
 
157
+ def drop_host(host_info)
158
+ end
159
+
163
160
  end
164
161
  end
@@ -1,3 +1,5 @@
1
+ require "csv"
2
+
1
3
  module Pwrake
2
4
 
3
5
  class Report
@@ -9,6 +9,7 @@ module Pwrake
9
9
  attr_reader :subsequents
10
10
  attr_reader :arguments
11
11
  attr_reader :property
12
+ attr_reader :unfinished_prereq
12
13
 
13
14
  def pw_search_tasks(args)
14
15
  Log.debug "#{self.class}#pw_search_tasks start, args=#{args.inspect}"
@@ -81,10 +82,12 @@ module Pwrake
81
82
  private :format_search_flags
82
83
 
83
84
  def pw_enq_subsequents
84
- t = Time.now
85
+ #t = Time.now
85
86
  #h = application.pwrake_options['HALT_QUEUE_WHILE_SEARCH']
86
87
  #application.task_queue.synchronize(h) do
87
88
  @subsequents.each do |t| # <<--- competition !!!
89
+ #u = t.unfinished_prereq.keys
90
+ #Log.debug "enq_subseq: self=#{self.name} subseq=#{t.name} @unfin_preq=#{u.inspect}"
88
91
  if t && t.check_prereq_finished(self.name)
89
92
  application.task_queue.enq(t.wrapper)
90
93
  end
@@ -1,3 +1,5 @@
1
+ require "pwrake/task/task_property"
2
+
1
3
  module Pwrake
2
4
 
3
5
  module TaskManager
@@ -39,6 +39,7 @@ module Pwrake
39
39
  end
40
40
 
41
41
  def acceptable_for(host_info)
42
+ return true unless host_info
42
43
  if @disable_steal && host_info.steal_flag
43
44
  #Log.debug("@disable_steal && host_info.steal_flag")
44
45
  return false
@@ -1,4 +1,6 @@
1
- require 'forwardable'
1
+ require "forwardable"
2
+ require "csv"
3
+ require "pwrake/task/task_rank"
2
4
 
3
5
  module Pwrake
4
6
 
@@ -16,18 +18,19 @@ module Pwrake
16
18
  @@current_id += 1
17
19
  @location = []
18
20
  @group = []
19
- @group_id
21
+ @group_id = nil
20
22
  @suggest_location = nil
21
- @file_stat
22
- @input_file_size
23
- @input_file_mtime
24
- @rank
25
- @priority
23
+ @file_stat = nil
24
+ @input_file_size = nil
25
+ @input_file_mtime = nil
26
+ @rank = nil
27
+ @priority = nil
26
28
  @lock_rank = Monitor.new
27
29
  @executed = false
28
30
  @assigned = []
29
31
  @exec_host = nil
30
- @nretry = @property.retry || Rake.application.pwrake_options["RETRY"] || 0
32
+ @tried_hosts = []
33
+ @n_retry = @property.retry || Rake.application.pwrake_options["RETRY"] || 1
31
34
  end
32
35
 
33
36
  def_delegators :@task, :name, :actions, :prerequisites, :subsequents
@@ -36,6 +39,7 @@ module Pwrake
36
39
  attr_reader :task, :task_id, :group, :group_id, :file_stat
37
40
  attr_reader :location
38
41
  attr_reader :assigned
42
+ attr_reader :tried_hosts
39
43
  attr_accessor :executed
40
44
  attr_accessor :exec_host
41
45
  attr_accessor :shell_id, :status
@@ -66,24 +70,19 @@ module Pwrake
66
70
  @time_start = Time.now
67
71
  end
68
72
 
69
- def retry
70
- if @nretry > 0
71
- s="retry task: #{name}"
72
- Log.debug(s)
73
- $stderr.puts(s)
74
- @nretry -= 1
75
- Rake.application.task_queue.enq(self)
76
- true
77
- else
78
- false
79
- end
73
+ def retry?
74
+ @status != "end" && @n_retry > 0
75
+ end
76
+
77
+ def no_more_retry
78
+ @n_retry == 0
80
79
  end
81
80
 
82
81
  def postprocess(location)
83
82
  @executed = true if !@task.actions.empty?
84
- tm_taskend = Time.now
83
+ #tm_taskend = Time.now
85
84
  if is_file_task?
86
- t = Time.now
85
+ #t = Time.now
87
86
  if File.exist?(name)
88
87
  @file_stat = File::Stat.new(name)
89
88
  @location = location
@@ -92,8 +91,23 @@ module Pwrake
92
91
  #Log.debug "postprocess time=#{Time.now-tm_taskend}"
93
92
  log_task
94
93
  @shell.current_task = nil if @shell
94
+ end
95
+
96
+ def retry_or_subsequent
97
+ @tried_hosts << @exec_host
95
98
  if @status=="end"
96
99
  @task.pw_enq_subsequents
100
+ elsif @n_retry > 0
101
+ @suggest_location = []
102
+ s="retry task (retry_count=#{@n_retry}): #{name}"
103
+ Log.warn(s)
104
+ $stderr.puts(s)
105
+ @n_retry -= 1
106
+ Rake.application.task_queue.enq(self)
107
+ else
108
+ s="give up retry (retry_count=0): #{name}"
109
+ Log.error(s)
110
+ $stderr.puts(s)
97
111
  end
98
112
  end
99
113
 
@@ -306,5 +320,10 @@ module Pwrake
306
320
  @n_used_cores ||= @property.n_used_cores(host_info)
307
321
  end
308
322
 
323
+ def untried_host?(host_info)
324
+ return true unless host_info
325
+ !@tried_hosts.include?(host_info.name)
326
+ end
327
+
309
328
  end
310
329
  end