pwrake 2.2.9 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/pwrake/branch/communicator_set.rb +1 -1
- data/lib/pwrake/master/master.rb +16 -13
- data/lib/pwrake/option/host_map.rb +11 -25
- data/lib/pwrake/option/option.rb +6 -3
- data/lib/pwrake/option/option_default_filesystem.rb +1 -1
- data/lib/pwrake/option/option_gfarm2fs.rb +1 -1
- data/lib/pwrake/queue/locality_aware_queue.rb +77 -84
- data/lib/pwrake/queue/non_locality_queue.rb +72 -0
- data/lib/pwrake/queue/queue_array.rb +75 -312
- data/lib/pwrake/queue/task_queue.rb +54 -91
- data/lib/pwrake/report/parallelism.rb +4 -6
- data/lib/pwrake/task/task_algorithm.rb +1 -0
- data/lib/pwrake/task/task_property.rb +49 -21
- data/lib/pwrake/task/task_wrapper.rb +17 -4
- data/lib/pwrake/version.rb +1 -1
- data/lib/pwrake/worker/executor.rb +1 -0
- data/lib/pwrake/worker/gfarm_directory.rb +38 -6
- data/lib/pwrake/worker/shared_directory.rb +3 -0
- metadata +3 -2
@@ -1,12 +1,14 @@
|
|
1
1
|
require "pwrake/queue/queue_array"
|
2
2
|
require "pwrake/queue/no_action_queue"
|
3
|
+
require "pwrake/queue/non_locality_queue"
|
3
4
|
|
4
5
|
module Pwrake
|
5
6
|
|
6
7
|
class TaskQueue
|
7
8
|
|
8
|
-
def initialize(hostinfo_by_id, group_map=nil)
|
9
|
-
@
|
9
|
+
def initialize(queue_class, hostinfo_by_id, group_map=nil)
|
10
|
+
@queue_class = Pwrake.const_get(queue_class)
|
11
|
+
@hostinfo_by_id = hostinfo_by_id
|
10
12
|
@q_no_action = NoActionQueue.new
|
11
13
|
@q_reserved = Hash.new
|
12
14
|
def @q_reserved.first
|
@@ -16,137 +18,99 @@ module Pwrake
|
|
16
18
|
self[keys.last]
|
17
19
|
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
pri = Rake.application.pwrake_options['QUEUE_PRIORITY'] || "LIHR"
|
21
|
+
pri = Rake.application.pwrake_options['QUEUE_PRIORITY'] || "LIFO"
|
22
22
|
case pri
|
23
|
-
when
|
24
|
-
@array_class =
|
25
|
-
when
|
26
|
-
@array_class = FifoQueueArray # Array # Fifo
|
27
|
-
when /lifo/i
|
23
|
+
when /^fifo$/i
|
24
|
+
@array_class = FifoQueueArray
|
25
|
+
when /^lifo$/i
|
28
26
|
@array_class = LifoQueueArray
|
29
|
-
when
|
27
|
+
when /^lihr$/i
|
30
28
|
@array_class = LifoHrfQueueArray
|
31
|
-
when /prhr/i
|
32
|
-
@array_class = PriorityHrfQueueArray
|
33
|
-
when /rank/i
|
34
|
-
@array_class = RankQueueArray
|
35
29
|
else
|
36
30
|
raise RuntimeError,"unknown option for QUEUE_PRIORITY: "+pri
|
37
31
|
end
|
38
32
|
Log.debug "@array_class=#{@array_class.inspect}"
|
39
|
-
init_queue(group_map)
|
40
|
-
end
|
41
33
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
@
|
46
|
-
end
|
34
|
+
# median number of cores
|
35
|
+
a = @hostinfo_by_id.map{|id,host_info| host_info.ncore}.sort
|
36
|
+
n = a.size
|
37
|
+
@median_core = (n%2==0) ? (a[n/2-1]+a[n/2])/2 : a[(n-1)/2]
|
47
38
|
|
48
|
-
|
39
|
+
@q = @queue_class.new(hostinfo_by_id,@array_class,@median_core,group_map)
|
40
|
+
end
|
49
41
|
|
50
|
-
# enq
|
51
42
|
def enq(tw)
|
52
43
|
if tw.nil? || tw.actions.empty?
|
53
44
|
@q_no_action.push(tw)
|
54
45
|
else
|
55
|
-
|
46
|
+
@q.enq_impl(tw)
|
56
47
|
end
|
57
48
|
end
|
58
49
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
@
|
66
|
-
else
|
67
|
-
@q_no_input.push(tw)
|
50
|
+
def deq_task(&block)
|
51
|
+
Log.debug "deq_task from:"+(empty? ? " (empty)" : "\n#{inspect_q}")
|
52
|
+
deq_noaction_task(&block)
|
53
|
+
deq_reserve(&block)
|
54
|
+
@q.deq_start
|
55
|
+
unless @q.empty?
|
56
|
+
@q.turns.each{|turn| deq_turn(turn,&block) }
|
68
57
|
end
|
69
58
|
end
|
70
59
|
|
71
60
|
def deq_noaction_task(&block)
|
72
|
-
Log.debug "deq_noaction_task:"+(empty? ? " (empty)" : "\n#{inspect_q}")
|
73
61
|
while tw = @q_no_action.shift
|
74
62
|
Log.debug "deq_noaction: #{tw.name}"
|
75
63
|
yield(tw)
|
76
64
|
end
|
77
65
|
end
|
78
66
|
|
79
|
-
def
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
67
|
+
def deq_reserve(&block)
|
68
|
+
@q_reserved.each do |host_info,tw|
|
69
|
+
n_idle = host_info.idle_cores || 0
|
70
|
+
n_core = tw.use_cores(host_info)
|
71
|
+
if n_idle >= n_core
|
72
|
+
@q_reserved.delete(host_info)
|
73
|
+
Log.debug "deq_reserve: #{tw.name} n_use_cores=#{n_core}"
|
74
|
+
yield(tw,host_info,n_core)
|
75
|
+
end
|
85
76
|
end
|
86
77
|
end
|
87
78
|
|
88
79
|
def deq_turn(turn,&block)
|
89
|
-
|
90
|
-
while true
|
80
|
+
begin
|
91
81
|
count = 0
|
92
82
|
@hostinfo_by_id.each_value do |host_info|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
end
|
102
|
-
@q_reserved.delete(host_info)
|
103
|
-
Log.debug "deq_reserve: #{tw.name} n_use_cores=#{n_use}"
|
104
|
-
elsif tw = deq_impl(host_info,turn)
|
105
|
-
n_use = tw.n_used_cores(host_info)
|
106
|
-
if n_idle < n_use
|
107
|
-
@q_reserved[host_info] = tw
|
108
|
-
Log.debug "reserve host: #{host_info.name} for #{tw.name} (#{n_use} cores)"
|
109
|
-
next
|
110
|
-
end
|
111
|
-
Log.debug "deq: #{tw.name} n_use_cores=#{n_use}"
|
112
|
-
end
|
113
|
-
if tw
|
114
|
-
yield(tw,host_info,n_use)
|
83
|
+
return if @q.turn_empty?(turn)
|
84
|
+
n_idle = host_info.idle_cores || 0
|
85
|
+
next if n_idle == 0 || @q_reserved[host_info]
|
86
|
+
if tw = @q.deq_impl(host_info,turn)
|
87
|
+
n_core = tw.use_cores(host_info)
|
88
|
+
if n_idle >= n_core
|
89
|
+
Log.debug "deq: #{tw.name} n_use_cores=#{n_core}"
|
90
|
+
yield(tw,host_info,n_core)
|
115
91
|
count += 1
|
116
|
-
|
92
|
+
else
|
93
|
+
@q_reserved[host_info] = tw
|
94
|
+
Log.debug "reserve host: #{host_info.name} for #{tw.name} (#{n_core} cores)"
|
117
95
|
end
|
118
96
|
end
|
119
97
|
end
|
120
|
-
|
121
|
-
end
|
122
|
-
queued
|
123
|
-
end
|
124
|
-
|
125
|
-
def turn_empty?(turn)
|
126
|
-
empty?
|
127
|
-
end
|
128
|
-
|
129
|
-
def deq_impl(host_info=nil, turn=nil)
|
130
|
-
@q_no_action.shift ||
|
131
|
-
@q_input.shift(host_info) ||
|
132
|
-
@q_no_input.shift(host_info)
|
98
|
+
end while count > 0
|
133
99
|
end
|
134
100
|
|
135
101
|
def clear
|
136
102
|
@q_no_action.clear
|
137
103
|
@q_reserved.clear
|
138
|
-
@
|
139
|
-
@q_no_input.clear
|
104
|
+
@q.clear
|
140
105
|
end
|
141
106
|
|
142
107
|
def empty?
|
143
108
|
@q_no_action.empty? &&
|
144
|
-
|
145
|
-
|
146
|
-
@q_no_input.empty?
|
109
|
+
@q_reserved.empty? &&
|
110
|
+
@q.empty?
|
147
111
|
end
|
148
112
|
|
149
|
-
def _qstr(h,q)
|
113
|
+
def self._qstr(h,q)
|
150
114
|
s = " #{h}: size=#{q.size} "
|
151
115
|
case q.size
|
152
116
|
when 0
|
@@ -156,20 +120,19 @@ module Pwrake
|
|
156
120
|
when 2
|
157
121
|
s << "[#{q.first.name}, #{q.last.name}]\n"
|
158
122
|
else
|
159
|
-
s << "[#{q.first.name}
|
123
|
+
s << "[#{q.first.name}, .., #{q.last.name}]\n"
|
160
124
|
end
|
161
125
|
s
|
162
126
|
end
|
163
127
|
|
164
128
|
def inspect_q
|
165
|
-
_qstr("noaction",@q_no_action) +
|
166
|
-
|
167
|
-
_qstr("
|
168
|
-
_qstr("reserved",@q_reserved)
|
129
|
+
TaskQueue._qstr("noaction",@q_no_action) +
|
130
|
+
@q.inspect_q +
|
131
|
+
TaskQueue._qstr("reserved",@q_reserved)
|
169
132
|
end
|
170
133
|
|
171
134
|
def drop_host(host_info)
|
135
|
+
@q.drop_host(host_info)
|
172
136
|
end
|
173
|
-
|
174
137
|
end
|
175
138
|
end
|
@@ -61,13 +61,12 @@ module Pwrake
|
|
61
61
|
|
62
62
|
|
63
63
|
def plot_parallelism(csvtable, base, fmt)
|
64
|
+
fimg = base+'/parallelism.'+fmt
|
64
65
|
a = count_start_end_from_csv_table(csvtable)
|
65
|
-
return if a.size < 4
|
66
|
+
return fimg if a.size < 4
|
66
67
|
|
67
68
|
#density = exec_density(a)
|
68
69
|
|
69
|
-
fimg = base+'/parallelism.'+fmt
|
70
|
-
|
71
70
|
n = a.size
|
72
71
|
i = 0
|
73
72
|
y = 0
|
@@ -120,13 +119,12 @@ plot '-' w l notitle
|
|
120
119
|
|
121
120
|
|
122
121
|
def plot_parallelism2(csvtable, base, fmt)
|
122
|
+
fimg = base+'/parallelism.'+fmt
|
123
123
|
a = count_start_end_from_csv_table(csvtable)
|
124
|
-
return if a.size < 4
|
124
|
+
return fimg if a.size < 4
|
125
125
|
|
126
126
|
density = exec_density(a)
|
127
127
|
|
128
|
-
fimg = base+'/parallelism.'+fmt
|
129
|
-
|
130
128
|
n = a.size
|
131
129
|
i = 0
|
132
130
|
y = 0
|
@@ -20,6 +20,7 @@ module Pwrake
|
|
20
20
|
def pw_search_tasks(args)
|
21
21
|
Log.debug "#{self.class}#pw_search_tasks start, task=#{name} args=#{args.inspect}"
|
22
22
|
cl = Pwrake.clock
|
23
|
+
TaskWrapper.clear_rank
|
23
24
|
task_args = TaskArguments.new(arg_names, args)
|
24
25
|
# not synchronize owing to fiber
|
25
26
|
search_with_call_chain(nil, task_args, InvocationChain::EMPTY)
|
@@ -7,8 +7,19 @@ module Pwrake
|
|
7
7
|
attr_accessor :subflow
|
8
8
|
|
9
9
|
def parse_description(description)
|
10
|
-
if /\bn_?cores?[=:]\s*([
|
11
|
-
|
10
|
+
if /\bn_?cores?[=:]\s*(-?[\/\d]+)/i =~ description
|
11
|
+
case x = $1
|
12
|
+
when /^\/\d+$/
|
13
|
+
@ncore = ('1'+x).to_r
|
14
|
+
when /^\d+\/\d+$/
|
15
|
+
@ncore = x.to_r
|
16
|
+
when /^-?\d+$/
|
17
|
+
@ncore = x.to_i
|
18
|
+
else
|
19
|
+
m = "invalid task property: ncore=#{x.inspect}"
|
20
|
+
Log.fatal m
|
21
|
+
raise RuntimeError,m
|
22
|
+
end
|
12
23
|
end
|
13
24
|
if /\bretry[=:]\s*(\d+)/i =~ description
|
14
25
|
@retry = $1.to_i
|
@@ -18,9 +29,13 @@ module Pwrake
|
|
18
29
|
@exclusive = true
|
19
30
|
end
|
20
31
|
end
|
32
|
+
@reserve = Rake.application.pwrake_options["RESERVE_NODE"]
|
21
33
|
if /\breserve[=:]\s*(\S+)/i =~ description
|
22
|
-
|
34
|
+
case $1
|
35
|
+
when /^(y|t|on)/i
|
23
36
|
@reserve = true
|
37
|
+
when /^(n|f|off)/i
|
38
|
+
@reserve = false
|
24
39
|
end
|
25
40
|
end
|
26
41
|
if /\ballow[=:]\s*(\S+)/i =~ description
|
@@ -42,7 +57,6 @@ module Pwrake
|
|
42
57
|
@disable_steal = true
|
43
58
|
end
|
44
59
|
end
|
45
|
-
@use_cores = nil
|
46
60
|
end
|
47
61
|
|
48
62
|
def merge(prop)
|
@@ -55,11 +69,39 @@ module Pwrake
|
|
55
69
|
@retry = prop.retry if prop.retry
|
56
70
|
@disable_steal = prop.disable_steal if prop.disable_steal
|
57
71
|
@subflow = prop.subflow if prop.subflow
|
58
|
-
@use_cores = nil
|
59
72
|
end
|
60
73
|
|
61
|
-
def use_cores
|
62
|
-
|
74
|
+
def use_cores(arg)
|
75
|
+
case arg
|
76
|
+
when HostInfo
|
77
|
+
ppn = arg.ncore
|
78
|
+
when Integer
|
79
|
+
ppn = arg
|
80
|
+
if ppn < 1
|
81
|
+
raise "invalid ppn: #{ppn}"
|
82
|
+
end
|
83
|
+
else
|
84
|
+
raise "invalid ppn: #{ppn}"
|
85
|
+
end
|
86
|
+
|
87
|
+
if @exclusive
|
88
|
+
return ppn
|
89
|
+
end
|
90
|
+
|
91
|
+
case @ncore
|
92
|
+
when Rational
|
93
|
+
if @ncore > 0 && @ncore <= 1
|
94
|
+
return [(@ncore*ppn).to_i, 1].min
|
95
|
+
end
|
96
|
+
when 1-ppn..ppn
|
97
|
+
return (@ncore>0) ? @ncore : @ncore+ppn
|
98
|
+
when nil
|
99
|
+
return 1
|
100
|
+
end
|
101
|
+
|
102
|
+
m = "ncore=#{@ncore} is out of range of cores per node: #{ppn}"
|
103
|
+
Log.fatal m
|
104
|
+
raise RuntimeError,m
|
63
105
|
end
|
64
106
|
|
65
107
|
def accept_host(host_info)
|
@@ -87,19 +129,5 @@ module Pwrake
|
|
87
129
|
return true
|
88
130
|
end
|
89
131
|
|
90
|
-
def n_used_cores(host_info=nil)
|
91
|
-
n = use_cores
|
92
|
-
if n == 1
|
93
|
-
return 1
|
94
|
-
elsif host_info
|
95
|
-
return host_info.check_cores(n)
|
96
|
-
elsif n < 1
|
97
|
-
m = "invalid for use_cores=#{n}"
|
98
|
-
Log.fatal m
|
99
|
-
raise RuntimeError,m
|
100
|
-
end
|
101
|
-
return n
|
102
|
-
end
|
103
|
-
|
104
132
|
end
|
105
133
|
end
|
@@ -9,6 +9,7 @@ module Pwrake
|
|
9
9
|
|
10
10
|
@@current_id = 1
|
11
11
|
@@task_logger = nil
|
12
|
+
@@instances = []
|
12
13
|
|
13
14
|
def initialize(task,task_args=nil)
|
14
15
|
@task = task
|
@@ -31,14 +32,17 @@ module Pwrake
|
|
31
32
|
@exec_host_id = nil
|
32
33
|
@tried_hosts = {}
|
33
34
|
@n_retry = @property.retry || Rake.application.pwrake_options["RETRY"] || 1
|
35
|
+
@@instances << self
|
34
36
|
end
|
35
37
|
|
36
38
|
def_delegators :@task, :name, :actions, :prerequisites, :subsequents
|
39
|
+
def_delegators :@property, :use_cores
|
37
40
|
|
38
41
|
attr_reader :task, :task_id, :group, :group_id, :file_stat
|
39
42
|
attr_reader :location
|
40
43
|
attr_reader :assigned
|
41
44
|
attr_reader :tried_hosts
|
45
|
+
attr_reader :n_used_cores
|
42
46
|
attr_accessor :executed
|
43
47
|
attr_accessor :exec_host, :exec_host_id
|
44
48
|
attr_accessor :shell_id, :status
|
@@ -63,6 +67,11 @@ module Pwrake
|
|
63
67
|
@@task_logger.close if @@task_logger
|
64
68
|
end
|
65
69
|
|
70
|
+
def self.clear_rank
|
71
|
+
Log.debug "#{self}.clear_rank"
|
72
|
+
@@instances.each{|w| w.clear_rank}
|
73
|
+
end
|
74
|
+
|
66
75
|
def preprocess
|
67
76
|
@time_start = Time.now
|
68
77
|
@clock_start = Pwrake.clock
|
@@ -267,11 +276,15 @@ module Pwrake
|
|
267
276
|
end
|
268
277
|
@rank = max_rank + step
|
269
278
|
end
|
270
|
-
|
279
|
+
Log.debug "Task[#{name}] rank=#{@rank.inspect}"
|
271
280
|
end
|
272
281
|
@rank
|
273
282
|
end
|
274
283
|
|
284
|
+
def clear_rank
|
285
|
+
@rank = nil
|
286
|
+
end
|
287
|
+
|
275
288
|
def file_size
|
276
289
|
@file_stat ? @file_stat.size : 0
|
277
290
|
end
|
@@ -340,8 +353,8 @@ module Pwrake
|
|
340
353
|
@priority || 0
|
341
354
|
end
|
342
355
|
|
343
|
-
def
|
344
|
-
@n_used_cores
|
356
|
+
def set_used_cores(ncore)
|
357
|
+
@n_used_cores = ncore
|
345
358
|
end
|
346
359
|
|
347
360
|
def acceptable_for(host_info)
|
@@ -354,7 +367,7 @@ module Pwrake
|
|
354
367
|
if @property.reserve
|
355
368
|
return true
|
356
369
|
end
|
357
|
-
host_info.accept_core(@property.use_cores)
|
370
|
+
host_info.accept_core(@property.use_cores(host_info.ncore))
|
358
371
|
end
|
359
372
|
|
360
373
|
def tried_host?(host_info)
|
data/lib/pwrake/version.rb
CHANGED