rbbt-util 5.31.14 → 5.31.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/batch.rb +35 -3
- data/lib/rbbt/hpc/orchestrate.rb +8 -1
- data/lib/rbbt/hpc/slurm.rb +6 -3
- data/lib/rbbt/workflow/util/trace.rb +126 -90
- data/share/rbbt_commands/hpc/clean +1 -27
- data/share/rbbt_commands/hpc/list +4 -28
- data/share/rbbt_commands/hpc/orchestrate +1 -27
- data/share/rbbt_commands/hpc/tail +1 -27
- data/share/rbbt_commands/hpc/task +1 -27
- data/share/rbbt_commands/workflow/trace +9 -195
- data/share/rbbt_commands/workflow/write_info +17 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9616d2b1893ed3444d432e99b4ef6ddaf7d61836ee1c23dbd4deb1efa767b80d
|
4
|
+
data.tar.gz: c5dc433a171c909eb2711ee37832fb094d271fcf6099f0394706af3cd8da1520
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: efe3b9c85ec947f900283ca4b2a3e651617558bfd7d85dbea80180ad377e18150df8fccde9efecdd2493d9e4f0ed7979dbdefe2f815e2480d910d5e507670422
|
7
|
+
data.tar.gz: 24bc0e0e6dda34cbf7ad4ea08cb1cbeb18b3bb55b42a79a4590183fd86e7247d7aeb648c31673ef5601f632828d71ae29594e3c6b8f5c7c860dab6592a373f68
|
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -6,6 +6,36 @@ module HPC
|
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
9
|
+
def self.batch_system(batch_system = 'auto')
|
10
|
+
case batch_system.to_s.downcase
|
11
|
+
when 'slurm'
|
12
|
+
HPC::SLURM
|
13
|
+
when 'lsf'
|
14
|
+
HPC::LSF
|
15
|
+
when 'auto'
|
16
|
+
case $previous_commands.last
|
17
|
+
when 'slurm'
|
18
|
+
HPC::SLURM
|
19
|
+
when 'lsf'
|
20
|
+
HPC::LSF
|
21
|
+
else
|
22
|
+
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
23
|
+
when 'slurm'
|
24
|
+
HPC::SLURM
|
25
|
+
when 'lsf'
|
26
|
+
HPC::LSF
|
27
|
+
else
|
28
|
+
case ENV["BATCH_SYSTEM"].to_s.downcase
|
29
|
+
when 'slurm'
|
30
|
+
HPC::SLURM
|
31
|
+
when 'lsf'
|
32
|
+
HPC::LSF
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
9
39
|
module TemplateGeneration
|
10
40
|
def exec_cmd(job, options = {})
|
11
41
|
env_cmd = Misc.process_options options, :env_cmd
|
@@ -30,12 +60,12 @@ module HPC
|
|
30
60
|
-B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
|
31
61
|
-B "#{options[:batch_dir]}" \
|
32
62
|
-B /scratch/tmp \
|
33
|
-
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
63
|
+
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
34
64
|
-B #{scratch_group_dir} \
|
35
65
|
-B #{projects_group_dir} \
|
36
66
|
-B /apps/ \
|
37
67
|
-B ~/git:"#{contain}/git":ro \
|
38
|
-
#{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
|
68
|
+
#{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
|
39
69
|
-B ~/.rbbt:"#{contain}/home/":ro)
|
40
70
|
end
|
41
71
|
|
@@ -74,7 +104,7 @@ module HPC
|
|
74
104
|
[name, dep.path] * "="
|
75
105
|
end * ","
|
76
106
|
|
77
|
-
options[:override_deps] = override_deps
|
107
|
+
options[:override_deps] = override_deps unless override_deps.empty?
|
78
108
|
end
|
79
109
|
|
80
110
|
# Save inputs into inputs_dir
|
@@ -216,6 +246,7 @@ EOF
|
|
216
246
|
:fexit => File.join(batch_dir, 'exit.status'),
|
217
247
|
:fsync => File.join(batch_dir, 'sync.log'),
|
218
248
|
:fsexit => File.join(batch_dir, 'sync.status'),
|
249
|
+
:fenv => File.join(batch_dir, 'env.vars'),
|
219
250
|
:fcmd => File.join(batch_dir, 'command.batch')
|
220
251
|
|
221
252
|
batch_options
|
@@ -441,6 +472,7 @@ exit $exit_status
|
|
441
472
|
|
442
473
|
# #{Log.color :green, "1. Prepare environment"}
|
443
474
|
#{prepare_environment}
|
475
|
+
env > #{batch_options[:fenv]}
|
444
476
|
|
445
477
|
# #{Log.color :green, "2. Execute"}
|
446
478
|
#{execute}
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -145,6 +145,8 @@ module HPC
|
|
145
145
|
job_rules.delete :workflow
|
146
146
|
|
147
147
|
|
148
|
+
option_config_keys = options[:config_keys]
|
149
|
+
|
148
150
|
job_options = IndiferentHash.setup(options.merge(job_rules).merge(:batch_dependencies => dep_ids))
|
149
151
|
job_options.delete :orchestration_rules
|
150
152
|
|
@@ -154,6 +156,11 @@ module HPC
|
|
154
156
|
job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
|
155
157
|
end
|
156
158
|
|
159
|
+
if option_config_keys
|
160
|
+
option_config_keys = option_config_keys.gsub(/,\s+/,',')
|
161
|
+
job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + option_config_keys : option_config_keys
|
162
|
+
end
|
163
|
+
|
157
164
|
if options[:piggyback]
|
158
165
|
manifest = options[:piggyback].uniq
|
159
166
|
manifest += [job]
|
@@ -165,7 +172,7 @@ module HPC
|
|
165
172
|
new_config_keys = self.job_rules(rules, job)[:config_keys]
|
166
173
|
if new_config_keys
|
167
174
|
new_config_keys = new_config_keys.gsub(/,\s+/,',')
|
168
|
-
job_options[:config_keys] = job_options[:config_keys] ?
|
175
|
+
job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + new_config_keys : new_config_keys
|
169
176
|
end
|
170
177
|
|
171
178
|
job_options.delete :piggyback
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -8,9 +8,12 @@ module HPC
|
|
8
8
|
|
9
9
|
def self.batch_system_variables
|
10
10
|
<<-EOF
|
11
|
-
let
|
12
|
-
|
13
|
-
|
11
|
+
let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( 1024 * $(nproc) / $SLURM_CPUS_PER_TASK )"
|
12
|
+
[ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
|
13
|
+
export MAX_MEMORY_DEFAULT
|
14
|
+
export MAX_MEMORY
|
15
|
+
export BATCH_JOB_ID=$SLURM_JOB_ID
|
16
|
+
export BATCH_SYSTEM=SLURM
|
14
17
|
EOF
|
15
18
|
end
|
16
19
|
|
@@ -1,12 +1,8 @@
|
|
1
1
|
require 'rbbt/util/R'
|
2
2
|
|
3
3
|
module Workflow
|
4
|
-
def self.
|
5
|
-
|
6
|
-
jobs = []
|
7
|
-
seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
|
8
|
-
|
9
|
-
data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
|
4
|
+
def self.trace_job_times(jobs, fix_gap = false)
|
5
|
+
data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
|
10
6
|
min_start = nil
|
11
7
|
max_done = nil
|
12
8
|
jobs.each do |job|
|
@@ -14,10 +10,10 @@ module Workflow
|
|
14
10
|
started = job.info[:started]
|
15
11
|
ddone = job.info[:done]
|
16
12
|
|
17
|
-
code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "
|
18
|
-
code =
|
13
|
+
code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * " · "
|
14
|
+
code = job.name + " - " + code
|
19
15
|
|
20
|
-
data[
|
16
|
+
data[job.path] = [code,job.workflow.to_s, job.task_name, started, ddone]
|
21
17
|
if min_start.nil?
|
22
18
|
min_start = started
|
23
19
|
else
|
@@ -39,7 +35,7 @@ module Workflow
|
|
39
35
|
value["End"] - min_start
|
40
36
|
end
|
41
37
|
|
42
|
-
if
|
38
|
+
if fix_gap
|
43
39
|
ranges = []
|
44
40
|
data.through do |k,values|
|
45
41
|
start, eend = values.values_at "Start.second", "End.second"
|
@@ -67,115 +63,155 @@ module Workflow
|
|
67
63
|
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
68
64
|
value - gap
|
69
65
|
end
|
66
|
+
|
67
|
+
total_gaps = Misc.sum(gaps.collect{|k,v| v})
|
68
|
+
Log.info "Total gaps: #{total_gaps} seconds"
|
70
69
|
end
|
71
70
|
|
71
|
+
start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
|
72
|
+
eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
|
73
|
+
total = eend - start
|
74
|
+
Log.info "Total time elapsed: #{total} seconds"
|
75
|
+
|
76
|
+
data
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.plot_trace_job_times(data, plot, width=800, height=800)
|
80
|
+
data.R <<-EOF, [:svg]
|
81
|
+
rbbt.require('tidyverse')
|
82
|
+
rbbt.require('ggplot2')
|
83
|
+
|
84
|
+
names(data) <- make.names(names(data))
|
85
|
+
data$id = data$Code
|
86
|
+
data$content = data$Task
|
87
|
+
data$start = data$Start
|
88
|
+
data$end = data$End
|
89
|
+
data$Project = data$Workflow
|
90
|
+
|
91
|
+
tasks = data
|
92
|
+
|
93
|
+
#theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
|
94
|
+
theme_gantt <- function(base_size=11, base_family="Sans Serif") {
|
95
|
+
ret <- theme_bw(base_size, base_family) %+replace%
|
96
|
+
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
|
97
|
+
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
|
98
|
+
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
|
99
|
+
panel.border = element_blank(), axis.line=element_blank(),
|
100
|
+
panel.grid.minor=element_blank(),
|
101
|
+
panel.grid.major.y = element_blank(),
|
102
|
+
panel.grid.major.x = element_line(size=0.5, colour="grey80"),
|
103
|
+
axis.ticks=element_blank(),
|
104
|
+
legend.position="bottom",
|
105
|
+
axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
|
106
|
+
strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
|
107
|
+
strip.background=element_rect(fill="#ffffff", colour=NA),
|
108
|
+
panel.spacing.y=unit(1.5, "lines"),
|
109
|
+
legend.key = element_blank())
|
110
|
+
|
111
|
+
ret
|
112
|
+
}
|
113
|
+
|
114
|
+
tasks.long <- tasks %>%
|
115
|
+
gather(date.type, task.date, -c(Code,Project, Task, id, Start.second, End.second)) %>%
|
116
|
+
arrange(date.type, task.date) %>%
|
117
|
+
mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
|
118
|
+
|
119
|
+
x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
|
120
|
+
|
121
|
+
timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
|
122
|
+
geom_segment() +
|
123
|
+
geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
|
124
|
+
guides(colour=guide_legend(title=NULL)) +
|
125
|
+
labs(x=NULL, y=NULL) +
|
126
|
+
theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
|
127
|
+
|
128
|
+
rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
|
129
|
+
EOF
|
130
|
+
end
|
131
|
+
|
132
|
+
def self.trace_job_summary(jobs, report_keys = [])
|
72
133
|
tasks_info = {}
|
73
134
|
|
135
|
+
report_keys = report_keys.collect{|k| k.to_s}
|
136
|
+
|
74
137
|
jobs.each do |dep|
|
75
138
|
next unless dep.info[:done]
|
76
139
|
task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
|
77
|
-
info = tasks_info[task] ||= {}
|
140
|
+
info = tasks_info[task] ||= IndiferentHash.setup({})
|
141
|
+
dep_info = IndiferentHash.setup(dep.info)
|
78
142
|
|
79
|
-
time =
|
143
|
+
time = dep_info[:done] - dep_info[:started]
|
80
144
|
info[:time] ||= []
|
81
145
|
info[:time] << time
|
82
146
|
|
83
|
-
|
84
|
-
|
85
|
-
|
147
|
+
report_keys.each do |key|
|
148
|
+
info[key] = dep_info[key]
|
149
|
+
end
|
150
|
+
|
86
151
|
dep.info[:config_keys].select do |kinfo|
|
87
152
|
key, value, tokens = kinfo
|
88
|
-
key = key.to_s
|
89
|
-
cpus = value if key.include? 'cpu'
|
90
|
-
spark = value if key == 'spark'
|
91
|
-
shard = value if key == 'shard'
|
92
|
-
end
|
93
153
|
|
94
|
-
|
95
|
-
|
96
|
-
info[:shard] = shard
|
154
|
+
info[key.to_s] = value if report_keys.include? key.to_s
|
155
|
+
end
|
97
156
|
end
|
98
157
|
|
99
|
-
|
158
|
+
summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
|
100
159
|
|
101
160
|
tasks_info.each do |task, info|
|
102
|
-
time_lists
|
103
|
-
avg_time = Misc.mean(time_lists)
|
104
|
-
total_time = Misc.sum(time_lists)
|
161
|
+
time_lists = info[:time]
|
162
|
+
avg_time = Misc.mean(time_lists).to_i
|
163
|
+
total_time = Misc.sum(time_lists).to_i
|
105
164
|
calls = time_lists.length
|
106
|
-
|
165
|
+
summary[task] = [calls, avg_time, total_time]
|
107
166
|
end
|
108
167
|
|
109
|
-
|
168
|
+
report_keys.each do |key|
|
169
|
+
summary.add_field Misc.humanize(key) do |task|
|
170
|
+
tasks_info[task][key]
|
171
|
+
end
|
172
|
+
end if Array === report_keys && report_keys.any?
|
110
173
|
|
111
|
-
|
112
|
-
|
113
|
-
total = eend - start
|
114
|
-
Log.info "Total time elapsed: #{total} seconds"
|
174
|
+
summary
|
175
|
+
end
|
115
176
|
|
116
|
-
|
117
|
-
|
118
|
-
|
177
|
+
def self.trace(seed_jobs, options = {})
|
178
|
+
jobs = []
|
179
|
+
seed_jobs.each do |step|
|
180
|
+
jobs += step.rec_dependencies + [step]
|
181
|
+
|
182
|
+
step.info[:archived_info].each do |path,ainfo|
|
183
|
+
archived_step = Step.new path
|
184
|
+
class << archived_step
|
185
|
+
self
|
186
|
+
end.define_method :info do
|
187
|
+
ainfo
|
188
|
+
end
|
189
|
+
jobs << archived_step
|
190
|
+
end if step.info[:archived_info]
|
119
191
|
end
|
120
192
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
data
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
ret <- theme_bw(base_size, base_family) %+replace%
|
139
|
-
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
|
140
|
-
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
|
141
|
-
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
|
142
|
-
panel.border = element_blank(), axis.line=element_blank(),
|
143
|
-
panel.grid.minor=element_blank(),
|
144
|
-
panel.grid.major.y = element_blank(),
|
145
|
-
panel.grid.major.x = element_line(size=0.5, colour="grey80"),
|
146
|
-
axis.ticks=element_blank(),
|
147
|
-
legend.position="bottom",
|
148
|
-
axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
|
149
|
-
strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
|
150
|
-
strip.background=element_rect(fill="#ffffff", colour=NA),
|
151
|
-
panel.spacing.y=unit(1.5, "lines"),
|
152
|
-
legend.key = element_blank())
|
153
|
-
|
154
|
-
ret
|
155
|
-
}
|
156
|
-
|
157
|
-
tasks.long <- tasks %>%
|
158
|
-
gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
|
159
|
-
arrange(date.type, task.date) %>%
|
160
|
-
mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
|
161
|
-
|
162
|
-
x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
|
163
|
-
|
164
|
-
timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
|
165
|
-
geom_segment() +
|
166
|
-
geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
|
167
|
-
guides(colour=guide_legend(title=NULL)) +
|
168
|
-
labs(x=NULL, y=NULL) +
|
169
|
-
theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
|
170
|
-
|
171
|
-
rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
|
172
|
-
EOF
|
173
|
-
end
|
193
|
+
jobs = jobs.uniq.sort_by{|job| t = job.info[:done]; t || Open.mtime(job.path) || 0 }
|
194
|
+
|
195
|
+
data = trace_job_times(jobs, options[:fix_gap])
|
196
|
+
|
197
|
+
report_keys = options[:report_keys] || ""
|
198
|
+
report_keys = report_keys.split(/,\s*/) if String === report_keys
|
199
|
+
summary = trace_job_summary(jobs, report_keys)
|
200
|
+
|
201
|
+
raise "No jobs to process" if data.size == 0
|
202
|
+
|
203
|
+
plot, size, width, height = options.values_at :plot, :width, :height
|
204
|
+
|
205
|
+
size = 800 if size.nil?
|
206
|
+
width = size * 2 if width.nil?
|
207
|
+
height = size if height.nil?
|
208
|
+
|
209
|
+
plot_trace_job_times(data, plot, width, height) if plot
|
174
210
|
|
175
211
|
if options[:plot_data]
|
176
212
|
data
|
177
213
|
else
|
178
|
-
|
214
|
+
summary
|
179
215
|
end
|
180
216
|
|
181
217
|
end
|
@@ -36,33 +36,7 @@ end
|
|
36
36
|
batch_system = options.delete :batch_system
|
37
37
|
batch_system ||= 'auto'
|
38
38
|
|
39
|
-
HPC::BATCH_MODULE =
|
40
|
-
when 'slurm'
|
41
|
-
HPC::SLURM
|
42
|
-
when 'lsf'
|
43
|
-
HPC::LSF
|
44
|
-
when 'auto'
|
45
|
-
case $previous_commands.last
|
46
|
-
when 'slurm'
|
47
|
-
HPC::SLURM
|
48
|
-
when 'lsf'
|
49
|
-
HPC::LSF
|
50
|
-
else
|
51
|
-
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
52
|
-
when 'slurm'
|
53
|
-
HPC::SLURM
|
54
|
-
when 'lsf'
|
55
|
-
HPC::LSF
|
56
|
-
else
|
57
|
-
case ENV["BATCH_SYSTEM"].to_s.downcase
|
58
|
-
when 'slurm'
|
59
|
-
HPC::SLURM
|
60
|
-
when 'lsf'
|
61
|
-
HPC::LSF
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
39
|
+
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
66
40
|
|
67
41
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
68
42
|
|
@@ -40,33 +40,7 @@ end
|
|
40
40
|
batch_system = options.delete :batch_system
|
41
41
|
batch_system ||= 'auto'
|
42
42
|
|
43
|
-
HPC::BATCH_MODULE =
|
44
|
-
when 'slurm'
|
45
|
-
HPC::SLURM
|
46
|
-
when 'lsf'
|
47
|
-
HPC::LSF
|
48
|
-
when 'auto'
|
49
|
-
case $previous_commands.last
|
50
|
-
when 'slurm'
|
51
|
-
HPC::SLURM
|
52
|
-
when 'lsf'
|
53
|
-
HPC::LSF
|
54
|
-
else
|
55
|
-
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
56
|
-
when 'slurm'
|
57
|
-
HPC::SLURM
|
58
|
-
when 'lsf'
|
59
|
-
HPC::LSF
|
60
|
-
else
|
61
|
-
case ENV["BATCH_SYSTEM"].to_s.downcase
|
62
|
-
when 'slurm'
|
63
|
-
HPC::SLURM
|
64
|
-
when 'lsf'
|
65
|
-
HPC::LSF
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
43
|
+
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
70
44
|
|
71
45
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
72
46
|
|
@@ -108,7 +82,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
108
82
|
cmd = nil
|
109
83
|
end
|
110
84
|
|
111
|
-
if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
|
85
|
+
if m = command_txt.match(/^export BATCH_SYSTEM=(.*)/)
|
112
86
|
job_batch_system = m[1].downcase
|
113
87
|
else
|
114
88
|
job_batch_system = nil
|
@@ -235,6 +209,8 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
|
|
235
209
|
text = CMD.cmd('grep "^#SBATCH" |tail -n +5', :in => Open.read(fcmd)).read.strip
|
236
210
|
when 'lsf'
|
237
211
|
text = CMD.cmd('grep "^#BSUB" |tail -n +5', :in => Open.read(fcmd)).read.strip
|
212
|
+
else
|
213
|
+
text = ""
|
238
214
|
end
|
239
215
|
lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
|
240
216
|
puts Log.color :yellow, lines * "\n"
|
@@ -34,33 +34,7 @@ EOF
|
|
34
34
|
batch_system = $slurm_options.delete :batch_system
|
35
35
|
batch_system ||= 'auto'
|
36
36
|
|
37
|
-
HPC::BATCH_MODULE =
|
38
|
-
when 'slurm'
|
39
|
-
HPC::SLURM
|
40
|
-
when 'lsf'
|
41
|
-
HPC::LSF
|
42
|
-
when 'auto'
|
43
|
-
case $previous_commands.last
|
44
|
-
when 'slurm'
|
45
|
-
HPC::SLURM
|
46
|
-
when 'lsf'
|
47
|
-
HPC::LSF
|
48
|
-
else
|
49
|
-
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
50
|
-
when 'slurm'
|
51
|
-
HPC::SLURM
|
52
|
-
when 'lsf'
|
53
|
-
HPC::LSF
|
54
|
-
else
|
55
|
-
case ENV["BATCH_SYSTEM"].to_s.downcase
|
56
|
-
when 'slurm'
|
57
|
-
HPC::SLURM
|
58
|
-
when 'lsf'
|
59
|
-
HPC::LSF
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
37
|
+
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
64
38
|
|
65
39
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
66
40
|
|
@@ -40,33 +40,7 @@ end
|
|
40
40
|
batch_system = options.delete :batch_system
|
41
41
|
batch_system ||= 'auto'
|
42
42
|
|
43
|
-
HPC::BATCH_MODULE =
|
44
|
-
when 'slurm'
|
45
|
-
HPC::SLURM
|
46
|
-
when 'lsf'
|
47
|
-
HPC::LSF
|
48
|
-
when 'auto'
|
49
|
-
case $previous_commands.last
|
50
|
-
when 'slurm'
|
51
|
-
HPC::SLURM
|
52
|
-
when 'lsf'
|
53
|
-
HPC::LSF
|
54
|
-
else
|
55
|
-
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
56
|
-
when 'slurm'
|
57
|
-
HPC::SLURM
|
58
|
-
when 'lsf'
|
59
|
-
HPC::LSF
|
60
|
-
else
|
61
|
-
case ENV["BATCH_SYSTEM"].to_s.downcase
|
62
|
-
when 'slurm'
|
63
|
-
HPC::SLURM
|
64
|
-
when 'lsf'
|
65
|
-
HPC::LSF
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
43
|
+
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
70
44
|
|
71
45
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
72
46
|
|
@@ -33,33 +33,7 @@ EOF
|
|
33
33
|
batch_system = $slurm_options.delete :batch_system
|
34
34
|
batch_system ||= 'auto'
|
35
35
|
|
36
|
-
HPC::BATCH_MODULE =
|
37
|
-
when 'slurm'
|
38
|
-
HPC::SLURM
|
39
|
-
when 'lsf'
|
40
|
-
HPC::LSF
|
41
|
-
when 'auto'
|
42
|
-
case $previous_commands.last
|
43
|
-
when 'slurm'
|
44
|
-
HPC::SLURM
|
45
|
-
when 'lsf'
|
46
|
-
HPC::LSF
|
47
|
-
else
|
48
|
-
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
49
|
-
when 'slurm'
|
50
|
-
HPC::SLURM
|
51
|
-
when 'lsf'
|
52
|
-
HPC::LSF
|
53
|
-
else
|
54
|
-
case ENV["BATCH_SYSTEM"].to_s.downcase
|
55
|
-
when 'slurm'
|
56
|
-
HPC::SLURM
|
57
|
-
when 'lsf'
|
58
|
-
HPC::LSF
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
36
|
+
HPC::BATCH_MODULE = HPC.batch_system batch_system
|
63
37
|
|
64
38
|
raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
|
65
39
|
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rbbt/workflow'
|
4
|
+
require 'rbbt/workflow/util/trace'
|
4
5
|
|
5
6
|
require 'rbbt-util'
|
6
7
|
require 'fileutils'
|
@@ -16,30 +17,26 @@ require 'rbbt/util/R'
|
|
16
17
|
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
17
18
|
|
18
19
|
options = SOPT.setup <<EOF
|
19
|
-
Examine the
|
20
|
+
Examine the execution trace of a job or set of jobs
|
20
21
|
|
21
22
|
$ rbbt workflow trace <job-result>
|
22
23
|
|
23
24
|
-h--help Help
|
25
|
+
-fg--fix_gap Remove execution gaps
|
26
|
+
-rk--report_keys* Config keys and info fields to report
|
27
|
+
-p--plot* Plot file
|
24
28
|
-w--width* Image Width
|
25
29
|
-h--height* Image Height
|
26
|
-
-p--plot* Plot file
|
27
30
|
-s--size* Image Size (Height and Width)
|
28
|
-
-fg--fix_gap Remove execution gaps
|
29
31
|
-pd--plot_data Print plot data
|
30
32
|
EOF
|
31
33
|
|
32
34
|
SOPT.usage if options[:help]
|
33
35
|
|
36
|
+
|
34
37
|
files = ARGV
|
35
38
|
plot = options[:plot]
|
36
39
|
|
37
|
-
width, height, size = options.values_at :width, :height, :size
|
38
|
-
|
39
|
-
size = 800 if size.nil?
|
40
|
-
width = size if width.nil?
|
41
|
-
height = size if height.nil?
|
42
|
-
|
43
40
|
def get_step(file)
|
44
41
|
file = File.expand_path(file)
|
45
42
|
file = file.sub(/\.(info|files)/,'')
|
@@ -47,191 +44,8 @@ def get_step(file)
|
|
47
44
|
end
|
48
45
|
|
49
46
|
jobs = []
|
50
|
-
files.
|
51
|
-
|
52
|
-
|
53
|
-
jobs += step.rec_dependencies + [step]
|
54
|
-
|
55
|
-
step.info[:archived_info].each do |path,ainfo|
|
56
|
-
archived_step = Step.new path
|
57
|
-
class << archived_step
|
58
|
-
self
|
59
|
-
end.define_method :info do
|
60
|
-
ainfo
|
61
|
-
end
|
62
|
-
jobs << archived_step
|
63
|
-
end if step.info[:archived_info]
|
64
|
-
end
|
65
|
-
|
66
|
-
jobs = jobs.select{|job| job.info[:done]}.sort_by{|job| job.info[:started]}
|
67
|
-
|
68
|
-
data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
|
69
|
-
min_start = nil
|
70
|
-
max_done = nil
|
71
|
-
jobs.each do |job|
|
72
|
-
next unless job.info[:done]
|
73
|
-
started = job.info[:started]
|
74
|
-
ddone = job.info[:done]
|
75
|
-
|
76
|
-
code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
|
77
|
-
code = code + '.' + job.name
|
78
|
-
|
79
|
-
data[code] = [job.workflow.to_s, job.task_name, started, ddone]
|
80
|
-
if min_start.nil?
|
81
|
-
min_start = started
|
82
|
-
else
|
83
|
-
min_start = started if started < min_start
|
84
|
-
end
|
85
|
-
|
86
|
-
if max_done.nil?
|
87
|
-
max_done = ddone
|
88
|
-
else
|
89
|
-
max_done = ddone if ddone > max_done
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
data.add_field "Start.second" do |k,value|
|
94
|
-
value["Start"] - min_start
|
95
|
-
end
|
96
|
-
|
97
|
-
data.add_field "End.second" do |k,value|
|
98
|
-
value["End"] - min_start
|
99
|
-
end
|
100
|
-
|
101
|
-
if options[:fix_gap]
|
102
|
-
ranges = []
|
103
|
-
data.through do |k,values|
|
104
|
-
start, eend = values.values_at "Start.second", "End.second"
|
105
|
-
|
106
|
-
ranges << (start..eend)
|
107
|
-
end
|
108
|
-
|
109
|
-
gaps = {}
|
110
|
-
last = nil
|
111
|
-
Misc.collapse_ranges(ranges).each do |range|
|
112
|
-
start = range.begin
|
113
|
-
eend = range.end
|
114
|
-
if last
|
115
|
-
gaps[last] = start - last
|
116
|
-
end
|
117
|
-
last = eend
|
118
|
-
end
|
119
|
-
|
120
|
-
data.process "End.second" do |value,k,values|
|
121
|
-
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
122
|
-
value - gap
|
123
|
-
end
|
124
|
-
|
125
|
-
data.process "Start.second" do |value,k,values|
|
126
|
-
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
127
|
-
value - gap
|
128
|
-
end
|
47
|
+
jobs = files.collect do |file|
|
48
|
+
get_step file
|
129
49
|
end
|
130
50
|
|
131
|
-
|
132
|
-
|
133
|
-
jobs.each do |dep|
|
134
|
-
next unless dep.info[:done]
|
135
|
-
task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
|
136
|
-
info = tasks_info[task] ||= {}
|
137
|
-
|
138
|
-
time = dep.info[:done] - dep.info[:started]
|
139
|
-
info[:time] ||= []
|
140
|
-
info[:time] << time
|
141
|
-
|
142
|
-
cpus = nil
|
143
|
-
spark = false
|
144
|
-
shard = false
|
145
|
-
dep.info[:config_keys].select do |kinfo|
|
146
|
-
key, value, tokens = kinfo
|
147
|
-
key = key.to_s
|
148
|
-
cpus = value if key.include? 'cpu'
|
149
|
-
spark = value if key == 'spark'
|
150
|
-
shard = value if key == 'shard'
|
151
|
-
end
|
152
|
-
|
153
|
-
info[:cpus] = cpus || 1
|
154
|
-
info[:spark] = spark
|
155
|
-
info[:shard] = shard
|
156
|
-
end
|
157
|
-
|
158
|
-
stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
|
159
|
-
|
160
|
-
tasks_info.each do |task, info|
|
161
|
-
time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
|
162
|
-
avg_time = Misc.mean(time_lists).to_i
|
163
|
-
total_time = Misc.sum(time_lists)
|
164
|
-
calls = time_lists.length
|
165
|
-
stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
|
166
|
-
end
|
167
|
-
|
168
|
-
raise "No jobs to process" if data.size == 0
|
169
|
-
|
170
|
-
start = data.column("Start.second").values.flatten.collect{|v| v.to_i}.min
|
171
|
-
eend = data.column("End.second").values.flatten.collect{|v| v.to_i}.max
|
172
|
-
total = eend - start
|
173
|
-
Log.info "Total time elapsed: #{total} seconds"
|
174
|
-
|
175
|
-
if options[:fix_gap]
|
176
|
-
total_gaps = Misc.sum(gaps.collect{|k,v| v})
|
177
|
-
Log.info "Total gaps: #{total_gaps} seconds"
|
178
|
-
end
|
179
|
-
|
180
|
-
if options[:plot_data]
|
181
|
-
puts data.to_s
|
182
|
-
else
|
183
|
-
puts stats.to_s
|
184
|
-
end
|
185
|
-
|
186
|
-
if plot
|
187
|
-
data.R <<-EOF, [:svg]
|
188
|
-
rbbt.require('tidyverse')
|
189
|
-
rbbt.require('ggplot2')
|
190
|
-
|
191
|
-
names(data) <- make.names(names(data))
|
192
|
-
data$id = rownames(data)
|
193
|
-
data$content = data$Task
|
194
|
-
data$start = data$Start
|
195
|
-
data$end = data$End
|
196
|
-
data$Project = data$Workflow
|
197
|
-
|
198
|
-
tasks = data
|
199
|
-
|
200
|
-
#theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
|
201
|
-
theme_gantt <- function(base_size=11, base_family="Sans Serif") {
|
202
|
-
ret <- theme_bw(base_size, base_family) %+replace%
|
203
|
-
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
|
204
|
-
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
|
205
|
-
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
|
206
|
-
panel.border = element_blank(), axis.line=element_blank(),
|
207
|
-
panel.grid.minor=element_blank(),
|
208
|
-
panel.grid.major.y = element_blank(),
|
209
|
-
panel.grid.major.x = element_line(size=0.5, colour="grey80"),
|
210
|
-
axis.ticks=element_blank(),
|
211
|
-
legend.position="bottom",
|
212
|
-
axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
|
213
|
-
strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
|
214
|
-
strip.background=element_rect(fill="#ffffff", colour=NA),
|
215
|
-
panel.spacing.y=unit(1.5, "lines"),
|
216
|
-
legend.key = element_blank())
|
217
|
-
|
218
|
-
ret
|
219
|
-
}
|
220
|
-
|
221
|
-
tasks.long <- tasks %>%
|
222
|
-
gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
|
223
|
-
arrange(date.type, task.date) %>%
|
224
|
-
mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
|
225
|
-
|
226
|
-
x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
|
227
|
-
|
228
|
-
timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
|
229
|
-
geom_segment() +
|
230
|
-
geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
|
231
|
-
guides(colour=guide_legend(title=NULL)) +
|
232
|
-
labs(x=NULL, y=NULL) +
|
233
|
-
theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
|
234
|
-
|
235
|
-
rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
|
236
|
-
EOF
|
237
|
-
end
|
51
|
+
puts Workflow.trace(jobs, options)
|
@@ -46,7 +46,21 @@ pid = step.info[:pid]
|
|
46
46
|
host = step.info[:pid_hostname]
|
47
47
|
|
48
48
|
step.rec_dependencies.each do |dep|
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
begin
|
50
|
+
dep.set_info key, value if (force || ! dep.info.include?(key)) && (!check_pid || dep.info[:pid].to_s == pid and dep.info[:pid_hostname] == host)
|
51
|
+
rescue
|
52
|
+
Log.warn "Could no set info #{key} for #{dep.path}: #{$!.message}"
|
53
|
+
end
|
52
54
|
end if recursive
|
55
|
+
|
56
|
+
if recursive && step.info[:archived_info]
|
57
|
+
ad = step.info[:archived_info]
|
58
|
+
ad.each do |d,info|
|
59
|
+
begin
|
60
|
+
info[key] = value if (force || ! info.include?(key)) && (!check_pid || info[:pid].to_s == pid and info[:pid_hostname] == host)
|
61
|
+
rescue
|
62
|
+
Log.warn "Could no set info #{key} for archived_dep #{info[:path]}: #{$!.message}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
step.set_info :archived_info, ad
|
66
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.31.
|
4
|
+
version: 5.31.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-04-
|
11
|
+
date: 2021-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|