rbbt-util 5.31.11 → 5.32.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c451106f276fa0ba5bf8cacce6f4867439b756f41ea439f8ce1f7f9e926f15c8
4
- data.tar.gz: ef03fd45181215040380e9e0344a0fd86b4552e41daf7f5e2feb6ff32be325cf
3
+ metadata.gz: ef9271207cbe69c725a4ab3a8553295d3b68e5f04ec1bfd25ad4654f162ed9cb
4
+ data.tar.gz: '087009c9ac6b4fee6d9fa7bceb003e050a0cc1e73f96987ec792c07388760633'
5
5
  SHA512:
6
- metadata.gz: b0efd21d9ea3ec27fb0264fd3e4a163178e8b3b2b9430150e2822bd41ad01b729a3ca9193b779f7956545334442accc207a163e1c02ec125b17930e9a7d88dd3
7
- data.tar.gz: abf50fd62c965161f5c13230d35aae45502df0934c82d69c59ca034e33c01154fa5105b2a65d8725463bb94352b0c3c97de4f5e6955d44581e3b31d4343be5cd
6
+ metadata.gz: b031c1143992b4c75eeeabcb231d656af2927a5bc46a0ea3527c4c90b7ffcc5eb44b0ec02ef9e31d4879fcf2b3192165ba5b22cd1978dc5250eec355f2556bdc
7
+ data.tar.gz: eb43875132a5f4ddc7ff9ae5dcaed7677c6bff84a4d61454cad9a83361d2a3dd63432cd5b28c656cd9e7bb589b8517bbdd3b57c0fa370527724d3ae55810b0cb
@@ -6,6 +6,36 @@ module HPC
6
6
  end
7
7
  end
8
8
 
9
+ def self.batch_system(batch_system = 'auto')
10
+ case batch_system.to_s.downcase
11
+ when 'slurm'
12
+ HPC::SLURM
13
+ when 'lsf'
14
+ HPC::LSF
15
+ when 'auto'
16
+ case $previous_commands.last
17
+ when 'slurm'
18
+ HPC::SLURM
19
+ when 'lsf'
20
+ HPC::LSF
21
+ else
22
+ case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
23
+ when 'slurm'
24
+ HPC::SLURM
25
+ when 'lsf'
26
+ HPC::LSF
27
+ else
28
+ case ENV["BATCH_SYSTEM"].to_s.downcase
29
+ when 'slurm'
30
+ HPC::SLURM
31
+ when 'lsf'
32
+ HPC::LSF
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+
9
39
  module TemplateGeneration
10
40
  def exec_cmd(job, options = {})
11
41
  env_cmd = Misc.process_options options, :env_cmd
@@ -30,12 +60,12 @@ module HPC
30
60
  -B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
31
61
  -B "#{options[:batch_dir]}" \
32
62
  -B /scratch/tmp \
33
- #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
63
+ #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
34
64
  -B #{scratch_group_dir} \
35
65
  -B #{projects_group_dir} \
36
66
  -B /apps/ \
37
67
  -B ~/git:"#{contain}/git":ro \
38
- #{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
68
+ #{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
39
69
  -B ~/.rbbt:"#{contain}/home/":ro)
40
70
  end
41
71
 
@@ -74,7 +104,7 @@ module HPC
74
104
  [name, dep.path] * "="
75
105
  end * ","
76
106
 
77
- options[:override_deps] = override_deps
107
+ options[:override_deps] = override_deps unless override_deps.empty?
78
108
  end
79
109
 
80
110
  # Save inputs into inputs_dir
@@ -216,6 +246,7 @@ EOF
216
246
  :fexit => File.join(batch_dir, 'exit.status'),
217
247
  :fsync => File.join(batch_dir, 'sync.log'),
218
248
  :fsexit => File.join(batch_dir, 'sync.status'),
249
+ :fenv => File.join(batch_dir, 'env.vars'),
219
250
  :fcmd => File.join(batch_dir, 'command.batch')
220
251
 
221
252
  batch_options
@@ -441,6 +472,7 @@ exit $exit_status
441
472
 
442
473
  # #{Log.color :green, "1. Prepare environment"}
443
474
  #{prepare_environment}
475
+ env > #{batch_options[:fenv]}
444
476
 
445
477
  # #{Log.color :green, "2. Execute"}
446
478
  #{execute}
data/lib/rbbt/hpc/lsf.rb CHANGED
@@ -7,10 +7,13 @@ module HPC
7
7
 
8
8
  def self.batch_system_variables
9
9
  <<-EOF
10
- [[ -z $LSB_MAX_MEM_RUSAGE ]] || MAX_MEMORY=$LSB_MAX_MEM_RUSAGE
11
- [[ -z $MAX_MEMORY ]] && let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
12
- BATCH_JOB_ID=$LSF_JOBID
13
- BATCH_SYSTEM=LSF
10
+ let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
11
+ let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $SLURM_CPUS_PER_TASK )"
12
+ [ ! -z $LSB_MAX_MEM_RUSAGE ] && let MAX_MEMORY="$LSB_MAX_MEM_RUSAGE" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
13
+ export MAX_MEMORY_DEFAULT
14
+ export MAX_MEMORY
15
+ export BATCH_JOB_ID=$LSF_JOBID
16
+ export BATCH_SYSTEM=LSF
14
17
  EOF
15
18
  end
16
19
 
@@ -145,6 +145,8 @@ module HPC
145
145
  job_rules.delete :workflow
146
146
 
147
147
 
148
+ option_config_keys = options[:config_keys]
149
+
148
150
  job_options = IndiferentHash.setup(options.merge(job_rules).merge(:batch_dependencies => dep_ids))
149
151
  job_options.delete :orchestration_rules
150
152
 
@@ -154,6 +156,11 @@ module HPC
154
156
  job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
155
157
  end
156
158
 
159
+ if option_config_keys
160
+ option_config_keys = option_config_keys.gsub(/,\s+/,',')
161
+ job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + option_config_keys : option_config_keys
162
+ end
163
+
157
164
  if options[:piggyback]
158
165
  manifest = options[:piggyback].uniq
159
166
  manifest += [job]
@@ -165,7 +172,7 @@ module HPC
165
172
  new_config_keys = self.job_rules(rules, job)[:config_keys]
166
173
  if new_config_keys
167
174
  new_config_keys = new_config_keys.gsub(/,\s+/,',')
168
- job_options[:config_keys] = job_options[:config_keys] ? new_config_keys + "," + job_options[:config_keys] : new_config_keys
175
+ job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + new_config_keys : new_config_keys
169
176
  end
170
177
 
171
178
  job_options.delete :piggyback
@@ -8,9 +8,13 @@ module HPC
8
8
 
9
9
  def self.batch_system_variables
10
10
  <<-EOF
11
- let "MAX_MEMORY=$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
12
- BATCH_JOB_ID=$SLURM_JOB_ID
13
- BATCH_SYSTEM=SLURM
11
+ let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
12
+ let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $SLURM_CPUS_PER_TASK )"
13
+ [ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
14
+ export MAX_MEMORY_DEFAULT
15
+ export MAX_MEMORY
16
+ export BATCH_JOB_ID=$SLURM_JOB_ID
17
+ export BATCH_SYSTEM=SLURM
14
18
  EOF
15
19
  end
16
20
 
@@ -189,6 +189,7 @@ module TSV
189
189
  other.each do |k,v|
190
190
  self.zip_new k, v
191
191
  end
192
+ self
192
193
  end
193
194
 
194
195
 
data/lib/rbbt/util/cmd.rb CHANGED
@@ -42,7 +42,6 @@ module CMD
42
42
  ["--version", "-version", "--help", ""].each do |f|
43
43
  begin
44
44
  version_txt += CMD.cmd("#{tool} #{f} 2>&1", :nofail => true).read
45
- ppp version_txt
46
45
  version = Misc.scan_version_text(version_txt, tool)
47
46
  break if version
48
47
  rescue
@@ -11,7 +11,7 @@ module Open
11
11
  class OpenGzipError < StandardError; end
12
12
 
13
13
  REMOTE_CACHEDIR = File.join(ENV["HOME"], "/tmp/open_cache")
14
- FileUtils.mkdir_p REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
14
+ #FileUtils.mkdir_p REMOTE_CACHEDIR unless File.exist? REMOTE_CACHEDIR
15
15
 
16
16
  GREP_CMD = begin
17
17
  if ENV["GREP_CMD"]
@@ -346,15 +346,18 @@ module Open
346
346
  target = File.join(target, File.basename(source)) if File.directory? target
347
347
  FileUtils.mkdir_p File.dirname(target) unless File.exist?(File.dirname(target))
348
348
  FileUtils.rm target if File.exist?(target)
349
+ FileUtils.rm target if File.symlink?(target)
349
350
  FileUtils.ln_s source, target
350
351
  end
351
352
 
352
353
  def self.ln(source, target, options = {})
353
354
  source = source.find if Path === source
354
355
  target = target.find if Path === target
356
+ source = File.realpath(source) if File.symlink?(source)
355
357
 
356
358
  FileUtils.mkdir_p File.dirname(target) unless File.exist?(File.dirname(target))
357
359
  FileUtils.rm target if File.exist?(target)
360
+ FileUtils.rm target if File.symlink?(target)
358
361
  FileUtils.ln source, target
359
362
  end
360
363
 
@@ -54,7 +54,8 @@ module Workflow
54
54
  inputs[input.to_sym] = YAML.load(Open.read(file))
55
55
  else
56
56
  if File.symlink?(file)
57
- inputs[input.to_sym] = File.readlink(file)
57
+ link_target = File.expand_path(File.readlink(file), File.dirname(file))
58
+ inputs[input.to_sym] = link_target
58
59
  else
59
60
  inputs[input.to_sym] = Open.realpath(file)
60
61
  end
@@ -20,7 +20,7 @@ class RemoteWorkflow
20
20
 
21
21
  def self.encode(url)
22
22
  begin
23
- URI.encode(url)
23
+ URI::DEFAULT_PARSER.escape(url)
24
24
  rescue
25
25
  Log.warn $!.message
26
26
  url
@@ -458,6 +458,10 @@ class Step
458
458
  end
459
459
 
460
460
  def clean
461
+ if ! Open.exists?(info_file)
462
+ Log.high "Refusing to clean step with no .info file: #{path}"
463
+ return self
464
+ end
461
465
  status = []
462
466
  status << "dirty" if done? && dirty?
463
467
  status << "not running" if ! done? && ! running?
@@ -1,12 +1,8 @@
1
1
  require 'rbbt/util/R'
2
2
 
3
3
  module Workflow
4
- def self.trace(seed_jobs, options = {})
5
-
6
- jobs = []
7
- seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
8
-
9
- data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
4
+ def self.trace_job_times(jobs, fix_gap = false)
5
+ data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
10
6
  min_start = nil
11
7
  max_done = nil
12
8
  jobs.each do |job|
@@ -14,10 +10,10 @@ module Workflow
14
10
  started = job.info[:started]
15
11
  ddone = job.info[:done]
16
12
 
17
- code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
18
- code = code + '.' + job.name
13
+ code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * " · "
14
+ code = job.name + " - " + code
19
15
 
20
- data[code] = [job.workflow.to_s, job.task_name, started, ddone]
16
+ data[job.path] = [code,job.workflow.to_s, job.task_name, started, ddone]
21
17
  if min_start.nil?
22
18
  min_start = started
23
19
  else
@@ -39,7 +35,7 @@ module Workflow
39
35
  value["End"] - min_start
40
36
  end
41
37
 
42
- if options[:fix_gap]
38
+ if fix_gap
43
39
  ranges = []
44
40
  data.through do |k,values|
45
41
  start, eend = values.values_at "Start.second", "End.second"
@@ -67,115 +63,155 @@ module Workflow
67
63
  gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
68
64
  value - gap
69
65
  end
66
+
67
+ total_gaps = Misc.sum(gaps.collect{|k,v| v})
68
+ Log.info "Total gaps: #{total_gaps} seconds"
70
69
  end
71
70
 
71
+ start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
72
+ eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
73
+ total = eend - start
74
+ Log.info "Total time elapsed: #{total} seconds"
75
+
76
+ data
77
+ end
78
+
79
+ def self.plot_trace_job_times(data, plot, width=800, height=800)
80
+ data.R <<-EOF, [:svg]
81
+ rbbt.require('tidyverse')
82
+ rbbt.require('ggplot2')
83
+
84
+ names(data) <- make.names(names(data))
85
+ data$id = data$Code
86
+ data$content = data$Task
87
+ data$start = data$Start
88
+ data$end = data$End
89
+ data$Project = data$Workflow
90
+
91
+ tasks = data
92
+
93
+ #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
94
+ theme_gantt <- function(base_size=11, base_family="Sans Serif") {
95
+ ret <- theme_bw(base_size, base_family) %+replace%
96
+ theme(panel.background = element_rect(fill="#ffffff", colour=NA),
97
+ axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
98
+ title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
99
+ panel.border = element_blank(), axis.line=element_blank(),
100
+ panel.grid.minor=element_blank(),
101
+ panel.grid.major.y = element_blank(),
102
+ panel.grid.major.x = element_line(size=0.5, colour="grey80"),
103
+ axis.ticks=element_blank(),
104
+ legend.position="bottom",
105
+ axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
106
+ strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
107
+ strip.background=element_rect(fill="#ffffff", colour=NA),
108
+ panel.spacing.y=unit(1.5, "lines"),
109
+ legend.key = element_blank())
110
+
111
+ ret
112
+ }
113
+
114
+ tasks.long <- tasks %>%
115
+ gather(date.type, task.date, -c(Code,Project, Task, id, Start.second, End.second)) %>%
116
+ arrange(date.type, task.date) %>%
117
+ mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
118
+
119
+ x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
120
+
121
+ timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
122
+ geom_segment() +
123
+ geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
124
+ guides(colour=guide_legend(title=NULL)) +
125
+ labs(x=NULL, y=NULL) +
126
+ theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
127
+
128
+ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
129
+ EOF
130
+ end
131
+
132
+ def self.trace_job_summary(jobs, report_keys = [])
72
133
  tasks_info = {}
73
134
 
135
+ report_keys = report_keys.collect{|k| k.to_s}
136
+
74
137
  jobs.each do |dep|
75
138
  next unless dep.info[:done]
76
139
  task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
77
- info = tasks_info[task] ||= {}
140
+ info = tasks_info[task] ||= IndiferentHash.setup({})
141
+ dep_info = IndiferentHash.setup(dep.info)
78
142
 
79
- time = dep.info[:done] - dep.info[:started]
143
+ time = dep_info[:done] - dep_info[:started]
80
144
  info[:time] ||= []
81
145
  info[:time] << time
82
146
 
83
- cpus = nil
84
- spark = false
85
- shard = false
147
+ report_keys.each do |key|
148
+ info[key] = dep_info[key]
149
+ end
150
+
86
151
  dep.info[:config_keys].select do |kinfo|
87
152
  key, value, tokens = kinfo
88
- key = key.to_s
89
- cpus = value if key.include? 'cpu'
90
- spark = value if key == 'spark'
91
- shard = value if key == 'shard'
92
- end
93
153
 
94
- info[:cpus] = cpus || 1
95
- info[:spark] = spark
96
- info[:shard] = shard
154
+ info[key.to_s] = value if report_keys.include? key.to_s
155
+ end
97
156
  end
98
157
 
99
- stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
158
+ summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
100
159
 
101
160
  tasks_info.each do |task, info|
102
- time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
103
- avg_time = Misc.mean(time_lists)
104
- total_time = Misc.sum(time_lists)
161
+ time_lists = info[:time]
162
+ avg_time = Misc.mean(time_lists).to_i
163
+ total_time = Misc.sum(time_lists).to_i
105
164
  calls = time_lists.length
106
- stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
165
+ summary[task] = [calls, avg_time, total_time]
107
166
  end
108
167
 
109
- raise "No jobs to process" if data.size == 0
168
+ report_keys.each do |key|
169
+ summary.add_field Misc.humanize(key) do |task|
170
+ tasks_info[task][key]
171
+ end
172
+ end if Array === report_keys && report_keys.any?
110
173
 
111
- start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
112
- eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
113
- total = eend - start
114
- Log.info "Total time elapsed: #{total} seconds"
174
+ summary
175
+ end
115
176
 
116
- if options[:fix_gap]
117
- total_gaps = Misc.sum(gaps.collect{|k,v| v})
118
- Log.info "Total gaps: #{total_gaps} seconds"
177
+ def self.trace(seed_jobs, options = {})
178
+ jobs = []
179
+ seed_jobs.each do |step|
180
+ jobs += step.rec_dependencies + [step]
181
+
182
+ step.info[:archived_info].each do |path,ainfo|
183
+ archived_step = Step.new path
184
+ class << archived_step
185
+ self
186
+ end.define_method :info do
187
+ ainfo
188
+ end
189
+ jobs << archived_step
190
+ end if step.info[:archived_info]
119
191
  end
120
192
 
121
- plot, width, height = options.values_at :plot, :width, :height
122
- if plot
123
- data.R <<-EOF, [:svg]
124
- rbbt.require('tidyverse')
125
- rbbt.require('ggplot2')
126
-
127
- names(data) <- make.names(names(data))
128
- data$id = rownames(data)
129
- data$content = data$Task
130
- data$start = data$Start
131
- data$end = data$End
132
- data$Project = data$Workflow
133
-
134
- tasks = data
135
-
136
- #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
137
- theme_gantt <- function(base_size=11, base_family="Sans Serif") {
138
- ret <- theme_bw(base_size, base_family) %+replace%
139
- theme(panel.background = element_rect(fill="#ffffff", colour=NA),
140
- axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
141
- title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
142
- panel.border = element_blank(), axis.line=element_blank(),
143
- panel.grid.minor=element_blank(),
144
- panel.grid.major.y = element_blank(),
145
- panel.grid.major.x = element_line(size=0.5, colour="grey80"),
146
- axis.ticks=element_blank(),
147
- legend.position="bottom",
148
- axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
149
- strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
150
- strip.background=element_rect(fill="#ffffff", colour=NA),
151
- panel.spacing.y=unit(1.5, "lines"),
152
- legend.key = element_blank())
153
-
154
- ret
155
- }
156
-
157
- tasks.long <- tasks %>%
158
- gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
159
- arrange(date.type, task.date) %>%
160
- mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
161
-
162
- x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
163
-
164
- timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
165
- geom_segment() +
166
- geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
167
- guides(colour=guide_legend(title=NULL)) +
168
- labs(x=NULL, y=NULL) +
169
- theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
170
-
171
- rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
172
- EOF
173
- end
193
+ jobs = jobs.uniq.sort_by{|job| t = job.info[:done]; t || Open.mtime(job.path) || 0 }
194
+
195
+ data = trace_job_times(jobs, options[:fix_gap])
196
+
197
+ report_keys = options[:report_keys] || ""
198
+ report_keys = report_keys.split(/,\s*/) if String === report_keys
199
+ summary = trace_job_summary(jobs, report_keys)
200
+
201
+ raise "No jobs to process" if data.size == 0
202
+
203
+ plot, size, width, height = options.values_at :plot, :width, :height
204
+
205
+ size = 800 if size.nil?
206
+ width = size * 2 if width.nil?
207
+ height = size if height.nil?
208
+
209
+ plot_trace_job_times(data, plot, width, height) if plot
174
210
 
175
211
  if options[:plot_data]
176
212
  data
177
213
  else
178
- stats
214
+ summary
179
215
  end
180
216
 
181
217
  end
@@ -36,33 +36,7 @@ end
36
36
  batch_system = options.delete :batch_system
37
37
  batch_system ||= 'auto'
38
38
 
39
- HPC::BATCH_MODULE = case batch_system.to_s.downcase
40
- when 'slurm'
41
- HPC::SLURM
42
- when 'lsf'
43
- HPC::LSF
44
- when 'auto'
45
- case $previous_commands.last
46
- when 'slurm'
47
- HPC::SLURM
48
- when 'lsf'
49
- HPC::LSF
50
- else
51
- case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
52
- when 'slurm'
53
- HPC::SLURM
54
- when 'lsf'
55
- HPC::LSF
56
- else
57
- case ENV["BATCH_SYSTEM"].to_s.downcase
58
- when 'slurm'
59
- HPC::SLURM
60
- when 'lsf'
61
- HPC::LSF
62
- end
63
- end
64
- end
65
- end
39
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
66
40
 
67
41
  raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
68
42
 
@@ -40,33 +40,7 @@ end
40
40
  batch_system = options.delete :batch_system
41
41
  batch_system ||= 'auto'
42
42
 
43
- HPC::BATCH_MODULE = case batch_system.to_s.downcase
44
- when 'slurm'
45
- HPC::SLURM
46
- when 'lsf'
47
- HPC::LSF
48
- when 'auto'
49
- case $previous_commands.last
50
- when 'slurm'
51
- HPC::SLURM
52
- when 'lsf'
53
- HPC::LSF
54
- else
55
- case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
56
- when 'slurm'
57
- HPC::SLURM
58
- when 'lsf'
59
- HPC::LSF
60
- else
61
- case ENV["BATCH_SYSTEM"].to_s.downcase
62
- when 'slurm'
63
- HPC::SLURM
64
- when 'lsf'
65
- HPC::LSF
66
- end
67
- end
68
- end
69
- end
43
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
70
44
 
71
45
  raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
72
46
 
@@ -108,7 +82,7 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
108
82
  cmd = nil
109
83
  end
110
84
 
111
- if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
85
+ if m = command_txt.match(/^export BATCH_SYSTEM=(.*)/)
112
86
  job_batch_system = m[1].downcase
113
87
  else
114
88
  job_batch_system = nil
@@ -235,6 +209,8 @@ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
235
209
  text = CMD.cmd('grep "^#SBATCH" |tail -n +5', :in => Open.read(fcmd)).read.strip
236
210
  when 'lsf'
237
211
  text = CMD.cmd('grep "^#BSUB" |tail -n +5', :in => Open.read(fcmd)).read.strip
212
+ else
213
+ text = ""
238
214
  end
239
215
  lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
240
216
  puts Log.color :yellow, lines * "\n"
@@ -34,33 +34,7 @@ EOF
34
34
  batch_system = $slurm_options.delete :batch_system
35
35
  batch_system ||= 'auto'
36
36
 
37
- HPC::BATCH_MODULE = case batch_system.to_s.downcase
38
- when 'slurm'
39
- HPC::SLURM
40
- when 'lsf'
41
- HPC::LSF
42
- when 'auto'
43
- case $previous_commands.last
44
- when 'slurm'
45
- HPC::SLURM
46
- when 'lsf'
47
- HPC::LSF
48
- else
49
- case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
50
- when 'slurm'
51
- HPC::SLURM
52
- when 'lsf'
53
- HPC::LSF
54
- else
55
- case ENV["BATCH_SYSTEM"].to_s.downcase
56
- when 'slurm'
57
- HPC::SLURM
58
- when 'lsf'
59
- HPC::LSF
60
- end
61
- end
62
- end
63
- end
37
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
64
38
 
65
39
  raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
66
40
 
@@ -40,33 +40,7 @@ end
40
40
  batch_system = options.delete :batch_system
41
41
  batch_system ||= 'auto'
42
42
 
43
- HPC::BATCH_MODULE = case batch_system.to_s.downcase
44
- when 'slurm'
45
- HPC::SLURM
46
- when 'lsf'
47
- HPC::LSF
48
- when 'auto'
49
- case $previous_commands.last
50
- when 'slurm'
51
- HPC::SLURM
52
- when 'lsf'
53
- HPC::LSF
54
- else
55
- case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
56
- when 'slurm'
57
- HPC::SLURM
58
- when 'lsf'
59
- HPC::LSF
60
- else
61
- case ENV["BATCH_SYSTEM"].to_s.downcase
62
- when 'slurm'
63
- HPC::SLURM
64
- when 'lsf'
65
- HPC::LSF
66
- end
67
- end
68
- end
69
- end
43
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
70
44
 
71
45
  raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
72
46
 
@@ -33,33 +33,7 @@ EOF
33
33
  batch_system = $slurm_options.delete :batch_system
34
34
  batch_system ||= 'auto'
35
35
 
36
- HPC::BATCH_MODULE = case batch_system.to_s.downcase
37
- when 'slurm'
38
- HPC::SLURM
39
- when 'lsf'
40
- HPC::LSF
41
- when 'auto'
42
- case $previous_commands.last
43
- when 'slurm'
44
- HPC::SLURM
45
- when 'lsf'
46
- HPC::LSF
47
- else
48
- case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
49
- when 'slurm'
50
- HPC::SLURM
51
- when 'lsf'
52
- HPC::LSF
53
- else
54
- case ENV["BATCH_SYSTEM"].to_s.downcase
55
- when 'slurm'
56
- HPC::SLURM
57
- when 'lsf'
58
- HPC::LSF
59
- end
60
- end
61
- end
62
- end
36
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
63
37
 
64
38
  raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
65
39
 
@@ -31,7 +31,9 @@ if ARGV.empty?
31
31
  puts Rbbt.etc.log_severity.find + ' does not exist'
32
32
  end
33
33
  else
34
- Open.write(Rbbt.etc.log_severity, ARGV[0].upcase)
34
+ level = ARGV[0]
35
+ level = %w(DEBUG LOW MEDIUM HIGH INFO WARN ERROR NONE)[level.to_i] if level =~ /^\d+$/
36
+ Open.write(Rbbt.etc.log_severity, level.upcase)
35
37
  end
36
38
 
37
39
 
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rbbt/workflow'
4
+ require 'rbbt/workflow/util/trace'
4
5
 
5
6
  require 'rbbt-util'
6
7
  require 'fileutils'
@@ -16,30 +17,26 @@ require 'rbbt/util/R'
16
17
  $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
17
18
 
18
19
  options = SOPT.setup <<EOF
19
- Examine the provenance of a job result
20
+ Examine the execution trace of a job or set of jobs
20
21
 
21
22
  $ rbbt workflow trace <job-result>
22
23
 
23
24
  -h--help Help
25
+ -fg--fix_gap Remove execution gaps
26
+ -rk--report_keys* Config keys and info fields to report
27
+ -p--plot* Plot file
24
28
  -w--width* Image Width
25
29
  -h--height* Image Height
26
- -p--plot* Plot file
27
30
  -s--size* Image Size (Height and Width)
28
- -fg--fix_gap Remove execution gaps
29
31
  -pd--plot_data Print plot data
30
32
  EOF
31
33
 
32
34
  SOPT.usage if options[:help]
33
35
 
36
+
34
37
  files = ARGV
35
38
  plot = options[:plot]
36
39
 
37
- width, height, size = options.values_at :width, :height, :size
38
-
39
- size = 800 if size.nil?
40
- width = size if width.nil?
41
- height = size if height.nil?
42
-
43
40
  def get_step(file)
44
41
  file = File.expand_path(file)
45
42
  file = file.sub(/\.(info|files)/,'')
@@ -47,191 +44,8 @@ def get_step(file)
47
44
  end
48
45
 
49
46
  jobs = []
50
- files.each do |file|
51
- step = get_step file
52
-
53
- jobs += step.rec_dependencies + [step]
54
-
55
- step.info[:archived_info].each do |path,ainfo|
56
- archived_step = Step.new path
57
- class << archived_step
58
- self
59
- end.define_method :info do
60
- ainfo
61
- end
62
- jobs << archived_step
63
- end if step.info[:archived_info]
64
- end
65
-
66
- jobs = jobs.select{|job| job.info[:done]}.sort_by{|job| job.info[:started]}
67
-
68
- data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
69
- min_start = nil
70
- max_done = nil
71
- jobs.each do |job|
72
- next unless job.info[:done]
73
- started = job.info[:started]
74
- ddone = job.info[:done]
75
-
76
- code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
77
- code = code + '.' + job.name
78
-
79
- data[code] = [job.workflow.to_s, job.task_name, started, ddone]
80
- if min_start.nil?
81
- min_start = started
82
- else
83
- min_start = started if started < min_start
84
- end
85
-
86
- if max_done.nil?
87
- max_done = ddone
88
- else
89
- max_done = ddone if ddone > max_done
90
- end
91
- end
92
-
93
- data.add_field "Start.second" do |k,value|
94
- value["Start"] - min_start
95
- end
96
-
97
- data.add_field "End.second" do |k,value|
98
- value["End"] - min_start
99
- end
100
-
101
- if options[:fix_gap]
102
- ranges = []
103
- data.through do |k,values|
104
- start, eend = values.values_at "Start.second", "End.second"
105
-
106
- ranges << (start..eend)
107
- end
108
-
109
- gaps = {}
110
- last = nil
111
- Misc.collapse_ranges(ranges).each do |range|
112
- start = range.begin
113
- eend = range.end
114
- if last
115
- gaps[last] = start - last
116
- end
117
- last = eend
118
- end
119
-
120
- data.process "End.second" do |value,k,values|
121
- gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
122
- value - gap
123
- end
124
-
125
- data.process "Start.second" do |value,k,values|
126
- gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
127
- value - gap
128
- end
47
+ jobs = files.collect do |file|
48
+ get_step file
129
49
  end
130
50
 
131
- tasks_info = {}
132
-
133
- jobs.each do |dep|
134
- next unless dep.info[:done]
135
- task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
136
- info = tasks_info[task] ||= {}
137
-
138
- time = dep.info[:done] - dep.info[:started]
139
- info[:time] ||= []
140
- info[:time] << time
141
-
142
- cpus = nil
143
- spark = false
144
- shard = false
145
- dep.info[:config_keys].select do |kinfo|
146
- key, value, tokens = kinfo
147
- key = key.to_s
148
- cpus = value if key.include? 'cpu'
149
- spark = value if key == 'spark'
150
- shard = value if key == 'shard'
151
- end
152
-
153
- info[:cpus] = cpus || 1
154
- info[:spark] = spark
155
- info[:shard] = shard
156
- end
157
-
158
- stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
159
-
160
- tasks_info.each do |task, info|
161
- time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
162
- avg_time = Misc.mean(time_lists).to_i
163
- total_time = Misc.sum(time_lists)
164
- calls = time_lists.length
165
- stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
166
- end
167
-
168
- raise "No jobs to process" if data.size == 0
169
-
170
- start = data.column("Start.second").values.flatten.collect{|v| v.to_i}.min
171
- eend = data.column("End.second").values.flatten.collect{|v| v.to_i}.max
172
- total = eend - start
173
- Log.info "Total time elapsed: #{total} seconds"
174
-
175
- if options[:fix_gap]
176
- total_gaps = Misc.sum(gaps.collect{|k,v| v})
177
- Log.info "Total gaps: #{total_gaps} seconds"
178
- end
179
-
180
- if options[:plot_data]
181
- puts data.to_s
182
- else
183
- puts stats.to_s
184
- end
185
-
186
- if plot
187
- data.R <<-EOF, [:svg]
188
- rbbt.require('tidyverse')
189
- rbbt.require('ggplot2')
190
-
191
- names(data) <- make.names(names(data))
192
- data$id = rownames(data)
193
- data$content = data$Task
194
- data$start = data$Start
195
- data$end = data$End
196
- data$Project = data$Workflow
197
-
198
- tasks = data
199
-
200
- #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
201
- theme_gantt <- function(base_size=11, base_family="Sans Serif") {
202
- ret <- theme_bw(base_size, base_family) %+replace%
203
- theme(panel.background = element_rect(fill="#ffffff", colour=NA),
204
- axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
205
- title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
206
- panel.border = element_blank(), axis.line=element_blank(),
207
- panel.grid.minor=element_blank(),
208
- panel.grid.major.y = element_blank(),
209
- panel.grid.major.x = element_line(size=0.5, colour="grey80"),
210
- axis.ticks=element_blank(),
211
- legend.position="bottom",
212
- axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
213
- strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
214
- strip.background=element_rect(fill="#ffffff", colour=NA),
215
- panel.spacing.y=unit(1.5, "lines"),
216
- legend.key = element_blank())
217
-
218
- ret
219
- }
220
-
221
- tasks.long <- tasks %>%
222
- gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
223
- arrange(date.type, task.date) %>%
224
- mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
225
-
226
- x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
227
-
228
- timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
229
- geom_segment() +
230
- geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
231
- guides(colour=guide_legend(title=NULL)) +
232
- labs(x=NULL, y=NULL) +
233
- theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
234
-
235
- rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
236
- EOF
237
- end
51
+ puts Workflow.trace(jobs, options)
@@ -46,7 +46,21 @@ pid = step.info[:pid]
46
46
  host = step.info[:pid_hostname]
47
47
 
48
48
  step.rec_dependencies.each do |dep|
49
- dep.set_info key, value if (force || ! dep.info.include?(key)) && (!check_pid || dep.info[:pid].to_s == pid and dep.info[:pid_hostname] == host)
50
- rescue
51
- Log.warn "Could no set info #{key} for #{dep.path}: #{$!.message}"
49
+ begin
50
+ dep.set_info key, value if (force || ! dep.info.include?(key)) && (!check_pid || dep.info[:pid].to_s == pid and dep.info[:pid_hostname] == host)
51
+ rescue
52
+ Log.warn "Could no set info #{key} for #{dep.path}: #{$!.message}"
53
+ end
52
54
  end if recursive
55
+
56
+ if recursive && step.info[:archived_info]
57
+ ad = step.info[:archived_info]
58
+ ad.each do |d,info|
59
+ begin
60
+ info[key] = value if (force || ! info.include?(key)) && (!check_pid || info[:pid].to_s == pid and info[:pid_hostname] == host)
61
+ rescue
62
+ Log.warn "Could no set info #{key} for archived_dep #{info[:path]}: #{$!.message}"
63
+ end
64
+ end
65
+ step.set_info :archived_info, ad
66
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.31.11
4
+ version: 5.32.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-31 00:00:00.000000000 Z
11
+ date: 2021-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake