rbbt-util 5.31.14 → 5.32.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 99337ec5d1fb8c3dbc9d1b8f89bc2965dc659c3e6043defe718b0007ba00456b
4
- data.tar.gz: c61228a72814e48ff2c7bf90a0e44ace075f6f4c1e29cfb7f36e6a182896f7b6
3
+ metadata.gz: 5623a5f5b4b78dc051b02bfaa9f7503f999bc58b7cc3569d8a58f9caead7b212
4
+ data.tar.gz: 195a44841df5ef636806413bf4bcfe1b36304d0b9e4178d3e787b1ee2e5d5a71
5
5
  SHA512:
6
- metadata.gz: eeb8b9d43a4199cbea11693f7028ba6216507bb3c1cc149e30d30070652ddfd15f7660db2df1435a2ed108354d6ce261bfc23907d5cc6866c2561f2eac9285eb
7
- data.tar.gz: 405081e84408572eb3a78cda085dba453c44b5583bc93e35287f9feb8d7693912710a1946b30e7ab931517da592f5a469cc66c4c923cf126070e7c01edbff134
6
+ metadata.gz: 9d9a436961cd66fd58bdd1b3908879b16f3e9f7a09d1d05a5d4e1980ad3c0a960a102895ca257bbd18298352c59f0448d21520c749bb652dc63c0042ef56d694
7
+ data.tar.gz: 9c9ec75fd411deb4849ed02834294da897a38ca2230a2af78e1bcb8722fe8ab1c1dd4eebe41e073270da02d60288363eb548f3bfdbf8cabd31256d5616ce9d09
@@ -6,6 +6,36 @@ module HPC
6
6
  end
7
7
  end
8
8
 
9
+ def self.batch_system(batch_system = 'auto')
10
+ case batch_system.to_s.downcase
11
+ when 'slurm'
12
+ HPC::SLURM
13
+ when 'lsf'
14
+ HPC::LSF
15
+ when 'auto'
16
+ case $previous_commands.last
17
+ when 'slurm'
18
+ HPC::SLURM
19
+ when 'lsf'
20
+ HPC::LSF
21
+ else
22
+ case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
23
+ when 'slurm'
24
+ HPC::SLURM
25
+ when 'lsf'
26
+ HPC::LSF
27
+ else
28
+ case ENV["BATCH_SYSTEM"].to_s.downcase
29
+ when 'slurm'
30
+ HPC::SLURM
31
+ when 'lsf'
32
+ HPC::LSF
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+
9
39
  module TemplateGeneration
10
40
  def exec_cmd(job, options = {})
11
41
  env_cmd = Misc.process_options options, :env_cmd
@@ -30,12 +60,12 @@ module HPC
30
60
  -B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
31
61
  -B "#{options[:batch_dir]}" \
32
62
  -B /scratch/tmp \
33
- #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
63
+ #{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
34
64
  -B #{scratch_group_dir} \
35
65
  -B #{projects_group_dir} \
36
66
  -B /apps/ \
37
67
  -B ~/git:"#{contain}/git":ro \
38
- #{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
68
+ #{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
39
69
  -B ~/.rbbt:"#{contain}/home/":ro)
40
70
  end
41
71
 
@@ -74,7 +104,7 @@ module HPC
74
104
  [name, dep.path] * "="
75
105
  end * ","
76
106
 
77
- options[:override_deps] = override_deps
107
+ options[:override_deps] = override_deps unless override_deps.empty?
78
108
  end
79
109
 
80
110
  # Save inputs into inputs_dir
@@ -216,6 +246,7 @@ EOF
216
246
  :fexit => File.join(batch_dir, 'exit.status'),
217
247
  :fsync => File.join(batch_dir, 'sync.log'),
218
248
  :fsexit => File.join(batch_dir, 'sync.status'),
249
+ :fenv => File.join(batch_dir, 'env.vars'),
219
250
  :fcmd => File.join(batch_dir, 'command.batch')
220
251
 
221
252
  batch_options
@@ -441,6 +472,7 @@ exit $exit_status
441
472
 
442
473
  # #{Log.color :green, "1. Prepare environment"}
443
474
  #{prepare_environment}
475
+ env > #{batch_options[:fenv]}
444
476
 
445
477
  # #{Log.color :green, "2. Execute"}
446
478
  #{execute}
data/lib/rbbt/hpc/lsf.rb CHANGED
@@ -7,10 +7,13 @@ module HPC
7
7
 
8
8
  def self.batch_system_variables
9
9
  <<-EOF
10
- [[ -z $LSB_MAX_MEM_RUSAGE ]] || MAX_MEMORY=$LSB_MAX_MEM_RUSAGE
11
- [[ -z $MAX_MEMORY ]] && let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
12
- BATCH_JOB_ID=$LSF_JOBID
13
- BATCH_SYSTEM=LSF
10
+ let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
11
+ let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $LSB_MAX_NUM_PROCESSORS )"
12
+ [ ! -z $LSB_MAX_MEM_RUSAGE ] && let MAX_MEMORY="$LSB_MAX_MEM_RUSAGE" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
13
+ export MAX_MEMORY_DEFAULT
14
+ export MAX_MEMORY
15
+ export BATCH_JOB_ID=$LSF_JOBID
16
+ export BATCH_SYSTEM=LSF
14
17
  EOF
15
18
  end
16
19
 
@@ -145,6 +145,8 @@ module HPC
145
145
  job_rules.delete :workflow
146
146
 
147
147
 
148
+ option_config_keys = options[:config_keys]
149
+
148
150
  job_options = IndiferentHash.setup(options.merge(job_rules).merge(:batch_dependencies => dep_ids))
149
151
  job_options.delete :orchestration_rules
150
152
 
@@ -154,6 +156,11 @@ module HPC
154
156
  job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
155
157
  end
156
158
 
159
+ if option_config_keys
160
+ option_config_keys = option_config_keys.gsub(/,\s+/,',')
161
+ job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + option_config_keys : option_config_keys
162
+ end
163
+
157
164
  if options[:piggyback]
158
165
  manifest = options[:piggyback].uniq
159
166
  manifest += [job]
@@ -165,7 +172,7 @@ module HPC
165
172
  new_config_keys = self.job_rules(rules, job)[:config_keys]
166
173
  if new_config_keys
167
174
  new_config_keys = new_config_keys.gsub(/,\s+/,',')
168
- job_options[:config_keys] = job_options[:config_keys] ? new_config_keys + "," + job_options[:config_keys] : new_config_keys
175
+ job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + new_config_keys : new_config_keys
169
176
  end
170
177
 
171
178
  job_options.delete :piggyback
@@ -8,9 +8,13 @@ module HPC
8
8
 
9
9
  def self.batch_system_variables
10
10
  <<-EOF
11
- let "MAX_MEMORY=$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || let MAX_MEMORY="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / 1024"
12
- BATCH_JOB_ID=$SLURM_JOB_ID
13
- BATCH_SYSTEM=SLURM
11
+ let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
12
+ let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $SLURM_CPUS_PER_TASK )"
13
+ [ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
14
+ export MAX_MEMORY_DEFAULT
15
+ export MAX_MEMORY
16
+ export BATCH_JOB_ID=$SLURM_JOB_ID
17
+ export BATCH_SYSTEM=SLURM
14
18
  EOF
15
19
  end
16
20
 
@@ -132,7 +132,7 @@ module Persist
132
132
 
133
133
  def read_lock
134
134
  read if closed?
135
- if read?
135
+ if read? || write?
136
136
  return yield
137
137
  end
138
138
 
data/lib/rbbt/resource.rb CHANGED
@@ -6,6 +6,7 @@ require 'set'
6
6
 
7
7
 
8
8
  module Resource
9
+ class ResourceNotFound < RbbtException; end
9
10
 
10
11
  class << self
11
12
  attr_accessor :lock_dir
@@ -154,16 +155,14 @@ module Resource
154
155
  rake_dir, content = rake_for(path)
155
156
  rake_dir = Path.setup(rake_dir.dup, self.pkgdir, self)
156
157
  else
157
- begin
158
- if path !~ /\.(gz|bgz)$/
159
- begin
160
- produce(path.annotate(path + '.gz'), force)
161
- rescue
162
- produce(path.annotate(path + '.bgz'), force)
163
- end
158
+ if path !~ /\.(gz|bgz)$/
159
+ begin
160
+ produce(path.annotate(path + '.gz'), force)
161
+ rescue ResourceNotFound
162
+ produce(path.annotate(path + '.bgz'), force)
164
163
  end
165
- rescue
166
- raise "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
164
+ else
165
+ raise ResourceNotFound, "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
167
166
  end
168
167
  end
169
168
 
@@ -174,7 +173,7 @@ module Resource
174
173
  end
175
174
 
176
175
  if type and not File.exist?(final_path) or force
177
- Log.medium "Producing: #{ final_path }"
176
+ Log.medium "Producing: (#{self.to_s}) #{ final_path }"
178
177
  lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
179
178
 
180
179
  Misc.lock lock_filename do
data/lib/rbbt/tsv/csv.rb CHANGED
@@ -8,6 +8,7 @@ module TSV
8
8
  noheaders = ! headers
9
9
 
10
10
  type = options.delete :type
11
+ cast = options.delete :cast
11
12
  merge = options.delete :merge
12
13
  key_field = options.delete :key_field
13
14
  fields = options.delete :fields
@@ -46,6 +47,10 @@ module TSV
46
47
  else
47
48
  key, *values = row
48
49
  end
50
+
51
+ if cast
52
+ values = values.collect{|v| v.send cast }
53
+ end
49
54
 
50
55
  case type
51
56
  when :double, :flat
@@ -40,7 +40,7 @@ module TSV
40
40
  # Process fields line
41
41
 
42
42
  preamble << line if line
43
- while line and (TrueClass === @header_hash or (String === @header_hash and Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
43
+ while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
44
44
  @fields = line.split(@sep, -1)
45
45
  @key_field = @fields.shift
46
46
  @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
@@ -49,7 +49,7 @@ module TSV
49
49
  line = (@header_hash != "" ? stream.gets : nil)
50
50
  line = Misc.fixutf8 line.chomp if line
51
51
  preamble << line if line
52
- @header_hash = false if TrueClass === @header_hash
52
+ @header_hash = false if TrueClass === @header_hash || @header_hash == ""
53
53
  end
54
54
 
55
55
  @preamble = preamble[0..-3] * "\n"
@@ -56,6 +56,7 @@ module TSV
56
56
  preambles = []
57
57
 
58
58
  streams = streams.collect do |stream|
59
+
59
60
  parser = TSV::Parser.new stream, options.dup
60
61
  sfields = parser.fields
61
62
 
@@ -43,7 +43,7 @@ module Misc
43
43
  File.mkfifo path
44
44
  yield path
45
45
  ensure
46
- FileUtils.rm path if erase
46
+ FileUtils.rm path if erase && File.exists?(path)
47
47
  end
48
48
  end
49
49
 
data/lib/rbbt/workflow.rb CHANGED
@@ -187,17 +187,21 @@ module Workflow
187
187
  clean_name = wf_name.sub(/::.*/,'')
188
188
  Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
189
189
  require_workflow clean_name
190
- return Misc.string2const Misc.camel_case(wf_name)
190
+ workflow = Misc.string2const Misc.camel_case(wf_name)
191
+ workflow.load_documentation
192
+ return workflow
191
193
  end
192
194
 
193
195
  Log.high{"Loading workflow #{wf_name}"}
194
196
  require_local_workflow(wf_name) or
195
197
  (Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
196
- begin
197
- Misc.string2const Misc.camel_case(wf_name)
198
- rescue
199
- Workflow.workflows.last || true
200
- end
198
+ workflow = begin
199
+ Misc.string2const Misc.camel_case(wf_name)
200
+ rescue
201
+ Workflow.workflows.last || true
202
+ end
203
+ workflow.load_documentation
204
+ workflow
201
205
  end
202
206
 
203
207
  attr_accessor :description
@@ -74,6 +74,7 @@ module Workflow
74
74
  def dep_task(name, workflow, oname, *rest, &block)
75
75
  dep(workflow, oname, *rest, &block)
76
76
  extension workflow.tasks[oname].extension if workflow.tasks.include?(oname) unless @extension
77
+ returns workflow.tasks[oname].result_description if workflow.tasks.include?(oname) unless @result_description
77
78
  task name do
78
79
  raise RbbtException, "dependency not found in dep_task" if dependencies.empty?
79
80
  dep = dependencies.last
@@ -45,10 +45,25 @@ module Workflow
45
45
  end
46
46
 
47
47
  def load_documentation
48
- @documentation = Workflow.parse_workflow_doc documentation_markdown
48
+ return if @documentation
49
+ @documentation ||= Workflow.parse_workflow_doc documentation_markdown
49
50
  @documentation[:tasks].each do |task, description|
50
- raise "Documentation for #{ task }, but not a #{ self.to_s } task" unless tasks.include? task.to_sym
51
- tasks[task.to_sym].description = description
51
+ if task.include? "#"
52
+ workflow, task = task.split("#")
53
+ workflow = begin
54
+ Kernel.const_get workflow
55
+ rescue
56
+ next
57
+ end
58
+ else
59
+ workflow = self
60
+ end
61
+
62
+ if workflow.tasks.include? task.to_sym
63
+ workflow.tasks[task.to_sym].description = description
64
+ else
65
+ Log.low "Documentation for #{ task }, but not a #{ workflow.to_s } task"
66
+ end
52
67
  end
53
68
  end
54
69
 
@@ -458,6 +458,10 @@ class Step
458
458
  end
459
459
 
460
460
  def clean
461
+ if ! Open.exists?(info_file)
462
+ Log.high "Refusing to clean step with no .info file: #{path}"
463
+ return self
464
+ end
461
465
  status = []
462
466
  status << "dirty" if done? && dirty?
463
467
  status << "not running" if ! done? && ! running?
@@ -97,6 +97,7 @@ class Step
97
97
  Open.ln_s(value.path, path)
98
98
  when type.to_s == "file"
99
99
  if String === value && File.exists?(value)
100
+ value = File.expand_path(value)
100
101
  Open.ln_s(value, path)
101
102
  else
102
103
  value = value.collect{|v| v = "#{v}" if Path === v; v }if Array === value
@@ -122,7 +122,7 @@ module Workflow
122
122
  last = _prov_tasks(workflow.dep_tree(task_name))
123
123
 
124
124
  if child
125
- description << "->" << task_name.to_s
125
+ description << "-> " << task_name.to_s
126
126
  elsif first
127
127
  description << "" << task_name.to_s
128
128
  else
@@ -198,7 +198,7 @@ module Workflow
198
198
  puts Misc.format_definition_list_item(name.to_s, description, Log.terminal_width, 20, :yellow)
199
199
 
200
200
  prov_string = prov_string(dep_tree(name))
201
- puts Log.color :blue, " ->" + prov_string if prov_string && ! prov_string.empty?
201
+ puts Misc.format_paragraph Log.color(:blue, "-> " + prov_string) if prov_string && ! prov_string.empty?
202
202
  end
203
203
 
204
204
  else
@@ -1,6 +1,6 @@
1
1
  class Step
2
2
 
3
- MAIN_RSYNC_ARGS="-avztAXHP"
3
+ MAIN_RSYNC_ARGS="-avztAXHP --copy-links"
4
4
 
5
5
  def self.link_job(path, target_dir, task = nil, workflow = nil)
6
6
  Path.setup(target_dir)
@@ -1,12 +1,8 @@
1
1
  require 'rbbt/util/R'
2
2
 
3
3
  module Workflow
4
- def self.trace(seed_jobs, options = {})
5
-
6
- jobs = []
7
- seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
8
-
9
- data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
4
+ def self.trace_job_times(jobs, fix_gap = false)
5
+ data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
10
6
  min_start = nil
11
7
  max_done = nil
12
8
  jobs.each do |job|
@@ -14,10 +10,13 @@ module Workflow
14
10
  started = job.info[:started]
15
11
  ddone = job.info[:done]
16
12
 
17
- code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
18
- code = code + '.' + job.name
13
+ started = Time.parse started if String === started
14
+ ddone = Time.parse ddone if String === ddone
15
+
16
+ code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * " · "
17
+ code = job.name + " - " + code
19
18
 
20
- data[code] = [job.workflow.to_s, job.task_name, started, ddone]
19
+ data[job.path] = [code,job.workflow.to_s, job.task_name, started, ddone]
21
20
  if min_start.nil?
22
21
  min_start = started
23
22
  else
@@ -39,7 +38,7 @@ module Workflow
39
38
  value["End"] - min_start
40
39
  end
41
40
 
42
- if options[:fix_gap]
41
+ if fix_gap
43
42
  ranges = []
44
43
  data.through do |k,values|
45
44
  start, eend = values.values_at "Start.second", "End.second"
@@ -67,115 +66,167 @@ module Workflow
67
66
  gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
68
67
  value - gap
69
68
  end
69
+
70
+ total_gaps = Misc.sum(gaps.collect{|k,v| v})
71
+ Log.info "Total gaps: #{total_gaps} seconds"
70
72
  end
71
73
 
74
+ start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
75
+ eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
76
+ total = eend - start unless eend.nil? || start.nil?
77
+ Log.info "Total time elapsed: #{total} seconds" if total
78
+
79
+ data
80
+ end
81
+
82
+ def self.plot_trace_job_times(data, plot, width=800, height=800)
83
+ data.R <<-EOF, [:svg]
84
+ rbbt.require('tidyverse')
85
+ rbbt.require('ggplot2')
86
+
87
+ names(data) <- make.names(names(data))
88
+ data$id = data$Code
89
+ data$content = data$Task
90
+ data$start = data$Start
91
+ data$end = data$End
92
+ data$Project = data$Workflow
93
+
94
+ tasks = data
95
+
96
+ #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
97
+ theme_gantt <- function(base_size=11, base_family="Sans Serif") {
98
+ ret <- theme_bw(base_size, base_family) %+replace%
99
+ theme(panel.background = element_rect(fill="#ffffff", colour=NA),
100
+ axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
101
+ title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
102
+ panel.border = element_blank(), axis.line=element_blank(),
103
+ panel.grid.minor=element_blank(),
104
+ panel.grid.major.y = element_blank(),
105
+ panel.grid.major.x = element_line(size=0.5, colour="grey80"),
106
+ axis.ticks=element_blank(),
107
+ legend.position="bottom",
108
+ axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
109
+ strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
110
+ strip.background=element_rect(fill="#ffffff", colour=NA),
111
+ panel.spacing.y=unit(1.5, "lines"),
112
+ legend.key = element_blank())
113
+
114
+ ret
115
+ }
116
+
117
+ tasks.long <- tasks %>%
118
+ gather(date.type, task.date, -c(Code,Project, Task, id, Start.second, End.second)) %>%
119
+ arrange(date.type, task.date) %>%
120
+ mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
121
+
122
+ x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
123
+
124
+ timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
125
+ geom_segment() +
126
+ geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
127
+ guides(colour=guide_legend(title=NULL)) +
128
+ labs(x=NULL, y=NULL) +
129
+ theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
130
+
131
+ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
132
+ EOF
133
+ end
134
+
135
+ def self.trace_job_summary(jobs, report_keys = [])
72
136
  tasks_info = {}
73
137
 
138
+ report_keys = report_keys.collect{|k| k.to_s}
139
+
74
140
  jobs.each do |dep|
75
141
  next unless dep.info[:done]
76
142
  task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
77
- info = tasks_info[task] ||= {}
143
+ info = tasks_info[task] ||= IndiferentHash.setup({})
144
+ dep_info = IndiferentHash.setup(dep.info)
145
+
146
+ ddone = dep_info[:done]
147
+ started = dep_info[:started]
78
148
 
79
- time = dep.info[:done] - dep.info[:started]
149
+ started = Time.parse started if String === started
150
+ ddone = Time.parse ddone if String === ddone
151
+
152
+ time = ddone - started
80
153
  info[:time] ||= []
81
154
  info[:time] << time
82
155
 
83
- cpus = nil
84
- spark = false
85
- shard = false
156
+ report_keys.each do |key|
157
+ info[key] = dep_info[key]
158
+ end
159
+
86
160
  dep.info[:config_keys].select do |kinfo|
87
161
  key, value, tokens = kinfo
88
- key = key.to_s
89
- cpus = value if key.include? 'cpu'
90
- spark = value if key == 'spark'
91
- shard = value if key == 'shard'
92
- end
93
162
 
94
- info[:cpus] = cpus || 1
95
- info[:spark] = spark
96
- info[:shard] = shard
163
+ info[key.to_s] = value if report_keys.include? key.to_s
164
+ end
97
165
  end
98
166
 
99
- stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
167
+ summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
100
168
 
101
169
  tasks_info.each do |task, info|
102
- time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
103
- avg_time = Misc.mean(time_lists)
104
- total_time = Misc.sum(time_lists)
170
+ time_lists = info[:time]
171
+ avg_time = Misc.mean(time_lists).to_i
172
+ total_time = Misc.sum(time_lists).to_i
105
173
  calls = time_lists.length
106
- stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
174
+ summary[task] = [calls, avg_time, total_time]
107
175
  end
108
176
 
109
- raise "No jobs to process" if data.size == 0
177
+ report_keys.each do |key|
178
+ summary.add_field Misc.humanize(key) do |task|
179
+ tasks_info[task][key]
180
+ end
181
+ end if Array === report_keys && report_keys.any?
110
182
 
111
- start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
112
- eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
113
- total = eend - start
114
- Log.info "Total time elapsed: #{total} seconds"
183
+ summary
184
+ end
115
185
 
116
- if options[:fix_gap]
117
- total_gaps = Misc.sum(gaps.collect{|k,v| v})
118
- Log.info "Total gaps: #{total_gaps} seconds"
119
- end
186
+ def self.trace(seed_jobs, options = {})
187
+ jobs = []
188
+ seed_jobs.each do |step|
189
+ jobs += step.rec_dependencies + [step]
190
+ step.info[:archived_info].each do |path,ainfo|
191
+ archived_step = Step.new path
192
+
193
+ archived_step.define_singleton_method :info do
194
+ ainfo
195
+ end
196
+
197
+ #class << archived_step
198
+ # self
199
+ #end.define_method :info do
200
+ # ainfo
201
+ #end
202
+
203
+ jobs << archived_step
204
+ end if step.info[:archived_info]
120
205
 
121
- plot, width, height = options.values_at :plot, :width, :height
122
- if plot
123
- data.R <<-EOF, [:svg]
124
- rbbt.require('tidyverse')
125
- rbbt.require('ggplot2')
126
-
127
- names(data) <- make.names(names(data))
128
- data$id = rownames(data)
129
- data$content = data$Task
130
- data$start = data$Start
131
- data$end = data$End
132
- data$Project = data$Workflow
133
-
134
- tasks = data
135
-
136
- #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
137
- theme_gantt <- function(base_size=11, base_family="Sans Serif") {
138
- ret <- theme_bw(base_size, base_family) %+replace%
139
- theme(panel.background = element_rect(fill="#ffffff", colour=NA),
140
- axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
141
- title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
142
- panel.border = element_blank(), axis.line=element_blank(),
143
- panel.grid.minor=element_blank(),
144
- panel.grid.major.y = element_blank(),
145
- panel.grid.major.x = element_line(size=0.5, colour="grey80"),
146
- axis.ticks=element_blank(),
147
- legend.position="bottom",
148
- axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
149
- strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
150
- strip.background=element_rect(fill="#ffffff", colour=NA),
151
- panel.spacing.y=unit(1.5, "lines"),
152
- legend.key = element_blank())
153
-
154
- ret
155
- }
156
-
157
- tasks.long <- tasks %>%
158
- gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
159
- arrange(date.type, task.date) %>%
160
- mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
161
-
162
- x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
163
-
164
- timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
165
- geom_segment() +
166
- geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
167
- guides(colour=guide_legend(title=NULL)) +
168
- labs(x=NULL, y=NULL) +
169
- theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
170
-
171
- rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
172
- EOF
173
206
  end
174
207
 
208
+ jobs = jobs.uniq.sort_by{|job| t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
209
+
210
+ data = trace_job_times(jobs, options[:fix_gap])
211
+
212
+ report_keys = options[:report_keys] || ""
213
+ report_keys = report_keys.split(/,\s*/) if String === report_keys
214
+ summary = trace_job_summary(jobs, report_keys)
215
+
216
+ raise "No jobs to process" if data.size == 0
217
+
218
+ plot, size, width, height = options.values_at :plot, :size, :width, :height
219
+
220
+ size = 800 if size.nil?
221
+ width = size.to_i * 2 if width.nil?
222
+ height = size if height.nil?
223
+
224
+ plot_trace_job_times(data, plot, width, height) if plot
225
+
175
226
  if options[:plot_data]
176
227
  data
177
228
  else
178
- stats
229
+ summary
179
230
  end
180
231
 
181
232
  end