rbbt-util 5.31.14 → 5.32.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc/batch.rb +35 -3
- data/lib/rbbt/hpc/lsf.rb +7 -4
- data/lib/rbbt/hpc/orchestrate.rb +8 -1
- data/lib/rbbt/hpc/slurm.rb +7 -3
- data/lib/rbbt/persist/tsv/adapter.rb +1 -1
- data/lib/rbbt/resource.rb +9 -10
- data/lib/rbbt/tsv/csv.rb +5 -0
- data/lib/rbbt/tsv/parser.rb +2 -2
- data/lib/rbbt/tsv/stream.rb +1 -0
- data/lib/rbbt/util/misc/pipes.rb +1 -1
- data/lib/rbbt/workflow.rb +10 -6
- data/lib/rbbt/workflow/definition.rb +1 -0
- data/lib/rbbt/workflow/doc.rb +18 -3
- data/lib/rbbt/workflow/step.rb +4 -0
- data/lib/rbbt/workflow/step/accessor.rb +1 -0
- data/lib/rbbt/workflow/usage.rb +2 -2
- data/lib/rbbt/workflow/util/archive.rb +1 -1
- data/lib/rbbt/workflow/util/trace.rb +141 -90
- data/share/rbbt_commands/hpc/clean +1 -27
- data/share/rbbt_commands/hpc/list +4 -28
- data/share/rbbt_commands/hpc/orchestrate +1 -27
- data/share/rbbt_commands/hpc/tail +1 -27
- data/share/rbbt_commands/hpc/task +1 -27
- data/share/rbbt_commands/workflow/forget_deps +17 -5
- data/share/rbbt_commands/workflow/trace +9 -195
- data/share/rbbt_commands/workflow/write_info +17 -3
- data/test/rbbt/persist/tsv/test_tokyocabinet.rb +1 -1
- metadata +81 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5623a5f5b4b78dc051b02bfaa9f7503f999bc58b7cc3569d8a58f9caead7b212
|
4
|
+
data.tar.gz: 195a44841df5ef636806413bf4bcfe1b36304d0b9e4178d3e787b1ee2e5d5a71
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9d9a436961cd66fd58bdd1b3908879b16f3e9f7a09d1d05a5d4e1980ad3c0a960a102895ca257bbd18298352c59f0448d21520c749bb652dc63c0042ef56d694
|
7
|
+
data.tar.gz: 9c9ec75fd411deb4849ed02834294da897a38ca2230a2af78e1bcb8722fe8ab1c1dd4eebe41e073270da02d60288363eb548f3bfdbf8cabd31256d5616ce9d09
|
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -6,6 +6,36 @@ module HPC
|
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
9
|
+
def self.batch_system(batch_system = 'auto')
|
10
|
+
case batch_system.to_s.downcase
|
11
|
+
when 'slurm'
|
12
|
+
HPC::SLURM
|
13
|
+
when 'lsf'
|
14
|
+
HPC::LSF
|
15
|
+
when 'auto'
|
16
|
+
case $previous_commands.last
|
17
|
+
when 'slurm'
|
18
|
+
HPC::SLURM
|
19
|
+
when 'lsf'
|
20
|
+
HPC::LSF
|
21
|
+
else
|
22
|
+
case Rbbt::Config.get(:batch_system, :batch, :batch_system, :hpc, :HPC, :BATCH).to_s.downcase
|
23
|
+
when 'slurm'
|
24
|
+
HPC::SLURM
|
25
|
+
when 'lsf'
|
26
|
+
HPC::LSF
|
27
|
+
else
|
28
|
+
case ENV["BATCH_SYSTEM"].to_s.downcase
|
29
|
+
when 'slurm'
|
30
|
+
HPC::SLURM
|
31
|
+
when 'lsf'
|
32
|
+
HPC::LSF
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
9
39
|
module TemplateGeneration
|
10
40
|
def exec_cmd(job, options = {})
|
11
41
|
env_cmd = Misc.process_options options, :env_cmd
|
@@ -30,12 +60,12 @@ module HPC
|
|
30
60
|
-B "/.singularity_ruby_inline":"#{contain}/.singularity_ruby_inline":rw
|
31
61
|
-B "#{options[:batch_dir]}" \
|
32
62
|
-B /scratch/tmp \
|
33
|
-
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
63
|
+
#{ group != user_group ? "-B /gpfs/projects/#{user_group}" : "" } \
|
34
64
|
-B #{scratch_group_dir} \
|
35
65
|
-B #{projects_group_dir} \
|
36
66
|
-B /apps/ \
|
37
67
|
-B ~/git:"#{contain}/git":ro \
|
38
|
-
#{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
|
68
|
+
#{Open.exists?('~/.rbbt/software/opt/')? '-B ~/.rbbt/software/opt/:"/opt/":ro' : '' } \
|
39
69
|
-B ~/.rbbt:"#{contain}/home/":ro)
|
40
70
|
end
|
41
71
|
|
@@ -74,7 +104,7 @@ module HPC
|
|
74
104
|
[name, dep.path] * "="
|
75
105
|
end * ","
|
76
106
|
|
77
|
-
options[:override_deps] = override_deps
|
107
|
+
options[:override_deps] = override_deps unless override_deps.empty?
|
78
108
|
end
|
79
109
|
|
80
110
|
# Save inputs into inputs_dir
|
@@ -216,6 +246,7 @@ EOF
|
|
216
246
|
:fexit => File.join(batch_dir, 'exit.status'),
|
217
247
|
:fsync => File.join(batch_dir, 'sync.log'),
|
218
248
|
:fsexit => File.join(batch_dir, 'sync.status'),
|
249
|
+
:fenv => File.join(batch_dir, 'env.vars'),
|
219
250
|
:fcmd => File.join(batch_dir, 'command.batch')
|
220
251
|
|
221
252
|
batch_options
|
@@ -441,6 +472,7 @@ exit $exit_status
|
|
441
472
|
|
442
473
|
# #{Log.color :green, "1. Prepare environment"}
|
443
474
|
#{prepare_environment}
|
475
|
+
env > #{batch_options[:fenv]}
|
444
476
|
|
445
477
|
# #{Log.color :green, "2. Execute"}
|
446
478
|
#{execute}
|
data/lib/rbbt/hpc/lsf.rb
CHANGED
@@ -7,10 +7,13 @@ module HPC
|
|
7
7
|
|
8
8
|
def self.batch_system_variables
|
9
9
|
<<-EOF
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
|
11
|
+
let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $LSB_MAX_NUM_PROCESSORS )"
|
12
|
+
[ ! -z $LSB_MAX_MEM_RUSAGE ] && let MAX_MEMORY="$LSB_MAX_MEM_RUSAGE" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
|
13
|
+
export MAX_MEMORY_DEFAULT
|
14
|
+
export MAX_MEMORY
|
15
|
+
export BATCH_JOB_ID=$LSF_JOBID
|
16
|
+
export BATCH_SYSTEM=LSF
|
14
17
|
EOF
|
15
18
|
end
|
16
19
|
|
data/lib/rbbt/hpc/orchestrate.rb
CHANGED
@@ -145,6 +145,8 @@ module HPC
|
|
145
145
|
job_rules.delete :workflow
|
146
146
|
|
147
147
|
|
148
|
+
option_config_keys = options[:config_keys]
|
149
|
+
|
148
150
|
job_options = IndiferentHash.setup(options.merge(job_rules).merge(:batch_dependencies => dep_ids))
|
149
151
|
job_options.delete :orchestration_rules
|
150
152
|
|
@@ -154,6 +156,11 @@ module HPC
|
|
154
156
|
job_options[:config_keys] = job_options[:config_keys] ? config_keys + "," + job_options[:config_keys] : config_keys
|
155
157
|
end
|
156
158
|
|
159
|
+
if option_config_keys
|
160
|
+
option_config_keys = option_config_keys.gsub(/,\s+/,',')
|
161
|
+
job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + option_config_keys : option_config_keys
|
162
|
+
end
|
163
|
+
|
157
164
|
if options[:piggyback]
|
158
165
|
manifest = options[:piggyback].uniq
|
159
166
|
manifest += [job]
|
@@ -165,7 +172,7 @@ module HPC
|
|
165
172
|
new_config_keys = self.job_rules(rules, job)[:config_keys]
|
166
173
|
if new_config_keys
|
167
174
|
new_config_keys = new_config_keys.gsub(/,\s+/,',')
|
168
|
-
job_options[:config_keys] = job_options[:config_keys] ?
|
175
|
+
job_options[:config_keys] = job_options[:config_keys] ? job_options[:config_keys] + "," + new_config_keys : new_config_keys
|
169
176
|
end
|
170
177
|
|
171
178
|
job_options.delete :piggyback
|
data/lib/rbbt/hpc/slurm.rb
CHANGED
@@ -8,9 +8,13 @@ module HPC
|
|
8
8
|
|
9
9
|
def self.batch_system_variables
|
10
10
|
<<-EOF
|
11
|
-
let
|
12
|
-
|
13
|
-
|
11
|
+
let TOTAL_PROCESORS="$(cat /proc/cpuinfo|grep ^processor |wc -l)"
|
12
|
+
let MAX_MEMORY_DEFAULT="$(grep MemTotal /proc/meminfo|grep -o "[[:digit:]]*") / ( (1024 * $TOTAL_PROCESORS) / $SLURM_CPUS_PER_TASK )"
|
13
|
+
[ ! -z $SLURM_MEM_PER_CPU ] && let MAX_MEMORY="$SLURM_MEM_PER_CPU * $SLURM_CPUS_PER_TASK" || MAX_MEMORY="$MAX_MEMORY_DEFAULT"
|
14
|
+
export MAX_MEMORY_DEFAULT
|
15
|
+
export MAX_MEMORY
|
16
|
+
export BATCH_JOB_ID=$SLURM_JOB_ID
|
17
|
+
export BATCH_SYSTEM=SLURM
|
14
18
|
EOF
|
15
19
|
end
|
16
20
|
|
data/lib/rbbt/resource.rb
CHANGED
@@ -6,6 +6,7 @@ require 'set'
|
|
6
6
|
|
7
7
|
|
8
8
|
module Resource
|
9
|
+
class ResourceNotFound < RbbtException; end
|
9
10
|
|
10
11
|
class << self
|
11
12
|
attr_accessor :lock_dir
|
@@ -154,16 +155,14 @@ module Resource
|
|
154
155
|
rake_dir, content = rake_for(path)
|
155
156
|
rake_dir = Path.setup(rake_dir.dup, self.pkgdir, self)
|
156
157
|
else
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
produce(path.annotate(path + '.bgz'), force)
|
163
|
-
end
|
158
|
+
if path !~ /\.(gz|bgz)$/
|
159
|
+
begin
|
160
|
+
produce(path.annotate(path + '.gz'), force)
|
161
|
+
rescue ResourceNotFound
|
162
|
+
produce(path.annotate(path + '.bgz'), force)
|
164
163
|
end
|
165
|
-
|
166
|
-
raise "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
|
164
|
+
else
|
165
|
+
raise ResourceNotFound, "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
|
167
166
|
end
|
168
167
|
end
|
169
168
|
|
@@ -174,7 +173,7 @@ module Resource
|
|
174
173
|
end
|
175
174
|
|
176
175
|
if type and not File.exist?(final_path) or force
|
177
|
-
Log.medium "Producing: #{ final_path }"
|
176
|
+
Log.medium "Producing: (#{self.to_s}) #{ final_path }"
|
178
177
|
lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
|
179
178
|
|
180
179
|
Misc.lock lock_filename do
|
data/lib/rbbt/tsv/csv.rb
CHANGED
@@ -8,6 +8,7 @@ module TSV
|
|
8
8
|
noheaders = ! headers
|
9
9
|
|
10
10
|
type = options.delete :type
|
11
|
+
cast = options.delete :cast
|
11
12
|
merge = options.delete :merge
|
12
13
|
key_field = options.delete :key_field
|
13
14
|
fields = options.delete :fields
|
@@ -46,6 +47,10 @@ module TSV
|
|
46
47
|
else
|
47
48
|
key, *values = row
|
48
49
|
end
|
50
|
+
|
51
|
+
if cast
|
52
|
+
values = values.collect{|v| v.send cast }
|
53
|
+
end
|
49
54
|
|
50
55
|
case type
|
51
56
|
when :double, :flat
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -40,7 +40,7 @@ module TSV
|
|
40
40
|
# Process fields line
|
41
41
|
|
42
42
|
preamble << line if line
|
43
|
-
while line
|
43
|
+
while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
|
44
44
|
@fields = line.split(@sep, -1)
|
45
45
|
@key_field = @fields.shift
|
46
46
|
@key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
|
@@ -49,7 +49,7 @@ module TSV
|
|
49
49
|
line = (@header_hash != "" ? stream.gets : nil)
|
50
50
|
line = Misc.fixutf8 line.chomp if line
|
51
51
|
preamble << line if line
|
52
|
-
@header_hash = false if TrueClass === @header_hash
|
52
|
+
@header_hash = false if TrueClass === @header_hash || @header_hash == ""
|
53
53
|
end
|
54
54
|
|
55
55
|
@preamble = preamble[0..-3] * "\n"
|
data/lib/rbbt/tsv/stream.rb
CHANGED
data/lib/rbbt/util/misc/pipes.rb
CHANGED
data/lib/rbbt/workflow.rb
CHANGED
@@ -187,17 +187,21 @@ module Workflow
|
|
187
187
|
clean_name = wf_name.sub(/::.*/,'')
|
188
188
|
Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
|
189
189
|
require_workflow clean_name
|
190
|
-
|
190
|
+
workflow = Misc.string2const Misc.camel_case(wf_name)
|
191
|
+
workflow.load_documentation
|
192
|
+
return workflow
|
191
193
|
end
|
192
194
|
|
193
195
|
Log.high{"Loading workflow #{wf_name}"}
|
194
196
|
require_local_workflow(wf_name) or
|
195
197
|
(Workflow.autoinstall and `rbbt workflow install #{Misc.snake_case(wf_name)} || rbbt workflow install #{wf_name}` and require_local_workflow(wf_name)) or raise("Workflow not found or could not be loaded: #{ wf_name }")
|
196
|
-
begin
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
198
|
+
workflow = begin
|
199
|
+
Misc.string2const Misc.camel_case(wf_name)
|
200
|
+
rescue
|
201
|
+
Workflow.workflows.last || true
|
202
|
+
end
|
203
|
+
workflow.load_documentation
|
204
|
+
workflow
|
201
205
|
end
|
202
206
|
|
203
207
|
attr_accessor :description
|
@@ -74,6 +74,7 @@ module Workflow
|
|
74
74
|
def dep_task(name, workflow, oname, *rest, &block)
|
75
75
|
dep(workflow, oname, *rest, &block)
|
76
76
|
extension workflow.tasks[oname].extension if workflow.tasks.include?(oname) unless @extension
|
77
|
+
returns workflow.tasks[oname].result_description if workflow.tasks.include?(oname) unless @result_description
|
77
78
|
task name do
|
78
79
|
raise RbbtException, "dependency not found in dep_task" if dependencies.empty?
|
79
80
|
dep = dependencies.last
|
data/lib/rbbt/workflow/doc.rb
CHANGED
@@ -45,10 +45,25 @@ module Workflow
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def load_documentation
|
48
|
-
@documentation
|
48
|
+
return if @documentation
|
49
|
+
@documentation ||= Workflow.parse_workflow_doc documentation_markdown
|
49
50
|
@documentation[:tasks].each do |task, description|
|
50
|
-
|
51
|
-
|
51
|
+
if task.include? "#"
|
52
|
+
workflow, task = task.split("#")
|
53
|
+
workflow = begin
|
54
|
+
Kernel.const_get workflow
|
55
|
+
rescue
|
56
|
+
next
|
57
|
+
end
|
58
|
+
else
|
59
|
+
workflow = self
|
60
|
+
end
|
61
|
+
|
62
|
+
if workflow.tasks.include? task.to_sym
|
63
|
+
workflow.tasks[task.to_sym].description = description
|
64
|
+
else
|
65
|
+
Log.low "Documentation for #{ task }, but not a #{ workflow.to_s } task"
|
66
|
+
end
|
52
67
|
end
|
53
68
|
end
|
54
69
|
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -458,6 +458,10 @@ class Step
|
|
458
458
|
end
|
459
459
|
|
460
460
|
def clean
|
461
|
+
if ! Open.exists?(info_file)
|
462
|
+
Log.high "Refusing to clean step with no .info file: #{path}"
|
463
|
+
return self
|
464
|
+
end
|
461
465
|
status = []
|
462
466
|
status << "dirty" if done? && dirty?
|
463
467
|
status << "not running" if ! done? && ! running?
|
@@ -97,6 +97,7 @@ class Step
|
|
97
97
|
Open.ln_s(value.path, path)
|
98
98
|
when type.to_s == "file"
|
99
99
|
if String === value && File.exists?(value)
|
100
|
+
value = File.expand_path(value)
|
100
101
|
Open.ln_s(value, path)
|
101
102
|
else
|
102
103
|
value = value.collect{|v| v = "#{v}" if Path === v; v }if Array === value
|
data/lib/rbbt/workflow/usage.rb
CHANGED
@@ -122,7 +122,7 @@ module Workflow
|
|
122
122
|
last = _prov_tasks(workflow.dep_tree(task_name))
|
123
123
|
|
124
124
|
if child
|
125
|
-
description << "->" << task_name.to_s
|
125
|
+
description << "-> " << task_name.to_s
|
126
126
|
elsif first
|
127
127
|
description << "" << task_name.to_s
|
128
128
|
else
|
@@ -198,7 +198,7 @@ module Workflow
|
|
198
198
|
puts Misc.format_definition_list_item(name.to_s, description, Log.terminal_width, 20, :yellow)
|
199
199
|
|
200
200
|
prov_string = prov_string(dep_tree(name))
|
201
|
-
puts Log.color
|
201
|
+
puts Misc.format_paragraph Log.color(:blue, "-> " + prov_string) if prov_string && ! prov_string.empty?
|
202
202
|
end
|
203
203
|
|
204
204
|
else
|
@@ -1,12 +1,8 @@
|
|
1
1
|
require 'rbbt/util/R'
|
2
2
|
|
3
3
|
module Workflow
|
4
|
-
def self.
|
5
|
-
|
6
|
-
jobs = []
|
7
|
-
seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
|
8
|
-
|
9
|
-
data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
|
4
|
+
def self.trace_job_times(jobs, fix_gap = false)
|
5
|
+
data = TSV.setup({}, "Job~Code,Workflow,Task,Start,End#:type=:list")
|
10
6
|
min_start = nil
|
11
7
|
max_done = nil
|
12
8
|
jobs.each do |job|
|
@@ -14,10 +10,13 @@ module Workflow
|
|
14
10
|
started = job.info[:started]
|
15
11
|
ddone = job.info[:done]
|
16
12
|
|
17
|
-
|
18
|
-
|
13
|
+
started = Time.parse started if String === started
|
14
|
+
ddone = Time.parse ddone if String === ddone
|
15
|
+
|
16
|
+
code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * " · "
|
17
|
+
code = job.name + " - " + code
|
19
18
|
|
20
|
-
data[
|
19
|
+
data[job.path] = [code,job.workflow.to_s, job.task_name, started, ddone]
|
21
20
|
if min_start.nil?
|
22
21
|
min_start = started
|
23
22
|
else
|
@@ -39,7 +38,7 @@ module Workflow
|
|
39
38
|
value["End"] - min_start
|
40
39
|
end
|
41
40
|
|
42
|
-
if
|
41
|
+
if fix_gap
|
43
42
|
ranges = []
|
44
43
|
data.through do |k,values|
|
45
44
|
start, eend = values.values_at "Start.second", "End.second"
|
@@ -67,115 +66,167 @@ module Workflow
|
|
67
66
|
gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
|
68
67
|
value - gap
|
69
68
|
end
|
69
|
+
|
70
|
+
total_gaps = Misc.sum(gaps.collect{|k,v| v})
|
71
|
+
Log.info "Total gaps: #{total_gaps} seconds"
|
70
72
|
end
|
71
73
|
|
74
|
+
start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
|
75
|
+
eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
|
76
|
+
total = eend - start unless eend.nil? || start.nil?
|
77
|
+
Log.info "Total time elapsed: #{total} seconds" if total
|
78
|
+
|
79
|
+
data
|
80
|
+
end
|
81
|
+
|
82
|
+
def self.plot_trace_job_times(data, plot, width=800, height=800)
|
83
|
+
data.R <<-EOF, [:svg]
|
84
|
+
rbbt.require('tidyverse')
|
85
|
+
rbbt.require('ggplot2')
|
86
|
+
|
87
|
+
names(data) <- make.names(names(data))
|
88
|
+
data$id = data$Code
|
89
|
+
data$content = data$Task
|
90
|
+
data$start = data$Start
|
91
|
+
data$end = data$End
|
92
|
+
data$Project = data$Workflow
|
93
|
+
|
94
|
+
tasks = data
|
95
|
+
|
96
|
+
#theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
|
97
|
+
theme_gantt <- function(base_size=11, base_family="Sans Serif") {
|
98
|
+
ret <- theme_bw(base_size, base_family) %+replace%
|
99
|
+
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
|
100
|
+
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
|
101
|
+
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
|
102
|
+
panel.border = element_blank(), axis.line=element_blank(),
|
103
|
+
panel.grid.minor=element_blank(),
|
104
|
+
panel.grid.major.y = element_blank(),
|
105
|
+
panel.grid.major.x = element_line(size=0.5, colour="grey80"),
|
106
|
+
axis.ticks=element_blank(),
|
107
|
+
legend.position="bottom",
|
108
|
+
axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
|
109
|
+
strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
|
110
|
+
strip.background=element_rect(fill="#ffffff", colour=NA),
|
111
|
+
panel.spacing.y=unit(1.5, "lines"),
|
112
|
+
legend.key = element_blank())
|
113
|
+
|
114
|
+
ret
|
115
|
+
}
|
116
|
+
|
117
|
+
tasks.long <- tasks %>%
|
118
|
+
gather(date.type, task.date, -c(Code,Project, Task, id, Start.second, End.second)) %>%
|
119
|
+
arrange(date.type, task.date) %>%
|
120
|
+
mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
|
121
|
+
|
122
|
+
x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
|
123
|
+
|
124
|
+
timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
|
125
|
+
geom_segment() +
|
126
|
+
geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
|
127
|
+
guides(colour=guide_legend(title=NULL)) +
|
128
|
+
labs(x=NULL, y=NULL) +
|
129
|
+
theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
|
130
|
+
|
131
|
+
rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
|
132
|
+
EOF
|
133
|
+
end
|
134
|
+
|
135
|
+
def self.trace_job_summary(jobs, report_keys = [])
|
72
136
|
tasks_info = {}
|
73
137
|
|
138
|
+
report_keys = report_keys.collect{|k| k.to_s}
|
139
|
+
|
74
140
|
jobs.each do |dep|
|
75
141
|
next unless dep.info[:done]
|
76
142
|
task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
|
77
|
-
info = tasks_info[task] ||= {}
|
143
|
+
info = tasks_info[task] ||= IndiferentHash.setup({})
|
144
|
+
dep_info = IndiferentHash.setup(dep.info)
|
145
|
+
|
146
|
+
ddone = dep_info[:done]
|
147
|
+
started = dep_info[:started]
|
78
148
|
|
79
|
-
|
149
|
+
started = Time.parse started if String === started
|
150
|
+
ddone = Time.parse ddone if String === ddone
|
151
|
+
|
152
|
+
time = ddone - started
|
80
153
|
info[:time] ||= []
|
81
154
|
info[:time] << time
|
82
155
|
|
83
|
-
|
84
|
-
|
85
|
-
|
156
|
+
report_keys.each do |key|
|
157
|
+
info[key] = dep_info[key]
|
158
|
+
end
|
159
|
+
|
86
160
|
dep.info[:config_keys].select do |kinfo|
|
87
161
|
key, value, tokens = kinfo
|
88
|
-
key = key.to_s
|
89
|
-
cpus = value if key.include? 'cpu'
|
90
|
-
spark = value if key == 'spark'
|
91
|
-
shard = value if key == 'shard'
|
92
|
-
end
|
93
162
|
|
94
|
-
|
95
|
-
|
96
|
-
info[:shard] = shard
|
163
|
+
info[key.to_s] = value if report_keys.include? key.to_s
|
164
|
+
end
|
97
165
|
end
|
98
166
|
|
99
|
-
|
167
|
+
summary = TSV.setup({}, "Task~Calls,Avg. Time,Total Time#:type=:list")
|
100
168
|
|
101
169
|
tasks_info.each do |task, info|
|
102
|
-
time_lists
|
103
|
-
avg_time = Misc.mean(time_lists)
|
104
|
-
total_time = Misc.sum(time_lists)
|
170
|
+
time_lists = info[:time]
|
171
|
+
avg_time = Misc.mean(time_lists).to_i
|
172
|
+
total_time = Misc.sum(time_lists).to_i
|
105
173
|
calls = time_lists.length
|
106
|
-
|
174
|
+
summary[task] = [calls, avg_time, total_time]
|
107
175
|
end
|
108
176
|
|
109
|
-
|
177
|
+
report_keys.each do |key|
|
178
|
+
summary.add_field Misc.humanize(key) do |task|
|
179
|
+
tasks_info[task][key]
|
180
|
+
end
|
181
|
+
end if Array === report_keys && report_keys.any?
|
110
182
|
|
111
|
-
|
112
|
-
|
113
|
-
total = eend - start
|
114
|
-
Log.info "Total time elapsed: #{total} seconds"
|
183
|
+
summary
|
184
|
+
end
|
115
185
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
186
|
+
def self.trace(seed_jobs, options = {})
|
187
|
+
jobs = []
|
188
|
+
seed_jobs.each do |step|
|
189
|
+
jobs += step.rec_dependencies + [step]
|
190
|
+
step.info[:archived_info].each do |path,ainfo|
|
191
|
+
archived_step = Step.new path
|
192
|
+
|
193
|
+
archived_step.define_singleton_method :info do
|
194
|
+
ainfo
|
195
|
+
end
|
196
|
+
|
197
|
+
#class << archived_step
|
198
|
+
# self
|
199
|
+
#end.define_method :info do
|
200
|
+
# ainfo
|
201
|
+
#end
|
202
|
+
|
203
|
+
jobs << archived_step
|
204
|
+
end if step.info[:archived_info]
|
120
205
|
|
121
|
-
plot, width, height = options.values_at :plot, :width, :height
|
122
|
-
if plot
|
123
|
-
data.R <<-EOF, [:svg]
|
124
|
-
rbbt.require('tidyverse')
|
125
|
-
rbbt.require('ggplot2')
|
126
|
-
|
127
|
-
names(data) <- make.names(names(data))
|
128
|
-
data$id = rownames(data)
|
129
|
-
data$content = data$Task
|
130
|
-
data$start = data$Start
|
131
|
-
data$end = data$End
|
132
|
-
data$Project = data$Workflow
|
133
|
-
|
134
|
-
tasks = data
|
135
|
-
|
136
|
-
#theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
|
137
|
-
theme_gantt <- function(base_size=11, base_family="Sans Serif") {
|
138
|
-
ret <- theme_bw(base_size, base_family) %+replace%
|
139
|
-
theme(panel.background = element_rect(fill="#ffffff", colour=NA),
|
140
|
-
axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
|
141
|
-
title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
|
142
|
-
panel.border = element_blank(), axis.line=element_blank(),
|
143
|
-
panel.grid.minor=element_blank(),
|
144
|
-
panel.grid.major.y = element_blank(),
|
145
|
-
panel.grid.major.x = element_line(size=0.5, colour="grey80"),
|
146
|
-
axis.ticks=element_blank(),
|
147
|
-
legend.position="bottom",
|
148
|
-
axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
|
149
|
-
strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
|
150
|
-
strip.background=element_rect(fill="#ffffff", colour=NA),
|
151
|
-
panel.spacing.y=unit(1.5, "lines"),
|
152
|
-
legend.key = element_blank())
|
153
|
-
|
154
|
-
ret
|
155
|
-
}
|
156
|
-
|
157
|
-
tasks.long <- tasks %>%
|
158
|
-
gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
|
159
|
-
arrange(date.type, task.date) %>%
|
160
|
-
mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
|
161
|
-
|
162
|
-
x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
|
163
|
-
|
164
|
-
timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
|
165
|
-
geom_segment() +
|
166
|
-
geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
|
167
|
-
guides(colour=guide_legend(title=NULL)) +
|
168
|
-
labs(x=NULL, y=NULL) +
|
169
|
-
theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
|
170
|
-
|
171
|
-
rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
|
172
|
-
EOF
|
173
206
|
end
|
174
207
|
|
208
|
+
jobs = jobs.uniq.sort_by{|job| t = job.info[:started] || Open.mtime(job.path) || Time.now; Time === t ? t : Time.parse(t) }
|
209
|
+
|
210
|
+
data = trace_job_times(jobs, options[:fix_gap])
|
211
|
+
|
212
|
+
report_keys = options[:report_keys] || ""
|
213
|
+
report_keys = report_keys.split(/,\s*/) if String === report_keys
|
214
|
+
summary = trace_job_summary(jobs, report_keys)
|
215
|
+
|
216
|
+
raise "No jobs to process" if data.size == 0
|
217
|
+
|
218
|
+
plot, size, width, height = options.values_at :plot, :size, :width, :height
|
219
|
+
|
220
|
+
size = 800 if size.nil?
|
221
|
+
width = size.to_i * 2 if width.nil?
|
222
|
+
height = size if height.nil?
|
223
|
+
|
224
|
+
plot_trace_job_times(data, plot, width, height) if plot
|
225
|
+
|
175
226
|
if options[:plot_data]
|
176
227
|
data
|
177
228
|
else
|
178
|
-
|
229
|
+
summary
|
179
230
|
end
|
180
231
|
|
181
232
|
end
|