rbbt-util 5.28.2 → 5.28.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/hpc.rb +5 -2
- data/lib/rbbt/persist.rb +1 -0
- data/lib/rbbt/resource/path.rb +3 -1
- data/lib/rbbt/tsv/accessor.rb +10 -2
- data/lib/rbbt/tsv/change_id.rb +2 -2
- data/lib/rbbt/tsv/dumper.rb +10 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +3 -0
- data/lib/rbbt/tsv/util.rb +25 -13
- data/lib/rbbt/util/config.rb +2 -1
- data/lib/rbbt/util/misc/inspect.rb +1 -1
- data/lib/rbbt/util/misc/system.rb +1 -1
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow.rb +1 -0
- data/lib/rbbt/workflow/accessor.rb +94 -93
- data/lib/rbbt/workflow/definition.rb +6 -3
- data/lib/rbbt/workflow/integration/cromwell.rb +10 -3
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +5 -1
- data/lib/rbbt/workflow/step.rb +17 -5
- data/lib/rbbt/workflow/step/accessor.rb +9 -6
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/lib/rbbt/workflow/util/archive.rb +1 -0
- data/lib/rbbt/workflow/util/orchestrator.rb +190 -0
- data/lib/rbbt/workflow/util/trace.rb +182 -0
- data/share/rbbt_commands/app/start +2 -2
- data/share/rbbt_commands/purge_job +2 -4
- data/share/rbbt_commands/system/status +1 -1
- data/share/rbbt_commands/workflow/forget_deps +1 -3
- data/share/rbbt_commands/workflow/server +2 -0
- data/test/rbbt/tsv/parallel/test_traverse.rb +14 -0
- data/test/rbbt/tsv/test_manipulate.rb +20 -0
- data/test/rbbt/workflow/test_schedule.rb +0 -0
- data/test/rbbt/workflow/util/test_orchestrator.rb +136 -0
- metadata +8 -3
- data/lib/rbbt/workflow/schedule.rb +0 -238
@@ -81,9 +81,10 @@ module Workflow
|
|
81
81
|
if forget
|
82
82
|
remove = config :remove_dep_tasks, :remove_dep_tasks, :default => REMOVE_DEP_TASKS
|
83
83
|
self.archive_deps
|
84
|
+
self.copy_files_dir
|
84
85
|
self.dependencies = self.dependencies - [dep]
|
85
86
|
Open.rm_rf self.files_dir if Open.exist? self.files_dir
|
86
|
-
FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?
|
87
|
+
FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
|
87
88
|
Open.ln_h dep.path, self.tmp_path
|
88
89
|
case remove.to_s
|
89
90
|
when 'true'
|
@@ -92,8 +93,10 @@ module Workflow
|
|
92
93
|
dep.recursive_clean
|
93
94
|
end
|
94
95
|
else
|
95
|
-
Open.
|
96
|
-
|
96
|
+
if Open.exists?(dep.files_dir)
|
97
|
+
Open.rm_rf self.files_dir
|
98
|
+
Open.link dep.files_dir, self.files_dir
|
99
|
+
end
|
97
100
|
Open.link dep.path, self.path
|
98
101
|
end
|
99
102
|
nil
|
@@ -1,13 +1,20 @@
|
|
1
|
-
module
|
1
|
+
# Helpers to run WDL workflows through the Cromwell jar claimed below.
module Cromwell

  Rbbt.claim Rbbt.software.opt.jar["cromwell.jar"], :url, "https://github.com/broadinstitute/cromwell/releases/download/48/cromwell-48.jar"
  Rbbt.claim Rbbt.software.opt.jar["wdltool.jar"], :url, "https://github.com/broadinstitute/wdltool/releases/download/0.14/wdltool-0.14.jar"

  # Runs `java -jar cromwell.jar run` on +file+ using +work_dir+ as the
  # workflow root.
  #
  # file     - path of the workflow file handed to Cromwell.
  # work_dir - directory passed as --workflow-root.
  # options  - Hash of extra options forwarded to CMD.cmd_log. The
  #            :cromwell_inputs_file entry is extracted with
  #            Misc.process_options and, when present, is passed to Cromwell
  #            with the -i flag.
  #
  # Returns whatever CMD.cmd_log returns.
  def self.run_cromwell(file, work_dir, options = {})
    cromwell_inputs_file = Misc.process_options options, :cromwell_inputs_file
    jar = Rbbt.software.opt.jar["cromwell.jar"].produce.find

    cmd = "java -jar '#{jar}' run '#{file}' --workflow-root='#{work_dir}'"
    # Quote the inputs file like every other path on the command line so that
    # paths containing spaces are passed as a single argument.
    cmd += " -i '#{cromwell_inputs_file}'" if cromwell_inputs_file

    CMD.cmd_log(cmd, options.merge("add_option_dashes" => true))
  end
end
|
10
16
|
|
17
|
+
module Workflow
|
11
18
|
def load_cromwell(file)
|
12
19
|
jar = Rbbt.software.opt.jar["wdltool.jar"].produce.find
|
13
20
|
inputs = JSON.load(CMD.cmd("java -jar '#{jar}' inputs '#{file}'"))
|
@@ -85,7 +85,11 @@ class RemoteWorkflow
|
|
85
85
|
|
86
86
|
RemoteWorkflow::REST.__prepare_inputs_for_restclient(params)
|
87
87
|
name = RemoteWorkflow.capture_exception do
|
88
|
-
|
88
|
+
begin
|
89
|
+
RestClient.post(self.encode(url), params)
|
90
|
+
rescue RestClient::MovedPermanently, RestClient::Found, RestClient::TemporaryRedirect
|
91
|
+
raise RbbtException, "REST end-point moved to: #{$!.response.headers[:location]}"
|
92
|
+
end
|
89
93
|
end
|
90
94
|
|
91
95
|
Log.debug{ "RestClient jobname returned for #{ url } - #{Misc.fingerprint params}: #{name}" }
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -84,6 +84,7 @@ class Step
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def load_dependencies_from_info
|
87
|
+
relocated = nil
|
87
88
|
@dependencies = (self.info[:dependencies] || []).collect do |task,name,dep_path|
|
88
89
|
if Open.exists?(dep_path) || Open.exists?(dep_path + '.info')
|
89
90
|
Workflow._load_step dep_path
|
@@ -108,6 +109,14 @@ class Step
|
|
108
109
|
@inputs || []
|
109
110
|
end
|
110
111
|
|
112
|
+
# When the files directory of this step is a symbolic link, replaces the link
# with a copy of the location it resolves to. Does nothing otherwise.
def copy_files_dir
  dir = self.files_dir
  return unless File.symlink?(dir)

  # Resolve the target before removing the link, then copy it into place
  target = Open.realpath(dir)
  Open.rm dir
  Open.cp target, dir
end
|
119
|
+
|
111
120
|
def archive_deps
|
112
121
|
self.set_info :archived_info, archived_info
|
113
122
|
self.set_info :archived_dependencies, info[:dependencies]
|
@@ -411,7 +420,7 @@ class Step
|
|
411
420
|
return
|
412
421
|
end
|
413
422
|
|
414
|
-
if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir)
|
423
|
+
if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir) or Open.broken_link?(files_dir)
|
415
424
|
|
416
425
|
@result = nil
|
417
426
|
@pid = nil
|
@@ -419,8 +428,8 @@ class Step
|
|
419
428
|
Misc.insist do
|
420
429
|
Open.rm info_file if Open.exists?(info_file)
|
421
430
|
Open.rm md5_file if Open.exists?(md5_file)
|
422
|
-
Open.rm path if (Open.exists?(path)
|
423
|
-
Open.rm_rf files_dir if Open.exists?(files_dir)
|
431
|
+
Open.rm path if (Open.exists?(path) || Open.broken_link?(path))
|
432
|
+
Open.rm_rf files_dir if Open.exists?(files_dir) || Open.broken_link?(files_dir)
|
424
433
|
Open.rm pid_file if Open.exists?(pid_file)
|
425
434
|
Open.rm tmp_path if Open.exists?(tmp_path)
|
426
435
|
end
|
@@ -454,12 +463,15 @@ class Step
|
|
454
463
|
return [] if dependencies.nil? or dependencies.empty?
|
455
464
|
|
456
465
|
new_dependencies = []
|
466
|
+
archived_deps = self.info[:archived_info] ? self.info[:archived_info].keys : []
|
467
|
+
|
457
468
|
dependencies.each{|step|
|
458
469
|
#next if self.done? && Open.exists?(info_file) && info[:dependencies] && info[:dependencies].select{|task,name,path| path == step.path }.empty?
|
459
|
-
next if
|
470
|
+
next if archived_deps.include? step.path
|
471
|
+
next if seen.include? step.path
|
460
472
|
next if self.done? && need_run && ! updatable?
|
461
473
|
|
462
|
-
r = step.rec_dependencies(need_run, new_dependencies)
|
474
|
+
r = step.rec_dependencies(need_run, new_dependencies.collect{|d| d.path})
|
463
475
|
new_dependencies.concat r
|
464
476
|
new_dependencies << step
|
465
477
|
}
|
@@ -93,8 +93,8 @@ class Step
|
|
93
93
|
else
|
94
94
|
Open.write(path + '.read', value.to_s)
|
95
95
|
end
|
96
|
-
when Step ===
|
97
|
-
|
96
|
+
when Step === value
|
97
|
+
value = value.produce.load
|
98
98
|
else
|
99
99
|
Open.write(path, value.to_s)
|
100
100
|
end
|
@@ -110,12 +110,14 @@ class Step
|
|
110
110
|
task_info = workflow.task_info(task_name)
|
111
111
|
input_types = task_info[:input_types]
|
112
112
|
task_inputs = task_info[:inputs]
|
113
|
+
input_defaults = task_info[:input_defaults]
|
113
114
|
|
114
115
|
inputs = {}
|
115
116
|
job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value,name|
|
116
117
|
next unless task_inputs.include? name.to_sym
|
117
118
|
next if options and ! options.include?(name)
|
118
119
|
next if value.nil?
|
120
|
+
next if input_defaults[name] == value
|
119
121
|
inputs[name] = value
|
120
122
|
end
|
121
123
|
|
@@ -125,7 +127,7 @@ class Step
|
|
125
127
|
end
|
126
128
|
save_inputs(inputs, input_types, dir)
|
127
129
|
|
128
|
-
inputs.
|
130
|
+
inputs.keys
|
129
131
|
end
|
130
132
|
|
131
133
|
def name
|
@@ -437,11 +439,12 @@ class Step
|
|
437
439
|
rec_dependencies = self.rec_dependencies
|
438
440
|
return [] if rec_dependencies.empty?
|
439
441
|
canfail_paths = self.canfail_paths
|
442
|
+
|
440
443
|
dirty_files = rec_dependencies.reject{|dep|
|
441
444
|
(defined?(WorkflowRemoteClient) && WorkflowRemoteClient::RemoteStep === dep) ||
|
442
445
|
! Open.exists?(dep.info_file) ||
|
443
446
|
(dep.path && (Open.exists?(dep.path) || Open.remote?(dep.path))) ||
|
444
|
-
((dep.error? || dep.aborted?
|
447
|
+
((dep.error? || dep.aborted?) && (! dep.recoverable_error? || canfail_paths.include?(dep.path)))
|
445
448
|
}
|
446
449
|
end
|
447
450
|
|
@@ -508,12 +511,12 @@ class Step
|
|
508
511
|
|
509
512
|
# True when no pid is known for the step (neither in the info hash nor as a
# pid file) while its status is something other than nil, :aborted, :done,
# :error or :cleaned.
def nopid?
  return false if info[:pid] || Open.exists?(pid_file)

  current = status
  ! [nil, :aborted, :done, :error, :cleaned].include?(current)
end
|
513
516
|
|
514
517
|
# True when the step is explicitly marked as :aborted, or when its status is
# anything other than :cleaned, :noinfo or :setup and #nopid? reports that no
# pid is known for it.
def aborted?
  status = self.status
  return true if status == :aborted

  # Statuses for which a missing pid is not treated as an abort
  return false if [:cleaned, :noinfo, :setup].include?(status)

  nopid?
end
|
518
521
|
|
519
522
|
# {{{ INFO
|
data/lib/rbbt/workflow/usage.rb
CHANGED
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'rbbt/workflow'
|
2
|
+
|
3
|
+
module Workflow
|
4
|
+
# Produces the pending dependencies of a set of jobs, applying per-task rules
# (config keys, log severity) and keeping the resources requested by the jobs
# it launches within a budget of available resources.
class Orchestrator

  # Builds the workload of +job+: a Hash that maps the job, and recursively
  # each of its unfinished dependencies and input dependencies, to the list of
  # unfinished jobs it depends on. A job that is done maps to an empty list.
  def self.job_workload(job)
    workload = {job => []}
    return workload if job.done?

    [job.dependencies, job.input_dependencies].each do |deps|
      deps.each do |dep|
        next if dep.done?
        workload.merge!(job_workload(dep))
        workload[job] += workload[dep]
        workload[job] << dep
      end
    end

    workload
  end

  # Returns the rules that apply to +job+: the entry found under
  # rules[workflow][task_name], completed with the entries of
  # rules["defaults"] for keys it does not set. When there is no specific
  # entry the defaults are returned, or nil if there are no defaults either.
  def self.job_rules(rules, job)
    defaults = rules["defaults"]
    workflow_rules = rules[job.workflow.to_s]
    task_rules = workflow_rules && workflow_rules[job.task_name.to_s]

    # Do not hand nil to IndiferentHash.setup when no defaults are configured
    return (defaults && IndiferentHash.setup(defaults)) unless task_rules

    job_rules = IndiferentHash.setup(task_rules)
    defaults.each { |k, v| job_rules[k] ||= v } if defaults
    job_rules
  end

  # Drops the candidates whose path has already been seen, keeping the first
  # job found for each path and preserving the original order.
  def self.purge_duplicates(candidates)
    candidates.uniq { |job| job.path }
  end

  # Returns the "resources" entry of the rules of +job+, completed with
  # rules["default_resources"] or, failing that, with the "resources" entry of
  # rules["defaults"]. Returns an empty Hash when nothing is configured.
  def self.job_resources(rules, job)
    resources = (job_rules(rules, job) || {})["resources"] || {}
    IndiferentHash.setup(resources)

    # rules["defaults"] may be missing altogether (e.g. empty rules)
    default_resources = rules["default_resources"] || (rules["defaults"] || {})["resources"]
    default_resources.each { |k, v| resources[k] ||= v } if default_resources

    resources
  end

  # Sorts the candidates so that jobs whose resource values add up to a larger
  # total come first.
  def self.sort_candidates(candidates, rules)
    candidates.sort_by do |job|
      total = job_resources(rules, job).values.inject(0) { |acc, e| acc + e }
      -total
    end
  end

  # Selects from +workload+ the jobs that have no pending dependencies and
  # are not done, without repeated paths, sorted with sort_candidates.
  def self.candidates(workload, rules)
    ready = workload.select { |_job, deps| deps.empty? }.keys.reject { |job| job.done? }
    sort_candidates(purge_duplicates(ready), rules)
  end

  attr_accessor :available_resources, :resources_requested, :resources_used, :timer

  # timer               - seconds to sleep between passes over the workload.
  # available_resources - Hash with the maximum amount of each resource that
  #                       may be requested at the same time; resources not
  #                       listed are not limited.
  def initialize(timer = 5, available_resources = {})
    @timer = timer
    @available_resources = IndiferentHash.setup(available_resources)
    @resources_requested = IndiferentHash.setup({})
    @resources_used = IndiferentHash.setup({})
  end

  # Gives back the resources registered for +job+ (all but 'size') and
  # forgets the job. Does nothing if the job holds no resources.
  def release_resources(job)
    used = resources_used[job]
    return unless used

    used.each do |resource, value|
      next if resource == 'size'
      resources_requested[resource] -= value.to_i
    end
    resources_used.delete job
  end

  # Yields, and returns the result of the block, if the resources of +job+
  # fit in what is left of the available resources; in that case they are
  # registered as requested before yielding. Otherwise it only logs which
  # resources the job is waiting on.
  def check_resources(rules, job)
    resources = Orchestrator.job_resources(rules, job)

    limit_resources = resources.select do |resource, value|
      available = available_resources[resource]
      available && ((resources_requested[resource] || 0) + value) > available
    end.keys

    if limit_resources.any?
      Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
    else
      resources_used[job] = resources
      resources.each do |resource, value|
        resources_requested[resource] ||= 0
        resources_requested[resource] += value.to_i
      end
      Log.low "Orchestrator producing #{job.path} with resources #{resources}"

      return yield
    end
  end

  # Produces +job+ inside a Rbbt::Config.with_config block in which the
  # :config_keys of its rules have been processed, using the :log severity of
  # the rules or the current one when none is set.
  def run_with_rules(rules, job)
    job_rules = Orchestrator.job_rules(rules, job)

    Rbbt::Config.with_config do
      config_keys = job_rules && job_rules[:config_keys]
      config_keys.each { |config| Rbbt::Config.process_config config } if config_keys

      log = job_rules[:log] if job_rules
      log = Log.severity if log.nil?
      Log.with_severity(log) { job.produce(false, true) }
    end
  end

  # Processes the pending dependencies of +jobs+ until none of the jobs in
  # the workload has dependencies left to wait on. The workload is rebuilt
  # from scratch (TryAgain) every time a candidate is found done or a
  # recoverable failure is cleaned.
  #
  # Raises RuntimeError ("No candidates") when there is pending work but no
  # job can be considered.
  def process(rules, jobs)
    begin

      workload = jobs.inject({}) { |acc, job| acc.merge!(Orchestrator.job_workload(job)) }

      while workload.values.flatten.any?

        # A job can be both holding resources and a fresh candidate; visit it once
        candidates = (resources_used.keys + Orchestrator.candidates(workload, rules)).uniq
        raise "No candidates" if candidates.empty?

        candidates.each do |job|
          if job.error? || job.aborted?
            # Release first: both outcomes below leave this iteration
            release_resources(job)
            next unless job.recoverable_error?
            job.clean
            raise TryAgain
          elsif job.done?
            Log.debug "Orchestrator done #{job.path}"
            release_resources(job)
            raise TryAgain
          elsif job.running?
            next
          else
            check_resources(rules, job) { run_with_rules(rules, job) }
          end
        end

        # Prune finished jobs and dependencies that are done or have failed
        # for good, so that the loop does not wait on them forever
        pending = {}
        workload.each do |job, deps|
          if job.done?
            release_resources(job)
            next
          end
          pending[job] = deps.reject { |dep| dep.done? || (dep.error? && ! dep.recoverable_error?) }
        end
        workload = pending

        sleep timer
      end
    rescue TryAgain
      retry
    end
  end
end
|
190
|
+
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
3
|
+
module Workflow
|
4
|
+
# Summarizes the execution times of +seed_jobs+ and their recursive
# dependencies, considering only jobs whose info has a :done entry.
#
# seed_jobs - jobs to trace; each one is included along with its
#             rec_dependencies.
# options   - Hash of options:
#             :fix_gap   - when set, the periods in which no job was running
#                          are subtracted from the relative start and end
#                          times.
#             :plot      - when set, file name handed to rbbt.png_plot to
#                          draw the timeline.
#             :width, :height - dimensions interpolated into the plot call.
#             :plot_data - when set, return the per-job table instead of the
#                          per-task statistics.
#
# Returns the per-job TSV (Workflow, Task, Start, End, Start.second,
# End.second) if options[:plot_data] is set; otherwise the per-task TSV
# (Calls, Avg. Time, Total Time, Cpus, Spark, Shard).
#
# Raises RuntimeError ("No jobs to process") when none of the jobs is done.
def self.trace(seed_jobs, options = {})

  jobs = []
  seed_jobs.each { |j| jobs << j; jobs += j.rec_dependencies }

  data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
  min_start = nil
  jobs.each do |job|
    next unless job.info[:done]
    started = job.info[:started]
    ddone = job.info[:done]

    code = [job.workflow, job.task_name].compact.collect { |s| s.to_s } * "."
    code = code + '.' + job.name

    data[code] = [job.workflow.to_s, job.task_name, started, ddone]
    min_start = started if min_start.nil? || started < min_start
  end

  # Nothing below makes sense without at least one finished job
  raise "No jobs to process" if data.size == 0

  data.add_field "Start.second" do |k, value|
    value["Start"] - min_start
  end

  data.add_field "End.second" do |k, value|
    value["End"] - min_start
  end

  if options[:fix_gap]
    ranges = []
    data.through do |k, values|
      start, eend = values.values_at "Start.second", "End.second"

      ranges << (start..eend)
    end

    # Each gap is keyed by the position where it starts
    gaps = {}
    last = nil
    Misc.collapse_ranges(ranges).each do |range|
      start = range.begin
      eend = range.end
      gaps[last] = start - last if last
      last = eend
    end

    # Total length of the gaps that begin before the given start time
    gap_before = lambda do |start|
      Misc.sum(gaps.select { |pos, size| pos < start }.collect { |pos, size| size })
    end

    # "End.second" goes first: it has to be shifted using the start time
    # before that one is itself adjusted
    data.process "End.second" do |value, k, values|
      value - gap_before.call(values["Start.second"])
    end

    data.process "Start.second" do |value, k, values|
      value - gap_before.call(values["Start.second"])
    end
  end

  tasks_info = {}

  jobs.each do |dep|
    next unless dep.info[:done]
    task = [dep.workflow, dep.task_name].compact.collect { |s| s.to_s } * "#"
    task_info = tasks_info[task] ||= {}

    time = dep.info[:done] - dep.info[:started]
    task_info[:time] ||= []
    task_info[:time] << time

    cpus = nil
    spark = false
    shard = false
    # Jobs may have recorded no config keys at all
    (dep.info[:config_keys] || []).each do |kinfo|
      key, value, _tokens = kinfo
      key = key.to_s
      cpus = value if key.include? 'cpu'
      spark = value if key == 'spark'
      shard = value if key == 'shard'
    end

    task_info[:cpus] = cpus || 1
    task_info[:spark] = spark
    task_info[:shard] = shard
  end

  stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")

  tasks_info.each do |task, task_info|
    time_lists, cpus, spark, shard = task_info.values_at :time, :cpus, :spark, :shard
    avg_time = Misc.mean(time_lists)
    total_time = Misc.sum(time_lists)
    calls = time_lists.length
    stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
  end

  start = data.column("Start.second").values.flatten.collect { |v| v.to_f }.min
  eend = data.column("End.second").values.flatten.collect { |v| v.to_f }.max
  total = eend - start
  Log.info "Total time elapsed: #{total} seconds"

  if options[:fix_gap]
    total_gaps = Misc.sum(gaps.collect { |k, v| v })
    Log.info "Total gaps: #{total_gaps} seconds"
  end

  plot, width, height = options.values_at :plot, :width, :height
  if plot
    data.R <<-EOF, [:svg]
      rbbt.require('tidyverse')
      rbbt.require('ggplot2')

      names(data) <- make.names(names(data))
      data$id = rownames(data)
      data$content = data$Task
      data$start = data$Start
      data$end = data$End
      data$Project = data$Workflow

      tasks = data

      #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
      theme_gantt <- function(base_size=11, base_family="Sans Serif") {
        ret <- theme_bw(base_size, base_family) %+replace%
          theme(panel.background = element_rect(fill="#ffffff", colour=NA),
                axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
                title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
                panel.border = element_blank(), axis.line=element_blank(),
                panel.grid.minor=element_blank(),
                panel.grid.major.y = element_blank(),
                panel.grid.major.x = element_line(size=0.5, colour="grey80"),
                axis.ticks=element_blank(),
                legend.position="bottom",
                axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
                strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
                strip.background=element_rect(fill="#ffffff", colour=NA),
                panel.spacing.y=unit(1.5, "lines"),
                legend.key = element_blank())

        ret
      }

      tasks.long <- tasks %>%
        gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
        arrange(date.type, task.date) %>%
        mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))

      x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)

      timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
        geom_segment() +
        geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
        guides(colour=guide_legend(title=NULL)) +
        labs(x=NULL, y=NULL) +
        theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))

      rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
    EOF
  end

  if options[:plot_data]
    data
  else
    stats
  end

end
|
182
|
+
end
|