rbbt-util 5.21.98 → 5.21.99
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +4 -0
- data/lib/rbbt/resource/path.rb +1 -0
- data/lib/rbbt/util/R.rb +2 -3
- data/lib/rbbt/util/concurrency/processes/worker.rb +12 -6
- data/lib/rbbt/workflow.rb +1 -1
- data/lib/rbbt/workflow/accessor.rb +25 -10
- data/lib/rbbt/workflow/step/dependencies.rb +25 -10
- data/lib/rbbt/workflow/step/run.rb +13 -2
- data/share/Rlib/util.R +45 -10
- data/share/rbbt_commands/tsv/read_excel +1 -0
- data/share/rbbt_commands/workflow/prov +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dd511eb01b14817584552dad9178ede2319c65b
|
4
|
+
data.tar.gz: 8931609250cca85686c0be1e35b3d9cdcba8aca6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 298f9fffa765f4eb2c2cd92a85289c148ab8e49d81e4a509341fd9256222cd8d3ff42fd81217d040241dd2960f4ff743853cf5ec0ad8ae12094dbca7d11e0e2e
|
7
|
+
data.tar.gz: 06ef1631872f80edce0c3dc759d650c83466666a70ec123242d0bcce6c7ea225417b5bcc8561ae285934ec8bfd570bb980ccccb1483128f7a16ce58444a5b9ab
|
data/bin/rbbt
CHANGED
@@ -48,8 +48,12 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
|
|
48
48
|
--locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
|
49
49
|
--dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
|
50
50
|
-nolock--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (prevent stale file handlers for high-througput and high-concurrency)"}
|
51
|
+
-ji--jobname_as_inputs #{Log.color :yellow, "Use inputs as part of the jobname in workflows instead of digesting them"}
|
51
52
|
EOF
|
52
53
|
|
54
|
+
if options[:jobname_as_inputs]
|
55
|
+
ENV["RBBT_INPUT_JOBNAME"] = "true"
|
56
|
+
end
|
53
57
|
|
54
58
|
locate = options.delete :locate_file
|
55
59
|
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -22,8 +22,7 @@ source('#{UTIL}');
|
|
22
22
|
EOF
|
23
23
|
|
24
24
|
require_sources = source.collect{|source|
|
25
|
-
source = R::LIB_DIR["
|
26
|
-
source = R::LIB_DIR["svg.R"] if source == :svg
|
25
|
+
source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
|
27
26
|
"source('#{source}')"
|
28
27
|
} * ";\n" if Array === source and source.any?
|
29
28
|
|
@@ -150,7 +149,7 @@ module TSV
|
|
150
149
|
source = [source] unless Array === source
|
151
150
|
|
152
151
|
require_sources = source.collect{|source|
|
153
|
-
source = R::LIB_DIR["
|
152
|
+
source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
|
154
153
|
"source('#{source}')"
|
155
154
|
} * ";\n" if Array === source and source.any?
|
156
155
|
|
data/lib/rbbt/util/concurrency/processes/worker.rb
CHANGED
@@ -70,19 +70,18 @@ class RbbtProcessQueue
|
|
70
70
|
|
71
71
|
status = nil
|
72
72
|
begin
|
73
|
-
@current = Process.fork do
|
74
|
-
run
|
75
|
-
end
|
76
|
-
@asked = false
|
77
73
|
|
78
74
|
initial = Misc.memory_use(Process.pid)
|
79
75
|
memory_cap = multiplier * initial
|
80
76
|
Log.debug "Worker for #{Process.pid} started with pid #{@current} -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap}"
|
81
77
|
|
78
|
+
@asked = false
|
79
|
+
@monitored = false
|
82
80
|
@monitor_thread = Thread.new do
|
83
81
|
begin
|
84
82
|
while true
|
85
|
-
|
83
|
+
@monitored = true
|
84
|
+
current = @current ? 0 : Misc.memory_use(@current)
|
86
85
|
if current > memory_cap and not @asked
|
87
86
|
Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current}"
|
88
87
|
RbbtSemaphore.synchronize(@callback_queue.write_sem) do
|
@@ -97,6 +96,13 @@ class RbbtProcessQueue
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
|
99
|
+
while ! @monitored
|
100
|
+
sleep 0.1
|
101
|
+
end
|
102
|
+
@current = Process.fork do
|
103
|
+
run
|
104
|
+
end
|
105
|
+
|
100
106
|
while true
|
101
107
|
pid, status = Process.waitpid2 @current
|
102
108
|
code = status.to_i >> 8
|
@@ -114,7 +120,7 @@ class RbbtProcessQueue
|
|
114
120
|
Log.exception $!
|
115
121
|
raise $!
|
116
122
|
ensure
|
117
|
-
@monitor_thread.kill
|
123
|
+
@monitor_thread.kill if @monitor_thread
|
118
124
|
Process.kill "INT", @current if Misc.pid_exists? @current
|
119
125
|
@callback_queue.close_write if @callback_queue
|
120
126
|
end
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -337,7 +337,7 @@ module Workflow
|
|
337
337
|
real_inputs[k] = v
|
338
338
|
end
|
339
339
|
|
340
|
-
if real_inputs.empty?
|
340
|
+
if real_inputs.empty? and not Workflow::TAG == :inputs
|
341
341
|
step_path = step_path taskname, jobname, [], [], task.extension
|
342
342
|
input_values = task.take_input_values(inputs)
|
343
343
|
else
|
data/lib/rbbt/workflow/accessor.rb
CHANGED
@@ -351,6 +351,10 @@ class Step
|
|
351
351
|
Open.exists?(path) or Open.exists?(pid_file) #or Open.exists?(info_file)
|
352
352
|
end
|
353
353
|
|
354
|
+
def waiting?
|
355
|
+
Open.exists?(info_file) and not started?
|
356
|
+
end
|
357
|
+
|
354
358
|
def dirty?
|
355
359
|
status = self.status
|
356
360
|
return true if done? and not status == :done and not status == :noinfo
|
@@ -359,7 +363,7 @@ class Step
|
|
359
363
|
if dirty_files.any?
|
360
364
|
true
|
361
365
|
else
|
362
|
-
|
366
|
+
! self.updated?
|
363
367
|
end
|
364
368
|
end
|
365
369
|
|
@@ -759,13 +763,10 @@ module Workflow
|
|
759
763
|
when Proc
|
760
764
|
if DependencyBlock === dependency
|
761
765
|
orig_dep = dependency.dependency
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
options = {}
|
767
|
-
compute = nil
|
768
|
-
end
|
766
|
+
wf, task_name, options = orig_dep
|
767
|
+
|
768
|
+
options = {} if options.nil?
|
769
|
+
compute = options[:compute]
|
769
770
|
|
770
771
|
options = IndiferentHash.setup(options.dup)
|
771
772
|
dep = dependency.call jobname, options.merge(_inputs), real_dependencies
|
@@ -775,7 +776,8 @@ module Workflow
|
|
775
776
|
new_=[]
|
776
777
|
dep.each{|d|
|
777
778
|
if Hash === d
|
778
|
-
d[:workflow] ||=
|
779
|
+
d[:workflow] ||= wf
|
780
|
+
d[:task] = task_name
|
779
781
|
inputs = assign_dep_inputs({}, options.merge(d[:inputs] || {}), real_dependencies, d[:workflow].task_info(d[:task]))
|
780
782
|
d = d[:workflow].job(d[:task], d[:jobname], inputs)
|
781
783
|
end
|
@@ -802,7 +804,7 @@ module Workflow
|
|
802
804
|
real_dependencies.flatten.compact
|
803
805
|
end
|
804
806
|
|
805
|
-
TAG = :hash
|
807
|
+
TAG = ENV["RBBT_INPUT_JOBNAME"] == "true" ? :inputs : :hash
|
806
808
|
def step_path(taskname, jobname, inputs, dependencies, extension = nil)
|
807
809
|
raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./
|
808
810
|
if inputs.length > 0 or dependencies.any?
|
@@ -810,6 +812,19 @@ module Workflow
|
|
810
812
|
when :hash
|
811
813
|
hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
|
812
814
|
jobname + '_' << hash_str
|
815
|
+
when :inputs
|
816
|
+
all_inputs = {}
|
817
|
+
inputs.zip(self.task_info(taskname)[:inputs]) do |i,f|
|
818
|
+
all_inputs[f] = i
|
819
|
+
end
|
820
|
+
dependencies.each do |dep|
|
821
|
+
ri = dep.recursive_inputs
|
822
|
+
ri.zip(ri.fields).each do |i,f|
|
823
|
+
all_inputs[f] = i
|
824
|
+
end
|
825
|
+
end
|
826
|
+
|
827
|
+
all_inputs.any? ? jobname + '_' << Misc.obj2str(all_inputs) : jobname
|
813
828
|
else
|
814
829
|
jobname
|
815
830
|
end
|
data/lib/rbbt/workflow/step/dependencies.rb
CHANGED
@@ -80,13 +80,14 @@ class Step
|
|
80
80
|
return if status == 'streaming' and job.running?
|
81
81
|
end
|
82
82
|
|
83
|
-
if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
|
83
|
+
if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
|
84
84
|
job.clean
|
85
85
|
end
|
86
86
|
|
87
87
|
(job.init_info and job.dup_inputs) unless status == 'done' or job.started?
|
88
88
|
|
89
|
-
|
89
|
+
canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
|
90
|
+
raise DependencyError, job if job.error? and not canfail
|
90
91
|
end
|
91
92
|
|
92
93
|
def log_dependency_exec(dependency, action)
|
@@ -119,6 +120,12 @@ class Step
|
|
119
120
|
return
|
120
121
|
end
|
121
122
|
|
123
|
+
if dependency.aborted?
|
124
|
+
log_dependency_exec(dependency, "aborted (clean)")
|
125
|
+
dependency.clean
|
126
|
+
raise TryAgain
|
127
|
+
end
|
128
|
+
|
122
129
|
if not dependency.started?
|
123
130
|
log_dependency_exec(dependency, :starting)
|
124
131
|
dependency.run(true)
|
@@ -127,12 +134,6 @@ class Step
|
|
127
134
|
|
128
135
|
dependency.grace
|
129
136
|
|
130
|
-
if dependency.aborted?
|
131
|
-
log_dependency_exec(dependency, "aborted (clean)")
|
132
|
-
dependency.clean
|
133
|
-
raise TryAgain
|
134
|
-
end
|
135
|
-
|
136
137
|
if dependency.error?
|
137
138
|
log_dependency_exec(dependency, :error)
|
138
139
|
raise DependencyError, [dependency.path, dependency.messages.last] * ": " if dependency.error?
|
@@ -161,7 +162,6 @@ class Step
|
|
161
162
|
raise $!
|
162
163
|
rescue Exception
|
163
164
|
Log.error "Exception in dep. #{ Log.color :red, dependency.task_name.to_s } -- #{$!.message}"
|
164
|
-
#Log.exception $!
|
165
165
|
raise $!
|
166
166
|
end
|
167
167
|
end
|
@@ -189,12 +189,16 @@ class Step
|
|
189
189
|
type, *rest = type
|
190
190
|
end
|
191
191
|
|
192
|
+
canfail = rest && rest.include?(:canfail)
|
193
|
+
|
192
194
|
case type
|
193
195
|
when :produce, :no_dup
|
194
196
|
list.each do |step|
|
195
197
|
Misc.insist do
|
196
198
|
begin
|
197
199
|
step.produce
|
200
|
+
rescue RbbtException
|
201
|
+
raise $! unless canfail
|
198
202
|
rescue Exception
|
199
203
|
step.exception $!
|
200
204
|
if step.recoverable_error?
|
@@ -211,11 +215,22 @@ class Step
|
|
211
215
|
cpus = 5 if cpus.nil?
|
212
216
|
cpus = list.length / 2 if cpus > list.length / 2
|
213
217
|
|
214
|
-
|
218
|
+
respawn = rest && rest.include?(:respawn)
|
219
|
+
respawn = false if rest && rest.include?(:norespawn)
|
220
|
+
respawn = rest && rest.include?(:always_respawn)
|
221
|
+
respawn = :always if respawn.nil?
|
222
|
+
|
223
|
+
Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => respawn) do |dep|
|
215
224
|
Misc.insist do
|
216
225
|
begin
|
217
226
|
dep.produce
|
218
227
|
Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
|
228
|
+
rescue Exception
|
229
|
+
if canfail
|
230
|
+
Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
|
231
|
+
else
|
232
|
+
raise $!
|
233
|
+
end
|
219
234
|
rescue Aborted
|
220
235
|
dep.abort
|
221
236
|
Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
|
data/lib/rbbt/workflow/step/run.rb
CHANGED
@@ -86,7 +86,15 @@ class Step
|
|
86
86
|
end
|
87
87
|
|
88
88
|
def checks
|
89
|
-
rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
|
89
|
+
#rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
|
90
|
+
rec_dependencies.
|
91
|
+
select{|dependency| ! (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) }.
|
92
|
+
select{|dependency| ! dependency.error? }.
|
93
|
+
collect{|dependency| dependency.path }.uniq
|
94
|
+
end
|
95
|
+
|
96
|
+
def updated?
|
97
|
+
done? and checks.select{|path| File.mtime(path) > File.mtime(self.path) }.empty?
|
90
98
|
end
|
91
99
|
|
92
100
|
def kill_children
|
@@ -114,13 +122,16 @@ class Step
|
|
114
122
|
begin
|
115
123
|
@mutex.synchronize do
|
116
124
|
no_load = :stream if no_load
|
125
|
+
|
126
|
+
Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
|
117
127
|
result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
|
118
128
|
if Step === Step.log_relay_step and not self == Step.log_relay_step
|
119
129
|
relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
|
120
130
|
end
|
121
131
|
|
132
|
+
Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
|
133
|
+
|
122
134
|
@exec = false
|
123
|
-
Open.write(pid_file, Process.pid.to_s)
|
124
135
|
init_info
|
125
136
|
|
126
137
|
log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task.name.to_s || ""}"
|
data/share/Rlib/util.R
CHANGED
@@ -33,7 +33,7 @@ rbbt.ruby <- function(code, load = TRUE, flat = FALSE, type = 'tsv', ...){
|
|
33
33
|
}
|
34
34
|
}
|
35
35
|
|
36
|
-
rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="
|
36
|
+
rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="Default", code='', ...){
|
37
37
|
|
38
38
|
str = "require 'rbbt/workflow'"
|
39
39
|
|
@@ -43,26 +43,47 @@ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobn
|
|
43
43
|
|
44
44
|
args_list = list(...)
|
45
45
|
args_strs = c()
|
46
|
+
tmp_files = c()
|
47
|
+
|
46
48
|
for (input in names(args_list)){
|
47
49
|
value = args_list[[input]]
|
48
50
|
input = sub('input\\.', '', input)
|
49
|
-
if (
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
if (is.vector(value) && length(value) > 1){
|
52
|
+
file = tempfile()
|
53
|
+
writeLines(value, file)
|
54
|
+
tmp_files = c(tmp_files, file)
|
55
|
+
value = paste("Open.read('", file, "').split(\"\\n\")", sep="")
|
56
|
+
}else{
|
57
|
+
if (!is.numeric(value)){
|
58
|
+
if (all(value %in% TRUE)){
|
59
|
+
value = 'true'
|
55
60
|
}else{
|
56
|
-
|
61
|
+
if (all(value %in% FALSE)){
|
62
|
+
value = 'false'
|
63
|
+
}else{
|
64
|
+
if (is.data.frame(value)){
|
65
|
+
file = tempfile()
|
66
|
+
rbbt.tsv.write(file, value)
|
67
|
+
tmp_files = c(tmp_files, file)
|
68
|
+
value = paste("TSV.open('", file, "')", sep="")
|
69
|
+
}else{
|
70
|
+
value = paste("'", value, "'", sep="")
|
71
|
+
}
|
72
|
+
}
|
57
73
|
}
|
58
74
|
}
|
59
75
|
}
|
60
76
|
args_strs = c(args_strs, paste(":",input,' => ',value, sep=""))
|
61
77
|
}
|
62
78
|
|
63
|
-
args_str = paste(args_strs,
|
79
|
+
args_str = paste(args_strs, collapse=",")
|
64
80
|
str = paste(str, paste('wf.job(:', task, ", '", jobname, "', ", args_str,').produce.path', sep=""), sep="\n")
|
65
|
-
|
81
|
+
|
82
|
+
res = rbbt.ruby(str, load, flat, type)
|
83
|
+
|
84
|
+
unlink(tmp_files)
|
85
|
+
|
86
|
+
return(res);
|
66
87
|
}
|
67
88
|
|
68
89
|
rbbt.ruby.substitutions <- function(script, substitutions = list(), ...){
|
@@ -134,11 +155,23 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, check
|
|
134
155
|
columns = rbbt.tsv.columns(filename, sep, comment.char=comment.char)
|
135
156
|
if (! is.null(columns)){
|
136
157
|
names(data) <- columns[2:length(columns)];
|
158
|
+
attributes(data)$key.field = substring(columns[1],2);
|
137
159
|
}
|
138
160
|
|
139
161
|
return(data);
|
140
162
|
}
|
141
163
|
|
164
|
+
rbbt.tsv.comma <- function(tsv){
|
165
|
+
for (c in names(tsv)){
|
166
|
+
v = tsv[,c]
|
167
|
+
if (is.character(v)){
|
168
|
+
v = gsub('\\|', ', ', v)
|
169
|
+
tsv[,c] = v
|
170
|
+
}
|
171
|
+
}
|
172
|
+
return(tsv)
|
173
|
+
}
|
174
|
+
|
142
175
|
rbbt.tsv.numeric <- function(filename, sep="\t", ...){
|
143
176
|
|
144
177
|
columns = rbbt.tsv.columns(filename, sep)
|
@@ -156,6 +189,8 @@ rbbt.tsv2matrix <- function(data){
|
|
156
189
|
}
|
157
190
|
|
158
191
|
rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL){
|
192
|
+
|
193
|
+
if (is.null(key.field)){ key.field = attributes(data)$key.field;}
|
159
194
|
if (is.null(key.field)){ key.field = "ID";}
|
160
195
|
|
161
196
|
f = file(filename, 'w');
|
data/share/rbbt_commands/workflow/prov
CHANGED
@@ -52,7 +52,7 @@ def report_msg(status, name, path)
|
|
52
52
|
task = Log.color(:yellow, parts.pop)
|
53
53
|
workflow = Log.color(:magenta, parts.pop)
|
54
54
|
|
55
|
-
if not Open.remote?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0
|
55
|
+
if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
|
56
56
|
status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
|
57
57
|
else
|
58
58
|
status_msg(status) << " " << [workflow, task, path] * " " << "\n"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.21.98
|
4
|
+
version: 5.21.99
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|