rbbt-util 5.21.98 → 5.21.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +4 -0
- data/lib/rbbt/resource/path.rb +1 -0
- data/lib/rbbt/util/R.rb +2 -3
- data/lib/rbbt/util/concurrency/processes/worker.rb +12 -6
- data/lib/rbbt/workflow.rb +1 -1
- data/lib/rbbt/workflow/accessor.rb +25 -10
- data/lib/rbbt/workflow/step/dependencies.rb +25 -10
- data/lib/rbbt/workflow/step/run.rb +13 -2
- data/share/Rlib/util.R +45 -10
- data/share/rbbt_commands/tsv/read_excel +1 -0
- data/share/rbbt_commands/workflow/prov +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dd511eb01b14817584552dad9178ede2319c65b
|
4
|
+
data.tar.gz: 8931609250cca85686c0be1e35b3d9cdcba8aca6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 298f9fffa765f4eb2c2cd92a85289c148ab8e49d81e4a509341fd9256222cd8d3ff42fd81217d040241dd2960f4ff743853cf5ec0ad8ae12094dbca7d11e0e2e
|
7
|
+
data.tar.gz: 06ef1631872f80edce0c3dc759d650c83466666a70ec123242d0bcce6c7ea225417b5bcc8561ae285934ec8bfd570bb980ccccb1483128f7a16ce58444a5b9ab
|
data/bin/rbbt
CHANGED
@@ -48,8 +48,12 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
|
|
48
48
|
--locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
|
49
49
|
--dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
|
50
50
|
-nolock--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (prevent stale file handlers for high-througput and high-concurrency)"}
|
51
|
+
-ji--jobname_as_inputs #{Log.color :yellow, "Use inputs as part of the jobname in workflows instead of digesting them"}
|
51
52
|
EOF
|
52
53
|
|
54
|
+
if options[:jobname_as_inputs]
|
55
|
+
ENV["RBBT_INPUT_JOBNAME"] = "true"
|
56
|
+
end
|
53
57
|
|
54
58
|
locate = options.delete :locate_file
|
55
59
|
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -22,8 +22,7 @@ source('#{UTIL}');
|
|
22
22
|
EOF
|
23
23
|
|
24
24
|
require_sources = source.collect{|source|
|
25
|
-
source = R::LIB_DIR["
|
26
|
-
source = R::LIB_DIR["svg.R"] if source == :svg
|
25
|
+
source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
|
27
26
|
"source('#{source}')"
|
28
27
|
} * ";\n" if Array === source and source.any?
|
29
28
|
|
@@ -150,7 +149,7 @@ module TSV
|
|
150
149
|
source = [source] unless Array === source
|
151
150
|
|
152
151
|
require_sources = source.collect{|source|
|
153
|
-
source = R::LIB_DIR["
|
152
|
+
source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
|
154
153
|
"source('#{source}')"
|
155
154
|
} * ";\n" if Array === source and source.any?
|
156
155
|
|
data/lib/rbbt/util/concurrency/processes/worker.rb
CHANGED
@@ -70,19 +70,18 @@ class RbbtProcessQueue
|
|
70
70
|
|
71
71
|
status = nil
|
72
72
|
begin
|
73
|
-
@current = Process.fork do
|
74
|
-
run
|
75
|
-
end
|
76
|
-
@asked = false
|
77
73
|
|
78
74
|
initial = Misc.memory_use(Process.pid)
|
79
75
|
memory_cap = multiplier * initial
|
80
76
|
Log.debug "Worker for #{Process.pid} started with pid #{@current} -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap}"
|
81
77
|
|
78
|
+
@asked = false
|
79
|
+
@monitored = false
|
82
80
|
@monitor_thread = Thread.new do
|
83
81
|
begin
|
84
82
|
while true
|
85
|
-
|
83
|
+
@monitored = true
|
84
|
+
current = @current ? 0 : Misc.memory_use(@current)
|
86
85
|
if current > memory_cap and not @asked
|
87
86
|
Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current}"
|
88
87
|
RbbtSemaphore.synchronize(@callback_queue.write_sem) do
|
@@ -97,6 +96,13 @@ class RbbtProcessQueue
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
|
99
|
+
while ! @monitored
|
100
|
+
sleep 0.1
|
101
|
+
end
|
102
|
+
@current = Process.fork do
|
103
|
+
run
|
104
|
+
end
|
105
|
+
|
100
106
|
while true
|
101
107
|
pid, status = Process.waitpid2 @current
|
102
108
|
code = status.to_i >> 8
|
@@ -114,7 +120,7 @@ class RbbtProcessQueue
|
|
114
120
|
Log.exception $!
|
115
121
|
raise $!
|
116
122
|
ensure
|
117
|
-
@monitor_thread.kill
|
123
|
+
@monitor_thread.kill if @monitor_thread
|
118
124
|
Process.kill "INT", @current if Misc.pid_exists? @current
|
119
125
|
@callback_queue.close_write if @callback_queue
|
120
126
|
end
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -337,7 +337,7 @@ module Workflow
|
|
337
337
|
real_inputs[k] = v
|
338
338
|
end
|
339
339
|
|
340
|
-
if real_inputs.empty?
|
340
|
+
if real_inputs.empty? and not Workflow::TAG == :inputs
|
341
341
|
step_path = step_path taskname, jobname, [], [], task.extension
|
342
342
|
input_values = task.take_input_values(inputs)
|
343
343
|
else
|
data/lib/rbbt/workflow/accessor.rb
CHANGED
@@ -351,6 +351,10 @@ class Step
|
|
351
351
|
Open.exists?(path) or Open.exists?(pid_file) #or Open.exists?(info_file)
|
352
352
|
end
|
353
353
|
|
354
|
+
def waiting?
|
355
|
+
Open.exists?(info_file) and not started?
|
356
|
+
end
|
357
|
+
|
354
358
|
def dirty?
|
355
359
|
status = self.status
|
356
360
|
return true if done? and not status == :done and not status == :noinfo
|
@@ -359,7 +363,7 @@ class Step
|
|
359
363
|
if dirty_files.any?
|
360
364
|
true
|
361
365
|
else
|
362
|
-
|
366
|
+
! self.updated?
|
363
367
|
end
|
364
368
|
end
|
365
369
|
|
@@ -759,13 +763,10 @@ module Workflow
|
|
759
763
|
when Proc
|
760
764
|
if DependencyBlock === dependency
|
761
765
|
orig_dep = dependency.dependency
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
options = {}
|
767
|
-
compute = nil
|
768
|
-
end
|
766
|
+
wf, task_name, options = orig_dep
|
767
|
+
|
768
|
+
options = {} if options.nil?
|
769
|
+
compute = options[:compute]
|
769
770
|
|
770
771
|
options = IndiferentHash.setup(options.dup)
|
771
772
|
dep = dependency.call jobname, options.merge(_inputs), real_dependencies
|
@@ -775,7 +776,8 @@ module Workflow
|
|
775
776
|
new_=[]
|
776
777
|
dep.each{|d|
|
777
778
|
if Hash === d
|
778
|
-
d[:workflow] ||=
|
779
|
+
d[:workflow] ||= wf
|
780
|
+
d[:task] = task_name
|
779
781
|
inputs = assign_dep_inputs({}, options.merge(d[:inputs] || {}), real_dependencies, d[:workflow].task_info(d[:task]))
|
780
782
|
d = d[:workflow].job(d[:task], d[:jobname], inputs)
|
781
783
|
end
|
@@ -802,7 +804,7 @@ module Workflow
|
|
802
804
|
real_dependencies.flatten.compact
|
803
805
|
end
|
804
806
|
|
805
|
-
TAG = :hash
|
807
|
+
TAG = ENV["RBBT_INPUT_JOBNAME"] == "true" ? :inputs : :hash
|
806
808
|
def step_path(taskname, jobname, inputs, dependencies, extension = nil)
|
807
809
|
raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./
|
808
810
|
if inputs.length > 0 or dependencies.any?
|
@@ -810,6 +812,19 @@ module Workflow
|
|
810
812
|
when :hash
|
811
813
|
hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
|
812
814
|
jobname + '_' << hash_str
|
815
|
+
when :inputs
|
816
|
+
all_inputs = {}
|
817
|
+
inputs.zip(self.task_info(taskname)[:inputs]) do |i,f|
|
818
|
+
all_inputs[f] = i
|
819
|
+
end
|
820
|
+
dependencies.each do |dep|
|
821
|
+
ri = dep.recursive_inputs
|
822
|
+
ri.zip(ri.fields).each do |i,f|
|
823
|
+
all_inputs[f] = i
|
824
|
+
end
|
825
|
+
end
|
826
|
+
|
827
|
+
all_inputs.any? ? jobname + '_' << Misc.obj2str(all_inputs) : jobname
|
813
828
|
else
|
814
829
|
jobname
|
815
830
|
end
|
data/lib/rbbt/workflow/step/dependencies.rb
CHANGED
@@ -80,13 +80,14 @@ class Step
|
|
80
80
|
return if status == 'streaming' and job.running?
|
81
81
|
end
|
82
82
|
|
83
|
-
if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
|
83
|
+
if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
|
84
84
|
job.clean
|
85
85
|
end
|
86
86
|
|
87
87
|
(job.init_info and job.dup_inputs) unless status == 'done' or job.started?
|
88
88
|
|
89
|
-
|
89
|
+
canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
|
90
|
+
raise DependencyError, job if job.error? and not canfail
|
90
91
|
end
|
91
92
|
|
92
93
|
def log_dependency_exec(dependency, action)
|
@@ -119,6 +120,12 @@ class Step
|
|
119
120
|
return
|
120
121
|
end
|
121
122
|
|
123
|
+
if dependency.aborted?
|
124
|
+
log_dependency_exec(dependency, "aborted (clean)")
|
125
|
+
dependency.clean
|
126
|
+
raise TryAgain
|
127
|
+
end
|
128
|
+
|
122
129
|
if not dependency.started?
|
123
130
|
log_dependency_exec(dependency, :starting)
|
124
131
|
dependency.run(true)
|
@@ -127,12 +134,6 @@ class Step
|
|
127
134
|
|
128
135
|
dependency.grace
|
129
136
|
|
130
|
-
if dependency.aborted?
|
131
|
-
log_dependency_exec(dependency, "aborted (clean)")
|
132
|
-
dependency.clean
|
133
|
-
raise TryAgain
|
134
|
-
end
|
135
|
-
|
136
137
|
if dependency.error?
|
137
138
|
log_dependency_exec(dependency, :error)
|
138
139
|
raise DependencyError, [dependency.path, dependency.messages.last] * ": " if dependency.error?
|
@@ -161,7 +162,6 @@ class Step
|
|
161
162
|
raise $!
|
162
163
|
rescue Exception
|
163
164
|
Log.error "Exception in dep. #{ Log.color :red, dependency.task_name.to_s } -- #{$!.message}"
|
164
|
-
#Log.exception $!
|
165
165
|
raise $!
|
166
166
|
end
|
167
167
|
end
|
@@ -189,12 +189,16 @@ class Step
|
|
189
189
|
type, *rest = type
|
190
190
|
end
|
191
191
|
|
192
|
+
canfail = rest && rest.include?(:canfail)
|
193
|
+
|
192
194
|
case type
|
193
195
|
when :produce, :no_dup
|
194
196
|
list.each do |step|
|
195
197
|
Misc.insist do
|
196
198
|
begin
|
197
199
|
step.produce
|
200
|
+
rescue RbbtException
|
201
|
+
raise $! unless canfail
|
198
202
|
rescue Exception
|
199
203
|
step.exception $!
|
200
204
|
if step.recoverable_error?
|
@@ -211,11 +215,22 @@ class Step
|
|
211
215
|
cpus = 5 if cpus.nil?
|
212
216
|
cpus = list.length / 2 if cpus > list.length / 2
|
213
217
|
|
214
|
-
|
218
|
+
respawn = rest && rest.include?(:respawn)
|
219
|
+
respawn = false if rest && rest.include?(:norespawn)
|
220
|
+
respawn = rest && rest.include?(:always_respawn)
|
221
|
+
respawn = :always if respawn.nil?
|
222
|
+
|
223
|
+
Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => respawn) do |dep|
|
215
224
|
Misc.insist do
|
216
225
|
begin
|
217
226
|
dep.produce
|
218
227
|
Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
|
228
|
+
rescue Exception
|
229
|
+
if canfail
|
230
|
+
Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
|
231
|
+
else
|
232
|
+
raise $!
|
233
|
+
end
|
219
234
|
rescue Aborted
|
220
235
|
dep.abort
|
221
236
|
Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
|
data/lib/rbbt/workflow/step/run.rb
CHANGED
@@ -86,7 +86,15 @@ class Step
|
|
86
86
|
end
|
87
87
|
|
88
88
|
def checks
|
89
|
-
rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
|
89
|
+
#rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
|
90
|
+
rec_dependencies.
|
91
|
+
select{|dependency| ! (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) }.
|
92
|
+
select{|dependency| ! dependency.error? }.
|
93
|
+
collect{|dependency| dependency.path }.uniq
|
94
|
+
end
|
95
|
+
|
96
|
+
def updated?
|
97
|
+
done? and checks.select{|path| File.mtime(path) > File.mtime(self.path) }.empty?
|
90
98
|
end
|
91
99
|
|
92
100
|
def kill_children
|
@@ -114,13 +122,16 @@ class Step
|
|
114
122
|
begin
|
115
123
|
@mutex.synchronize do
|
116
124
|
no_load = :stream if no_load
|
125
|
+
|
126
|
+
Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
|
117
127
|
result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
|
118
128
|
if Step === Step.log_relay_step and not self == Step.log_relay_step
|
119
129
|
relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
|
120
130
|
end
|
121
131
|
|
132
|
+
Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
|
133
|
+
|
122
134
|
@exec = false
|
123
|
-
Open.write(pid_file, Process.pid.to_s)
|
124
135
|
init_info
|
125
136
|
|
126
137
|
log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task.name.to_s || ""}"
|
data/share/Rlib/util.R
CHANGED
@@ -33,7 +33,7 @@ rbbt.ruby <- function(code, load = TRUE, flat = FALSE, type = 'tsv', ...){
|
|
33
33
|
}
|
34
34
|
}
|
35
35
|
|
36
|
-
rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="
|
36
|
+
rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="Default", code='', ...){
|
37
37
|
|
38
38
|
str = "require 'rbbt/workflow'"
|
39
39
|
|
@@ -43,26 +43,47 @@ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobn
|
|
43
43
|
|
44
44
|
args_list = list(...)
|
45
45
|
args_strs = c()
|
46
|
+
tmp_files = c()
|
47
|
+
|
46
48
|
for (input in names(args_list)){
|
47
49
|
value = args_list[[input]]
|
48
50
|
input = sub('input\\.', '', input)
|
49
|
-
if (
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
51
|
+
if (is.vector(value) && length(value) > 1){
|
52
|
+
file = tempfile()
|
53
|
+
writeLines(value, file)
|
54
|
+
tmp_files = c(tmp_files, file)
|
55
|
+
value = paste("Open.read('", file, "').split(\"\\n\")", sep="")
|
56
|
+
}else{
|
57
|
+
if (!is.numeric(value)){
|
58
|
+
if (all(value %in% TRUE)){
|
59
|
+
value = 'true'
|
55
60
|
}else{
|
56
|
-
|
61
|
+
if (all(value %in% FALSE)){
|
62
|
+
value = 'false'
|
63
|
+
}else{
|
64
|
+
if (is.data.frame(value)){
|
65
|
+
file = tempfile()
|
66
|
+
rbbt.tsv.write(file, value)
|
67
|
+
tmp_files = c(tmp_files, file)
|
68
|
+
value = paste("TSV.open('", file, "')", sep="")
|
69
|
+
}else{
|
70
|
+
value = paste("'", value, "'", sep="")
|
71
|
+
}
|
72
|
+
}
|
57
73
|
}
|
58
74
|
}
|
59
75
|
}
|
60
76
|
args_strs = c(args_strs, paste(":",input,' => ',value, sep=""))
|
61
77
|
}
|
62
78
|
|
63
|
-
args_str = paste(args_strs,
|
79
|
+
args_str = paste(args_strs, collapse=",")
|
64
80
|
str = paste(str, paste('wf.job(:', task, ", '", jobname, "', ", args_str,').produce.path', sep=""), sep="\n")
|
65
|
-
|
81
|
+
|
82
|
+
res = rbbt.ruby(str, load, flat, type)
|
83
|
+
|
84
|
+
unlink(tmp_files)
|
85
|
+
|
86
|
+
return(res);
|
66
87
|
}
|
67
88
|
|
68
89
|
rbbt.ruby.substitutions <- function(script, substitutions = list(), ...){
|
@@ -134,11 +155,23 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, check
|
|
134
155
|
columns = rbbt.tsv.columns(filename, sep, comment.char=comment.char)
|
135
156
|
if (! is.null(columns)){
|
136
157
|
names(data) <- columns[2:length(columns)];
|
158
|
+
attributes(data)$key.field = substring(columns[1],2);
|
137
159
|
}
|
138
160
|
|
139
161
|
return(data);
|
140
162
|
}
|
141
163
|
|
164
|
+
rbbt.tsv.comma <- function(tsv){
|
165
|
+
for (c in names(tsv)){
|
166
|
+
v = tsv[,c]
|
167
|
+
if (is.character(v)){
|
168
|
+
v = gsub('\\|', ', ', v)
|
169
|
+
tsv[,c] = v
|
170
|
+
}
|
171
|
+
}
|
172
|
+
return(tsv)
|
173
|
+
}
|
174
|
+
|
142
175
|
rbbt.tsv.numeric <- function(filename, sep="\t", ...){
|
143
176
|
|
144
177
|
columns = rbbt.tsv.columns(filename, sep)
|
@@ -156,6 +189,8 @@ rbbt.tsv2matrix <- function(data){
|
|
156
189
|
}
|
157
190
|
|
158
191
|
rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL){
|
192
|
+
|
193
|
+
if (is.null(key.field)){ key.field = attributes(data)$key.field;}
|
159
194
|
if (is.null(key.field)){ key.field = "ID";}
|
160
195
|
|
161
196
|
f = file(filename, 'w');
|
data/share/rbbt_commands/workflow/prov
CHANGED
@@ -52,7 +52,7 @@ def report_msg(status, name, path)
|
|
52
52
|
task = Log.color(:yellow, parts.pop)
|
53
53
|
workflow = Log.color(:magenta, parts.pop)
|
54
54
|
|
55
|
-
if not Open.remote?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0
|
55
|
+
if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
|
56
56
|
status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
|
57
57
|
else
|
58
58
|
status_msg(status) << " " << [workflow, task, path] * " " << "\n"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.21.98
|
4
|
+
version: 5.21.99
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-10-
|
11
|
+
date: 2017-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|