rbbt-util 5.21.98 → 5.21.99

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12d10e9ae9d12ed354fe3d8df24a14ea030bb1eb
4
- data.tar.gz: 13416b83ec00f7dae0fc97479746cd636cf53a5c
3
+ metadata.gz: 7dd511eb01b14817584552dad9178ede2319c65b
4
+ data.tar.gz: 8931609250cca85686c0be1e35b3d9cdcba8aca6
5
5
  SHA512:
6
- metadata.gz: d86681d7392cd8a0dae501a10a792e91ca93a026adb918d386d5a5b83916525f66947d233bbe5302cf65c2c02ae2b19582ce52872946b98265c5d8752f02ea2f
7
- data.tar.gz: a93dc769c50f8d08e74d8088caa7c953aee25a78b47576f0015fd8b73f113d736a234534e1faf791eee6e938686db4dcd03a9f1ac44e6643ce5aaca8b8da0a56
6
+ metadata.gz: 298f9fffa765f4eb2c2cd92a85289c148ab8e49d81e4a509341fd9256222cd8d3ff42fd81217d040241dd2960f4ff743853cf5ec0ad8ae12094dbca7d11e0e2e
7
+ data.tar.gz: 06ef1631872f80edce0c3dc759d650c83466666a70ec123242d0bcce6c7ea225417b5bcc8561ae285934ec8bfd570bb980ccccb1483128f7a16ce58444a5b9ab
data/bin/rbbt CHANGED
@@ -48,8 +48,12 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
48
48
  --locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
49
49
  --dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
50
50
  -nolock--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (prevent stale file handlers for high-througput and high-concurrency)"}
51
+ -ji--jobname_as_inputs #{Log.color :yellow, "Use inputs as part of the jobname in workflows instead of digesting them"}
51
52
  EOF
52
53
 
54
+ if options[:jobname_as_inputs]
55
+ ENV["RBBT_INPUT_JOBNAME"] = "true"
56
+ end
53
57
 
54
58
  locate = options.delete :locate_file
55
59
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/resource/util'
2
+ require 'rbbt/util/misc/indiferent_hash'
2
3
  require 'yaml'
3
4
 
4
5
  module Path
data/lib/rbbt/util/R.rb CHANGED
@@ -22,8 +22,7 @@ source('#{UTIL}');
22
22
  EOF
23
23
 
24
24
  require_sources = source.collect{|source|
25
- source = R::LIB_DIR["plot.R"] if source == :plot
26
- source = R::LIB_DIR["svg.R"] if source == :svg
25
+ source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
27
26
  "source('#{source}')"
28
27
  } * ";\n" if Array === source and source.any?
29
28
 
@@ -150,7 +149,7 @@ module TSV
150
149
  source = [source] unless Array === source
151
150
 
152
151
  require_sources = source.collect{|source|
153
- source = R::LIB_DIR["plot.R"] if source == :plot
152
+ source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
154
153
  "source('#{source}')"
155
154
  } * ";\n" if Array === source and source.any?
156
155
 
@@ -70,19 +70,18 @@ class RbbtProcessQueue
70
70
 
71
71
  status = nil
72
72
  begin
73
- @current = Process.fork do
74
- run
75
- end
76
- @asked = false
77
73
 
78
74
  initial = Misc.memory_use(Process.pid)
79
75
  memory_cap = multiplier * initial
80
76
  Log.debug "Worker for #{Process.pid} started with pid #{@current} -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap}"
81
77
 
78
+ @asked = false
79
+ @monitored = false
82
80
  @monitor_thread = Thread.new do
83
81
  begin
84
82
  while true
85
- current = Misc.memory_use(@current)
83
+ @monitored = true
84
+ current = @current ? 0 : Misc.memory_use(@current)
86
85
  if current > memory_cap and not @asked
87
86
  Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current}"
88
87
  RbbtSemaphore.synchronize(@callback_queue.write_sem) do
@@ -97,6 +96,13 @@ class RbbtProcessQueue
97
96
  end
98
97
  end
99
98
 
99
+ while ! @monitored
100
+ sleep 0.1
101
+ end
102
+ @current = Process.fork do
103
+ run
104
+ end
105
+
100
106
  while true
101
107
  pid, status = Process.waitpid2 @current
102
108
  code = status.to_i >> 8
@@ -114,7 +120,7 @@ class RbbtProcessQueue
114
120
  Log.exception $!
115
121
  raise $!
116
122
  ensure
117
- @monitor_thread.kill
123
+ @monitor_thread.kill if @monitor_thread
118
124
  Process.kill "INT", @current if Misc.pid_exists? @current
119
125
  @callback_queue.close_write if @callback_queue
120
126
  end
data/lib/rbbt/workflow.rb CHANGED
@@ -337,7 +337,7 @@ module Workflow
337
337
  real_inputs[k] = v
338
338
  end
339
339
 
340
- if real_inputs.empty?
340
+ if real_inputs.empty? and not Workflow::TAG == :inputs
341
341
  step_path = step_path taskname, jobname, [], [], task.extension
342
342
  input_values = task.take_input_values(inputs)
343
343
  else
@@ -351,6 +351,10 @@ class Step
351
351
  Open.exists?(path) or Open.exists?(pid_file) #or Open.exists?(info_file)
352
352
  end
353
353
 
354
+ def waiting?
355
+ Open.exists?(info_file) and not started?
356
+ end
357
+
354
358
  def dirty?
355
359
  status = self.status
356
360
  return true if done? and not status == :done and not status == :noinfo
@@ -359,7 +363,7 @@ class Step
359
363
  if dirty_files.any?
360
364
  true
361
365
  else
362
- false
366
+ ! self.updated?
363
367
  end
364
368
  end
365
369
 
@@ -759,13 +763,10 @@ module Workflow
759
763
  when Proc
760
764
  if DependencyBlock === dependency
761
765
  orig_dep = dependency.dependency
762
- if Hash === orig_dep.last
763
- options = orig_dep.last
764
- compute = options[:compute]
765
- else
766
- options = {}
767
- compute = nil
768
- end
766
+ wf, task_name, options = orig_dep
767
+
768
+ options = {} if options.nil?
769
+ compute = options[:compute]
769
770
 
770
771
  options = IndiferentHash.setup(options.dup)
771
772
  dep = dependency.call jobname, options.merge(_inputs), real_dependencies
@@ -775,7 +776,8 @@ module Workflow
775
776
  new_=[]
776
777
  dep.each{|d|
777
778
  if Hash === d
778
- d[:workflow] ||= self
779
+ d[:workflow] ||= wf
780
+ d[:task] = task_name
779
781
  inputs = assign_dep_inputs({}, options.merge(d[:inputs] || {}), real_dependencies, d[:workflow].task_info(d[:task]))
780
782
  d = d[:workflow].job(d[:task], d[:jobname], inputs)
781
783
  end
@@ -802,7 +804,7 @@ module Workflow
802
804
  real_dependencies.flatten.compact
803
805
  end
804
806
 
805
- TAG = :hash
807
+ TAG = ENV["RBBT_INPUT_JOBNAME"] == "true" ? :inputs : :hash
806
808
  def step_path(taskname, jobname, inputs, dependencies, extension = nil)
807
809
  raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./
808
810
  if inputs.length > 0 or dependencies.any?
@@ -810,6 +812,19 @@ module Workflow
810
812
  when :hash
811
813
  hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
812
814
  jobname + '_' << hash_str
815
+ when :inputs
816
+ all_inputs = {}
817
+ inputs.zip(self.task_info(taskname)[:inputs]) do |i,f|
818
+ all_inputs[f] = i
819
+ end
820
+ dependencies.each do |dep|
821
+ ri = dep.recursive_inputs
822
+ ri.zip(ri.fields).each do |i,f|
823
+ all_inputs[f] = i
824
+ end
825
+ end
826
+
827
+ all_inputs.any? ? jobname + '_' << Misc.obj2str(all_inputs) : jobname
813
828
  else
814
829
  jobname
815
830
  end
@@ -80,13 +80,14 @@ class Step
80
80
  return if status == 'streaming' and job.running?
81
81
  end
82
82
 
83
- if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
83
+ if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
84
84
  job.clean
85
85
  end
86
86
 
87
87
  (job.init_info and job.dup_inputs) unless status == 'done' or job.started?
88
88
 
89
- raise DependencyError, job if job.error?
89
+ canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
90
+ raise DependencyError, job if job.error? and not canfail
90
91
  end
91
92
 
92
93
  def log_dependency_exec(dependency, action)
@@ -119,6 +120,12 @@ class Step
119
120
  return
120
121
  end
121
122
 
123
+ if dependency.aborted?
124
+ log_dependency_exec(dependency, "aborted (clean)")
125
+ dependency.clean
126
+ raise TryAgain
127
+ end
128
+
122
129
  if not dependency.started?
123
130
  log_dependency_exec(dependency, :starting)
124
131
  dependency.run(true)
@@ -127,12 +134,6 @@ class Step
127
134
 
128
135
  dependency.grace
129
136
 
130
- if dependency.aborted?
131
- log_dependency_exec(dependency, "aborted (clean)")
132
- dependency.clean
133
- raise TryAgain
134
- end
135
-
136
137
  if dependency.error?
137
138
  log_dependency_exec(dependency, :error)
138
139
  raise DependencyError, [dependency.path, dependency.messages.last] * ": " if dependency.error?
@@ -161,7 +162,6 @@ class Step
161
162
  raise $!
162
163
  rescue Exception
163
164
  Log.error "Exception in dep. #{ Log.color :red, dependency.task_name.to_s } -- #{$!.message}"
164
- #Log.exception $!
165
165
  raise $!
166
166
  end
167
167
  end
@@ -189,12 +189,16 @@ class Step
189
189
  type, *rest = type
190
190
  end
191
191
 
192
+ canfail = rest && rest.include?(:canfail)
193
+
192
194
  case type
193
195
  when :produce, :no_dup
194
196
  list.each do |step|
195
197
  Misc.insist do
196
198
  begin
197
199
  step.produce
200
+ rescue RbbtException
201
+ raise $! unless canfail
198
202
  rescue Exception
199
203
  step.exception $!
200
204
  if step.recoverable_error?
@@ -211,11 +215,22 @@ class Step
211
215
  cpus = 5 if cpus.nil?
212
216
  cpus = list.length / 2 if cpus > list.length / 2
213
217
 
214
- Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => :always) do |dep|
218
+ respawn = rest && rest.include?(:respawn)
219
+ respawn = false if rest && rest.include?(:norespawn)
220
+ respawn = rest && rest.include?(:always_respawn)
221
+ respawn = :always if respawn.nil?
222
+
223
+ Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => respawn) do |dep|
215
224
  Misc.insist do
216
225
  begin
217
226
  dep.produce
218
227
  Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
228
+ rescue Exception
229
+ if canfail
230
+ Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
231
+ else
232
+ raise $!
233
+ end
219
234
  rescue Aborted
220
235
  dep.abort
221
236
  Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
@@ -86,7 +86,15 @@ class Step
86
86
  end
87
87
 
88
88
  def checks
89
- rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
89
+ #rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
90
+ rec_dependencies.
91
+ select{|dependency| ! (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) }.
92
+ select{|dependency| ! dependency.error? }.
93
+ collect{|dependency| dependency.path }.uniq
94
+ end
95
+
96
+ def updated?
97
+ done? and checks.select{|path| File.mtime(path) > File.mtime(self.path) }.empty?
90
98
  end
91
99
 
92
100
  def kill_children
@@ -114,13 +122,16 @@ class Step
114
122
  begin
115
123
  @mutex.synchronize do
116
124
  no_load = :stream if no_load
125
+
126
+ Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
117
127
  result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
118
128
  if Step === Step.log_relay_step and not self == Step.log_relay_step
119
129
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
120
130
  end
121
131
 
132
+ Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
133
+
122
134
  @exec = false
123
- Open.write(pid_file, Process.pid.to_s)
124
135
  init_info
125
136
 
126
137
  log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task.name.to_s || ""}"
data/share/Rlib/util.R CHANGED
@@ -33,7 +33,7 @@ rbbt.ruby <- function(code, load = TRUE, flat = FALSE, type = 'tsv', ...){
33
33
  }
34
34
  }
35
35
 
36
- rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="R.Default", code='', ...){
36
+ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="Default", code='', ...){
37
37
 
38
38
  str = "require 'rbbt/workflow'"
39
39
 
@@ -43,26 +43,47 @@ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobn
43
43
 
44
44
  args_list = list(...)
45
45
  args_strs = c()
46
+ tmp_files = c()
47
+
46
48
  for (input in names(args_list)){
47
49
  value = args_list[[input]]
48
50
  input = sub('input\\.', '', input)
49
- if (!is.numeric(value)){
50
- if (value == TRUE){
51
- value = 'true'
52
- }else{
53
- if (value == FALSE){
54
- value = 'false'
51
+ if (is.vector(value) && length(value) > 1){
52
+ file = tempfile()
53
+ writeLines(value, file)
54
+ tmp_files = c(tmp_files, file)
55
+ value = paste("Open.read('", file, "').split(\"\\n\")", sep="")
56
+ }else{
57
+ if (!is.numeric(value)){
58
+ if (all(value %in% TRUE)){
59
+ value = 'true'
55
60
  }else{
56
- value = paste("'", value, "'", sep="")
61
+ if (all(value %in% FALSE)){
62
+ value = 'false'
63
+ }else{
64
+ if (is.data.frame(value)){
65
+ file = tempfile()
66
+ rbbt.tsv.write(file, value)
67
+ tmp_files = c(tmp_files, file)
68
+ value = paste("TSV.open('", file, "')", sep="")
69
+ }else{
70
+ value = paste("'", value, "'", sep="")
71
+ }
72
+ }
57
73
  }
58
74
  }
59
75
  }
60
76
  args_strs = c(args_strs, paste(":",input,' => ',value, sep=""))
61
77
  }
62
78
 
63
- args_str = paste(args_strs, sep=",")
79
+ args_str = paste(args_strs, collapse=",")
64
80
  str = paste(str, paste('wf.job(:', task, ", '", jobname, "', ", args_str,').produce.path', sep=""), sep="\n")
65
- return(rbbt.ruby(str, load, flat, type));
81
+
82
+ res = rbbt.ruby(str, load, flat, type)
83
+
84
+ unlink(tmp_files)
85
+
86
+ return(res);
66
87
  }
67
88
 
68
89
  rbbt.ruby.substitutions <- function(script, substitutions = list(), ...){
@@ -134,11 +155,23 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, check
134
155
  columns = rbbt.tsv.columns(filename, sep, comment.char=comment.char)
135
156
  if (! is.null(columns)){
136
157
  names(data) <- columns[2:length(columns)];
158
+ attributes(data)$key.field = substring(columns[1],2);
137
159
  }
138
160
 
139
161
  return(data);
140
162
  }
141
163
 
164
+ rbbt.tsv.comma <- function(tsv){
165
+ for (c in names(tsv)){
166
+ v = tsv[,c]
167
+ if (is.character(v)){
168
+ v = gsub('\\|', ', ', v)
169
+ tsv[,c] = v
170
+ }
171
+ }
172
+ return(tsv)
173
+ }
174
+
142
175
  rbbt.tsv.numeric <- function(filename, sep="\t", ...){
143
176
 
144
177
  columns = rbbt.tsv.columns(filename, sep)
@@ -156,6 +189,8 @@ rbbt.tsv2matrix <- function(data){
156
189
  }
157
190
 
158
191
  rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL){
192
+
193
+ if (is.null(key.field)){ key.field = attributes(data)$key.field;}
159
194
  if (is.null(key.field)){ key.field = "ID";}
160
195
 
161
196
  f = file(filename, 'w');
@@ -20,6 +20,7 @@ Use - to read from STDIN
20
20
  -z--zipped Assume data is zipped when merging (default is true)
21
21
  -s2--sep2* Second level separator. Default /[,|]\\s?/
22
22
  -h--help Print this help
23
+ -s--sheet* Sheet to extract
23
24
 
24
25
  EOF
25
26
  if options[:help]
@@ -52,7 +52,7 @@ def report_msg(status, name, path)
52
52
  task = Log.color(:yellow, parts.pop)
53
53
  workflow = Log.color(:magenta, parts.pop)
54
54
 
55
- if not Open.remote?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0
55
+ if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
56
56
  status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
57
57
  else
58
58
  status_msg(status) << " " << [workflow, task, path] * " " << "\n"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.21.98
4
+ version: 5.21.99
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake