rbbt-util 5.21.98 → 5.21.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 12d10e9ae9d12ed354fe3d8df24a14ea030bb1eb
4
- data.tar.gz: 13416b83ec00f7dae0fc97479746cd636cf53a5c
3
+ metadata.gz: 7dd511eb01b14817584552dad9178ede2319c65b
4
+ data.tar.gz: 8931609250cca85686c0be1e35b3d9cdcba8aca6
5
5
  SHA512:
6
- metadata.gz: d86681d7392cd8a0dae501a10a792e91ca93a026adb918d386d5a5b83916525f66947d233bbe5302cf65c2c02ae2b19582ce52872946b98265c5d8752f02ea2f
7
- data.tar.gz: a93dc769c50f8d08e74d8088caa7c953aee25a78b47576f0015fd8b73f113d736a234534e1faf791eee6e938686db4dcd03a9f1ac44e6643ce5aaca8b8da0a56
6
+ metadata.gz: 298f9fffa765f4eb2c2cd92a85289c148ab8e49d81e4a509341fd9256222cd8d3ff42fd81217d040241dd2960f4ff743853cf5ec0ad8ae12094dbca7d11e0e2e
7
+ data.tar.gz: 06ef1631872f80edce0c3dc759d650c83466666a70ec123242d0bcce6c7ea225417b5bcc8561ae285934ec8bfd570bb980ccccb1483128f7a16ce58444a5b9ab
data/bin/rbbt CHANGED
@@ -48,8 +48,12 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
48
48
  --locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
49
49
  --dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
50
50
  -nolock--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (prevent stale file handlers for high-througput and high-concurrency)"}
51
+ -ji--jobname_as_inputs #{Log.color :yellow, "Use inputs as part of the jobname in workflows instead of digesting them"}
51
52
  EOF
52
53
 
54
+ if options[:jobname_as_inputs]
55
+ ENV["RBBT_INPUT_JOBNAME"] = "true"
56
+ end
53
57
 
54
58
  locate = options.delete :locate_file
55
59
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/resource/util'
2
+ require 'rbbt/util/misc/indiferent_hash'
2
3
  require 'yaml'
3
4
 
4
5
  module Path
data/lib/rbbt/util/R.rb CHANGED
@@ -22,8 +22,7 @@ source('#{UTIL}');
22
22
  EOF
23
23
 
24
24
  require_sources = source.collect{|source|
25
- source = R::LIB_DIR["plot.R"] if source == :plot
26
- source = R::LIB_DIR["svg.R"] if source == :svg
25
+ source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
27
26
  "source('#{source}')"
28
27
  } * ";\n" if Array === source and source.any?
29
28
 
@@ -150,7 +149,7 @@ module TSV
150
149
  source = [source] unless Array === source
151
150
 
152
151
  require_sources = source.collect{|source|
153
- source = R::LIB_DIR["plot.R"] if source == :plot
152
+ source = R::LIB_DIR["#{source.to_s}.R"] if R::LIB_DIR["#{source.to_s}.R"].exists?
154
153
  "source('#{source}')"
155
154
  } * ";\n" if Array === source and source.any?
156
155
 
@@ -70,19 +70,18 @@ class RbbtProcessQueue
70
70
 
71
71
  status = nil
72
72
  begin
73
- @current = Process.fork do
74
- run
75
- end
76
- @asked = false
77
73
 
78
74
  initial = Misc.memory_use(Process.pid)
79
75
  memory_cap = multiplier * initial
80
76
  Log.debug "Worker for #{Process.pid} started with pid #{@current} -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap}"
81
77
 
78
+ @asked = false
79
+ @monitored = false
82
80
  @monitor_thread = Thread.new do
83
81
  begin
84
82
  while true
85
- current = Misc.memory_use(@current)
83
+ @monitored = true
84
+ current = @current ? 0 : Misc.memory_use(@current)
86
85
  if current > memory_cap and not @asked
87
86
  Log.medium "Worker #{@current} for #{Process.pid} asked to respawn -- initial: #{initial} - multiplier: #{multiplier} - cap: #{memory_cap} - current: #{current}"
88
87
  RbbtSemaphore.synchronize(@callback_queue.write_sem) do
@@ -97,6 +96,13 @@ class RbbtProcessQueue
97
96
  end
98
97
  end
99
98
 
99
+ while ! @monitored
100
+ sleep 0.1
101
+ end
102
+ @current = Process.fork do
103
+ run
104
+ end
105
+
100
106
  while true
101
107
  pid, status = Process.waitpid2 @current
102
108
  code = status.to_i >> 8
@@ -114,7 +120,7 @@ class RbbtProcessQueue
114
120
  Log.exception $!
115
121
  raise $!
116
122
  ensure
117
- @monitor_thread.kill
123
+ @monitor_thread.kill if @monitor_thread
118
124
  Process.kill "INT", @current if Misc.pid_exists? @current
119
125
  @callback_queue.close_write if @callback_queue
120
126
  end
data/lib/rbbt/workflow.rb CHANGED
@@ -337,7 +337,7 @@ module Workflow
337
337
  real_inputs[k] = v
338
338
  end
339
339
 
340
- if real_inputs.empty?
340
+ if real_inputs.empty? and not Workflow::TAG == :inputs
341
341
  step_path = step_path taskname, jobname, [], [], task.extension
342
342
  input_values = task.take_input_values(inputs)
343
343
  else
@@ -351,6 +351,10 @@ class Step
351
351
  Open.exists?(path) or Open.exists?(pid_file) #or Open.exists?(info_file)
352
352
  end
353
353
 
354
+ def waiting?
355
+ Open.exists?(info_file) and not started?
356
+ end
357
+
354
358
  def dirty?
355
359
  status = self.status
356
360
  return true if done? and not status == :done and not status == :noinfo
@@ -359,7 +363,7 @@ class Step
359
363
  if dirty_files.any?
360
364
  true
361
365
  else
362
- false
366
+ ! self.updated?
363
367
  end
364
368
  end
365
369
 
@@ -759,13 +763,10 @@ module Workflow
759
763
  when Proc
760
764
  if DependencyBlock === dependency
761
765
  orig_dep = dependency.dependency
762
- if Hash === orig_dep.last
763
- options = orig_dep.last
764
- compute = options[:compute]
765
- else
766
- options = {}
767
- compute = nil
768
- end
766
+ wf, task_name, options = orig_dep
767
+
768
+ options = {} if options.nil?
769
+ compute = options[:compute]
769
770
 
770
771
  options = IndiferentHash.setup(options.dup)
771
772
  dep = dependency.call jobname, options.merge(_inputs), real_dependencies
@@ -775,7 +776,8 @@ module Workflow
775
776
  new_=[]
776
777
  dep.each{|d|
777
778
  if Hash === d
778
- d[:workflow] ||= self
779
+ d[:workflow] ||= wf
780
+ d[:task] = task_name
779
781
  inputs = assign_dep_inputs({}, options.merge(d[:inputs] || {}), real_dependencies, d[:workflow].task_info(d[:task]))
780
782
  d = d[:workflow].job(d[:task], d[:jobname], inputs)
781
783
  end
@@ -802,7 +804,7 @@ module Workflow
802
804
  real_dependencies.flatten.compact
803
805
  end
804
806
 
805
- TAG = :hash
807
+ TAG = ENV["RBBT_INPUT_JOBNAME"] == "true" ? :inputs : :hash
806
808
  def step_path(taskname, jobname, inputs, dependencies, extension = nil)
807
809
  raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./
808
810
  if inputs.length > 0 or dependencies.any?
@@ -810,6 +812,19 @@ module Workflow
810
812
  when :hash
811
813
  hash_str = Misc.obj2digest({:inputs => inputs, :dependencies => dependencies})
812
814
  jobname + '_' << hash_str
815
+ when :inputs
816
+ all_inputs = {}
817
+ inputs.zip(self.task_info(taskname)[:inputs]) do |i,f|
818
+ all_inputs[f] = i
819
+ end
820
+ dependencies.each do |dep|
821
+ ri = dep.recursive_inputs
822
+ ri.zip(ri.fields).each do |i,f|
823
+ all_inputs[f] = i
824
+ end
825
+ end
826
+
827
+ all_inputs.any? ? jobname + '_' << Misc.obj2str(all_inputs) : jobname
813
828
  else
814
829
  jobname
815
830
  end
@@ -80,13 +80,14 @@ class Step
80
80
  return if status == 'streaming' and job.running?
81
81
  end
82
82
 
83
- if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
83
+ if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
84
84
  job.clean
85
85
  end
86
86
 
87
87
  (job.init_info and job.dup_inputs) unless status == 'done' or job.started?
88
88
 
89
- raise DependencyError, job if job.error?
89
+ canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
90
+ raise DependencyError, job if job.error? and not canfail
90
91
  end
91
92
 
92
93
  def log_dependency_exec(dependency, action)
@@ -119,6 +120,12 @@ class Step
119
120
  return
120
121
  end
121
122
 
123
+ if dependency.aborted?
124
+ log_dependency_exec(dependency, "aborted (clean)")
125
+ dependency.clean
126
+ raise TryAgain
127
+ end
128
+
122
129
  if not dependency.started?
123
130
  log_dependency_exec(dependency, :starting)
124
131
  dependency.run(true)
@@ -127,12 +134,6 @@ class Step
127
134
 
128
135
  dependency.grace
129
136
 
130
- if dependency.aborted?
131
- log_dependency_exec(dependency, "aborted (clean)")
132
- dependency.clean
133
- raise TryAgain
134
- end
135
-
136
137
  if dependency.error?
137
138
  log_dependency_exec(dependency, :error)
138
139
  raise DependencyError, [dependency.path, dependency.messages.last] * ": " if dependency.error?
@@ -161,7 +162,6 @@ class Step
161
162
  raise $!
162
163
  rescue Exception
163
164
  Log.error "Exception in dep. #{ Log.color :red, dependency.task_name.to_s } -- #{$!.message}"
164
- #Log.exception $!
165
165
  raise $!
166
166
  end
167
167
  end
@@ -189,12 +189,16 @@ class Step
189
189
  type, *rest = type
190
190
  end
191
191
 
192
+ canfail = rest && rest.include?(:canfail)
193
+
192
194
  case type
193
195
  when :produce, :no_dup
194
196
  list.each do |step|
195
197
  Misc.insist do
196
198
  begin
197
199
  step.produce
200
+ rescue RbbtException
201
+ raise $! unless canfail
198
202
  rescue Exception
199
203
  step.exception $!
200
204
  if step.recoverable_error?
@@ -211,11 +215,22 @@ class Step
211
215
  cpus = 5 if cpus.nil?
212
216
  cpus = list.length / 2 if cpus > list.length / 2
213
217
 
214
- Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => :always) do |dep|
218
+ respawn = rest && rest.include?(:respawn)
219
+ respawn = false if rest && rest.include?(:norespawn)
220
+ respawn = rest && rest.include?(:always_respawn)
221
+ respawn = :always if respawn.nil?
222
+
223
+ Misc.bootstrap(list, cpus, :bar => "Bootstrapping dependencies for #{path}", :respawn => respawn) do |dep|
215
224
  Misc.insist do
216
225
  begin
217
226
  dep.produce
218
227
  Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
228
+ rescue Exception
229
+ if canfail
230
+ Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
231
+ else
232
+ raise $!
233
+ end
219
234
  rescue Aborted
220
235
  dep.abort
221
236
  Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
@@ -86,7 +86,15 @@ class Step
86
86
  end
87
87
 
88
88
  def checks
89
- rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
89
+ #rec_dependencies.collect{|dependency| (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) ? nil : dependency.path }.compact.uniq
90
+ rec_dependencies.
91
+ select{|dependency| ! (defined? WorkflowRESTClient and WorkflowRESTClient::RemoteStep === dependency) }.
92
+ select{|dependency| ! dependency.error? }.
93
+ collect{|dependency| dependency.path }.uniq
94
+ end
95
+
96
+ def updated?
97
+ done? and checks.select{|path| File.mtime(path) > File.mtime(self.path) }.empty?
90
98
  end
91
99
 
92
100
  def kill_children
@@ -114,13 +122,16 @@ class Step
114
122
  begin
115
123
  @mutex.synchronize do
116
124
  no_load = :stream if no_load
125
+
126
+ Open.write(pid_file, Process.pid.to_s) unless Open.exists?(path) or Open.exists?(pid_file)
117
127
  result = Persist.persist "Job", @task.result_type, :file => path, :check => checks, :no_load => no_load do
118
128
  if Step === Step.log_relay_step and not self == Step.log_relay_step
119
129
  relay_log(Step.log_relay_step) unless self.respond_to? :relay_step and self.relay_step
120
130
  end
121
131
 
132
+ Open.write(pid_file, Process.pid.to_s) unless Open.exists? pid_file
133
+
122
134
  @exec = false
123
- Open.write(pid_file, Process.pid.to_s)
124
135
  init_info
125
136
 
126
137
  log :setup, "#{Log.color :green, "Setup"} step #{Log.color :yellow, task.name.to_s || ""}"
data/share/Rlib/util.R CHANGED
@@ -33,7 +33,7 @@ rbbt.ruby <- function(code, load = TRUE, flat = FALSE, type = 'tsv', ...){
33
33
  }
34
34
  }
35
35
 
36
- rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="R.Default", code='', ...){
36
+ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobname="Default", code='', ...){
37
37
 
38
38
  str = "require 'rbbt/workflow'"
39
39
 
@@ -43,26 +43,47 @@ rbbt.job <- function(workflow, task, load=TRUE, flat = FALSE, type = 'tsv', jobn
43
43
 
44
44
  args_list = list(...)
45
45
  args_strs = c()
46
+ tmp_files = c()
47
+
46
48
  for (input in names(args_list)){
47
49
  value = args_list[[input]]
48
50
  input = sub('input\\.', '', input)
49
- if (!is.numeric(value)){
50
- if (value == TRUE){
51
- value = 'true'
52
- }else{
53
- if (value == FALSE){
54
- value = 'false'
51
+ if (is.vector(value) && length(value) > 1){
52
+ file = tempfile()
53
+ writeLines(value, file)
54
+ tmp_files = c(tmp_files, file)
55
+ value = paste("Open.read('", file, "').split(\"\\n\")", sep="")
56
+ }else{
57
+ if (!is.numeric(value)){
58
+ if (all(value %in% TRUE)){
59
+ value = 'true'
55
60
  }else{
56
- value = paste("'", value, "'", sep="")
61
+ if (all(value %in% FALSE)){
62
+ value = 'false'
63
+ }else{
64
+ if (is.data.frame(value)){
65
+ file = tempfile()
66
+ rbbt.tsv.write(file, value)
67
+ tmp_files = c(tmp_files, file)
68
+ value = paste("TSV.open('", file, "')", sep="")
69
+ }else{
70
+ value = paste("'", value, "'", sep="")
71
+ }
72
+ }
57
73
  }
58
74
  }
59
75
  }
60
76
  args_strs = c(args_strs, paste(":",input,' => ',value, sep=""))
61
77
  }
62
78
 
63
- args_str = paste(args_strs, sep=",")
79
+ args_str = paste(args_strs, collapse=",")
64
80
  str = paste(str, paste('wf.job(:', task, ", '", jobname, "', ", args_str,').produce.path', sep=""), sep="\n")
65
- return(rbbt.ruby(str, load, flat, type));
81
+
82
+ res = rbbt.ruby(str, load, flat, type)
83
+
84
+ unlink(tmp_files)
85
+
86
+ return(res);
66
87
  }
67
88
 
68
89
  rbbt.ruby.substitutions <- function(script, substitutions = list(), ...){
@@ -134,11 +155,23 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, check
134
155
  columns = rbbt.tsv.columns(filename, sep, comment.char=comment.char)
135
156
  if (! is.null(columns)){
136
157
  names(data) <- columns[2:length(columns)];
158
+ attributes(data)$key.field = substring(columns[1],2);
137
159
  }
138
160
 
139
161
  return(data);
140
162
  }
141
163
 
164
+ rbbt.tsv.comma <- function(tsv){
165
+ for (c in names(tsv)){
166
+ v = tsv[,c]
167
+ if (is.character(v)){
168
+ v = gsub('\\|', ', ', v)
169
+ tsv[,c] = v
170
+ }
171
+ }
172
+ return(tsv)
173
+ }
174
+
142
175
  rbbt.tsv.numeric <- function(filename, sep="\t", ...){
143
176
 
144
177
  columns = rbbt.tsv.columns(filename, sep)
@@ -156,6 +189,8 @@ rbbt.tsv2matrix <- function(data){
156
189
  }
157
190
 
158
191
  rbbt.tsv.write <- function(filename, data, key.field = NULL, extra_headers = NULL){
192
+
193
+ if (is.null(key.field)){ key.field = attributes(data)$key.field;}
159
194
  if (is.null(key.field)){ key.field = "ID";}
160
195
 
161
196
  f = file(filename, 'w');
@@ -20,6 +20,7 @@ Use - to read from STDIN
20
20
  -z--zipped Assume data is zipped when merging (default is true)
21
21
  -s2--sep2* Second level separator. Default /[,|]\\s?/
22
22
  -h--help Print this help
23
+ -s--sheet* Sheet to extract
23
24
 
24
25
  EOF
25
26
  if options[:help]
@@ -52,7 +52,7 @@ def report_msg(status, name, path)
52
52
  task = Log.color(:yellow, parts.pop)
53
53
  workflow = Log.color(:magenta, parts.pop)
54
54
 
55
- if not Open.remote?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0
55
+ if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
56
56
  status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
57
57
  else
58
58
  status_msg(status) << " " << [workflow, task, path] * " " << "\n"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.21.98
4
+ version: 5.21.99
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-06 00:00:00.000000000 Z
11
+ date: 2017-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake