rbbt-util 5.13.37 → 5.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
data/lib/rbbt/util/R.rb CHANGED
@@ -38,7 +38,7 @@ source('#{UTIL}');
38
38
 
39
39
  def self.interactive(script, options = {})
40
40
  TmpFile.with_file do |init_file|
41
- Open.write(init_file) do |f|
41
+ Open.write(init_file) do |f|
42
42
  f.puts "# Loading basic rbbt environment"
43
43
  f.puts "library(utils);\n"
44
44
  f.puts "source('#{R::UTIL}');\n"
data/lib/rbbt/util/cmd.rb CHANGED
@@ -112,7 +112,6 @@ module CMD
112
112
  stderr = Log::HIGH
113
113
  end
114
114
 
115
- # Process cmd_options
116
115
  cmd_options = process_cmd_options options
117
116
  if cmd =~ /'\{opt\}'/
118
117
  cmd.sub!('\'{opt}\'', cmd_options)
@@ -203,7 +202,6 @@ module CMD
203
202
  Log.log line, stderr if Integer === stderr and log
204
203
  end
205
204
  serr.close
206
- Thread.exit!
207
205
  end
208
206
 
209
207
  #SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
@@ -217,7 +215,6 @@ module CMD
217
215
  err << serr.gets if Integer === stderr
218
216
  end
219
217
  serr.close
220
- Thread.exit
221
218
  end
222
219
 
223
220
  ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
data/lib/rbbt/util/concurrency/processes.rb CHANGED
@@ -96,6 +96,9 @@ class RbbtProcessQueue
96
96
  begin
97
97
  @process_monitor.join
98
98
  close_callback if @callback
99
+ rescue Aborted
100
+ Log.error "Aborted joining queue"
101
+ raise $!
99
102
  rescue Exception
100
103
  Log.error "Exception joining queue: #{$!.message}"
101
104
  raise $!
data/lib/rbbt/util/concurrency/processes/worker.rb CHANGED
@@ -36,7 +36,6 @@ class RbbtProcessQueue
36
36
  rescue Aborted, Interrupt
37
37
  Log.warn "Worker #{Process.pid} aborted"
38
38
  Kernel.exit! 0
39
- #Kernel.exit! -1
40
39
  rescue Exception
41
40
  Log.exception $!
42
41
  @callback_queue.push($!) if @callback_queue
data/lib/rbbt/util/log.rb CHANGED
@@ -2,10 +2,18 @@ require 'term/ansicolor'
2
2
  require 'rbbt/util/color'
3
3
  require 'rbbt/util/log/progress'
4
4
 
5
+ class MockMutex
6
+ def synchronize
7
+ yield
8
+ end
9
+ end
10
+
5
11
  module Log
6
12
  extend Term::ANSIColor
7
13
 
8
14
 
15
+ #ToDo: I'm not sure if using a Mutex here really gives troubles in CPU concurrency
16
+ #LOG_MUTEX = MockMutex.new
9
17
  LOG_MUTEX = Mutex.new
10
18
 
11
19
  SEVERITY_NAMES ||= begin
@@ -16,8 +24,16 @@ module Log
16
24
  names
17
25
  end
18
26
 
27
+ def self.last_caller(stack)
28
+ line = nil
29
+ while line.nil? or line =~ /util\/log\.rb/ and stack.any?
30
+ line = stack.shift
31
+ end
32
+ line ||= caller.first
33
+ end
34
+
19
35
  def self.ignore_stderr
20
- #LOG_MUTEX.synchronize do
36
+ LOG_MUTEX.synchronize do
21
37
  backup_stderr = STDERR.dup
22
38
  File.open('/dev/null', 'w') do |f|
23
39
  STDERR.reopen(f)
@@ -28,7 +44,7 @@ module Log
28
44
  backup_stderr.close
29
45
  end
30
46
  end
31
- #end
47
+ end
32
48
  end
33
49
 
34
50
  def self.get_level(level)
@@ -133,22 +149,18 @@ module Log
133
149
  message = "" << highlight << message << color(0) if severity >= INFO
134
150
  str = prefix << " " << message
135
151
 
136
- #LOG_MUTEX.synchronize do
137
- STDERR.puts str
138
- Log::LAST.replace "log"
139
- logfile.puts str unless logfile.nil?
140
- nil
141
- #end
152
+ LOG_MUTEX.synchronize do
153
+ STDERR.puts str
154
+ Log::LAST.replace "log"
155
+ logfile.puts str unless logfile.nil?
156
+ nil
157
+ end
142
158
  end
143
159
 
144
160
  def self.log_obj_inspect(obj, level, file = $stdout)
145
161
  stack = caller
146
162
 
147
- line = nil
148
- while line.nil? or line =~ /util\/log\.rb/ and stack.any?
149
- line = stack.shift
150
- end
151
- line ||= caller.first
163
+ line = Log.last_caller stack
152
164
 
153
165
  level = Log.get_level level
154
166
  name = Log::SEVERITY_NAMES[level] + ": "
@@ -161,11 +173,7 @@ module Log
161
173
  def self.log_obj_fingerprint(obj, level, file = $stdout)
162
174
  stack = caller
163
175
 
164
- line = nil
165
- while line.nil? or line =~ /util\/log\.rb/ and stack.any?
166
- line = stack.shift
167
- end
168
- line ||= caller.first
176
+ line = Log.last_caller stack
169
177
 
170
178
  level = Log.get_level level
171
179
  name = Log::SEVERITY_NAMES[level] + ": "
@@ -210,6 +218,25 @@ module Log
210
218
  error("BACKTRACE:\n" + e.backtrace * "\n")
211
219
  end
212
220
 
221
+ def self.color_stack(stack)
222
+ stack.collect do |line|
223
+ line = line.sub('`',"'")
224
+ color = :green if line =~ /workflow/
225
+ color = :blue if line =~ /rbbt-/
226
+ Log.color color, line
227
+ end
228
+ end
229
+
230
+ def self.stack(stack)
231
+ LOG_MUTEX.synchronize do
232
+
233
+ STDERR.puts Log.color :magenta, "Stack trace: " << Log.last_caller(caller)
234
+ color_stack(stack).each do |line|
235
+ STDERR.puts line
236
+ end
237
+ end
238
+ end
239
+
213
240
  case ENV['RBBT_LOG']
214
241
  when 'DEBUG'
215
242
  self.severity = DEBUG
data/lib/rbbt/util/log/progress/report.rb CHANGED
@@ -10,11 +10,11 @@ module Log
10
10
 
11
11
  def print(io, str)
12
12
  return if ENV["RBBT_NO_PROGRESS"] == "true"
13
- #LOG_MUTEX.synchronize do
13
+ LOG_MUTEX.synchronize do
14
14
  STDERR.print str
15
15
  Log.logfile.puts str unless Log.logfile.nil?
16
16
  Log::LAST.replace "progress"
17
- #end
17
+ end
18
18
  end
19
19
 
20
20
  attr_accessor :history, :mean_max
@@ -90,6 +90,7 @@ module Log
90
90
  if Log::LAST != "progress"
91
91
  length = Log::ProgressBar.cleanup_bars
92
92
  bars = BARS
93
+ print(io, Log.color(:yellow, "...Progress\n"))
93
94
  bars.sort_by{|b| b.depth }.reverse.each do |bar|
94
95
  print(io, Log.color(:yellow ,bar.report_msg) << "\n")
95
96
  end
data/lib/rbbt/util/log/progress/util.rb CHANGED
@@ -31,7 +31,7 @@ module Log
31
31
  end
32
32
 
33
33
  def self.remove_bar(bar)
34
- bar.done
34
+ bar.done if bar.respond_to? :done
35
35
  BAR_MUTEX.synchronize do
36
36
  REMOVE << bar
37
37
  end
data/lib/rbbt/util/misc/concurrent_stream.rb CHANGED
@@ -82,18 +82,18 @@ module ConcurrentStream
82
82
  join_callback
83
83
 
84
84
  @joined = true
85
- close unless closed?
86
85
  lockfile.unlock if lockfile and lockfile.locked?
86
+ close unless closed?
87
87
  end
88
88
 
89
- def abort_threads
89
+ def abort_threads(exception)
90
90
  Log.medium "Aborting threads (#{Thread.current.inspect}) #{@threads.collect{|t| t.inspect } * ", "}"
91
91
 
92
92
  @threads.each do |t|
93
93
  @aborted = false if t == Thread.current
94
94
  next if t == Thread.current
95
95
  Log.medium "Aborting thread #{t.inspect}"
96
- t.raise Aborted.new
96
+ t.raise exception ? exception : Aborted.new
97
97
  end if @threads
98
98
 
99
99
  sleeped = false
@@ -117,11 +117,16 @@ module ConcurrentStream
117
117
  end
118
118
 
119
119
  def abort_pids
120
- @pids.each{|pid| Process.kill :INT, pid } if @pids
120
+ @pids.each do |pid|
121
+ begin
122
+ Process.kill :INT, pid
123
+ rescue Errno::ESRCH
124
+ end
125
+ end if @pids
121
126
  @pids = []
122
127
  end
123
128
 
124
- def abort
129
+ def abort(exception = nil)
125
130
  return if @aborted
126
131
  Log.medium "Aborting stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
127
132
  @aborted = true
@@ -131,8 +136,9 @@ module ConcurrentStream
131
136
  @abort_callback = nil
132
137
  close unless closed?
133
138
 
134
- abort_threads
139
+ abort_threads(exception)
135
140
  abort_pids
141
+ ensure
136
142
  lockfile.unlock if lockfile and lockfile.locked?
137
143
  end
138
144
  Log.medium "Aborted stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
data/lib/rbbt/util/misc/development.rb CHANGED
@@ -135,24 +135,30 @@ module Misc
135
135
  try = 0
136
136
  begin
137
137
  yield
138
- rescue Aborted
138
+ rescue TryAgain
139
+ sleep sleep
140
+ retry
141
+ rescue Aborted, Interrupt
139
142
  if msg
140
143
  Log.warn("Not Insisting after Aborted: #{$!.message} -- #{msg}")
141
144
  else
142
145
  Log.warn("Not Insisting after Aborted: #{$!.message}")
143
146
  end
147
+ raise $!
144
148
  rescue Exception
145
149
  if msg
146
- Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
150
+ Log.warn("Insisting after exception: #{$!.class} #{$!.message} -- #{msg}")
147
151
  else
148
- Log.warn("Insisting after exception: #{$!.message}")
149
- end
152
+ Log.warn("Insisting after exception: #{$!.class} #{$!.message}")
153
+ end
154
+
150
155
  if sleep and try > 0
151
156
  sleep sleep
152
157
  sleep = sleep_array.shift if sleep_array
153
158
  else
154
159
  Thread.pass
155
160
  end
161
+
156
162
  try += 1
157
163
  retry if try < times
158
164
  raise $!
data/lib/rbbt/util/misc/lock.rb CHANGED
@@ -28,11 +28,11 @@ module Misc
28
28
 
29
29
  res = nil
30
30
 
31
- lock_path = File.expand_path(file + '.lock')
32
31
  if options[:lock]
33
32
  lockfile = options[:lock]
34
33
  lockfile.lock unless lockfile.locked?
35
34
  else
35
+ lock_path = File.expand_path(file + '.lock')
36
36
  lockfile = Lockfile.new(lock_path, options)
37
37
  lockfile.lock
38
38
  end
data/lib/rbbt/util/misc/omics.rb CHANGED
@@ -125,9 +125,11 @@ module Misc
125
125
 
126
126
  def self.correct_vcf_mutation(pos, ref, mut_str)
127
127
  muts = mut_str.nil? ? [] : mut_str.split(',')
128
+ muts.collect!{|m| m == '<DEL>' ? '-' : m }
128
129
 
129
130
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
130
131
  ref = ref[1..-1]
132
+ raise "REF nil" if ref.nil?
131
133
  pos = pos + 1
132
134
  muts = muts.collect{|m| m[1..-1]}
133
135
  end
data/lib/rbbt/util/misc/pipes.rb CHANGED
@@ -175,7 +175,7 @@ module Misc
175
175
  Log.medium "Consuming stream #{Misc.fingerprint io}"
176
176
  begin
177
177
  into.sync == true if IO === into
178
- while not io.closed? and block = io.read(2048 * 10)
178
+ while not io.closed? and block = io.read(2048)
179
179
  into << block if into
180
180
  end
181
181
  io.join if io.respond_to? :join
@@ -213,48 +213,47 @@ module Misc
213
213
  end
214
214
 
215
215
  def self.sensiblewrite(path, content = nil, options = {}, &block)
216
+ force = Misc.process_options options, :force
216
217
  lock_options = Misc.pull_keys options, :lock
217
218
  lock_options = lock_options[:lock] if Hash === lock_options[:lock]
218
- return if Open.exists? path
219
+ return if Open.exists? path and not force
219
220
  tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
220
221
  tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})
221
222
  Misc.lock tmp_path_lock, lock_options do
222
- return if Open.exists? path
223
- if not Open.exists? path
224
- FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
225
- FileUtils.rm_f tmp_path if File.exists? tmp_path
226
- begin
227
- case
228
- when block_given?
229
- File.open(tmp_path, 'wb', &block)
230
- when String === content
231
- File.open(tmp_path, 'wb') do |f| f.write content end
232
- when (IO === content or StringIO === content or File === content)
233
-
234
- Open.write(tmp_path) do |f|
235
- f.sync = true
236
- while block = content.read(2048)
237
- f.write block
238
- end
223
+ return if Open.exists? path and not force
224
+ FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
225
+ FileUtils.rm_f tmp_path if File.exists? tmp_path
226
+ begin
227
+ case
228
+ when block_given?
229
+ File.open(tmp_path, 'wb', &block)
230
+ when String === content
231
+ File.open(tmp_path, 'wb') do |f| f.write content end
232
+ when (IO === content or StringIO === content or File === content)
233
+
234
+ Open.write(tmp_path) do |f|
235
+ f.sync = true
236
+ while block = content.read(2048)
237
+ f.write block
239
238
  end
240
- else
241
- File.open(tmp_path, 'wb') do |f| end
242
239
  end
243
-
244
- Open.mv tmp_path, path, lock_options
245
- content.join if content.respond_to? :join
246
- rescue Aborted
247
- Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
248
- content.abort if content.respond_to? :abort
249
- Open.rm path if File.exists? path
250
- rescue Exception
251
- Log.medium "Exception in sensiblewrite: #{$!.message} -- #{ Log.color :blue, path }"
252
- content.abort if content.respond_to? :abort
253
- Open.rm path if File.exists? path
254
- raise $!
255
- ensure
256
- FileUtils.rm_f tmp_path if File.exists? tmp_path
240
+ else
241
+ File.open(tmp_path, 'wb') do |f| end
257
242
  end
243
+
244
+ Open.mv tmp_path, path, lock_options
245
+ content.join if content.respond_to? :join
246
+ rescue Aborted
247
+ Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
248
+ content.abort if content.respond_to? :abort
249
+ Open.rm path if File.exists? path
250
+ rescue Exception
251
+ Log.medium "Exception in sensiblewrite: #{$!.message} -- #{ Log.color :blue, path }"
252
+ content.abort if content.respond_to? :abort
253
+ Open.rm path if File.exists? path
254
+ raise $!
255
+ ensure
256
+ FileUtils.rm_f tmp_path if File.exists? tmp_path
258
257
  end
259
258
  end
260
259
  end
@@ -269,7 +268,7 @@ module Misc
269
268
  end
270
269
  end
271
270
 
272
- def self.sort_stream(stream, header_hash = "#", cmd_args = nil)
271
+ def self.sort_stream(stream, header_hash = "#", cmd_args = " -u ")
273
272
  Misc.open_pipe do |sin|
274
273
  begin
275
274
  if defined? Step and Step === stream
@@ -343,64 +342,68 @@ module Misc
343
342
  end
344
343
  end
345
344
 
346
- def self.paste_streams(streams, lines = nil, sep = "\t", header = nil)
347
- sep ||= "\t"
348
- num_streams = streams.length
349
- Misc.open_pipe do |sin|
350
- sin.puts header if header
351
- streams = streams.collect do |stream|
352
- if defined? Step and Step === stream
353
- stream.get_stream || stream.join.path.open
354
- else
355
- stream
356
- end
345
+ def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil)
346
+ output.puts header if header
347
+ streams = streams.collect do |stream|
348
+ if defined? Step and Step === stream
349
+ stream.get_stream || stream.join.path.open
350
+ else
351
+ stream
357
352
  end
353
+ end
358
354
 
359
- begin
360
- done_streams = []
361
- lines ||= streams.collect{|s| s.gets }
362
- keys = []
363
- parts = []
364
- lines.each_with_index do |line,i|
365
- key, *p = line.strip.split(sep, -1)
366
- keys[i] = key
367
- parts[i] = p
368
- end
369
- sizes = parts.collect{|p| p.length }
370
- last_min = nil
371
- while lines.compact.any?
372
- min = keys.compact.sort.first
373
- str = []
374
- keys.each_with_index do |key,i|
375
- case key
376
- when min
377
- str << [parts[i] * sep]
378
- line = lines[i] = streams[i].gets
379
- if line.nil?
380
- keys[i] = nil
381
- parts[i] = nil
382
- else
383
- k, *p = line.strip.split(sep, -1)
384
- keys[i] = k
385
- parts[i] = p
386
- end
355
+ begin
356
+ done_streams = []
357
+ lines ||= streams.collect{|s| s.gets }
358
+ keys = []
359
+ parts = []
360
+ lines.each_with_index do |line,i|
361
+ key, *p = line.strip.split(sep, -1)
362
+ keys[i] = key
363
+ parts[i] = p
364
+ end
365
+ sizes = parts.collect{|p| p.length }
366
+ last_min = nil
367
+ while lines.compact.any?
368
+ min = keys.compact.sort.first
369
+ str = []
370
+ keys.each_with_index do |key,i|
371
+ case key
372
+ when min
373
+ str << [parts[i] * sep]
374
+ line = lines[i] = streams[i].gets
375
+ if line.nil?
376
+ keys[i] = nil
377
+ parts[i] = nil
387
378
  else
388
- str << [sep * (sizes[i]-1)] if sizes[i] > 0
379
+ k, *p = line.strip.split(sep, -1)
380
+ keys[i] = k
381
+ parts[i] = p
389
382
  end
383
+ else
384
+ str << [sep * (sizes[i]-1)] if sizes[i] > 0
390
385
  end
391
-
392
- sin.puts [min, str*sep] * sep
393
- end
394
- streams.each do |stream|
395
- stream.join if stream.respond_to? :join
396
386
  end
397
- rescue
398
- Log.exception $!
399
- streams.each do |stream|
400
- stream.abort if stream.respond_to? :abort
401
- end
402
- raise $!
387
+
388
+ output.puts [min, str*sep] * sep
403
389
  end
390
+ streams.each do |stream|
391
+ stream.join if stream.respond_to? :join
392
+ end
393
+ rescue
394
+ Log.exception $!
395
+ streams.each do |stream|
396
+ stream.abort if stream.respond_to? :abort
397
+ end
398
+ raise $!
399
+ end
400
+ end
401
+
402
+ def self.paste_streams(streams, lines = nil, sep = "\t", header = nil)
403
+ sep ||= "\t"
404
+ num_streams = streams.length
405
+ Misc.open_pipe do |sin|
406
+ self._paste_streams(streams, sin, lines, sep, header)
404
407
  end
405
408
  end
406
409