rbbt-util 5.13.37 → 5.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
data/lib/rbbt/util/R.rb CHANGED
@@ -38,7 +38,7 @@ source('#{UTIL}');
38
38
 
39
39
  def self.interactive(script, options = {})
40
40
  TmpFile.with_file do |init_file|
41
- Open.write(init_file) do |f|
41
+ Open.write(init_file) do |f|
42
42
  f.puts "# Loading basic rbbt environment"
43
43
  f.puts "library(utils);\n"
44
44
  f.puts "source('#{R::UTIL}');\n"
data/lib/rbbt/util/cmd.rb CHANGED
@@ -112,7 +112,6 @@ module CMD
112
112
  stderr = Log::HIGH
113
113
  end
114
114
 
115
- # Process cmd_options
116
115
  cmd_options = process_cmd_options options
117
116
  if cmd =~ /'\{opt\}'/
118
117
  cmd.sub!('\'{opt}\'', cmd_options)
@@ -203,7 +202,6 @@ module CMD
203
202
  Log.log line, stderr if Integer === stderr and log
204
203
  end
205
204
  serr.close
206
- Thread.exit!
207
205
  end
208
206
 
209
207
  #SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
@@ -217,7 +215,6 @@ module CMD
217
215
  err << serr.gets if Integer === stderr
218
216
  end
219
217
  serr.close
220
- Thread.exit
221
218
  end
222
219
 
223
220
  ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
data/lib/rbbt/util/concurrency/processes.rb CHANGED
@@ -96,6 +96,9 @@ class RbbtProcessQueue
96
96
  begin
97
97
  @process_monitor.join
98
98
  close_callback if @callback
99
+ rescue Aborted
100
+ Log.error "Aborted joining queue"
101
+ raise $!
99
102
  rescue Exception
100
103
  Log.error "Exception joining queue: #{$!.message}"
101
104
  raise $!
data/lib/rbbt/util/concurrency/processes/worker.rb CHANGED
@@ -36,7 +36,6 @@ class RbbtProcessQueue
36
36
  rescue Aborted, Interrupt
37
37
  Log.warn "Worker #{Process.pid} aborted"
38
38
  Kernel.exit! 0
39
- #Kernel.exit! -1
40
39
  rescue Exception
41
40
  Log.exception $!
42
41
  @callback_queue.push($!) if @callback_queue
data/lib/rbbt/util/log.rb CHANGED
@@ -2,10 +2,18 @@ require 'term/ansicolor'
2
2
  require 'rbbt/util/color'
3
3
  require 'rbbt/util/log/progress'
4
4
 
5
+ class MockMutex
6
+ def synchronize
7
+ yield
8
+ end
9
+ end
10
+
5
11
  module Log
6
12
  extend Term::ANSIColor
7
13
 
8
14
 
15
+ #ToDo: I'm not sure if using a Mutex here really gives troubles in CPU concurrency
16
+ #LOG_MUTEX = MockMutex.new
9
17
  LOG_MUTEX = Mutex.new
10
18
 
11
19
  SEVERITY_NAMES ||= begin
@@ -16,8 +24,16 @@ module Log
16
24
  names
17
25
  end
18
26
 
27
+ def self.last_caller(stack)
28
+ line = nil
29
+ while line.nil? or line =~ /util\/log\.rb/ and stack.any?
30
+ line = stack.shift
31
+ end
32
+ line ||= caller.first
33
+ end
34
+
19
35
  def self.ignore_stderr
20
- #LOG_MUTEX.synchronize do
36
+ LOG_MUTEX.synchronize do
21
37
  backup_stderr = STDERR.dup
22
38
  File.open('/dev/null', 'w') do |f|
23
39
  STDERR.reopen(f)
@@ -28,7 +44,7 @@ module Log
28
44
  backup_stderr.close
29
45
  end
30
46
  end
31
- #end
47
+ end
32
48
  end
33
49
 
34
50
  def self.get_level(level)
@@ -133,22 +149,18 @@ module Log
133
149
  message = "" << highlight << message << color(0) if severity >= INFO
134
150
  str = prefix << " " << message
135
151
 
136
- #LOG_MUTEX.synchronize do
137
- STDERR.puts str
138
- Log::LAST.replace "log"
139
- logfile.puts str unless logfile.nil?
140
- nil
141
- #end
152
+ LOG_MUTEX.synchronize do
153
+ STDERR.puts str
154
+ Log::LAST.replace "log"
155
+ logfile.puts str unless logfile.nil?
156
+ nil
157
+ end
142
158
  end
143
159
 
144
160
  def self.log_obj_inspect(obj, level, file = $stdout)
145
161
  stack = caller
146
162
 
147
- line = nil
148
- while line.nil? or line =~ /util\/log\.rb/ and stack.any?
149
- line = stack.shift
150
- end
151
- line ||= caller.first
163
+ line = Log.last_caller stack
152
164
 
153
165
  level = Log.get_level level
154
166
  name = Log::SEVERITY_NAMES[level] + ": "
@@ -161,11 +173,7 @@ module Log
161
173
  def self.log_obj_fingerprint(obj, level, file = $stdout)
162
174
  stack = caller
163
175
 
164
- line = nil
165
- while line.nil? or line =~ /util\/log\.rb/ and stack.any?
166
- line = stack.shift
167
- end
168
- line ||= caller.first
176
+ line = Log.last_caller stack
169
177
 
170
178
  level = Log.get_level level
171
179
  name = Log::SEVERITY_NAMES[level] + ": "
@@ -210,6 +218,25 @@ module Log
210
218
  error("BACKTRACE:\n" + e.backtrace * "\n")
211
219
  end
212
220
 
221
+ def self.color_stack(stack)
222
+ stack.collect do |line|
223
+ line = line.sub('`',"'")
224
+ color = :green if line =~ /workflow/
225
+ color = :blue if line =~ /rbbt-/
226
+ Log.color color, line
227
+ end
228
+ end
229
+
230
+ def self.stack(stack)
231
+ LOG_MUTEX.synchronize do
232
+
233
+ STDERR.puts Log.color :magenta, "Stack trace: " << Log.last_caller(caller)
234
+ color_stack(stack).each do |line|
235
+ STDERR.puts line
236
+ end
237
+ end
238
+ end
239
+
213
240
  case ENV['RBBT_LOG']
214
241
  when 'DEBUG'
215
242
  self.severity = DEBUG
data/lib/rbbt/util/log/progress/report.rb CHANGED
@@ -10,11 +10,11 @@ module Log
10
10
 
11
11
  def print(io, str)
12
12
  return if ENV["RBBT_NO_PROGRESS"] == "true"
13
- #LOG_MUTEX.synchronize do
13
+ LOG_MUTEX.synchronize do
14
14
  STDERR.print str
15
15
  Log.logfile.puts str unless Log.logfile.nil?
16
16
  Log::LAST.replace "progress"
17
- #end
17
+ end
18
18
  end
19
19
 
20
20
  attr_accessor :history, :mean_max
@@ -90,6 +90,7 @@ module Log
90
90
  if Log::LAST != "progress"
91
91
  length = Log::ProgressBar.cleanup_bars
92
92
  bars = BARS
93
+ print(io, Log.color(:yellow, "...Progress\n"))
93
94
  bars.sort_by{|b| b.depth }.reverse.each do |bar|
94
95
  print(io, Log.color(:yellow ,bar.report_msg) << "\n")
95
96
  end
data/lib/rbbt/util/log/progress/util.rb CHANGED
@@ -31,7 +31,7 @@ module Log
31
31
  end
32
32
 
33
33
  def self.remove_bar(bar)
34
- bar.done
34
+ bar.done if bar.respond_to? :done
35
35
  BAR_MUTEX.synchronize do
36
36
  REMOVE << bar
37
37
  end
data/lib/rbbt/util/misc/concurrent_stream.rb CHANGED
@@ -82,18 +82,18 @@ module ConcurrentStream
82
82
  join_callback
83
83
 
84
84
  @joined = true
85
- close unless closed?
86
85
  lockfile.unlock if lockfile and lockfile.locked?
86
+ close unless closed?
87
87
  end
88
88
 
89
- def abort_threads
89
+ def abort_threads(exception)
90
90
  Log.medium "Aborting threads (#{Thread.current.inspect}) #{@threads.collect{|t| t.inspect } * ", "}"
91
91
 
92
92
  @threads.each do |t|
93
93
  @aborted = false if t == Thread.current
94
94
  next if t == Thread.current
95
95
  Log.medium "Aborting thread #{t.inspect}"
96
- t.raise Aborted.new
96
+ t.raise exception ? exception : Aborted.new
97
97
  end if @threads
98
98
 
99
99
  sleeped = false
@@ -117,11 +117,16 @@ module ConcurrentStream
117
117
  end
118
118
 
119
119
  def abort_pids
120
- @pids.each{|pid| Process.kill :INT, pid } if @pids
120
+ @pids.each do |pid|
121
+ begin
122
+ Process.kill :INT, pid
123
+ rescue Errno::ESRCH
124
+ end
125
+ end if @pids
121
126
  @pids = []
122
127
  end
123
128
 
124
- def abort
129
+ def abort(exception = nil)
125
130
  return if @aborted
126
131
  Log.medium "Aborting stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
127
132
  @aborted = true
@@ -131,8 +136,9 @@ module ConcurrentStream
131
136
  @abort_callback = nil
132
137
  close unless closed?
133
138
 
134
- abort_threads
139
+ abort_threads(exception)
135
140
  abort_pids
141
+ ensure
136
142
  lockfile.unlock if lockfile and lockfile.locked?
137
143
  end
138
144
  Log.medium "Aborted stream #{Misc.fingerprint self} -- #{@abort_callback} [#{@aborted}]"
data/lib/rbbt/util/misc/development.rb CHANGED
@@ -135,24 +135,30 @@ module Misc
135
135
  try = 0
136
136
  begin
137
137
  yield
138
- rescue Aborted
138
+ rescue TryAgain
139
+ sleep sleep
140
+ retry
141
+ rescue Aborted, Interrupt
139
142
  if msg
140
143
  Log.warn("Not Insisting after Aborted: #{$!.message} -- #{msg}")
141
144
  else
142
145
  Log.warn("Not Insisting after Aborted: #{$!.message}")
143
146
  end
147
+ raise $!
144
148
  rescue Exception
145
149
  if msg
146
- Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
150
+ Log.warn("Insisting after exception: #{$!.class} #{$!.message} -- #{msg}")
147
151
  else
148
- Log.warn("Insisting after exception: #{$!.message}")
149
- end
152
+ Log.warn("Insisting after exception: #{$!.class} #{$!.message}")
153
+ end
154
+
150
155
  if sleep and try > 0
151
156
  sleep sleep
152
157
  sleep = sleep_array.shift if sleep_array
153
158
  else
154
159
  Thread.pass
155
160
  end
161
+
156
162
  try += 1
157
163
  retry if try < times
158
164
  raise $!
data/lib/rbbt/util/misc/lock.rb CHANGED
@@ -28,11 +28,11 @@ module Misc
28
28
 
29
29
  res = nil
30
30
 
31
- lock_path = File.expand_path(file + '.lock')
32
31
  if options[:lock]
33
32
  lockfile = options[:lock]
34
33
  lockfile.lock unless lockfile.locked?
35
34
  else
35
+ lock_path = File.expand_path(file + '.lock')
36
36
  lockfile = Lockfile.new(lock_path, options)
37
37
  lockfile.lock
38
38
  end
data/lib/rbbt/util/misc/omics.rb CHANGED
@@ -125,9 +125,11 @@ module Misc
125
125
 
126
126
  def self.correct_vcf_mutation(pos, ref, mut_str)
127
127
  muts = mut_str.nil? ? [] : mut_str.split(',')
128
+ muts.collect!{|m| m == '<DEL>' ? '-' : m }
128
129
 
129
130
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
130
131
  ref = ref[1..-1]
132
+ raise "REF nil" if ref.nil?
131
133
  pos = pos + 1
132
134
  muts = muts.collect{|m| m[1..-1]}
133
135
  end
data/lib/rbbt/util/misc/pipes.rb CHANGED
@@ -175,7 +175,7 @@ module Misc
175
175
  Log.medium "Consuming stream #{Misc.fingerprint io}"
176
176
  begin
177
177
  into.sync == true if IO === into
178
- while not io.closed? and block = io.read(2048 * 10)
178
+ while not io.closed? and block = io.read(2048)
179
179
  into << block if into
180
180
  end
181
181
  io.join if io.respond_to? :join
@@ -213,48 +213,47 @@ module Misc
213
213
  end
214
214
 
215
215
  def self.sensiblewrite(path, content = nil, options = {}, &block)
216
+ force = Misc.process_options options, :force
216
217
  lock_options = Misc.pull_keys options, :lock
217
218
  lock_options = lock_options[:lock] if Hash === lock_options[:lock]
218
- return if Open.exists? path
219
+ return if Open.exists? path and not force
219
220
  tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
220
221
  tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})
221
222
  Misc.lock tmp_path_lock, lock_options do
222
- return if Open.exists? path
223
- if not Open.exists? path
224
- FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
225
- FileUtils.rm_f tmp_path if File.exists? tmp_path
226
- begin
227
- case
228
- when block_given?
229
- File.open(tmp_path, 'wb', &block)
230
- when String === content
231
- File.open(tmp_path, 'wb') do |f| f.write content end
232
- when (IO === content or StringIO === content or File === content)
233
-
234
- Open.write(tmp_path) do |f|
235
- f.sync = true
236
- while block = content.read(2048)
237
- f.write block
238
- end
223
+ return if Open.exists? path and not force
224
+ FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
225
+ FileUtils.rm_f tmp_path if File.exists? tmp_path
226
+ begin
227
+ case
228
+ when block_given?
229
+ File.open(tmp_path, 'wb', &block)
230
+ when String === content
231
+ File.open(tmp_path, 'wb') do |f| f.write content end
232
+ when (IO === content or StringIO === content or File === content)
233
+
234
+ Open.write(tmp_path) do |f|
235
+ f.sync = true
236
+ while block = content.read(2048)
237
+ f.write block
239
238
  end
240
- else
241
- File.open(tmp_path, 'wb') do |f| end
242
239
  end
243
-
244
- Open.mv tmp_path, path, lock_options
245
- content.join if content.respond_to? :join
246
- rescue Aborted
247
- Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
248
- content.abort if content.respond_to? :abort
249
- Open.rm path if File.exists? path
250
- rescue Exception
251
- Log.medium "Exception in sensiblewrite: #{$!.message} -- #{ Log.color :blue, path }"
252
- content.abort if content.respond_to? :abort
253
- Open.rm path if File.exists? path
254
- raise $!
255
- ensure
256
- FileUtils.rm_f tmp_path if File.exists? tmp_path
240
+ else
241
+ File.open(tmp_path, 'wb') do |f| end
257
242
  end
243
+
244
+ Open.mv tmp_path, path, lock_options
245
+ content.join if content.respond_to? :join
246
+ rescue Aborted
247
+ Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
248
+ content.abort if content.respond_to? :abort
249
+ Open.rm path if File.exists? path
250
+ rescue Exception
251
+ Log.medium "Exception in sensiblewrite: #{$!.message} -- #{ Log.color :blue, path }"
252
+ content.abort if content.respond_to? :abort
253
+ Open.rm path if File.exists? path
254
+ raise $!
255
+ ensure
256
+ FileUtils.rm_f tmp_path if File.exists? tmp_path
258
257
  end
259
258
  end
260
259
  end
@@ -269,7 +268,7 @@ module Misc
269
268
  end
270
269
  end
271
270
 
272
- def self.sort_stream(stream, header_hash = "#", cmd_args = nil)
271
+ def self.sort_stream(stream, header_hash = "#", cmd_args = " -u ")
273
272
  Misc.open_pipe do |sin|
274
273
  begin
275
274
  if defined? Step and Step === stream
@@ -343,64 +342,68 @@ module Misc
343
342
  end
344
343
  end
345
344
 
346
- def self.paste_streams(streams, lines = nil, sep = "\t", header = nil)
347
- sep ||= "\t"
348
- num_streams = streams.length
349
- Misc.open_pipe do |sin|
350
- sin.puts header if header
351
- streams = streams.collect do |stream|
352
- if defined? Step and Step === stream
353
- stream.get_stream || stream.join.path.open
354
- else
355
- stream
356
- end
345
+ def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil)
346
+ output.puts header if header
347
+ streams = streams.collect do |stream|
348
+ if defined? Step and Step === stream
349
+ stream.get_stream || stream.join.path.open
350
+ else
351
+ stream
357
352
  end
353
+ end
358
354
 
359
- begin
360
- done_streams = []
361
- lines ||= streams.collect{|s| s.gets }
362
- keys = []
363
- parts = []
364
- lines.each_with_index do |line,i|
365
- key, *p = line.strip.split(sep, -1)
366
- keys[i] = key
367
- parts[i] = p
368
- end
369
- sizes = parts.collect{|p| p.length }
370
- last_min = nil
371
- while lines.compact.any?
372
- min = keys.compact.sort.first
373
- str = []
374
- keys.each_with_index do |key,i|
375
- case key
376
- when min
377
- str << [parts[i] * sep]
378
- line = lines[i] = streams[i].gets
379
- if line.nil?
380
- keys[i] = nil
381
- parts[i] = nil
382
- else
383
- k, *p = line.strip.split(sep, -1)
384
- keys[i] = k
385
- parts[i] = p
386
- end
355
+ begin
356
+ done_streams = []
357
+ lines ||= streams.collect{|s| s.gets }
358
+ keys = []
359
+ parts = []
360
+ lines.each_with_index do |line,i|
361
+ key, *p = line.strip.split(sep, -1)
362
+ keys[i] = key
363
+ parts[i] = p
364
+ end
365
+ sizes = parts.collect{|p| p.length }
366
+ last_min = nil
367
+ while lines.compact.any?
368
+ min = keys.compact.sort.first
369
+ str = []
370
+ keys.each_with_index do |key,i|
371
+ case key
372
+ when min
373
+ str << [parts[i] * sep]
374
+ line = lines[i] = streams[i].gets
375
+ if line.nil?
376
+ keys[i] = nil
377
+ parts[i] = nil
387
378
  else
388
- str << [sep * (sizes[i]-1)] if sizes[i] > 0
379
+ k, *p = line.strip.split(sep, -1)
380
+ keys[i] = k
381
+ parts[i] = p
389
382
  end
383
+ else
384
+ str << [sep * (sizes[i]-1)] if sizes[i] > 0
390
385
  end
391
-
392
- sin.puts [min, str*sep] * sep
393
- end
394
- streams.each do |stream|
395
- stream.join if stream.respond_to? :join
396
386
  end
397
- rescue
398
- Log.exception $!
399
- streams.each do |stream|
400
- stream.abort if stream.respond_to? :abort
401
- end
402
- raise $!
387
+
388
+ output.puts [min, str*sep] * sep
403
389
  end
390
+ streams.each do |stream|
391
+ stream.join if stream.respond_to? :join
392
+ end
393
+ rescue
394
+ Log.exception $!
395
+ streams.each do |stream|
396
+ stream.abort if stream.respond_to? :abort
397
+ end
398
+ raise $!
399
+ end
400
+ end
401
+
402
+ def self.paste_streams(streams, lines = nil, sep = "\t", header = nil)
403
+ sep ||= "\t"
404
+ num_streams = streams.length
405
+ Misc.open_pipe do |sin|
406
+ self._paste_streams(streams, sin, lines, sep, header)
404
407
  end
405
408
  end
406
409