rbbt-util 5.7.0 → 5.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations.rb +4 -1
  3. data/lib/rbbt/annotations/util.rb +11 -0
  4. data/lib/rbbt/persist.rb +8 -2
  5. data/lib/rbbt/resource/path.rb +1 -0
  6. data/lib/rbbt/tsv/accessor.rb +18 -15
  7. data/lib/rbbt/tsv/parallel.rb +89 -32
  8. data/lib/rbbt/tsv/util.rb +11 -0
  9. data/lib/rbbt/util/R.rb +0 -1
  10. data/lib/rbbt/util/concurrency.rb +2 -0
  11. data/lib/rbbt/util/concurrency/processes.rb +96 -0
  12. data/lib/rbbt/util/concurrency/processes/socket.rb +87 -0
  13. data/lib/rbbt/util/concurrency/processes/socket_old.rb +144 -0
  14. data/lib/rbbt/util/concurrency/processes/worker.rb +53 -0
  15. data/lib/rbbt/util/concurrency/threads.rb +76 -0
  16. data/lib/rbbt/util/log.rb +37 -5
  17. data/lib/rbbt/util/misc.rb +89 -4
  18. data/lib/rbbt/util/semaphore.rb +10 -4
  19. data/lib/rbbt/util/simpleopt/accessor.rb +5 -0
  20. data/lib/rbbt/util/simpleopt/doc.rb +2 -4
  21. data/lib/rbbt/workflow/accessor.rb +39 -12
  22. data/lib/rbbt/workflow/step.rb +5 -7
  23. data/share/rbbt_commands/benchmark/pthrough +18 -0
  24. data/share/rbbt_commands/color +41 -0
  25. data/share/rbbt_commands/stat/density +50 -0
  26. data/share/rbbt_commands/tsv/info +21 -3
  27. data/share/rbbt_commands/tsv/slice +46 -0
  28. data/share/rbbt_commands/tsv/subset +53 -0
  29. data/share/rbbt_commands/tsv/values +7 -1
  30. data/test/rbbt/annotations/test_util.rb +14 -0
  31. data/test/rbbt/tsv/test_parallel.rb +25 -3
  32. data/test/rbbt/tsv/test_util.rb +15 -0
  33. data/test/rbbt/util/concurrency/processes/test_socket.rb +37 -0
  34. data/test/rbbt/util/concurrency/test_processes.rb +53 -0
  35. data/test/rbbt/util/concurrency/test_threads.rb +42 -0
  36. data/test/rbbt/util/test_concurrency.rb +6 -0
  37. metadata +23 -2
@@ -0,0 +1,144 @@
1
class RbbtProcessQueue
  # Queue of Ruby objects carried over an anonymous pipe.
  #
  # Every message is framed as a native unsigned 32-bit length
  # (<tt>[length].pack('L')</tt>) followed by a payload made of a one
  # character type tag ("M" for a plain Marshal dump, "T" for a TSV dumped
  # as [info, hash]) and the Marshal data. Writers are serialized through
  # +in_lockfile+ and readers through +out_lockfile+ using Filelock.
  class RbbtProcessSocket

    # Raised by #pop when the pipe is closed and no more messages remain.
    class ClosedSocket < Exception; end

    # Number of bytes taken by the length prefix of each message.
    HEADER_SIZE = [0].pack('L').bytesize

    attr_accessor :sin, :sout, :in_lockfile, :out_lockfile

    # lockfile:: base path for the two lock files ('.in' and '.out' are
    #            appended); defaults to TmpFile.tmp_file.
    #
    # Raises RuntimeError if either lock file already exists.
    def initialize(lockfile = nil)
      lockfile ||= TmpFile.tmp_file

      @lockfile = lockfile
      @in_lockfile = lockfile + '.in'
      @out_lockfile = lockfile + '.out'
      raise "in_lockfile exists?" if File.exist? @in_lockfile
      raise "out_lockfile exists?" if File.exist? @out_lockfile

      # The pipe is opened only after the checks so that a failed
      # construction does not leave descriptors open.
      @sout, @sin = IO.pipe
      @sin.binmode
      @sout.binmode

      FileUtils.touch @in_lockfile
      FileUtils.touch @out_lockfile
    end

    # Returns the framed binary message (length prefix + type tag + dump)
    # for +obj+. Failures are logged and re-raised.
    def self.serialize(obj)
      dump = nil
      begin
        case obj
        when TSV
          type = "T"
          info = obj.info
          info.delete_if{|k,v| v.nil?}
          dump = Marshal.dump([info, {}.merge(obj)])
        else
          type = "M"
          dump = Marshal.dump(obj)
        end
        payload = [type, dump].pack('A1a*')
        [payload.bytesize].pack('L') << payload
      rescue Exception
        Log.error "Serialize error for: #{Misc.fingerprint obj} - #{Misc.fingerprint dump}"
        raise
      end
    end

    # Rebuilds the object from a payload (type tag + dump, WITHOUT the
    # length prefix). Raises ArgumentError on an unknown type tag.
    #
    # NOTE(review): Marshal.load must only be fed data written by trusted
    # cooperating processes.
    def self.unserialize(str)
      begin
        c, dump = str.unpack("A1a*")
        case c
        when "M"
          return Marshal.load(dump)
        when "T"
          info, hash = Marshal.load(dump)
          return TSV.setup(hash, info)
        else
          raise ArgumentError, "Unknown payload type: #{c.inspect}"
        end
      rescue Exception
        Log.error "Unserialize error for: #{Misc.fingerprint str}"
        raise
      end
    end

    # Reads exactly +length+ bytes from the read end. IO#readpartial is
    # used so nothing beyond the request is pulled into this process'
    # buffer. Raises EOFError if the pipe closes first.
    def read_sout(length)
      str = String.new
      str << sout.readpartial(length - str.bytesize) while str.bytesize < length
      str
    end

    # Writes all the bytes of +str+ to the write end with non-blocking
    # writes, waiting for the pipe to drain when it is full. Returns the
    # number of bytes written.
    def write_sin(str)
      total = str.bytesize
      wrote = 0
      while wrote < total
        begin
          wrote += sin.write_nonblock(str.byteslice(wrote, total - wrote))
        rescue IO::WaitWritable
          IO.select(nil, [sin])
        end
      end
      wrote
    end

    # Serializes +obj+ and appends it to the pipe while holding the input
    # lock. Serialization happens before the lock is taken.
    def push(obj)
      payload = RbbtProcessSocket.serialize(obj)
      Filelock(in_lockfile) do
        sin << payload
      end
    end

    # Blocks until a message is available and returns the decoded object.
    # The output lock is held only while a message is being read; when
    # nothing becomes readable within a second the lock is released and the
    # read is retried after a one second pause.
    #
    # Raises ClosedSocket when the pipe has reached end of file.
    def pop
      payload = nil
      until payload
        payload = Filelock(out_lockfile) do
          ready, = IO.select([sout], nil, nil, 1)
          if ready.nil? or ready.empty?
            nil
          else
            header = begin
                       read_sout(HEADER_SIZE)
                     rescue EOFError
                       raise ClosedSocket
                     end
            read_sout(header.unpack('L').first)
          end
        end
        sleep 1 if payload.nil?
      end

      RbbtProcessSocket.unserialize(payload)
    end

    # Closes the write end and returns an Array with every message still
    # waiting in the pipe, decoded with the same framing used by #push.
    def rest
      sin.close unless sin.closed?
      str = sout.read
      res = []

      offset = 0
      while offset < str.bytesize
        header = str.byteslice(offset, HEADER_SIZE)
        raise "Truncated message header" if header.bytesize < HEADER_SIZE
        length = header.unpack('L').first
        payload = str.byteslice(offset + HEADER_SIZE, length)
        raise "Truncated message payload" if payload.nil? or payload.bytesize < length
        res << RbbtProcessSocket.unserialize(payload)
        offset += HEADER_SIZE + length
      end

      res
    end

    # Removes both lock files and closes whichever pipe ends remain open.
    def clean
      FileUtils.rm @in_lockfile if File.exist? @in_lockfile
      FileUtils.rm @out_lockfile if File.exist? @out_lockfile
      sin.close unless sin.closed?
      sout.close unless sout.closed?
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'rbbt/util/concurrency/processes/socket'
2
class RbbtProcessQueue
  # Worker running in a forked child process: it pops jobs from +queue+,
  # runs the block on each one and, when a callback queue is given, pushes
  # the result (or the exception that stopped the worker) to it.
  class RbbtProcessQueueWorker
    attr_accessor :pid, :queue, :callback_queue, :block

    # Forks the worker right away.
    #
    # queue::          object answering +pop+ and exposing +swrite+.
    # callback_queue:: optional object answering +push+ and exposing +sread+.
    # block::          called with every job popped from +queue+.
    #
    # Child exit status: 0 when ClosedStream is raised (or the loop ends),
    # -1 when aborted through SIGINT or on any other exception.
    def initialize(queue, callback_queue = nil, &block)
      @queue, @callback_queue, @block = queue, callback_queue, block

      @pid = Process.fork do
        begin
          # Close the ends the child does not use (presumably the write end
          # of the job queue and the read end of the callback queue --
          # confirm against the queue class).
          @queue.swrite.close
          @callback_queue.sread.close if @callback_queue

          Signal.trap(:INT){ raise Aborted; }
          loop do
            p = @queue.pop
            # An exception sent as a job is raised inside the worker
            raise p if Exception === p
            res = @block.call p
            @callback_queue.push res if @callback_queue
          end

          # Only reached when the loop is ended by StopIteration
          exit 0
        rescue ClosedStream
          exit 0
        rescue Aborted
          exit(-1)
        rescue Exception
          Log.exception $!
          @callback_queue.push($!) if @callback_queue
          exit(-1)
        end
      end
    end

    # Waits for the child to exit. It is a no-op when the child has already
    # been reaped (for instance by a previous call to #done?).
    def join
      Process.waitpid @pid
    rescue Errno::ECHILD
      nil
    end

    # Sends SIGINT to the child, which makes it raise Aborted and exit.
    # It is a no-op when the process no longer exists.
    def abort
      Process.kill :INT, @pid
    rescue Errno::ESRCH
      nil
    end

    # Returns true when the child has exited (reaping it if needed) and
    # false while it is still running or when its state cannot be read.
    def done?
      begin
        !! Process.waitpid(@pid, Process::WNOHANG)
      rescue Errno::ECHILD
        true
      rescue
        false
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
# Fixed-size pool of worker threads that consume elements from a shared
# Queue and pass each one to a block.
class RbbtThreadQueue
  attr_accessor :num_threads, :threads, :queue, :mutex, :block, :done

  # Thread that pops elements from the queue forever and calls the block
  # with each one (splatted). Any exception other than Aborted is re-raised
  # in the thread that created the worker; Aborted just ends the worker.
  class RbbtThreadQueueWorker < Thread
    # queue:: Queue to pop elements from.
    # mutex:: when given, it is passed to the block as an extra last
    #         argument.
    def initialize(queue, mutex = nil, &block)
      super(Thread.current) do |current|
        begin
          loop do
            job = queue.pop
            # Build a new argument list instead of appending to the queued
            # element, so the caller's objects are left untouched.
            args = mutex.nil? ? job : [*job, mutex]
            block.call(*args)
          end
        rescue Exception
          current.raise $! unless Aborted === $!
        end
      end
    end

    # Stops the worker by raising Aborted inside it (Thread#raise).
    def clean
      self.raise Aborted if alive?
    end
  end

  # num_threads:: number of workers started by #init.
  def initialize(num_threads)
    @num_threads = num_threads
    @threads = []
    @queue = Queue.new
    @mutex = Mutex.new
  end

  # Stops any previous workers and starts +num_threads+ new ones running
  # +block+. With +use_mutex+ the shared mutex is passed to the block as
  # its last argument.
  def init(use_mutex = false, &block)
    clean
    num_threads.times do |i|
      @threads << RbbtThreadQueueWorker.new(queue, use_mutex ? mutex : nil, &block)
    end
  end

  # Waits until the queue is empty and every live worker is idle, then
  # stops the workers and waits for them to finish.
  #
  # Raises RuntimeError if elements remain queued and no worker is alive.
  def join
    while true
      # Forget workers that died, so they are not waited for forever
      @threads.delete_if{|t| not t.alive? }
      break if queue.length == 0 and queue.num_waiting >= @threads.length
      raise "No worker thread survived" if @threads.empty?
      Thread.pass
    end

    # The remaining workers are all blocked on queue.pop
    @threads.each{|t| t.raise Aborted }
    @threads.each{|t| t.join }
    @threads.clear
  end

  # Aborts every worker and empties the list of threads.
  def clean
    threads.each{ |t| t.clean }.clear
  end

  # Queues +e+ for processing by the workers.
  def process(e)
    queue << e
  end

  # Calls +block+ for every element of +list+ using +num+ worker threads
  # and returns once all the elements have been processed.
  def self.each(list, num = 3, &block)
    q = RbbtThreadQueue.new num
    q.init(&block)
    list.each do |elem| q.process elem end
    q.join
  end
end
data/lib/rbbt/util/log.rb CHANGED
@@ -126,6 +126,14 @@ def ppp(message)
126
126
  puts ""
127
127
  end
128
128
 
129
# Debugging helper: sends to Log.debug the location this method was called
# from, tagged "FINGERPRINT:", followed by Misc.fingerprint of +object+.
# rbbt/util/misc is required lazily, only if the logger evaluates the block.
def fff(object)
  location = caller
  blank = proc{ "" }

  Log.debug{ "#{Log.color :cyan, "FINGERPRINT:"} " << location.first }
  Log.debug(&blank)
  Log.debug do
    require 'rbbt/util/misc'
    "=> " << Misc.fingerprint(object)
  end
  Log.debug(&blank)
end
136
+
129
137
  def ddd(message, file = $stdout)
130
138
  stack = caller
131
139
  Log.debug{"#{Log.color :cyan, "DEBUG:"} " << stack.first}
@@ -134,12 +142,36 @@ def ddd(message, file = $stdout)
134
142
  Log.debug{""}
135
143
  end
136
144
 
137
- def fff(object)
145
# Logging helper: through Log.low it emits the first frame of `caller`
# tagged "LOW:", an empty entry, "=> " followed by message.inspect, and a
# final empty entry. The `file` parameter is not referenced in the body.
# The "-" lines below are the body of the fff definition that used to sit
# at this position in the file.
+ def lll(message, file = $stdout)
138
146
  stack = caller
139
- Log.debug{"#{Log.color :cyan, "FINGERPRINT:"} " << stack.first}
140
- Log.debug{""}
141
- Log.debug{require 'rbbt/util/misc'; "=> " << Misc.fingerprint(object) }
142
- Log.debug{""}
147
+ Log.low{"#{Log.color :cyan, "LOW:"} " << stack.first}
148
+ Log.low{""}
149
+ Log.low{"=> " << message.inspect}
150
+ Log.low{""}
151
+ end
152
+
153
# Logging helper for the medium severity: emits the location this method
# was called from, tagged "MEDIUM:", followed by message.inspect.
#
# message:: object to show (through #inspect).
# file::    unused; kept so the signature matches the sibling helpers.
def mmm(message, file = $stdout)
  stack = caller
  # Log at the severity announced by the tag (this used to go to Log.low)
  Log.medium{"#{Log.color :cyan, "MEDIUM:"} " << stack.first}
  Log.medium{""}
  Log.medium{"=> " << message.inspect}
  Log.medium{""}
end
160
+
161
# Logging helper for the high severity: emits the location this method was
# called from, tagged "HIGH:", then message.inspect, with an empty entry
# after each of them. The +file+ argument is accepted but not used.
def hhh(message, file = $stdout)
  location = caller

  Log.high do
    "#{Log.color :cyan, "HIGH:"} " << location.first
  end
  Log.high do
    ""
  end
  Log.high do
    "=> " << message.inspect
  end
  Log.high do
    ""
  end
end
168
+
169
# Logging helper for the info severity: emits the location this method was
# called from, tagged "INFO:", then message.inspect, with an empty entry
# after each of them. The +file+ argument is accepted but not used.
def iii(message, file = $stdout)
  frames = caller
  heading = proc{ "#{Log.color :cyan, "INFO:"} " << frames.first }
  spacer = proc{ "" }
  payload = proc{ "=> " << message.inspect }

  Log.info(&heading)
  Log.info(&spacer)
  Log.info(&payload)
  Log.info(&spacer)
end
144
176
 
145
177
 
@@ -16,8 +16,11 @@ class Hash
16
16
  end
17
17
  end
18
18
 
19
- class ParameterException < Exception; end
19
+ class ParameterException < StandardError; end # now a StandardError, so a bare `rescue` catches it
20
20
  class FieldNotFoundError < StandardError;end
21
+ class Aborted < Exception; end # derives from Exception directly: a bare `rescue` will not catch it
22
+ class TryAgain < Exception; end # derives from Exception directly; presumably a retry signal -- confirm against callers
23
+ class ClosedStream < Exception; end # derives from Exception directly; raised by Misc.read_stream on end of file
21
24
 
22
25
  module LaterString
23
26
  def to_s
@@ -28,18 +31,34 @@ end
28
31
  Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
29
32
  module Misc
30
33
 
34
# Reads from +stream+ until +size+ characters (bytes, for the binary
# strings returned by IO#read) have been gathered and returns them.
# While data is missing it waits up to a second at a time for the stream
# to become readable.
#
# Raises ClosedStream if the stream reaches end of file first.
def self.read_stream(stream, size)
  buffer = stream.read(size)
  while buffer.nil?
    IO.select([stream], nil, nil, 1)
    raise ClosedStream if stream.eof?
    buffer = stream.read(size)
  end

  until buffer.length >= size
    raise ClosedStream if stream.eof?
    IO.select([stream], nil, nil, 1)
    chunk = stream.read(size - buffer.length)
    buffer << chunk if chunk
  end

  buffer
end
31
50
  # Splits a command-line style string into a flat Array of tokens.
  # An Array argument is returned unchanged. Otherwise each token is either
  # the text between a pair of quote characters (single or double, captured
  # without the quotes) or a run of characters containing no quote and no
  # whitespace; the nil captures left by the alternation are dropped by
  # flatten.compact. The "-"/"+" pairs below record a whitespace-only change.
  def self.parse_cmd_params(str)
32
51
  return str if Array === str
33
52
  str.scan(/
34
- (?:["']([^"']*?)["']) |
35
- ([^"'\s]+)
53
+ (?:["']([^"']*?)["']) |
54
+ ([^"'\s]+)
36
55
  /x).flatten.compact
37
56
  end
38
57
 
39
58
  def self.correct_icgc_mutation(pos, ref, mut_str)
40
59
  mut = mut_str
41
60
  mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
42
- mut = "+" << mut if ref == '-'
61
+ mut = "+" << mut if ref == '-'
43
62
  [pos, [mut]]
44
63
  end
45
64
 
@@ -504,6 +523,72 @@ end
504
523
  "tyr" => "Y",
505
524
  "val" => "V"
506
525
  }
526
# Maps each of the 64 DNA codons (upper case, T rather than U) to the
# one-letter code of its amino acid; the three stop codons map to "*".
# Frozen so the shared lookup table cannot be modified by accident.
CODON_TABLE = {
  "ATT" => "I",
  "ATC" => "I",
  "ATA" => "I",
  "CTT" => "L",
  "CTC" => "L",
  "CTA" => "L",
  "CTG" => "L",
  "TTA" => "L",
  "TTG" => "L",
  "GTT" => "V",
  "GTC" => "V",
  "GTA" => "V",
  "GTG" => "V",
  "TTT" => "F",
  "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C",
  "TGC" => "C",
  "GCT" => "A",
  "GCC" => "A",
  "GCA" => "A",
  "GCG" => "A",
  "GGT" => "G",
  "GGC" => "G",
  "GGA" => "G",
  "GGG" => "G",
  "CCT" => "P",
  "CCC" => "P",
  "CCA" => "P",
  "CCG" => "P",
  "ACT" => "T",
  "ACC" => "T",
  "ACA" => "T",
  "ACG" => "T",
  "TCT" => "S",
  "TCC" => "S",
  "TCA" => "S",
  "TCG" => "S",
  "AGT" => "S",
  "AGC" => "S",
  "TAT" => "Y",
  "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q",
  "CAG" => "Q",
  "AAT" => "N",
  "AAC" => "N",
  "CAT" => "H",
  "CAC" => "H",
  "GAA" => "E",
  "GAG" => "E",
  "GAT" => "D",
  "GAC" => "D",
  "AAA" => "K",
  "AAG" => "K",
  "CGT" => "R",
  "CGC" => "R",
  "CGA" => "R",
  "CGG" => "R",
  "AGA" => "R",
  "AGG" => "R",
  "TAA" => "*",
  "TAG" => "*",
  "TGA" => "*",
}.freeze
507
592
 
508
593
  #def self.fast_align(reference, sequence)
509
594
  #