rbbt-util 5.11.9 → 5.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/tsv/dumper.rb +1 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +6 -2
- data/lib/rbbt/tsv/stream.rb +55 -0
- data/lib/rbbt/tsv/util.rb +7 -1
- data/lib/rbbt/util/misc.rb +5 -762
- data/lib/rbbt/util/misc/concurrent_stream.rb +15 -0
- data/lib/rbbt/util/misc/development.rb +122 -0
- data/lib/rbbt/util/misc/inspect.rb +3 -3
- data/lib/rbbt/util/misc/manipulation.rb +136 -0
- data/lib/rbbt/util/misc/math.rb +50 -0
- data/lib/rbbt/util/misc/objects.rb +79 -0
- data/lib/rbbt/util/misc/omics.rb +10 -0
- data/lib/rbbt/util/misc/options.rb +280 -0
- data/lib/rbbt/util/misc/pipes.rb +140 -20
- data/lib/rbbt/util/misc/system.rb +90 -0
- data/lib/rbbt/util/tar.rb +0 -7
- data/lib/rbbt/workflow/accessor.rb +3 -3
- data/lib/rbbt/workflow/step/run.rb +69 -15
- data/lib/rbbt/workflow/task.rb +7 -5
- data/test/rbbt/tsv/test_stream.rb +92 -0
- data/test/rbbt/tsv/test_util.rb +1 -3
- data/test/rbbt/util/misc/test_pipes.rb +79 -0
- data/test/rbbt/workflow/test_task.rb +1 -0
- metadata +10 -2
@@ -26,6 +26,20 @@ module ConcurrentStream
|
|
26
26
|
stream
|
27
27
|
end
|
28
28
|
|
29
|
+
# Copies this stream's concurrency bookkeeping onto +stream+.
#
# +stream+ is first handed to ConcurrentStream.setup and then receives this
# object's threads, pids, callback, abort_callback, filename and joined
# values through the matching attribute writers.
#
# Returns the value of +joined+ (the result of the last assignment).
def annotate(stream)
  ConcurrentStream.setup(stream)

  stream.threads, stream.pids = threads, pids
  stream.callback, stream.abort_callback = callback, abort_callback
  stream.filename = filename
  stream.joined = joined
end
|
38
|
+
|
39
|
+
# Drops the references this stream holds to its threads, child pids and
# callbacks.
#
# The assignments go through the attribute writers: a bare
# `threads, pids, callback, abort_callback = nil` would only create
# method-local variables and leave the stream's state untouched.
#
# Returns nil.
def clear
  self.threads = nil
  self.pids = nil
  self.callback = nil
  self.abort_callback = nil
  nil
end
|
42
|
+
|
29
43
|
# Reports whether this stream has been joined: returns the current value of
# @joined, which is nil when it has never been assigned.
def joined?
  @joined
end
|
@@ -69,6 +83,7 @@ module ConcurrentStream
|
|
69
83
|
join_callback
|
70
84
|
|
71
85
|
@joined = true
|
86
|
+
close unless closed?
|
72
87
|
end
|
73
88
|
|
74
89
|
def abort_threads
|
@@ -1,4 +1,16 @@
|
|
1
1
|
module Misc
|
2
|
+
|
3
|
+
# Resolves a constant from its (possibly namespaced) name.
#
# string - a String or Symbol such as "TSV::Parser"; nil is passed through.
#
# The name is split on "::" and each segment is looked up inside the
# previous one, starting from Kernel. Empty segments (as produced by a
# leading "::", e.g. "::String") are skipped instead of being handed to
# const_get, which rejects an empty name.
#
# Returns the constant, or nil when +string+ is nil. Raises NameError when a
# segment cannot be resolved.
def self.string2const(string)
  return nil if string.nil?

  segments = string.to_s.split('::').reject { |segment| segment.empty? }
  segments.inject(Kernel) { |mod, segment| mod.const_get(segment) }
end
|
13
|
+
|
2
14
|
def self.benchmark(repeats = 1, message = nil)
|
3
15
|
require 'benchmark'
|
4
16
|
res = nil
|
@@ -92,4 +104,114 @@ module Misc
|
|
92
104
|
|
93
105
|
res
|
94
106
|
end
|
107
|
+
|
108
|
+
# Runs the given block only if no block has been run through do_once since
# the last reset. The guard is the single global flag $__did_once, so it is
# shared by every call site. The flag is raised before yielding.
#
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end

  nil
end

# Lowers the $__did_once flag so that the next do_once call runs its block.
def self.reset_do_once
  $__did_once = false
end
|
118
|
+
|
119
|
+
# Runs the block, retrying it when it raises a StandardError.
#
# times - maximum number of attempts, or an Array of delays (seconds); in
#         that case the number of attempts is the array length and the
#         delays are used in order.
# sleep - delay in seconds between attempts (ignored when +times+ is an
#         Array). No delay is applied after the very first failure.
# msg   - optional text appended to the warning logged on each failure.
#
# Returns the block's value. Re-raises the last exception once the attempts
# are exhausted.
def self.insist(times = 3, sleep = nil, msg = nil)
  sleep_array = nil
  if Array === times
    # Work on a copy: the delays are consumed with #shift and the caller's
    # array must not be emptied as a side effect.
    sleep_array = times.dup
    times = sleep_array.length
    sleep = sleep_array.shift
  end

  try = 0
  begin
    yield
  rescue
    if msg
      Log.warn("Insisting after exception: #{$!.message} -- #{msg}")
    else
      Log.warn("Insisting after exception: #{$!.message}")
    end

    if sleep and try > 0
      # Explicit receiver: the +sleep+ parameter shadows Kernel#sleep here.
      Kernel.sleep(sleep)
      sleep = sleep_array.shift if sleep_array
    else
      Thread.pass
    end

    try += 1
    retry if try < times
    raise
  end
end

# Runs the block through insist with a maximum of three attempts.
def self.try3times(&block)
  insist(3, &block)
end
|
149
|
+
|
150
|
+
# Divides the array into +num+ chunks of (nearly) the same size by placing
# one element in each chunk in turn (round-robin).
#
# array - anything responding to each_with_index.
# num   - number of chunks; coerced with to_i, and any value below 1 is
#         treated as 1 so that indexing never lands on a missing chunk.
#
# Returns an Array of +num+ Arrays; trailing chunks are empty when there are
# fewer elements than chunks.
def self.divide(array, num)
  num = num.to_i
  num = 1 if num < 1

  chunks = Array.new(num) { [] }
  array.each_with_index { |e, i| chunks[i % num] << e }
  chunks
end
|
162
|
+
|
163
|
+
# Splits the array into consecutive chunks of at most +num+ elements,
# preserving the original order; only the last chunk may be shorter.
#
# array - object responding to length and to [] with a Range.
# num   - chunk size; coerced with to_i.
#
# Returns an Array of chunks ([] for an empty array). Raises ArgumentError
# when +num+ is not positive: a zero or negative size could never advance
# through the array.
def self.ordered_divide(array, num)
  num = num.to_i
  raise ArgumentError, "chunk size must be positive, got #{num}" unless num > 0

  last = array.length - 1
  chunks = []
  0.step(last, num) do |start|
    chunks << array[start..[last, start + num - 1].min]
  end
  chunks
end
|
176
|
+
|
177
|
+
# Draws +size+ distinct random positions from 0...total.
#
# total - size of the range to sample from.
# size  - number of distinct positions wanted.
#
# When the sample is a sizeable fraction of the range (more than a tenth)
# positions are removed from an explicit pool, so every draw succeeds; for
# small samples positions are drawn directly and repeats are discarded.
#
# Returns a Set of Integers. Raises ArgumentError when more positions are
# requested than the range contains (the result could not be distinct).
def self.random_sample_in_range(total, size)
  raise ArgumentError, "cannot sample #{size} distinct positions out of #{total}" if size > total

  picked = Set.new

  if size > total / 10
    pool = (0..total - 1).to_a
    size.times do
      pos = (rand * pool.length).floor
      # Swap the chosen slot with the last one so removal is a cheap pop.
      pool[pos], pool[-1] = pool[-1], pool[pos]
      picked << pool.pop
    end
  else
    # The Set ignores repeated draws, so keep drawing until it is full.
    picked << (rand * total).floor while picked.size < size
  end

  picked
end
|
207
|
+
|
208
|
+
# Picks +size+ random elements from +ary+.
#
# Delegates to ary.sample when the object provides it; otherwise draws
# random positions with random_sample_in_range and fetches them with
# values_at. The +replacement+ argument is accepted but not used.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to?(:sample)

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end
|
95
217
|
end
|
@@ -40,9 +40,9 @@ module Misc
|
|
40
40
|
when (defined? AnnotatedArray and AnnotatedArray)
|
41
41
|
"<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
|
42
42
|
when (defined? TSV and TSV::Parser)
|
43
|
-
"<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
|
43
|
+
"<TSVStream:" + (obj.filename || "NOFILENAME") + "--" << Misc.fingerprint(obj.options) << ">"
|
44
44
|
when IO
|
45
|
-
"<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
|
45
|
+
"<IO:" + (obj.respond_to?(:filename) ? obj.filename || obj.inspect : obj.inspect) + ">"
|
46
46
|
when File
|
47
47
|
"<File:" + obj.path + ">"
|
48
48
|
when Array
|
@@ -79,7 +79,7 @@ module Misc
|
|
79
79
|
def self.remove_long_items(obj)
|
80
80
|
case
|
81
81
|
when IO === obj
|
82
|
-
remove_long_items("IO: " + obj.filename)
|
82
|
+
remove_long_items("IO: " + (obj.respond_to?(:filename) ? (obj.filename || obj.inspect) : obj.inspect ))
|
83
83
|
when obj.respond_to?(:path)
|
84
84
|
remove_long_items("File: " + obj.path)
|
85
85
|
when TSV::Parser === obj
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module Misc
|
2
|
+
# Merges overlapping ranges into the minimal list of covering ranges.
#
# The ranges are visited in order of their begin. A range that starts after
# the end of the last merged span opens a new span; otherwise it extends
# that span when it reaches further. Ranges that merely touch (1..2 and
# 3..4) are kept separate, as the comparison is strict.
#
# Returns an Array of inclusive Ranges ordered by begin.
def self.collapse_ranges(ranges)
  spans = []

  ranges.sort_by { |range| range.begin }.each do |range|
    current = spans.last
    if current.nil? or range.begin > current[1]
      spans << [range.begin, range.end]
    elsif range.end > current[1]
      current[1] = range.end
    end
  end

  spans.collect { |first, last| (first..last) }
end

# Number of integer positions covered by +ranges+, counting overlapping
# positions only once.
def self.total_length(ranges)
  collapse_ranges(ranges).inject(0) { |sum, range| sum + (range.end - range.begin + 1) }
end
|
35
|
+
|
36
|
+
# Finds which positions of the sorted array +a1+ hold elements that are also
# present in the sorted array +a2+.
#
# Both arrays are walked in parallel by index and are left untouched (no
# elements are shifted off the arguments). As soon as either side yields nil
# (end of the array, or a nil element) the walk stops.
#
# Returns an Array with the matching indices of +a1+. Raises ArgumentError
# when two elements cannot be compared with <=>, instead of spinning on
# them forever.
def self.sorted_array_hits(a1, a2)
  i1 = i2 = 0
  match = []

  loop do
    e1, e2 = a1[i1], a2[i2]
    break if e1.nil? or e2.nil?

    case e1 <=> e2
    when 0
      match << i1
      i1 += 1
      i2 += 1
    when -1
      i1 += 1
    when 1
      i2 += 1
    else
      raise ArgumentError, "Cannot compare #{e1.inspect} with #{e2.inspect}"
    end
  end

  match
end
|
59
|
+
|
60
|
+
# Computes the elements common to two sorted arrays.
#
# Both arrays are walked in parallel by index and are left untouched (no
# elements are shifted off the arguments). As soon as either side yields nil
# (end of the array, or a nil element) the walk stops.
#
# Returns an Array with the shared elements, in order. Raises ArgumentError
# when two elements cannot be compared with <=>, instead of spinning on
# them forever.
def self.intersect_sorted_arrays(a1, a2)
  i1 = i2 = 0
  intersect = []

  loop do
    e1, e2 = a1[i1], a2[i2]
    break if e1.nil? or e2.nil?

    case e1 <=> e2
    when 0
      intersect << e1
      i1 += 1
      i2 += 1
    when -1
      i1 += 1
    when 1
      i2 += 1
    else
      raise ArgumentError, "Cannot compare #{e1.inspect} with #{e2.inspect}"
    end
  end

  intersect
end
|
78
|
+
|
79
|
+
# Merges two sorted arrays into one sorted array; an element that shows up
# at the same point in both arrays is emitted once.
#
# Both arrays are walked by index and are left untouched (no elements are
# shifted off the arguments). When one side runs out (its current element is
# nil or false) the rest of the other side is appended as is.
#
# Returns the merged Array. Raises ArgumentError when two elements cannot be
# compared with <=>, instead of spinning on them forever.
def self.merge_sorted_arrays(a1, a2)
  i1 = i2 = 0
  merged = []

  loop do
    e1, e2 = a1[i1], a2[i2]

    if e1 and e2
      case e1 <=> e2
      when 0
        merged << e1
        i1 += 1
        i2 += 1
      when -1
        merged << e1
        i1 += 1
      when 1
        merged << e2
        i2 += 1
      else
        raise ArgumentError, "Cannot compare #{e1.inspect} with #{e2.inspect}"
      end
    elsif e2
      merged.concat a2[i2..-1]
      break
    elsif e1
      merged.concat a1[i1..-1]
      break
    else
      break
    end
  end

  merged
end
|
110
|
+
|
111
|
+
# Binary search for +elem+ in the sorted +array+.
#
# Returns true when an element comparing equal (<=> gives 0) is found and
# false otherwise, including for an empty array. Raises RuntimeError when
# +elem+ cannot be compared with an inspected element.
def self.binary_include?(array, elem)
  lower = 0
  upper = array.size - 1

  while upper >= lower
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0
      return true
    when -1
      upper = idx - 1
    when 1
      lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  false
end
|
135
|
+
|
136
|
+
end
|
data/lib/rbbt/util/misc/math.rb
CHANGED
@@ -29,4 +29,54 @@ module Misc
|
|
29
29
|
Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
|
30
30
|
end
|
31
31
|
|
32
|
+
# Counts how many times each element occurs in +array+.
#
# Returns a plain Hash (no default value) mapping element => occurrences.
def self.counts(array)
  tally = {}
  array.each { |e| tally[e] = (tally[e] || 0) + 1 }
  tally
end
|
41
|
+
|
42
|
+
# Computes the fraction of +array+ taken by each distinct element.
#
# Returns a Hash (default value 0) mapping element => proportion. Each
# occurrence adds 1.0 / array.length to its element.
#
# The returned hash has its own to_s: one "proportion<TAB>element" line per
# entry, sorted by proportion and then by element. Proportions are printed
# with three decimals; an integer format would truncate every fractional
# value to 0.
def self.proportions(array)
  total = array.length

  proportions = Hash.new 0
  array.each { |e| proportions[e] += 1.0 / total }

  proportions.define_singleton_method(:to_s) do
    ordered = sort { |a, b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1] }
    ordered.collect { |k, c| "%.3f\t%s" % [c, k] } * "\n"
  end

  proportions
end
|
59
|
+
|
60
|
+
# Builds a Google Chart URL for a three-set Venn diagram.
#
# list1..list3 - the three lists (intersected with &).
# name1..name3 - legend names; default to "list 1", "list 2", "list 3".
# total        - optional universe size (or an Array, whose length is used);
#                when given it is the scale reference and is shown in the
#                legend.
#
# The seven sizes (three lists, three pairwise intersections, the triple
# intersection) are scaled to the 0..1 range relative to +total+ or, without
# it, to the largest size, and truncated to two decimals. When the reference
# is 0 (e.g. all lists empty) every scaled size is 0.0 rather than the
# result of dividing by zero.
#
# Returns the URL as a String; the legend text is inserted as is.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}

  total = total.length if Array === total

  label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
  label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
  label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
  if total
    label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
  else
    label << "| INTERSECTION: #{sizes[6]}"
  end

  max = total || sizes.max
  scaled = sizes.collect do |v|
    # 0/0 would give NaN, which cannot be truncated with to_i.
    max == 0 ? 0.0 : (v.to_f / max * 100).to_i.to_f / 100
  end

  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{scaled * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
end
|
32
82
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Misc
|
2
|
+
|
3
|
+
# Sets +entity+ up as an entity of the format named by +field+, when that is
# possible.
#
# entity  - returned untouched unless it is a String or an Array, and unless
#           the Entity constant is defined.
# field   - looked up in Entity.formats.
# options - passed on (as a copy) to the format module's setup; the
#           :dup_array key is removed from the hash that was passed in and
#           controls whether the elements of an Array entity are duplicated
#           first. A "format" string key is moved to :format, and :format is
#           set to +field+ when it is missing, nil or an empty String.
#
# The format check is evaluated as
# (Annotated === field or Entity.respond_to?(:formats)) and
# Entity.formats.include?(field), which is how the `or`/`and` chain groups.
# NOTE(review): `A or (B and C)` may have been the intent - confirm.
#
# Returns whatever the format module's setup returns, or +entity+ itself
# when no setup applies.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity or Array === entity
  options ||= {}

  dup_array = options.delete :dup_array

  return entity unless (Annotated === field || Entity.respond_to?(:formats)) && Entity.formats.include?(field)

  params = options.dup
  params[:format] ||= params.delete "format"

  format = params[:format]
  blank_format = format.nil? || (String === format && format.empty?)
  params.merge!(:format => field) if !params.include?(:format) || blank_format

  mod = Entity === field ? field : Entity.formats[field]

  target =
    if entity.frozen? && !entity.nil?
      entity.dup
    elsif Array === entity && dup_array
      entity.collect { |e| e.nil? ? e : e.dup }
    else
      entity
    end

  mod.setup(target, params)
end
|
25
|
+
|
26
|
+
# Concatenates all the elements of +list+ into the first non-nil one.
#
# The first non-nil element becomes the accumulator and every later element
# is appended to it with concat, so that element is modified in place.
#
# Returns the accumulator, or nil when the list is empty.
def self.consolidate(list)
  merged = nil

  list.each do |e|
    if merged.nil?
      merged = e
    else
      merged.concat e
    end
  end

  merged
end
|
36
|
+
|
37
|
+
# Appends to each list in +current+ the entry of +new+ at the same position.
#
# current - Array of Arrays; each inner Array is extended in place.
# new     - Array with one entry per list in +current+. An Array entry has
#           its elements appended; any other entry (including the nil used
#           when +new+ is shorter) is appended as a single element.
#
# The entry itself is what gets concatenated (not the whole of +new+), and
# +new+ is only read, never consumed.
#
# Returns +current+.
def self.append_zipped(current, new)
  current.each_with_index do |v, i|
    n = new[i]
    if Array === n
      v.concat n
    else
      v << n
    end
  end
  current
end
|
48
|
+
|
49
|
+
# Transposes a list of field value lists: the first list is zipped with all
# the remaining ones, so the result has one entry per element of the first
# list.
#
# Returns [] when +array+ is empty or its first element is nil.
def self.zip_fields(array)
  return [] if array.empty?

  head = array.first
  return [] if head.nil?

  head.zip(*array[1..-1])
end
|
53
|
+
|
54
|
+
# Finds the position of +field+ among +fields+.
#
# fields - list of field names; may be nil when unknown.
# field  - an Integer or Range is returned as is; anything else is matched
#          first by equality and then case-insensitively through the pattern
#          /^field$/i (the field text is used as pattern source).
# quiet  - when true, failures give nil instead of raising.
#
# Returns the position, or nil when nothing matches (or +fields+ is nil) and
# +quiet+ is set. Raises FieldNotFoundError otherwise.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field or Range === field

  if fields.nil?
    # Nothing to search: honour +quiet+ instead of calling methods on nil.
    return nil if quiet
    raise FieldNotFoundError, "Field information missing"
  end

  fields.each_with_index{|f,i| return i if f == field}

  field_re = /^#{field}$/i
  fields.each_with_index{|f,i| return i if f =~ field_re}

  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class Hash
  # Like values_at, but fetches the keys in chunks of at most +max+ (as
  # produced by Misc.ordered_divide) rather than in a single call with all
  # the keys as arguments.
  #
  # When a chunk of values responds to annotate and nothing has been
  # collected yet, it is asked to annotate the result array.
  #
  # Returns an Array with the values in key order.
  def chunked_values_at(keys, max = 5000)
    collected = []

    Misc.ordered_divide(keys, max).each do |chunk|
      values = values_at(*chunk)
      values.annotate collected if values.respond_to?(:annotate) && collected.empty?
      collected.concat(values)
    end

    collected
  end
end
|
73
|
+
|
74
|
+
# Mixin whose to_s hands control to the block given at call time and returns
# that block's value, so the text is only produced when to_s is called.
module LaterString
  # Returns the value of the block passed to this call.
  def to_s
    yield
  end
end
|
79
|
+
|