rbbt-util 5.11.9 → 5.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/tsv/dumper.rb +1 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +6 -2
- data/lib/rbbt/tsv/stream.rb +55 -0
- data/lib/rbbt/tsv/util.rb +7 -1
- data/lib/rbbt/util/misc.rb +5 -762
- data/lib/rbbt/util/misc/concurrent_stream.rb +15 -0
- data/lib/rbbt/util/misc/development.rb +122 -0
- data/lib/rbbt/util/misc/inspect.rb +3 -3
- data/lib/rbbt/util/misc/manipulation.rb +136 -0
- data/lib/rbbt/util/misc/math.rb +50 -0
- data/lib/rbbt/util/misc/objects.rb +79 -0
- data/lib/rbbt/util/misc/omics.rb +10 -0
- data/lib/rbbt/util/misc/options.rb +280 -0
- data/lib/rbbt/util/misc/pipes.rb +140 -20
- data/lib/rbbt/util/misc/system.rb +90 -0
- data/lib/rbbt/util/tar.rb +0 -7
- data/lib/rbbt/workflow/accessor.rb +3 -3
- data/lib/rbbt/workflow/step/run.rb +69 -15
- data/lib/rbbt/workflow/task.rb +7 -5
- data/test/rbbt/tsv/test_stream.rb +92 -0
- data/test/rbbt/tsv/test_util.rb +1 -3
- data/test/rbbt/util/misc/test_pipes.rb +79 -0
- data/test/rbbt/workflow/test_task.rb +1 -0
- metadata +10 -2
@@ -26,6 +26,20 @@ module ConcurrentStream
|
|
26
26
|
stream
|
27
27
|
end
|
28
28
|
|
29
|
+
# Sets +stream+ up as a ConcurrentStream and copies this object's
# bookkeeping attributes (threads, pids, callback, abort_callback, filename
# and joined) onto it.
#
# Returns the value assigned to +stream.joined+.
def annotate(stream)
  ConcurrentStream.setup stream

  %w[threads pids callback abort_callback filename].each do |attribute|
    stream.public_send("#{attribute}=", send(attribute))
  end

  # Kept as the final expression so the return value stays the joined flag.
  stream.joined = joined
end
|
38
|
+
|
39
|
+
# Resets threads, pids, callback and abort_callback to nil.
#
# The assignments go through the attribute writers on purpose: a bare
# `threads, pids, ... = nil` would only create method-local variables and
# leave the object's state untouched.
#
# Returns nil.
def clear
  self.threads = nil
  self.pids = nil
  self.callback = nil
  self.abort_callback = nil
  nil
end
|
42
|
+
|
29
43
|
# Returns the current value of the @joined flag (nil until it has been set).
def joined?
  @joined
end
|
@@ -69,6 +83,7 @@ module ConcurrentStream
|
|
69
83
|
join_callback
|
70
84
|
|
71
85
|
@joined = true
|
86
|
+
close unless closed?
|
72
87
|
end
|
73
88
|
|
74
89
|
def abort_threads
|
@@ -1,4 +1,16 @@
|
|
1
1
|
module Misc
|
2
|
+
|
3
|
+
# Resolves a constant name, optionally namespaced with "::", into the
# constant it names. Lookup starts at Kernel and descends one segment at a
# time through const_get.
#
# string - the name (anything responding to to_s); nil yields nil.
#
# Returns the resolved constant.
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) do |scope, name|
    scope.const_get name
  end
end
|
13
|
+
|
2
14
|
def self.benchmark(repeats = 1, message = nil)
|
3
15
|
require 'benchmark'
|
4
16
|
res = nil
|
@@ -92,4 +104,114 @@ module Misc
|
|
92
104
|
|
93
105
|
res
|
94
106
|
end
|
107
|
+
|
108
|
+
# Yields to the block only while the global flag $__did_once is unset, and
# sets the flag before yielding. Later calls are no-ops until the flag is
# cleared again.
#
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end
  nil
end
|
114
|
+
|
115
|
+
# Clears the global $__did_once flag.
#
# Returns false (the value assigned).
def self.reset_do_once
  $__did_once = false
end
|
118
|
+
|
119
|
+
# Runs the block, retrying when it raises a StandardError.
#
# times - maximum number of attempts, or an Array of pause lengths. With an
#         Array, its length becomes the attempt count and its elements are
#         shifted off (the Array is consumed) to provide the pauses.
# sleep - seconds to pause before a retry; nil means no pause.
# msg   - optional text appended to the warning logged on each failure.
#
# The first failure never pauses; it only calls Thread.pass. Once the
# attempts are used up the last exception is re-raised.
#
# Returns the block's value from the first attempt that succeeds.
def self.insist(times = 3, sleep = nil, msg = nil)
  pauses = nil
  if times.is_a?(Array)
    pauses = times
    times = pauses.length
    sleep = pauses.shift
  end

  failures = 0
  begin
    yield
  rescue
    warning = msg ? "Insisting after exception: #{$!.message} -- #{msg}" : "Insisting after exception: #{$!.message}"
    Log.warn(warning)

    if sleep && failures > 0
      # The parameter shadows Kernel#sleep, hence the explicit receiver.
      Kernel.sleep sleep
      sleep = pauses.shift if pauses
    else
      Thread.pass
    end

    failures += 1
    retry if failures < times
    raise $!
  end
end
|
145
|
+
|
146
|
+
# Convenience wrapper: runs the block through insist with three attempts.
def self.try3times(&block)
  insist 3, &block
end
|
149
|
+
|
150
|
+
# Divides the array into +num+ chunks of (nearly) the same size by placing
# one element in each chunk iteratively (round-robin).
#
# +num+ is coerced to an Integer once, so the number of chunks and the
# round-robin index always agree. Values below 1 are treated as 1.
#
# Returns an Array of +num+ Arrays.
def self.divide(array, num)
  num = num.to_i
  num = 1 if num < 1

  chunks = Array.new(num) { [] }
  array.each_with_index { |e, i| chunks[i % num] << e }
  chunks
end
|
162
|
+
|
163
|
+
# Splits +array+ into consecutive chunks of at most +num+ elements each,
# preserving order; only the last chunk may be shorter.
#
# Raises ArgumentError unless +num+ is positive (a zero or negative chunk
# size would otherwise never advance through the array).
#
# Returns an Array of slices taken with array[range].
def self.ordered_divide(array, num)
  raise ArgumentError, "chunk size must be positive (got #{num.inspect})" unless num > 0

  last = array.length - 1
  0.step(last, num).map do |start|
    array[start..[last, start + num - 1].min]
  end
end
|
176
|
+
|
177
|
+
# Draws +size+ distinct random integers from 0...total.
#
# A request for more values than the range holds is capped at +total+, so
# the result never contains nil.
#
# Two strategies are used:
# * dense samples (size > total / 10) pick from an explicit candidate pool,
#   filling each picked slot with the pool's last element;
# * sparse samples draw random positions and retry on collisions.
#
# Returns a Set of Integers.
def self.random_sample_in_range(total, size)
  size = total if size > total
  chosen = Set.new

  if size > total / 10
    pool = (0...total).to_a
    size.times do
      pos = (rand * pool.length).floor
      tail = pool.pop
      if pos == pool.length
        # The pick was the last slot, which has just been popped.
        chosen << tail
      else
        chosen << pool[pos]
        pool[pos] = tail
      end
    end
  else
    # Adding to a Set ignores repeats, so loop until enough distinct values exist.
    chosen << (rand * total).floor while chosen.size < size
  end

  chosen
end
|
207
|
+
|
208
|
+
# Picks +size+ random elements from +ary+.
#
# Uses the collection's own #sample when it has one; otherwise draws random
# positions with random_sample_in_range and reads them with values_at.
# The +replacement+ argument is accepted but not used by this method.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to?(:sample)

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end
|
95
217
|
end
|
@@ -40,9 +40,9 @@ module Misc
|
|
40
40
|
when (defined? AnnotatedArray and AnnotatedArray)
|
41
41
|
"<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
|
42
42
|
when (defined? TSV and TSV::Parser)
|
43
|
-
"<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
|
43
|
+
"<TSVStream:" + (obj.filename || "NOFILENAME") + "--" << Misc.fingerprint(obj.options) << ">"
|
44
44
|
when IO
|
45
|
-
"<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
|
45
|
+
"<IO:" + (obj.respond_to?(:filename) ? obj.filename || obj.inspect : obj.inspect) + ">"
|
46
46
|
when File
|
47
47
|
"<File:" + obj.path + ">"
|
48
48
|
when Array
|
@@ -79,7 +79,7 @@ module Misc
|
|
79
79
|
def self.remove_long_items(obj)
|
80
80
|
case
|
81
81
|
when IO === obj
|
82
|
-
remove_long_items("IO: " + obj.filename)
|
82
|
+
remove_long_items("IO: " + (obj.respond_to?(:filename) ? (obj.filename || obj.inspect) : obj.inspect ))
|
83
83
|
when obj.respond_to?(:path)
|
84
84
|
remove_long_items("File: " + obj.path)
|
85
85
|
when TSV::Parser === obj
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module Misc
|
2
|
+
# Merges overlapping ranges into a minimal list of inclusive ranges, ordered
# by their begin value.
#
# Ranges are merged when the next begin is <= the running end. Ranges that
# merely touch (e.g. 1..2 and 3..4) are kept apart. Each range's #end is
# used as-is.
#
# Returns an Array of Ranges built as (begin..end).
def self.collapse_ranges(ranges)
  merged = []

  ranges.sort_by { |range| range.begin }.each do |range|
    if merged.empty? || range.begin > merged.last[1]
      merged << [range.begin, range.end]
    else
      # Overlap: extend the current span if this range reaches further.
      merged.last[1] = [range.end, merged.last[1]].max
    end
  end

  merged.collect { |b, e| (b..e) }
end
|
31
|
+
|
32
|
+
# Sums the sizes (end - begin + 1) of the ranges returned by collapse_ranges
# for +ranges+.
def self.total_length(ranges)
  self.collapse_ranges(ranges).inject(0) do |sum, range|
    sum + (range.end - range.begin + 1)
  end
end
|
35
|
+
|
36
|
+
# Walks two sorted arrays in step and collects the positions in +a1+ of the
# elements that also appear in +a2+.
#
# NOTE: both arrays are consumed with #shift while scanning, so pass copies
# if the originals are still needed.
#
# Returns an Array of Integer positions relative to the original +a1+.
def self.sorted_array_hits(a1, a2)
  left = a1.shift
  right = a2.shift
  position = 0
  hits = []

  until left.nil? || right.nil?
    order = left <=> right
    if order == 0
      hits << position
      left = a1.shift
      right = a2.shift
      position += 1
    elsif order == -1
      # Skip everything in a1 that is smaller than the current a2 element.
      while !left.nil? && left < right
        left = a1.shift
        position += 1
      end
    elsif order == 1
      right = a2.shift
      right = a2.shift while !right.nil? && right < left
    end
  end

  hits
end
|
59
|
+
|
60
|
+
# Returns the elements common to two sorted arrays, found by walking both
# in a single pass.
#
# NOTE: both arrays are consumed with #shift while scanning, so pass copies
# if the originals are still needed.
def self.intersect_sorted_arrays(a1, a2)
  left = a1.shift
  right = a2.shift
  common = []

  until left.nil? || right.nil?
    order = left <=> right
    if order == 0
      common << left
      left = a1.shift
      right = a2.shift
    elsif order == -1
      left = a1.shift while !left.nil? && left < right
    elsif order == 1
      right = a2.shift
      right = a2.shift while !right.nil? && right < left
    end
  end

  common
end
|
78
|
+
|
79
|
+
# Merges two sorted arrays into one sorted array. An element found at the
# head of both arrays at the same time is emitted only once.
#
# NOTE: the arrays are consumed with #shift while both still have elements;
# pass copies if the originals are still needed.
def self.merge_sorted_arrays(a1, a2)
  head1 = a1.shift
  head2 = a2.shift
  merged = []

  while head1 && head2
    case head1 <=> head2
    when 0
      merged << head1
      head1 = a1.shift
      head2 = a2.shift
    when -1
      merged << head1
      head1 = a1.shift
    when 1
      merged << head2
      head2 = a2.shift
    end
  end

  # At most one side still has a pending head; append it with its remainder.
  if head2
    merged << head2
    merged.concat a2
  elsif head1
    merged << head1
    merged.concat a1
  end

  merged
end
|
110
|
+
|
111
|
+
# Binary search for +elem+ in the sorted +array+.
#
# Returns true when found and false otherwise. An empty array yields false
# rather than the truthy -1 it used to return.
#
# Raises RuntimeError when +elem+ cannot be compared with a probed value
# (i.e. <=> returns something other than -1, 0 or 1).
def self.binary_include?(array, elem)
  lower = 0
  upper = array.size - 1

  while upper >= lower
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0
      return true
    when -1
      upper = idx - 1
    when 1
      lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  false
end
|
135
|
+
|
136
|
+
end
|
data/lib/rbbt/util/misc/math.rb
CHANGED
@@ -29,4 +29,54 @@ module Misc
|
|
29
29
|
Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
|
30
30
|
end
|
31
31
|
|
32
|
+
# Counts how many times each element occurs in +array+.
#
# Returns a plain Hash (no default value) mapping element => occurrences.
def self.counts(array)
  occurrences = {}
  array.each do |item|
    occurrences[item] = occurrences.fetch(item, 0) + 1
  end
  occurrences
end
|
41
|
+
|
42
|
+
# Computes the fraction of +array+ taken up by each distinct element.
#
# Returns a Hash (default value 0) mapping element => proportion. The hash
# has its own #to_s that prints one "proportion<TAB>element" line per entry,
# sorted by proportion and then by element.
#
# The proportion is printed with "%.3f". The previous "%3d" truncated each
# fractional value to an integer, so almost every line read "  0".
def self.proportions(array)
  total = array.length

  proportions = Hash.new 0
  array.each do |e|
    proportions[e] += 1.0 / total
  end

  def proportions.to_s
    sort { |a, b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1] }.
      collect { |k, c| "%.3f\t%s" % [c, k] } * "\n"
  end

  proportions
end
|
59
|
+
|
60
|
+
# Builds a Google Chart URL for a three-set Venn diagram.
#
# list1..list3 - the three lists to compare.
# name1..name3 - legend names; default to "list 1", "list 2", "list 3".
# total        - optional universe size (an Array is replaced by its length)
#                used to scale the chart instead of the largest region.
#
# Region sizes are scaled to fractions truncated to two decimals. The legend
# text is interpolated into the URL as-is (no escaping is applied here).
#
# Returns the URL String.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  regions = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3]
  sizes = regions.collect { |region| region.length }

  total = total.length if total.is_a?(Array)

  label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
  label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
  label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
  label << (total ? "| INTERSECTION: #{sizes[6]} TOTAL: #{total}" : "| INTERSECTION: #{sizes[6]}")

  max = total || sizes.max
  scaled = sizes.collect { |v| (v.to_f / max * 100).to_i.to_f / 100 }

  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{scaled * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
end
|
32
82
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Misc
|
2
|
+
|
3
|
+
# Sets +entity+ up as an Entity of the format named by +field+, when the
# Entity system is loaded and the field qualifies.
#
# entity  - a String or Array; any other value is returned untouched.
# field   - the field/format used to look up the entity module.
# options - setup parameters. The :dup_array key is removed from the hash
#           that was passed in; the rest is copied before being adjusted.
#
# Returns the result of the module's setup call, or +entity+ unchanged when
# no setup applies.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity || Array === entity
  options ||= {}

  dup_array = options.delete :dup_array

  # `or` and `and` share one precedence level and group left to right, so
  # the original condition reads as (A or B) and C; spelled out here.
  qualifies = (Annotated === field || Entity.respond_to?(:formats)) && Entity.formats.include?(field)
  return entity unless qualifies

  params = options.dup
  params[:format] ||= params.delete "format"

  # Fall back to the field itself when no usable :format was supplied.
  format = params[:format]
  blank_format = format.nil? || (String === format && format.empty?)
  params.merge!(:format => field) unless params.include?(:format) && !blank_format

  mod = Entity === field ? field : Entity.formats[field]

  # Frozen entities are duplicated before setup; arrays are duplicated
  # element by element only when :dup_array was requested.
  subject =
    if entity.frozen? && !entity.nil?
      entity.dup
    elsif Array === entity && dup_array
      entity.collect { |e| e.nil? ? e : e.dup }
    else
      entity
    end

  mod.setup(subject, params)
end
|
25
|
+
|
26
|
+
# Concatenates every element of +list+ onto its first non-nil element.
#
# NOTE: that first element is extended in place with #concat and is the
# object returned; no copy is made.
#
# Returns the accumulated object, or nil when +list+ is empty.
def self.consolidate(list)
  merged = nil
  list.each do |chunk|
    if merged.nil?
      merged = chunk
    else
      merged.concat chunk
    end
  end
  merged
end
|
36
|
+
|
37
|
+
# Appends the values in +new+ to the arrays in +current+, position by
# position.
#
# The value taken for each position is concatenated onto the matching array
# when it is itself an Array, and pushed as a single element otherwise.
# (The previous code concatenated the rest of +new+ instead of that value.)
#
# NOTE: +new+ is consumed with #shift, and the arrays inside +current+ are
# modified in place.
#
# Returns +current+.
def self.append_zipped(current, new)
  current.each do |v|
    n = new.shift
    if Array === n
      v.concat n
    else
      v << n
    end
  end
  current
end
|
48
|
+
|
49
|
+
# Zips the first element of +array+ with all the remaining elements.
#
# Returns [] when +array+ is empty or its first element is nil.
def self.zip_fields(array)
  return [] if array.empty?

  head = array.first
  return [] if head.nil?

  head.zip(*array[1..-1])
end
|
53
|
+
|
54
|
+
# Finds the position of +field+ within +fields+.
#
# fields - list of field names; may be nil.
# field  - an Integer or Range (returned as-is), or a name to look up. An
#          exact match is tried first, then a case-insensitive match.
# quiet  - when true, failures return nil instead of raising.
#
# The name is interpolated into the case-insensitive pattern unescaped, so
# regexp metacharacters in +field+ are treated as pattern syntax.
#
# Raises FieldNotFoundError when +fields+ is nil or the field is missing,
# unless +quiet+ is set. A nil +fields+ with +quiet+ set returns nil.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field || Range === field

  if fields.nil?
    raise FieldNotFoundError, "Field information missing" unless quiet
    return nil
  end

  exact = fields.find_index { |f| f == field }
  return exact if exact

  field_re = /^#{field}$/i
  loose = fields.find_index { |f| f =~ field_re }
  return loose if loose

  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
  nil
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class Hash
  # Like values_at, but reads the keys in batches of at most +max+ (batches
  # come from Misc.ordered_divide), so no single values_at call receives
  # more than +max+ arguments.
  #
  # If the first batch of values responds to :annotate, it is asked to
  # annotate the (still empty) result array before values are appended.
  #
  # Returns an Array with the values for +keys+, in order.
  def chunked_values_at(keys, max = 5000)
    collected = []

    Misc.ordered_divide(keys, max).each do |batch|
      values = values_at(*batch)
      values.annotate collected if values.respond_to?(:annotate) && collected.empty?
      collected.concat(values)
    end

    collected
  end
end
|
73
|
+
|
74
|
+
# Mixin whose to_s returns whatever the block given to that call yields,
# so the text is produced at call time.
module LaterString
  def to_s
    yield
  end
end
|
79
|
+
|