rbbt-util 5.11.9 → 5.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/tsv/dumper.rb +1 -2
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +6 -2
- data/lib/rbbt/tsv/stream.rb +55 -0
- data/lib/rbbt/tsv/util.rb +7 -1
- data/lib/rbbt/util/misc.rb +5 -762
- data/lib/rbbt/util/misc/concurrent_stream.rb +15 -0
- data/lib/rbbt/util/misc/development.rb +122 -0
- data/lib/rbbt/util/misc/inspect.rb +3 -3
- data/lib/rbbt/util/misc/manipulation.rb +136 -0
- data/lib/rbbt/util/misc/math.rb +50 -0
- data/lib/rbbt/util/misc/objects.rb +79 -0
- data/lib/rbbt/util/misc/omics.rb +10 -0
- data/lib/rbbt/util/misc/options.rb +280 -0
- data/lib/rbbt/util/misc/pipes.rb +140 -20
- data/lib/rbbt/util/misc/system.rb +90 -0
- data/lib/rbbt/util/tar.rb +0 -7
- data/lib/rbbt/workflow/accessor.rb +3 -3
- data/lib/rbbt/workflow/step/run.rb +69 -15
- data/lib/rbbt/workflow/task.rb +7 -5
- data/test/rbbt/tsv/test_stream.rb +92 -0
- data/test/rbbt/tsv/test_util.rb +1 -3
- data/test/rbbt/util/misc/test_pipes.rb +79 -0
- data/test/rbbt/workflow/test_task.rb +1 -0
- metadata +10 -2
data/lib/rbbt/util/misc/omics.rb
CHANGED
@@ -180,4 +180,14 @@ module Misc
|
|
180
180
|
end
|
181
181
|
end.collect{|p| p.last }
|
182
182
|
end
|
183
|
+
|
184
|
+
# Returns the Ensembl host name for an organism code.
#
# The organism string may carry a second "/"-separated component
# (e.g. "Hsa/jun2011"); when present it is used as the sub-domain of
# archive.ensembl.org, otherwise the main www.ensembl.org host is returned.
def self.ensembl_server(organism)
  _, release = organism.split("/")
  return "www.ensembl.org" if release.nil?

  "#{ release }.archive.ensembl.org"
end
|
192
|
+
|
183
193
|
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
module Misc
|
2
|
+
|
3
|
+
# Splits a command-line style string into its parameters.
#
# Arrays are returned untouched. For strings, each token is either the
# content of a quoted section (single or double quotes, quotes removed) or a
# run of characters containing no quotes or whitespace.
def self.parse_cmd_params(str)
  return str if Array === str

  token_pattern = /
    (?:["']([^"']*?)["']) |
    ([^"'\s]+)
  /x

  # Each match yields [quoted, bare]; exactly one of the two is non-nil.
  str.scan(token_pattern).flat_map { |captures| captures.compact }
end
|
10
|
+
|
11
|
+
# Builds a hash pairing +keys+ with the positional +values+.
#
# When the last value is a Hash it is treated as a set of extra named
# values: positional values that are nil or empty strings are dropped, the
# extras fill in the keys that are still missing (via Misc.add_defaults),
# and finally any entry whose key is not listed in +keys+ (neither as given
# nor in its String/Symbol counterpart) is removed.
def self.positional2hash(keys, *values)
  return Misc.zip2hash(keys, values) unless Hash === values.last

  extra = values.pop
  inputs = Misc.zip2hash(keys, values)
  inputs.delete_if { |_, v| v.nil? or (String === v and v.empty?) }
  inputs = Misc.add_defaults inputs, extra

  inputs.delete_if do |k, _|
    next false if keys.include?(k)

    # Only look at the String/Symbol counterpart when the key itself is unknown
    counterpart = Symbol === k ? k.to_s : k.to_sym
    not keys.include?(counterpart)
  end

  inputs
end
|
23
|
+
|
24
|
+
# Turns a list of [key, value] pairs into a hash.
#
# When a pair has no value (nil) and +default+ is given, a fresh copy
# (+dup+) of +default+ is stored so entries never share the same object.
def self.array2hash(array, default = nil)
  array.each_with_object({}) do |(key, value), result|
    value = default.dup if value.nil? and not default.nil?
    result[key] = value
  end
end
|
32
|
+
|
33
|
+
# Builds a hash whose keys are the elements of +list1+ and whose values are
# the elements found at the same index in +list2+.
def self.zip2hash(list1, list2)
  list1.each_with_index.each_with_object({}) do |(element, index), result|
    result[element] = list2[index]
  end
end
|
40
|
+
|
41
|
+
# Yields the whole +list+ to the block and pairs each element of +list+
# with the element at the same position in the block's result.
def self.process_to_hash(list)
  zip2hash(list, yield(list))
end
|
45
|
+
|
46
|
+
# Serializes a hash as "key=value" pairs joined by "#", ordered by the
# string form of the keys. Symbol keys and values are prefixed with ":".
# Entries whose value class is not in the accepted list are left out.
def self.hash2string(hash)
  accepted = %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object)

  pairs = []
  hash.sort_by { |k, _| k.to_s }.each do |k, v|
    next unless accepted.include? v.class.to_s

    key_part = Symbol === k ? ":#{k}" : k
    value_part = Symbol === v ? ":#{v}" : v
    pairs << [key_part, value_part] * "="
  end

  pairs * "#"
end
|
53
|
+
|
54
|
+
# Parses a query string ("a=1&b=x%20y") into a hash of String keys and
# CGI-unescaped String values. Items without a value map to "".
#
# Each item is split on the first "=" only, so values that themselves
# contain a literal "=" (e.g. base64 padding) are kept whole instead of
# being truncated.
def self.GET_params2hash(string)
  hash = {}
  string.split('&').each do |item|
    key, value = item.split("=", 2)
    hash[key] = value.nil? ? "" : CGI.unescape(value)
  end
  hash
end
|
62
|
+
|
63
|
+
# Serializes a hash as a query string: "key=value" pairs joined by "&",
# ordered by the string form of the keys.
#
# Symbols are written with their name, arrays are joined with ",", and any
# other accepted value is CGI-escaped. Entries whose value class is not in
# the accepted list are left out.
def self.hash2GET_params(hash)
  accepted = %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array)

  pairs = []
  hash.sort_by { |k, _| k.to_s }.each do |k, v|
    next unless accepted.include? v.class.to_s

    encoded = case v
              when Symbol then v.to_s
              when Array  then v * ","
              else CGI.escape(v.to_s)
              end

    name = Symbol === k ? k.to_s : k
    pairs << [name, encoded] * "="
  end

  pairs * "&"
end
|
77
|
+
|
78
|
+
# Renders a hash as a string of HTML tag attributes: key='value' pairs
# separated by spaces.
#
# * nil keys, nil values and empty strings are skipped
# * arrays are joined with spaces (e.g. a list of CSS classes)
# * strings, symbols, +true+, integers and floats use their string form
# * any other value (including +false+) is skipped
#
# Integers are matched with +Integer+ rather than +Fixnum+: the +Fixnum+
# constant no longer exists on current Rubies and referencing it raised a
# NameError for every value that reached that branch.
#
# NOTE(review): values are not HTML-escaped; a value containing a single
# quote will break the attribute. Callers must pass safe values.
def self.hash_to_html_tag_attributes(hash)
  return "" if hash.nil? or hash.empty?

  hash.collect{|k,v|
    next if k.nil? or v.nil? or (String === v and v.empty?)

    rendered = case v
               when Array
                 v * " "
               when String, Symbol, TrueClass, Integer, Float
                 v.to_s
               end
    next if rendered.nil?

    [k, "'#{rendered}'"] * "="
  }.compact * " "
end
|
99
|
+
|
100
|
+
# Builds the HTML for a tag.
#
# +params+ is rendered with hash_to_html_tag_attributes. Without +content+
# a self-closing tag is produced; otherwise the content is wrapped between
# the opening and closing tags.
def self.html_tag(tag, content = nil, params = {})
  attributes = hash_to_html_tag_attributes(params)
  attributes = " #{attributes}" if String === attributes and attributes != ""

  return "<#{ tag }#{attributes}/>" if content.nil?

  "<#{ tag }#{attributes}>#{ content }</#{ tag }>"
end
|
111
|
+
|
112
|
+
# Returns a copy of +options+ completed with the entries of +defaults+
# whose keys are not already present. +options+ itself is not modified.
#
# +options+ may be a Hash, or a String which is first parsed with
# string2hash. Anything else raises a RuntimeError.
#
# Key presence is checked on the resulting hash, not on the +options+
# argument: when +options+ is a String, asking the string itself would
# perform a substring test (and raise a TypeError for Symbol keys).
def self.add_defaults(options, defaults = {})
  new_options = case options
                when Hash
                  options.dup
                when String
                  string2hash options
                else
                  raise "Format of '#{options.inspect}' not understood. It should be a hash"
                end

  defaults.each do |key, value|
    next if new_options.include? key

    new_options[key] = value
  end

  new_options
end
|
130
|
+
|
131
|
+
# Removes the given option +keys+ from +hash+ and returns their values.
#
# Each key is looked up first as a Symbol and, if absent, as a String.
# With a single key the value itself is returned; otherwise an array with
# one value per key (nil for missing ones) is returned.
def self.process_options(hash, *keys)
  extract = lambda do |key|
    symbol = key.to_sym
    hash.include?(symbol) ? hash.delete(symbol) : hash.delete(key.to_s)
  end

  return extract.call(keys.first) if keys.length == 1

  keys.collect { |key| extract.call(key) }
end
|
138
|
+
|
139
|
+
# Extracts from +hash+ the entries that belong to +prefix+ and returns them
# in a new hash; the extracted entries are removed from +hash+.
#
# * keys of the form "<prefix>_<name>" are returned under "<name>",
#   keeping the key type (String or Symbol)
# * a key equal to the prefix itself is returned unchanged
#
# The prefix is matched literally and only at the start of the key, so
# prefix "foo" takes "foo_bar" but leaves "barfoo_x" alone, and regular
# expression metacharacters in the prefix have no special meaning.
def self.pull_keys(hash, prefix)
  prefix_name = prefix.to_s
  pattern = /\A#{Regexp.escape(prefix_name)}_(.*)/

  pulled = {}
  hash.keys.each do |key|
    name = key.to_s
    if (match = pattern.match(name))
      case key
      when String
        pulled[match[1]] = hash.delete key
      when Symbol
        pulled[match[1].to_sym] = hash.delete key
      end
    elsif name == prefix_name
      pulled[key] = hash.delete key
    end
  end

  pulled
end
|
158
|
+
|
159
|
+
#def self.string2hash_old(string)
|
160
|
+
|
161
|
+
# options = {}
|
162
|
+
# string.split(/#/).each do |str|
|
163
|
+
# if str.match(/(.*)=(.*)/)
|
164
|
+
# option, value = $1, $2
|
165
|
+
# else
|
166
|
+
# option, value = str, true
|
167
|
+
# end
|
168
|
+
|
169
|
+
# option = option.sub(":",'').to_sym if option.chars.first == ':'
|
170
|
+
# value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
171
|
+
|
172
|
+
# if value == true
|
173
|
+
# options[option] = option.to_s.chars.first != '!'
|
174
|
+
# else
|
175
|
+
# options[option] = Thread.start do
|
176
|
+
# $SAFE = 0;
|
177
|
+
# case
|
178
|
+
# when value =~ /^(?:true|T)$/i
|
179
|
+
# true
|
180
|
+
# when value =~ /^(?:false|F)$/i
|
181
|
+
# false
|
182
|
+
# when Symbol === value
|
183
|
+
# value
|
184
|
+
# when (String === value and value =~ /^\/(.*)\/$/)
|
185
|
+
# Regexp.new /#{$1}/
|
186
|
+
# else
|
187
|
+
# begin
|
188
|
+
# Kernel.const_get value
|
189
|
+
# rescue
|
190
|
+
# begin
|
191
|
+
# raise if value =~ /[a-z]/ and defined? value
|
192
|
+
# eval(value)
|
193
|
+
# rescue Exception
|
194
|
+
# value
|
195
|
+
# end
|
196
|
+
# end
|
197
|
+
# end
|
198
|
+
# end.value
|
199
|
+
# end
|
200
|
+
# end
|
201
|
+
|
202
|
+
# options
|
203
|
+
#end
|
204
|
+
|
205
|
+
# Parses an option string such as ":a=1#b=:sym#flag" into a hash.
#
# Entries are separated by "#" and each one is split on its first "=".
# A key starting with ":" becomes a Symbol. Values are converted by the
# first rule that applies:
#
#   (no value)          => true
#   :name               => Symbol
#   /pattern/           => Regexp
#   'text' or "text"    => the text without the surrounding quotes
#   digits              => Integer
#   digits.digits       => Float
#   true / false        => the boolean
#   anything else       => the String as given
#
# "false" now really yields +false+: previously the assignment evaluated to
# false, the chained +next+ was skipped and the value ended up stored as the
# String "false". The eval-based fallback and the legacy implementation that
# followed the early +return+ were unreachable and have been removed.
def self.string2hash(string)
  options = {}

  string.split('#').each do |str|
    key, _separator, value = str.partition "="

    key = key[1..-1].to_sym if key[0] == ":"

    options[key] = case
                   when value.empty?
                     true
                   when value[0] == ":"
                     value[1..-1].to_sym
                   when (value[0] == "/" and value[-1] == "/")
                     Regexp.new(value[1..-2])
                   when value =~ /^['"].*['"]$/
                     value[1..-2]
                   when value =~ /^\d+$/
                     value.to_i
                   when value =~ /^\d*\.\d+$/
                     value.to_f
                   when value == "true"
                     true
                   when value == "false"
                     false
                   else
                     value
                   end
  end

  options
end
|
279
|
+
|
280
|
+
end
|
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -162,6 +162,7 @@ module Misc
|
|
162
162
|
end
|
163
163
|
|
164
164
|
def self.consume_stream(io)
|
165
|
+
return unless io.respond_to? :read
|
165
166
|
begin
|
166
167
|
while block = io.read(2048)
|
167
168
|
return if io.eof?
|
@@ -192,28 +193,8 @@ module Misc
|
|
192
193
|
str
|
193
194
|
end
|
194
195
|
|
195
|
-
# Reads exactly +size+ bytes from +stream+ and returns them.
#
# Waits (in one-second IO.select slices, yielding to other threads) until
# the stream is readable, then keeps reading until +size+ bytes have been
# gathered. Raises ClosedStream if the stream reaches end-of-file before
# enough data is available.
def self.read_stream(stream, size)
  Thread.pass until IO.select([stream], nil, nil, 1)

  buffer = stream.read(size)
  until buffer
    IO.select([stream], nil, nil, 1)
    Thread.pass
    raise ClosedStream if stream.eof?
    buffer = stream.read(size)
  end

  # A short read is completed chunk by chunk
  until buffer.length >= size
    raise ClosedStream if stream.eof?
    IO.select([stream], nil, nil, 1)
    chunk = stream.read(size - buffer.length)
    buffer << chunk if chunk
  end

  buffer
end
|
213
|
-
|
214
196
|
def self.sensiblewrite(path, content = nil, &block)
|
215
197
|
return if File.exists? path
|
216
|
-
#tmp_path = path + '.sensible_write'
|
217
198
|
tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
|
218
199
|
Misc.lock tmp_path do
|
219
200
|
if not File.exists? path
|
@@ -244,4 +225,143 @@ module Misc
|
|
244
225
|
end
|
245
226
|
end
|
246
227
|
end
|
228
|
+
|
229
|
+
# Yields the stream +s+ to the block and then joins it when it supports
# +join+. If anything raises a StandardError, the stream is aborted (when
# it supports +abort+) and the error is re-raised.
def self.process_stream(s)
  yield s
  s.join if s.respond_to? :join
rescue
  s.abort if s.respond_to? :abort
  raise
end
|
238
|
+
|
239
|
+
# Returns a pipe with the content of +stream+ sorted by the external `sort`
# command, leaving the leading header lines in place.
#
# Lines at the start of the stream matching /^<header_hash>/ are copied
# through unsorted. The remaining lines are fed through a second pipe into
# `sort`, whose output is copied after the header. Once fully read, the
# source stream is joined (when it supports +join+); on error it is aborted
# (when it supports +abort+) and the error re-raised.
def self.sort_stream(stream, header_hash = "#")
  Misc.open_pipe do |sin|
    header_pattern = /^#{header_hash}/

    current = stream.gets
    while current =~ header_pattern
      sin.puts current
      current = stream.gets
    end

    # `current` now holds the first non-header line (or nil at end of stream)
    body = Misc.open_pipe do |body_in|
      begin
        until current.nil?
          body_in.puts current
          current = stream.gets
        end
        stream.join if stream.respond_to? :join
      rescue
        stream.abort if stream.respond_to? :abort
        raise
      end
    end

    sorted = CMD.cmd("sort", :in => body, :pipe => true)

    while chunk = sorted.read(2048)
      sin.write chunk
    end
  end
end
|
267
|
+
|
268
|
+
# Returns a pipe where consecutive lines of +s+ sharing the same key (first
# field) are merged into a single line, joining the values found at each
# field position with "|".
#
# +line+ optionally provides the first line when it was already read from
# the stream; +sep+ is the field separator (tab by default) and +header+,
# when given, is written before any data. Lines without a key are ignored.
#
# Lines are chomped rather than stripped: stripping also removed trailing
# separators, so empty trailing fields were lost before the split even
# though the split is asked (limit -1) to preserve them.
def self.collapse_stream(s, line = nil, sep = "\t", header = nil)
  sep ||= "\t"
  Misc.open_pipe do |sin|
    sin.puts header if header
    process_stream(s) do |stream|
      line ||= stream.gets

      current_key = nil
      current_parts = []
      while line
        key, *parts = line.chomp.split(sep, -1)
        current_key ||= key
        case
        when key.nil?
          # blank line: nothing to collapse
        when current_key == key
          parts.each_with_index do |part,i|
            if current_parts[i].nil?
              current_parts[i] = part
            else
              current_parts[i] = current_parts[i] << "|" << part
            end
          end
        else
          # key changed: flush the finished entry and start a new one
          sin.puts [current_key, current_parts].flatten * sep
          current_key = key
          current_parts = parts
        end
        line = stream.gets
      end

      sin.puts [current_key, current_parts].flatten * sep unless current_key.nil?
    end
  end
end
|
301
|
+
|
302
|
+
# Returns a pipe that merges several key-sorted streams side by side.
#
# On every step the smallest pending key is selected; streams currently
# positioned on that key contribute their fields and advance one line,
# while the others contribute empty fields (as many as their first line
# had). +lines+ optionally provides the first line of each stream when
# already read, +sep+ is the field separator (tab by default) and +header+,
# when given, is written first. When done, streams supporting +join+ are
# joined; on error streams supporting +abort+ are aborted and the error is
# re-raised.
#
# Changes over the previous version:
# * a stream that is empty from the start no longer raises NoMethodError
#   (its first line is nil); it simply contributes no fields
# * lines are chomped rather than stripped, so empty trailing fields are
#   preserved and counted
# * unused local variables were dropped
def self.paste_streams(streams, lines = nil, sep = "\t", header = nil)
  sep ||= "\t"
  Misc.open_pipe do |sin|
    sin.puts header if header
    begin
      lines ||= streams.collect{|s| s.gets }

      keys = []
      parts = []
      lines.each_with_index do |line,i|
        if line.nil?
          keys[i] = nil
          parts[i] = nil
        else
          key, *p = line.chomp.split(sep, -1)
          keys[i] = key
          parts[i] = p
        end
      end

      # Number of value fields of each stream, taken from its first line
      sizes = parts.collect{|p| p.nil? ? 0 : p.length }

      while lines.compact.any?
        min = keys.compact.sort.first
        str = []
        keys.each_with_index do |key,i|
          case key
          when min
            str << [parts[i] * sep]
            line = lines[i] = streams[i].gets
            if line.nil?
              keys[i] = nil
              parts[i] = nil
            else
              k, *p = line.chomp.split(sep, -1)
              keys[i] = k
              parts[i] = p
            end
          else
            # n-1 separators stand for n empty fields once joined below
            str << [sep * (sizes[i]-1)] if sizes[i] > 0
          end
        end

        sin.puts [min, str*sep] * sep
      end

      streams.each do |stream|
        stream.join if stream.respond_to? :join
      end
    rescue
      streams.each do |stream|
        stream.abort if stream.respond_to? :abort
      end
      raise $!
    end
  end
end
|
355
|
+
|
356
|
+
# Splits +stream+ so it can be consumed twice.
#
# A +dup+ of the stream is passed to Misc.tee_stream; the original stream
# object is reopened onto the first resulting stream and the second one is
# returned. When the stream supports +annotate+, the copy is annotated from
# it and the original is then cleared.
def self.dup_stream(stream)
  copy = stream.dup

  if stream.respond_to? :annotate
    stream.annotate copy
    stream.clear
  end

  first_tee, second_tee = Misc.tee_stream copy
  stream.reopen(first_tee)

  second_tee
end
|
366
|
+
|
247
367
|
end
|