rbbt-util 5.11.4 → 5.11.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -1
- data/lib/rbbt/util/cmd.rb +1 -1
- data/lib/rbbt/util/misc.rb +11 -1173
- data/lib/rbbt/util/misc/concurrent_stream.rb +69 -0
- data/lib/rbbt/util/misc/development.rb +95 -0
- data/lib/rbbt/util/misc/exceptions.rb +11 -0
- data/lib/rbbt/util/misc/format.rb +170 -0
- data/lib/rbbt/util/misc/indiferent_hash.rb +56 -0
- data/lib/rbbt/util/misc/inspect.rb +181 -0
- data/lib/rbbt/util/misc/lock.rb +87 -0
- data/lib/rbbt/util/misc/math.rb +32 -0
- data/lib/rbbt/util/misc/objects.rb +0 -0
- data/lib/rbbt/util/misc/omics.rb +183 -0
- data/lib/rbbt/util/misc/pipes.rb +224 -0
- data/lib/rbbt/workflow/accessor.rb +1 -0
- data/lib/rbbt/workflow/step.rb +15 -9
- data/share/rbbt_commands/workflow/task +2 -0
- metadata +13 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ec4bc56a7d0d260d93be40ee61d23c08f58d5a8
|
4
|
+
data.tar.gz: 6f51a747302e936ed1e5953e929e5120fa5caa89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db5a97b2787f182444de536059beb027d038573c0c3ac3196ec44ce18d2561fa3204a38229d3054e5c92594309fc4a488cc6e9116e370c4f2d383b28a8ed9e71
|
7
|
+
data.tar.gz: 8a4fc523ae9266ceea9964f26f85834a8f5c0e0899a523bb938e2e813da191651215ec356432ee7b467f39b470328b32dda911d1b2cd9bd757fa467ad14d54ce
|
data/lib/rbbt/util/cmd.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -5,6 +5,17 @@ require 'cgi'
|
|
5
5
|
require 'zlib'
|
6
6
|
require 'rubygems/package'
|
7
7
|
require 'rbbt/util/tar'
|
8
|
+
require 'rbbt/util/misc/exceptions'
|
9
|
+
require 'rbbt/util/misc/concurrent_stream'
|
10
|
+
require 'rbbt/util/misc/indiferent_hash'
|
11
|
+
require 'rbbt/util/misc/pipes'
|
12
|
+
require 'rbbt/util/misc/format'
|
13
|
+
require 'rbbt/util/misc/omics'
|
14
|
+
require 'rbbt/util/misc/inspect'
|
15
|
+
require 'rbbt/util/misc/math'
|
16
|
+
require 'rbbt/util/misc/development'
|
17
|
+
require 'rbbt/util/misc/lock'
|
18
|
+
|
8
19
|
|
9
20
|
class Hash
|
10
21
|
def chunked_values_at(keys, max = 5000)
|
@@ -16,372 +27,14 @@ class Hash
|
|
16
27
|
end
|
17
28
|
end
|
18
29
|
|
19
|
-
class ParameterException < Exception; end
|
20
|
-
class FieldNotFoundError < Exception;end
|
21
|
-
class Aborted < Exception; end
|
22
|
-
class TryAgain < Exception; end
|
23
|
-
class ClosedStream < Exception; end
|
24
|
-
class KeepLocked < Exception
|
25
|
-
attr_accessor :payload
|
26
|
-
def initialize(payload)
|
27
|
-
@payload = payload
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
30
|
module LaterString
|
32
31
|
def to_s
|
33
32
|
yield
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
37
|
-
module ConcurrentStream
|
38
|
-
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined
|
39
|
-
|
40
|
-
def joined?
|
41
|
-
@joined
|
42
|
-
end
|
43
|
-
|
44
|
-
def join
|
45
|
-
|
46
|
-
if @threads and @threads.any?
|
47
|
-
@threads.each do |t|
|
48
|
-
t.join
|
49
|
-
end
|
50
|
-
@threads = []
|
51
|
-
end
|
52
|
-
|
53
|
-
if @pids and @pids.any?
|
54
|
-
@pids.each do |pid|
|
55
|
-
begin
|
56
|
-
Process.waitpid(pid, Process::WUNTRACED)
|
57
|
-
raise "Error joining process #{pid} in #{self.inspect}" unless $?.success?
|
58
|
-
rescue Errno::ECHILD
|
59
|
-
end
|
60
|
-
end
|
61
|
-
@pids = []
|
62
|
-
end
|
63
|
-
|
64
|
-
if @callback and not joined?
|
65
|
-
@callback.call
|
66
|
-
@callback = nil
|
67
|
-
end
|
68
|
-
|
69
|
-
@joined = true
|
70
|
-
end
|
71
|
-
|
72
|
-
def abort
|
73
|
-
@threads.each{|t| t.raise Aborted.new } if @threads
|
74
|
-
@threads.each{|t| t.join } if @threads
|
75
|
-
@pids.each{|pid| Process.kill :INT, pid } if @pids
|
76
|
-
@pids.each{|pid| Process.waitpid pid } if @pids
|
77
|
-
@abort_callback.call if @abort_callback
|
78
|
-
@abort_callback = nil
|
79
|
-
end
|
80
|
-
|
81
|
-
def self.setup(stream, options = {}, &block)
|
82
|
-
threads, pids, callback, filename = Misc.process_options options, :threads, :pids, :callback, :filename
|
83
|
-
stream.extend ConcurrentStream unless ConcurrentStream === stream
|
84
|
-
|
85
|
-
stream.threads ||= []
|
86
|
-
stream.pids ||= []
|
87
|
-
stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
|
88
|
-
stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
|
89
|
-
|
90
|
-
callback = block if block_given?
|
91
|
-
if stream.callback and callback
|
92
|
-
old_callback = stream.callback
|
93
|
-
stream.callback = Proc.new do
|
94
|
-
old_callback.call
|
95
|
-
callback.call
|
96
|
-
end
|
97
|
-
else
|
98
|
-
stream.callback = callback
|
99
|
-
end
|
100
|
-
|
101
|
-
stream.filename = filename unless filename.nil?
|
102
|
-
|
103
|
-
stream
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
|
108
|
-
Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
|
109
36
|
module Misc
|
110
37
|
|
111
|
-
|
112
|
-
PIPE_MUTEX = Mutex.new
|
113
|
-
|
114
|
-
OPEN_PIPE_IN = []
|
115
|
-
def self.pipe
|
116
|
-
OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }
|
117
|
-
PIPE_MUTEX.synchronize do
|
118
|
-
sout, sin = IO.pipe
|
119
|
-
OPEN_PIPE_IN << sin
|
120
|
-
|
121
|
-
[sout, sin]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def self.release_pipes(*pipes)
|
126
|
-
PIPE_MUTEX.synchronize do
|
127
|
-
pipes.flatten.each do |pipe|
|
128
|
-
pipe.close unless pipe.closed?
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
|
134
|
-
def self.purge_pipes(*save)
|
135
|
-
PIPE_MUTEX.synchronize do
|
136
|
-
OPEN_PIPE_IN.each do |pipe|
|
137
|
-
next if save.include? pipe
|
138
|
-
pipe.close unless pipe.closed?
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.open_pipe(do_fork = false, close = true)
|
144
|
-
raise "No block given" unless block_given?
|
145
|
-
|
146
|
-
sout, sin = Misc.pipe
|
147
|
-
|
148
|
-
if do_fork
|
149
|
-
parent_pid = Process.pid
|
150
|
-
pid = Process.fork {
|
151
|
-
purge_pipes(sin)
|
152
|
-
sout.close
|
153
|
-
begin
|
154
|
-
yield sin
|
155
|
-
rescue
|
156
|
-
Log.exception $!
|
157
|
-
Process.kill :INT, parent_pid
|
158
|
-
Kernel.exit! -1
|
159
|
-
ensure
|
160
|
-
sin.close if close and not sin.closed?
|
161
|
-
end
|
162
|
-
Kernel.exit! 0
|
163
|
-
}
|
164
|
-
sin.close #if close
|
165
|
-
ConcurrentStream.setup sout, :pids => [pid]
|
166
|
-
else
|
167
|
-
thread = Thread.new(Thread.current) do |parent|
|
168
|
-
begin
|
169
|
-
yield sin
|
170
|
-
rescue
|
171
|
-
parent.raise $!
|
172
|
-
ensure
|
173
|
-
sin.close if close and not sin.closed?
|
174
|
-
end
|
175
|
-
end
|
176
|
-
ConcurrentStream.setup sout, :threads => [thread]
|
177
|
-
end
|
178
|
-
sout
|
179
|
-
end
|
180
|
-
|
181
|
-
def self.tee_stream_fork(stream)
|
182
|
-
stream_out1, stream_in1 = Misc.pipe
|
183
|
-
stream_out2, stream_in2 = Misc.pipe
|
184
|
-
|
185
|
-
splitter_pid = Process.fork do
|
186
|
-
Misc.purge_pipes(stream_in1, stream_in2)
|
187
|
-
stream_out1.close
|
188
|
-
stream_out2.close
|
189
|
-
begin
|
190
|
-
filename = stream.respond_to?(:filename)? stream.filename : nil
|
191
|
-
skip1 = skip2 = false
|
192
|
-
while block = stream.read(2048)
|
193
|
-
begin stream_in1.write block; rescue Exception; Log.exception $!; skip1 = true end unless skip1
|
194
|
-
begin stream_in2.write block; rescue Exception; Log.exception $!; skip2 = true end unless skip2
|
195
|
-
end
|
196
|
-
raise "Error writing in stream_in2" if skip2
|
197
|
-
raise "Error writing in stream_in2" if skip2
|
198
|
-
rescue Aborted
|
199
|
-
stream.abort if stream.respond_to? :abort
|
200
|
-
raise $!
|
201
|
-
rescue IOError
|
202
|
-
Log.exception $!
|
203
|
-
rescue Exception
|
204
|
-
Log.exception $!
|
205
|
-
ensure
|
206
|
-
stream_in1.close
|
207
|
-
stream_in2.close
|
208
|
-
stream.join if stream.respond_to? :join
|
209
|
-
end
|
210
|
-
end
|
211
|
-
stream.close
|
212
|
-
stream_in1.close
|
213
|
-
stream_in2.close
|
214
|
-
#stream.join if stream.respond_to? :join
|
215
|
-
|
216
|
-
ConcurrentStream.setup stream_out1, :pids => [splitter_pid]
|
217
|
-
ConcurrentStream.setup stream_out2, :pids => [splitter_pid]
|
218
|
-
|
219
|
-
[stream_out1, stream_out2]
|
220
|
-
end
|
221
|
-
|
222
|
-
def self.tee_stream_thread(stream)
|
223
|
-
stream_out1, stream_in1 = Misc.pipe
|
224
|
-
stream_out2, stream_in2 = Misc.pipe
|
225
|
-
|
226
|
-
splitter_thread = Thread.new(Thread.current, stream_in1, stream_in2) do |parent,stream_in1,stream_in2|
|
227
|
-
begin
|
228
|
-
filename = stream.respond_to?(:filename)? stream.filename : nil
|
229
|
-
skip1 = skip2 = false
|
230
|
-
while block = stream.read(2048)
|
231
|
-
begin stream_in1.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip1 = true end unless skip1
|
232
|
-
begin stream_in2.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip2 = true end unless skip2
|
233
|
-
end
|
234
|
-
rescue Aborted
|
235
|
-
stream.abort if stream.respond_to? :abort
|
236
|
-
raise $!
|
237
|
-
rescue IOError
|
238
|
-
Log.exception $!
|
239
|
-
rescue Exception
|
240
|
-
Log.exception $!
|
241
|
-
parent.raise $!
|
242
|
-
ensure
|
243
|
-
stream_in1.close
|
244
|
-
stream_in2.close
|
245
|
-
stream.join if stream.respond_to? :join
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
ConcurrentStream.setup stream_out1, :threads => splitter_thread
|
250
|
-
ConcurrentStream.setup stream_out2, :threads => splitter_thread
|
251
|
-
|
252
|
-
[stream_out1, stream_out2]
|
253
|
-
end
|
254
|
-
|
255
|
-
class << self
|
256
|
-
alias tee_stream tee_stream_thread
|
257
|
-
end
|
258
|
-
|
259
|
-
def self.read_full_stream(io)
|
260
|
-
str = ""
|
261
|
-
begin
|
262
|
-
while block = io.read(2048)
|
263
|
-
str << block
|
264
|
-
end
|
265
|
-
rescue
|
266
|
-
io.abort if io.respond_to? :abort
|
267
|
-
ensure
|
268
|
-
io.join if io.respond_to? :join
|
269
|
-
io.close if io.respond_to? :close
|
270
|
-
end
|
271
|
-
str
|
272
|
-
end
|
273
|
-
|
274
|
-
def self.consume_stream(io)
|
275
|
-
begin
|
276
|
-
Thread.pass while block = io.read(2048)
|
277
|
-
rescue
|
278
|
-
io.abort if io.respond_to? :abort
|
279
|
-
ensure
|
280
|
-
io.join if io.respond_to? :join
|
281
|
-
io.close if io.respond_to? :close
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
|
-
def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
|
286
|
-
i = 0
|
287
|
-
re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
|
288
|
-
text.split(re).collect do |paragraph|
|
289
|
-
i += 1
|
290
|
-
str = if i % 2 == 1
|
291
|
-
words = paragraph.gsub(/\s+/, "\s").split(" ")
|
292
|
-
lines = []
|
293
|
-
line = " "*offset
|
294
|
-
word = words.shift
|
295
|
-
while word
|
296
|
-
word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
|
297
|
-
while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
|
298
|
-
line << word << " "
|
299
|
-
word = words.shift
|
300
|
-
end
|
301
|
-
lines << ((" " * indent) << line[0..-2])
|
302
|
-
line = ""
|
303
|
-
end
|
304
|
-
(lines * "\n")
|
305
|
-
else
|
306
|
-
paragraph
|
307
|
-
end
|
308
|
-
offset = 0
|
309
|
-
str
|
310
|
-
end*""
|
311
|
-
end
|
312
|
-
|
313
|
-
def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
|
314
|
-
dd = "" if dd.nil?
|
315
|
-
dt = dt.to_s + ":" unless dd.empty?
|
316
|
-
dt = Log.color color, dt if color
|
317
|
-
len = Log.uncolor(dt).length
|
318
|
-
|
319
|
-
if indent < 0
|
320
|
-
text = format_paragraph(dd, size, indent.abs+1, 0)
|
321
|
-
text = dt << "\n" << text
|
322
|
-
else
|
323
|
-
offset = len - indent
|
324
|
-
offset = 0 if offset < 0
|
325
|
-
text = format_paragraph(dd, size, indent.abs+1, offset)
|
326
|
-
text[0..len-1] = dt
|
327
|
-
end
|
328
|
-
text
|
329
|
-
end
|
330
|
-
|
331
|
-
def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow)
|
332
|
-
entries = []
|
333
|
-
defs.each do |dt,dd|
|
334
|
-
text = format_definition_list_item(dt,dd,size,indent,color)
|
335
|
-
entries << text
|
336
|
-
end
|
337
|
-
entries * "\n\n"
|
338
|
-
end
|
339
|
-
|
340
|
-
def self.read_stream(stream, size)
|
341
|
-
str = nil
|
342
|
-
Thread.pass while IO.select([stream],nil,nil,1).nil?
|
343
|
-
while not str = stream.read(size)
|
344
|
-
IO.select([stream],nil,nil,1)
|
345
|
-
Thread.pass
|
346
|
-
raise ClosedStream if stream.eof?
|
347
|
-
end
|
348
|
-
|
349
|
-
while str.length < size
|
350
|
-
raise ClosedStream if stream.eof?
|
351
|
-
IO.select([stream],nil,nil,1)
|
352
|
-
if new = stream.read(size-str.length)
|
353
|
-
str << new
|
354
|
-
end
|
355
|
-
end
|
356
|
-
str
|
357
|
-
end
|
358
|
-
|
359
|
-
def self.read_stream(stream, size)
|
360
|
-
str = nil
|
361
|
-
Thread.pass while IO.select([stream],nil,nil,1).nil?
|
362
|
-
while not str = stream.read(size)
|
363
|
-
IO.select([stream],nil,nil,1)
|
364
|
-
Thread.pass
|
365
|
-
raise ClosedStream if stream.eof?
|
366
|
-
end
|
367
|
-
|
368
|
-
while str.length < size
|
369
|
-
raise ClosedStream if stream.eof?
|
370
|
-
IO.select([stream],nil,nil,1)
|
371
|
-
if new = stream.read(size-str.length)
|
372
|
-
str << new
|
373
|
-
end
|
374
|
-
end
|
375
|
-
str
|
376
|
-
end
|
377
|
-
def self._read_stream(stream, size)
|
378
|
-
str = ""
|
379
|
-
while (len=str.length) < size
|
380
|
-
str << (stream.read(size-len) or break)
|
381
|
-
end
|
382
|
-
str
|
383
|
-
end
|
384
|
-
|
385
38
|
def self.parse_cmd_params(str)
|
386
39
|
return str if Array === str
|
387
40
|
str.scan(/
|
@@ -390,39 +43,6 @@ module Misc
|
|
390
43
|
/x).flatten.compact
|
391
44
|
end
|
392
45
|
|
393
|
-
def self.correct_icgc_mutation(pos, ref, mut_str)
|
394
|
-
mut = mut_str
|
395
|
-
mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
|
396
|
-
mut = "+" << mut if ref == '-'
|
397
|
-
[pos, [mut]]
|
398
|
-
end
|
399
|
-
|
400
|
-
def self.correct_vcf_mutation(pos, ref, mut_str)
|
401
|
-
muts = mut_str.nil? ? [] : mut_str.split(',')
|
402
|
-
|
403
|
-
while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
|
404
|
-
ref = ref[1..-1]
|
405
|
-
pos = pos + 1
|
406
|
-
muts = muts.collect{|m| m[1..-1]}
|
407
|
-
end
|
408
|
-
|
409
|
-
muts = muts.collect do |m|
|
410
|
-
case
|
411
|
-
when ref.empty?
|
412
|
-
"+" << m
|
413
|
-
when (m.length < ref.length and (m.empty? or ref.index(m)))
|
414
|
-
"-" * (ref.length - m.length)
|
415
|
-
when (ref.length == 1 and m.length == 1)
|
416
|
-
m
|
417
|
-
else
|
418
|
-
Log.debug{"Cannot understand: #{[ref, m]} (#{ muts })"}
|
419
|
-
'-' * ref.length + m
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
[pos, muts]
|
424
|
-
end
|
425
|
-
|
426
46
|
def self.pid_exists?(pid)
|
427
47
|
return false if pid.nil?
|
428
48
|
begin
|
@@ -433,25 +53,6 @@ module Misc
|
|
433
53
|
end
|
434
54
|
end
|
435
55
|
|
436
|
-
COLOR_LIST = %w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
|
437
|
-
|
438
|
-
def self.colors_for(list)
|
439
|
-
unused = COLOR_LIST.dup
|
440
|
-
|
441
|
-
used = {}
|
442
|
-
colors = list.collect do |elem|
|
443
|
-
if used.include? elem
|
444
|
-
used[elem]
|
445
|
-
else
|
446
|
-
color = unused.shift
|
447
|
-
used[elem]=color
|
448
|
-
color
|
449
|
-
end
|
450
|
-
end
|
451
|
-
|
452
|
-
[colors, used]
|
453
|
-
end
|
454
|
-
|
455
56
|
def self.collapse_ranges(ranges)
|
456
57
|
processed = []
|
457
58
|
last = nil
|
@@ -527,10 +128,6 @@ module Misc
|
|
527
128
|
end
|
528
129
|
end
|
529
130
|
|
530
|
-
Log2Multiplier = 1.0 / Math.log(2.0)
|
531
|
-
def self.log2(x)
|
532
|
-
Math.log(x) * Log2Multiplier
|
533
|
-
end
|
534
131
|
|
535
132
|
def self.prepare_entity(entity, field, options = {})
|
536
133
|
return entity unless defined? Entity
|
@@ -555,116 +152,6 @@ module Misc
|
|
555
152
|
entity
|
556
153
|
end
|
557
154
|
|
558
|
-
ARRAY_MAX_LENGTH = 1000
|
559
|
-
STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
|
560
|
-
|
561
|
-
def self.sanitize_filename(filename, length = 254)
|
562
|
-
if filename.length > length
|
563
|
-
if filename =~ /(\..{2,9})$/
|
564
|
-
extension = $1
|
565
|
-
else
|
566
|
-
extension = ''
|
567
|
-
end
|
568
|
-
|
569
|
-
post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
|
570
|
-
|
571
|
-
filename = filename[0..(length - post_fix.length - 1)] << post_fix
|
572
|
-
else
|
573
|
-
filename
|
574
|
-
end
|
575
|
-
filename
|
576
|
-
end
|
577
|
-
|
578
|
-
def self.fingerprint(obj)
|
579
|
-
case obj
|
580
|
-
when nil
|
581
|
-
"nil"
|
582
|
-
when (defined? Step and Step)
|
583
|
-
obj.path || Misc.fingerprint([obj.task.name, obj.inputs])
|
584
|
-
when TrueClass
|
585
|
-
"true"
|
586
|
-
when FalseClass
|
587
|
-
"false"
|
588
|
-
when Symbol
|
589
|
-
":" << obj.to_s
|
590
|
-
when String
|
591
|
-
if obj.length > 100
|
592
|
-
"'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
|
593
|
-
else
|
594
|
-
"'" << obj << "'"
|
595
|
-
end
|
596
|
-
when (defined? AnnotatedArray and AnnotatedArray)
|
597
|
-
"<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
|
598
|
-
when (defined? TSV and TSV::Parser)
|
599
|
-
"<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
|
600
|
-
when IO
|
601
|
-
"<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
|
602
|
-
when File
|
603
|
-
"<File:" + obj.path + ">"
|
604
|
-
when Array
|
605
|
-
if (length = obj.length) > 10
|
606
|
-
"[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
|
607
|
-
else
|
608
|
-
"[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
|
609
|
-
end
|
610
|
-
when (defined? TSV and TSV)
|
611
|
-
obj.with_unnamed do
|
612
|
-
"TSV:{"<< fingerprint(obj.all_fields|| []).inspect << ";" << fingerprint(obj.keys).inspect << "}"
|
613
|
-
end
|
614
|
-
when Hash
|
615
|
-
if obj.length > 10
|
616
|
-
"H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
|
617
|
-
else
|
618
|
-
new = "{"
|
619
|
-
obj.each do |k,v|
|
620
|
-
new << k.to_s << '=>' << fingerprint(v) << ' '
|
621
|
-
end
|
622
|
-
if new.length > 1
|
623
|
-
new[-1] = "}"
|
624
|
-
else
|
625
|
-
new << '}'
|
626
|
-
end
|
627
|
-
new
|
628
|
-
end
|
629
|
-
else
|
630
|
-
obj.to_s
|
631
|
-
end
|
632
|
-
end
|
633
|
-
|
634
|
-
|
635
|
-
def self.remove_long_items(obj)
|
636
|
-
case
|
637
|
-
when IO === obj
|
638
|
-
remove_long_items("IO: " + obj.filename)
|
639
|
-
when obj.respond_to?(:path)
|
640
|
-
remove_long_items("File: " + obj.path)
|
641
|
-
when TSV::Parser === obj
|
642
|
-
remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
|
643
|
-
when TSV === obj
|
644
|
-
remove_long_items((obj.all_fields || []) + obj.keys.sort)
|
645
|
-
when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
|
646
|
-
remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
|
647
|
-
when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
|
648
|
-
remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH } (#{obj.length})"])
|
649
|
-
when (String === obj and obj.length > STRING_MAX_LENGTH)
|
650
|
-
obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH} (#{obj.length})"
|
651
|
-
when Hash === obj
|
652
|
-
new = {}
|
653
|
-
obj.each do |k,v|
|
654
|
-
new[k] = remove_long_items(v)
|
655
|
-
end
|
656
|
-
new
|
657
|
-
when Array === obj
|
658
|
-
obj.collect do |e| remove_long_items(e) end
|
659
|
-
else
|
660
|
-
obj
|
661
|
-
end
|
662
|
-
end
|
663
|
-
|
664
|
-
#def self.remove_long_items(obj)
|
665
|
-
# return fingerprint(obj)
|
666
|
-
#end
|
667
|
-
|
668
155
|
def self.ensembl_server(organism)
|
669
156
|
date = organism.split("/")[1]
|
670
157
|
if date.nil?
|
@@ -674,18 +161,6 @@ module Misc
|
|
674
161
|
end
|
675
162
|
end
|
676
163
|
|
677
|
-
def self.filename?(string)
|
678
|
-
String === string and string.length > 0 and string.length < 250 and File.exists?(string)
|
679
|
-
end
|
680
|
-
|
681
|
-
def self.max(list)
|
682
|
-
max = nil
|
683
|
-
list.each do |v|
|
684
|
-
next if v.nil?
|
685
|
-
max = v if max.nil? or v > max
|
686
|
-
end
|
687
|
-
max
|
688
|
-
end
|
689
164
|
|
690
165
|
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
|
691
166
|
name1 ||= "list 1"
|
@@ -710,20 +185,6 @@ module Misc
|
|
710
185
|
url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
|
711
186
|
end
|
712
187
|
|
713
|
-
def self.sum(list)
|
714
|
-
list.compact.inject(0.0){|acc,e| acc += e}
|
715
|
-
end
|
716
|
-
|
717
|
-
def self.mean(list)
|
718
|
-
sum(list) / list.compact.length
|
719
|
-
end
|
720
|
-
|
721
|
-
def self.sd(list)
|
722
|
-
return nil if list.length < 3
|
723
|
-
mean = mean(list)
|
724
|
-
Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
|
725
|
-
end
|
726
|
-
|
727
188
|
def self.consolidate(list)
|
728
189
|
list.inject(nil){|acc,e|
|
729
190
|
if acc.nil?
|
@@ -795,208 +256,6 @@ end
|
|
795
256
|
proportions
|
796
257
|
end
|
797
258
|
|
798
|
-
IUPAC2BASE = {
|
799
|
-
"A" => ["A"],
|
800
|
-
"C" => ["C"],
|
801
|
-
"G" => ["G"],
|
802
|
-
"T" => ["T"],
|
803
|
-
"U" => ["U"],
|
804
|
-
"R" => "A or G".split(" or "),
|
805
|
-
"Y" => "C or T".split(" or "),
|
806
|
-
"S" => "G or C".split(" or "),
|
807
|
-
"W" => "A or T".split(" or "),
|
808
|
-
"K" => "G or T".split(" or "),
|
809
|
-
"M" => "A or C".split(" or "),
|
810
|
-
"B" => "C or G or T".split(" or "),
|
811
|
-
"D" => "A or G or T".split(" or "),
|
812
|
-
"H" => "A or C or T".split(" or "),
|
813
|
-
"V" => "A or C or G".split(" or "),
|
814
|
-
"N" => %w(A C T G),
|
815
|
-
}
|
816
|
-
|
817
|
-
BASE2COMPLEMENT = {
|
818
|
-
"A" => "T",
|
819
|
-
"C" => "G",
|
820
|
-
"G" => "C",
|
821
|
-
"T" => "A",
|
822
|
-
"U" => "A",
|
823
|
-
}
|
824
|
-
|
825
|
-
THREE_TO_ONE_AA_CODE = {
|
826
|
-
"ala" => "A",
|
827
|
-
"arg" => "R",
|
828
|
-
"asn" => "N",
|
829
|
-
"asp" => "D",
|
830
|
-
"cys" => "C",
|
831
|
-
"glu" => "E",
|
832
|
-
"gln" => "Q",
|
833
|
-
"gly" => "G",
|
834
|
-
"his" => "H",
|
835
|
-
"ile" => "I",
|
836
|
-
"leu" => "L",
|
837
|
-
"lys" => "K",
|
838
|
-
"met" => "M",
|
839
|
-
"phe" => "F",
|
840
|
-
"pro" => "P",
|
841
|
-
"ser" => "S",
|
842
|
-
"thr" => "T",
|
843
|
-
"trp" => "W",
|
844
|
-
"tyr" => "Y",
|
845
|
-
"val" => "V"
|
846
|
-
}
|
847
|
-
CODON_TABLE = {
|
848
|
-
"ATT" => "I",
|
849
|
-
"ATC" => "I",
|
850
|
-
"ATA" => "I",
|
851
|
-
"CTT" => "L",
|
852
|
-
"CTC" => "L",
|
853
|
-
"CTA" => "L",
|
854
|
-
"CTG" => "L",
|
855
|
-
"TTA" => "L",
|
856
|
-
"TTG" => "L",
|
857
|
-
"GTT" => "V",
|
858
|
-
"GTC" => "V",
|
859
|
-
"GTA" => "V",
|
860
|
-
"GTG" => "V",
|
861
|
-
"TTT" => "F",
|
862
|
-
"TTC" => "F",
|
863
|
-
"ATG" => "M",
|
864
|
-
"TGT" => "C",
|
865
|
-
"TGC" => "C",
|
866
|
-
"GCT" => "A",
|
867
|
-
"GCC" => "A",
|
868
|
-
"GCA" => "A",
|
869
|
-
"GCG" => "A",
|
870
|
-
"GGT" => "G",
|
871
|
-
"GGC" => "G",
|
872
|
-
"GGA" => "G",
|
873
|
-
"GGG" => "G",
|
874
|
-
"CCT" => "P",
|
875
|
-
"CCC" => "P",
|
876
|
-
"CCA" => "P",
|
877
|
-
"CCG" => "P",
|
878
|
-
"ACT" => "T",
|
879
|
-
"ACC" => "T",
|
880
|
-
"ACA" => "T",
|
881
|
-
"ACG" => "T",
|
882
|
-
"TCT" => "S",
|
883
|
-
"TCC" => "S",
|
884
|
-
"TCA" => "S",
|
885
|
-
"TCG" => "S",
|
886
|
-
"AGT" => "S",
|
887
|
-
"AGC" => "S",
|
888
|
-
"TAT" => "Y",
|
889
|
-
"TAC" => "Y",
|
890
|
-
"TGG" => "W",
|
891
|
-
"CAA" => "Q",
|
892
|
-
"CAG" => "Q",
|
893
|
-
"AAT" => "N",
|
894
|
-
"AAC" => "N",
|
895
|
-
"CAT" => "H",
|
896
|
-
"CAC" => "H",
|
897
|
-
"GAA" => "E",
|
898
|
-
"GAG" => "E",
|
899
|
-
"GAT" => "D",
|
900
|
-
"GAC" => "D",
|
901
|
-
"AAA" => "K",
|
902
|
-
"AAG" => "K",
|
903
|
-
"CGT" => "R",
|
904
|
-
"CGC" => "R",
|
905
|
-
"CGA" => "R",
|
906
|
-
"CGG" => "R",
|
907
|
-
"AGA" => "R",
|
908
|
-
"AGG" => "R",
|
909
|
-
"TAA" => "*",
|
910
|
-
"TAG" => "*",
|
911
|
-
"TGA" => "*",
|
912
|
-
}
|
913
|
-
|
914
|
-
#def self.fast_align(reference, sequence)
|
915
|
-
#
|
916
|
-
#require 'narray'
|
917
|
-
# init_gap = -1
|
918
|
-
# gap = -2
|
919
|
-
# diff = -2
|
920
|
-
# same = 2
|
921
|
-
|
922
|
-
# cols = sequence.length + 1
|
923
|
-
# rows = reference.length + 1
|
924
|
-
|
925
|
-
# a = NArray.int(cols, rows)
|
926
|
-
|
927
|
-
# for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
|
928
|
-
# for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
|
929
|
-
|
930
|
-
# spos = 1
|
931
|
-
# while spos < cols do
|
932
|
-
# rpos = 1
|
933
|
-
# while rpos < rows do
|
934
|
-
# match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
|
935
|
-
# skip_sequence = a[spos-1,rpos] + gap
|
936
|
-
# skip_reference = a[spos,rpos-1] + gap
|
937
|
-
# a[spos,rpos] = [match, skip_sequence, skip_reference].max
|
938
|
-
# rpos += 1
|
939
|
-
# end
|
940
|
-
# spos += 1
|
941
|
-
# end
|
942
|
-
|
943
|
-
# start = Misc.max(a[-1,0..rows-1])
|
944
|
-
# start_pos = a[-1,0..rows-1].to_a.index start
|
945
|
-
|
946
|
-
# ref = ''
|
947
|
-
# seq = ''
|
948
|
-
# rpos = start_pos
|
949
|
-
# spos = cols - 1
|
950
|
-
|
951
|
-
# while spos > 0 and rpos > 0
|
952
|
-
# score = a[spos,rpos]
|
953
|
-
# score_match = a[spos-1,rpos-1]
|
954
|
-
# score_skip_reference = a[spos,rpos-1]
|
955
|
-
# score_skip_sequence = a[spos-1,rpos]
|
956
|
-
|
957
|
-
# case
|
958
|
-
# when score == score_match + (sequence[spos-1] != reference[rpos-1] ? diff : same)
|
959
|
-
# ref << reference[rpos-1]
|
960
|
-
# seq << sequence[spos-1]
|
961
|
-
# spos -= 1
|
962
|
-
# rpos -= 1
|
963
|
-
# when score == score_skip_reference + gap
|
964
|
-
# ref << reference[rpos-1]
|
965
|
-
# seq << '-'
|
966
|
-
# rpos -= 1
|
967
|
-
# when score == score_skip_sequence + gap
|
968
|
-
# seq << sequence[spos-1]
|
969
|
-
# ref << '-'
|
970
|
-
# spos -= 1
|
971
|
-
# else
|
972
|
-
# raise "stop"
|
973
|
-
# end
|
974
|
-
# end
|
975
|
-
|
976
|
-
# while (rpos > 0)
|
977
|
-
# ref << reference[rpos-1]
|
978
|
-
# seq = seq << '-'
|
979
|
-
# rpos -= 1
|
980
|
-
# end
|
981
|
-
|
982
|
-
# while (spos > 0)
|
983
|
-
# seq << sequence[spos-1]
|
984
|
-
# ref = ref + '-'
|
985
|
-
# spos -= 1
|
986
|
-
# end
|
987
|
-
#
|
988
|
-
# [ref.reverse + reference[start_pos..-1], seq.reverse + '-' * (rows - start_pos - 1)]
|
989
|
-
#end
|
990
|
-
|
991
|
-
def self.IUPAC_to_base(iupac)
|
992
|
-
IUPAC2BASE[iupac]
|
993
|
-
end
|
994
|
-
|
995
|
-
def self.is_filename?(string)
|
996
|
-
return true if string.respond_to? :exists
|
997
|
-
return true if String === string and string.length < 265 and File.exists? string
|
998
|
-
return false
|
999
|
-
end
|
1000
259
|
|
1001
260
|
def self.sorted_array_hits(a1, a2)
|
1002
261
|
e1, e2 = a1.shift, a2.shift
|
@@ -1132,100 +391,6 @@ end
|
|
1132
391
|
end
|
1133
392
|
end
|
1134
393
|
|
1135
|
-
def self.benchmark(repeats = 1, message = nil)
|
1136
|
-
require 'benchmark'
|
1137
|
-
res = nil
|
1138
|
-
begin
|
1139
|
-
measure = Benchmark.measure do
|
1140
|
-
repeats.times do
|
1141
|
-
res = yield
|
1142
|
-
end
|
1143
|
-
end
|
1144
|
-
if message
|
1145
|
-
puts "#{message }: #{ repeats } repeats"
|
1146
|
-
else
|
1147
|
-
puts "Benchmark for #{ repeats } repeats"
|
1148
|
-
end
|
1149
|
-
puts measure
|
1150
|
-
rescue Exception
|
1151
|
-
puts "Benchmark aborted"
|
1152
|
-
raise $!
|
1153
|
-
end
|
1154
|
-
res
|
1155
|
-
end
|
1156
|
-
|
1157
|
-
def self.profile_html(options = {})
|
1158
|
-
require 'ruby-prof'
|
1159
|
-
RubyProf.start
|
1160
|
-
begin
|
1161
|
-
res = yield
|
1162
|
-
rescue Exception
|
1163
|
-
puts "Profiling aborted"
|
1164
|
-
raise $!
|
1165
|
-
ensure
|
1166
|
-
result = RubyProf.stop
|
1167
|
-
printer = RubyProf::MultiPrinter.new(result)
|
1168
|
-
TmpFile.with_file do |dir|
|
1169
|
-
FileUtils.mkdir_p dir unless File.exists? dir
|
1170
|
-
printer.print(:path => dir, :profile => 'profile')
|
1171
|
-
CMD.cmd("firefox -no-remote '#{ dir }'")
|
1172
|
-
end
|
1173
|
-
end
|
1174
|
-
|
1175
|
-
res
|
1176
|
-
end
|
1177
|
-
|
1178
|
-
def self.profile_graph(options = {})
|
1179
|
-
require 'ruby-prof'
|
1180
|
-
RubyProf.start
|
1181
|
-
begin
|
1182
|
-
res = yield
|
1183
|
-
rescue Exception
|
1184
|
-
puts "Profiling aborted"
|
1185
|
-
raise $!
|
1186
|
-
ensure
|
1187
|
-
result = RubyProf.stop
|
1188
|
-
#result.eliminate_methods!([/annotated_array_clean_/])
|
1189
|
-
printer = RubyProf::GraphPrinter.new(result)
|
1190
|
-
printer.print(STDOUT, options)
|
1191
|
-
end
|
1192
|
-
|
1193
|
-
res
|
1194
|
-
end
|
1195
|
-
|
1196
|
-
def self.profile(options = {})
|
1197
|
-
require 'ruby-prof'
|
1198
|
-
RubyProf.start
|
1199
|
-
begin
|
1200
|
-
res = yield
|
1201
|
-
rescue Exception
|
1202
|
-
puts "Profiling aborted"
|
1203
|
-
raise $!
|
1204
|
-
ensure
|
1205
|
-
result = RubyProf.stop
|
1206
|
-
printer = RubyProf::FlatPrinter.new(result)
|
1207
|
-
printer.print(STDOUT, options)
|
1208
|
-
end
|
1209
|
-
|
1210
|
-
res
|
1211
|
-
end
|
1212
|
-
|
1213
|
-
def self.memprof
|
1214
|
-
require 'memprof'
|
1215
|
-
Memprof.start
|
1216
|
-
begin
|
1217
|
-
res = yield
|
1218
|
-
rescue Exception
|
1219
|
-
puts "Profiling aborted"
|
1220
|
-
raise $!
|
1221
|
-
ensure
|
1222
|
-
Memprof.stop
|
1223
|
-
print Memprof.stats
|
1224
|
-
end
|
1225
|
-
|
1226
|
-
res
|
1227
|
-
end
|
1228
|
-
|
1229
394
|
def self.do_once(&block)
|
1230
395
|
return nil if $__did_once
|
1231
396
|
$__did_once = true
|
@@ -1334,22 +499,6 @@ end
|
|
1334
499
|
html
|
1335
500
|
end
|
1336
501
|
|
1337
|
-
#def self.path_relative_to(basedir, path)
|
1338
|
-
# path = File.expand_path(path) unless path[0] == "/"
|
1339
|
-
# basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
1340
|
-
|
1341
|
-
# basedir << "/" unless basedir[-1] == "/"
|
1342
|
-
# case
|
1343
|
-
# when path == basedir
|
1344
|
-
# "."
|
1345
|
-
# #when path =~ /#{Regexp.quote basedir}\/(.*)/
|
1346
|
-
# when path.index(basedir) == 0
|
1347
|
-
# return path[basedir.length..-1]
|
1348
|
-
# else
|
1349
|
-
# return nil
|
1350
|
-
# end
|
1351
|
-
#end
|
1352
|
-
|
1353
502
|
def self.path_relative_to(basedir, path)
|
1354
503
|
path = File.expand_path(path) unless path[0] == "/"
|
1355
504
|
basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
@@ -1369,88 +518,6 @@ end
|
|
1369
518
|
@hostanem ||= `hostname`.strip
|
1370
519
|
end
|
1371
520
|
|
1372
|
-
LOCK_MUTEX = Mutex.new
|
1373
|
-
def self.lock(file, unlock = true)
|
1374
|
-
return yield if file.nil?
|
1375
|
-
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
1376
|
-
|
1377
|
-
res = nil
|
1378
|
-
|
1379
|
-
lock_path = File.expand_path(file + '.lock')
|
1380
|
-
lockfile = Lockfile.new(lock_path)
|
1381
|
-
|
1382
|
-
hostname = Misc.hostname
|
1383
|
-
LOCK_MUTEX.synchronize do
|
1384
|
-
Misc.insist 2, 0.1 do
|
1385
|
-
Misc.insist 3, 0.1 do
|
1386
|
-
begin
|
1387
|
-
if File.exists? lock_path
|
1388
|
-
info = Open.open(lock_path){|f| YAML.load(f) }
|
1389
|
-
raise "No info" unless info
|
1390
|
-
|
1391
|
-
if hostname == info["host"] and not Misc.pid_exists?(info["pid"])
|
1392
|
-
Log.info("Removing lockfile: #{lock_path}. This pid #{Process.pid}. Content: #{info.inspect}")
|
1393
|
-
FileUtils.rm lock_path
|
1394
|
-
end
|
1395
|
-
end
|
1396
|
-
rescue Exception
|
1397
|
-
FileUtils.rm lock_path if File.exists? lock_path
|
1398
|
-
lockfile = Lockfile.new(lock_path) unless File.exists? lock_path
|
1399
|
-
raise $!
|
1400
|
-
end
|
1401
|
-
end
|
1402
|
-
end
|
1403
|
-
end
|
1404
|
-
|
1405
|
-
begin
|
1406
|
-
lockfile.lock
|
1407
|
-
res = yield lockfile
|
1408
|
-
rescue Lockfile::StolenLockError
|
1409
|
-
unlock = false
|
1410
|
-
rescue KeepLocked
|
1411
|
-
unlock = false
|
1412
|
-
res = $!.payload
|
1413
|
-
ensure
|
1414
|
-
if unlock and lockfile.locked?
|
1415
|
-
lockfile.unlock
|
1416
|
-
end
|
1417
|
-
end
|
1418
|
-
|
1419
|
-
res
|
1420
|
-
end
|
1421
|
-
|
1422
|
-
|
1423
|
-
# Serializer used for the lock entries stored in the repo.
# NOTE(review): Marshal.load must only ever see data written by this code;
# confirm the repo is never populated from untrusted sources.
LOCK_REPO_SERIALIZER=Marshal

# Runs the given block while holding a lock entry ("lock-<key>") inside +repo+.
#
# repo - key/value store used through [], []=, include?, out and delete.
# key  - name of the resource to lock.
# args - extra arguments forwarded to the block.
#
# The block receives the lock key followed by +args+. When +repo+ or +key+ is
# nil no locking is done and the block receives nil in place of the lock key.
# Returns the value of the block. The lock entry is removed even when the
# block raises.
def self.lock_in_repo(repo, key, *args)
  # The original yielded an undefined local (`file`) here, raising NameError.
  return yield(nil, *args) if repo.nil? or key.nil?

  lock_key = "lock-#{key}"

  begin
    stored = repo[lock_key]
    if stored
      info = LOCK_REPO_SERIALIZER.load(stored)
      # Entries used to be written with :hostname/:pid but read back with
      # "host"/"pid", so stale locks were never detected; accept both layouts.
      host = info["host"] || info[:hostname]
      pid  = info["pid"] || info[:pid]

      # A lock written on this host by a pid that no longer exists is stale.
      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.out lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.out lock_key if repo.include? lock_key
  end

  # Poll until the current holder removes its entry.
  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({"host" => Misc.hostname, "pid" => Process.pid})

  begin
    yield lock_key, *args
  ensure
    # Release the entry even if the block raised, so the key is not left locked.
    repo.delete lock_key
  end
end
|
1454
521
|
|
1455
522
|
def self.common_path(dir, file)
|
1456
523
|
file = File.expand_path file
|
@@ -1479,32 +546,6 @@ end
|
|
1479
546
|
res
|
1480
547
|
end
|
1481
548
|
|
1482
|
-
# Re-encodes +string+ as UTF-8, replacing invalid or unconvertible bytes
# with "?". The round trip through UTF-16BE forces an actual transcoding
# pass even when the string is already tagged as UTF-8.
#
# Returns nil when +string+ is nil, matching fixutf8.
def self.to_utf8(string)
  return nil if string.nil?
  string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
end
|
1485
|
-
|
1486
|
-
# Returns +string+ with any invalid byte sequences replaced by "?".
#
# nil is returned for nil, and a string whose encoding is already valid is
# returned untouched (same object). Otherwise the string is transcoded
# through UTF-16BE and back to UTF-8 with replacement. Objects that do not
# respond to #encode fall back to Iconv.
def self.fixutf8(string)
  return nil if string.nil?
  # The original also tested `valid_encoding` (without "?"), a method that
  # String does not define, so that branch could never be taken.
  return string if string.respond_to?(:valid_encoding?) and string.valid_encoding?

  if string.respond_to?(:encode)
    string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
  else
    # Deliberately lazy: iconv is only needed when #encode is unavailable.
    require 'iconv'
    @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
    @@ic.iconv(string)
  end
end
|
1499
|
-
|
1500
|
-
# Returns +string+ converted to ASCII-8BIT after cleaning it with fixutf8.
#
# Characters with no ASCII representation are replaced by "?"; a bare
# encode("ASCII-8BIT") raises Encoding::UndefinedConversionError for them.
# Objects that do not respond to #encode (including nil) are returned as-is.
def self.fixascii(string)
  if string.respond_to?(:encode)
    self.fixutf8(string).encode("ASCII-8BIT", :invalid => :replace, :undef => :replace, :replace => "?")
  else
    string
  end
end
|
1507
|
-
|
1508
549
|
def self.sensiblewrite(path, content = nil, &block)
|
1509
550
|
return if File.exists? path
|
1510
551
|
tmp_path = path + '.sensible_write'
|
@@ -1556,69 +597,6 @@ end
|
|
1556
597
|
new_options
|
1557
598
|
end
|
1558
599
|
|
1559
|
-
# Returns the hexadecimal MD5 digest of +text+.
def self.digest(text)
  md5 = Digest::MD5.new
  md5.update(text)
  md5.hexdigest
end
|
1562
|
-
|
1563
|
-
# Strings and arrays longer than these limits are truncated (and tagged with
# their full length) before being folded into the digest.
HASH2MD5_MAX_STRING_LENGTH = 1000
HASH2MD5_MAX_ARRAY_LENGTH = 100

# Builds an MD5 fingerprint of +hash+.
#
# Keys are visited in order of their string form and each key/value pair is
# appended to a buffer as "key=>value"; nested hashes contribute their own
# hash2md5. The keys monitor and in_situ_persistence (symbol or string) are
# ignored. If +hash+ responds to #unnamed it is switched to unnamed = true
# during the traversal and restored afterwards, even when an error is raised.
#
# Returns "" when nothing was appended, otherwise the MD5 hex digest of the
# buffer.
def self.hash2md5(hash)
  str = ""
  keys = hash.keys
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
  keys = keys.sort_by{|k| k.to_s}

  tracks_unnamed = hash.respond_to? :unnamed
  if tracks_unnamed
    unnamed = hash.unnamed
    hash.unnamed = true
  end

  begin
    keys.each do |k|
      next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
      v = hash[k]
      case
      when TrueClass === v
        str << k.to_s << "=>true"
      when FalseClass === v
        str << k.to_s << "=>false"
      when Hash === v
        str << k.to_s << "=>" << hash2md5(v)
      when Symbol === v
        str << k.to_s << "=>" << v.to_s
      when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
        # The inclusive range keeps MAX + 1 characters. Left as is on purpose:
        # changing it would alter every digest computed so far.
        str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
      when String === v
        str << k.to_s << "=>" << v
      when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
        # Same inclusive-range remark as for long strings.
        str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
      when (defined?(TSV::Parser) and TSV::Parser === v)
        # Guarded like Annotated below so the method works when TSV is not loaded.
        str << remove_long_items(v)
      when Array === v
        str << k.to_s << "=>[" << v * "," << "]"
      when File === v
        str << k.to_s << "=>[File:" << v.path << "]"
      else
        v_ins = v.inspect

        case
        when v_ins =~ /:0x0/
          # Strip the object address so the fingerprint is stable across runs.
          str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
        else
          str << k.to_s << "=>" << v_ins
        end

      end

      str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
    end
  ensure
    # Put the flag back even if the traversal raised.
    hash.unnamed = unnamed if tracks_unnamed
  end

  if str.empty?
    ""
  else
    digest(str)
  end
end
|
1621
|
-
|
1622
600
|
def self.process_options(hash, *keys)
|
1623
601
|
if keys.length == 1
|
1624
602
|
hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
|
@@ -1815,7 +793,6 @@ end
|
|
1815
793
|
chunks
|
1816
794
|
end
|
1817
795
|
|
1818
|
-
|
1819
796
|
def self.append_zipped(current, new)
|
1820
797
|
current.each do |v|
|
1821
798
|
n = new.shift
|
@@ -1832,145 +809,6 @@ end
|
|
1832
809
|
return [] if array.empty? or (first = array.first).nil?
|
1833
810
|
first.zip(*array[1..-1])
|
1834
811
|
end
|
1835
|
-
|
1836
|
-
# Converts an underscore-separated +string+ to CamelCase.
#
# A string with no underscores that already contains an uppercase letter is
# returned unchanged. Otherwise it is split on underscores and digit runs
# (the digits are kept); pieces made only of two or more capitals are
# preserved and every other piece is capitalized.
def self.camel_case(string)
  already_camel = string !~ /_/ && string =~ /[A-Z]+.*/
  return string if already_camel

  pieces = string.split(/_|(\d+)/)
  converted = pieces.map do |piece|
    if piece =~ /^[A-Z]{2,}$/
      piece
    else
      piece.capitalize
    end
  end
  converted.join
end
|
1842
|
-
|
1843
|
-
# Converts an underscore-separated +string+ to lowerCamelCase.
#
# The first piece is downcased; later pieces made only of two or more
# capitals are kept as they are and the rest are capitalized.
def self.camel_case_lower(string)
  pieces = string.split('_')
  converted = pieces.each_with_index.map do |piece, index|
    if index == 0
      piece.downcase
    elsif piece =~ /^[A-Z]{2,}$/
      piece
    else
      piece.capitalize
    end
  end
  converted.join
end
|
1848
|
-
|
1849
|
-
# Converts +string+ (a String or Symbol) to snake_case.
#
# Underscores are inserted at case boundaries, whitespace becomes an
# underscore and any other non-word character is dropped. Pieces containing
# a run of two or more capitals are kept as they are; the rest are
# downcased. Returns nil for nil.
def self.snake_case(string)
  return nil if string.nil?

  text = Symbol === string ? string.to_s : string
  text = text.gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2')
  text = text.gsub(/([a-z])([A-Z])/,'\1_\2')
  text = text.gsub(/\s/,'_')
  text = text.gsub(/[^\w_]/, '')

  pieces = text.split("_").map do |piece|
    piece.match(/[A-Z]{2,}/) ? piece : piece.downcase
  end
  pieces.join("_")
end
|
1858
|
-
|
1859
|
-
# source: https://gist.github.com/ekdevdes/2450285
# author: Ethan Kramer (https://github.com/ekdevdes)
#
# Turns an underscore-separated +value+ into human readable text.
#
# options[:format] selects the output (default :sentence):
#   :sentence - words joined by spaces, first word capitalized
#   :allcaps  - words joined by spaces, every word capitalized
#   :class    - every word capitalized, joined without separator
#   :nocaps   - words joined by spaces, no capitalization
#
# Words that look like acronyms or mixed case (a letter followed by a capital)
# are not downcased, and are not capitalized in :sentence format
# (Miguel Vazquez edit). Returns nil for an unrecognized format. The options
# hash passed by the caller is never modified.
def self.humanize(value, options = {})
  # Read the format without writing the default back into the caller's hash.
  format = options[:format] || :sentence
  acronym = /[a-zA-Z][A-Z]/

  words = value.split('_').map do |word|
    word.match(acronym) ? word : word.downcase
  end

  case format
  when :allcaps
    words.map { |word| word.capitalize }.join " "
  when :class
    words.map { |word| word.capitalize }.join ""
  when :sentence
    first = words.first
    # `first` is nil when value is empty, which used to raise NoMethodError.
    words[0] = first.capitalize if first and not first.match(acronym)
    words.join " "
  when :nocaps
    words.join " "
  end
end
|
1903
|
-
end
|
1904
|
-
|
1905
|
-
#TODO: REMOVE
|
1906
|
-
#class RBBTError < StandardError
|
1907
|
-
# attr_accessor :info
|
1908
|
-
#
|
1909
|
-
# alias old_to_s to_s
|
1910
|
-
# def to_s
|
1911
|
-
# str = old_to_s.dup
|
1912
|
-
# if info
|
1913
|
-
# str << "\n" << "Additional Info:\n---\n" << info << "---"
|
1914
|
-
# end
|
1915
|
-
# str
|
1916
|
-
# end
|
1917
|
-
#end
|
1918
|
-
|
1919
|
-
# Mixin that lets a Hash be queried with either the String or the Symbol
# form of a key (Module keys are matched through their string form).
# Apply it to an existing hash with IndiferentHash.setup(hash).
module IndiferentHash

  # Extends +hash+ with this module and returns it.
  def self.setup(hash)
    hash.extend IndiferentHash
  end

  # Returns a new IndiferentHash with the entries of +other+ added. An
  # existing entry stored under the other key form is dropped first, so the
  # result never holds both :a and "a" for a merged key.
  def merge(other)
    new = self.dup
    IndiferentHash.setup(new)
    other.each do |k,value|
      new.delete k
      new[k] = value
    end
    new
  end

  # Looks +key+ up as given and, when that yields nil, under its alternate
  # form.
  def [](key)
    res = super(key)
    return res unless res.nil?

    case key
    when Symbol, Module
      super(key.to_s)
    when String
      super(key.to_sym)
    else
      res
    end
  end

  # Like Hash#values_at, but each key goes through the indifferent lookup.
  def values_at(*key_list)
    key_list.map { |key| self[key] }
  end

  # True when +key+ is present under either of its forms.
  def include?(key)
    case key
    when Symbol, Module
      super(key) || super(key.to_s)
    when String
      super(key) || super(key.to_sym)
    else
      super(key)
    end
  end

  # Deletes +key+, falling back to its alternate form when it is not stored
  # as given, and returns the removed value. Presence is tested with key?
  # rather than the truthiness of the removed value, so deleting an entry
  # whose value is false or nil no longer removes the alternate entry too.
  def delete(key)
    alternate = case key
                when Symbol, Module then key.to_s
                when String then key.to_sym
                end

    return super(key) if alternate.nil? || key?(key) || !key?(alternate)
    super(alternate)
  end
end
|
1975
813
|
|
1976
814
|
module PDF2Text
|