rbbt-util 5.11.4 → 5.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +2 -1
- data/lib/rbbt/util/cmd.rb +1 -1
- data/lib/rbbt/util/misc.rb +11 -1173
- data/lib/rbbt/util/misc/concurrent_stream.rb +69 -0
- data/lib/rbbt/util/misc/development.rb +95 -0
- data/lib/rbbt/util/misc/exceptions.rb +11 -0
- data/lib/rbbt/util/misc/format.rb +170 -0
- data/lib/rbbt/util/misc/indiferent_hash.rb +56 -0
- data/lib/rbbt/util/misc/inspect.rb +181 -0
- data/lib/rbbt/util/misc/lock.rb +87 -0
- data/lib/rbbt/util/misc/math.rb +32 -0
- data/lib/rbbt/util/misc/objects.rb +0 -0
- data/lib/rbbt/util/misc/omics.rb +183 -0
- data/lib/rbbt/util/misc/pipes.rb +224 -0
- data/lib/rbbt/workflow/accessor.rb +1 -0
- data/lib/rbbt/workflow/step.rb +15 -9
- data/share/rbbt_commands/workflow/task +2 -0
- metadata +13 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ec4bc56a7d0d260d93be40ee61d23c08f58d5a8
|
4
|
+
data.tar.gz: 6f51a747302e936ed1e5953e929e5120fa5caa89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db5a97b2787f182444de536059beb027d038573c0c3ac3196ec44ce18d2561fa3204a38229d3054e5c92594309fc4a488cc6e9116e370c4f2d383b28a8ed9e71
|
7
|
+
data.tar.gz: 8a4fc523ae9266ceea9964f26f85834a8f5c0e0899a523bb938e2e813da191651215ec356432ee7b467f39b470328b32dda911d1b2cd9bd757fa467ad14d54ce
|
data/lib/rbbt/util/cmd.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -5,6 +5,17 @@ require 'cgi'
|
|
5
5
|
require 'zlib'
|
6
6
|
require 'rubygems/package'
|
7
7
|
require 'rbbt/util/tar'
|
8
|
+
require 'rbbt/util/misc/exceptions'
|
9
|
+
require 'rbbt/util/misc/concurrent_stream'
|
10
|
+
require 'rbbt/util/misc/indiferent_hash'
|
11
|
+
require 'rbbt/util/misc/pipes'
|
12
|
+
require 'rbbt/util/misc/format'
|
13
|
+
require 'rbbt/util/misc/omics'
|
14
|
+
require 'rbbt/util/misc/inspect'
|
15
|
+
require 'rbbt/util/misc/math'
|
16
|
+
require 'rbbt/util/misc/development'
|
17
|
+
require 'rbbt/util/misc/lock'
|
18
|
+
|
8
19
|
|
9
20
|
class Hash
|
10
21
|
def chunked_values_at(keys, max = 5000)
|
@@ -16,372 +27,14 @@ class Hash
|
|
16
27
|
end
|
17
28
|
end
|
18
29
|
|
19
|
-
class ParameterException < Exception; end
|
20
|
-
class FieldNotFoundError < Exception;end
|
21
|
-
class Aborted < Exception; end
|
22
|
-
class TryAgain < Exception; end
|
23
|
-
class ClosedStream < Exception; end
|
24
|
-
class KeepLocked < Exception
|
25
|
-
attr_accessor :payload
|
26
|
-
def initialize(payload)
|
27
|
-
@payload = payload
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
30
|
module LaterString
|
32
31
|
def to_s
|
33
32
|
yield
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
37
|
-
module ConcurrentStream
|
38
|
-
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined
|
39
|
-
|
40
|
-
def joined?
|
41
|
-
@joined
|
42
|
-
end
|
43
|
-
|
44
|
-
def join
|
45
|
-
|
46
|
-
if @threads and @threads.any?
|
47
|
-
@threads.each do |t|
|
48
|
-
t.join
|
49
|
-
end
|
50
|
-
@threads = []
|
51
|
-
end
|
52
|
-
|
53
|
-
if @pids and @pids.any?
|
54
|
-
@pids.each do |pid|
|
55
|
-
begin
|
56
|
-
Process.waitpid(pid, Process::WUNTRACED)
|
57
|
-
raise "Error joining process #{pid} in #{self.inspect}" unless $?.success?
|
58
|
-
rescue Errno::ECHILD
|
59
|
-
end
|
60
|
-
end
|
61
|
-
@pids = []
|
62
|
-
end
|
63
|
-
|
64
|
-
if @callback and not joined?
|
65
|
-
@callback.call
|
66
|
-
@callback = nil
|
67
|
-
end
|
68
|
-
|
69
|
-
@joined = true
|
70
|
-
end
|
71
|
-
|
72
|
-
def abort
|
73
|
-
@threads.each{|t| t.raise Aborted.new } if @threads
|
74
|
-
@threads.each{|t| t.join } if @threads
|
75
|
-
@pids.each{|pid| Process.kill :INT, pid } if @pids
|
76
|
-
@pids.each{|pid| Process.waitpid pid } if @pids
|
77
|
-
@abort_callback.call if @abort_callback
|
78
|
-
@abort_callback = nil
|
79
|
-
end
|
80
|
-
|
81
|
-
def self.setup(stream, options = {}, &block)
|
82
|
-
threads, pids, callback, filename = Misc.process_options options, :threads, :pids, :callback, :filename
|
83
|
-
stream.extend ConcurrentStream unless ConcurrentStream === stream
|
84
|
-
|
85
|
-
stream.threads ||= []
|
86
|
-
stream.pids ||= []
|
87
|
-
stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
|
88
|
-
stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
|
89
|
-
|
90
|
-
callback = block if block_given?
|
91
|
-
if stream.callback and callback
|
92
|
-
old_callback = stream.callback
|
93
|
-
stream.callback = Proc.new do
|
94
|
-
old_callback.call
|
95
|
-
callback.call
|
96
|
-
end
|
97
|
-
else
|
98
|
-
stream.callback = callback
|
99
|
-
end
|
100
|
-
|
101
|
-
stream.filename = filename unless filename.nil?
|
102
|
-
|
103
|
-
stream
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
|
108
|
-
Lockfile.refresh = false if ENV["RBBT_NO_LOCKFILE_REFRESH"] == "true"
|
109
36
|
module Misc
|
110
37
|
|
111
|
-
|
112
|
-
PIPE_MUTEX = Mutex.new
|
113
|
-
|
114
|
-
OPEN_PIPE_IN = []
|
115
|
-
def self.pipe
|
116
|
-
OPEN_PIPE_IN.delete_if{|pipe| pipe.closed? }
|
117
|
-
PIPE_MUTEX.synchronize do
|
118
|
-
sout, sin = IO.pipe
|
119
|
-
OPEN_PIPE_IN << sin
|
120
|
-
|
121
|
-
[sout, sin]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def self.release_pipes(*pipes)
|
126
|
-
PIPE_MUTEX.synchronize do
|
127
|
-
pipes.flatten.each do |pipe|
|
128
|
-
pipe.close unless pipe.closed?
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
|
134
|
-
def self.purge_pipes(*save)
|
135
|
-
PIPE_MUTEX.synchronize do
|
136
|
-
OPEN_PIPE_IN.each do |pipe|
|
137
|
-
next if save.include? pipe
|
138
|
-
pipe.close unless pipe.closed?
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.open_pipe(do_fork = false, close = true)
|
144
|
-
raise "No block given" unless block_given?
|
145
|
-
|
146
|
-
sout, sin = Misc.pipe
|
147
|
-
|
148
|
-
if do_fork
|
149
|
-
parent_pid = Process.pid
|
150
|
-
pid = Process.fork {
|
151
|
-
purge_pipes(sin)
|
152
|
-
sout.close
|
153
|
-
begin
|
154
|
-
yield sin
|
155
|
-
rescue
|
156
|
-
Log.exception $!
|
157
|
-
Process.kill :INT, parent_pid
|
158
|
-
Kernel.exit! -1
|
159
|
-
ensure
|
160
|
-
sin.close if close and not sin.closed?
|
161
|
-
end
|
162
|
-
Kernel.exit! 0
|
163
|
-
}
|
164
|
-
sin.close #if close
|
165
|
-
ConcurrentStream.setup sout, :pids => [pid]
|
166
|
-
else
|
167
|
-
thread = Thread.new(Thread.current) do |parent|
|
168
|
-
begin
|
169
|
-
yield sin
|
170
|
-
rescue
|
171
|
-
parent.raise $!
|
172
|
-
ensure
|
173
|
-
sin.close if close and not sin.closed?
|
174
|
-
end
|
175
|
-
end
|
176
|
-
ConcurrentStream.setup sout, :threads => [thread]
|
177
|
-
end
|
178
|
-
sout
|
179
|
-
end
|
180
|
-
|
181
|
-
def self.tee_stream_fork(stream)
|
182
|
-
stream_out1, stream_in1 = Misc.pipe
|
183
|
-
stream_out2, stream_in2 = Misc.pipe
|
184
|
-
|
185
|
-
splitter_pid = Process.fork do
|
186
|
-
Misc.purge_pipes(stream_in1, stream_in2)
|
187
|
-
stream_out1.close
|
188
|
-
stream_out2.close
|
189
|
-
begin
|
190
|
-
filename = stream.respond_to?(:filename)? stream.filename : nil
|
191
|
-
skip1 = skip2 = false
|
192
|
-
while block = stream.read(2048)
|
193
|
-
begin stream_in1.write block; rescue Exception; Log.exception $!; skip1 = true end unless skip1
|
194
|
-
begin stream_in2.write block; rescue Exception; Log.exception $!; skip2 = true end unless skip2
|
195
|
-
end
|
196
|
-
raise "Error writing in stream_in2" if skip2
|
197
|
-
raise "Error writing in stream_in2" if skip2
|
198
|
-
rescue Aborted
|
199
|
-
stream.abort if stream.respond_to? :abort
|
200
|
-
raise $!
|
201
|
-
rescue IOError
|
202
|
-
Log.exception $!
|
203
|
-
rescue Exception
|
204
|
-
Log.exception $!
|
205
|
-
ensure
|
206
|
-
stream_in1.close
|
207
|
-
stream_in2.close
|
208
|
-
stream.join if stream.respond_to? :join
|
209
|
-
end
|
210
|
-
end
|
211
|
-
stream.close
|
212
|
-
stream_in1.close
|
213
|
-
stream_in2.close
|
214
|
-
#stream.join if stream.respond_to? :join
|
215
|
-
|
216
|
-
ConcurrentStream.setup stream_out1, :pids => [splitter_pid]
|
217
|
-
ConcurrentStream.setup stream_out2, :pids => [splitter_pid]
|
218
|
-
|
219
|
-
[stream_out1, stream_out2]
|
220
|
-
end
|
221
|
-
|
222
|
-
def self.tee_stream_thread(stream)
|
223
|
-
stream_out1, stream_in1 = Misc.pipe
|
224
|
-
stream_out2, stream_in2 = Misc.pipe
|
225
|
-
|
226
|
-
splitter_thread = Thread.new(Thread.current, stream_in1, stream_in2) do |parent,stream_in1,stream_in2|
|
227
|
-
begin
|
228
|
-
filename = stream.respond_to?(:filename)? stream.filename : nil
|
229
|
-
skip1 = skip2 = false
|
230
|
-
while block = stream.read(2048)
|
231
|
-
begin stream_in1.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip1 = true end unless skip1
|
232
|
-
begin stream_in2.write block; rescue Exception; Aborted === $! ? raise($!): Log.exception($!); skip2 = true end unless skip2
|
233
|
-
end
|
234
|
-
rescue Aborted
|
235
|
-
stream.abort if stream.respond_to? :abort
|
236
|
-
raise $!
|
237
|
-
rescue IOError
|
238
|
-
Log.exception $!
|
239
|
-
rescue Exception
|
240
|
-
Log.exception $!
|
241
|
-
parent.raise $!
|
242
|
-
ensure
|
243
|
-
stream_in1.close
|
244
|
-
stream_in2.close
|
245
|
-
stream.join if stream.respond_to? :join
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
ConcurrentStream.setup stream_out1, :threads => splitter_thread
|
250
|
-
ConcurrentStream.setup stream_out2, :threads => splitter_thread
|
251
|
-
|
252
|
-
[stream_out1, stream_out2]
|
253
|
-
end
|
254
|
-
|
255
|
-
class << self
|
256
|
-
alias tee_stream tee_stream_thread
|
257
|
-
end
|
258
|
-
|
259
|
-
def self.read_full_stream(io)
|
260
|
-
str = ""
|
261
|
-
begin
|
262
|
-
while block = io.read(2048)
|
263
|
-
str << block
|
264
|
-
end
|
265
|
-
rescue
|
266
|
-
io.abort if io.respond_to? :abort
|
267
|
-
ensure
|
268
|
-
io.join if io.respond_to? :join
|
269
|
-
io.close if io.respond_to? :close
|
270
|
-
end
|
271
|
-
str
|
272
|
-
end
|
273
|
-
|
274
|
-
def self.consume_stream(io)
|
275
|
-
begin
|
276
|
-
Thread.pass while block = io.read(2048)
|
277
|
-
rescue
|
278
|
-
io.abort if io.respond_to? :abort
|
279
|
-
ensure
|
280
|
-
io.join if io.respond_to? :join
|
281
|
-
io.close if io.respond_to? :close
|
282
|
-
end
|
283
|
-
end
|
284
|
-
|
285
|
-
def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
|
286
|
-
i = 0
|
287
|
-
re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
|
288
|
-
text.split(re).collect do |paragraph|
|
289
|
-
i += 1
|
290
|
-
str = if i % 2 == 1
|
291
|
-
words = paragraph.gsub(/\s+/, "\s").split(" ")
|
292
|
-
lines = []
|
293
|
-
line = " "*offset
|
294
|
-
word = words.shift
|
295
|
-
while word
|
296
|
-
word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
|
297
|
-
while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
|
298
|
-
line << word << " "
|
299
|
-
word = words.shift
|
300
|
-
end
|
301
|
-
lines << ((" " * indent) << line[0..-2])
|
302
|
-
line = ""
|
303
|
-
end
|
304
|
-
(lines * "\n")
|
305
|
-
else
|
306
|
-
paragraph
|
307
|
-
end
|
308
|
-
offset = 0
|
309
|
-
str
|
310
|
-
end*""
|
311
|
-
end
|
312
|
-
|
313
|
-
def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
|
314
|
-
dd = "" if dd.nil?
|
315
|
-
dt = dt.to_s + ":" unless dd.empty?
|
316
|
-
dt = Log.color color, dt if color
|
317
|
-
len = Log.uncolor(dt).length
|
318
|
-
|
319
|
-
if indent < 0
|
320
|
-
text = format_paragraph(dd, size, indent.abs+1, 0)
|
321
|
-
text = dt << "\n" << text
|
322
|
-
else
|
323
|
-
offset = len - indent
|
324
|
-
offset = 0 if offset < 0
|
325
|
-
text = format_paragraph(dd, size, indent.abs+1, offset)
|
326
|
-
text[0..len-1] = dt
|
327
|
-
end
|
328
|
-
text
|
329
|
-
end
|
330
|
-
|
331
|
-
def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow)
|
332
|
-
entries = []
|
333
|
-
defs.each do |dt,dd|
|
334
|
-
text = format_definition_list_item(dt,dd,size,indent,color)
|
335
|
-
entries << text
|
336
|
-
end
|
337
|
-
entries * "\n\n"
|
338
|
-
end
|
339
|
-
|
340
|
-
def self.read_stream(stream, size)
|
341
|
-
str = nil
|
342
|
-
Thread.pass while IO.select([stream],nil,nil,1).nil?
|
343
|
-
while not str = stream.read(size)
|
344
|
-
IO.select([stream],nil,nil,1)
|
345
|
-
Thread.pass
|
346
|
-
raise ClosedStream if stream.eof?
|
347
|
-
end
|
348
|
-
|
349
|
-
while str.length < size
|
350
|
-
raise ClosedStream if stream.eof?
|
351
|
-
IO.select([stream],nil,nil,1)
|
352
|
-
if new = stream.read(size-str.length)
|
353
|
-
str << new
|
354
|
-
end
|
355
|
-
end
|
356
|
-
str
|
357
|
-
end
|
358
|
-
|
359
|
-
def self.read_stream(stream, size)
|
360
|
-
str = nil
|
361
|
-
Thread.pass while IO.select([stream],nil,nil,1).nil?
|
362
|
-
while not str = stream.read(size)
|
363
|
-
IO.select([stream],nil,nil,1)
|
364
|
-
Thread.pass
|
365
|
-
raise ClosedStream if stream.eof?
|
366
|
-
end
|
367
|
-
|
368
|
-
while str.length < size
|
369
|
-
raise ClosedStream if stream.eof?
|
370
|
-
IO.select([stream],nil,nil,1)
|
371
|
-
if new = stream.read(size-str.length)
|
372
|
-
str << new
|
373
|
-
end
|
374
|
-
end
|
375
|
-
str
|
376
|
-
end
|
377
|
-
def self._read_stream(stream, size)
|
378
|
-
str = ""
|
379
|
-
while (len=str.length) < size
|
380
|
-
str << (stream.read(size-len) or break)
|
381
|
-
end
|
382
|
-
str
|
383
|
-
end
|
384
|
-
|
385
38
|
def self.parse_cmd_params(str)
|
386
39
|
return str if Array === str
|
387
40
|
str.scan(/
|
@@ -390,39 +43,6 @@ module Misc
|
|
390
43
|
/x).flatten.compact
|
391
44
|
end
|
392
45
|
|
393
|
-
def self.correct_icgc_mutation(pos, ref, mut_str)
|
394
|
-
mut = mut_str
|
395
|
-
mut = '-' * (mut_str.length - 1) if mut =~/^-[ACGT]/
|
396
|
-
mut = "+" << mut if ref == '-'
|
397
|
-
[pos, [mut]]
|
398
|
-
end
|
399
|
-
|
400
|
-
def self.correct_vcf_mutation(pos, ref, mut_str)
|
401
|
-
muts = mut_str.nil? ? [] : mut_str.split(',')
|
402
|
-
|
403
|
-
while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
|
404
|
-
ref = ref[1..-1]
|
405
|
-
pos = pos + 1
|
406
|
-
muts = muts.collect{|m| m[1..-1]}
|
407
|
-
end
|
408
|
-
|
409
|
-
muts = muts.collect do |m|
|
410
|
-
case
|
411
|
-
when ref.empty?
|
412
|
-
"+" << m
|
413
|
-
when (m.length < ref.length and (m.empty? or ref.index(m)))
|
414
|
-
"-" * (ref.length - m.length)
|
415
|
-
when (ref.length == 1 and m.length == 1)
|
416
|
-
m
|
417
|
-
else
|
418
|
-
Log.debug{"Cannot understand: #{[ref, m]} (#{ muts })"}
|
419
|
-
'-' * ref.length + m
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
[pos, muts]
|
424
|
-
end
|
425
|
-
|
426
46
|
def self.pid_exists?(pid)
|
427
47
|
return false if pid.nil?
|
428
48
|
begin
|
@@ -433,25 +53,6 @@ module Misc
|
|
433
53
|
end
|
434
54
|
end
|
435
55
|
|
436
|
-
COLOR_LIST = %w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
|
437
|
-
|
438
|
-
def self.colors_for(list)
|
439
|
-
unused = COLOR_LIST.dup
|
440
|
-
|
441
|
-
used = {}
|
442
|
-
colors = list.collect do |elem|
|
443
|
-
if used.include? elem
|
444
|
-
used[elem]
|
445
|
-
else
|
446
|
-
color = unused.shift
|
447
|
-
used[elem]=color
|
448
|
-
color
|
449
|
-
end
|
450
|
-
end
|
451
|
-
|
452
|
-
[colors, used]
|
453
|
-
end
|
454
|
-
|
455
56
|
def self.collapse_ranges(ranges)
|
456
57
|
processed = []
|
457
58
|
last = nil
|
@@ -527,10 +128,6 @@ module Misc
|
|
527
128
|
end
|
528
129
|
end
|
529
130
|
|
530
|
-
Log2Multiplier = 1.0 / Math.log(2.0)
|
531
|
-
def self.log2(x)
|
532
|
-
Math.log(x) * Log2Multiplier
|
533
|
-
end
|
534
131
|
|
535
132
|
def self.prepare_entity(entity, field, options = {})
|
536
133
|
return entity unless defined? Entity
|
@@ -555,116 +152,6 @@ module Misc
|
|
555
152
|
entity
|
556
153
|
end
|
557
154
|
|
558
|
-
ARRAY_MAX_LENGTH = 1000
|
559
|
-
STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
|
560
|
-
|
561
|
-
def self.sanitize_filename(filename, length = 254)
|
562
|
-
if filename.length > length
|
563
|
-
if filename =~ /(\..{2,9})$/
|
564
|
-
extension = $1
|
565
|
-
else
|
566
|
-
extension = ''
|
567
|
-
end
|
568
|
-
|
569
|
-
post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
|
570
|
-
|
571
|
-
filename = filename[0..(length - post_fix.length - 1)] << post_fix
|
572
|
-
else
|
573
|
-
filename
|
574
|
-
end
|
575
|
-
filename
|
576
|
-
end
|
577
|
-
|
578
|
-
def self.fingerprint(obj)
|
579
|
-
case obj
|
580
|
-
when nil
|
581
|
-
"nil"
|
582
|
-
when (defined? Step and Step)
|
583
|
-
obj.path || Misc.fingerprint([obj.task.name, obj.inputs])
|
584
|
-
when TrueClass
|
585
|
-
"true"
|
586
|
-
when FalseClass
|
587
|
-
"false"
|
588
|
-
when Symbol
|
589
|
-
":" << obj.to_s
|
590
|
-
when String
|
591
|
-
if obj.length > 100
|
592
|
-
"'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
|
593
|
-
else
|
594
|
-
"'" << obj << "'"
|
595
|
-
end
|
596
|
-
when (defined? AnnotatedArray and AnnotatedArray)
|
597
|
-
"<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
|
598
|
-
when (defined? TSV and TSV::Parser)
|
599
|
-
"<TSVStream:" + obj.filename + "--" << Misc.fingerprint(obj.options) << ">"
|
600
|
-
when IO
|
601
|
-
"<IO:" + (obj.respond_to?(:filename) ? obj.filename : obj.inspect) + ">"
|
602
|
-
when File
|
603
|
-
"<File:" + obj.path + ">"
|
604
|
-
when Array
|
605
|
-
if (length = obj.length) > 10
|
606
|
-
"[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
|
607
|
-
else
|
608
|
-
"[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
|
609
|
-
end
|
610
|
-
when (defined? TSV and TSV)
|
611
|
-
obj.with_unnamed do
|
612
|
-
"TSV:{"<< fingerprint(obj.all_fields|| []).inspect << ";" << fingerprint(obj.keys).inspect << "}"
|
613
|
-
end
|
614
|
-
when Hash
|
615
|
-
if obj.length > 10
|
616
|
-
"H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
|
617
|
-
else
|
618
|
-
new = "{"
|
619
|
-
obj.each do |k,v|
|
620
|
-
new << k.to_s << '=>' << fingerprint(v) << ' '
|
621
|
-
end
|
622
|
-
if new.length > 1
|
623
|
-
new[-1] = "}"
|
624
|
-
else
|
625
|
-
new << '}'
|
626
|
-
end
|
627
|
-
new
|
628
|
-
end
|
629
|
-
else
|
630
|
-
obj.to_s
|
631
|
-
end
|
632
|
-
end
|
633
|
-
|
634
|
-
|
635
|
-
def self.remove_long_items(obj)
|
636
|
-
case
|
637
|
-
when IO === obj
|
638
|
-
remove_long_items("IO: " + obj.filename)
|
639
|
-
when obj.respond_to?(:path)
|
640
|
-
remove_long_items("File: " + obj.path)
|
641
|
-
when TSV::Parser === obj
|
642
|
-
remove_long_items("TSV Stream: " + obj.filename + " -- " << Misc.fingerprint(obj.options))
|
643
|
-
when TSV === obj
|
644
|
-
remove_long_items((obj.all_fields || []) + obj.keys.sort)
|
645
|
-
when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
|
646
|
-
remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
|
647
|
-
when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
|
648
|
-
remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH } (#{obj.length})"])
|
649
|
-
when (String === obj and obj.length > STRING_MAX_LENGTH)
|
650
|
-
obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH} (#{obj.length})"
|
651
|
-
when Hash === obj
|
652
|
-
new = {}
|
653
|
-
obj.each do |k,v|
|
654
|
-
new[k] = remove_long_items(v)
|
655
|
-
end
|
656
|
-
new
|
657
|
-
when Array === obj
|
658
|
-
obj.collect do |e| remove_long_items(e) end
|
659
|
-
else
|
660
|
-
obj
|
661
|
-
end
|
662
|
-
end
|
663
|
-
|
664
|
-
#def self.remove_long_items(obj)
|
665
|
-
# return fingerprint(obj)
|
666
|
-
#end
|
667
|
-
|
668
155
|
def self.ensembl_server(organism)
|
669
156
|
date = organism.split("/")[1]
|
670
157
|
if date.nil?
|
@@ -674,18 +161,6 @@ module Misc
|
|
674
161
|
end
|
675
162
|
end
|
676
163
|
|
677
|
-
def self.filename?(string)
|
678
|
-
String === string and string.length > 0 and string.length < 250 and File.exists?(string)
|
679
|
-
end
|
680
|
-
|
681
|
-
def self.max(list)
|
682
|
-
max = nil
|
683
|
-
list.each do |v|
|
684
|
-
next if v.nil?
|
685
|
-
max = v if max.nil? or v > max
|
686
|
-
end
|
687
|
-
max
|
688
|
-
end
|
689
164
|
|
690
165
|
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
|
691
166
|
name1 ||= "list 1"
|
@@ -710,20 +185,6 @@ module Misc
|
|
710
185
|
url = "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
|
711
186
|
end
|
712
187
|
|
713
|
-
def self.sum(list)
|
714
|
-
list.compact.inject(0.0){|acc,e| acc += e}
|
715
|
-
end
|
716
|
-
|
717
|
-
def self.mean(list)
|
718
|
-
sum(list) / list.compact.length
|
719
|
-
end
|
720
|
-
|
721
|
-
def self.sd(list)
|
722
|
-
return nil if list.length < 3
|
723
|
-
mean = mean(list)
|
724
|
-
Math.sqrt(list.compact.inject(0.0){|acc,e| d = e - mean; acc += d * d}) / (list.compact.length - 1)
|
725
|
-
end
|
726
|
-
|
727
188
|
def self.consolidate(list)
|
728
189
|
list.inject(nil){|acc,e|
|
729
190
|
if acc.nil?
|
@@ -795,208 +256,6 @@ end
|
|
795
256
|
proportions
|
796
257
|
end
|
797
258
|
|
798
|
-
IUPAC2BASE = {
|
799
|
-
"A" => ["A"],
|
800
|
-
"C" => ["C"],
|
801
|
-
"G" => ["G"],
|
802
|
-
"T" => ["T"],
|
803
|
-
"U" => ["U"],
|
804
|
-
"R" => "A or G".split(" or "),
|
805
|
-
"Y" => "C or T".split(" or "),
|
806
|
-
"S" => "G or C".split(" or "),
|
807
|
-
"W" => "A or T".split(" or "),
|
808
|
-
"K" => "G or T".split(" or "),
|
809
|
-
"M" => "A or C".split(" or "),
|
810
|
-
"B" => "C or G or T".split(" or "),
|
811
|
-
"D" => "A or G or T".split(" or "),
|
812
|
-
"H" => "A or C or T".split(" or "),
|
813
|
-
"V" => "A or C or G".split(" or "),
|
814
|
-
"N" => %w(A C T G),
|
815
|
-
}
|
816
|
-
|
817
|
-
BASE2COMPLEMENT = {
|
818
|
-
"A" => "T",
|
819
|
-
"C" => "G",
|
820
|
-
"G" => "C",
|
821
|
-
"T" => "A",
|
822
|
-
"U" => "A",
|
823
|
-
}
|
824
|
-
|
825
|
-
THREE_TO_ONE_AA_CODE = {
|
826
|
-
"ala" => "A",
|
827
|
-
"arg" => "R",
|
828
|
-
"asn" => "N",
|
829
|
-
"asp" => "D",
|
830
|
-
"cys" => "C",
|
831
|
-
"glu" => "E",
|
832
|
-
"gln" => "Q",
|
833
|
-
"gly" => "G",
|
834
|
-
"his" => "H",
|
835
|
-
"ile" => "I",
|
836
|
-
"leu" => "L",
|
837
|
-
"lys" => "K",
|
838
|
-
"met" => "M",
|
839
|
-
"phe" => "F",
|
840
|
-
"pro" => "P",
|
841
|
-
"ser" => "S",
|
842
|
-
"thr" => "T",
|
843
|
-
"trp" => "W",
|
844
|
-
"tyr" => "Y",
|
845
|
-
"val" => "V"
|
846
|
-
}
|
847
|
-
CODON_TABLE = {
|
848
|
-
"ATT" => "I",
|
849
|
-
"ATC" => "I",
|
850
|
-
"ATA" => "I",
|
851
|
-
"CTT" => "L",
|
852
|
-
"CTC" => "L",
|
853
|
-
"CTA" => "L",
|
854
|
-
"CTG" => "L",
|
855
|
-
"TTA" => "L",
|
856
|
-
"TTG" => "L",
|
857
|
-
"GTT" => "V",
|
858
|
-
"GTC" => "V",
|
859
|
-
"GTA" => "V",
|
860
|
-
"GTG" => "V",
|
861
|
-
"TTT" => "F",
|
862
|
-
"TTC" => "F",
|
863
|
-
"ATG" => "M",
|
864
|
-
"TGT" => "C",
|
865
|
-
"TGC" => "C",
|
866
|
-
"GCT" => "A",
|
867
|
-
"GCC" => "A",
|
868
|
-
"GCA" => "A",
|
869
|
-
"GCG" => "A",
|
870
|
-
"GGT" => "G",
|
871
|
-
"GGC" => "G",
|
872
|
-
"GGA" => "G",
|
873
|
-
"GGG" => "G",
|
874
|
-
"CCT" => "P",
|
875
|
-
"CCC" => "P",
|
876
|
-
"CCA" => "P",
|
877
|
-
"CCG" => "P",
|
878
|
-
"ACT" => "T",
|
879
|
-
"ACC" => "T",
|
880
|
-
"ACA" => "T",
|
881
|
-
"ACG" => "T",
|
882
|
-
"TCT" => "S",
|
883
|
-
"TCC" => "S",
|
884
|
-
"TCA" => "S",
|
885
|
-
"TCG" => "S",
|
886
|
-
"AGT" => "S",
|
887
|
-
"AGC" => "S",
|
888
|
-
"TAT" => "Y",
|
889
|
-
"TAC" => "Y",
|
890
|
-
"TGG" => "W",
|
891
|
-
"CAA" => "Q",
|
892
|
-
"CAG" => "Q",
|
893
|
-
"AAT" => "N",
|
894
|
-
"AAC" => "N",
|
895
|
-
"CAT" => "H",
|
896
|
-
"CAC" => "H",
|
897
|
-
"GAA" => "E",
|
898
|
-
"GAG" => "E",
|
899
|
-
"GAT" => "D",
|
900
|
-
"GAC" => "D",
|
901
|
-
"AAA" => "K",
|
902
|
-
"AAG" => "K",
|
903
|
-
"CGT" => "R",
|
904
|
-
"CGC" => "R",
|
905
|
-
"CGA" => "R",
|
906
|
-
"CGG" => "R",
|
907
|
-
"AGA" => "R",
|
908
|
-
"AGG" => "R",
|
909
|
-
"TAA" => "*",
|
910
|
-
"TAG" => "*",
|
911
|
-
"TGA" => "*",
|
912
|
-
}
|
913
|
-
|
914
|
-
#def self.fast_align(reference, sequence)
|
915
|
-
#
|
916
|
-
#require 'narray'
|
917
|
-
# init_gap = -1
|
918
|
-
# gap = -2
|
919
|
-
# diff = -2
|
920
|
-
# same = 2
|
921
|
-
|
922
|
-
# cols = sequence.length + 1
|
923
|
-
# rows = reference.length + 1
|
924
|
-
|
925
|
-
# a = NArray.int(cols, rows)
|
926
|
-
|
927
|
-
# for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
|
928
|
-
# for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end
|
929
|
-
|
930
|
-
# spos = 1
|
931
|
-
# while spos < cols do
|
932
|
-
# rpos = 1
|
933
|
-
# while rpos < rows do
|
934
|
-
# match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
|
935
|
-
# skip_sequence = a[spos-1,rpos] + gap
|
936
|
-
# skip_reference = a[spos,rpos-1] + gap
|
937
|
-
# a[spos,rpos] = [match, skip_sequence, skip_reference].max
|
938
|
-
# rpos += 1
|
939
|
-
# end
|
940
|
-
# spos += 1
|
941
|
-
# end
|
942
|
-
|
943
|
-
# start = Misc.max(a[-1,0..rows-1])
|
944
|
-
# start_pos = a[-1,0..rows-1].to_a.index start
|
945
|
-
|
946
|
-
# ref = ''
|
947
|
-
# seq = ''
|
948
|
-
# rpos = start_pos
|
949
|
-
# spos = cols - 1
|
950
|
-
|
951
|
-
# while spos > 0 and rpos > 0
|
952
|
-
# score = a[spos,rpos]
|
953
|
-
# score_match = a[spos-1,rpos-1]
|
954
|
-
# score_skip_reference = a[spos,rpos-1]
|
955
|
-
# score_skip_sequence = a[spos-1,rpos]
|
956
|
-
|
957
|
-
# case
|
958
|
-
# when score == score_match + (sequence[spos-1] != reference[rpos-1] ? diff : same)
|
959
|
-
# ref << reference[rpos-1]
|
960
|
-
# seq << sequence[spos-1]
|
961
|
-
# spos -= 1
|
962
|
-
# rpos -= 1
|
963
|
-
# when score == score_skip_reference + gap
|
964
|
-
# ref << reference[rpos-1]
|
965
|
-
# seq << '-'
|
966
|
-
# rpos -= 1
|
967
|
-
# when score == score_skip_sequence + gap
|
968
|
-
# seq << sequence[spos-1]
|
969
|
-
# ref << '-'
|
970
|
-
# spos -= 1
|
971
|
-
# else
|
972
|
-
# raise "stop"
|
973
|
-
# end
|
974
|
-
# end
|
975
|
-
|
976
|
-
# while (rpos > 0)
|
977
|
-
# ref << reference[rpos-1]
|
978
|
-
# seq = seq << '-'
|
979
|
-
# rpos -= 1
|
980
|
-
# end
|
981
|
-
|
982
|
-
# while (spos > 0)
|
983
|
-
# seq << sequence[spos-1]
|
984
|
-
# ref = ref + '-'
|
985
|
-
# spos -= 1
|
986
|
-
# end
|
987
|
-
#
|
988
|
-
# [ref.reverse + reference[start_pos..-1], seq.reverse + '-' * (rows - start_pos - 1)]
|
989
|
-
#end
|
990
|
-
|
991
|
-
def self.IUPAC_to_base(iupac)
|
992
|
-
IUPAC2BASE[iupac]
|
993
|
-
end
|
994
|
-
|
995
|
-
def self.is_filename?(string)
|
996
|
-
return true if string.respond_to? :exists
|
997
|
-
return true if String === string and string.length < 265 and File.exists? string
|
998
|
-
return false
|
999
|
-
end
|
1000
259
|
|
1001
260
|
def self.sorted_array_hits(a1, a2)
|
1002
261
|
e1, e2 = a1.shift, a2.shift
|
@@ -1132,100 +391,6 @@ end
|
|
1132
391
|
end
|
1133
392
|
end
|
1134
393
|
|
1135
|
-
def self.benchmark(repeats = 1, message = nil)
|
1136
|
-
require 'benchmark'
|
1137
|
-
res = nil
|
1138
|
-
begin
|
1139
|
-
measure = Benchmark.measure do
|
1140
|
-
repeats.times do
|
1141
|
-
res = yield
|
1142
|
-
end
|
1143
|
-
end
|
1144
|
-
if message
|
1145
|
-
puts "#{message }: #{ repeats } repeats"
|
1146
|
-
else
|
1147
|
-
puts "Benchmark for #{ repeats } repeats"
|
1148
|
-
end
|
1149
|
-
puts measure
|
1150
|
-
rescue Exception
|
1151
|
-
puts "Benchmark aborted"
|
1152
|
-
raise $!
|
1153
|
-
end
|
1154
|
-
res
|
1155
|
-
end
|
1156
|
-
|
1157
|
-
def self.profile_html(options = {})
|
1158
|
-
require 'ruby-prof'
|
1159
|
-
RubyProf.start
|
1160
|
-
begin
|
1161
|
-
res = yield
|
1162
|
-
rescue Exception
|
1163
|
-
puts "Profiling aborted"
|
1164
|
-
raise $!
|
1165
|
-
ensure
|
1166
|
-
result = RubyProf.stop
|
1167
|
-
printer = RubyProf::MultiPrinter.new(result)
|
1168
|
-
TmpFile.with_file do |dir|
|
1169
|
-
FileUtils.mkdir_p dir unless File.exists? dir
|
1170
|
-
printer.print(:path => dir, :profile => 'profile')
|
1171
|
-
CMD.cmd("firefox -no-remote '#{ dir }'")
|
1172
|
-
end
|
1173
|
-
end
|
1174
|
-
|
1175
|
-
res
|
1176
|
-
end
|
1177
|
-
|
1178
|
-
def self.profile_graph(options = {})
|
1179
|
-
require 'ruby-prof'
|
1180
|
-
RubyProf.start
|
1181
|
-
begin
|
1182
|
-
res = yield
|
1183
|
-
rescue Exception
|
1184
|
-
puts "Profiling aborted"
|
1185
|
-
raise $!
|
1186
|
-
ensure
|
1187
|
-
result = RubyProf.stop
|
1188
|
-
#result.eliminate_methods!([/annotated_array_clean_/])
|
1189
|
-
printer = RubyProf::GraphPrinter.new(result)
|
1190
|
-
printer.print(STDOUT, options)
|
1191
|
-
end
|
1192
|
-
|
1193
|
-
res
|
1194
|
-
end
|
1195
|
-
|
1196
|
-
def self.profile(options = {})
|
1197
|
-
require 'ruby-prof'
|
1198
|
-
RubyProf.start
|
1199
|
-
begin
|
1200
|
-
res = yield
|
1201
|
-
rescue Exception
|
1202
|
-
puts "Profiling aborted"
|
1203
|
-
raise $!
|
1204
|
-
ensure
|
1205
|
-
result = RubyProf.stop
|
1206
|
-
printer = RubyProf::FlatPrinter.new(result)
|
1207
|
-
printer.print(STDOUT, options)
|
1208
|
-
end
|
1209
|
-
|
1210
|
-
res
|
1211
|
-
end
|
1212
|
-
|
1213
|
-
def self.memprof
|
1214
|
-
require 'memprof'
|
1215
|
-
Memprof.start
|
1216
|
-
begin
|
1217
|
-
res = yield
|
1218
|
-
rescue Exception
|
1219
|
-
puts "Profiling aborted"
|
1220
|
-
raise $!
|
1221
|
-
ensure
|
1222
|
-
Memprof.stop
|
1223
|
-
print Memprof.stats
|
1224
|
-
end
|
1225
|
-
|
1226
|
-
res
|
1227
|
-
end
|
1228
|
-
|
1229
394
|
def self.do_once(&block)
|
1230
395
|
return nil if $__did_once
|
1231
396
|
$__did_once = true
|
@@ -1334,22 +499,6 @@ end
|
|
1334
499
|
html
|
1335
500
|
end
|
1336
501
|
|
1337
|
-
#def self.path_relative_to(basedir, path)
|
1338
|
-
# path = File.expand_path(path) unless path[0] == "/"
|
1339
|
-
# basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
1340
|
-
|
1341
|
-
# basedir << "/" unless basedir[-1] == "/"
|
1342
|
-
# case
|
1343
|
-
# when path == basedir
|
1344
|
-
# "."
|
1345
|
-
# #when path =~ /#{Regexp.quote basedir}\/(.*)/
|
1346
|
-
# when path.index(basedir) == 0
|
1347
|
-
# return path[basedir.length..-1]
|
1348
|
-
# else
|
1349
|
-
# return nil
|
1350
|
-
# end
|
1351
|
-
#end
|
1352
|
-
|
1353
502
|
def self.path_relative_to(basedir, path)
|
1354
503
|
path = File.expand_path(path) unless path[0] == "/"
|
1355
504
|
basedir = File.expand_path(basedir) unless basedir[0] == "/"
|
@@ -1369,88 +518,6 @@ end
|
|
1369
518
|
@hostanem ||= `hostname`.strip
|
1370
519
|
end
|
1371
520
|
|
1372
|
-
LOCK_MUTEX = Mutex.new
|
1373
|
-
def self.lock(file, unlock = true)
|
1374
|
-
return yield if file.nil?
|
1375
|
-
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
1376
|
-
|
1377
|
-
res = nil
|
1378
|
-
|
1379
|
-
lock_path = File.expand_path(file + '.lock')
|
1380
|
-
lockfile = Lockfile.new(lock_path)
|
1381
|
-
|
1382
|
-
hostname = Misc.hostname
|
1383
|
-
LOCK_MUTEX.synchronize do
|
1384
|
-
Misc.insist 2, 0.1 do
|
1385
|
-
Misc.insist 3, 0.1 do
|
1386
|
-
begin
|
1387
|
-
if File.exists? lock_path
|
1388
|
-
info = Open.open(lock_path){|f| YAML.load(f) }
|
1389
|
-
raise "No info" unless info
|
1390
|
-
|
1391
|
-
if hostname == info["host"] and not Misc.pid_exists?(info["pid"])
|
1392
|
-
Log.info("Removing lockfile: #{lock_path}. This pid #{Process.pid}. Content: #{info.inspect}")
|
1393
|
-
FileUtils.rm lock_path
|
1394
|
-
end
|
1395
|
-
end
|
1396
|
-
rescue Exception
|
1397
|
-
FileUtils.rm lock_path if File.exists? lock_path
|
1398
|
-
lockfile = Lockfile.new(lock_path) unless File.exists? lock_path
|
1399
|
-
raise $!
|
1400
|
-
end
|
1401
|
-
end
|
1402
|
-
end
|
1403
|
-
end
|
1404
|
-
|
1405
|
-
begin
|
1406
|
-
lockfile.lock
|
1407
|
-
res = yield lockfile
|
1408
|
-
rescue Lockfile::StolenLockError
|
1409
|
-
unlock = false
|
1410
|
-
rescue KeepLocked
|
1411
|
-
unlock = false
|
1412
|
-
res = $!.payload
|
1413
|
-
ensure
|
1414
|
-
if unlock and lockfile.locked?
|
1415
|
-
lockfile.unlock
|
1416
|
-
end
|
1417
|
-
end
|
1418
|
-
|
1419
|
-
res
|
1420
|
-
end
|
1421
|
-
|
1422
|
-
|
1423
|
-
LOCK_REPO_SERIALIZER=Marshal
|
1424
|
-
def self.lock_in_repo(repo, key, *args)
|
1425
|
-
return yield file, *args if repo.nil? or key.nil?
|
1426
|
-
|
1427
|
-
lock_key = "lock-" << key
|
1428
|
-
|
1429
|
-
begin
|
1430
|
-
if repo[lock_key] and
|
1431
|
-
Misc.hostname == (info = LOCK_REPO_SERIALIZER.load(repo[lock_key]))["host"] and
|
1432
|
-
info["pid"] and not Misc.pid_exists?(info["pid"])
|
1433
|
-
|
1434
|
-
Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
|
1435
|
-
repo.out lock_key
|
1436
|
-
end
|
1437
|
-
rescue
|
1438
|
-
Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
|
1439
|
-
repo.out lock_key if repo.include? lock_key
|
1440
|
-
end
|
1441
|
-
|
1442
|
-
while repo[lock_key]
|
1443
|
-
sleep 1
|
1444
|
-
end
|
1445
|
-
|
1446
|
-
repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})
|
1447
|
-
|
1448
|
-
res = yield lock_key, *args
|
1449
|
-
|
1450
|
-
repo.delete lock_key
|
1451
|
-
|
1452
|
-
res
|
1453
|
-
end
|
1454
521
|
|
1455
522
|
def self.common_path(dir, file)
|
1456
523
|
file = File.expand_path file
|
@@ -1479,32 +546,6 @@ end
|
|
1479
546
|
res
|
1480
547
|
end
|
1481
548
|
|
1482
|
-
def self.to_utf8(string)
|
1483
|
-
string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
|
1484
|
-
end
|
1485
|
-
|
1486
|
-
def self.fixutf8(string)
|
1487
|
-
return nil if string.nil?
|
1488
|
-
return string if (string.respond_to? :valid_encoding? and string.valid_encoding?) or
|
1489
|
-
(string.respond_to? :valid_encoding and string.valid_encoding)
|
1490
|
-
|
1491
|
-
if string.respond_to?(:encode)
|
1492
|
-
string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
|
1493
|
-
else
|
1494
|
-
require 'iconv'
|
1495
|
-
@@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
|
1496
|
-
@@ic.iconv(string)
|
1497
|
-
end
|
1498
|
-
end
|
1499
|
-
|
1500
|
-
def self.fixascii(string)
|
1501
|
-
if string.respond_to?(:encode)
|
1502
|
-
self.fixutf8(string).encode("ASCII-8BIT")
|
1503
|
-
else
|
1504
|
-
string
|
1505
|
-
end
|
1506
|
-
end
|
1507
|
-
|
1508
549
|
def self.sensiblewrite(path, content = nil, &block)
|
1509
550
|
return if File.exists? path
|
1510
551
|
tmp_path = path + '.sensible_write'
|
@@ -1556,69 +597,6 @@ end
|
|
1556
597
|
new_options
|
1557
598
|
end
|
1558
599
|
|
1559
|
-
def self.digest(text)
|
1560
|
-
Digest::MD5.hexdigest(text)
|
1561
|
-
end
|
1562
|
-
|
1563
|
-
HASH2MD5_MAX_STRING_LENGTH = 1000
|
1564
|
-
HASH2MD5_MAX_ARRAY_LENGTH = 100
|
1565
|
-
def self.hash2md5(hash)
|
1566
|
-
str = ""
|
1567
|
-
keys = hash.keys
|
1568
|
-
keys = keys.clean_annotations if keys.respond_to? :clean_annotations
|
1569
|
-
keys = keys.sort_by{|k| k.to_s}
|
1570
|
-
|
1571
|
-
if hash.respond_to? :unnamed
|
1572
|
-
unnamed = hash.unnamed
|
1573
|
-
hash.unnamed = true
|
1574
|
-
end
|
1575
|
-
keys.each do |k|
|
1576
|
-
next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
|
1577
|
-
v = hash[k]
|
1578
|
-
case
|
1579
|
-
when TrueClass === v
|
1580
|
-
str << k.to_s << "=>true"
|
1581
|
-
when FalseClass === v
|
1582
|
-
str << k.to_s << "=>false"
|
1583
|
-
when Hash === v
|
1584
|
-
str << k.to_s << "=>" << hash2md5(v)
|
1585
|
-
when Symbol === v
|
1586
|
-
str << k.to_s << "=>" << v.to_s
|
1587
|
-
when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
|
1588
|
-
str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
|
1589
|
-
when String === v
|
1590
|
-
str << k.to_s << "=>" << v
|
1591
|
-
when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
|
1592
|
-
str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
|
1593
|
-
when TSV::Parser === v
|
1594
|
-
str << remove_long_items(v)
|
1595
|
-
when Array === v
|
1596
|
-
str << k.to_s << "=>[" << v * "," << "]"
|
1597
|
-
when File === v
|
1598
|
-
str << k.to_s << "=>[File:" << v.path << "]"
|
1599
|
-
else
|
1600
|
-
v_ins = v.inspect
|
1601
|
-
|
1602
|
-
case
|
1603
|
-
when v_ins =~ /:0x0/
|
1604
|
-
str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
|
1605
|
-
else
|
1606
|
-
str << k.to_s << "=>" << v_ins
|
1607
|
-
end
|
1608
|
-
|
1609
|
-
end
|
1610
|
-
|
1611
|
-
str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
|
1612
|
-
end
|
1613
|
-
hash.unnamed = unnamed if hash.respond_to? :unnamed
|
1614
|
-
|
1615
|
-
if str.empty?
|
1616
|
-
""
|
1617
|
-
else
|
1618
|
-
digest(str)
|
1619
|
-
end
|
1620
|
-
end
|
1621
|
-
|
1622
600
|
def self.process_options(hash, *keys)
|
1623
601
|
if keys.length == 1
|
1624
602
|
hash.include?(keys.first.to_sym) ? hash.delete(keys.first.to_sym) : hash.delete(keys.first.to_s)
|
@@ -1815,7 +793,6 @@ end
|
|
1815
793
|
chunks
|
1816
794
|
end
|
1817
795
|
|
1818
|
-
|
1819
796
|
def self.append_zipped(current, new)
|
1820
797
|
current.each do |v|
|
1821
798
|
n = new.shift
|
@@ -1832,145 +809,6 @@ end
|
|
1832
809
|
return [] if array.empty? or (first = array.first).nil?
|
1833
810
|
first.zip(*array[1..-1])
|
1834
811
|
end
|
1835
|
-
|
1836
|
-
def self.camel_case(string)
|
1837
|
-
return string if string !~ /_/ && string =~ /[A-Z]+.*/
|
1838
|
-
string.split(/_|(\d+)/).map{|e|
|
1839
|
-
(e =~ /^[A-Z]{2,}$/ ? e : e.capitalize)
|
1840
|
-
}.join
|
1841
|
-
end
|
1842
|
-
|
1843
|
-
def self.camel_case_lower(string)
|
1844
|
-
string.split('_').inject([]){ |buffer,e|
|
1845
|
-
buffer.push(buffer.empty? ? e.downcase : (e =~ /^[A-Z]{2,}$/ ? e : e.capitalize))
|
1846
|
-
}.join
|
1847
|
-
end
|
1848
|
-
|
1849
|
-
def self.snake_case(string)
|
1850
|
-
return nil if string.nil?
|
1851
|
-
string = string.to_s if Symbol === string
|
1852
|
-
string.
|
1853
|
-
gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2').
|
1854
|
-
gsub(/([a-z])([A-Z])/,'\1_\2').
|
1855
|
-
gsub(/\s/,'_').gsub(/[^\w_]/, '').
|
1856
|
-
split("_").collect{|p| p.match(/[A-Z]{2,}/) ? p : p.downcase } * "_"
|
1857
|
-
end
|
1858
|
-
|
1859
|
-
# source: https://gist.github.com/ekdevdes/2450285
|
1860
|
-
# author: Ethan Kramer (https://github.com/ekdevdes)
|
1861
|
-
def self.humanize(value, options = {})
|
1862
|
-
if options.empty?
|
1863
|
-
options[:format] = :sentence
|
1864
|
-
end
|
1865
|
-
|
1866
|
-
values = []
|
1867
|
-
values = value.split('_')
|
1868
|
-
values.each_index do |index|
|
1869
|
-
# lower case each item in array
|
1870
|
-
# Miguel Vazquez edit: Except for acronyms
|
1871
|
-
values[index].downcase! unless values[index].match(/[a-zA-Z][A-Z]/)
|
1872
|
-
end
|
1873
|
-
if options[:format] == :allcaps
|
1874
|
-
values.each do |value|
|
1875
|
-
value.capitalize!
|
1876
|
-
end
|
1877
|
-
|
1878
|
-
if options.empty?
|
1879
|
-
options[:seperator] = " "
|
1880
|
-
end
|
1881
|
-
|
1882
|
-
return values.join " "
|
1883
|
-
end
|
1884
|
-
|
1885
|
-
if options[:format] == :class
|
1886
|
-
values.each do |value|
|
1887
|
-
value.capitalize!
|
1888
|
-
end
|
1889
|
-
|
1890
|
-
return values.join ""
|
1891
|
-
end
|
1892
|
-
|
1893
|
-
if options[:format] == :sentence
|
1894
|
-
values[0].capitalize! unless values[0].match(/[a-zA-Z][A-Z]/)
|
1895
|
-
|
1896
|
-
return values.join " "
|
1897
|
-
end
|
1898
|
-
|
1899
|
-
if options[:format] == :nocaps
|
1900
|
-
return values.join " "
|
1901
|
-
end
|
1902
|
-
end
|
1903
|
-
end
|
1904
|
-
|
1905
|
-
#TODO: REMOVE
|
1906
|
-
#class RBBTError < StandardError
|
1907
|
-
# attr_accessor :info
|
1908
|
-
#
|
1909
|
-
# alias old_to_s to_s
|
1910
|
-
# def to_s
|
1911
|
-
# str = old_to_s.dup
|
1912
|
-
# if info
|
1913
|
-
# str << "\n" << "Additional Info:\n---\n" << info << "---"
|
1914
|
-
# end
|
1915
|
-
# str
|
1916
|
-
# end
|
1917
|
-
#end
|
1918
|
-
|
1919
|
-
module IndiferentHash
|
1920
|
-
|
1921
|
-
def self.setup(hash)
|
1922
|
-
hash.extend IndiferentHash
|
1923
|
-
end
|
1924
|
-
|
1925
|
-
def merge(other)
|
1926
|
-
new = self.dup
|
1927
|
-
IndiferentHash.setup(new)
|
1928
|
-
other.each do |k,value|
|
1929
|
-
new.delete k
|
1930
|
-
new[k] = value
|
1931
|
-
end
|
1932
|
-
new
|
1933
|
-
end
|
1934
|
-
|
1935
|
-
def [](key)
|
1936
|
-
res = super(key)
|
1937
|
-
return res unless res.nil?
|
1938
|
-
|
1939
|
-
case key
|
1940
|
-
when Symbol, Module
|
1941
|
-
super(key.to_s)
|
1942
|
-
when String
|
1943
|
-
super(key.to_sym)
|
1944
|
-
else
|
1945
|
-
super(key)
|
1946
|
-
end
|
1947
|
-
end
|
1948
|
-
|
1949
|
-
def values_at(*key_list)
|
1950
|
-
key_list.inject([]){|acc,key| acc << self[key]}
|
1951
|
-
end
|
1952
|
-
|
1953
|
-
def include?(key)
|
1954
|
-
case key
|
1955
|
-
when Symbol, Module
|
1956
|
-
super(key) || super(key.to_s)
|
1957
|
-
when String
|
1958
|
-
super(key) || super(key.to_sym)
|
1959
|
-
else
|
1960
|
-
super(key)
|
1961
|
-
end
|
1962
|
-
end
|
1963
|
-
|
1964
|
-
def delete(key)
|
1965
|
-
case key
|
1966
|
-
when Symbol, Module
|
1967
|
-
super(key) || super(key.to_s)
|
1968
|
-
when String
|
1969
|
-
super(key) || super(key.to_sym)
|
1970
|
-
else
|
1971
|
-
super(key)
|
1972
|
-
end
|
1973
|
-
end
|
1974
812
|
end
|
1975
813
|
|
1976
814
|
module PDF2Text
|