rbbt-util 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rbbt_query.rb +1 -1
- data/lib/rbbt/util/cmd.rb +115 -67
- data/lib/rbbt/util/fix_width_table.rb +18 -3
- data/lib/rbbt/util/misc.rb +106 -6
- data/lib/rbbt/util/open.rb +9 -7
- data/lib/rbbt/util/persistence.rb +17 -14
- data/lib/rbbt/util/resource.rb +10 -3
- data/lib/rbbt/util/task.rb +2 -2
- data/lib/rbbt/util/task/job.rb +16 -3
- data/lib/rbbt/util/tc_hash.rb +64 -27
- data/lib/rbbt/util/tsv.rb +44 -21
- data/lib/rbbt/util/tsv/accessor.rb +8 -6
- data/lib/rbbt/util/tsv/attach.rb +19 -28
- data/lib/rbbt/util/tsv/filters.rb +193 -0
- data/lib/rbbt/util/tsv/index.rb +80 -8
- data/lib/rbbt/util/tsv/manipulate.rb +17 -6
- data/lib/rbbt/util/tsv/misc.rb +10 -0
- data/lib/rbbt/util/tsv/parse.rb +18 -1
- data/lib/rbbt/util/workflow.rb +12 -3
- data/lib/rbbt/util/workflow/soap.rb +0 -1
- data/share/install/software/lib/install_helpers +0 -2
- data/share/lib/R/util.R +3 -3
- data/test/rbbt/util/test_cmd.rb +23 -0
- data/test/rbbt/util/test_excel2tsv.rb +1 -1
- data/test/rbbt/util/test_misc.rb +41 -11
- data/test/rbbt/util/test_open.rb +2 -2
- data/test/rbbt/util/test_persistence.rb +2 -2
- data/test/rbbt/util/test_resource.rb +4 -20
- data/test/rbbt/util/test_tc_hash.rb +38 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/util/test_tsv.rb +6 -0
- data/test/rbbt/util/test_workflow.rb +14 -10
- data/test/rbbt/util/tsv/test_accessor.rb +42 -0
- data/test/rbbt/util/tsv/test_filters.rb +141 -0
- data/test/rbbt/util/tsv/test_index.rb +32 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
- data/test/test_helper.rb +3 -1
- metadata +41 -38
data/bin/rbbt_query.rb
CHANGED
@@ -9,7 +9,7 @@ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-
|
|
9
9
|
file = ARGV[0]
|
10
10
|
|
11
11
|
if not File.exists? file
|
12
|
-
base, path = file.match(/(
|
12
|
+
base, path = file.match(/([^.]*)\.(.*)/).values_at 1, 2
|
13
13
|
require 'rbbt/sources/' << base.to_s.downcase
|
14
14
|
klass = Misc.string2const base
|
15
15
|
file = klass[path].find
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -3,54 +3,74 @@ require 'rbbt/util/log'
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
module CMD
|
6
|
-
class CMDError < RBBTError; end
|
7
6
|
|
7
|
+
class CMDError < RBBTError; end
|
8
8
|
module SmartIO
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
Log.debug "Raising exception"
|
24
|
-
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
25
|
-
raise exception
|
26
|
-
end
|
9
|
+
attr_accessor :pid, :cmd, :post, :in, :out, :err
|
10
|
+
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
|
11
|
+
io.extend SmartIO
|
12
|
+
io.pid = pid
|
13
|
+
io.cmd = cmd
|
14
|
+
io.in = sin
|
15
|
+
io.out = out
|
16
|
+
io.err = err
|
17
|
+
io.post = post
|
18
|
+
|
19
|
+
io.class.send(:alias_method, :original_close, :close)
|
20
|
+
io.class.send(:alias_method, :original_read, :read)
|
21
|
+
io
|
22
|
+
end
|
27
23
|
|
28
|
-
|
29
|
-
|
24
|
+
def wait_and_status
|
25
|
+
if @pid
|
26
|
+
begin
|
27
|
+
Process.waitpid(@pid)
|
28
|
+
rescue
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
@post.call if @post
|
31
|
+
Log.debug "Process #{ cmd } succeded" if $? and $?.success?
|
32
|
+
|
33
|
+
if $? and not $?.success?
|
34
|
+
Log.debug "Raising exception"
|
35
|
+
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
39
36
|
original_close
|
37
|
+
raise exception
|
40
38
|
end
|
41
|
-
|
42
|
-
|
43
|
-
def read
|
44
|
-
data = Misc.fixutf8(original_read)
|
45
|
-
self.close unless self.closed?
|
46
|
-
data
|
47
|
-
end
|
39
|
+
end
|
40
|
+
end
|
48
41
|
|
49
|
-
|
50
|
-
|
42
|
+
def close
|
43
|
+
self.original_read unless self.eof?
|
44
|
+
|
45
|
+
wait_and_status
|
46
|
+
|
47
|
+
@post.call if @post
|
48
|
+
|
49
|
+
original_close unless self.closed?
|
50
|
+
end
|
51
|
+
|
52
|
+
def force_close
|
53
|
+
if @pid
|
54
|
+
Log.debug "Forcing close by killing '#{@pid}'"
|
55
|
+
Process.kill("KILL", @pid)
|
56
|
+
Process.waitpid(@pid)
|
57
|
+
end
|
58
|
+
|
59
|
+
@post.call if @post
|
60
|
+
|
61
|
+
original_close
|
62
|
+
end
|
63
|
+
|
64
|
+
def read(*args)
|
65
|
+
data = original_read(*args)
|
66
|
+
|
67
|
+
self.close if self.eof?
|
68
|
+
|
69
|
+
data
|
51
70
|
end
|
52
71
|
|
53
|
-
|
72
|
+
end
|
73
|
+
|
54
74
|
|
55
75
|
def self.process_cmd_options(options = {})
|
56
76
|
string = ""
|
@@ -91,79 +111,107 @@ module CMD
|
|
91
111
|
cmd << " " << cmd_options
|
92
112
|
end
|
93
113
|
|
94
|
-
|
95
|
-
|
96
|
-
case
|
97
|
-
when (false and (IO === in_content and not StringIO === in_content))
|
98
|
-
sin = [in_content, nil]
|
99
|
-
else
|
100
|
-
sin = IO.pipe
|
101
|
-
end
|
114
|
+
in_content = StringIO.new in_content if String === in_content
|
102
115
|
|
116
|
+
sout, serr, sin = IO.pipe, IO.pipe, IO.pipe
|
103
117
|
|
104
118
|
pid = fork {
|
105
119
|
begin
|
120
|
+
sin.last.close
|
121
|
+
sout.first.close
|
122
|
+
serr.first.close
|
123
|
+
|
124
|
+
io = in_content
|
125
|
+
while IO === io
|
126
|
+
if SmartIO === io
|
127
|
+
io.original_close unless io.closed?
|
128
|
+
io.out.close unless io.out.nil? or io.out.closed?
|
129
|
+
io.err.close unless io.err.nil? or io.err.closed?
|
130
|
+
io = io.in
|
131
|
+
else
|
132
|
+
io.close unless io.closed?
|
133
|
+
io = nil
|
134
|
+
end
|
135
|
+
end
|
106
136
|
|
107
|
-
sin.last.close if sin.last
|
108
137
|
STDIN.reopen sin.first
|
109
138
|
sin.first.close
|
110
139
|
|
111
|
-
serr.first.close
|
112
140
|
STDERR.reopen serr.last
|
113
141
|
serr.last.close
|
114
142
|
|
115
|
-
sout.first.close
|
116
143
|
STDOUT.reopen sout.last
|
117
144
|
sout.last.close
|
118
145
|
|
119
146
|
STDOUT.sync = STDERR.sync = true
|
147
|
+
|
120
148
|
exec(cmd)
|
149
|
+
|
150
|
+
exit(-1)
|
121
151
|
rescue Exception
|
152
|
+
Log.debug("CMDError: #{$!.message}")
|
153
|
+
ddd $!.backtrace
|
122
154
|
raise CMDError, $!.message
|
123
155
|
end
|
124
156
|
}
|
157
|
+
|
125
158
|
sin.first.close
|
126
159
|
sout.last.close
|
127
160
|
serr.last.close
|
128
161
|
|
162
|
+
sin = sin.last
|
163
|
+
sout = sout.first
|
164
|
+
serr = serr.first
|
165
|
+
|
129
166
|
|
130
167
|
Log.debug "CMD: [#{pid}] #{cmd}"
|
131
168
|
|
132
|
-
|
133
|
-
when String === in_content
|
134
|
-
sin.last.write in_content
|
135
|
-
sin.last.close
|
136
|
-
when in_content.respond_to?(:gets)
|
169
|
+
if in_content.respond_to?(:read)
|
137
170
|
Thread.new do
|
138
|
-
|
139
|
-
|
171
|
+
begin
|
172
|
+
loop do
|
173
|
+
break if in_content.closed?
|
174
|
+
block = in_content.read 1024
|
175
|
+
break if block.nil? or block.empty?
|
176
|
+
sin.write block
|
177
|
+
end
|
178
|
+
|
179
|
+
sin.close unless sin.closed?
|
180
|
+
in_content.close unless in_content.closed?
|
181
|
+
rescue
|
182
|
+
Process.kill "INT", pid
|
183
|
+
raise $!
|
140
184
|
end
|
141
|
-
sin.last.close
|
142
185
|
end
|
186
|
+
else
|
187
|
+
sin.close
|
143
188
|
end
|
144
189
|
|
145
190
|
if pipe
|
146
191
|
Thread.new do
|
147
|
-
while
|
148
|
-
Log.log
|
192
|
+
while line = serr.gets
|
193
|
+
Log.log line, stderr if Integer === stderr
|
149
194
|
end
|
150
|
-
serr.
|
195
|
+
serr.close
|
196
|
+
Thread.exit
|
151
197
|
end
|
152
198
|
|
153
|
-
SmartIO.tie sout
|
154
|
-
sout.first
|
199
|
+
SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
|
155
200
|
|
201
|
+
sout
|
156
202
|
else
|
157
203
|
err = ""
|
158
204
|
Thread.new do
|
159
|
-
while
|
160
|
-
err <<
|
205
|
+
while not serr.eof?
|
206
|
+
err << serr.gets if Integer === stderr
|
161
207
|
end
|
162
|
-
serr.
|
208
|
+
serr.close
|
209
|
+
Thread.exit
|
163
210
|
end
|
164
211
|
|
165
|
-
out = StringIO.new sout.
|
166
|
-
|
212
|
+
out = StringIO.new sout.read
|
213
|
+
sout.close unless sout.closed?
|
214
|
+
SmartIO.tie out, pid, cmd, post, in_content, sin, serr
|
167
215
|
|
168
216
|
Process.waitpid pid
|
169
217
|
|
@@ -1,17 +1,16 @@
|
|
1
|
-
|
2
1
|
class FixWidthTable
|
3
2
|
|
4
3
|
attr_accessor :filename, :file, :value_size, :record_size, :range, :size
|
5
4
|
def initialize(filename, value_size = nil, range = nil, update = false)
|
6
5
|
@filename = filename
|
7
6
|
|
8
|
-
if update or %(
|
7
|
+
if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists? filename
|
9
8
|
Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
|
10
9
|
@value_size = value_size
|
11
10
|
@range = range
|
12
11
|
@record_size = @value_size + (@range ? 12 : 4)
|
13
12
|
|
14
|
-
if %(memory stringio).include? filename.to_s.downcase
|
13
|
+
if %w(memory stringio).include? filename.to_s.downcase
|
15
14
|
@file = StringIO.new
|
16
15
|
else
|
17
16
|
FileUtils.rm @filename if File.exists? @filename
|
@@ -31,6 +30,20 @@ class FixWidthTable
|
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
33
|
+
|
34
|
+
# Cache of already-built tables, keyed by filename, used by FixWidthTable.get.
CONNECTIONS = {} unless defined? CONNECTIONS

# Returns a FixWidthTable for +filename+, reusing a cached instance when one
# exists.
#
# A new table is built (and cached) when +update+ is set, when the file does
# not exist yet, or when no instance has been cached for +filename+.
# In-memory tables are never cached: each call returns a fresh instance. They
# are recognized the same way the constructor recognizes them, i.e. when
# <tt>filename.to_s.downcase</tt> is "memory" or "stringio".
#
# filename::   path of the table file, or :memory / :stringio
# value_size:: forwarded to FixWidthTable.new
# range::      forwarded to FixWidthTable.new
# update::     when true, always rebuild the table
def self.get(filename, value_size = nil, range = nil, update = false)
  # Checked first so that symbols such as :memory or :stringio never reach
  # File.exist?, which only accepts path-like arguments.
  if %w(memory stringio).include?(filename.to_s.downcase)
    return self.new(filename, value_size, range, update)
  end

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if update or not File.exist?(filename) or not CONNECTIONS.include?(filename)
    CONNECTIONS[filename] = self.new(filename, value_size, range, update)
  end

  CONNECTIONS[filename]
end
|
46
|
+
|
34
47
|
def format(pos, value)
|
35
48
|
padding = value_size - value.length
|
36
49
|
if range
|
@@ -82,6 +95,7 @@ class FixWidthTable
|
|
82
95
|
end
|
83
96
|
|
84
97
|
def read
|
98
|
+
return if @filename == :memory
|
85
99
|
@file.close unless @file.closed?
|
86
100
|
@file = File.open(@filename, 'r')
|
87
101
|
end
|
@@ -207,6 +221,7 @@ class FixWidthTable
|
|
207
221
|
end
|
208
222
|
|
209
223
|
def [](pos)
|
224
|
+
return [] if size == 0
|
210
225
|
if range
|
211
226
|
get_range(pos)
|
212
227
|
else
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -33,13 +33,83 @@ end
|
|
33
33
|
module Misc
|
34
34
|
class FieldNotFoundError < StandardError;end
|
35
35
|
|
36
|
+
# Intersects two arrays that are already sorted in ascending order, using a
# single linear pass over both.
#
# Neither +a1+ nor +a2+ is modified. An element shared by both arrays is
# reported once for every pairing found while advancing both cursors, so
# inputs without repeated elements yield a result without repeated elements.
#
# a1:: first sorted array
# a2:: second sorted array
#
# Returns a new Array with the common elements, in order.
# Raises ArgumentError when two elements cannot be compared with <tt><=></tt>.
def self.intersect_sorted_arrays(a1, a2)
  intersect = []
  i1 = i2 = 0

  # Index cursors are used instead of Array#shift so the caller's arrays are
  # left intact and nil/false elements do not end the scan early.
  while i1 < a1.length && i2 < a2.length
    e1, e2 = a1[i1], a2[i2]
    order = e1 <=> e2
    # Without this check incomparable elements would never advance a cursor.
    raise ArgumentError, "comparison of #{e1.inspect} with #{e2.inspect} failed" if order.nil?

    if order == 0
      intersect << e1
      i1 += 1
      i2 += 1
    elsif order < 0
      i1 += 1
    else
      i2 += 1
    end
  end

  intersect
end
|
57
|
+
|
58
|
+
# Merges two arrays that are already sorted in ascending order into a single
# sorted array, using one linear pass.
#
# When the current elements of both arrays compare as equal, only one copy is
# kept and both cursors advance. Neither +a1+ nor +a2+ is modified.
#
# a1:: first sorted array
# a2:: second sorted array
#
# Returns a new sorted Array.
# Raises ArgumentError when two elements cannot be compared with <tt><=></tt>.
def self.merge_sorted_arrays(a1, a2)
  merged = []
  i1 = i2 = 0

  # Index cursors are used instead of Array#shift so the caller's arrays are
  # left intact and nil/false elements do not end the merge early.
  while i1 < a1.length && i2 < a2.length
    e1, e2 = a1[i1], a2[i2]
    order = e1 <=> e2
    # Without this check incomparable elements would never advance a cursor.
    raise ArgumentError, "comparison of #{e1.inspect} with #{e2.inspect} failed" if order.nil?

    if order == 0
      merged << e1
      i1 += 1
      i2 += 1
    elsif order < 0
      merged << e1
      i1 += 1
    else
      merged << e2
      i2 += 1
    end
  end

  # At most one of the two arrays still has elements left; append them as is.
  merged.concat(a1.drop(i1))
  merged.concat(a2.drop(i2))
  merged
end
|
89
|
+
|
90
|
+
# Returns the MD5 digest of +text+ as a hexadecimal string.
def self.digest(text)
|
91
|
+
Digest::MD5.hexdigest(text)
|
92
|
+
end
|
93
|
+
|
94
|
+
# Defines a method called +method_name+ on the singleton class of +object+,
# using +block+ as its body, so only that one object gains the method.
def self.add_method(object, method_name, &block)
|
95
|
+
class << object
|
96
|
+
self
|
97
|
+
end.send :define_method, method_name, block
|
98
|
+
end
|
99
|
+
|
100
|
+
# Replaces +old_method+ on the singleton class of +object+ with +block+.
# The previous implementation is first aliased as +new_method_name+, so it
# remains callable under that name.
def self.redefine_method(object, old_method, new_method_name, &block)
|
101
|
+
metaclass = class << object; self end
|
102
|
+
metaclass.send :alias_method, new_method_name, old_method
|
103
|
+
metaclass.send :define_method, old_method, &block
|
104
|
+
end
|
105
|
+
|
36
106
|
def self.filename?(filename)
|
37
107
|
String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
|
38
108
|
end
|
39
109
|
|
40
110
|
def self.lock(file, *args)
|
41
111
|
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
42
|
-
lockfile = Lockfile.new
|
112
|
+
lockfile = Lockfile.new(file + '.lock')
|
43
113
|
lockfile.lock do
|
44
114
|
yield file, *args
|
45
115
|
end
|
@@ -104,6 +174,24 @@ module Misc
|
|
104
174
|
counts
|
105
175
|
end
|
106
176
|
|
177
|
+
# Runs the given block +repeats+ times under Benchmark.measure and prints the
# measurement to standard output. Returns the value of the last block run
# (nil if the block never ran). If the block raises, "Benchmark aborted" is
# printed and the same exception is re-raised.
def self.benchmark(repeats = 1)
|
178
|
+
require 'benchmark'
|
179
|
+
res = nil
|
180
|
+
begin
|
181
|
+
measure = Benchmark.measure do
|
182
|
+
repeats.times do
|
183
|
+
res = yield
|
184
|
+
end
|
185
|
+
end
|
186
|
+
puts "Benchmark for #{ repeats } repeats"
|
187
|
+
puts measure
|
188
|
+
rescue Exception
|
189
|
+
puts "Benchmark aborted"
|
190
|
+
raise $!
|
191
|
+
end
|
192
|
+
res
|
193
|
+
end
|
194
|
+
|
107
195
|
def self.profile
|
108
196
|
require 'ruby-prof'
|
109
197
|
RubyProf.start
|
@@ -279,7 +367,8 @@ module Misc
|
|
279
367
|
# Divides the array into +num+ chunks of the same size by placing one
|
280
368
|
# element in each chunk iteratively.
|
281
369
|
def self.divide(array, num)
|
282
|
-
chunks = [
|
370
|
+
chunks = []
|
371
|
+
num.times do chunks << [] end
|
283
372
|
array.each_with_index{|e, i|
|
284
373
|
c = i % num
|
285
374
|
chunks[c] << e
|
@@ -287,6 +376,15 @@ module Misc
|
|
287
376
|
chunks
|
288
377
|
end
|
289
378
|
|
379
|
+
|
380
|
+
|
381
|
+
# Yields the whole +list+ to the block once and pairs each element of +list+
# with the element at the same position in the block's result. Returns a
# Hash mapping each list element to its paired result.
def self.process_to_hash(list)
|
382
|
+
result = yield list
|
383
|
+
hash = {}
|
384
|
+
list.zip(result).each do |k,v| hash[k] = v end
|
385
|
+
hash
|
386
|
+
end
|
387
|
+
|
290
388
|
IUPAC2BASE = {
|
291
389
|
"A" => ["A"],
|
292
390
|
"C" => ["C"],
|
@@ -324,9 +422,9 @@ module PDF2Text
|
|
324
422
|
require 'rbbt/util/cmd'
|
325
423
|
require 'rbbt/util/tmpfile'
|
326
424
|
require 'rbbt/util/open'
|
327
|
-
|
328
|
-
|
329
|
-
|
425
|
+
|
426
|
+
|
427
|
+
CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
|
330
428
|
end
|
331
429
|
end
|
332
430
|
|
@@ -381,7 +479,7 @@ class NamedArray < Array
|
|
381
479
|
def zip_fields
|
382
480
|
return [] if self.empty?
|
383
481
|
zipped = self[0].zip(*self[1..-1])
|
384
|
-
zipped = zipped.collect{|v| NamedArray.name(v, fields)}
|
482
|
+
zipped = zipped.collect{|v| NamedArray.name(v, fields)}
|
385
483
|
zipped
|
386
484
|
end
|
387
485
|
|
@@ -400,6 +498,7 @@ class NamedArray < Array
|
|
400
498
|
"* #{ field }: #{ Array === value ? value * "|" : value }"
|
401
499
|
end * "\n"
|
402
500
|
end
|
501
|
+
|
403
502
|
end
|
404
503
|
|
405
504
|
def benchmark(bench = true)
|
@@ -430,3 +529,4 @@ def profile(prof = true)
|
|
430
529
|
yield
|
431
530
|
end
|
432
531
|
end
|
532
|
+
|