rbbt-util 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/rbbt_query.rb +1 -1
- data/lib/rbbt/util/cmd.rb +115 -67
- data/lib/rbbt/util/fix_width_table.rb +18 -3
- data/lib/rbbt/util/misc.rb +106 -6
- data/lib/rbbt/util/open.rb +9 -7
- data/lib/rbbt/util/persistence.rb +17 -14
- data/lib/rbbt/util/resource.rb +10 -3
- data/lib/rbbt/util/task.rb +2 -2
- data/lib/rbbt/util/task/job.rb +16 -3
- data/lib/rbbt/util/tc_hash.rb +64 -27
- data/lib/rbbt/util/tsv.rb +44 -21
- data/lib/rbbt/util/tsv/accessor.rb +8 -6
- data/lib/rbbt/util/tsv/attach.rb +19 -28
- data/lib/rbbt/util/tsv/filters.rb +193 -0
- data/lib/rbbt/util/tsv/index.rb +80 -8
- data/lib/rbbt/util/tsv/manipulate.rb +17 -6
- data/lib/rbbt/util/tsv/misc.rb +10 -0
- data/lib/rbbt/util/tsv/parse.rb +18 -1
- data/lib/rbbt/util/workflow.rb +12 -3
- data/lib/rbbt/util/workflow/soap.rb +0 -1
- data/share/install/software/lib/install_helpers +0 -2
- data/share/lib/R/util.R +3 -3
- data/test/rbbt/util/test_cmd.rb +23 -0
- data/test/rbbt/util/test_excel2tsv.rb +1 -1
- data/test/rbbt/util/test_misc.rb +41 -11
- data/test/rbbt/util/test_open.rb +2 -2
- data/test/rbbt/util/test_persistence.rb +2 -2
- data/test/rbbt/util/test_resource.rb +4 -20
- data/test/rbbt/util/test_tc_hash.rb +38 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/util/test_tsv.rb +6 -0
- data/test/rbbt/util/test_workflow.rb +14 -10
- data/test/rbbt/util/tsv/test_accessor.rb +42 -0
- data/test/rbbt/util/tsv/test_filters.rb +141 -0
- data/test/rbbt/util/tsv/test_index.rb +32 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
- data/test/test_helper.rb +3 -1
- metadata +41 -38
data/bin/rbbt_query.rb
CHANGED
@@ -9,7 +9,7 @@ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-
|
|
9
9
|
file = ARGV[0]
|
10
10
|
|
11
11
|
if not File.exists? file
|
12
|
-
base, path = file.match(/(
|
12
|
+
base, path = file.match(/([^.]*)\.(.*)/).values_at 1, 2
|
13
13
|
require 'rbbt/sources/' << base.to_s.downcase
|
14
14
|
klass = Misc.string2const base
|
15
15
|
file = klass[path].find
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -3,54 +3,74 @@ require 'rbbt/util/log'
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
module CMD
|
6
|
-
class CMDError < RBBTError; end
|
7
6
|
|
7
|
+
class CMDError < RBBTError; end
|
8
8
|
module SmartIO
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
Log.debug "Raising exception"
|
24
|
-
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
25
|
-
raise exception
|
26
|
-
end
|
9
|
+
attr_accessor :pid, :cmd, :post, :in, :out, :err
|
10
|
+
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
|
11
|
+
io.extend SmartIO
|
12
|
+
io.pid = pid
|
13
|
+
io.cmd = cmd
|
14
|
+
io.in = sin
|
15
|
+
io.out = out
|
16
|
+
io.err = err
|
17
|
+
io.post = post
|
18
|
+
|
19
|
+
io.class.send(:alias_method, :original_close, :close)
|
20
|
+
io.class.send(:alias_method, :original_read, :read)
|
21
|
+
io
|
22
|
+
end
|
27
23
|
|
28
|
-
|
29
|
-
|
24
|
+
def wait_and_status
|
25
|
+
if @pid
|
26
|
+
begin
|
27
|
+
Process.waitpid(@pid)
|
28
|
+
rescue
|
30
29
|
end
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
@post.call if @post
|
31
|
+
Log.debug "Process #{ cmd } succeded" if $? and $?.success?
|
32
|
+
|
33
|
+
if $? and not $?.success?
|
34
|
+
Log.debug "Raising exception"
|
35
|
+
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
39
36
|
original_close
|
37
|
+
raise exception
|
40
38
|
end
|
41
|
-
|
42
|
-
|
43
|
-
def read
|
44
|
-
data = Misc.fixutf8(original_read)
|
45
|
-
self.close unless self.closed?
|
46
|
-
data
|
47
|
-
end
|
39
|
+
end
|
40
|
+
end
|
48
41
|
|
49
|
-
|
50
|
-
|
42
|
+
def close
|
43
|
+
self.original_read unless self.eof?
|
44
|
+
|
45
|
+
wait_and_status
|
46
|
+
|
47
|
+
@post.call if @post
|
48
|
+
|
49
|
+
original_close unless self.closed?
|
50
|
+
end
|
51
|
+
|
52
|
+
def force_close
|
53
|
+
if @pid
|
54
|
+
Log.debug "Forcing close by killing '#{@pid}'"
|
55
|
+
Process.kill("KILL", @pid)
|
56
|
+
Process.waitpid(@pid)
|
57
|
+
end
|
58
|
+
|
59
|
+
@post.call if @post
|
60
|
+
|
61
|
+
original_close
|
62
|
+
end
|
63
|
+
|
64
|
+
def read(*args)
|
65
|
+
data = original_read(*args)
|
66
|
+
|
67
|
+
self.close if self.eof?
|
68
|
+
|
69
|
+
data
|
51
70
|
end
|
52
71
|
|
53
|
-
|
72
|
+
end
|
73
|
+
|
54
74
|
|
55
75
|
def self.process_cmd_options(options = {})
|
56
76
|
string = ""
|
@@ -91,79 +111,107 @@ module CMD
|
|
91
111
|
cmd << " " << cmd_options
|
92
112
|
end
|
93
113
|
|
94
|
-
|
95
|
-
|
96
|
-
case
|
97
|
-
when (false and (IO === in_content and not StringIO === in_content))
|
98
|
-
sin = [in_content, nil]
|
99
|
-
else
|
100
|
-
sin = IO.pipe
|
101
|
-
end
|
114
|
+
in_content = StringIO.new in_content if String === in_content
|
102
115
|
|
116
|
+
sout, serr, sin = IO.pipe, IO.pipe, IO.pipe
|
103
117
|
|
104
118
|
pid = fork {
|
105
119
|
begin
|
120
|
+
sin.last.close
|
121
|
+
sout.first.close
|
122
|
+
serr.first.close
|
123
|
+
|
124
|
+
io = in_content
|
125
|
+
while IO === io
|
126
|
+
if SmartIO === io
|
127
|
+
io.original_close unless io.closed?
|
128
|
+
io.out.close unless io.out.nil? or io.out.closed?
|
129
|
+
io.err.close unless io.err.nil? or io.err.closed?
|
130
|
+
io = io.in
|
131
|
+
else
|
132
|
+
io.close unless io.closed?
|
133
|
+
io = nil
|
134
|
+
end
|
135
|
+
end
|
106
136
|
|
107
|
-
sin.last.close if sin.last
|
108
137
|
STDIN.reopen sin.first
|
109
138
|
sin.first.close
|
110
139
|
|
111
|
-
serr.first.close
|
112
140
|
STDERR.reopen serr.last
|
113
141
|
serr.last.close
|
114
142
|
|
115
|
-
sout.first.close
|
116
143
|
STDOUT.reopen sout.last
|
117
144
|
sout.last.close
|
118
145
|
|
119
146
|
STDOUT.sync = STDERR.sync = true
|
147
|
+
|
120
148
|
exec(cmd)
|
149
|
+
|
150
|
+
exit(-1)
|
121
151
|
rescue Exception
|
152
|
+
Log.debug("CMDError: #{$!.message}")
|
153
|
+
ddd $!.backtrace
|
122
154
|
raise CMDError, $!.message
|
123
155
|
end
|
124
156
|
}
|
157
|
+
|
125
158
|
sin.first.close
|
126
159
|
sout.last.close
|
127
160
|
serr.last.close
|
128
161
|
|
162
|
+
sin = sin.last
|
163
|
+
sout = sout.first
|
164
|
+
serr = serr.first
|
165
|
+
|
129
166
|
|
130
167
|
Log.debug "CMD: [#{pid}] #{cmd}"
|
131
168
|
|
132
|
-
|
133
|
-
when String === in_content
|
134
|
-
sin.last.write in_content
|
135
|
-
sin.last.close
|
136
|
-
when in_content.respond_to?(:gets)
|
169
|
+
if in_content.respond_to?(:read)
|
137
170
|
Thread.new do
|
138
|
-
|
139
|
-
|
171
|
+
begin
|
172
|
+
loop do
|
173
|
+
break if in_content.closed?
|
174
|
+
block = in_content.read 1024
|
175
|
+
break if block.nil? or block.empty?
|
176
|
+
sin.write block
|
177
|
+
end
|
178
|
+
|
179
|
+
sin.close unless sin.closed?
|
180
|
+
in_content.close unless in_content.closed?
|
181
|
+
rescue
|
182
|
+
Process.kill "INT", pid
|
183
|
+
raise $!
|
140
184
|
end
|
141
|
-
sin.last.close
|
142
185
|
end
|
186
|
+
else
|
187
|
+
sin.close
|
143
188
|
end
|
144
189
|
|
145
190
|
if pipe
|
146
191
|
Thread.new do
|
147
|
-
while
|
148
|
-
Log.log
|
192
|
+
while line = serr.gets
|
193
|
+
Log.log line, stderr if Integer === stderr
|
149
194
|
end
|
150
|
-
serr.
|
195
|
+
serr.close
|
196
|
+
Thread.exit
|
151
197
|
end
|
152
198
|
|
153
|
-
SmartIO.tie sout
|
154
|
-
sout.first
|
199
|
+
SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
|
155
200
|
|
201
|
+
sout
|
156
202
|
else
|
157
203
|
err = ""
|
158
204
|
Thread.new do
|
159
|
-
while
|
160
|
-
err <<
|
205
|
+
while not serr.eof?
|
206
|
+
err << serr.gets if Integer === stderr
|
161
207
|
end
|
162
|
-
serr.
|
208
|
+
serr.close
|
209
|
+
Thread.exit
|
163
210
|
end
|
164
211
|
|
165
|
-
out = StringIO.new sout.
|
166
|
-
|
212
|
+
out = StringIO.new sout.read
|
213
|
+
sout.close unless sout.closed?
|
214
|
+
SmartIO.tie out, pid, cmd, post, in_content, sin, serr
|
167
215
|
|
168
216
|
Process.waitpid pid
|
169
217
|
|
@@ -1,17 +1,16 @@
|
|
1
|
-
|
2
1
|
class FixWidthTable
|
3
2
|
|
4
3
|
attr_accessor :filename, :file, :value_size, :record_size, :range, :size
|
5
4
|
def initialize(filename, value_size = nil, range = nil, update = false)
|
6
5
|
@filename = filename
|
7
6
|
|
8
|
-
if update or %(
|
7
|
+
if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists? filename
|
9
8
|
Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
|
10
9
|
@value_size = value_size
|
11
10
|
@range = range
|
12
11
|
@record_size = @value_size + (@range ? 12 : 4)
|
13
12
|
|
14
|
-
if %(memory stringio).include? filename.to_s.downcase
|
13
|
+
if %w(memory stringio).include? filename.to_s.downcase
|
15
14
|
@file = StringIO.new
|
16
15
|
else
|
17
16
|
FileUtils.rm @filename if File.exists? @filename
|
@@ -31,6 +30,20 @@ class FixWidthTable
|
|
31
30
|
end
|
32
31
|
end
|
33
32
|
|
33
|
+
|
34
|
+
CONNECTIONS = {} unless defined? CONNECTIONS
|
35
|
+
def self.get(filename, value_size = nil, range = nil, update = false)
|
36
|
+
return self.new(filename, value_size, range, update) if filename == :memory
|
37
|
+
case
|
38
|
+
when (!File.exists?(filename) or update)
|
39
|
+
CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
40
|
+
when (not CONNECTIONS.include?(filename))
|
41
|
+
CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
42
|
+
end
|
43
|
+
|
44
|
+
CONNECTIONS[filename]
|
45
|
+
end
|
46
|
+
|
34
47
|
def format(pos, value)
|
35
48
|
padding = value_size - value.length
|
36
49
|
if range
|
@@ -82,6 +95,7 @@ class FixWidthTable
|
|
82
95
|
end
|
83
96
|
|
84
97
|
def read
|
98
|
+
return if @filename == :memory
|
85
99
|
@file.close unless @file.closed?
|
86
100
|
@file = File.open(@filename, 'r')
|
87
101
|
end
|
@@ -207,6 +221,7 @@ class FixWidthTable
|
|
207
221
|
end
|
208
222
|
|
209
223
|
def [](pos)
|
224
|
+
return [] if size == 0
|
210
225
|
if range
|
211
226
|
get_range(pos)
|
212
227
|
else
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -33,13 +33,83 @@ end
|
|
33
33
|
module Misc
|
34
34
|
class FieldNotFoundError < StandardError;end
|
35
35
|
|
36
|
+
def self.intersect_sorted_arrays(a1, a2)
|
37
|
+
e1, e2 = a1.shift, a2.shift
|
38
|
+
intersect = []
|
39
|
+
while true
|
40
|
+
case
|
41
|
+
when (e1 and e2)
|
42
|
+
case e1 <=> e2
|
43
|
+
when 0
|
44
|
+
intersect << e1
|
45
|
+
e1, e2 = a1.shift, a2.shift
|
46
|
+
when -1
|
47
|
+
e1 = a1.shift
|
48
|
+
when 1
|
49
|
+
e2 = a2.shift
|
50
|
+
end
|
51
|
+
else
|
52
|
+
break
|
53
|
+
end
|
54
|
+
end
|
55
|
+
intersect
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.merge_sorted_arrays(a1, a2)
|
59
|
+
e1, e2 = a1.shift, a2.shift
|
60
|
+
new = []
|
61
|
+
while true
|
62
|
+
case
|
63
|
+
when (e1 and e2)
|
64
|
+
case e1 <=> e2
|
65
|
+
when 0
|
66
|
+
new << e1
|
67
|
+
e1, e2 = a1.shift, a2.shift
|
68
|
+
when -1
|
69
|
+
new << e1
|
70
|
+
e1 = a1.shift
|
71
|
+
when 1
|
72
|
+
new << e2
|
73
|
+
e2 = a2.shift
|
74
|
+
end
|
75
|
+
when e2
|
76
|
+
new << e2
|
77
|
+
new.concat a2
|
78
|
+
break
|
79
|
+
when e1
|
80
|
+
new << e1
|
81
|
+
new.concat a1
|
82
|
+
break
|
83
|
+
else
|
84
|
+
break
|
85
|
+
end
|
86
|
+
end
|
87
|
+
new
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.digest(text)
|
91
|
+
Digest::MD5.hexdigest(text)
|
92
|
+
end
|
93
|
+
|
94
|
+
def self.add_method(object, method_name, &block)
|
95
|
+
class << object
|
96
|
+
self
|
97
|
+
end.send :define_method, method_name, block
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.redefine_method(object, old_method, new_method_name, &block)
|
101
|
+
metaclass = class << object; self end
|
102
|
+
metaclass.send :alias_method, new_method_name, old_method
|
103
|
+
metaclass.send :define_method, old_method, &block
|
104
|
+
end
|
105
|
+
|
36
106
|
def self.filename?(filename)
|
37
107
|
String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
|
38
108
|
end
|
39
109
|
|
40
110
|
def self.lock(file, *args)
|
41
111
|
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
42
|
-
lockfile = Lockfile.new
|
112
|
+
lockfile = Lockfile.new(file + '.lock')
|
43
113
|
lockfile.lock do
|
44
114
|
yield file, *args
|
45
115
|
end
|
@@ -104,6 +174,24 @@ module Misc
|
|
104
174
|
counts
|
105
175
|
end
|
106
176
|
|
177
|
+
def self.benchmark(repeats = 1)
|
178
|
+
require 'benchmark'
|
179
|
+
res = nil
|
180
|
+
begin
|
181
|
+
measure = Benchmark.measure do
|
182
|
+
repeats.times do
|
183
|
+
res = yield
|
184
|
+
end
|
185
|
+
end
|
186
|
+
puts "Benchmark for #{ repeats } repeats"
|
187
|
+
puts measure
|
188
|
+
rescue Exception
|
189
|
+
puts "Benchmark aborted"
|
190
|
+
raise $!
|
191
|
+
end
|
192
|
+
res
|
193
|
+
end
|
194
|
+
|
107
195
|
def self.profile
|
108
196
|
require 'ruby-prof'
|
109
197
|
RubyProf.start
|
@@ -279,7 +367,8 @@ module Misc
|
|
279
367
|
# Divides the array into +num+ chunks of the same size by placing one
|
280
368
|
# element in each chunk iteratively.
|
281
369
|
def self.divide(array, num)
|
282
|
-
chunks = [
|
370
|
+
chunks = []
|
371
|
+
num.times do chunks << [] end
|
283
372
|
array.each_with_index{|e, i|
|
284
373
|
c = i % num
|
285
374
|
chunks[c] << e
|
@@ -287,6 +376,15 @@ module Misc
|
|
287
376
|
chunks
|
288
377
|
end
|
289
378
|
|
379
|
+
|
380
|
+
|
381
|
+
def self.process_to_hash(list)
|
382
|
+
result = yield list
|
383
|
+
hash = {}
|
384
|
+
list.zip(result).each do |k,v| hash[k] = v end
|
385
|
+
hash
|
386
|
+
end
|
387
|
+
|
290
388
|
IUPAC2BASE = {
|
291
389
|
"A" => ["A"],
|
292
390
|
"C" => ["C"],
|
@@ -324,9 +422,9 @@ module PDF2Text
|
|
324
422
|
require 'rbbt/util/cmd'
|
325
423
|
require 'rbbt/util/tmpfile'
|
326
424
|
require 'rbbt/util/open'
|
327
|
-
|
328
|
-
|
329
|
-
|
425
|
+
|
426
|
+
|
427
|
+
CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
|
330
428
|
end
|
331
429
|
end
|
332
430
|
|
@@ -381,7 +479,7 @@ class NamedArray < Array
|
|
381
479
|
def zip_fields
|
382
480
|
return [] if self.empty?
|
383
481
|
zipped = self[0].zip(*self[1..-1])
|
384
|
-
zipped = zipped.collect{|v| NamedArray.name(v, fields)}
|
482
|
+
zipped = zipped.collect{|v| NamedArray.name(v, fields)}
|
385
483
|
zipped
|
386
484
|
end
|
387
485
|
|
@@ -400,6 +498,7 @@ class NamedArray < Array
|
|
400
498
|
"* #{ field }: #{ Array === value ? value * "|" : value }"
|
401
499
|
end * "\n"
|
402
500
|
end
|
501
|
+
|
403
502
|
end
|
404
503
|
|
405
504
|
def benchmark(bench = true)
|
@@ -430,3 +529,4 @@ def profile(prof = true)
|
|
430
529
|
yield
|
431
530
|
end
|
432
531
|
end
|
532
|
+
|