rbbt-util 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'stringio' # TSVSerializer.load wraps its input in a StringIO

# Serializers that convert TSV values to and from the String form used for
# storage. Every serializer exposes the same pair of class methods:
# +dump(value)+ returning a String and +load(string)+ returning the value.
module TSV
  # Single integer, packed with the "l" directive (32-bit signed, native
  # byte order).
  class IntegerSerializer
    def self.dump(i); [i].pack("l"); end
    def self.load(str); str.unpack("l").first; end
  end

  # Single float, packed with the "d" directive (double precision, native
  # format).
  class FloatSerializer
    def self.dump(i); [i].pack("d"); end
    def self.load(str); str.unpack("d").first; end
  end

  # Array of integers, each packed with the "l" directive.
  class IntegerArraySerializer
    def self.dump(a); a.pack("l*"); end
    def self.load(str); str.unpack("l*"); end
  end

  # Plain string: +dump+ calls +to_s+ on the value, +load+ returns the stored
  # string untouched.
  class StringSerializer
    def self.dump(str); str.to_s; end
    def self.load(str); str; end
  end

  # Array of strings joined with tabs.
  class StringArraySerializer
    def self.dump(array)
      array.collect{|value| value.to_s} * "\t"
    end

    # A nil input loads as an empty array. The -1 limit keeps trailing empty
    # fields, so "a\t" loads as ["a", ""].
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1)
    end
  end

  # Array of arrays of strings: inner values are joined with "|" and the
  # resulting fields are joined with tabs.
  class StringDoubleArraySerializer
    def self.dump(array)
      # Distinct parameter names: the original reused the same name for both
      # blocks, shadowing the outer one.
      array.collect{|values| values.collect{|value| value.to_s} * "|"} * "\t"
    end

    # A nil input loads as an empty array; trailing empty fields and values
    # are preserved (split limit -1).
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1).collect{|l| l.split("|", -1)}
    end
  end

  # Marshals a copy of the object and passes the unmarshalled object through
  # TSV.setup when loading.
  class TSVMarshalSerializer
    def self.dump(tsv)
      # NOTE(review): the dup presumably drops per-object extensions that
      # Marshal cannot dump; confirm against TSV.setup.
      Marshal.dump(tsv.dup)
    end

    def self.load(string)
      TSV.setup Marshal.load(string)
    end
  end


  # Stores the textual form of the object (+to_s+) and reads it back with
  # TSV.open over an in-memory stream.
  class TSVSerializer
    def self.dump(tsv)
      tsv.to_s
    end

    def self.load(string)
      TSV.open StringIO.new(string)
    end
  end

  # Short names accepted for each serializer. Note that :marshal maps to the
  # Marshal module itself, which already answers to dump and load.
  SERIALIZER_ALIAS = {
    :integer => IntegerSerializer,
    :float => FloatSerializer,
    :integer_array => IntegerArraySerializer,
    :marshal => Marshal,
    :single => StringSerializer,
    :string => StringSerializer,
    :list => StringArraySerializer,
    :flat => StringArraySerializer,
    :double => StringDoubleArraySerializer,
    :tsv => TSVSerializer,
    :marshal_tsv => TSVMarshalSerializer
  }

end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'rbbt/resource/path'
|
2
|
+
module TSV

  # Counts, for every field named in the header of +file+, how many distinct
  # entries of that column match one of +values+.
  #
  # +values+ is either an Array (written one value per line to a temporary
  # file) or a path, which is symlinked as the temporary file. Each column is
  # processed by a shell pipeline that drops lines starting with "#", cuts
  # the column, splits entries on "|", removes duplicates, keeps entries
  # containing an alphabetic character and greps them against the temporary
  # file as fixed whole words. The count is the number of output lines.
  #
  # Returns a Hash of field name => count. Any StandardError raised while
  # running the pipeline for a field yields a count of 0 for that field.
  def self.field_match_counts(file, values)
    fields = TSV.parse_header(Open.open(file)).all_fields

    counts = {}
    TmpFile.with_file do |tmpfile|
      if Array === values
        Open.write(tmpfile, values * "\n")
      else
        FileUtils.ln_s values, tmpfile
      end

      fields.each_with_index do |field,i|
        counts[field] = begin
                          # NOTE(review): +file+ and +tmpfile+ are interpolated into the shell
                          # command unescaped; paths with spaces or shell metacharacters will
                          # misbehave. Only pass trusted paths.
                          CMD.cmd("cat #{ file } |grep -v ^#|cut -f #{i + 1}|tr '|' '\\n' |sort -u |grep [[:alpha:]]|grep -f #{tmpfile} -F -w").read.count("\n")
                        rescue
                          0
                        end
      end
    end

    counts
  end

  # Returns the file name behind +file+: the string itself when it is a
  # String, or +file.filename+ when it is a stream (responds to +gets+) that
  # also responds to +filename+. Streams without +filename+ give nil.
  #
  # Raises a RuntimeError for anything else.
  def self.get_filename(file)
    case
    when String === file
      filename = file
    when file.respond_to?(:gets)
      filename = file.filename if file.respond_to? :filename
    else
      # The original message was copied from get_stream and talked about
      # streams; report what was actually being asked for.
      raise "Cannot get filename from: #{file.inspect}"
    end
    filename
  end

  # Returns a readable stream for +file+: +file.open+ for a Path, File.open
  # for a String, or the object itself when it already responds to +gets+.
  # The caller owns the returned stream.
  #
  # Raises a RuntimeError for anything else.
  def self.get_stream(file)
    case
    when Path === file
      file.open
    when String === file
      File.open(file)
    when file.respond_to?(:gets)
      file
    else
      raise "Cannot get stream from: #{file.inspect}"
    end
  end

  # Resolves +field+ to a column position.
  #
  # * An Integer is returned unchanged.
  # * nil, :key, or a value equal to +key_field+ give :key.
  # * Any other String gives its index in +fields+ (nil when absent).
  # * Anything else gives nil.
  def self.identify_field(key_field, fields, field)
    case
    when Integer === field
      field
    when (field.nil? or field == :key or key_field == field)
      :key
    when String === field
      fields.index field
    end
  end

  # Instance-level shortcut that resolves +field+ against this object's own
  # +key_field+ and +fields+.
  def identify_field(field)
    TSV.identify_field(key_field, fields, field)
  end


end
|
data/lib/rbbt/util/R.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
3
|
|
4
4
|
module R
|
5
5
|
|
@@ -24,7 +24,7 @@ module R
|
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
module TSV
|
28
28
|
def R(script, open_options = {})
|
29
29
|
TmpFile.with_file do |f|
|
30
30
|
Open.write(f, self.to_s)
|
@@ -36,7 +36,7 @@ rbbt.tsv.write('#{f}', data);
|
|
36
36
|
EOF
|
37
37
|
).read)
|
38
38
|
open_options = Misc.add_defaults open_options, :type => :list
|
39
|
-
TSV.
|
39
|
+
TSV.open(f, open_options)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rbbt/util/log'
|
2
|
+
|
3
|
+
# Lets a module wrap ("chain") methods of the objects it is extended into.
#
# A module that does <tt>extend ChainMethods</tt> gains a +chain_prefix+
# (defaulting to its downcased name). Every instance method it defines whose
# name starts with that prefix, e.g. +prefix_foo+, replaces +foo+ on each
# object the module is later extended into, while the previous +foo+ stays
# reachable as +prefix_clean_foo+. The special names +get_brackets+ and
# +set_brackets+ stand for +[]+ and +[]=+.
module ChainMethods
  def self.extended(base)
    if not base.respond_to? :chain_prefix
      metaclass = class << base
        attr_accessor :chain_prefix, :chained_methods

        # Instance methods of the module that start with the chain prefix.
        # The list is computed once and memoized.
        def chained_methods
          @chained_methods ||= instance_methods.select{|method| method.to_s =~ /^#{chain_prefix}/}
        end
        self
      end

      metaclass.module_eval do
        def setup_chain(object)
          object.extend self
        end

        # Installs the chained methods on the singleton class of +base+.
        # Does nothing when there are no chained methods or when +base+
        # already answers to the first "clean" alias (already set up).
        def setup_chains(base)
          raise "No prefix specified for #{self.to_s}" if self.chain_prefix.nil? or (String === self.chain_prefix and self.chain_prefix.empty?)
          methods = self.chained_methods

          return if methods.empty?

          prefix = self.chain_prefix

          # instance_methods returns Symbols on Ruby >= 1.9 and Symbol has no
          # #sub, so names are converted to Strings before being rewritten.
          first_method = methods.first.to_s
          first_original = first_method.sub(prefix.to_s + '_', '')
          first_clean_method = prefix.to_s + '_clean_' + first_original

          if not base.respond_to? first_clean_method
            singleton = class << base; self; end
            singleton.module_eval do
              methods.each do |chained|
                new_method = chained.to_s
                original = new_method.sub(prefix.to_s + '_', '')
                clean_method = prefix.to_s + '_clean_' + original

                original = "[]" if original == "get_brackets"
                original = "[]=" if original == "set_brackets"

                begin
                  alias_method clean_method, original
                rescue
                  # Deliberate best effort: the object may not define the
                  # original method, in which case there is nothing to keep.
                end
                alias_method original, new_method
              end
            end
          end
        end
      end

      if not metaclass.respond_to? :extended
        metaclass.module_eval do
          # Hook run when the chaining module is extended into an object.
          def extended(base)
            setup_chains(base)
          end
        end
      end
    end

    base.chain_prefix = base.to_s.downcase.to_sym
  end
end
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -4,10 +4,11 @@ require 'stringio'
|
|
4
4
|
|
5
5
|
module CMD
|
6
6
|
|
7
|
-
class CMDError <
|
7
|
+
class CMDError < StandardError; end
|
8
|
+
|
8
9
|
module SmartIO
|
9
|
-
attr_accessor :pid, :cmd, :post, :in, :out, :err
|
10
|
-
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
|
10
|
+
attr_accessor :pid, :cmd, :post, :in, :out, :err, :log
|
11
|
+
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil, log = true)
|
11
12
|
io.extend SmartIO
|
12
13
|
io.pid = pid
|
13
14
|
io.cmd = cmd
|
@@ -15,6 +16,7 @@ module CMD
|
|
15
16
|
io.out = out
|
16
17
|
io.err = err
|
17
18
|
io.post = post
|
19
|
+
io.log = log
|
18
20
|
|
19
21
|
io.class.send(:alias_method, :original_close, :close)
|
20
22
|
io.class.send(:alias_method, :original_read, :read)
|
@@ -28,10 +30,10 @@ module CMD
|
|
28
30
|
rescue
|
29
31
|
end
|
30
32
|
|
31
|
-
Log.debug "Process #{ cmd } succeded" if $? and $?.success?
|
33
|
+
Log.debug "Process #{ cmd } succeded" if $? and $?.success? and log
|
32
34
|
|
33
35
|
if $? and not $?.success?
|
34
|
-
Log.debug "Raising exception"
|
36
|
+
Log.debug "Raising exception" if log
|
35
37
|
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
36
38
|
original_close
|
37
39
|
raise exception
|
@@ -51,7 +53,7 @@ module CMD
|
|
51
53
|
|
52
54
|
def force_close
|
53
55
|
if @pid
|
54
|
-
Log.debug "Forcing close by killing '#{@pid}'"
|
56
|
+
Log.debug "Forcing close by killing '#{@pid}'" if log
|
55
57
|
Process.kill("KILL", @pid)
|
56
58
|
Process.waitpid(@pid)
|
57
59
|
end
|
@@ -98,6 +100,9 @@ module CMD
|
|
98
100
|
stderr = options.delete(:stderr)
|
99
101
|
pipe = options.delete(:pipe)
|
100
102
|
post = options.delete(:post)
|
103
|
+
log = options.delete(:log)
|
104
|
+
|
105
|
+
log = true if log.nil?
|
101
106
|
|
102
107
|
if stderr == true
|
103
108
|
stderr = Log::HIGH
|
@@ -149,8 +154,8 @@ module CMD
|
|
149
154
|
|
150
155
|
exit(-1)
|
151
156
|
rescue Exception
|
152
|
-
Log.debug("CMDError: #{$!.message}")
|
153
|
-
ddd $!.backtrace
|
157
|
+
Log.debug("CMDError: #{$!.message}") if log
|
158
|
+
ddd $!.backtrace if log
|
154
159
|
raise CMDError, $!.message
|
155
160
|
end
|
156
161
|
}
|
@@ -164,7 +169,7 @@ module CMD
|
|
164
169
|
serr = serr.first
|
165
170
|
|
166
171
|
|
167
|
-
Log.debug "CMD: [#{pid}] #{cmd}"
|
172
|
+
Log.debug "CMD: [#{pid}] #{cmd}" if log
|
168
173
|
|
169
174
|
if in_content.respond_to?(:read)
|
170
175
|
Thread.new do
|
@@ -190,7 +195,7 @@ module CMD
|
|
190
195
|
if pipe
|
191
196
|
Thread.new do
|
192
197
|
while line = serr.gets
|
193
|
-
Log.log line, stderr if Integer === stderr
|
198
|
+
Log.log line, stderr if Integer === stderr and log
|
194
199
|
end
|
195
200
|
serr.close
|
196
201
|
Thread.exit
|
@@ -216,11 +221,10 @@ module CMD
|
|
216
221
|
Process.waitpid pid
|
217
222
|
|
218
223
|
if not $?.success?
|
219
|
-
exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}"
|
220
|
-
exception.info = err if Integer === stderr and stderr >= Log.severity
|
224
|
+
exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}.\n#{err}"
|
221
225
|
raise exception
|
222
226
|
else
|
223
|
-
Log.log err, stderr if Integer === stderr
|
227
|
+
Log.log err, stderr if Integer === stderr and log
|
224
228
|
end
|
225
229
|
|
226
230
|
out
|
data/lib/rbbt/util/excel2tsv.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require 'spreadsheet'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
|
-
|
4
|
+
|
5
|
+
module TSV
|
5
6
|
def self.excel2tsv(file, options = {})
|
6
7
|
sheet = options.delete :sheet
|
7
8
|
header = options.delete :header
|
@@ -26,7 +27,7 @@ class TSV
|
|
26
27
|
rows.each do |row| f.puts row * "\t" end
|
27
28
|
end
|
28
29
|
|
29
|
-
TSV.
|
30
|
+
TSV.open(filename, options)
|
30
31
|
end
|
31
32
|
end
|
32
33
|
end
|
data/lib/rbbt/util/log.rb
CHANGED
@@ -19,6 +19,7 @@ module Log
|
|
19
19
|
SEVERITY_COLOR = ["0;37m", "0;32m", "0;33m", "0;31m", "1;0m" ].collect{|e| "\033[#{e}"}
|
20
20
|
|
21
21
|
def self.log(message, severity = MEDIUM)
|
22
|
+
message ||= ""
|
22
23
|
severity_color = SEVERITY_COLOR[severity]
|
23
24
|
font_color = {false => "\033[0;37m", true => "\033[0m"}[severity >= INFO]
|
24
25
|
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1,46 +1,46 @@
|
|
1
|
-
require 'iconv'
|
2
1
|
require 'lockfile'
|
3
|
-
require '
|
2
|
+
require 'rbbt/util/chain_methods'
|
3
|
+
require 'rbbt/resource/path'
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
module Misc
|
6
|
+
class FieldNotFoundError < StandardError;end
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
|
9
|
+
IUPAC2BASE = {
|
10
|
+
"A" => ["A"],
|
11
|
+
"C" => ["C"],
|
12
|
+
"G" => ["G"],
|
13
|
+
"T" => ["T"],
|
14
|
+
"U" => ["U"],
|
15
|
+
"R" => "A or G".split(" or "),
|
16
|
+
"Y" => "C or T".split(" or "),
|
17
|
+
"S" => "G or C".split(" or "),
|
18
|
+
"W" => "A or T".split(" or "),
|
19
|
+
"K" => "G or T".split(" or "),
|
20
|
+
"M" => "A or C".split(" or "),
|
21
|
+
"B" => "C or G or T".split(" or "),
|
22
|
+
"D" => "A or G or T".split(" or "),
|
23
|
+
"H" => "A or C or T".split(" or "),
|
24
|
+
"V" => "A or C or G".split(" or "),
|
25
|
+
"N" => %w(A C T G),
|
26
|
+
}
|
17
27
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
BASE2COMPLEMENT = {
|
29
|
+
"A" => "T",
|
30
|
+
"C" => "G",
|
31
|
+
"G" => "C",
|
32
|
+
"T" => "A",
|
33
|
+
"U" => "A",
|
34
|
+
}
|
24
35
|
|
25
|
-
def self.
|
26
|
-
|
27
|
-
alias_method :old_get, :[]
|
28
|
-
alias_method :[], :indiferent_get
|
29
|
-
end
|
36
|
+
# Looks up an IUPAC nucleotide code in the IUPAC2BASE table and returns the
# list of bases recorded for it, or nil when the code is not in the table.
def self.IUPAC_to_base(iupac)
  bases = IUPAC2BASE[iupac]
  bases
end
|
31
|
-
end
|
32
39
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
old_pwd = FileUtils.pwd
|
38
|
-
begin
|
39
|
-
FileUtils.cd dir
|
40
|
-
yield
|
41
|
-
ensure
|
42
|
-
FileUtils.cd old_pwd
|
43
|
-
end
|
40
|
+
# Tells whether +string+ designates a file.
#
# A Path is always accepted. A plain String is accepted only when it is
# shorter than 265 characters and names an existing file system entry.
# Anything else gives false.
def self.is_filename?(string)
  return true if Path === string
  # File.exist? replaces File.exists?, which was deprecated and then removed
  # in Ruby 3.2.
  return true if String === string and string.length < 265 and File.exist? string
  return false
end
|
45
45
|
|
46
46
|
def self.intersect_sorted_arrays(a1, a2)
|
@@ -97,73 +97,21 @@ module Misc
|
|
97
97
|
new
|
98
98
|
end
|
99
99
|
|
100
|
-
def self.
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
def self.add_method(object, method_name, &block)
|
105
|
-
class << object
|
106
|
-
self
|
107
|
-
end.send :define_method, method_name, block
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.redefine_method(object, old_method, new_method_name, &block)
|
111
|
-
return if object.respond_to? new_method_name
|
112
|
-
metaclass = class << object; self end
|
113
|
-
metaclass.send :alias_method, new_method_name, old_method
|
114
|
-
metaclass.send :define_method, old_method, &block
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.filename?(filename)
|
118
|
-
String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.lock(file, *args)
|
122
|
-
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
123
|
-
lockfile = Lockfile.new(file + '.lock')
|
124
|
-
lockfile.lock do
|
125
|
-
yield file, *args
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def self.string2const(string)
|
130
|
-
return nil if string.nil?
|
131
|
-
mod = Kernel
|
132
|
-
|
133
|
-
string.to_s.split('::').each do |str|
|
134
|
-
mod = mod.const_get str
|
135
|
-
end
|
136
|
-
|
137
|
-
mod
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.path_relative_to(path, subdir)
|
141
|
-
File.expand_path(path).sub(/^#{Regexp.quote File.expand_path(subdir)}\/?/,'')
|
142
|
-
end
|
143
|
-
|
144
|
-
def self.in_directory?(file, directory)
|
145
|
-
if File.expand_path(file) =~ /^#{Regexp.quote File.expand_path(directory)}/
|
146
|
-
true
|
147
|
-
else
|
148
|
-
false
|
100
|
+
# Builds a Hash from a list of [key, value] pairs. When a key appears more
# than once the last pair wins.
def self.array2hash(array)
  array.inject({}) do |result, (key, value)|
    result[key] = value
    result
  end
end
|
151
107
|
|
152
|
-
def self.
|
153
|
-
|
154
|
-
files = []
|
155
|
-
while in_directory?(path, subdir)
|
156
|
-
path = path.dirname
|
157
|
-
if path[target].exists?
|
158
|
-
files << path[target]
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
files
|
108
|
+
# Pairs each element of +list1+ with the element of +list2+ at the same
# position and turns the pairs into a Hash through array2hash.
def self.zip2hash(list1, list2)
  pairs = list1.zip(list2)
  array2hash(pairs)
end
|
164
111
|
|
165
|
-
def self.
|
166
|
-
|
112
|
+
# Yields the whole +list+ to the block and returns a Hash that maps each
# element of +list+ to the element at the same position in the block result.
def self.process_to_hash(list)
  zip2hash(list, yield(list))
end
|
168
116
|
|
169
117
|
def self.env_add(var, value, sep = ":", prepend = true)
|
@@ -176,15 +124,6 @@ module Misc
|
|
176
124
|
end
|
177
125
|
end
|
178
126
|
|
179
|
-
def self.count(list)
|
180
|
-
counts = Hash.new 0
|
181
|
-
list.each do |item|
|
182
|
-
counts[item] += 1
|
183
|
-
end
|
184
|
-
|
185
|
-
counts
|
186
|
-
end
|
187
|
-
|
188
127
|
def self.benchmark(repeats = 1)
|
189
128
|
require 'benchmark'
|
190
129
|
res = nil
|
@@ -220,6 +159,73 @@ module Misc
|
|
220
159
|
res
|
221
160
|
end
|
222
161
|
|
162
|
+
# Runs the block, retrying when it raises a StandardError, for at most
# +times+ attempts in total.
#
# Returns the value of the first successful attempt. When every attempt
# fails, the exception of the last attempt is re-raised. Previously it was
# swallowed and nil was returned, which made a failure indistinguishable
# from a block that legitimately returns nil.
def self.insist(times = 3)
  try = 0
  begin
    yield
  rescue
    try += 1
    retry if try < times
    raise
  end
end

# Shortcut for <tt>insist(3)</tt>.
def self.try3times(&block)
  insist(3, &block)
end
|
175
|
+
|
176
|
+
# Renders a Hash as "key=value" entries joined by "#", sorted by the string
# form of the key. Symbol keys and values are written with a leading ":".
# Entries whose value class is not one of the simple classes listed below are
# left out.
def self.hash2string(hash)
  hash.sort_by{|k,v| k.to_s}.collect{|k,v|
    next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
    # Interpolation instead of `":" << sym.to_s`: appending to a string
    # literal raises FrozenError when string literals are frozen.
    [ Symbol === k ? ":#{k}" : k,
      Symbol === v ? ":#{v}" : v] * "="
  }.compact * "#"
end
|
183
|
+
|
184
|
+
# Returns +path+ relative to +basedir+, or nil when +path+ is not strictly
# inside +basedir+ (a path equal to +basedir+ also gives nil). Both arguments
# are expanded with File.expand_path first.
def self.path_relative_to(basedir, path)
  path = File.expand_path(path)
  # Drop a trailing separator so that a root basedir ("/") can match too.
  basedir = File.expand_path(basedir).chomp('/')

  # Anchored at the start: the original pattern was unanchored, so a path
  # such as "/x/a/b" was wrongly reported as relative to "/a".
  if path =~ /\A#{Regexp.quote basedir}\/(.*)/
    return $1
  else
    return nil
  end
end
|
194
|
+
|
195
|
+
# Runs the block while holding a Lockfile on "<file>.lock", passing +file+
# and any extra +args+ to the block. The directory that contains +file+ is
# created first when missing.
def self.lock(file, *args)
  # mkdir_p is a no-op for an existing directory, so no existence check is
  # needed (the original check used File.exists?, removed in Ruby 3.2).
  FileUtils.mkdir_p File.dirname(File.expand_path(file))
  lockfile = Lockfile.new(file + '.lock')
  lockfile.lock do
    yield file, *args
  end
end
|
202
|
+
|
203
|
+
# Tells whether +file+ is +dir+ itself or lies somewhere below it. Both
# arguments are expanded to absolute paths, then the ancestors of +file+ are
# walked upwards until +dir+ or the file system root is reached.
def self.common_path(dir, file)
  target = File.expand_path dir
  current = File.expand_path file

  loop do
    return true if current == target

    parent = File.dirname(current)
    # The root is its own parent: nothing left to climb.
    return false if parent == current

    current = parent
  end
end
|
215
|
+
|
216
|
+
# Runs the block with +dir+ as the working directory, creating +dir+ first
# when needed, and returns the block's value. The previous working directory
# is restored afterwards, even when the block raises.
def self.in_dir(dir)
  old_pwd = FileUtils.pwd
  res = nil
  begin
    # mkdir_p is a no-op for an existing directory, so no existence check is
    # needed (the original check used File.exists?, removed in Ruby 3.2).
    FileUtils.mkdir_p dir
    FileUtils.cd dir
    res = yield
  ensure
    FileUtils.cd old_pwd
  end
  res
end
|
228
|
+
|
223
229
|
def self.fixutf8(string)
|
224
230
|
if string.respond_to?(:valid_encoding?) and ! string.valid_encoding?
|
225
231
|
@@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
|
@@ -229,6 +235,25 @@ module Misc
|
|
229
235
|
end
|
230
236
|
end
|
231
237
|
|
238
|
+
# Writes +content+ to +path+, removing the file again if the write fails.
#
# A String is written as is; an IO or StringIO is copied line by line; any
# other content leaves an empty file. On Interrupt the file is removed and a
# RuntimeError is raised; on any other exception the file is removed and the
# exception is re-raised.
def self.sensiblewrite(path, content)
  begin
    case content
    when String
      File.open(path, 'w') { |out| out.write content }
    when IO, StringIO
      File.open(path, 'w') do |out|
        while line = content.gets
          out.write line
        end
      end
    else
      File.open(path, 'w') { |out| }
    end
  rescue Interrupt
    FileUtils.rm_f path
    raise "Interrupted (Ctrl-c)"
  rescue Exception
    FileUtils.rm_f path
    raise
  end
end
|
256
|
+
|
232
257
|
def self.add_defaults(options, defaults = {})
|
233
258
|
case
|
234
259
|
when Hash === options
|
@@ -246,20 +271,8 @@ module Misc
|
|
246
271
|
new_options
|
247
272
|
end
|
248
273
|
|
249
|
-
def self.
|
250
|
-
|
251
|
-
hash.delete keys.first.to_sym
|
252
|
-
else
|
253
|
-
keys.collect do |key| hash.delete(key.to_sym) || hash.delete(key.to_s) end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
def self.hash2string(hash)
|
258
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
259
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
|
260
|
-
[ Symbol === k ? ":" << k.to_s : k,
|
261
|
-
Symbol === v ? ":" << v.to_s : v] * "="
|
262
|
-
}.compact * "#"
|
274
|
+
# Returns the hexadecimal MD5 digest of +text+.
def self.digest(text)
  md5 = Digest::MD5
  md5.hexdigest(text)
end
|
264
277
|
|
265
278
|
def self.hash2md5(hash)
|
@@ -270,14 +283,57 @@ module Misc
|
|
270
283
|
case
|
271
284
|
when v.inspect =~ /:0x0/
|
272
285
|
o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
|
273
|
-
|
274
|
-
o[k] = "" << String.new(v.to_s)
|
286
|
+
#when Resource::Path === v
|
287
|
+
# o[k] = "" << String.new(v.to_s)
|
275
288
|
else
|
276
289
|
o[k] = v
|
277
290
|
end
|
278
291
|
end
|
279
292
|
|
280
|
-
|
293
|
+
if o.empty?
|
294
|
+
""
|
295
|
+
else
|
296
|
+
Digest::MD5.hexdigest(o.sort_by{|k| k.to_s}.inspect)
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
# Removes the given option +keys+ from +hash+ and returns their values.
#
# Each key is looked up as a Symbol first and, when that gives nil, as a
# String. With a single key the value itself is returned; otherwise an Array
# of values in the order of +keys+.
#
# Two defects of the previous version are fixed: a stored +false+ was lost
# (the `||` fallback turned it into the String-key lookup, usually nil), and
# the single-key form ignored String keys, unlike the multi-key form.
def self.process_options(hash, *keys)
  values = keys.collect do |key|
    value = hash.delete(key.to_sym)
    value = hash.delete(key.to_s) if value.nil?
    value
  end

  keys.length == 1 ? values.first : values
end
|
307
|
+
|
308
|
+
# Moves the options that belong to +prefix+ out of +hash+ into a new Hash,
# which is returned.
#
# A key named "<prefix>_<rest>" is stored as "<rest>", keeping the key's
# type (String or Symbol). A key equal to the prefix itself is moved
# unchanged. All moved keys are deleted from +hash+.
#
# The prefix is matched literally at the start of the key. The previous
# pattern was unanchored, so with prefix "open" a key such as :reopen_mode
# was also pulled (as :mode).
def self.pull_keys(hash, prefix)
  new = {}
  pattern = /\A#{Regexp.quote prefix.to_s}_(.*)/

  # Iterate over a snapshot of the keys: the hash is modified in the loop.
  hash.keys.each do |key|
    if key.to_s =~ pattern
      suffix = $1
      case key
      when String
        new[suffix] = hash.delete key
      when Symbol
        new[suffix.to_sym] = hash.delete key
      end
    elsif key.to_s == prefix.to_s
      new[key] = hash.delete key
    end
  end

  new
end
|
327
|
+
|
328
|
+
# Resolves a constant from its name, e.g. "File::Stat". Each "::"-separated
# segment is looked up with const_get on the result of the previous one,
# starting from Kernel. Returns nil for a nil argument.
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) do |scope, name|
    scope.const_get name
  end
end
|
282
338
|
|
283
339
|
def self.string2hash(string)
|
@@ -290,58 +346,42 @@ module Misc
|
|
290
346
|
option, value = str, true
|
291
347
|
end
|
292
348
|
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
349
|
+
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
350
|
+
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
351
|
+
|
352
|
+
if value == true
|
353
|
+
options[option] = option.to_s.chars.first != '!'
|
354
|
+
else
|
355
|
+
options[option] = Thread.start do
|
356
|
+
$SAFE = 0;
|
357
|
+
case
|
358
|
+
when value =~ /^(?:true|T)$/i
|
359
|
+
true
|
360
|
+
when value =~ /^(?:false|F)$/i
|
361
|
+
false
|
362
|
+
when Symbol === value
|
363
|
+
value
|
364
|
+
when (String === value and value =~ /^\/(.*)\/$/)
|
365
|
+
Regexp.new /#{$1}/
|
366
|
+
else
|
367
|
+
begin
|
368
|
+
Kernel.const_get value
|
369
|
+
rescue
|
370
|
+
begin
|
371
|
+
raise if value =~ /[a-z]/ and defined? value
|
372
|
+
eval(value)
|
373
|
+
rescue Exception
|
374
|
+
value
|
317
375
|
end
|
318
376
|
end
|
319
|
-
end
|
320
|
-
end
|
377
|
+
end
|
378
|
+
end.value
|
379
|
+
end
|
321
380
|
end
|
322
381
|
|
323
382
|
options
|
324
383
|
end
|
325
384
|
|
326
|
-
def self.sensiblewrite(path, content)
|
327
|
-
begin
|
328
|
-
case
|
329
|
-
when String === content
|
330
|
-
File.open(path, 'w') do |f| f.write content end
|
331
|
-
when (IO === content or StringIO === content)
|
332
|
-
File.open(path, 'w') do |f| while l = content.gets; f.write l; end end
|
333
|
-
else
|
334
|
-
File.open(path, 'w') do |f| end
|
335
|
-
end
|
336
|
-
rescue Interrupt
|
337
|
-
FileUtils.rm_f path
|
338
|
-
raise "Interrupted (Ctrl-c)"
|
339
|
-
rescue Exception
|
340
|
-
FileUtils.rm_f path
|
341
|
-
raise $!
|
342
|
-
end
|
343
|
-
end
|
344
|
-
|
345
385
|
def self.field_position(fields, field, quiet = false)
|
346
386
|
return field if Integer === field or Range === field
|
347
387
|
raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
|
@@ -351,30 +391,6 @@ module Misc
|
|
351
391
|
raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
|
352
392
|
end
|
353
393
|
|
354
|
-
def self.first(list)
|
355
|
-
return nil if list.nil?
|
356
|
-
return list.first
|
357
|
-
end
|
358
|
-
|
359
|
-
def self.chunk(text, split)
|
360
|
-
text.split(split)[1..-1]
|
361
|
-
end
|
362
|
-
|
363
|
-
def self.insist(times = 3)
|
364
|
-
try = 0
|
365
|
-
begin
|
366
|
-
yield
|
367
|
-
rescue
|
368
|
-
try += 1
|
369
|
-
retry if try < times
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
def self.try3times(&block)
|
374
|
-
insist(3, &block)
|
375
|
-
end
|
376
|
-
|
377
|
-
|
378
394
|
# Divides the array into +num+ chunks of the same size by placing one
|
379
395
|
# element in each chunk iteratively.
|
380
396
|
def self.divide(array, num)
|
@@ -387,70 +403,21 @@ module Misc
|
|
387
403
|
chunks
|
388
404
|
end
|
389
405
|
|
390
|
-
def self.
|
391
|
-
|
392
|
-
list1.zip(list2).each do |k,v| hash[k] = v end
|
393
|
-
hash
|
406
|
+
# Transposes a list of lists by zipping the first list with all the others:
# [[1, 2], [3, 4]] becomes [[1, 3], [2, 4]]. The result has as many entries
# as the first list. An empty +array+ gives an empty result (it used to fail
# calling zip on nil).
def self.zip_fields(array)
  return [] if array.empty?
  array[0].zip(*array[1..-1])
end
|
395
409
|
|
396
|
-
|
397
|
-
def self.process_to_hash(list)
|
398
|
-
result = yield list
|
399
|
-
merge2hash(list, result)
|
400
|
-
end
|
401
|
-
|
402
|
-
IUPAC2BASE = {
|
403
|
-
"A" => ["A"],
|
404
|
-
"C" => ["C"],
|
405
|
-
"G" => ["G"],
|
406
|
-
"T" => ["T"],
|
407
|
-
"U" => ["U"],
|
408
|
-
"R" => "A or G".split(" or "),
|
409
|
-
"Y" => "C or T".split(" or "),
|
410
|
-
"S" => "G or C".split(" or "),
|
411
|
-
"W" => "A or T".split(" or "),
|
412
|
-
"K" => "G or T".split(" or "),
|
413
|
-
"M" => "A or C".split(" or "),
|
414
|
-
"B" => "C or G or T".split(" or "),
|
415
|
-
"D" => "A or G or T".split(" or "),
|
416
|
-
"H" => "A or C or T".split(" or "),
|
417
|
-
"V" => "A or C or G".split(" or "),
|
418
|
-
"N" => %w(A C T G),
|
419
|
-
}
|
420
|
-
|
421
|
-
BASE2COMPLEMENT = {
|
422
|
-
"A" => "T",
|
423
|
-
"C" => "G",
|
424
|
-
"G" => "C",
|
425
|
-
"T" => "A",
|
426
|
-
"U" => "A",
|
427
|
-
}
|
428
|
-
|
429
|
-
def self.IUPAC_to_base(iupac)
|
430
|
-
IUPAC2BASE[iupac]
|
431
|
-
end
|
432
|
-
end
|
433
|
-
|
434
|
-
module PDF2Text
|
435
|
-
def self.pdf2text(filename)
|
436
|
-
require 'rbbt/util/cmd'
|
437
|
-
require 'rbbt/util/tmpfile'
|
438
|
-
require 'rbbt/util/open'
|
439
|
-
|
440
|
-
|
441
|
-
TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
|
442
|
-
CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
|
443
|
-
end
|
444
|
-
end
|
445
410
|
end
|
446
411
|
|
447
|
-
|
412
|
+
module NamedArray
|
413
|
+
extend ChainMethods
|
414
|
+
self.chain_prefix = :named_array
|
448
415
|
attr_accessor :fields
|
449
416
|
|
450
|
-
def self.
|
451
|
-
|
452
|
-
|
453
|
-
|
417
|
+
def self.setup(array, fields)
|
418
|
+
array.extend NamedArray
|
419
|
+
array.fields = fields
|
420
|
+
array
|
454
421
|
end
|
455
422
|
|
456
423
|
def merge(array)
|
@@ -476,26 +443,23 @@ class NamedArray < Array
|
|
476
443
|
end
|
477
444
|
end
|
478
445
|
|
479
|
-
|
480
|
-
|
481
|
-
original_get_brackets(Misc.field_position(fields, key))
|
446
|
+
def named_array_get_brackets(key)
|
447
|
+
named_array_clean_get_brackets(Misc.field_position(fields, key))
|
482
448
|
end
|
483
449
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
end
|
450
|
+
def named_array_set_brackets(key,value)
|
451
|
+
named_array_clean_set_brackets(Misc.field_position(fields, key), value)
|
452
|
+
end
|
488
453
|
|
489
|
-
|
490
|
-
def values_at(*keys)
|
454
|
+
def named_array_values_at(*keys)
|
491
455
|
keys = keys.collect{|k| Misc.field_position(fields, k) }
|
492
|
-
|
456
|
+
named_array_clean_values_at(*keys)
|
493
457
|
end
|
494
458
|
|
495
459
|
def zip_fields
|
496
460
|
return [] if self.empty?
|
497
|
-
zipped =
|
498
|
-
zipped = zipped.collect{|v| NamedArray.
|
461
|
+
zipped = Misc.zip_fields(self)
|
462
|
+
zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
|
499
463
|
zipped
|
500
464
|
end
|
501
465
|
|
@@ -511,38 +475,85 @@ class NamedArray < Array
|
|
511
475
|
|
512
476
|
def report
|
513
477
|
fields.zip(self).collect do |field,value|
|
514
|
-
"* #{ field }: #{ Array === value ? value * "|" : value }"
|
478
|
+
"\nAttributes:\n* #{ field }: #{ Array === value ? value * "|" : value }"
|
515
479
|
end * "\n"
|
516
480
|
end
|
517
481
|
|
518
482
|
end
|
519
483
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
484
|
+
# Error that can carry extra diagnostic text in +info+. When +info+ is set it
# is appended to the message under an "Additional Info" heading.
class RBBTError < StandardError
  attr_accessor :info

  alias old_to_s to_s
  def to_s
    str = old_to_s.dup
    if info
      # to_s guards against non-String info: appending an Integer with <<
      # inserts a character by code point, and other objects raise TypeError.
      str << "\n" << "Additional Info:\n---\n" << info.to_s << "---"
    end
    str
  end
end
|
532
496
|
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
497
|
+
# Makes a Hash treat a Symbol key and the String with the same text as
# interchangeable for reading, membership tests and deletion. The behaviour
# is installed through ChainMethods with the :indiferent prefix; the
# +indiferent_clean_*+ methods called below are the untouched Hash methods.
module IndiferentHash
  extend ChainMethods
  self.chain_prefix = :indiferent

  # Reads +key+, trying the key as given first and then its Symbol/String
  # counterpart. Falls back to a plain lookup when neither form is present
  # or the key is of another type.
  def indiferent_get_brackets(key)
    case
    when (Symbol === key and indiferent_clean_include? key)
      indiferent_clean_get_brackets(key)
    when (Symbol === key and indiferent_clean_include? key.to_s)
      indiferent_clean_get_brackets(key.to_s)
    when (String === key and indiferent_clean_include? key)
      indiferent_clean_get_brackets(key)
    when (String === key and indiferent_clean_include? key.to_sym)
      indiferent_clean_get_brackets(key.to_sym)
    else
      indiferent_clean_get_brackets(key)
    end
  end

  # Reads every key in +key_list+ with the indifferent lookup above.
  def indiferent_values_at(*key_list)
    res = []
    key_list.each{|key| res << indiferent_get_brackets(key)}
    res
  end

  # True when +key+ is present either as given or as its Symbol/String
  # counterpart.
  def indiferent_include?(key)
    case
    when Symbol === key
      indiferent_clean_include?(key) or indiferent_clean_include?(key.to_s)
    when String === key
      indiferent_clean_include?(key) or indiferent_clean_include?(key.to_sym)
    else
      indiferent_clean_include?(key)
    end
  end

  # Deletes the entry for +value+: the String form is preferred when present,
  # otherwise the Symbol form. Keys that cannot be converted to a Symbol
  # (e.g. Integers) are deleted as given instead of raising NoMethodError.
  def indiferent_delete(value)
    if indiferent_clean_include? value.to_s
      indiferent_clean_delete(value.to_s)
    elsif value.respond_to? :to_sym
      indiferent_clean_delete(value.to_sym)
    else
      indiferent_clean_delete(value)
    end
  end

  # Extends +hash+ with IndiferentHash unless it already is one and returns
  # it.
  def self.setup(hash)
    return hash if IndiferentHash === hash
    hash.extend IndiferentHash
    hash
  end
end
|
548
547
|
|
548
|
+
# Text extraction from PDF documents through the external +pdftotext+
# command.
module PDF2Text
  # Reads +filename+ through Open.open (with :nocache), hands the content to
  # TmpFile.with_file and runs "pdftotext <tmpfile> -" on the resulting
  # temporary file with CMD.cmd.
  # NOTE(review): presumably returns the extracted text as produced by
  # CMD.cmd with :pipe => false; confirm against CMD.cmd and
  # TmpFile.with_file.
  def self.pdftotext(filename)
    # Loaded lazily, only when a conversion is actually requested.
    require 'rbbt/util/cmd'
    require 'rbbt/util/tmpfile'
    require 'rbbt/util/open'

    pdf_content = Open.open(filename, :nocache => true).read

    TmpFile.with_file(pdf_content) do |pdf_file|
      CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
    end
  end
end
|