rbbt-util 3.2.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'stringio'

# Serializers that convert TSV values to Strings and back.
#
# Every serializer exposes the same pair of class methods: +dump(value)+,
# which returns a String, and +load(string)+, which rebuilds the value.
# SERIALIZER_ALIAS maps short symbolic names to the serializer to use.
module TSV
  # One integer, packed with the "l" directive (signed 32-bit, native endian).
  class IntegerSerializer
    def self.dump(i)
      [i].pack("l")
    end

    def self.load(str)
      str.unpack("l").first
    end
  end

  # One float, packed with the "d" directive (double precision, native format).
  class FloatSerializer
    def self.dump(i)
      [i].pack("d")
    end

    def self.load(str)
      str.unpack("d").first
    end
  end

  # An array of integers, each packed with the "l" directive.
  class IntegerArraySerializer
    def self.dump(a)
      a.pack("l*")
    end

    def self.load(str)
      str.unpack("l*")
    end
  end

  # A single value stored as its +to_s+; loading returns the string untouched.
  class StringSerializer
    def self.dump(str)
      str.to_s
    end

    def self.load(str)
      str
    end
  end

  # A flat array stored as tab-separated strings.
  class StringArraySerializer
    def self.dump(array)
      array.map { |value| value.to_s }.join("\t")
    end

    # Returns [] for nil. The -1 limit keeps trailing empty fields.
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1)
    end
  end

  # An array of arrays: inner values are joined with "|", outer ones with tabs.
  class StringDoubleArraySerializer
    def self.dump(array)
      array.map { |values| values.map { |value| value.to_s }.join("|") }.join("\t")
    end

    # Returns [] for nil. The -1 limits keep trailing empty fields at both levels.
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1).map { |chunk| chunk.split("|", -1) }
    end
  end

  # A whole TSV stored through Marshal; a duplicate of the object is dumped
  # and the loaded object is passed through TSV.setup.
  class TSVMarshalSerializer
    def self.dump(tsv)
      Marshal.dump(tsv.dup)
    end

    def self.load(string)
      TSV.setup Marshal.load(string)
    end
  end

  # A whole TSV stored as its +to_s+ text and re-read with TSV.open.
  class TSVSerializer
    def self.dump(tsv)
      tsv.to_s
    end

    def self.load(string)
      TSV.open StringIO.new(string)
    end
  end

  # Symbolic name => serializer. Note that :marshal maps to Ruby's own Marshal
  # module, which offers the same dump/load pair.
  SERIALIZER_ALIAS = {
    :integer => IntegerSerializer,
    :float => FloatSerializer,
    :integer_array => IntegerArraySerializer,
    :marshal => Marshal,
    :single => StringSerializer,
    :string => StringSerializer,
    :list => StringArraySerializer,
    :flat => StringArraySerializer,
    :double => StringDoubleArraySerializer,
    :tsv => TSVSerializer,
    :marshal_tsv => TSVMarshalSerializer
  }

end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'shellwords'
require 'rbbt/resource/path'
|
2
|
+
# Helpers for locating fields and opening the sources of TSV files.
module TSV

  # For every field of +file+, counts how many of the field's distinct values
  # match one of +values+.
  #
  # +values+ is either an Array (written one per line to a temporary file) or
  # the path of a file that already holds the values (symlinked instead).
  # Field names come from TSV.parse_header(...).all_fields. For each field, a
  # shell pipeline extracts that column, skips lines starting with "#",
  # splits cells on "|", de-duplicates, keeps entries with an alphabetic
  # character and matches them as fixed whole words against the values file.
  #
  # Returns a Hash of field name => number of matching lines.
  def self.field_match_counts(file, values)
    fields = TSV.parse_header(Open.open(file)).all_fields

    counts = {}
    TmpFile.with_file do |tmpfile|
      if Array === values
        Open.write(tmpfile, values * "\n")
      else
        FileUtils.ln_s values, tmpfile
      end

      # Escape both paths so that spaces or shell metacharacters in them
      # cannot break, or inject into, the pipeline.
      source = Shellwords.escape(file.to_s)
      patterns = Shellwords.escape(tmpfile.to_s)

      fields.each_with_index do |field, i|
        # The grep patterns are quoted so the shell cannot glob-expand them.
        pipeline = "cat #{source} |grep -v '^#'|cut -f #{i + 1}|tr '|' '\\n' |sort -u |grep '[[:alpha:]]'|grep -f #{patterns} -F -w"
        counts[field] = begin
                          CMD.cmd(pipeline).read.count("\n")
                        rescue
                          # Best effort: any failure of the pipeline counts as
                          # zero matches (presumably this covers grep's
                          # non-zero exit when nothing matches; confirm
                          # against CMD.cmd).
                          0
                        end
      end
    end

    counts
  end

  # Returns the filename behind +file+: the String itself, or the +filename+
  # of a stream-like object (anything responding to +gets+) when it has one,
  # nil otherwise. Raises for any other kind of object.
  def self.get_filename(file)
    if String === file
      file
    elsif file.respond_to?(:gets)
      file.respond_to?(:filename) ? file.filename : nil
    else
      raise "Cannot get filename from: #{file.inspect}"
    end
  end

  # Returns a readable stream for +file+: Path objects are opened with their
  # own +open+, Strings with File.open, and objects responding to +gets+ are
  # returned as they are. Raises for any other kind of object.
  def self.get_stream(file)
    case
    when Path === file
      file.open
    when String === file
      File.open(file)
    when file.respond_to?(:gets)
      file
    else
      raise "Cannot get stream from: #{file.inspect}"
    end
  end

  # Resolves +field+ to a position. Integers are returned unchanged; nil,
  # :key or the name of the key field give :key; any other String gives its
  # index in +fields+ (nil when absent). Anything else gives nil.
  def self.identify_field(key_field, fields, field)
    if Integer === field
      field
    elsif field.nil? or field == :key or key_field == field
      :key
    elsif String === field
      fields.index field
    end
  end

  # Instance-level shortcut that resolves +field+ against this object's own
  # +key_field+ and +fields+.
  def identify_field(field)
    TSV.identify_field(key_field, fields, field)
  end


end
|
data/lib/rbbt/util/R.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
3
|
|
4
4
|
module R
|
5
5
|
|
@@ -24,7 +24,7 @@ module R
|
|
24
24
|
|
25
25
|
end
|
26
26
|
|
27
|
-
|
27
|
+
module TSV
|
28
28
|
def R(script, open_options = {})
|
29
29
|
TmpFile.with_file do |f|
|
30
30
|
Open.write(f, self.to_s)
|
@@ -36,7 +36,7 @@ rbbt.tsv.write('#{f}', data);
|
|
36
36
|
EOF
|
37
37
|
).read)
|
38
38
|
open_options = Misc.add_defaults open_options, :type => :list
|
39
|
-
TSV.
|
39
|
+
TSV.open(f, open_options)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rbbt/util/log'
|
2
|
+
|
3
|
+
# Mixin for modules that wrap ("chain") methods of the objects they extend.
#
# A module that does +extend ChainMethods+ gains a +chain_prefix+, which
# defaults to its downcased name. Each of its instance methods whose name
# starts with that prefix is a chained method. When the module later extends
# an object, each chained method +<prefix>_foo+ is wired in like this:
#
# * the object's current +foo+ stays reachable as +<prefix>_clean_foo+
# * +foo+ itself is re-pointed to +<prefix>_foo+
#
# The names +get_brackets+ and +set_brackets+ stand for +[]+ and +[]=+.
module ChainMethods
  def self.extended(base)
    unless base.respond_to? :chain_prefix
      metaclass = class << base
        attr_accessor :chain_prefix
        attr_writer :chained_methods

        # Instance methods of the module whose names start with the prefix;
        # computed on first use and then cached.
        def chained_methods
          @chained_methods ||= instance_methods.select { |name| name.to_s =~ /^#{chain_prefix}/ }
        end

        self
      end

      metaclass.module_eval do
        # Extends +object+ with this module, which triggers setup_chains.
        def setup_chain(object)
          object.extend self
        end

        # Installs the aliases for every chained method on the singleton
        # class of +base+. Does nothing when there are no chained methods or
        # when the first clean alias already exists on +base+.
        def setup_chains(base)
          raise "No prefix specified for #{self.to_s}" if self.chain_prefix.nil? or (String === self.chain_prefix and self.chain_prefix.empty?)

          # instance_methods yields Symbols on Ruby >= 1.9 and Strings on
          # 1.8; normalise to Strings so String#sub is always available.
          names = self.chained_methods.collect { |name| name.to_s }
          return if names.empty?

          marker = self.chain_prefix.to_s + '_'
          clean_marker = self.chain_prefix.to_s + '_clean_'

          return if base.respond_to?(clean_marker + names.first.sub(marker, ''))

          class << base; self; end.module_eval do
            names.each do |chained|
              target = chained.sub(marker, '')
              clean = clean_marker + target

              target = "[]" if target == "get_brackets"
              target = "[]=" if target == "set_brackets"

              begin
                alias_method clean, target
              rescue NameError
                # The object has no method of that name to preserve; the
                # chained method is still installed below.
              end
              alias_method target, chained
            end
          end
        end
      end

      # Module#extended is private, so this check only finds a public
      # +extended+ that the module's singleton class already provides.
      unless metaclass.respond_to? :extended
        metaclass.module_eval do
          def extended(base)
            setup_chains(base)
          end
        end
      end
    end

    base.chain_prefix = base.to_s.downcase.to_sym
  end
end
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -4,10 +4,11 @@ require 'stringio'
|
|
4
4
|
|
5
5
|
module CMD
|
6
6
|
|
7
|
-
class CMDError <
|
7
|
+
class CMDError < StandardError; end
|
8
|
+
|
8
9
|
module SmartIO
|
9
|
-
attr_accessor :pid, :cmd, :post, :in, :out, :err
|
10
|
-
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
|
10
|
+
attr_accessor :pid, :cmd, :post, :in, :out, :err, :log
|
11
|
+
def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil, log = true)
|
11
12
|
io.extend SmartIO
|
12
13
|
io.pid = pid
|
13
14
|
io.cmd = cmd
|
@@ -15,6 +16,7 @@ module CMD
|
|
15
16
|
io.out = out
|
16
17
|
io.err = err
|
17
18
|
io.post = post
|
19
|
+
io.log = log
|
18
20
|
|
19
21
|
io.class.send(:alias_method, :original_close, :close)
|
20
22
|
io.class.send(:alias_method, :original_read, :read)
|
@@ -28,10 +30,10 @@ module CMD
|
|
28
30
|
rescue
|
29
31
|
end
|
30
32
|
|
31
|
-
Log.debug "Process #{ cmd } succeded" if $? and $?.success?
|
33
|
+
Log.debug "Process #{ cmd } succeded" if $? and $?.success? and log
|
32
34
|
|
33
35
|
if $? and not $?.success?
|
34
|
-
Log.debug "Raising exception"
|
36
|
+
Log.debug "Raising exception" if log
|
35
37
|
exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
|
36
38
|
original_close
|
37
39
|
raise exception
|
@@ -51,7 +53,7 @@ module CMD
|
|
51
53
|
|
52
54
|
def force_close
|
53
55
|
if @pid
|
54
|
-
Log.debug "Forcing close by killing '#{@pid}'"
|
56
|
+
Log.debug "Forcing close by killing '#{@pid}'" if log
|
55
57
|
Process.kill("KILL", @pid)
|
56
58
|
Process.waitpid(@pid)
|
57
59
|
end
|
@@ -98,6 +100,9 @@ module CMD
|
|
98
100
|
stderr = options.delete(:stderr)
|
99
101
|
pipe = options.delete(:pipe)
|
100
102
|
post = options.delete(:post)
|
103
|
+
log = options.delete(:log)
|
104
|
+
|
105
|
+
log = true if log.nil?
|
101
106
|
|
102
107
|
if stderr == true
|
103
108
|
stderr = Log::HIGH
|
@@ -149,8 +154,8 @@ module CMD
|
|
149
154
|
|
150
155
|
exit(-1)
|
151
156
|
rescue Exception
|
152
|
-
Log.debug("CMDError: #{$!.message}")
|
153
|
-
ddd $!.backtrace
|
157
|
+
Log.debug("CMDError: #{$!.message}") if log
|
158
|
+
ddd $!.backtrace if log
|
154
159
|
raise CMDError, $!.message
|
155
160
|
end
|
156
161
|
}
|
@@ -164,7 +169,7 @@ module CMD
|
|
164
169
|
serr = serr.first
|
165
170
|
|
166
171
|
|
167
|
-
Log.debug "CMD: [#{pid}] #{cmd}"
|
172
|
+
Log.debug "CMD: [#{pid}] #{cmd}" if log
|
168
173
|
|
169
174
|
if in_content.respond_to?(:read)
|
170
175
|
Thread.new do
|
@@ -190,7 +195,7 @@ module CMD
|
|
190
195
|
if pipe
|
191
196
|
Thread.new do
|
192
197
|
while line = serr.gets
|
193
|
-
Log.log line, stderr if Integer === stderr
|
198
|
+
Log.log line, stderr if Integer === stderr and log
|
194
199
|
end
|
195
200
|
serr.close
|
196
201
|
Thread.exit
|
@@ -216,11 +221,10 @@ module CMD
|
|
216
221
|
Process.waitpid pid
|
217
222
|
|
218
223
|
if not $?.success?
|
219
|
-
exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}"
|
220
|
-
exception.info = err if Integer === stderr and stderr >= Log.severity
|
224
|
+
exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}.\n#{err}"
|
221
225
|
raise exception
|
222
226
|
else
|
223
|
-
Log.log err, stderr if Integer === stderr
|
227
|
+
Log.log err, stderr if Integer === stderr and log
|
224
228
|
end
|
225
229
|
|
226
230
|
out
|
data/lib/rbbt/util/excel2tsv.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
require 'spreadsheet'
|
2
|
-
require 'rbbt/
|
2
|
+
require 'rbbt/tsv'
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
|
-
|
4
|
+
|
5
|
+
module TSV
|
5
6
|
def self.excel2tsv(file, options = {})
|
6
7
|
sheet = options.delete :sheet
|
7
8
|
header = options.delete :header
|
@@ -26,7 +27,7 @@ class TSV
|
|
26
27
|
rows.each do |row| f.puts row * "\t" end
|
27
28
|
end
|
28
29
|
|
29
|
-
TSV.
|
30
|
+
TSV.open(filename, options)
|
30
31
|
end
|
31
32
|
end
|
32
33
|
end
|
data/lib/rbbt/util/log.rb
CHANGED
@@ -19,6 +19,7 @@ module Log
|
|
19
19
|
SEVERITY_COLOR = ["0;37m", "0;32m", "0;33m", "0;31m", "1;0m" ].collect{|e| "\033[#{e}"}
|
20
20
|
|
21
21
|
def self.log(message, severity = MEDIUM)
|
22
|
+
message ||= ""
|
22
23
|
severity_color = SEVERITY_COLOR[severity]
|
23
24
|
font_color = {false => "\033[0;37m", true => "\033[0m"}[severity >= INFO]
|
24
25
|
|
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1,46 +1,46 @@
|
|
1
|
-
require 'iconv'
|
2
1
|
require 'lockfile'
|
3
|
-
require '
|
2
|
+
require 'rbbt/util/chain_methods'
|
3
|
+
require 'rbbt/resource/path'
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
module Misc
|
6
|
+
class FieldNotFoundError < StandardError;end
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
8
|
+
|
9
|
+
IUPAC2BASE = {
|
10
|
+
"A" => ["A"],
|
11
|
+
"C" => ["C"],
|
12
|
+
"G" => ["G"],
|
13
|
+
"T" => ["T"],
|
14
|
+
"U" => ["U"],
|
15
|
+
"R" => "A or G".split(" or "),
|
16
|
+
"Y" => "C or T".split(" or "),
|
17
|
+
"S" => "G or C".split(" or "),
|
18
|
+
"W" => "A or T".split(" or "),
|
19
|
+
"K" => "G or T".split(" or "),
|
20
|
+
"M" => "A or C".split(" or "),
|
21
|
+
"B" => "C or G or T".split(" or "),
|
22
|
+
"D" => "A or G or T".split(" or "),
|
23
|
+
"H" => "A or C or T".split(" or "),
|
24
|
+
"V" => "A or C or G".split(" or "),
|
25
|
+
"N" => %w(A C T G),
|
26
|
+
}
|
17
27
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
BASE2COMPLEMENT = {
|
29
|
+
"A" => "T",
|
30
|
+
"C" => "G",
|
31
|
+
"G" => "C",
|
32
|
+
"T" => "A",
|
33
|
+
"U" => "A",
|
34
|
+
}
|
24
35
|
|
25
|
-
def self.
|
26
|
-
|
27
|
-
alias_method :old_get, :[]
|
28
|
-
alias_method :[], :indiferent_get
|
29
|
-
end
|
36
|
+
def self.IUPAC_to_base(iupac)
|
37
|
+
IUPAC2BASE[iupac]
|
30
38
|
end
|
31
|
-
end
|
32
39
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
old_pwd = FileUtils.pwd
|
38
|
-
begin
|
39
|
-
FileUtils.cd dir
|
40
|
-
yield
|
41
|
-
ensure
|
42
|
-
FileUtils.cd old_pwd
|
43
|
-
end
|
40
|
+
def self.is_filename?(string)
|
41
|
+
return true if Path === string
|
42
|
+
return true if String === string and string.length < 265 and File.exists? string
|
43
|
+
return false
|
44
44
|
end
|
45
45
|
|
46
46
|
def self.intersect_sorted_arrays(a1, a2)
|
@@ -97,73 +97,21 @@ module Misc
|
|
97
97
|
new
|
98
98
|
end
|
99
99
|
|
100
|
-
def self.
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
def self.add_method(object, method_name, &block)
|
105
|
-
class << object
|
106
|
-
self
|
107
|
-
end.send :define_method, method_name, block
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.redefine_method(object, old_method, new_method_name, &block)
|
111
|
-
return if object.respond_to? new_method_name
|
112
|
-
metaclass = class << object; self end
|
113
|
-
metaclass.send :alias_method, new_method_name, old_method
|
114
|
-
metaclass.send :define_method, old_method, &block
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.filename?(filename)
|
118
|
-
String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.lock(file, *args)
|
122
|
-
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
123
|
-
lockfile = Lockfile.new(file + '.lock')
|
124
|
-
lockfile.lock do
|
125
|
-
yield file, *args
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def self.string2const(string)
|
130
|
-
return nil if string.nil?
|
131
|
-
mod = Kernel
|
132
|
-
|
133
|
-
string.to_s.split('::').each do |str|
|
134
|
-
mod = mod.const_get str
|
135
|
-
end
|
136
|
-
|
137
|
-
mod
|
138
|
-
end
|
139
|
-
|
140
|
-
def self.path_relative_to(path, subdir)
|
141
|
-
File.expand_path(path).sub(/^#{Regexp.quote File.expand_path(subdir)}\/?/,'')
|
142
|
-
end
|
143
|
-
|
144
|
-
def self.in_directory?(file, directory)
|
145
|
-
if File.expand_path(file) =~ /^#{Regexp.quote File.expand_path(directory)}/
|
146
|
-
true
|
147
|
-
else
|
148
|
-
false
|
100
|
+
def self.array2hash(array)
|
101
|
+
hash = {}
|
102
|
+
array.each do |key, value|
|
103
|
+
hash[key] = value
|
149
104
|
end
|
105
|
+
hash
|
150
106
|
end
|
151
107
|
|
152
|
-
def self.
|
153
|
-
|
154
|
-
files = []
|
155
|
-
while in_directory?(path, subdir)
|
156
|
-
path = path.dirname
|
157
|
-
if path[target].exists?
|
158
|
-
files << path[target]
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
files
|
108
|
+
def self.zip2hash(list1, list2)
|
109
|
+
array2hash(list1.zip(list2))
|
163
110
|
end
|
164
111
|
|
165
|
-
def self.
|
166
|
-
|
112
|
+
def self.process_to_hash(list)
|
113
|
+
result = yield list
|
114
|
+
zip2hash(list, result)
|
167
115
|
end
|
168
116
|
|
169
117
|
def self.env_add(var, value, sep = ":", prepend = true)
|
@@ -176,15 +124,6 @@ module Misc
|
|
176
124
|
end
|
177
125
|
end
|
178
126
|
|
179
|
-
def self.count(list)
|
180
|
-
counts = Hash.new 0
|
181
|
-
list.each do |item|
|
182
|
-
counts[item] += 1
|
183
|
-
end
|
184
|
-
|
185
|
-
counts
|
186
|
-
end
|
187
|
-
|
188
127
|
def self.benchmark(repeats = 1)
|
189
128
|
require 'benchmark'
|
190
129
|
res = nil
|
@@ -220,6 +159,73 @@ module Misc
|
|
220
159
|
res
|
221
160
|
end
|
222
161
|
|
162
|
+
# Runs the block, retrying it whenever it raises a StandardError, for at most
# +times+ attempts in total. Returns the value of the first attempt that
# succeeds. Once every attempt has failed, the last error is re-raised rather
# than swallowed, so callers can tell failure from a block that returned nil.
def self.insist(times = 3)
  try = 0
  begin
    yield
  rescue
    try += 1
    retry if try < times
    raise
  end
end
|
171
|
+
|
172
|
+
def self.try3times(&block)
|
173
|
+
insist(3, &block)
|
174
|
+
end
|
175
|
+
|
176
|
+
def self.hash2string(hash)
|
177
|
+
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
178
|
+
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
|
179
|
+
[ Symbol === k ? ":" << k.to_s : k,
|
180
|
+
Symbol === v ? ":" << v.to_s : v] * "="
|
181
|
+
}.compact * "#"
|
182
|
+
end
|
183
|
+
|
184
|
+
# Returns +path+ expressed relative to +basedir+, or nil when +path+ is not
# located below +basedir+ (this includes +path+ being +basedir+ itself).
# Both arguments are expanded to absolute paths before comparing.
def self.path_relative_to(basedir, path)
  path = File.expand_path(path)
  # Drop the trailing slash that only the root directory keeps after
  # expansion, so the separator is not doubled in the pattern below.
  basedir = File.expand_path(basedir).chomp("/")

  # Anchored at the start: basedir has to be a leading directory of path,
  # not a fragment that merely occurs somewhere inside it.
  if path =~ /\A#{Regexp.quote basedir}\/(.*)/
    return $1
  else
    return nil
  end
end
|
194
|
+
|
195
|
+
def self.lock(file, *args)
|
196
|
+
FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
|
197
|
+
lockfile = Lockfile.new(file + '.lock')
|
198
|
+
lockfile.lock do
|
199
|
+
yield file, *args
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
def self.common_path(dir, file)
|
204
|
+
file = File.expand_path file
|
205
|
+
dir = File.expand_path dir
|
206
|
+
|
207
|
+
return true if file == dir
|
208
|
+
while File.dirname(file) != file
|
209
|
+
file = File.dirname(file)
|
210
|
+
return true if file == dir
|
211
|
+
end
|
212
|
+
|
213
|
+
return false
|
214
|
+
end
|
215
|
+
|
216
|
+
def self.in_dir(dir)
|
217
|
+
old_pwd = FileUtils.pwd
|
218
|
+
res = nil
|
219
|
+
begin
|
220
|
+
FileUtils.mkdir_p dir unless File.exists? dir
|
221
|
+
FileUtils.cd dir
|
222
|
+
res = yield
|
223
|
+
ensure
|
224
|
+
FileUtils.cd old_pwd
|
225
|
+
end
|
226
|
+
res
|
227
|
+
end
|
228
|
+
|
223
229
|
def self.fixutf8(string)
|
224
230
|
if string.respond_to?(:valid_encoding?) and ! string.valid_encoding?
|
225
231
|
@@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
|
@@ -229,6 +235,25 @@ module Misc
|
|
229
235
|
end
|
230
236
|
end
|
231
237
|
|
238
|
+
def self.sensiblewrite(path, content)
|
239
|
+
begin
|
240
|
+
case
|
241
|
+
when String === content
|
242
|
+
File.open(path, 'w') do |f| f.write content end
|
243
|
+
when (IO === content or StringIO === content)
|
244
|
+
File.open(path, 'w') do |f| while l = content.gets; f.write l; end end
|
245
|
+
else
|
246
|
+
File.open(path, 'w') do |f| end
|
247
|
+
end
|
248
|
+
rescue Interrupt
|
249
|
+
FileUtils.rm_f path
|
250
|
+
raise "Interrupted (Ctrl-c)"
|
251
|
+
rescue Exception
|
252
|
+
FileUtils.rm_f path
|
253
|
+
raise $!
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
232
257
|
def self.add_defaults(options, defaults = {})
|
233
258
|
case
|
234
259
|
when Hash === options
|
@@ -246,20 +271,8 @@ module Misc
|
|
246
271
|
new_options
|
247
272
|
end
|
248
273
|
|
249
|
-
def self.
|
250
|
-
|
251
|
-
hash.delete keys.first.to_sym
|
252
|
-
else
|
253
|
-
keys.collect do |key| hash.delete(key.to_sym) || hash.delete(key.to_s) end
|
254
|
-
end
|
255
|
-
end
|
256
|
-
|
257
|
-
def self.hash2string(hash)
|
258
|
-
hash.sort_by{|k,v| k.to_s}.collect{|k,v|
|
259
|
-
next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
|
260
|
-
[ Symbol === k ? ":" << k.to_s : k,
|
261
|
-
Symbol === v ? ":" << v.to_s : v] * "="
|
262
|
-
}.compact * "#"
|
274
|
+
def self.digest(text)
|
275
|
+
Digest::MD5.hexdigest(text)
|
263
276
|
end
|
264
277
|
|
265
278
|
def self.hash2md5(hash)
|
@@ -270,14 +283,57 @@ module Misc
|
|
270
283
|
case
|
271
284
|
when v.inspect =~ /:0x0/
|
272
285
|
o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
|
273
|
-
|
274
|
-
o[k] = "" << String.new(v.to_s)
|
286
|
+
#when Resource::Path === v
|
287
|
+
# o[k] = "" << String.new(v.to_s)
|
275
288
|
else
|
276
289
|
o[k] = v
|
277
290
|
end
|
278
291
|
end
|
279
292
|
|
280
|
-
|
293
|
+
if o.empty?
|
294
|
+
""
|
295
|
+
else
|
296
|
+
Digest::MD5.hexdigest(o.sort_by{|k| k.to_s}.inspect)
|
297
|
+
end
|
298
|
+
end
|
299
|
+
|
300
|
+
def self.process_options(hash, *keys)
|
301
|
+
if keys.length == 1
|
302
|
+
hash.delete keys.first.to_sym
|
303
|
+
else
|
304
|
+
keys.collect do |key| hash.delete(key.to_sym) || hash.delete(key.to_s) end
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
def self.pull_keys(hash, prefix)
|
309
|
+
new = {}
|
310
|
+
hash.keys.each do |key|
|
311
|
+
if key.to_s =~ /#{ prefix }_(.*)/
|
312
|
+
case
|
313
|
+
when String === key
|
314
|
+
new[$1] = hash.delete key
|
315
|
+
when Symbol === key
|
316
|
+
new[$1.to_sym] = hash.delete key
|
317
|
+
end
|
318
|
+
else
|
319
|
+
if key.to_s == prefix.to_s
|
320
|
+
new[key] = hash.delete key
|
321
|
+
end
|
322
|
+
end
|
323
|
+
end
|
324
|
+
|
325
|
+
new
|
326
|
+
end
|
327
|
+
|
328
|
+
def self.string2const(string)
|
329
|
+
return nil if string.nil?
|
330
|
+
mod = Kernel
|
331
|
+
|
332
|
+
string.to_s.split('::').each do |str|
|
333
|
+
mod = mod.const_get str
|
334
|
+
end
|
335
|
+
|
336
|
+
mod
|
281
337
|
end
|
282
338
|
|
283
339
|
def self.string2hash(string)
|
@@ -290,58 +346,42 @@ module Misc
|
|
290
346
|
option, value = str, true
|
291
347
|
end
|
292
348
|
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
349
|
+
option = option.sub(":",'').to_sym if option.chars.first == ':'
|
350
|
+
value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
|
351
|
+
|
352
|
+
if value == true
|
353
|
+
options[option] = option.to_s.chars.first != '!'
|
354
|
+
else
|
355
|
+
options[option] = Thread.start do
|
356
|
+
$SAFE = 0;
|
357
|
+
case
|
358
|
+
when value =~ /^(?:true|T)$/i
|
359
|
+
true
|
360
|
+
when value =~ /^(?:false|F)$/i
|
361
|
+
false
|
362
|
+
when Symbol === value
|
363
|
+
value
|
364
|
+
when (String === value and value =~ /^\/(.*)\/$/)
|
365
|
+
Regexp.new /#{$1}/
|
366
|
+
else
|
367
|
+
begin
|
368
|
+
Kernel.const_get value
|
369
|
+
rescue
|
370
|
+
begin
|
371
|
+
raise if value =~ /[a-z]/ and defined? value
|
372
|
+
eval(value)
|
373
|
+
rescue Exception
|
374
|
+
value
|
317
375
|
end
|
318
376
|
end
|
319
|
-
end
|
320
|
-
end
|
377
|
+
end
|
378
|
+
end.value
|
379
|
+
end
|
321
380
|
end
|
322
381
|
|
323
382
|
options
|
324
383
|
end
|
325
384
|
|
326
|
-
def self.sensiblewrite(path, content)
|
327
|
-
begin
|
328
|
-
case
|
329
|
-
when String === content
|
330
|
-
File.open(path, 'w') do |f| f.write content end
|
331
|
-
when (IO === content or StringIO === content)
|
332
|
-
File.open(path, 'w') do |f| while l = content.gets; f.write l; end end
|
333
|
-
else
|
334
|
-
File.open(path, 'w') do |f| end
|
335
|
-
end
|
336
|
-
rescue Interrupt
|
337
|
-
FileUtils.rm_f path
|
338
|
-
raise "Interrupted (Ctrl-c)"
|
339
|
-
rescue Exception
|
340
|
-
FileUtils.rm_f path
|
341
|
-
raise $!
|
342
|
-
end
|
343
|
-
end
|
344
|
-
|
345
385
|
def self.field_position(fields, field, quiet = false)
|
346
386
|
return field if Integer === field or Range === field
|
347
387
|
raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
|
@@ -351,30 +391,6 @@ module Misc
|
|
351
391
|
raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
|
352
392
|
end
|
353
393
|
|
354
|
-
def self.first(list)
|
355
|
-
return nil if list.nil?
|
356
|
-
return list.first
|
357
|
-
end
|
358
|
-
|
359
|
-
def self.chunk(text, split)
|
360
|
-
text.split(split)[1..-1]
|
361
|
-
end
|
362
|
-
|
363
|
-
def self.insist(times = 3)
|
364
|
-
try = 0
|
365
|
-
begin
|
366
|
-
yield
|
367
|
-
rescue
|
368
|
-
try += 1
|
369
|
-
retry if try < times
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
def self.try3times(&block)
|
374
|
-
insist(3, &block)
|
375
|
-
end
|
376
|
-
|
377
|
-
|
378
394
|
# Divides the array into +num+ chunks of the same size by placing one
|
379
395
|
# element in each chunk iteratively.
|
380
396
|
def self.divide(array, num)
|
@@ -387,70 +403,21 @@ module Misc
|
|
387
403
|
chunks
|
388
404
|
end
|
389
405
|
|
390
|
-
def self.
|
391
|
-
|
392
|
-
list1.zip(list2).each do |k,v| hash[k] = v end
|
393
|
-
hash
|
406
|
+
def self.zip_fields(array)
|
407
|
+
array[0].zip(*array[1..-1])
|
394
408
|
end
|
395
409
|
|
396
|
-
|
397
|
-
def self.process_to_hash(list)
|
398
|
-
result = yield list
|
399
|
-
merge2hash(list, result)
|
400
|
-
end
|
401
|
-
|
402
|
-
IUPAC2BASE = {
|
403
|
-
"A" => ["A"],
|
404
|
-
"C" => ["C"],
|
405
|
-
"G" => ["G"],
|
406
|
-
"T" => ["T"],
|
407
|
-
"U" => ["U"],
|
408
|
-
"R" => "A or G".split(" or "),
|
409
|
-
"Y" => "C or T".split(" or "),
|
410
|
-
"S" => "G or C".split(" or "),
|
411
|
-
"W" => "A or T".split(" or "),
|
412
|
-
"K" => "G or T".split(" or "),
|
413
|
-
"M" => "A or C".split(" or "),
|
414
|
-
"B" => "C or G or T".split(" or "),
|
415
|
-
"D" => "A or G or T".split(" or "),
|
416
|
-
"H" => "A or C or T".split(" or "),
|
417
|
-
"V" => "A or C or G".split(" or "),
|
418
|
-
"N" => %w(A C T G),
|
419
|
-
}
|
420
|
-
|
421
|
-
BASE2COMPLEMENT = {
|
422
|
-
"A" => "T",
|
423
|
-
"C" => "G",
|
424
|
-
"G" => "C",
|
425
|
-
"T" => "A",
|
426
|
-
"U" => "A",
|
427
|
-
}
|
428
|
-
|
429
|
-
def self.IUPAC_to_base(iupac)
|
430
|
-
IUPAC2BASE[iupac]
|
431
|
-
end
|
432
|
-
end
|
433
|
-
|
434
|
-
module PDF2Text
|
435
|
-
def self.pdf2text(filename)
|
436
|
-
require 'rbbt/util/cmd'
|
437
|
-
require 'rbbt/util/tmpfile'
|
438
|
-
require 'rbbt/util/open'
|
439
|
-
|
440
|
-
|
441
|
-
TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
|
442
|
-
CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
|
443
|
-
end
|
444
|
-
end
|
445
410
|
end
|
446
411
|
|
447
|
-
|
412
|
+
module NamedArray
|
413
|
+
extend ChainMethods
|
414
|
+
self.chain_prefix = :named_array
|
448
415
|
attr_accessor :fields
|
449
416
|
|
450
|
-
def self.
|
451
|
-
|
452
|
-
|
453
|
-
|
417
|
+
def self.setup(array, fields)
|
418
|
+
array.extend NamedArray
|
419
|
+
array.fields = fields
|
420
|
+
array
|
454
421
|
end
|
455
422
|
|
456
423
|
def merge(array)
|
@@ -476,26 +443,23 @@ class NamedArray < Array
|
|
476
443
|
end
|
477
444
|
end
|
478
445
|
|
479
|
-
|
480
|
-
|
481
|
-
original_get_brackets(Misc.field_position(fields, key))
|
446
|
+
def named_array_get_brackets(key)
|
447
|
+
named_array_clean_get_brackets(Misc.field_position(fields, key))
|
482
448
|
end
|
483
449
|
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
end
|
450
|
+
def named_array_set_brackets(key,value)
|
451
|
+
named_array_clean_set_brackets(Misc.field_position(fields, key), value)
|
452
|
+
end
|
488
453
|
|
489
|
-
|
490
|
-
def values_at(*keys)
|
454
|
+
def named_array_values_at(*keys)
|
491
455
|
keys = keys.collect{|k| Misc.field_position(fields, k) }
|
492
|
-
|
456
|
+
named_array_clean_values_at(*keys)
|
493
457
|
end
|
494
458
|
|
495
459
|
def zip_fields
|
496
460
|
return [] if self.empty?
|
497
|
-
zipped =
|
498
|
-
zipped = zipped.collect{|v| NamedArray.
|
461
|
+
zipped = Misc.zip_fields(self)
|
462
|
+
zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
|
499
463
|
zipped
|
500
464
|
end
|
501
465
|
|
@@ -511,38 +475,85 @@ class NamedArray < Array
|
|
511
475
|
|
512
476
|
def report
|
513
477
|
fields.zip(self).collect do |field,value|
|
514
|
-
"* #{ field }: #{ Array === value ? value * "|" : value }"
|
478
|
+
"\nAttributes:\n* #{ field }: #{ Array === value ? value * "|" : value }"
|
515
479
|
end * "\n"
|
516
480
|
end
|
517
481
|
|
518
482
|
end
|
519
483
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
484
|
+
class RBBTError < StandardError
|
485
|
+
attr_accessor :info
|
486
|
+
|
487
|
+
alias old_to_s to_s
|
488
|
+
def to_s
|
489
|
+
str = old_to_s.dup
|
490
|
+
if info
|
491
|
+
str << "\n" << "Additional Info:\n---\n" << info << "---"
|
492
|
+
end
|
493
|
+
str
|
530
494
|
end
|
531
495
|
end
|
532
496
|
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
497
|
+
module IndiferentHash
|
498
|
+
extend ChainMethods
|
499
|
+
self.chain_prefix = :indiferent
|
500
|
+
|
501
|
+
def indiferent_get_brackets(key)
|
502
|
+
case
|
503
|
+
when (Symbol === key and indiferent_clean_include? key)
|
504
|
+
indiferent_clean_get_brackets(key)
|
505
|
+
when (Symbol === key and indiferent_clean_include? key.to_s)
|
506
|
+
indiferent_clean_get_brackets(key.to_s)
|
507
|
+
when (String === key and indiferent_clean_include? key)
|
508
|
+
indiferent_clean_get_brackets(key)
|
509
|
+
when (String === key and indiferent_clean_include? key.to_sym)
|
510
|
+
indiferent_clean_get_brackets(key.to_sym)
|
511
|
+
else
|
512
|
+
indiferent_clean_get_brackets(key)
|
513
|
+
end
|
514
|
+
end
|
539
515
|
|
540
|
-
|
541
|
-
|
542
|
-
|
516
|
+
def indiferent_values_at(*key_list)
|
517
|
+
res = []
|
518
|
+
key_list.each{|key| res << indiferent_get_brackets(key)}
|
543
519
|
res
|
544
|
-
|
545
|
-
|
520
|
+
end
|
521
|
+
|
522
|
+
def indiferent_include?(key)
|
523
|
+
case
|
524
|
+
when Symbol === key
|
525
|
+
indiferent_clean_include?(key) or indiferent_clean_include?(key.to_s)
|
526
|
+
when String === key
|
527
|
+
indiferent_clean_include?(key) or indiferent_clean_include?(key.to_sym)
|
528
|
+
else
|
529
|
+
indiferent_clean_include?(key)
|
530
|
+
end
|
531
|
+
end
|
532
|
+
|
533
|
+
def indiferent_delete(value)
|
534
|
+
if indiferent_clean_include? value.to_s
|
535
|
+
indiferent_clean_delete(value.to_s)
|
536
|
+
else
|
537
|
+
indiferent_clean_delete(value.to_sym)
|
538
|
+
end
|
539
|
+
end
|
540
|
+
|
541
|
+
def self.setup(hash)
|
542
|
+
return hash if IndiferentHash === hash
|
543
|
+
hash.extend IndiferentHash
|
544
|
+
hash
|
546
545
|
end
|
547
546
|
end
|
548
547
|
|
548
|
+
module PDF2Text
|
549
|
+
def self.pdftotext(filename)
|
550
|
+
require 'rbbt/util/cmd'
|
551
|
+
require 'rbbt/util/tmpfile'
|
552
|
+
require 'rbbt/util/open'
|
553
|
+
|
554
|
+
|
555
|
+
TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
|
556
|
+
CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
|
557
|
+
end
|
558
|
+
end
|
559
|
+
end
|