rbbt-util 3.2.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,79 @@
1
# StringIO is referenced by TSVSerializer.load; require it here so this file
# does not depend on some other file having loaded it first.
require 'stringio'

# Serializers that turn TSV values into strings and back. Every serializer
# exposes the same two class methods, +dump+ and +load+; SERIALIZER_ALIAS maps
# short symbolic names to the serializer implementing them.
module TSV
  # One integer, packed with directive "l" (signed 32-bit, native endian).
  class IntegerSerializer
    def self.dump(i); [i].pack("l"); end
    def self.load(str); str.unpack("l").first; end
  end

  # One float, packed with directive "d" (double precision, native format).
  class FloatSerializer
    def self.dump(i); [i].pack("d"); end
    def self.load(str); str.unpack("d").first; end
  end

  # An array of integers, packed with directive "l*".
  class IntegerArraySerializer
    def self.dump(a); a.pack("l*"); end
    def self.load(str); str.unpack("l*"); end
  end

  # A single value stored as its +to_s+ form; +load+ returns the string as is.
  class StringSerializer
    def self.dump(str); str.to_s; end
    def self.load(str); str; end
  end

  # A flat list of values stored as one tab-separated string.
  class StringArraySerializer
    def self.dump(array)
      array.collect{|value| value.to_s}.join("\t")
    end

    # Returns [] for nil. The -1 limit keeps trailing empty fields.
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1)
    end
  end

  # A list of lists: inner values are joined with "|", outer lists with tabs.
  class StringDoubleArraySerializer
    def self.dump(array)
      array.collect{|values| values.collect{|value| value.to_s}.join("|")}.join("\t")
    end

    # Returns [] for nil. The -1 limits keep trailing empty fields and values.
    def self.load(string)
      return [] if string.nil?
      string.split("\t", -1).collect{|l| l.split("|", -1)}
    end
  end

  # Stores a copy of the object with Marshal and passes the loaded object
  # through TSV.setup.
  class TSVMarshalSerializer
    def self.dump(tsv)
      Marshal.dump(tsv.dup)
    end

    # NOTE(review): Marshal.load must only be given trusted data -- confirm
    # that the strings stored through this serializer never come from
    # untrusted sources.
    def self.load(string)
      TSV.setup Marshal.load(string)
    end
  end


  # Stores the +to_s+ form of the object and reads it back with TSV.open.
  class TSVSerializer
    def self.dump(tsv)
      tsv.to_s
    end

    def self.load(string)
      TSV.open StringIO.new(string)
    end
  end

  # Short names accepted for each serializer. :marshal maps to Marshal itself,
  # which offers the same dump/load interface.
  SERIALIZER_ALIAS = {
    :integer => IntegerSerializer,
    :float => FloatSerializer,
    :integer_array => IntegerArraySerializer,
    :marshal => Marshal,
    :single => StringSerializer,
    :string => StringSerializer,
    :list => StringArraySerializer,
    :flat => StringArraySerializer,
    :double => StringDoubleArraySerializer,
    :tsv => TSVSerializer,
    :marshal_tsv => TSVMarshalSerializer
  }

end
@@ -0,0 +1,67 @@
1
+ require 'rbbt/resource/path'
2
# Helpers for inspecting TSV sources: counting value matches per field and
# resolving file names, streams and field positions.
module TSV

  # For every field in the header of +file+, counts how many distinct entries
  # of that column match one of +values+.
  #
  # +values+ is either an Array (written one per line to a temporary file) or
  # the path of a file holding the values (symlinked to the temporary file).
  # Returns a Hash of field name => count.
  def self.field_match_counts(file, values)
    fields = TSV.parse_header(Open.open(file)).all_fields

    counts = {}
    TmpFile.with_file do |tmpfile|
      if Array === values
        Open.write(tmpfile, values * "\n")
      else
        FileUtils.ln_s values, tmpfile
      end

      fields.each_with_index do |field,i|
        # Shell pipeline: drop comment lines, cut column i+1, split "|" lists
        # into lines, de-duplicate, keep entries containing a letter, and keep
        # fixed-string whole-word matches against the values file. The newline
        # count of the output is the number of matches.
        counts[field] = begin
                          CMD.cmd("cat #{ file } |grep -v ^#|cut -f #{i + 1}|tr '|' '\\n' |sort -u |grep [[:alpha:]]|grep -f #{tmpfile} -F -w").read.count("\n")
                        rescue
                          # Best effort: any failure of the pipeline counts as
                          # zero matches (presumably this includes grep finding
                          # nothing -- verify against CMD.cmd).
                          0
                        end
      end
    end

    counts
  end

  # Returns the file name behind +file+: the String itself, or +file.filename+
  # for stream-like objects that offer it (nil when they do not).
  # Raises for anything that is neither a String nor responds to +gets+.
  def self.get_filename(file)
    case
    when String === file
      filename = file
    when file.respond_to?(:gets)
      filename = file.filename if file.respond_to? :filename
    else
      # The message used to read "Cannot get stream from", copied from
      # get_stream; it now names the operation that actually failed.
      raise "Cannot get filename from: #{file.inspect}"
    end
    filename
  end

  # Returns a readable stream for +file+: a Path is opened through its own
  # +open+, a String through File.open, and an object responding to +gets+ is
  # returned untouched. Raises for anything else.
  def self.get_stream(file)
    case
    when Path === file
      file.open
    when String === file
      File.open(file)
    when file.respond_to?(:gets)
      file
    else
      raise "Cannot get stream from: #{file.inspect}"
    end
  end

  # Resolves +field+ to a position: Integers are returned as they are; nil,
  # :key or the key field name give :key; any other String gives its index in
  # +fields+ (nil when absent). Anything else falls through and gives nil.
  def self.identify_field(key_field, fields, field)
    case
    when Integer === field
      field
    when (field.nil? or field == :key or key_field == field)
      :key
    when String === field
      fields.index field
    end
  end

  # Instance version: resolves +field+ using this object's own key_field and
  # fields.
  def identify_field(field)
    TSV.identify_field(key_field, fields, field)
  end


end
data/lib/rbbt/util/R.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'rbbt/util/cmd'
2
- require 'rbbt/util/tsv'
2
+ require 'rbbt/tsv'
3
3
 
4
4
  module R
5
5
 
@@ -24,7 +24,7 @@ module R
24
24
 
25
25
  end
26
26
 
27
- class TSV
27
+ module TSV
28
28
  def R(script, open_options = {})
29
29
  TmpFile.with_file do |f|
30
30
  Open.write(f, self.to_s)
@@ -36,7 +36,7 @@ rbbt.tsv.write('#{f}', data);
36
36
  EOF
37
37
  ).read)
38
38
  open_options = Misc.add_defaults open_options, :type => :list
39
- TSV.new(f, open_options)
39
+ TSV.open(f, open_options)
40
40
  end
41
41
  end
42
42
  end
@@ -0,0 +1,64 @@
1
+ require 'rbbt/util/log'
2
+
3
# Lets a module wrap ("chain") methods of the objects it extends.
#
# A module that does +extend ChainMethods+ gets a +chain_prefix+ (by default
# its downcased name) and an +extended+ hook. When an object is extended with
# that module, every module method named <prefix>_<name> replaces the object's
# <name> method, and the previous implementation stays reachable as
# <prefix>_clean_<name>. The names get_brackets and set_brackets stand for
# [] and []=.
module ChainMethods
  def self.extended(base)
    if not base.respond_to? :chain_prefix
      metaclass = class << base
        attr_accessor :chain_prefix, :chained_methods

        # Instance methods of the module whose name starts with the chain
        # prefix; computed once and memoized.
        def chained_methods
          @chained_methods ||= instance_methods.select{|method| method =~ /^#{chain_prefix}/}
        end
        self
      end

      metaclass.module_eval do
        # Extends +object+ with this module (which triggers setup_chains
        # through the +extended+ hook defined below).
        def setup_chain(object)
          object.extend self
        end

        # Installs the aliases on the singleton class of +base+. Does nothing
        # when there are no chained methods or when the first clean alias is
        # already present, so extending twice does not re-alias.
        def setup_chains(base)
          raise "No prefix specified for #{self.to_s}" if self.chain_prefix.nil? or (String === self.chain_prefix and self.chain_prefix.empty?)

          # instance_methods yields Symbols on Ruby >= 1.9 and Strings on 1.8.
          # Symbols have no #sub, so work on String names throughout.
          methods = self.chained_methods.collect{|method| method.to_s}

          return if methods.empty?

          prefix = self.chain_prefix.to_s

          first_clean_method = prefix + '_clean_' + methods.first.sub(prefix + '_', '')

          if not base.respond_to? first_clean_method
            class << base; self; end.module_eval do
              methods.each do |new_method|
                original = new_method.sub(prefix + '_', '')
                clean_method = prefix + '_clean_' + original

                original = "[]" if original == "get_brackets"
                original = "[]=" if original == "set_brackets"

                begin
                  alias_method clean_method, original
                rescue NameError
                  # The object has no such method yet: there is nothing to
                  # keep, the chained method is simply installed below.
                end
                alias_method original, new_method
              end
            end
          end
        end
      end

      if not metaclass.respond_to? :extended
        metaclass.module_eval do
          # Hook run when an object is extended with the chaining module.
          def extended(base)
            setup_chains(base)
          end
        end
      end
    end

    # Default prefix: the downcased module name. Modules may override it
    # afterwards with +self.chain_prefix = ...+.
    base.chain_prefix = base.to_s.downcase.to_sym
  end
end
data/lib/rbbt/util/cmd.rb CHANGED
@@ -4,10 +4,11 @@ require 'stringio'
4
4
 
5
5
  module CMD
6
6
 
7
- class CMDError < RBBTError; end
7
+ class CMDError < StandardError; end
8
+
8
9
  module SmartIO
9
- attr_accessor :pid, :cmd, :post, :in, :out, :err
10
- def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
10
+ attr_accessor :pid, :cmd, :post, :in, :out, :err, :log
11
+ def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil, log = true)
11
12
  io.extend SmartIO
12
13
  io.pid = pid
13
14
  io.cmd = cmd
@@ -15,6 +16,7 @@ module CMD
15
16
  io.out = out
16
17
  io.err = err
17
18
  io.post = post
19
+ io.log = log
18
20
 
19
21
  io.class.send(:alias_method, :original_close, :close)
20
22
  io.class.send(:alias_method, :original_read, :read)
@@ -28,10 +30,10 @@ module CMD
28
30
  rescue
29
31
  end
30
32
 
31
- Log.debug "Process #{ cmd } succeded" if $? and $?.success?
33
+ Log.debug "Process #{ cmd } succeded" if $? and $?.success? and log
32
34
 
33
35
  if $? and not $?.success?
34
- Log.debug "Raising exception"
36
+ Log.debug "Raising exception" if log
35
37
  exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
36
38
  original_close
37
39
  raise exception
@@ -51,7 +53,7 @@ module CMD
51
53
 
52
54
  def force_close
53
55
  if @pid
54
- Log.debug "Forcing close by killing '#{@pid}'"
56
+ Log.debug "Forcing close by killing '#{@pid}'" if log
55
57
  Process.kill("KILL", @pid)
56
58
  Process.waitpid(@pid)
57
59
  end
@@ -98,6 +100,9 @@ module CMD
98
100
  stderr = options.delete(:stderr)
99
101
  pipe = options.delete(:pipe)
100
102
  post = options.delete(:post)
103
+ log = options.delete(:log)
104
+
105
+ log = true if log.nil?
101
106
 
102
107
  if stderr == true
103
108
  stderr = Log::HIGH
@@ -149,8 +154,8 @@ module CMD
149
154
 
150
155
  exit(-1)
151
156
  rescue Exception
152
- Log.debug("CMDError: #{$!.message}")
153
- ddd $!.backtrace
157
+ Log.debug("CMDError: #{$!.message}") if log
158
+ ddd $!.backtrace if log
154
159
  raise CMDError, $!.message
155
160
  end
156
161
  }
@@ -164,7 +169,7 @@ module CMD
164
169
  serr = serr.first
165
170
 
166
171
 
167
- Log.debug "CMD: [#{pid}] #{cmd}"
172
+ Log.debug "CMD: [#{pid}] #{cmd}" if log
168
173
 
169
174
  if in_content.respond_to?(:read)
170
175
  Thread.new do
@@ -190,7 +195,7 @@ module CMD
190
195
  if pipe
191
196
  Thread.new do
192
197
  while line = serr.gets
193
- Log.log line, stderr if Integer === stderr
198
+ Log.log line, stderr if Integer === stderr and log
194
199
  end
195
200
  serr.close
196
201
  Thread.exit
@@ -216,11 +221,10 @@ module CMD
216
221
  Process.waitpid pid
217
222
 
218
223
  if not $?.success?
219
- exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}"
220
- exception.info = err if Integer === stderr and stderr >= Log.severity
224
+ exception = CMDError.new "Command [#{pid}] #{cmd} failed with error status #{$?.exitstatus}.\n#{err}"
221
225
  raise exception
222
226
  else
223
- Log.log err, stderr if Integer === stderr
227
+ Log.log err, stderr if Integer === stderr and log
224
228
  end
225
229
 
226
230
  out
@@ -1,7 +1,8 @@
1
1
  require 'spreadsheet'
2
- require 'rbbt/util/tsv'
2
+ require 'rbbt/tsv'
3
3
  require 'rbbt/util/tmpfile'
4
- class TSV
4
+
5
+ module TSV
5
6
  def self.excel2tsv(file, options = {})
6
7
  sheet = options.delete :sheet
7
8
  header = options.delete :header
@@ -26,7 +27,7 @@ class TSV
26
27
  rows.each do |row| f.puts row * "\t" end
27
28
  end
28
29
 
29
- TSV.new(filename, options)
30
+ TSV.open(filename, options)
30
31
  end
31
32
  end
32
33
  end
data/lib/rbbt/util/log.rb CHANGED
@@ -19,6 +19,7 @@ module Log
19
19
  SEVERITY_COLOR = ["0;37m", "0;32m", "0;33m", "0;31m", "1;0m" ].collect{|e| "\033[#{e}"}
20
20
 
21
21
  def self.log(message, severity = MEDIUM)
22
+ message ||= ""
22
23
  severity_color = SEVERITY_COLOR[severity]
23
24
  font_color = {false => "\033[0;37m", true => "\033[0m"}[severity >= INFO]
24
25
 
@@ -1,46 +1,46 @@
1
- require 'iconv'
2
1
  require 'lockfile'
3
- require 'digest/md5'
2
+ require 'rbbt/util/chain_methods'
3
+ require 'rbbt/resource/path'
4
4
 
5
- class RBBTError < StandardError
6
- attr_accessor :info
5
+ module Misc
6
+ class FieldNotFoundError < StandardError;end
7
7
 
8
- alias old_to_s to_s
9
- def to_s
10
- str = old_to_s.dup
11
- if info
12
- str << "\n" << "Additional Info:\n---\n" << info << "---"
13
- end
14
- str
15
- end
16
- end
8
+
9
+ IUPAC2BASE = {
10
+ "A" => ["A"],
11
+ "C" => ["C"],
12
+ "G" => ["G"],
13
+ "T" => ["T"],
14
+ "U" => ["U"],
15
+ "R" => "A or G".split(" or "),
16
+ "Y" => "C or T".split(" or "),
17
+ "S" => "G or C".split(" or "),
18
+ "W" => "A or T".split(" or "),
19
+ "K" => "G or T".split(" or "),
20
+ "M" => "A or C".split(" or "),
21
+ "B" => "C or G or T".split(" or "),
22
+ "D" => "A or G or T".split(" or "),
23
+ "H" => "A or C or T".split(" or "),
24
+ "V" => "A or C or G".split(" or "),
25
+ "N" => %w(A C T G),
26
+ }
17
27
 
18
- module IndiferentHash
19
- def indiferent_get(key)
20
- old_get(key) ||
21
- old_get(key.to_s) ||
22
- old_get(key.to_sym)
23
- end
28
+ BASE2COMPLEMENT = {
29
+ "A" => "T",
30
+ "C" => "G",
31
+ "G" => "C",
32
+ "T" => "A",
33
+ "U" => "A",
34
+ }
24
35
 
25
- def self.extended(base)
26
- class << base
27
- alias_method :old_get, :[]
28
- alias_method :[], :indiferent_get
29
- end
36
+ def self.IUPAC_to_base(iupac)
37
+ IUPAC2BASE[iupac]
30
38
  end
31
- end
32
39
 
33
- module Misc
34
- class FieldNotFoundError < StandardError;end
35
-
36
- def self.in_dir(dir)
37
- old_pwd = FileUtils.pwd
38
- begin
39
- FileUtils.cd dir
40
- yield
41
- ensure
42
- FileUtils.cd old_pwd
43
- end
40
+ def self.is_filename?(string)
41
+ return true if Path === string
42
+ return true if String === string and string.length < 265 and File.exists? string
43
+ return false
44
44
  end
45
45
 
46
46
  def self.intersect_sorted_arrays(a1, a2)
@@ -97,73 +97,21 @@ module Misc
97
97
  new
98
98
  end
99
99
 
100
- def self.digest(text)
101
- Digest::MD5.hexdigest(text)
102
- end
103
-
104
- def self.add_method(object, method_name, &block)
105
- class << object
106
- self
107
- end.send :define_method, method_name, block
108
- end
109
-
110
- def self.redefine_method(object, old_method, new_method_name, &block)
111
- return if object.respond_to? new_method_name
112
- metaclass = class << object; self end
113
- metaclass.send :alias_method, new_method_name, old_method
114
- metaclass.send :define_method, old_method, &block
115
- end
116
-
117
- def self.filename?(filename)
118
- String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
119
- end
120
-
121
- def self.lock(file, *args)
122
- FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
123
- lockfile = Lockfile.new(file + '.lock')
124
- lockfile.lock do
125
- yield file, *args
126
- end
127
- end
128
-
129
- def self.string2const(string)
130
- return nil if string.nil?
131
- mod = Kernel
132
-
133
- string.to_s.split('::').each do |str|
134
- mod = mod.const_get str
135
- end
136
-
137
- mod
138
- end
139
-
140
- def self.path_relative_to(path, subdir)
141
- File.expand_path(path).sub(/^#{Regexp.quote File.expand_path(subdir)}\/?/,'')
142
- end
143
-
144
- def self.in_directory?(file, directory)
145
- if File.expand_path(file) =~ /^#{Regexp.quote File.expand_path(directory)}/
146
- true
147
- else
148
- false
100
+ def self.array2hash(array)
101
+ hash = {}
102
+ array.each do |key, value|
103
+ hash[key] = value
149
104
  end
105
+ hash
150
106
  end
151
107
 
152
- def self.find_files_back_to(path, target, subdir)
153
- return [] if path.nil?
154
- files = []
155
- while in_directory?(path, subdir)
156
- path = path.dirname
157
- if path[target].exists?
158
- files << path[target]
159
- end
160
- end
161
-
162
- files
108
+ def self.zip2hash(list1, list2)
109
+ array2hash(list1.zip(list2))
163
110
  end
164
111
 
165
- def self.this_dir
166
- File.expand_path(File.dirname(caller[0]))
112
+ def self.process_to_hash(list)
113
+ result = yield list
114
+ zip2hash(list, result)
167
115
  end
168
116
 
169
117
  def self.env_add(var, value, sep = ":", prepend = true)
@@ -176,15 +124,6 @@ module Misc
176
124
  end
177
125
  end
178
126
 
179
- def self.count(list)
180
- counts = Hash.new 0
181
- list.each do |item|
182
- counts[item] += 1
183
- end
184
-
185
- counts
186
- end
187
-
188
127
  def self.benchmark(repeats = 1)
189
128
  require 'benchmark'
190
129
  res = nil
@@ -220,6 +159,73 @@ module Misc
220
159
  res
221
160
  end
222
161
 
162
# Runs the block, retrying when it raises a StandardError, for at most
# +times+ attempts in total. Returns the value of the block.
#
# When the last attempt also fails its exception is re-raised. Previously it
# was swallowed and nil was returned, so callers could not tell a failure
# from a block that returned nil.
def self.insist(times = 3)
  try = 0
  begin
    yield
  rescue
    try += 1
    retry if try < times
    raise
  end
end
171
+
172
# Convenience wrapper: hands the block to +insist+ with a limit of three
# attempts.
def self.try3times(&block)
  attempts = 3
  insist(attempts, &block)
end
175
+
176
# Renders a hash as "key=value" pairs joined by "#", ordered by the string
# form of the keys. Only values whose class name is in the list below are
# kept; other entries are dropped. Symbol keys and values are written with a
# leading ":".
def self.hash2string(hash)
  simple_classes = %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object)

  hash.sort_by{|k,v| k.to_s}.collect{|k,v|
    next unless simple_classes.include? v.class.to_s
    # Interpolate instead of appending to a ":" literal: appending mutates
    # the literal, which fails when string literals are frozen.
    [ Symbol === k ? ":#{k}" : k,
      Symbol === v ? ":#{v}" : v] * "="
  }.compact * "#"
end
183
+
184
# Returns +path+ relative to +basedir+, or nil when +path+ is not strictly
# inside +basedir+ (this includes +path+ being +basedir+ itself). Both
# arguments are expanded to absolute paths first.
#
# The match is on the leading directory only. The earlier unanchored regex
# also matched +basedir+ in the middle of +path+, so "/other/base/file" was
# treated as relative to "/base".
def self.path_relative_to(basedir, path)
  path    = File.expand_path(path)
  basedir = File.expand_path(basedir)

  return nil if path == basedir

  # Compare against "basedir/" so that "/tmp/basement" is not taken to be
  # inside "/tmp/base"; the root directory already ends with the separator.
  prefix = basedir =~ /\/\z/ ? basedir : basedir + '/'

  if path[0, prefix.length] == prefix
    return path[prefix.length..-1]
  else
    return nil
  end
end
194
+
195
# Yields +file+ and +args+ to the block while holding the lock file
# "<file>.lock", creating the directory of +file+ first when it is missing.
# Returns whatever Lockfile#lock returns for the block.
def self.lock(file, *args)
  directory = File.dirname(File.expand_path(file))
  # File.exist? instead of File.exists?: the latter was deprecated and then
  # removed in Ruby 3.2.
  FileUtils.mkdir_p directory unless File.exist? directory
  lockfile = Lockfile.new(file + '.lock')
  lockfile.lock do
    yield file, *args
  end
end
202
+
203
# Tells whether +dir+ is +file+ itself or one of its ancestor directories.
# Both paths are expanded first; the check walks up from +file+ one parent
# at a time until the top is reached.
def self.common_path(dir, file)
  target  = File.expand_path dir
  current = File.expand_path file

  loop do
    return true if current == target

    parent = File.dirname(current)
    # dirname of the top-most directory is itself: nothing left to climb.
    return false if parent == current

    current = parent
  end
end
215
+
216
# Runs the block with +dir+ as the working directory, creating +dir+ first
# when it is missing, and returns the value of the block. The previous
# working directory is restored afterwards, also when the block raises.
def self.in_dir(dir)
  old_pwd = FileUtils.pwd
  res = nil
  begin
    # File.exist? instead of File.exists?: the latter was deprecated and then
    # removed in Ruby 3.2.
    FileUtils.mkdir_p dir unless File.exist? dir
    FileUtils.cd dir
    res = yield
  ensure
    FileUtils.cd old_pwd
  end
  res
end
228
+
223
229
  def self.fixutf8(string)
224
230
  if string.respond_to?(:valid_encoding?) and ! string.valid_encoding?
225
231
  @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
@@ -229,6 +235,25 @@ module Misc
229
235
  end
230
236
  end
231
237
 
238
# Writes +content+ to +path+: a String is written whole, an IO or StringIO
# is copied line by line, and anything else leaves an empty file.
# If anything goes wrong the (possibly partial) file is removed before the
# error is passed on; an Interrupt is turned into a RuntimeError with the
# message "Interrupted (Ctrl-c)".
def self.sensiblewrite(path, content)
  File.open(path, 'w') do |out|
    if String === content
      out.write content
    elsif IO === content or StringIO === content
      while line = content.gets
        out.write line
      end
    end
  end
rescue Interrupt
  FileUtils.rm_f path
  raise "Interrupted (Ctrl-c)"
rescue Exception
  # Deliberately broad: the partial file must go whatever the failure was.
  FileUtils.rm_f path
  raise $!
end
256
+
232
257
  def self.add_defaults(options, defaults = {})
233
258
  case
234
259
  when Hash === options
@@ -246,20 +271,8 @@ module Misc
246
271
  new_options
247
272
  end
248
273
 
249
- def self.process_options(hash, *keys)
250
- if keys.length == 1
251
- hash.delete keys.first.to_sym
252
- else
253
- keys.collect do |key| hash.delete(key.to_sym) || hash.delete(key.to_s) end
254
- end
255
- end
256
-
257
- def self.hash2string(hash)
258
- hash.sort_by{|k,v| k.to_s}.collect{|k,v|
259
- next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
260
- [ Symbol === k ? ":" << k.to_s : k,
261
- Symbol === v ? ":" << v.to_s : v] * "="
262
- }.compact * "#"
274
+ def self.digest(text)
275
+ Digest::MD5.hexdigest(text)
263
276
  end
264
277
 
265
278
  def self.hash2md5(hash)
@@ -270,14 +283,57 @@ module Misc
270
283
  case
271
284
  when v.inspect =~ /:0x0/
272
285
  o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
273
- when Resource::Path === v
274
- o[k] = "" << String.new(v.to_s)
286
+ #when Resource::Path === v
287
+ # o[k] = "" << String.new(v.to_s)
275
288
  else
276
289
  o[k] = v
277
290
  end
278
291
  end
279
292
 
280
- Digest::MD5.hexdigest(o.sort_by{|k| k.to_s}.inspect)
293
+ if o.empty?
294
+ ""
295
+ else
296
+ Digest::MD5.hexdigest(o.sort_by{|k| k.to_s}.inspect)
297
+ end
298
+ end
299
+
300
# Removes the given option +keys+ from +hash+ and returns their values: the
# value itself for a single key, an Array of values (nil for missing ones)
# otherwise.
#
# Every key is looked up as a Symbol first and then as a String. The
# single-key form used to try the Symbol only, unlike the multi-key form.
def self.process_options(hash, *keys)
  values = keys.collect do |key| hash.delete(key.to_sym) || hash.delete(key.to_s) end

  keys.length == 1 ? values.first : values
end
307
+
308
# Moves out of +hash+ the entries whose key is "<prefix>_<name>" and returns
# them in a new hash keyed by <name>, keeping the key type (String keys stay
# Strings, Symbol keys stay Symbols). An entry whose key is exactly +prefix+
# is moved too, under its original key.
#
# The prefix is matched literally and only at the start of the key. The
# earlier pattern was unanchored and unescaped, so a key such as :barfoo_x
# was also pulled for the prefix "foo".
def self.pull_keys(hash, prefix)
  new = {}
  prefixed = /\A#{ Regexp.quote prefix.to_s }_(.*)/

  hash.keys.each do |key|
    if key.to_s =~ prefixed
      case
      when String === key
        new[$1] = hash.delete key
      when Symbol === key
        new[$1.to_sym] = hash.delete key
      end
    else
      if key.to_s == prefix.to_s
        new[key] = hash.delete key
      end
    end
  end

  new
end
327
+
328
+ def self.string2const(string)
329
+ return nil if string.nil?
330
+ mod = Kernel
331
+
332
+ string.to_s.split('::').each do |str|
333
+ mod = mod.const_get str
334
+ end
335
+
336
+ mod
281
337
  end
282
338
 
283
339
  def self.string2hash(string)
@@ -290,58 +346,42 @@ module Misc
290
346
  option, value = str, true
291
347
  end
292
348
 
293
- option = option.sub(":",'').to_sym if option.chars.first == ':'
294
- value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
295
-
296
- if value == true
297
- options[option] = option.to_s.chars.first != '!'
298
- else
299
- options[option] = Thread.start do
300
- $SAFE = 0;
301
- case
302
- when value =~ /^(?:true|T)$/i
303
- true
304
- when value =~ /^(?:false|F)$/i
305
- false
306
- when (String === value and value =~ /^\/(.*)\/$/)
307
- Regexp.new /#{$1}/
308
- else
309
- begin
310
- Kernel.const_get value
311
- rescue
312
- begin
313
- eval(value)
314
- rescue Exception
315
- value
316
- end
349
+ option = option.sub(":",'').to_sym if option.chars.first == ':'
350
+ value = value.sub(":",'').to_sym if String === value and value.chars.first == ':'
351
+
352
+ if value == true
353
+ options[option] = option.to_s.chars.first != '!'
354
+ else
355
+ options[option] = Thread.start do
356
+ $SAFE = 0;
357
+ case
358
+ when value =~ /^(?:true|T)$/i
359
+ true
360
+ when value =~ /^(?:false|F)$/i
361
+ false
362
+ when Symbol === value
363
+ value
364
+ when (String === value and value =~ /^\/(.*)\/$/)
365
+ Regexp.new /#{$1}/
366
+ else
367
+ begin
368
+ Kernel.const_get value
369
+ rescue
370
+ begin
371
+ raise if value =~ /[a-z]/ and defined? value
372
+ eval(value)
373
+ rescue Exception
374
+ value
317
375
  end
318
376
  end
319
- end.value
320
- end
377
+ end
378
+ end.value
379
+ end
321
380
  end
322
381
 
323
382
  options
324
383
  end
325
384
 
326
- def self.sensiblewrite(path, content)
327
- begin
328
- case
329
- when String === content
330
- File.open(path, 'w') do |f| f.write content end
331
- when (IO === content or StringIO === content)
332
- File.open(path, 'w') do |f| while l = content.gets; f.write l; end end
333
- else
334
- File.open(path, 'w') do |f| end
335
- end
336
- rescue Interrupt
337
- FileUtils.rm_f path
338
- raise "Interrupted (Ctrl-c)"
339
- rescue Exception
340
- FileUtils.rm_f path
341
- raise $!
342
- end
343
- end
344
-
345
385
  def self.field_position(fields, field, quiet = false)
346
386
  return field if Integer === field or Range === field
347
387
  raise FieldNotFoundError, "Field information missing" if fields.nil? && ! quiet
@@ -351,30 +391,6 @@ module Misc
351
391
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
352
392
  end
353
393
 
354
- def self.first(list)
355
- return nil if list.nil?
356
- return list.first
357
- end
358
-
359
- def self.chunk(text, split)
360
- text.split(split)[1..-1]
361
- end
362
-
363
- def self.insist(times = 3)
364
- try = 0
365
- begin
366
- yield
367
- rescue
368
- try += 1
369
- retry if try < times
370
- end
371
- end
372
-
373
- def self.try3times(&block)
374
- insist(3, &block)
375
- end
376
-
377
-
378
394
  # Divides the array into +num+ chunks of the same size by placing one
379
395
  # element in each chunk iteratively.
380
396
  def self.divide(array, num)
@@ -387,70 +403,21 @@ module Misc
387
403
  chunks
388
404
  end
389
405
 
390
- def self.merge2hash(list1, list2)
391
- hash = {}
392
- list1.zip(list2).each do |k,v| hash[k] = v end
393
- hash
406
+ def self.zip_fields(array)
407
+ array[0].zip(*array[1..-1])
394
408
  end
395
409
 
396
-
397
- def self.process_to_hash(list)
398
- result = yield list
399
- merge2hash(list, result)
400
- end
401
-
402
- IUPAC2BASE = {
403
- "A" => ["A"],
404
- "C" => ["C"],
405
- "G" => ["G"],
406
- "T" => ["T"],
407
- "U" => ["U"],
408
- "R" => "A or G".split(" or "),
409
- "Y" => "C or T".split(" or "),
410
- "S" => "G or C".split(" or "),
411
- "W" => "A or T".split(" or "),
412
- "K" => "G or T".split(" or "),
413
- "M" => "A or C".split(" or "),
414
- "B" => "C or G or T".split(" or "),
415
- "D" => "A or G or T".split(" or "),
416
- "H" => "A or C or T".split(" or "),
417
- "V" => "A or C or G".split(" or "),
418
- "N" => %w(A C T G),
419
- }
420
-
421
- BASE2COMPLEMENT = {
422
- "A" => "T",
423
- "C" => "G",
424
- "G" => "C",
425
- "T" => "A",
426
- "U" => "A",
427
- }
428
-
429
- def self.IUPAC_to_base(iupac)
430
- IUPAC2BASE[iupac]
431
- end
432
- end
433
-
434
- module PDF2Text
435
- def self.pdf2text(filename)
436
- require 'rbbt/util/cmd'
437
- require 'rbbt/util/tmpfile'
438
- require 'rbbt/util/open'
439
-
440
-
441
- TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
442
- CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
443
- end
444
- end
445
410
  end
446
411
 
447
- class NamedArray < Array
412
+ module NamedArray
413
+ extend ChainMethods
414
+ self.chain_prefix = :named_array
448
415
  attr_accessor :fields
449
416
 
450
- def self.name(array, fields)
451
- a = self.new(array)
452
- a.fields = fields
453
- a
417
+ def self.setup(array, fields)
418
+ array.extend NamedArray
419
+ array.fields = fields
420
+ array
454
421
  end
455
422
 
456
423
  def merge(array)
@@ -476,26 +443,23 @@ class NamedArray < Array
476
443
  end
477
444
  end
478
445
 
479
- alias original_get_brackets []
480
- def [](key)
481
- original_get_brackets(Misc.field_position(fields, key))
446
+ def named_array_get_brackets(key)
447
+ named_array_clean_get_brackets(Misc.field_position(fields, key))
482
448
  end
483
449
 
484
- alias original_set_brackets []=
485
- def []=(key,value)
486
- original_set_brackets(Misc.field_position(fields, key), value)
487
- end
450
+ def named_array_set_brackets(key,value)
451
+ named_array_clean_set_brackets(Misc.field_position(fields, key), value)
452
+ end
488
453
 
489
- alias original_values_at values_at
490
- def values_at(*keys)
454
+ def named_array_values_at(*keys)
491
455
  keys = keys.collect{|k| Misc.field_position(fields, k) }
492
- original_values_at(*keys)
456
+ named_array_clean_values_at(*keys)
493
457
  end
494
458
 
495
459
  def zip_fields
496
460
  return [] if self.empty?
497
- zipped = self[0].zip(*self[1..-1])
498
- zipped = zipped.collect{|v| NamedArray.name(v, fields)}
461
+ zipped = Misc.zip_fields(self)
462
+ zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
499
463
  zipped
500
464
  end
501
465
 
@@ -511,38 +475,85 @@ class NamedArray < Array
511
475
 
512
476
  def report
513
477
  fields.zip(self).collect do |field,value|
514
- "* #{ field }: #{ Array === value ? value * "|" : value }"
478
+ "\nAttributes:\n* #{ field }: #{ Array === value ? value * "|" : value }"
515
479
  end * "\n"
516
480
  end
517
481
 
518
482
  end
519
483
 
520
- def benchmark(bench = true)
521
- require 'benchmark'
522
- if bench
523
- res = nil
524
- puts(Benchmark.measure do
525
- res = yield
526
- end)
527
- res
528
- else
529
- yield
484
+ class RBBTError < StandardError
485
+ attr_accessor :info
486
+
487
+ alias old_to_s to_s
488
+ def to_s
489
+ str = old_to_s.dup
490
+ if info
491
+ str << "\n" << "Additional Info:\n---\n" << info << "---"
492
+ end
493
+ str
530
494
  end
531
495
  end
532
496
 
533
- def profile(prof = true)
534
- require 'ruby-prof'
535
- if prof
536
- RubyProf.start
537
- res = yield
538
- result = RubyProf.stop
497
+ module IndiferentHash
498
+ extend ChainMethods
499
+ self.chain_prefix = :indiferent
500
+
501
+ def indiferent_get_brackets(key)
502
+ case
503
+ when (Symbol === key and indiferent_clean_include? key)
504
+ indiferent_clean_get_brackets(key)
505
+ when (Symbol === key and indiferent_clean_include? key.to_s)
506
+ indiferent_clean_get_brackets(key.to_s)
507
+ when (String === key and indiferent_clean_include? key)
508
+ indiferent_clean_get_brackets(key)
509
+ when (String === key and indiferent_clean_include? key.to_sym)
510
+ indiferent_clean_get_brackets(key.to_sym)
511
+ else
512
+ indiferent_clean_get_brackets(key)
513
+ end
514
+ end
539
515
 
540
- # Print a flat profile to text
541
- printer = RubyProf::FlatPrinter.new(result)
542
- printer.print(STDOUT, 0)
516
+ def indiferent_values_at(*key_list)
517
+ res = []
518
+ key_list.each{|key| res << indiferent_get_brackets(key)}
543
519
  res
544
- else
545
- yield
520
+ end
521
+
522
+ def indiferent_include?(key)
523
+ case
524
+ when Symbol === key
525
+ indiferent_clean_include?(key) or indiferent_clean_include?(key.to_s)
526
+ when String === key
527
+ indiferent_clean_include?(key) or indiferent_clean_include?(key.to_sym)
528
+ else
529
+ indiferent_clean_include?(key)
530
+ end
531
+ end
532
+
533
+ def indiferent_delete(value)
534
+ if indiferent_clean_include? value.to_s
535
+ indiferent_clean_delete(value.to_s)
536
+ else
537
+ indiferent_clean_delete(value.to_sym)
538
+ end
539
+ end
540
+
541
+ def self.setup(hash)
542
+ return hash if IndiferentHash === hash
543
+ hash.extend IndiferentHash
544
+ hash
546
545
  end
547
546
  end
548
547
 
548
# Extracts text from PDF files with the external +pdftotext+ command.
module PDF2Text
  # Reads +filename+ through Open.open (with :nocache), places the content
  # in a temporary file and runs "pdftotext <tmpfile> -" on it, returning
  # what CMD.cmd returns. The helper libraries are required on first use.
  def self.pdftotext(filename)
    %w(rbbt/util/cmd rbbt/util/tmpfile rbbt/util/open).each do |library|
      require library
    end

    pdf_content = Open.open(filename, :nocache => true).read

    TmpFile.with_file(pdf_content) do |pdf_file|
      CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
    end
  end
end