rbbt-util 2.1.0 → 3.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. data/bin/rbbt_query.rb +63 -0
  2. data/lib/rbbt-util.rb +5 -5
  3. data/lib/rbbt.rb +2 -11
  4. data/lib/rbbt/util/cmd.rb +1 -1
  5. data/lib/rbbt/util/fix_width_table.rb +9 -3
  6. data/lib/rbbt/util/log.rb +23 -7
  7. data/lib/rbbt/util/misc.rb +121 -15
  8. data/lib/rbbt/util/open.rb +14 -4
  9. data/lib/rbbt/util/persistence.rb +52 -21
  10. data/lib/rbbt/util/rake.rb +108 -21
  11. data/lib/rbbt/util/resource.rb +338 -0
  12. data/lib/rbbt/util/simpleDSL.rb +1 -1
  13. data/lib/rbbt/util/simpleopt.rb +1 -1
  14. data/lib/rbbt/util/task.rb +340 -0
  15. data/lib/rbbt/util/tc_hash.rb +19 -2
  16. data/lib/rbbt/util/tsv.rb +15 -10
  17. data/lib/rbbt/util/tsv/accessor.rb +16 -7
  18. data/lib/rbbt/util/tsv/attach.rb +220 -17
  19. data/lib/rbbt/util/tsv/index.rb +6 -1
  20. data/lib/rbbt/util/tsv/manipulate.rb +4 -5
  21. data/lib/rbbt/util/tsv/parse.rb +45 -21
  22. data/lib/rbbt/util/tsv/resource.rb +74 -0
  23. data/lib/rbbt/util/workflow.rb +99 -75
  24. data/test/rbbt/util/test_filecache.rb +2 -2
  25. data/test/rbbt/util/test_misc.rb +7 -2
  26. data/test/rbbt/util/test_persistence.rb +40 -5
  27. data/test/rbbt/util/test_resource.rb +92 -0
  28. data/test/rbbt/util/test_task.rb +118 -0
  29. data/test/rbbt/util/test_tsv.rb +5 -1
  30. data/test/rbbt/util/test_workflow.rb +77 -62
  31. data/test/rbbt/util/tsv/test_attach.rb +95 -7
  32. data/test/rbbt/util/tsv/test_index.rb +0 -1
  33. data/test/rbbt/util/tsv/test_manipulate.rb +20 -0
  34. data/test/rbbt/util/tsv/test_resource.rb +9 -0
  35. data/test/test_helper.rb +10 -0
  36. data/test/test_rbbt.rb +2 -37
  37. metadata +16 -18
  38. data/lib/rbbt/util/data_module.rb +0 -93
  39. data/lib/rbbt/util/path.rb +0 -155
  40. data/lib/rbbt/util/pkg_config.rb +0 -78
  41. data/lib/rbbt/util/pkg_data.rb +0 -119
  42. data/lib/rbbt/util/pkg_software.rb +0 -145
  43. data/test/rbbt/util/test_data_module.rb +0 -50
  44. data/test/rbbt/util/test_path.rb +0 -10
  45. data/test/rbbt/util/test_pkg_data.rb +0 -129
  46. data/test/test_pkg.rb +0 -28
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/sources/organism'
6
+
7
+ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-l--log*:-r--report:-h--hash")
8
+
9
+ file = ARGV[0]
10
+
11
+ if not File.exists? file
12
+ base, path = file.match(/(.*)?\.(.*)/).values_at 1, 2
13
+ require 'rbbt/sources/' << base.to_s.downcase
14
+ klass = Misc.string2const base
15
+ file = klass[path].find
16
+ end
17
+
18
+ entities = ARGV[1].nil? ? '-' : ARGV[1].dup
19
+ persistence = options[:persistence]
20
+ log = (options[:log] || 4).to_i
21
+ Log.severity = log
22
+ organism = options[:organism] || "Hsa"
23
+ identifiers = options[:identifiers] || Organism.identifiers(organism)
24
+ format = options[:format]
25
+ report = options[:report]
26
+
27
+ if format.to_s == "key"
28
+ File.open(file) do |f|
29
+ format = TSV.parse_header(f).first
30
+ end
31
+ end
32
+
33
+ if format.nil?
34
+ f, count = TSV.new(identifiers).guess_field(entities)
35
+ format = f if count > 0
36
+ end
37
+
38
+ data = if entities == '-'
39
+ TSV.new(STDIN)
40
+ #entities = TSV.new(STDIN.read.split(/\n|\||\t/)
41
+ else
42
+ entities = [entities]
43
+ data = TSV.new(entities)
44
+ data.type = :double
45
+ data.identifiers = identifiers
46
+ data.key_field = format.dup unless FalseClass === format or format.nil?
47
+ data.fields ||= []
48
+ data
49
+ end
50
+
51
+ data.type = :double
52
+ data.identifiers = identifiers
53
+
54
+ data.attach TSV.new file, :persistence => persistence
55
+
56
+ if report
57
+ data.each do |entity,values|
58
+ puts "== Entity: #{ entity }"
59
+ puts values.report
60
+ end
61
+ else
62
+ puts data
63
+ end
@@ -9,8 +9,8 @@ require 'rbbt/util/bed'
9
9
  require 'rbbt/util/cachehelper'
10
10
  require 'rbbt/util/misc'
11
11
 
12
- FileCache.cachedir = Rbbt.cachedir
13
- Open.cachedir = File.join(Rbbt.cachedir, 'open-remote/')
14
- TmpFile.tmpdir = File.join(Rbbt.tmpdir)
15
- Persistence.cachedir = File.join(Rbbt.cachedir, 'persistence')
16
- Bed.cachedir = File.join(Rbbt.cachedir, 'bed_cache')
12
+ FileCache.cachedir = Rbbt.var.cache.filecache.find :user
13
+ Open.cachedir = Rbbt.var.cache["open-remote"].find :user
14
+ TmpFile.tmpdir = Rbbt.tmp.find :user
15
+ Persistence.cachedir = Rbbt.var.cache.persistence.find :user
16
+ Bed.cachedir = Rbbt.var.cache["bed-persistence"].find :user
@@ -1,16 +1,7 @@
1
- require 'rbbt/util/pkg_config'
2
- require 'rbbt/util/pkg_data'
3
- require 'rbbt/util/pkg_software'
4
- require 'rbbt/util/open'
5
- require 'rbbt/util/tmpfile'
6
- require 'rbbt/util/filecache'
1
+ require 'rbbt/util/resource'
7
2
 
8
3
  module Rbbt
9
- extend PKGConfig
10
- extend PKGData
11
- extend PKGSoftware
12
-
13
- self.load_cfg(%w(tmpdir cachedir datadir))
4
+ extend Resource
14
5
  end
15
6
 
16
7
 
@@ -122,7 +122,7 @@ module CMD
122
122
  when String === in_content
123
123
  sin.last.write in_content
124
124
  sin.last.close
125
- when IO === in_content
125
+ when in_content.respond_to?(:gets)
126
126
  Thread.new do
127
127
  while not in_content.eof?
128
128
  sin.last.write in_content.gets
@@ -5,13 +5,19 @@ class FixWidthTable
5
5
  def initialize(filename, value_size = nil, range = nil, update = false)
6
6
  @filename = filename
7
7
 
8
- if update or not File.exists? filename
8
+ if update or %(memmory stringio).include?(filename.to_s.downcase) or not File.exists? filename
9
9
  Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
- FileUtils.rm @filename if File.exists? @filename
11
10
  @value_size = value_size
12
11
  @range = range
13
12
  @record_size = @value_size + (@range ? 12 : 4)
14
- @file = File.open(@filename, 'wb')
13
+
14
+ if %(memory stringio).include? filename.to_s.downcase
15
+ @file = StringIO.new
16
+ else
17
+ FileUtils.rm @filename if File.exists? @filename
18
+ @file = File.open(@filename, 'wb')
19
+ end
20
+
15
21
  @file.write [value_size].pack("L")
16
22
  @file.write [@range ? 1 : 0 ].pack("C")
17
23
  @size = 0
@@ -1,9 +1,12 @@
1
1
  module Log
2
2
 
3
- DEBUG = 0
4
- LOW = 1
5
- MEDIUM = 2
6
- HIGH = 3
3
+ DEBUG = 0
4
+ LOW = 1
5
+ MEDIUM = 2
6
+ HIGH = 3
7
+ INFO = 4
8
+ WARN = 5
9
+ ERROR = 6
7
10
 
8
11
  def self.severity=(severity)
9
12
  @@severity = severity
@@ -13,9 +16,13 @@ module Log
13
16
  @@severity
14
17
  end
15
18
 
19
+ SEVERITY_COLOR = ["0;37m", "32m", "33m", "31m", "1;0m" ].collect{|e| "\033[#{e}"}
20
+
16
21
  def self.log(message, severity = MEDIUM)
17
- STDERR.puts caller * "\n" if @@severity == -1 and not message.empty?
18
- STDERR.puts "#{Time.now}[#{severity.to_s}]: " + message if severity >= @@severity
22
+ severity_color = SEVERITY_COLOR[severity]
23
+ STDERR.puts caller.select{|l| l =~ /rbbt/} * "\n" if @@severity == -1 and not message.empty?
24
+ #STDERR.puts "#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}[#{severity.to_s}]: " + message if severity >= @@severity
25
+ STDERR.puts "\033[0;37m#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}#{severity_color}[#{severity.to_s}]\033[0m: " + message if severity >= @@severity
19
26
  end
20
27
 
21
28
  def self.debug(message)
@@ -34,6 +41,15 @@ module Log
34
41
  log(message, HIGH)
35
42
  end
36
43
 
44
+ def self.warn(message)
45
+ log(message, WARN)
46
+ end
47
+
48
+ def self.error(message)
49
+ log(message, ERROR)
50
+ end
51
+
52
+
37
53
  case ENV['RBBT_LOG']
38
54
  when 'DEBUG'
39
55
  @@severity = DEBUG
@@ -44,7 +60,7 @@ module Log
44
60
  when 'HIGH'
45
61
  @@severity = HIGH
46
62
  when nil
47
- @@severity = HIGH
63
+ @@severity = INFO
48
64
  else
49
65
  @@severity = ENV['RBBT_LOG'].to_i
50
66
  end
@@ -1,11 +1,12 @@
1
1
  require 'iconv'
2
+ require 'digest/md5'
2
3
 
3
4
  class RBBTError < StandardError
4
5
  attr_accessor :info
5
6
 
6
7
  alias old_to_s to_s
7
8
  def to_s
8
- str = old_to_s
9
+ str = old_to_s.dup
9
10
  if info
10
11
  str << "\n" << "Additional Info:\n---\n" << info << "---"
11
12
  end
@@ -32,13 +33,26 @@ module Misc
32
33
  end
33
34
 
34
35
  def self.in_directory?(file, directory)
35
- if file.to_s =~ /^#{Regexp.quote File.expand_path(directory)}/
36
+ if File.expand_path(file) =~ /^#{Regexp.quote File.expand_path(directory)}/
36
37
  true
37
38
  else
38
39
  false
39
40
  end
40
41
  end
41
42
 
43
+ def self.find_files_back_to(path, target, subdir)
44
+ return [] if path.nil?
45
+ files = []
46
+ while in_directory?(path, subdir)
47
+ path = path.dirname
48
+ if path[target].exists?
49
+ files << path[target]
50
+ end
51
+ end
52
+
53
+ files
54
+ end
55
+
42
56
  def self.this_dir
43
57
  File.expand_path(File.dirname(caller[0]))
44
58
  end
@@ -46,11 +60,11 @@ module Misc
46
60
  def self.env_add(var, value, sep = ":", prepend = true)
47
61
  ENV[var] ||= ""
48
62
  return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
49
- if prepend
50
- ENV[var] = value + sep + ENV[var]
51
- else
52
- ENV[var] += sep + ENV[var]
53
- end
63
+ if prepend
64
+ ENV[var] = value + sep + ENV[var]
65
+ else
66
+ ENV[var] += sep + ENV[var]
67
+ end
54
68
  end
55
69
 
56
70
  def self.count(list)
@@ -114,13 +128,26 @@ module Misc
114
128
  end
115
129
 
116
130
  def self.hash2string(hash)
117
- hash.collect{|k,v|
131
+ hash.sort_by{|k,v| k.to_s}.collect{|k,v|
118
132
  next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
119
133
  [ Symbol === k ? ":" << k.to_s : k,
120
134
  Symbol === v ? ":" << v.to_s : v] * "="
121
135
  }.compact * "#"
122
136
  end
123
137
 
138
+ def self.hash2md5(hash)
139
+ o = {}
140
+ hash.each do |k,v|
141
+ if v.inspect =~ /:0x0/
142
+ o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
143
+ else
144
+ o[k] = v
145
+ end
146
+ end
147
+
148
+ Digest::MD5.hexdigest(o.inspect)
149
+ end
150
+
124
151
  def self.string2hash(string)
125
152
 
126
153
  options = {}
@@ -200,6 +227,62 @@ module Misc
200
227
  text.split(split)[1..-1]
201
228
  end
202
229
 
230
+ def self.insist(times = 3)
231
+ try = 0
232
+ begin
233
+ yield
234
+ rescue
235
+ try += 1
236
+ retry if try < times
237
+ end
238
+ end
239
+
240
+ def self.try3times(&block)
241
+ insist(3, &block)
242
+ end
243
+
244
+
245
+ # Divides the array into +num+ chunks of the same size by placing one
246
+ # element in each chunk iteratively.
247
+ def self.divide(array, num)
248
+ chunks = [[]] * num
249
+ array.each_with_index{|e, i|
250
+ c = i % num
251
+ chunks[c] << e
252
+ }
253
+ chunks
254
+ end
255
+
256
+ IUPAC2BASE = {
257
+ "A" => ["A"],
258
+ "C" => ["C"],
259
+ "G" => ["G"],
260
+ "T" => ["T"],
261
+ "U" => ["U"],
262
+ "R" => "A or G".split(" or "),
263
+ "Y" => "C or T".split(" or "),
264
+ "S" => "G or C".split(" or "),
265
+ "W" => "A or T".split(" or "),
266
+ "K" => "G or T".split(" or "),
267
+ "M" => "A or C".split(" or "),
268
+ "B" => "C or G or T".split(" or "),
269
+ "D" => "A or G or T".split(" or "),
270
+ "H" => "A or C or T".split(" or "),
271
+ "V" => "A or C or G".split(" or "),
272
+ "N" => %w(A C T G),
273
+ }
274
+
275
+ BASE2COMPLEMENT = {
276
+ "A" => "T",
277
+ "C" => "G",
278
+ "G" => "C",
279
+ "T" => "A",
280
+ "U" => "A",
281
+ }
282
+
283
+ def self.IUPAC_to_base(iupac)
284
+ IUPAC2BASE[iupac]
285
+ end
203
286
  end
204
287
 
205
288
  module PDF2Text
@@ -222,10 +305,27 @@ class NamedArray < Array
222
305
  a
223
306
  end
224
307
 
308
+ def merge(array)
309
+ double = Array === array.first
310
+ new = self.dup
311
+ (0..length - 1).each do |i|
312
+ if double
313
+ new[i] = new[i] + array[i]
314
+ else
315
+ new[i] << array[i]
316
+ end
317
+ end
318
+ new
319
+ end
320
+
225
321
  def positions(fields)
226
- fields.collect{|field|
227
- Misc.field_position(@fields, field)
228
- }
322
+ if Array == fields
323
+ fields.collect{|field|
324
+ Misc.field_position(@fields, field)
325
+ }
326
+ else
327
+ Misc.field_position(@fields, fields)
328
+ end
229
329
  end
230
330
 
231
331
  alias original_get_brackets []
@@ -234,9 +334,9 @@ class NamedArray < Array
234
334
  end
235
335
 
236
336
  alias original_set_brackets []=
237
- def []=(key,value)
238
- original_set_brackets(Misc.field_position(fields, key), value)
239
- end
337
+ def []=(key,value)
338
+ original_set_brackets(Misc.field_position(fields, key), value)
339
+ end
240
340
 
241
341
  alias original_values_at values_at
242
342
  def values_at(*keys)
@@ -259,6 +359,12 @@ class NamedArray < Array
259
359
  values = NamedArray.name(values, fields)
260
360
  values.zip_fields
261
361
  end
362
+
363
+ def report
364
+ fields.zip(self).collect do |field,value|
365
+ "* #{ field }: #{ Array === value ? value * "|" : value }"
366
+ end * "\n"
367
+ end
262
368
  end
263
369
 
264
370
  def benchmark(bench = true)
@@ -281,7 +387,7 @@ def profile(prof = true)
281
387
  res = yield
282
388
  result = RubyProf.stop
283
389
 
284
- # Print a flat profile to text
390
+ # Print a flat profile to text
285
391
  printer = RubyProf::FlatPrinter.new(result)
286
392
  printer.print(STDOUT, 0)
287
393
  res
@@ -203,12 +203,22 @@ module Open
203
203
  def self.write(file, content)
204
204
  FileUtils.mkdir_p File.dirname(file)
205
205
  if String === content
206
- File.open(file, 'w') do |f| f.write content end
206
+ File.open(file, 'w') do |f|
207
+ f.flock(File::LOCK_EX)
208
+ f.write content
209
+ f.flock(File::LOCK_UN)
210
+ end
207
211
  else
208
- File.open(file, 'w') do |f|
209
- while l = content.gets
210
- f.write l
212
+ begin
213
+ File.open(file, 'w') do |f|
214
+ f.flock(File::LOCK_EX)
215
+ while l = content.gets
216
+ f.write l
217
+ end
218
+ f.flock(File::LOCK_UN)
211
219
  end
220
+ rescue
221
+ FileUtils.rm file if File.exists? file
212
222
  end
213
223
  content.close
214
224
  end
@@ -1,6 +1,7 @@
1
1
  require 'rbbt/util/tsv'
2
2
  require 'rbbt/util/misc'
3
3
  require 'rbbt/util/open'
4
+ require 'digest/md5'
4
5
  require 'yaml'
5
6
 
6
7
  module Persistence
@@ -20,17 +21,12 @@ module Persistence
20
21
  end
21
22
 
22
23
  def self.get_persistence_file(file, prefix, options = {})
24
+ persistence_dir = Misc.process_options options, :persistence_dir
25
+ persistence_dir ||= CACHEDIR
23
26
  name = prefix.to_s << ":" << file.to_s << ":"
24
- o = {}
25
- options.each do |k,v|
26
- if v.inspect =~ /:0x0/
27
- o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
28
- else
29
- o[k] = v
30
- end
31
- end
32
27
 
33
- File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, o].inspect))
28
+ options_md5 = Misc.hash2md5 options
29
+ File.join(persistence_dir, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + options_md5)
34
30
  end
35
31
 
36
32
  def self.get_filename(file)
@@ -132,16 +128,36 @@ module Persistence
132
128
  end
133
129
  end
134
130
 
135
- def self.tsv_serializer(data)
136
- case
137
- when (not Object::TSV === data)
138
- :marshal
139
- when data.type == :double
140
- :double
141
- when data.type == :single
142
- :single
131
+ def self.tsv_serializer(data, extra = nil)
132
+ if Object::TSV === data
133
+ return :integer if (data.cast == "to_i" or data.cast == :to_i) and data.type == :single
134
+ return :integer_array if (data.cast == "to_i" or data.cast == :to_i) and (data.type == :list or data.type == :flat)
135
+
136
+ case
137
+ when data.type == :double
138
+ :double
139
+ when data.type == :list
140
+ :list
141
+ when data.type == :single
142
+ :single
143
+ else
144
+ :marshal
145
+ end
143
146
  else
144
- :list
147
+ return :marshal if extra.nil?
148
+ return :integer if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and extra[:type] == :single
149
+ return :integer_array if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and (extra[:type] == :list or extra[:type] == :flat)
150
+
151
+ case
152
+ when extra[:type] == :double
153
+ :double
154
+ when extra[:type] == :list
155
+ :list
156
+ when extra[:type] == :single
157
+ :single
158
+ else
159
+ :marshal
160
+ end
145
161
  end
146
162
  end
147
163
 
@@ -208,7 +224,8 @@ module Persistence
208
224
  if persistence_update or not File.exists? persistence_file
209
225
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
210
226
  res, extra = yield file, options, filename, persistence_file
211
- serializer = tsv_serializer res
227
+
228
+ serializer = tsv_serializer res, extra
212
229
 
213
230
  per = Persistence::TSV.get persistence_file, true, serializer
214
231
 
@@ -258,7 +275,6 @@ module Persistence
258
275
  FileUtils.rm persistence_file
259
276
  end
260
277
 
261
-
262
278
  max_length = res.collect{|k,v| k.length}.max
263
279
 
264
280
  if range
@@ -266,7 +282,7 @@ module Persistence
266
282
  fwt = FixWidthTable.new persistence_file, max_length, true
267
283
  fwt.add_range res
268
284
  rescue
269
- FileUtils.rm persistence_file
285
+ FileUtils.rm persistence_file if File.exists? persistence_file
270
286
  raise $!
271
287
  end
272
288
  else
@@ -298,6 +314,19 @@ module Persistence
298
314
 
299
315
  filename = get_filename(file)
300
316
 
317
+ if persistence == :no_create
318
+ o = options.dup
319
+ options =
320
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
321
+ persistence_update, persistence_dir, persistence_file, filename =
322
+ Misc.process_options options, :persistence_update, :persistence_dir, :persistence_file, :filename
323
+
324
+ filename ||= get_filename(file)
325
+ persistence_file ||= get_persistence_file(filename, prefix, options.merge(:persistence_dir => persistence_dir))
326
+
327
+ persistence = false if not File.exists? persistence_file
328
+ end
329
+
301
330
  if not persistence
302
331
  Log.low "Non Persistent Loading for #{filename}. Prefix: #{prefix}"
303
332
  yield file, options, filename
@@ -322,4 +351,6 @@ module Persistence
322
351
  end
323
352
  end
324
353
  end
354
+
355
+
325
356
  end