rbbt-util 2.1.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/bin/rbbt_query.rb +63 -0
  2. data/lib/rbbt-util.rb +5 -5
  3. data/lib/rbbt.rb +2 -11
  4. data/lib/rbbt/util/cmd.rb +1 -1
  5. data/lib/rbbt/util/fix_width_table.rb +9 -3
  6. data/lib/rbbt/util/log.rb +23 -7
  7. data/lib/rbbt/util/misc.rb +121 -15
  8. data/lib/rbbt/util/open.rb +14 -4
  9. data/lib/rbbt/util/persistence.rb +52 -21
  10. data/lib/rbbt/util/rake.rb +108 -21
  11. data/lib/rbbt/util/resource.rb +338 -0
  12. data/lib/rbbt/util/simpleDSL.rb +1 -1
  13. data/lib/rbbt/util/simpleopt.rb +1 -1
  14. data/lib/rbbt/util/task.rb +340 -0
  15. data/lib/rbbt/util/tc_hash.rb +19 -2
  16. data/lib/rbbt/util/tsv.rb +15 -10
  17. data/lib/rbbt/util/tsv/accessor.rb +16 -7
  18. data/lib/rbbt/util/tsv/attach.rb +220 -17
  19. data/lib/rbbt/util/tsv/index.rb +6 -1
  20. data/lib/rbbt/util/tsv/manipulate.rb +4 -5
  21. data/lib/rbbt/util/tsv/parse.rb +45 -21
  22. data/lib/rbbt/util/tsv/resource.rb +74 -0
  23. data/lib/rbbt/util/workflow.rb +99 -75
  24. data/test/rbbt/util/test_filecache.rb +2 -2
  25. data/test/rbbt/util/test_misc.rb +7 -2
  26. data/test/rbbt/util/test_persistence.rb +40 -5
  27. data/test/rbbt/util/test_resource.rb +92 -0
  28. data/test/rbbt/util/test_task.rb +118 -0
  29. data/test/rbbt/util/test_tsv.rb +5 -1
  30. data/test/rbbt/util/test_workflow.rb +77 -62
  31. data/test/rbbt/util/tsv/test_attach.rb +95 -7
  32. data/test/rbbt/util/tsv/test_index.rb +0 -1
  33. data/test/rbbt/util/tsv/test_manipulate.rb +20 -0
  34. data/test/rbbt/util/tsv/test_resource.rb +9 -0
  35. data/test/test_helper.rb +10 -0
  36. data/test/test_rbbt.rb +2 -37
  37. metadata +16 -18
  38. data/lib/rbbt/util/data_module.rb +0 -93
  39. data/lib/rbbt/util/path.rb +0 -155
  40. data/lib/rbbt/util/pkg_config.rb +0 -78
  41. data/lib/rbbt/util/pkg_data.rb +0 -119
  42. data/lib/rbbt/util/pkg_software.rb +0 -145
  43. data/test/rbbt/util/test_data_module.rb +0 -50
  44. data/test/rbbt/util/test_path.rb +0 -10
  45. data/test/rbbt/util/test_pkg_data.rb +0 -129
  46. data/test/test_pkg.rb +0 -28
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/sources/organism'
6
+
7
+ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-l--log*:-r--report:-h--hash")
8
+
9
+ file = ARGV[0]
10
+
11
+ if not File.exists? file
12
+ base, path = file.match(/(.*)?\.(.*)/).values_at 1, 2
13
+ require 'rbbt/sources/' << base.to_s.downcase
14
+ klass = Misc.string2const base
15
+ file = klass[path].find
16
+ end
17
+
18
+ entities = ARGV[1].nil? ? '-' : ARGV[1].dup
19
+ persistence = options[:persistence]
20
+ log = (options[:log] || 4).to_i
21
+ Log.severity = log
22
+ organism = options[:organism] || "Hsa"
23
+ identifiers = options[:identifiers] || Organism.identifiers(organism)
24
+ format = options[:format]
25
+ report = options[:report]
26
+
27
+ if format.to_s == "key"
28
+ File.open(file) do |f|
29
+ format = TSV.parse_header(f).first
30
+ end
31
+ end
32
+
33
+ if format.nil?
34
+ f, count = TSV.new(identifiers).guess_field(entities)
35
+ format = f if count > 0
36
+ end
37
+
38
+ data = if entities == '-'
39
+ TSV.new(STDIN)
40
+ #entities = TSV.new(STDIN.read.split(/\n|\||\t/)
41
+ else
42
+ entities = [entities]
43
+ data = TSV.new(entities)
44
+ data.type = :double
45
+ data.identifiers = identifiers
46
+ data.key_field = format.dup unless FalseClass === format or format.nil?
47
+ data.fields ||= []
48
+ data
49
+ end
50
+
51
+ data.type = :double
52
+ data.identifiers = identifiers
53
+
54
+ data.attach TSV.new file, :persistence => persistence
55
+
56
+ if report
57
+ data.each do |entity,values|
58
+ puts "== Entity: #{ entity }"
59
+ puts values.report
60
+ end
61
+ else
62
+ puts data
63
+ end
@@ -9,8 +9,8 @@ require 'rbbt/util/bed'
9
9
  require 'rbbt/util/cachehelper'
10
10
  require 'rbbt/util/misc'
11
11
 
12
- FileCache.cachedir = Rbbt.cachedir
13
- Open.cachedir = File.join(Rbbt.cachedir, 'open-remote/')
14
- TmpFile.tmpdir = File.join(Rbbt.tmpdir)
15
- Persistence.cachedir = File.join(Rbbt.cachedir, 'persistence')
16
- Bed.cachedir = File.join(Rbbt.cachedir, 'bed_cache')
12
+ FileCache.cachedir = Rbbt.var.cache.filecache.find :user
13
+ Open.cachedir = Rbbt.var.cache["open-remote"].find :user
14
+ TmpFile.tmpdir = Rbbt.tmp.find :user
15
+ Persistence.cachedir = Rbbt.var.cache.persistence.find :user
16
+ Bed.cachedir = Rbbt.var.cache["bed-persistence"].find :user
@@ -1,16 +1,7 @@
1
- require 'rbbt/util/pkg_config'
2
- require 'rbbt/util/pkg_data'
3
- require 'rbbt/util/pkg_software'
4
- require 'rbbt/util/open'
5
- require 'rbbt/util/tmpfile'
6
- require 'rbbt/util/filecache'
1
+ require 'rbbt/util/resource'
7
2
 
8
3
  module Rbbt
9
- extend PKGConfig
10
- extend PKGData
11
- extend PKGSoftware
12
-
13
- self.load_cfg(%w(tmpdir cachedir datadir))
4
+ extend Resource
14
5
  end
15
6
 
16
7
 
@@ -122,7 +122,7 @@ module CMD
122
122
  when String === in_content
123
123
  sin.last.write in_content
124
124
  sin.last.close
125
- when IO === in_content
125
+ when in_content.respond_to?(:gets)
126
126
  Thread.new do
127
127
  while not in_content.eof?
128
128
  sin.last.write in_content.gets
@@ -5,13 +5,19 @@ class FixWidthTable
5
5
  def initialize(filename, value_size = nil, range = nil, update = false)
6
6
  @filename = filename
7
7
 
8
- if update or not File.exists? filename
8
+ if update or %(memmory stringio).include?(filename.to_s.downcase) or not File.exists? filename
9
9
  Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
- FileUtils.rm @filename if File.exists? @filename
11
10
  @value_size = value_size
12
11
  @range = range
13
12
  @record_size = @value_size + (@range ? 12 : 4)
14
- @file = File.open(@filename, 'wb')
13
+
14
+ if %(memory stringio).include? filename.to_s.downcase
15
+ @file = StringIO.new
16
+ else
17
+ FileUtils.rm @filename if File.exists? @filename
18
+ @file = File.open(@filename, 'wb')
19
+ end
20
+
15
21
  @file.write [value_size].pack("L")
16
22
  @file.write [@range ? 1 : 0 ].pack("C")
17
23
  @size = 0
@@ -1,9 +1,12 @@
1
1
  module Log
2
2
 
3
- DEBUG = 0
4
- LOW = 1
5
- MEDIUM = 2
6
- HIGH = 3
3
+ DEBUG = 0
4
+ LOW = 1
5
+ MEDIUM = 2
6
+ HIGH = 3
7
+ INFO = 4
8
+ WARN = 5
9
+ ERROR = 6
7
10
 
8
11
  def self.severity=(severity)
9
12
  @@severity = severity
@@ -13,9 +16,13 @@ module Log
13
16
  @@severity
14
17
  end
15
18
 
19
+ SEVERITY_COLOR = ["0;37m", "32m", "33m", "31m", "1;0m" ].collect{|e| "\033[#{e}"}
20
+
16
21
  def self.log(message, severity = MEDIUM)
17
- STDERR.puts caller * "\n" if @@severity == -1 and not message.empty?
18
- STDERR.puts "#{Time.now}[#{severity.to_s}]: " + message if severity >= @@severity
22
+ severity_color = SEVERITY_COLOR[severity]
23
+ STDERR.puts caller.select{|l| l =~ /rbbt/} * "\n" if @@severity == -1 and not message.empty?
24
+ #STDERR.puts "#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}[#{severity.to_s}]: " + message if severity >= @@severity
25
+ STDERR.puts "\033[0;37m#{Time.now.strftime("[%m/%d/%y-%H:%M:%S]")}#{severity_color}[#{severity.to_s}]\033[0m: " + message if severity >= @@severity
19
26
  end
20
27
 
21
28
  def self.debug(message)
@@ -34,6 +41,15 @@ module Log
34
41
  log(message, HIGH)
35
42
  end
36
43
 
44
+ def self.warn(message)
45
+ log(message, WARN)
46
+ end
47
+
48
+ def self.error(message)
49
+ log(message, ERROR)
50
+ end
51
+
52
+
37
53
  case ENV['RBBT_LOG']
38
54
  when 'DEBUG'
39
55
  @@severity = DEBUG
@@ -44,7 +60,7 @@ module Log
44
60
  when 'HIGH'
45
61
  @@severity = HIGH
46
62
  when nil
47
- @@severity = HIGH
63
+ @@severity = INFO
48
64
  else
49
65
  @@severity = ENV['RBBT_LOG'].to_i
50
66
  end
@@ -1,11 +1,12 @@
1
1
  require 'iconv'
2
+ require 'digest/md5'
2
3
 
3
4
  class RBBTError < StandardError
4
5
  attr_accessor :info
5
6
 
6
7
  alias old_to_s to_s
7
8
  def to_s
8
- str = old_to_s
9
+ str = old_to_s.dup
9
10
  if info
10
11
  str << "\n" << "Additional Info:\n---\n" << info << "---"
11
12
  end
@@ -32,13 +33,26 @@ module Misc
32
33
  end
33
34
 
34
35
  def self.in_directory?(file, directory)
35
- if file.to_s =~ /^#{Regexp.quote File.expand_path(directory)}/
36
+ if File.expand_path(file) =~ /^#{Regexp.quote File.expand_path(directory)}/
36
37
  true
37
38
  else
38
39
  false
39
40
  end
40
41
  end
41
42
 
43
+ def self.find_files_back_to(path, target, subdir)
44
+ return [] if path.nil?
45
+ files = []
46
+ while in_directory?(path, subdir)
47
+ path = path.dirname
48
+ if path[target].exists?
49
+ files << path[target]
50
+ end
51
+ end
52
+
53
+ files
54
+ end
55
+
42
56
  def self.this_dir
43
57
  File.expand_path(File.dirname(caller[0]))
44
58
  end
@@ -46,11 +60,11 @@ module Misc
46
60
  def self.env_add(var, value, sep = ":", prepend = true)
47
61
  ENV[var] ||= ""
48
62
  return if ENV[var] =~ /(#{sep}|^)#{Regexp.quote value}(#{sep}|$)/
49
- if prepend
50
- ENV[var] = value + sep + ENV[var]
51
- else
52
- ENV[var] += sep + ENV[var]
53
- end
63
+ if prepend
64
+ ENV[var] = value + sep + ENV[var]
65
+ else
66
+ ENV[var] += sep + ENV[var]
67
+ end
54
68
  end
55
69
 
56
70
  def self.count(list)
@@ -114,13 +128,26 @@ module Misc
114
128
  end
115
129
 
116
130
  def self.hash2string(hash)
117
- hash.collect{|k,v|
131
+ hash.sort_by{|k,v| k.to_s}.collect{|k,v|
118
132
  next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
119
133
  [ Symbol === k ? ":" << k.to_s : k,
120
134
  Symbol === v ? ":" << v.to_s : v] * "="
121
135
  }.compact * "#"
122
136
  end
123
137
 
138
+ def self.hash2md5(hash)
139
+ o = {}
140
+ hash.each do |k,v|
141
+ if v.inspect =~ /:0x0/
142
+ o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
143
+ else
144
+ o[k] = v
145
+ end
146
+ end
147
+
148
+ Digest::MD5.hexdigest(o.inspect)
149
+ end
150
+
124
151
  def self.string2hash(string)
125
152
 
126
153
  options = {}
@@ -200,6 +227,62 @@ module Misc
200
227
  text.split(split)[1..-1]
201
228
  end
202
229
 
230
+ def self.insist(times = 3)
231
+ try = 0
232
+ begin
233
+ yield
234
+ rescue
235
+ try += 1
236
+ retry if try < times
237
+ end
238
+ end
239
+
240
+ def self.try3times(&block)
241
+ insist(3, &block)
242
+ end
243
+
244
+
245
+ # Divides the array into +num+ chunks of the same size by placing one
246
+ # element in each chunk iteratively.
247
+ def self.divide(array, num)
248
+ chunks = [[]] * num
249
+ array.each_with_index{|e, i|
250
+ c = i % num
251
+ chunks[c] << e
252
+ }
253
+ chunks
254
+ end
255
+
256
+ IUPAC2BASE = {
257
+ "A" => ["A"],
258
+ "C" => ["C"],
259
+ "G" => ["G"],
260
+ "T" => ["T"],
261
+ "U" => ["U"],
262
+ "R" => "A or G".split(" or "),
263
+ "Y" => "C or T".split(" or "),
264
+ "S" => "G or C".split(" or "),
265
+ "W" => "A or T".split(" or "),
266
+ "K" => "G or T".split(" or "),
267
+ "M" => "A or C".split(" or "),
268
+ "B" => "C or G or T".split(" or "),
269
+ "D" => "A or G or T".split(" or "),
270
+ "H" => "A or C or T".split(" or "),
271
+ "V" => "A or C or G".split(" or "),
272
+ "N" => %w(A C T G),
273
+ }
274
+
275
+ BASE2COMPLEMENT = {
276
+ "A" => "T",
277
+ "C" => "G",
278
+ "G" => "C",
279
+ "T" => "A",
280
+ "U" => "A",
281
+ }
282
+
283
+ def self.IUPAC_to_base(iupac)
284
+ IUPAC2BASE[iupac]
285
+ end
203
286
  end
204
287
 
205
288
  module PDF2Text
@@ -222,10 +305,27 @@ class NamedArray < Array
222
305
  a
223
306
  end
224
307
 
308
+ def merge(array)
309
+ double = Array === array.first
310
+ new = self.dup
311
+ (0..length - 1).each do |i|
312
+ if double
313
+ new[i] = new[i] + array[i]
314
+ else
315
+ new[i] << array[i]
316
+ end
317
+ end
318
+ new
319
+ end
320
+
225
321
  def positions(fields)
226
- fields.collect{|field|
227
- Misc.field_position(@fields, field)
228
- }
322
+ if Array == fields
323
+ fields.collect{|field|
324
+ Misc.field_position(@fields, field)
325
+ }
326
+ else
327
+ Misc.field_position(@fields, fields)
328
+ end
229
329
  end
230
330
 
231
331
  alias original_get_brackets []
@@ -234,9 +334,9 @@ class NamedArray < Array
234
334
  end
235
335
 
236
336
  alias original_set_brackets []=
237
- def []=(key,value)
238
- original_set_brackets(Misc.field_position(fields, key), value)
239
- end
337
+ def []=(key,value)
338
+ original_set_brackets(Misc.field_position(fields, key), value)
339
+ end
240
340
 
241
341
  alias original_values_at values_at
242
342
  def values_at(*keys)
@@ -259,6 +359,12 @@ class NamedArray < Array
259
359
  values = NamedArray.name(values, fields)
260
360
  values.zip_fields
261
361
  end
362
+
363
+ def report
364
+ fields.zip(self).collect do |field,value|
365
+ "* #{ field }: #{ Array === value ? value * "|" : value }"
366
+ end * "\n"
367
+ end
262
368
  end
263
369
 
264
370
  def benchmark(bench = true)
@@ -281,7 +387,7 @@ def profile(prof = true)
281
387
  res = yield
282
388
  result = RubyProf.stop
283
389
 
284
- # Print a flat profile to text
390
+ # Print a flat profile to text
285
391
  printer = RubyProf::FlatPrinter.new(result)
286
392
  printer.print(STDOUT, 0)
287
393
  res
@@ -203,12 +203,22 @@ module Open
203
203
  def self.write(file, content)
204
204
  FileUtils.mkdir_p File.dirname(file)
205
205
  if String === content
206
- File.open(file, 'w') do |f| f.write content end
206
+ File.open(file, 'w') do |f|
207
+ f.flock(File::LOCK_EX)
208
+ f.write content
209
+ f.flock(File::LOCK_UN)
210
+ end
207
211
  else
208
- File.open(file, 'w') do |f|
209
- while l = content.gets
210
- f.write l
212
+ begin
213
+ File.open(file, 'w') do |f|
214
+ f.flock(File::LOCK_EX)
215
+ while l = content.gets
216
+ f.write l
217
+ end
218
+ f.flock(File::LOCK_UN)
211
219
  end
220
+ rescue
221
+ FileUtils.rm file if File.exists? file
212
222
  end
213
223
  content.close
214
224
  end
@@ -1,6 +1,7 @@
1
1
  require 'rbbt/util/tsv'
2
2
  require 'rbbt/util/misc'
3
3
  require 'rbbt/util/open'
4
+ require 'digest/md5'
4
5
  require 'yaml'
5
6
 
6
7
  module Persistence
@@ -20,17 +21,12 @@ module Persistence
20
21
  end
21
22
 
22
23
  def self.get_persistence_file(file, prefix, options = {})
24
+ persistence_dir = Misc.process_options options, :persistence_dir
25
+ persistence_dir ||= CACHEDIR
23
26
  name = prefix.to_s << ":" << file.to_s << ":"
24
- o = {}
25
- options.each do |k,v|
26
- if v.inspect =~ /:0x0/
27
- o[k] = v.inspect.sub(/:0x[a-f0-9]+@/,'')
28
- else
29
- o[k] = v
30
- end
31
- end
32
27
 
33
- File.join(CACHEDIR, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, o].inspect))
28
+ options_md5 = Misc.hash2md5 options
29
+ File.join(persistence_dir, name.to_s.gsub(/\s/,'_').gsub(/\//,'>') + options_md5)
34
30
  end
35
31
 
36
32
  def self.get_filename(file)
@@ -132,16 +128,36 @@ module Persistence
132
128
  end
133
129
  end
134
130
 
135
- def self.tsv_serializer(data)
136
- case
137
- when (not Object::TSV === data)
138
- :marshal
139
- when data.type == :double
140
- :double
141
- when data.type == :single
142
- :single
131
+ def self.tsv_serializer(data, extra = nil)
132
+ if Object::TSV === data
133
+ return :integer if (data.cast == "to_i" or data.cast == :to_i) and data.type == :single
134
+ return :integer_array if (data.cast == "to_i" or data.cast == :to_i) and (data.type == :list or data.type == :flat)
135
+
136
+ case
137
+ when data.type == :double
138
+ :double
139
+ when data.type == :list
140
+ :list
141
+ when data.type == :single
142
+ :single
143
+ else
144
+ :marshal
145
+ end
143
146
  else
144
- :list
147
+ return :marshal if extra.nil?
148
+ return :integer if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and extra[:type] == :single
149
+ return :integer_array if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and (extra[:type] == :list or extra[:type] == :flat)
150
+
151
+ case
152
+ when extra[:type] == :double
153
+ :double
154
+ when extra[:type] == :list
155
+ :list
156
+ when extra[:type] == :single
157
+ :single
158
+ else
159
+ :marshal
160
+ end
145
161
  end
146
162
  end
147
163
 
@@ -208,7 +224,8 @@ module Persistence
208
224
  if persistence_update or not File.exists? persistence_file
209
225
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
210
226
  res, extra = yield file, options, filename, persistence_file
211
- serializer = tsv_serializer res
227
+
228
+ serializer = tsv_serializer res, extra
212
229
 
213
230
  per = Persistence::TSV.get persistence_file, true, serializer
214
231
 
@@ -258,7 +275,6 @@ module Persistence
258
275
  FileUtils.rm persistence_file
259
276
  end
260
277
 
261
-
262
278
  max_length = res.collect{|k,v| k.length}.max
263
279
 
264
280
  if range
@@ -266,7 +282,7 @@ module Persistence
266
282
  fwt = FixWidthTable.new persistence_file, max_length, true
267
283
  fwt.add_range res
268
284
  rescue
269
- FileUtils.rm persistence_file
285
+ FileUtils.rm persistence_file if File.exists? persistence_file
270
286
  raise $!
271
287
  end
272
288
  else
@@ -298,6 +314,19 @@ module Persistence
298
314
 
299
315
  filename = get_filename(file)
300
316
 
317
+ if persistence == :no_create
318
+ o = options.dup
319
+ options =
320
+ Misc.add_defaults options, :persistence_update => false, :persistence_file => nil, :filename => nil
321
+ persistence_update, persistence_dir, persistence_file, filename =
322
+ Misc.process_options options, :persistence_update, :persistence_dir, :persistence_file, :filename
323
+
324
+ filename ||= get_filename(file)
325
+ persistence_file ||= get_persistence_file(filename, prefix, options.merge(:persistence_dir => persistence_dir))
326
+
327
+ persistence = false if not File.exists? persistence_file
328
+ end
329
+
301
330
  if not persistence
302
331
  Log.low "Non Persistent Loading for #{filename}. Prefix: #{prefix}"
303
332
  yield file, options, filename
@@ -322,4 +351,6 @@ module Persistence
322
351
  end
323
352
  end
324
353
  end
354
+
355
+
325
356
  end