rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
data/lib/rbbt-util.rb CHANGED
@@ -4,6 +4,7 @@ require 'rbbt/util/cmd'
4
4
  require 'rbbt/util/tmpfile'
5
5
  require 'rbbt/util/filecache'
6
6
  require 'rbbt/util/tsv'
7
+ require 'rbbt/util/persistence'
7
8
  require 'rbbt/util/bed'
8
9
  require 'rbbt/util/cachehelper'
9
10
  require 'rbbt/util/misc'
@@ -11,5 +12,5 @@ require 'rbbt/util/misc'
11
12
  FileCache.cachedir = Rbbt.cachedir
12
13
  Open.cachedir = File.join(Rbbt.cachedir, 'open-remote/')
13
14
  TmpFile.tmpdir = File.join(Rbbt.tmpdir)
14
- TSV.cachedir = File.join(Rbbt.cachedir, 'tsv_cache')
15
+ Persistence.cachedir = File.join(Rbbt.cachedir, 'persistence')
15
16
  Bed.cachedir = File.join(Rbbt.cachedir, 'bed_cache')
data/lib/rbbt/util/R.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/cmd'
2
+ require 'rbbt/util/tsv'
2
3
 
3
4
  module R
4
5
 
@@ -18,7 +19,23 @@ module R
18
19
 
19
20
  Log.debug "R Script:\n#{ cmd }"
20
21
 
21
- CMD.cmd('R --vanilla --slave', options.merge(:in => cmd))
22
+ CMD.cmd('R --vanilla --slave --quiet', options.merge(:in => cmd))
22
23
  end
23
24
 
24
25
  end
26
+
27
+ class TSV
28
+ def R(script)
29
+ TmpFile.with_file do |f|
30
+ Open.write(f, self.to_s)
31
+ Log.debug(R.run(
32
+ <<-EOF
33
+ data = rbbt.tsv('#{f}');
34
+ #{script.strip}
35
+ rbbt.tsv.write('#{f}', data);
36
+ EOF
37
+ ).read)
38
+ TSV.new(f, :type => :list)
39
+ end
40
+ end
41
+ end
data/lib/rbbt/util/cmd.rb CHANGED
@@ -82,12 +82,13 @@ module CMD
82
82
  sout, serr = IO.pipe, IO.pipe
83
83
 
84
84
  case
85
- when (IO === in_content and not StringIO === in_content)
85
+ when (false and (IO === in_content and not StringIO === in_content))
86
86
  sin = [in_content, nil]
87
- else StringIO === in_content
87
+ else
88
88
  sin = IO.pipe
89
89
  end
90
90
 
91
+
91
92
  pid = fork {
92
93
  begin
93
94
 
@@ -95,7 +96,6 @@ module CMD
95
96
  STDIN.reopen sin.first
96
97
  sin.first.close
97
98
 
98
-
99
99
  serr.first.close
100
100
  STDERR.reopen serr.last
101
101
  serr.last.close
@@ -115,16 +115,17 @@ module CMD
115
115
  sout.last.close
116
116
  serr.last.close
117
117
 
118
+
118
119
  Log.debug "CMD: [#{pid}] #{cmd}"
119
120
 
120
121
  case
121
122
  when String === in_content
122
123
  sin.last.write in_content
123
124
  sin.last.close
124
- when StringIO === in_content
125
+ when IO === in_content
125
126
  Thread.new do
126
- while l = in_content.gets
127
- sin.last.write l
127
+ while not in_content.eof?
128
+ sin.last.write in_content.gets
128
129
  end
129
130
  sin.last.close
130
131
  end
data/lib/rbbt/util/data_module.rb CHANGED
@@ -1,24 +1,46 @@
1
1
  module DataModule
2
2
 
3
+ def self.rakefiles(sharedir, data_module)
4
+ Dir.glob(File.join(sharedir, 'install', data_module.to_s, '**','Rakefile')).collect{|f| File.expand_path f}
5
+ end
6
+
3
7
  attr_accessor :sharedir, :rakefile, :pkg_module
4
- def self.extended(base)
5
- if defined? base::PKG and base::PKG
6
- base.pkg_module = base::PKG
8
+ def self.extended(data_module)
9
+ if defined? data_module::PKG and data_module::PKG
10
+ data_module.pkg_module = data_module::PKG
7
11
  else
8
- base.pkg_module = Rbbt
12
+ data_module.pkg_module = Rbbt
9
13
  end
10
14
 
11
- base.sharedir = PKGData.get_caller_sharedir
15
+ data_module.sharedir = PKGData.get_caller_sharedir
12
16
 
13
- Dir.glob(File.join(base.sharedir, 'install', base.to_s, '**','Rakefile')).each do |rakefile|
17
+ install_dir = File.join(data_module.sharedir, 'install')
18
+ rake_sharedir = File.join(data_module.sharedir, 'install')
19
+
20
+ rakefiles(data_module.sharedir, data_module).each do |rakefile|
21
+ rakefile_dir = File.dirname(rakefile)
22
+
14
23
  RakeHelper.files(rakefile).each do |file|
15
- base.pkg_module.claim file,
16
- rakefile.sub(/^#{Regexp.quote File.join(base.sharedir)}\/?/,''),
17
- File.dirname(rakefile).sub(/^#{Regexp.quote File.join(base.sharedir, 'install')}\/?/,'')
24
+ file_path = Misc.path_relative_to(File.join(File.dirname(rakefile), file), rakefile_dir)
25
+ get = :Rakefile
26
+ subdir = Misc.path_relative_to(File.dirname(rakefile), install_dir)
27
+ namespace = data_module.to_s.gsub('/', '::')
28
+
29
+ data_module.pkg_module.claim file_path, get, subdir, namespace, rake_sharedir
18
30
  end
19
31
  end
20
32
  end
21
33
 
34
+ def files
35
+ DataModule.rakefiles(sharedir, self).collect do |rakefile|
36
+ RakeHelper.files(rakefile).collect
37
+ end.flatten
38
+ end
39
+
40
+ def datadir
41
+ File.join(pkg_module.datadir, self.to_s)
42
+ end
43
+
22
44
  module WithKey
23
45
  def klass=(klass)
24
46
  @klass = klass
@@ -68,6 +90,4 @@ module DataModule
68
90
  old_method_missing name, *args, &block
69
91
  end
70
92
  end
71
-
72
-
73
93
  end
data/lib/rbbt/util/fix_width_table.rb ADDED
@@ -0,0 +1,209 @@
1
+
2
+ class FixWidthTable
3
+
4
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size
5
+ def initialize(filename, value_size = nil, range = nil, update = false)
6
+ @filename = filename
7
+
8
+ if update or not File.exists? filename
9
+ Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
+ FileUtils.rm @filename if File.exists? @filename
11
+ @value_size = value_size
12
+ @range = range
13
+ @record_size = @value_size + (@range ? 12 : 4)
14
+ @file = File.open(@filename, 'wb')
15
+ @file.write [value_size].pack("L")
16
+ @file.write [@range ? 1 : 0 ].pack("C")
17
+ @size = 0
18
+ else
19
+ Log.debug "Reading FixWidthTable at #{ @filename.inspect }"
20
+ @file = File.open(@filename, 'r')
21
+ @value_size = @file.read(4).unpack("L").first
22
+ @range = @file.read(1).unpack("C").first == 1
23
+ @record_size = @value_size + (@range ? 12 : 4)
24
+ @size = (File.size(@filename) - 5) / (@record_size)
25
+ end
26
+ end
27
+
28
+ def format(pos, value)
29
+ padding = value_size - value.length
30
+ if range
31
+ (pos + [value + "\0" * padding]).pack("llla#{value_size}")
32
+ else
33
+ [pos, value + "\0" * padding].pack("la#{value_size}")
34
+ end
35
+ end
36
+
37
+ def unformat(format)
38
+ if range
39
+ pos_start, pos_end, pos_overlap, value = format.unpack("llla#{value_size}")
40
+ [[pos_start, pos_end, pos_overlap], value.strip]
41
+ else
42
+ pos, value = format.unpack("la#{value_size}")
43
+ [pos, value.strip]
44
+ end
45
+ end
46
+
47
+ def add(pos, value)
48
+ format = format(pos, value)
49
+ @file.write format
50
+ @size += 1
51
+ end
52
+ alias << add
53
+
54
+ def pos(index)
55
+ return nil if index < 0 or index >= size
56
+ @file.seek(5 + (record_size) * index, IO::SEEK_SET)
57
+ @file.read(4).unpack("l").first
58
+ end
59
+
60
+ def pos_end(index)
61
+ return nil if index < 0 or index >= size
62
+ @file.seek(9 + (record_size) * index, IO::SEEK_SET)
63
+ @file.read(4).unpack("l").first
64
+ end
65
+
66
+ def overlap(index)
67
+ return nil if index < 0 or index >= size
68
+ @file.seek(13 + (record_size) * index, IO::SEEK_SET)
69
+ @file.read(4).unpack("l").first
70
+ end
71
+
72
+ def value(index)
73
+ return nil if index < 0 or index >= size
74
+ @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
75
+ @file.read(value_size).unpack("a#{value_size}").first.strip
76
+ end
77
+
78
+ def read
79
+ @file.close unless @file.closed?
80
+ @file = File.open(@filename, 'r')
81
+ end
82
+
83
+ def close
84
+ @file.close
85
+ end
86
+
87
+ #{{{ Adding data
88
+
89
+ def add_point(data)
90
+ data.sort_by{|value, pos| pos}.each do |value, pos|
91
+ add pos, value
92
+ end
93
+ end
94
+
95
+ def add_range(data)
96
+ latest = []
97
+ data.sort_by{|value, pos| pos[0]}.each do |value, pos|
98
+ while latest.any? and latest[0] < pos[0]
99
+ latest.shift
100
+ end
101
+
102
+ overlap = latest.length
103
+
104
+ add pos + [overlap], value
105
+ latest << pos[1]
106
+ end
107
+ end
108
+
109
+ #{{{ Searching
110
+
111
+ def closest(pos)
112
+ upper = size - 1
113
+ lower = 0
114
+
115
+ return -1 if upper < lower
116
+
117
+ while(upper >= lower) do
118
+ idx = lower + (upper - lower) / 2
119
+ comp = pos <=> pos(idx)
120
+
121
+ if comp == 0
122
+ break
123
+ elsif comp > 0
124
+ lower = idx + 1
125
+ else
126
+ upper = idx - 1
127
+ end
128
+ end
129
+
130
+ if pos(idx) > pos
131
+ idx = idx - 1
132
+ end
133
+
134
+ idx
135
+ end
136
+
137
+ def get_range(pos)
138
+ if Range === pos
139
+ r_start = pos.begin
140
+ r_end = pos.end
141
+ else
142
+ r_start = pos.to_i
143
+ r_end = pos.to_i
144
+ end
145
+
146
+ idx = closest(r_start)
147
+
148
+ return [] if idx >= size
149
+ return [] if idx <0 and r_start == r_end
150
+
151
+ idx = 0 if idx < 0
152
+
153
+ idx -= overlap(idx) unless overlap(idx).nil?
154
+
155
+ values = []
156
+ l_start = pos(idx)
157
+ l_end = pos_end(idx)
158
+ while l_start <= r_end
159
+ values << value(idx) if l_end >= r_start
160
+ idx += 1
161
+ break if idx >= size
162
+ l_start = pos(idx)
163
+ l_end = pos_end(idx)
164
+ end
165
+
166
+ values
167
+ end
168
+
169
+ def get_point(pos)
170
+ if Range === pos
171
+ r_start = pos.begin
172
+ r_end = pos.end
173
+ else
174
+ r_start = pos.to_i
175
+ r_end = pos.to_i
176
+ end
177
+
178
+ idx = closest(r_start)
179
+
180
+ return [] if idx >= size
181
+ return [] if idx <0 and r_start == r_end
182
+
183
+ idx = 0 if idx < 0
184
+
185
+ idx += 1 unless pos(idx) >= r_start
186
+
187
+ values = []
188
+ l_start = pos(idx)
189
+ l_end = pos_end(idx)
190
+ while l_start <= r_end
191
+ values << value(idx)
192
+ idx += 1
193
+ break if idx >= size
194
+ l_start = pos(idx)
195
+ l_end = pos_end(idx)
196
+ end
197
+
198
+ values
199
+ end
200
+
201
+ def [](pos)
202
+ if range
203
+ get_range(pos)
204
+ else
205
+ get_point(pos)
206
+ end
207
+ end
208
+
209
+ end
data/lib/rbbt/util/log.rb CHANGED
@@ -14,6 +14,7 @@ module Log
14
14
  end
15
15
 
16
16
  def self.log(message, severity = MEDIUM)
17
+ STDERR.puts caller * "\n" if @@severity == -1 and not message.empty?
17
18
  STDERR.puts "#{Time.now}[#{severity.to_s}]: " + message if severity >= @@severity
18
19
  end
19
20
 
@@ -50,6 +51,15 @@ module Log
50
51
  end
51
52
 
52
53
  def ddd(message, file = $stdout)
53
- Log.debug "DEVEL => " << caller.first
54
- Log.debug "DEVEL: " << message.inspect
54
+ Log.debug "DEVEL: " << caller.first
55
+ Log.debug ""
56
+ Log.debug "=> " << message.inspect
57
+ Log.debug ""
58
+ end
59
+
60
+ def ppp(message)
61
+ puts "PRINT: " << caller.first
62
+ puts ""
63
+ puts "=> " << message.inspect
64
+ puts ""
55
65
  end
data/lib/rbbt/util/misc.rb CHANGED
@@ -16,6 +16,29 @@ end
16
16
  module Misc
17
17
  class FieldNotFoundError < StandardError;end
18
18
 
19
+ def self.string2const(string)
20
+ return nil if string.nil?
21
+ mod = Kernel
22
+
23
+ string.to_s.split('::').each do |str|
24
+ mod = mod.const_get str
25
+ end
26
+
27
+ mod
28
+ end
29
+
30
+ def self.path_relative_to(path, subdir)
31
+ File.expand_path(path).sub(/^#{Regexp.quote File.expand_path(subdir)}\/?/,'')
32
+ end
33
+
34
+ def self.in_directory?(file, directory)
35
+ if file.to_s =~ /^#{Regexp.quote File.expand_path(directory)}/
36
+ true
37
+ else
38
+ false
39
+ end
40
+ end
41
+
19
42
  def self.this_dir
20
43
  File.expand_path(File.dirname(caller[0]))
21
44
  end
@@ -75,7 +98,9 @@ module Misc
75
98
  raise "Format of '#{options.inspect}' not understood"
76
99
  end
77
100
  defaults.each do |key, value|
78
- new_options[key] = value if new_options[key].nil?
101
+ next unless new_options[key].nil?
102
+
103
+ new_options[key] = value
79
104
  end
80
105
  new_options
81
106
  end
@@ -88,6 +113,14 @@ module Misc
88
113
  end
89
114
  end
90
115
 
116
+ def self.hash2string(hash)
117
+ hash.collect{|k,v|
118
+ next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
119
+ [ Symbol === k ? ":" << k.to_s : k,
120
+ Symbol === v ? ":" << v.to_s : v] * "="
121
+ }.compact * "#"
122
+ end
123
+
91
124
  def self.string2hash(string)
92
125
 
93
126
  options = {}
@@ -104,7 +137,27 @@ module Misc
104
137
  if value == true
105
138
  options[option] = option.to_s.chars.first != '!'
106
139
  else
107
- options[option] = begin eval(value) rescue value end
140
+ options[option] = Thread.start do
141
+ $SAFE = 0;
142
+ case
143
+ when value =~ /^(?:true|T)$/i
144
+ true
145
+ when value =~ /^(?:false|F)$/i
146
+ false
147
+ when (String === value and value =~ /^\/(.*)\/$/)
148
+ Regexp.new /#{$1}/
149
+ else
150
+ begin
151
+ Kernel.const_get value
152
+ rescue
153
+ begin
154
+ eval(value)
155
+ rescue Exception
156
+ value
157
+ end
158
+ end
159
+ end
160
+ end.value
108
161
  end
109
162
  end
110
163
 
@@ -135,7 +188,7 @@ module Misc
135
188
  fields.each_with_index{|f,i| return i if f == field}
136
189
  field_re = Regexp.new /#{field}/i
137
190
  fields.each_with_index{|f,i| return i if f =~ field_re}
138
- raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
191
+ raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
139
192
  end
140
193
 
141
194
  def self.first(list)
@@ -143,12 +196,17 @@ module Misc
143
196
  return list.first
144
197
  end
145
198
 
199
+ def self.chunk(text, split)
200
+ text.split(split)[1..-1]
201
+ end
202
+
146
203
  end
147
204
 
148
205
  module PDF2Text
149
206
  def self.pdf2text(filename)
150
207
  require 'rbbt/util/cmd'
151
208
  require 'rbbt/util/tmpfile'
209
+ require 'rbbt/util/open'
152
210
  TmpFile.with_file(Open.read(filename)) do |pdf|
153
211
  CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
154
212
  end
@@ -191,22 +249,43 @@ class NamedArray < Array
191
249
  zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
192
250
  zipped
193
251
  end
252
+
253
+ def detach(file)
254
+ file_fields = file.fields.collect{|field| field.fullname}
255
+ detached_fields = []
256
+ self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
257
+ fields = self.fields.values_at *detached_fields
258
+ values = self.values_at *detached_fields
259
+ values = NamedArray.name(values, fields)
260
+ values.zip_fields
261
+ end
194
262
  end
195
263
 
196
- def benchmark
264
+ def benchmark(bench = true)
197
265
  require 'benchmark'
198
- puts(Benchmark.measure do
266
+ if bench
267
+ res = nil
268
+ puts(Benchmark.measure do
269
+ res = yield
270
+ end)
271
+ res
272
+ else
199
273
  yield
200
- end)
274
+ end
201
275
  end
202
276
 
203
- def profile
277
+ def profile(prof = true)
204
278
  require 'ruby-prof'
205
- RubyProf.start
206
- yield
207
- result = RubyProf.stop
279
+ if prof
280
+ RubyProf.start
281
+ res = yield
282
+ result = RubyProf.stop
208
283
 
209
284
  # Print a flat profile to text
210
- printer = RubyProf::FlatPrinter.new(result)
211
- printer.print(STDOUT, 0)
285
+ printer = RubyProf::FlatPrinter.new(result)
286
+ printer.print(STDOUT, 0)
287
+ res
288
+ else
289
+ yield
290
+ end
212
291
  end