rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
data/lib/rbbt-util.rb CHANGED
@@ -4,6 +4,7 @@ require 'rbbt/util/cmd'
4
4
  require 'rbbt/util/tmpfile'
5
5
  require 'rbbt/util/filecache'
6
6
  require 'rbbt/util/tsv'
7
+ require 'rbbt/util/persistence'
7
8
  require 'rbbt/util/bed'
8
9
  require 'rbbt/util/cachehelper'
9
10
  require 'rbbt/util/misc'
@@ -11,5 +12,5 @@ require 'rbbt/util/misc'
11
12
  FileCache.cachedir = Rbbt.cachedir
12
13
  Open.cachedir = File.join(Rbbt.cachedir, 'open-remote/')
13
14
  TmpFile.tmpdir = File.join(Rbbt.tmpdir)
14
- TSV.cachedir = File.join(Rbbt.cachedir, 'tsv_cache')
15
+ Persistence.cachedir = File.join(Rbbt.cachedir, 'persistence')
15
16
  Bed.cachedir = File.join(Rbbt.cachedir, 'bed_cache')
data/lib/rbbt/util/R.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/cmd'
2
+ require 'rbbt/util/tsv'
2
3
 
3
4
  module R
4
5
 
@@ -18,7 +19,23 @@ module R
18
19
 
19
20
  Log.debug "R Script:\n#{ cmd }"
20
21
 
21
- CMD.cmd('R --vanilla --slave', options.merge(:in => cmd))
22
+ CMD.cmd('R --vanilla --slave --quiet', options.merge(:in => cmd))
22
23
  end
23
24
 
24
25
  end
26
+
27
+ class TSV
28
+ def R(script)
29
+ TmpFile.with_file do |f|
30
+ Open.write(f, self.to_s)
31
+ Log.debug(R.run(
32
+ <<-EOF
33
+ data = rbbt.tsv('#{f}');
34
+ #{script.strip}
35
+ rbbt.tsv.write('#{f}', data);
36
+ EOF
37
+ ).read)
38
+ TSV.new(f, :type => :list)
39
+ end
40
+ end
41
+ end
data/lib/rbbt/util/cmd.rb CHANGED
@@ -82,12 +82,13 @@ module CMD
82
82
  sout, serr = IO.pipe, IO.pipe
83
83
 
84
84
  case
85
- when (IO === in_content and not StringIO === in_content)
85
+ when (false and (IO === in_content and not StringIO === in_content))
86
86
  sin = [in_content, nil]
87
- else StringIO === in_content
87
+ else
88
88
  sin = IO.pipe
89
89
  end
90
90
 
91
+
91
92
  pid = fork {
92
93
  begin
93
94
 
@@ -95,7 +96,6 @@ module CMD
95
96
  STDIN.reopen sin.first
96
97
  sin.first.close
97
98
 
98
-
99
99
  serr.first.close
100
100
  STDERR.reopen serr.last
101
101
  serr.last.close
@@ -115,16 +115,17 @@ module CMD
115
115
  sout.last.close
116
116
  serr.last.close
117
117
 
118
+
118
119
  Log.debug "CMD: [#{pid}] #{cmd}"
119
120
 
120
121
  case
121
122
  when String === in_content
122
123
  sin.last.write in_content
123
124
  sin.last.close
124
- when StringIO === in_content
125
+ when IO === in_content
125
126
  Thread.new do
126
- while l = in_content.gets
127
- sin.last.write l
127
+ while not in_content.eof?
128
+ sin.last.write in_content.gets
128
129
  end
129
130
  sin.last.close
130
131
  end
data/lib/rbbt/util/data_module.rb CHANGED
@@ -1,24 +1,46 @@
1
1
  module DataModule
2
2
 
3
+ def self.rakefiles(sharedir, data_module)
4
+ Dir.glob(File.join(sharedir, 'install', data_module.to_s, '**','Rakefile')).collect{|f| File.expand_path f}
5
+ end
6
+
3
7
  attr_accessor :sharedir, :rakefile, :pkg_module
4
- def self.extended(base)
5
- if defined? base::PKG and base::PKG
6
- base.pkg_module = base::PKG
8
+ def self.extended(data_module)
9
+ if defined? data_module::PKG and data_module::PKG
10
+ data_module.pkg_module = data_module::PKG
7
11
  else
8
- base.pkg_module = Rbbt
12
+ data_module.pkg_module = Rbbt
9
13
  end
10
14
 
11
- base.sharedir = PKGData.get_caller_sharedir
15
+ data_module.sharedir = PKGData.get_caller_sharedir
12
16
 
13
- Dir.glob(File.join(base.sharedir, 'install', base.to_s, '**','Rakefile')).each do |rakefile|
17
+ install_dir = File.join(data_module.sharedir, 'install')
18
+ rake_sharedir = File.join(data_module.sharedir, 'install')
19
+
20
+ rakefiles(data_module.sharedir, data_module).each do |rakefile|
21
+ rakefile_dir = File.dirname(rakefile)
22
+
14
23
  RakeHelper.files(rakefile).each do |file|
15
- base.pkg_module.claim file,
16
- rakefile.sub(/^#{Regexp.quote File.join(base.sharedir)}\/?/,''),
17
- File.dirname(rakefile).sub(/^#{Regexp.quote File.join(base.sharedir, 'install')}\/?/,'')
24
+ file_path = Misc.path_relative_to(File.join(File.dirname(rakefile), file), rakefile_dir)
25
+ get = :Rakefile
26
+ subdir = Misc.path_relative_to(File.dirname(rakefile), install_dir)
27
+ namespace = data_module.to_s.gsub('/', '::')
28
+
29
+ data_module.pkg_module.claim file_path, get, subdir, namespace, rake_sharedir
18
30
  end
19
31
  end
20
32
  end
21
33
 
34
+ def files
35
+ DataModule.rakefiles(sharedir, self).collect do |rakefile|
36
+ RakeHelper.files(rakefile).collect
37
+ end.flatten
38
+ end
39
+
40
+ def datadir
41
+ File.join(pkg_module.datadir, self.to_s)
42
+ end
43
+
22
44
  module WithKey
23
45
  def klass=(klass)
24
46
  @klass = klass
@@ -68,6 +90,4 @@ module DataModule
68
90
  old_method_missing name, *args, &block
69
91
  end
70
92
  end
71
-
72
-
73
93
  end
data/lib/rbbt/util/fix_width_table.rb ADDED
@@ -0,0 +1,209 @@
1
+
2
+ class FixWidthTable
3
+
4
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size
5
+ def initialize(filename, value_size = nil, range = nil, update = false)
6
+ @filename = filename
7
+
8
+ if update or not File.exists? filename
9
+ Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
+ FileUtils.rm @filename if File.exists? @filename
11
+ @value_size = value_size
12
+ @range = range
13
+ @record_size = @value_size + (@range ? 12 : 4)
14
+ @file = File.open(@filename, 'wb')
15
+ @file.write [value_size].pack("L")
16
+ @file.write [@range ? 1 : 0 ].pack("C")
17
+ @size = 0
18
+ else
19
+ Log.debug "Reading FixWidthTable at #{ @filename.inspect }"
20
+ @file = File.open(@filename, 'r')
21
+ @value_size = @file.read(4).unpack("L").first
22
+ @range = @file.read(1).unpack("C").first == 1
23
+ @record_size = @value_size + (@range ? 12 : 4)
24
+ @size = (File.size(@filename) - 5) / (@record_size)
25
+ end
26
+ end
27
+
28
+ def format(pos, value)
29
+ padding = value_size - value.length
30
+ if range
31
+ (pos + [value + "\0" * padding]).pack("llla#{value_size}")
32
+ else
33
+ [pos, value + "\0" * padding].pack("la#{value_size}")
34
+ end
35
+ end
36
+
37
+ def unformat(format)
38
+ if range
39
+ pos_start, pos_end, pos_overlap, value = format.unpack("llla#{value_size}")
40
+ [[pos_start, pos_end, pos_overlap], value.strip]
41
+ else
42
+ pos, value = format.unpack("la#{value_size}")
43
+ [pos, value.strip]
44
+ end
45
+ end
46
+
47
+ def add(pos, value)
48
+ format = format(pos, value)
49
+ @file.write format
50
+ @size += 1
51
+ end
52
+ alias << add
53
+
54
+ def pos(index)
55
+ return nil if index < 0 or index >= size
56
+ @file.seek(5 + (record_size) * index, IO::SEEK_SET)
57
+ @file.read(4).unpack("l").first
58
+ end
59
+
60
+ def pos_end(index)
61
+ return nil if index < 0 or index >= size
62
+ @file.seek(9 + (record_size) * index, IO::SEEK_SET)
63
+ @file.read(4).unpack("l").first
64
+ end
65
+
66
+ def overlap(index)
67
+ return nil if index < 0 or index >= size
68
+ @file.seek(13 + (record_size) * index, IO::SEEK_SET)
69
+ @file.read(4).unpack("l").first
70
+ end
71
+
72
+ def value(index)
73
+ return nil if index < 0 or index >= size
74
+ @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
75
+ @file.read(value_size).unpack("a#{value_size}").first.strip
76
+ end
77
+
78
+ def read
79
+ @file.close unless @file.closed?
80
+ @file = File.open(@filename, 'r')
81
+ end
82
+
83
+ def close
84
+ @file.close
85
+ end
86
+
87
+ #{{{ Adding data
88
+
89
+ def add_point(data)
90
+ data.sort_by{|value, pos| pos}.each do |value, pos|
91
+ add pos, value
92
+ end
93
+ end
94
+
95
+ def add_range(data)
96
+ latest = []
97
+ data.sort_by{|value, pos| pos[0]}.each do |value, pos|
98
+ while latest.any? and latest[0] < pos[0]
99
+ latest.shift
100
+ end
101
+
102
+ overlap = latest.length
103
+
104
+ add pos + [overlap], value
105
+ latest << pos[1]
106
+ end
107
+ end
108
+
109
+ #{{{ Searching
110
+
111
+ def closest(pos)
112
+ upper = size - 1
113
+ lower = 0
114
+
115
+ return -1 if upper < lower
116
+
117
+ while(upper >= lower) do
118
+ idx = lower + (upper - lower) / 2
119
+ comp = pos <=> pos(idx)
120
+
121
+ if comp == 0
122
+ break
123
+ elsif comp > 0
124
+ lower = idx + 1
125
+ else
126
+ upper = idx - 1
127
+ end
128
+ end
129
+
130
+ if pos(idx) > pos
131
+ idx = idx - 1
132
+ end
133
+
134
+ idx
135
+ end
136
+
137
+ def get_range(pos)
138
+ if Range === pos
139
+ r_start = pos.begin
140
+ r_end = pos.end
141
+ else
142
+ r_start = pos.to_i
143
+ r_end = pos.to_i
144
+ end
145
+
146
+ idx = closest(r_start)
147
+
148
+ return [] if idx >= size
149
+ return [] if idx <0 and r_start == r_end
150
+
151
+ idx = 0 if idx < 0
152
+
153
+ idx -= overlap(idx) unless overlap(idx).nil?
154
+
155
+ values = []
156
+ l_start = pos(idx)
157
+ l_end = pos_end(idx)
158
+ while l_start <= r_end
159
+ values << value(idx) if l_end >= r_start
160
+ idx += 1
161
+ break if idx >= size
162
+ l_start = pos(idx)
163
+ l_end = pos_end(idx)
164
+ end
165
+
166
+ values
167
+ end
168
+
169
+ def get_point(pos)
170
+ if Range === pos
171
+ r_start = pos.begin
172
+ r_end = pos.end
173
+ else
174
+ r_start = pos.to_i
175
+ r_end = pos.to_i
176
+ end
177
+
178
+ idx = closest(r_start)
179
+
180
+ return [] if idx >= size
181
+ return [] if idx <0 and r_start == r_end
182
+
183
+ idx = 0 if idx < 0
184
+
185
+ idx += 1 unless pos(idx) >= r_start
186
+
187
+ values = []
188
+ l_start = pos(idx)
189
+ l_end = pos_end(idx)
190
+ while l_start <= r_end
191
+ values << value(idx)
192
+ idx += 1
193
+ break if idx >= size
194
+ l_start = pos(idx)
195
+ l_end = pos_end(idx)
196
+ end
197
+
198
+ values
199
+ end
200
+
201
+ def [](pos)
202
+ if range
203
+ get_range(pos)
204
+ else
205
+ get_point(pos)
206
+ end
207
+ end
208
+
209
+ end
data/lib/rbbt/util/log.rb CHANGED
@@ -14,6 +14,7 @@ module Log
14
14
  end
15
15
 
16
16
  def self.log(message, severity = MEDIUM)
17
+ STDERR.puts caller * "\n" if @@severity == -1 and not message.empty?
17
18
  STDERR.puts "#{Time.now}[#{severity.to_s}]: " + message if severity >= @@severity
18
19
  end
19
20
 
@@ -50,6 +51,15 @@ module Log
50
51
  end
51
52
 
52
53
  def ddd(message, file = $stdout)
53
- Log.debug "DEVEL => " << caller.first
54
- Log.debug "DEVEL: " << message.inspect
54
+ Log.debug "DEVEL: " << caller.first
55
+ Log.debug ""
56
+ Log.debug "=> " << message.inspect
57
+ Log.debug ""
58
+ end
59
+
60
+ def ppp(message)
61
+ puts "PRINT: " << caller.first
62
+ puts ""
63
+ puts "=> " << message.inspect
64
+ puts ""
55
65
  end
data/lib/rbbt/util/misc.rb CHANGED
@@ -16,6 +16,29 @@ end
16
16
  module Misc
17
17
  class FieldNotFoundError < StandardError;end
18
18
 
19
+ def self.string2const(string)
20
+ return nil if string.nil?
21
+ mod = Kernel
22
+
23
+ string.to_s.split('::').each do |str|
24
+ mod = mod.const_get str
25
+ end
26
+
27
+ mod
28
+ end
29
+
30
+ def self.path_relative_to(path, subdir)
31
+ File.expand_path(path).sub(/^#{Regexp.quote File.expand_path(subdir)}\/?/,'')
32
+ end
33
+
34
+ def self.in_directory?(file, directory)
35
+ if file.to_s =~ /^#{Regexp.quote File.expand_path(directory)}/
36
+ true
37
+ else
38
+ false
39
+ end
40
+ end
41
+
19
42
  def self.this_dir
20
43
  File.expand_path(File.dirname(caller[0]))
21
44
  end
@@ -75,7 +98,9 @@ module Misc
75
98
  raise "Format of '#{options.inspect}' not understood"
76
99
  end
77
100
  defaults.each do |key, value|
78
- new_options[key] = value if new_options[key].nil?
101
+ next unless new_options[key].nil?
102
+
103
+ new_options[key] = value
79
104
  end
80
105
  new_options
81
106
  end
@@ -88,6 +113,14 @@ module Misc
88
113
  end
89
114
  end
90
115
 
116
+ def self.hash2string(hash)
117
+ hash.collect{|k,v|
118
+ next unless %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object).include? v.class.to_s
119
+ [ Symbol === k ? ":" << k.to_s : k,
120
+ Symbol === v ? ":" << v.to_s : v] * "="
121
+ }.compact * "#"
122
+ end
123
+
91
124
  def self.string2hash(string)
92
125
 
93
126
  options = {}
@@ -104,7 +137,27 @@ module Misc
104
137
  if value == true
105
138
  options[option] = option.to_s.chars.first != '!'
106
139
  else
107
- options[option] = begin eval(value) rescue value end
140
+ options[option] = Thread.start do
141
+ $SAFE = 0;
142
+ case
143
+ when value =~ /^(?:true|T)$/i
144
+ true
145
+ when value =~ /^(?:false|F)$/i
146
+ false
147
+ when (String === value and value =~ /^\/(.*)\/$/)
148
+ Regexp.new /#{$1}/
149
+ else
150
+ begin
151
+ Kernel.const_get value
152
+ rescue
153
+ begin
154
+ eval(value)
155
+ rescue Exception
156
+ value
157
+ end
158
+ end
159
+ end
160
+ end.value
108
161
  end
109
162
  end
110
163
 
@@ -135,7 +188,7 @@ module Misc
135
188
  fields.each_with_index{|f,i| return i if f == field}
136
189
  field_re = Regexp.new /#{field}/i
137
190
  fields.each_with_index{|f,i| return i if f =~ field_re}
138
- raise FieldNotFoundError, "Field '#{ field }' was not found" unless quiet
191
+ raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
139
192
  end
140
193
 
141
194
  def self.first(list)
@@ -143,12 +196,17 @@ module Misc
143
196
  return list.first
144
197
  end
145
198
 
199
+ def self.chunk(text, split)
200
+ text.split(split)[1..-1]
201
+ end
202
+
146
203
  end
147
204
 
148
205
  module PDF2Text
149
206
  def self.pdf2text(filename)
150
207
  require 'rbbt/util/cmd'
151
208
  require 'rbbt/util/tmpfile'
209
+ require 'rbbt/util/open'
152
210
  TmpFile.with_file(Open.read(filename)) do |pdf|
153
211
  CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
154
212
  end
@@ -191,22 +249,43 @@ class NamedArray < Array
191
249
  zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
192
250
  zipped
193
251
  end
252
+
253
+ def detach(file)
254
+ file_fields = file.fields.collect{|field| field.fullname}
255
+ detached_fields = []
256
+ self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
257
+ fields = self.fields.values_at *detached_fields
258
+ values = self.values_at *detached_fields
259
+ values = NamedArray.name(values, fields)
260
+ values.zip_fields
261
+ end
194
262
  end
195
263
 
196
- def benchmark
264
+ def benchmark(bench = true)
197
265
  require 'benchmark'
198
- puts(Benchmark.measure do
266
+ if bench
267
+ res = nil
268
+ puts(Benchmark.measure do
269
+ res = yield
270
+ end)
271
+ res
272
+ else
199
273
  yield
200
- end)
274
+ end
201
275
  end
202
276
 
203
- def profile
277
+ def profile(prof = true)
204
278
  require 'ruby-prof'
205
- RubyProf.start
206
- yield
207
- result = RubyProf.stop
279
+ if prof
280
+ RubyProf.start
281
+ res = yield
282
+ result = RubyProf.stop
208
283
 
209
284
  # Print a flat profile to text
210
- printer = RubyProf::FlatPrinter.new(result)
211
- printer.print(STDOUT, 0)
285
+ printer = RubyProf::FlatPrinter.new(result)
286
+ printer.print(STDOUT, 0)
287
+ res
288
+ else
289
+ yield
290
+ end
212
291
  end