rbbt-util 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt-util.rb +2 -0
- data/lib/rbbt.rb +1 -0
- data/lib/rbbt/util/R.rb +24 -0
- data/lib/rbbt/util/bed.rb +325 -0
- data/lib/rbbt/util/cmd.rb +2 -1
- data/lib/rbbt/util/data_module.rb +25 -34
- data/lib/rbbt/util/excel2tsv.rb +2 -3
- data/lib/rbbt/util/log.rb +5 -0
- data/lib/rbbt/util/misc.rb +29 -1
- data/lib/rbbt/util/open.rb +1 -0
- data/lib/rbbt/util/persistence.rb +109 -0
- data/lib/rbbt/util/pkg_data.rb +114 -62
- data/lib/rbbt/util/rake.rb +78 -0
- data/lib/rbbt/util/tc_hash.rb +7 -1
- data/lib/rbbt/util/tsv.rb +582 -153
- data/lib/rbbt/util/workflow.rb +1 -2
- data/share/lib/R/util.R +89 -0
- data/test/rbbt/util/test_R.rb +9 -0
- data/test/rbbt/util/test_bed.rb +136 -0
- data/test/rbbt/util/test_data_module.rb +10 -10
- data/test/rbbt/util/test_misc.rb +1 -0
- data/test/rbbt/util/test_persistence.rb +60 -0
- data/test/rbbt/util/test_pkg_data.rb +113 -0
- data/test/rbbt/util/test_rake.rb +54 -0
- data/test/rbbt/util/test_tsv.rb +91 -46
- data/test/rbbt/util/test_workflow.rb +5 -2
- data/test/test_helper.rb +4 -0
- data/test/test_pkg.rb +0 -10
- data/test/test_rbbt.rb +3 -48
- metadata +21 -6
data/lib/rbbt-util.rb
CHANGED
@@ -4,6 +4,7 @@ require 'rbbt/util/cmd'
|
|
4
4
|
require 'rbbt/util/tmpfile'
|
5
5
|
require 'rbbt/util/filecache'
|
6
6
|
require 'rbbt/util/tsv'
|
7
|
+
require 'rbbt/util/bed'
|
7
8
|
require 'rbbt/util/cachehelper'
|
8
9
|
require 'rbbt/util/misc'
|
9
10
|
|
@@ -11,3 +12,4 @@ FileCache.cachedir = Rbbt.cachedir
|
|
11
12
|
Open.cachedir = File.join(Rbbt.cachedir, 'open-remote/')
|
12
13
|
TmpFile.tmpdir = File.join(Rbbt.tmpdir)
|
13
14
|
TSV.cachedir = File.join(Rbbt.cachedir, 'tsv_cache')
|
15
|
+
Bed.cachedir = File.join(Rbbt.cachedir, 'bed_cache')
|
data/lib/rbbt.rb
CHANGED
data/lib/rbbt/util/R.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'rbbt/util/cmd'
|
2
|
+
|
3
|
+
# Thin wrapper for running R scripts through the `R` command-line binary.
module R

  # Directory holding the R helper library shipped with the package.
  LIB_DIR = File.join(File.expand_path(File.dirname(__FILE__)),'../../../share/lib/R')
  # Helper script sourced at the top of every R script that is run.
  UTIL = File.join(LIB_DIR, 'util.R')

  # Runs an R script, prefixed with a `source` of the UTIL helper file.
  #
  # command - one of:
  #           * an object responding to `read` (IO, StringIO, ...), whose
  #             content is used as the script;
  #           * the path of an existing file, whose content is used;
  #           * otherwise, a String holding the R code itself.
  # options - Hash handed over to CMD.cmd; its :in entry is replaced by the
  #           assembled script.
  #
  # Returns whatever CMD.cmd returns.
  def self.run(command, options = {})
    cmd = "source('#{UTIL}');\n"

    if command.respond_to?(:read)
      cmd << command.read
    elsif File.exist?(command)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      cmd << File.read(command)
    else
      cmd << command
    end

    Log.debug "R Script:\n#{ cmd }"

    CMD.cmd('R --vanilla --slave', options.merge(:in => cmd))
  end

end
|
@@ -0,0 +1,325 @@
|
|
1
|
+
require 'rbbt/util/tsv'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
require 'rbbt/util/log'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
# Position index over a TSV: maps integer positions (or position ranges) to
# values and answers "which values are at / overlap this position" queries.
# The index lives either in memory (an Array of Entry, sorted by start) or in
# a FixWidthTable file when persistence is requested.
class Bed

  # One indexed record.
  #   value   - the value reported by queries
  #   start   - integer position (range start for range indexes)
  #   end     - range end (nil for point indexes)
  #   overlap - for range indexes, how many records to step back from this
  #             one so that every earlier range that may still be open is
  #             visited (nil for point indexes)
  class Entry < Struct.new( :value, :start, :end, :overlap); end

  # File of fixed-size records, each made of a 4-byte integer key followed by
  # a serialized entry padded with null bytes. The file starts with a 5-byte
  # header: the record size (4 bytes) and a range flag (1 byte).
  class FixWidthTable
    SERIALIZER = Marshal

    # Bytes used by the file header (record size + range flag).
    HEADER_SIZE = 5
    # Bytes used by the integer key that precedes each serialized entry.
    KEY_SIZE = 4

    # Serializes an entry to a String.
    def self.serialize(entry)
      SERIALIZER.dump(entry)
    end

    # Restores an entry from its serialized form.
    # NOTE(review): Marshal.load must only be used on trusted files; table
    # files coming from an untrusted source can execute arbitrary code.
    def self.deserialise(entry)
      SERIALIZER.load(entry)
    end

    # Builds the on-disk form of a record: the key followed by the serialized
    # entry, null-padded up to record_size bytes.
    #
    # Raises ArgumentError if the serialized entry does not fit.
    def self.format(entry, index, record_size)
      data = serialize(entry)
      if data.bytesize > record_size
        raise ArgumentError, "serialized entry (#{data.bytesize} bytes) exceeds record size #{record_size}"
      end
      # The "a" directive pads with null bytes up to the requested width.
      [index, data].pack("la#{record_size}")
    end

    # Extracts the key from a formatted record. It must be unpacked with the
    # same 32-bit directive used by format; a 16-bit read truncates any
    # position above 32767.
    def self.index(format)
      format.unpack("l").first
    end

    # Extracts and deserializes the entry from a formatted record.
    def self.unformat(format, record_size)
      _key, data = format.unpack("la#{record_size}")
      deserialise(data)
    end

    # Returns the size in bytes of the largest serialized entry (0 if empty).
    def self.get_record_size(entries)
      entries.map { |entry| serialize(entry).length }.max || 0
    end

    attr_accessor :size, :range

    # Opens a table file.
    #
    # file        - path of the table
    # record_size - bytes reserved for each serialized entry (needed when
    #               the table is created)
    # range       - whether the table holds range entries (stored in the
    #               header when the table is created)
    # rewrite     - create the table anew even if the file exists
    #
    # A missing file (or rewrite) opens the table for writing and writes the
    # header; otherwise the header is read and the table is ready for lookups.
    def initialize(file, record_size = nil, range = false, rewrite = false)
      @filename = file
      @range = range

      if rewrite || !File.exist?(file)
        Log.debug("Opening FixWidthTable in #{ file } writing. Record size: #{record_size}")
        @file = File.open(@filename, 'wb')
        @record_size = record_size
        @file.write [record_size].pack("L")
        @file.write [@range ? 1 : 0 ].pack("C")
        @size = 0
      else
        Log.debug("Opening FixWidthTable in #{ file } for reading")
        @file = File.open(@filename, 'rb')
        @record_size = @file.read(4).unpack("L").first
        @range = @file.read(1).unpack("C").first == 1
        @size = (File.size(@filename) - HEADER_SIZE) / (@record_size + KEY_SIZE)
        Log.debug("Record size #{@record_size}")
      end
    end

    # Number of records. @size already counts records, so it is returned
    # as is (it used to be divided by the record size).
    def length
      @size
    end

    # Switches a table that was being written to reading mode.
    def read
      @file.close
      @file = File.open(@filename, 'rb')
    end

    # Releases the underlying file handle.
    def close
      @file.close unless @file.closed?
    end

    # Appends a record; index is the integer key stored with the entry.
    # Records must be added in ascending key order for closest to work.
    def add(entry, index)
      @size += 1
      format = FixWidthTable.format(entry, index, @record_size)
      @file.write format
    end

    # Returns the key of the record at the given position, or nil if the
    # position is out of bounds. Only the key bytes are read.
    def index(index)
      return nil if index < 0 || index >= size
      @file.seek(record_offset(index), IO::SEEK_SET)
      FixWidthTable.index(@file.read(KEY_SIZE))
    end

    # Returns the entry of the record at the given position, or nil if the
    # position is out of bounds.
    def [](index)
      return nil if index < 0 || index >= size
      @file.seek(record_offset(index), IO::SEEK_SET)

      format = @file.read(@record_size + KEY_SIZE)
      FixWidthTable.unformat(format, @record_size)
    end

    # Binary search over the record keys. Returns the position of a record
    # whose key equals pos or, failing that, of the last record whose key is
    # below pos; -1 when there is none.
    def closest(pos)
      upper = size - 1
      lower = 0

      return -1 if upper < lower

      idx = nil
      while upper >= lower
        idx = lower + (upper - lower) / 2
        comp = pos <=> index(idx)

        if comp == 0
          break
        elsif comp > 0
          lower = idx + 1
        else
          upper = idx - 1
        end
      end

      # The last probe may sit just above pos.
      idx -= 1 if index(idx) > pos

      idx
    end

    private

    # Byte offset of the record at the given position.
    def record_offset(position)
      HEADER_SIZE + (@record_size + KEY_SIZE) * position
    end
  end

  #{{{ Persistence

  # Directory for persisted indexes. The String is modified in place by
  # cachedir=, hence the explicit mutable copy.
  CACHEDIR="/tmp/bed_persistent_cache".dup
  FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR

  # Changes the directory used for persisted indexes, creating it if needed.
  def self.cachedir=(cachedir)
    CACHEDIR.replace cachedir
    FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
  end

  # Directory used for persisted indexes.
  def self.cachedir
    CACHEDIR
  end

  # Path of the persistence file for the given source file and options:
  # the sanitized prefix followed by an MD5 digest of both.
  def self.get_persistence_file(file, prefix, options = {})
    File.join(CACHEDIR, prefix.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
  end

  attr_accessor :index, :range, :persistence_file

  # Builds the index.
  #
  # tsv     - a TSV object, or anything TSV.new accepts (e.g. a file name)
  # options - :key              field holding the position (default 0)
  #           :value            field holding the value (default 1)
  #           :range            pair of fields holding range start and end;
  #                             when given, :key is replaced by its first
  #                             element and a range index is built
  #           :persistence      store the index in a FixWidthTable file
  #           :persistence_file explicit path for that file
  #           :tsv              options passed to TSV.new
  #
  # If the persistence file already exists it is loaded and the TSV is not
  # read at all.
  def initialize(tsv, options = {})
    options = Misc.add_defaults options, :range => nil, :key => 0, :value => 1, :persistence => false, :persistence_file => nil, :tsv => {}

    options[:persistence] = true if options[:persistence].nil? && options[:persistence_file]

    filename =
      if TSV === tsv
        tsv.filename
      elsif String === tsv && File.exist?(tsv.sub(/#.*/,''))
        tsv
      else
        "None"
      end

    if options[:range]
      options[:key] = options[:range].first
      options[:value] = [options[:value], options[:range].last]
      @range = true
    else
      @range = false
    end

    if options[:persistence] && options[:persistence_file].nil?
      options[:persistence_file] = Bed.get_persistence_file(filename, (options[:range].nil? ? "Point:#{filename}" : "Range:#{filename}"), options)
    end

    @persistence_file = options[:persistence_file]

    if @persistence_file && File.exist?(@persistence_file)
      Log.low("Loading Persistence Bed File: #{ @persistence_file }")

      @index = FixWidthTable.new @persistence_file
      @range = @index.range
      return
    end

    tsv = TSV.new(tsv, options[:tsv]) unless TSV === tsv

    @index = []
    tsv.through(options[:key], options[:value]) do |key, values|
      range_end = @range ? values[1].to_i : nil
      @index << Entry.new(values[0], key.to_i, range_end, nil)
    end

    @index.sort_by!(&:start)

    annotate_overlaps if range

    persist_index if @persistence_file
  end

  # Binary search over the entry starts. Returns the position of an entry
  # starting at pos or, failing that, of the last entry starting before pos;
  # -1 when there is none.
  def closest(pos)
    upper = @index.size - 1
    lower = 0

    return -1 if upper < lower

    idx = nil
    while upper >= lower
      idx = lower + (upper - lower) / 2
      comp = pos <=> @index[idx].start

      if comp == 0
        break
      elsif comp > 0
        lower = idx + 1
      else
        upper = idx - 1
      end
    end

    # The last probe may sit just above pos.
    idx -= 1 if @index[idx].start > pos

    idx
  end

  # Range-index query: values of every entry whose [start, end] range
  # overlaps the given position, or the given Range of positions (both
  # bounds are taken as inclusive).
  def get_range(pos)
    r_start, r_end = query_bounds(pos)
    return [] if @index.size == 0

    idx = closest_index(r_start)

    return [] if idx >= @index.size
    return [] if idx < 0 && r_start == r_end

    idx = 0 if idx < 0

    # Step back to the earliest entry that may still be open at this point.
    overlap = @index[idx].overlap
    idx -= overlap if overlap

    values = []
    while idx < @index.size
      entry = @index[idx]
      break if entry.start > r_end
      values << entry.value if entry.end >= r_start
      idx += 1
    end

    values
  end

  # Point-index query: values of every entry located at the given position,
  # or inside the given Range of positions (both bounds are taken as
  # inclusive).
  def get_point(pos)
    r_start, r_end = query_bounds(pos)
    return [] if @index.size == 0

    idx = closest_index(r_start)

    return [] if idx >= @index.size
    return [] if idx < 0 && r_start == r_end

    idx = 0 if idx < 0

    if start_at(idx) < r_start
      idx += 1
    else
      # The search may land in the middle of a run of entries sharing the
      # same start; rewind to the first of them.
      idx -= 1 while idx > 0 && start_at(idx - 1) >= r_start
    end

    # idx may now be past the last entry, in which case nothing matches.
    values = []
    while idx < @index.size
      entry = @index[idx]
      break if entry.start > r_end
      values << entry.value
      idx += 1
    end

    values
  end

  # Queries the index with a position or a Range of positions.
  def [](pos)
    if range
      get_range(pos)
    else
      get_point(pos)
    end
  end

  private

  # Translates a query (Range or anything responding to to_i) into its
  # [start, end] bounds.
  def query_bounds(pos)
    if Range === pos
      [pos.begin, pos.end]
    else
      [pos.to_i, pos.to_i]
    end
  end

  # Closest position for pos, using the table's key-only search when the
  # index is persisted.
  def closest_index(pos)
    if FixWidthTable === @index
      @index.closest(pos)
    else
      closest(pos)
    end
  end

  # Start of the entry at position idx, without deserializing the whole
  # entry when the index is persisted.
  def start_at(idx)
    if FixWidthTable === @index
      @index.index(idx)
    else
      @index[idx].start
    end
  end

  # Sets the overlap of every entry of a sorted range index.
  def annotate_overlaps
    latest = []
    @index.each do |entry|
      latest.shift while latest.any? && latest[0] < entry.start

      entry.overlap = latest.length
      latest << entry.end
    end
  end

  # Writes the in-memory index to the persistence file and replaces it by
  # the resulting table, reopened for reading.
  def persist_index
    record_size = FixWidthTable.get_record_size(@index)

    table = FixWidthTable.new @persistence_file, record_size, @range
    @index.each do |entry| table.add entry, entry.start end
    table.read

    @index = table
  end

end
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -3,7 +3,7 @@ require 'rbbt/util/log'
|
|
3
3
|
require 'stringio'
|
4
4
|
|
5
5
|
module CMD
|
6
|
-
class CMDError < RBBTError;end
|
6
|
+
class CMDError < RBBTError; end
|
7
7
|
|
8
8
|
module SmartIO
|
9
9
|
def self.tie(io, pid = nil, cmd = "", post = nil)
|
@@ -14,6 +14,7 @@ module CMD
|
|
14
14
|
alias original_close close
|
15
15
|
def close
|
16
16
|
begin
|
17
|
+
self.original_read unless self.closed? or self.eof?
|
17
18
|
Process.waitpid(@pid) if @pid
|
18
19
|
rescue
|
19
20
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
module DataModule
|
2
2
|
|
3
|
+
attr_accessor :sharedir, :rakefile, :pkg_module
|
3
4
|
def self.extended(base)
|
4
5
|
if defined? base::PKG and base::PKG
|
5
6
|
base.pkg_module = base::PKG
|
@@ -8,41 +9,14 @@ module DataModule
|
|
8
9
|
end
|
9
10
|
|
10
11
|
base.sharedir = PKGData.get_caller_sharedir
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
@pkg_module = pkg_module
|
19
|
-
end
|
20
|
-
|
21
|
-
def sharedir
|
22
|
-
@sharedir
|
23
|
-
end
|
24
|
-
|
25
|
-
def sharedir=(sharedir)
|
26
|
-
@sharedir = sharedir
|
27
|
-
end
|
28
|
-
|
29
|
-
alias old_method_missing method_missing
|
30
|
-
def method_missing(name, *args, &block)
|
31
|
-
if args.any?
|
32
|
-
filename = File.join(self.to_s, args.first, name.to_s)
|
33
|
-
else
|
34
|
-
filename = File.join(self.to_s, name.to_s)
|
35
|
-
end
|
36
|
-
|
37
|
-
begin
|
38
|
-
pkg_module.add_datafiles filename => ['', self.to_s, sharedir]
|
39
|
-
rescue
|
40
|
-
Log.debug $!.message
|
41
|
-
Log.debug $!.backtrace * "\n"
|
42
|
-
old_method_missing name, *args, &block
|
12
|
+
|
13
|
+
Dir.glob(File.join(base.sharedir, 'install', base.to_s, '**','Rakefile')).each do |rakefile|
|
14
|
+
RakeHelper.files(rakefile).each do |file|
|
15
|
+
base.pkg_module.claim file,
|
16
|
+
rakefile.sub(/^#{Regexp.quote File.join(base.sharedir)}\/?/,''),
|
17
|
+
File.dirname(rakefile).sub(/^#{Regexp.quote File.join(base.sharedir, 'install')}\/?/,'')
|
18
|
+
end
|
43
19
|
end
|
44
|
-
|
45
|
-
pkg_module.find_datafile filename
|
46
20
|
end
|
47
21
|
|
48
22
|
module WithKey
|
@@ -79,4 +53,21 @@ module DataModule
|
|
79
53
|
o.key = key
|
80
54
|
o
|
81
55
|
end
|
56
|
+
|
57
|
+
alias old_method_missing method_missing
|
58
|
+
def method_missing(name, *args, &block)
|
59
|
+
begin
|
60
|
+
if args.any?
|
61
|
+
pkg_module.files[self.to_s][args.first][name]
|
62
|
+
else
|
63
|
+
pkg_module.files[self.to_s][name]
|
64
|
+
end
|
65
|
+
rescue
|
66
|
+
Log.debug $!.message
|
67
|
+
Log.debug $!.backtrace * "\n"
|
68
|
+
old_method_missing name, *args, &block
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
82
73
|
end
|