rbbt-util 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/run_workflow.rb +118 -0
- data/lib/rbbt-util.rb +0 -3
- data/lib/rbbt/util/fix_width_table.rb +7 -0
- data/lib/rbbt/util/misc.rb +20 -4
- data/lib/rbbt/util/persistence.rb +51 -28
- data/lib/rbbt/util/resource.rb +1 -0
- data/lib/rbbt/util/task.rb +11 -0
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/util/tc_hash.rb +89 -20
- data/lib/rbbt/util/tsv.rb +23 -8
- data/lib/rbbt/util/tsv/accessor.rb +8 -5
- data/lib/rbbt/util/tsv/filters.rb +68 -20
- data/lib/rbbt/util/tsv/index.rb +115 -92
- data/lib/rbbt/util/tsv/manipulate.rb +10 -13
- data/lib/rbbt/util/tsv/parse.rb +1 -1
- data/lib/rbbt/util/tsv/resource.rb +8 -0
- data/lib/rbbt/util/workflow.rb +26 -8
- data/share/lib/R/util.R +71 -1
- data/test/rbbt/util/test_misc.rb +17 -0
- data/test/rbbt/util/test_persistence.rb +32 -0
- data/test/rbbt/util/test_resource.rb +1 -0
- data/test/rbbt/util/test_task.rb +14 -0
- data/test/rbbt/util/test_tc_hash.rb +16 -0
- data/test/rbbt/util/tsv/test_accessor.rb +0 -1
- data/test/rbbt/util/tsv/test_filters.rb +26 -0
- data/test/rbbt/util/tsv/test_index.rb +78 -26
- data/test/rbbt/util/tsv/test_manipulate.rb +1 -1
- metadata +10 -14
- data/bin/workflow.rb +0 -24
- data/lib/rbbt/util/bed.rb +0 -325
- data/lib/rbbt/util/cachehelper.rb +0 -100
- data/test/rbbt/util/test_bed.rb +0 -136
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 13
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 3
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 3.2.0
|
9
|
+
- 1
|
10
|
+
version: 3.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-07-05 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -107,7 +107,7 @@ email: miguel.vazquez@fdi.ucm.es
|
|
107
107
|
executables:
|
108
108
|
- tsv.rb
|
109
109
|
- tchash.rb
|
110
|
-
- workflow.rb
|
110
|
+
- run_workflow.rb
|
111
111
|
- rbbt_query.rb
|
112
112
|
- rbbt_exec.rb
|
113
113
|
- rbbt_Rutil.rb
|
@@ -120,8 +120,6 @@ files:
|
|
120
120
|
- lib/rbbt-util.rb
|
121
121
|
- lib/rbbt.rb
|
122
122
|
- lib/rbbt/util/R.rb
|
123
|
-
- lib/rbbt/util/bed.rb
|
124
|
-
- lib/rbbt/util/cachehelper.rb
|
125
123
|
- lib/rbbt/util/cmd.rb
|
126
124
|
- lib/rbbt/util/excel2tsv.rb
|
127
125
|
- lib/rbbt/util/filecache.rb
|
@@ -165,9 +163,8 @@ files:
|
|
165
163
|
- test/rbbt/util/test_excel2tsv.rb
|
166
164
|
- test/rbbt/util/test_simpleopt.rb
|
167
165
|
- test/rbbt/util/test_task.rb
|
168
|
-
- test/rbbt/util/test_bed.rb
|
169
|
-
- test/rbbt/util/test_rake.rb
|
170
166
|
- test/rbbt/util/test_persistence.rb
|
167
|
+
- test/rbbt/util/test_rake.rb
|
171
168
|
- test/rbbt/util/test_workflow.rb
|
172
169
|
- test/rbbt/util/tsv/test_parse.rb
|
173
170
|
- test/rbbt/util/tsv/test_accessor.rb
|
@@ -180,7 +177,7 @@ files:
|
|
180
177
|
- test/test_rbbt.rb
|
181
178
|
- bin/tsv.rb
|
182
179
|
- bin/tchash.rb
|
183
|
-
- bin/workflow.rb
|
180
|
+
- bin/run_workflow.rb
|
184
181
|
- bin/rbbt_query.rb
|
185
182
|
- bin/rbbt_exec.rb
|
186
183
|
- bin/rbbt_Rutil.rb
|
@@ -214,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
214
211
|
requirements: []
|
215
212
|
|
216
213
|
rubyforge_project:
|
217
|
-
rubygems_version: 1.
|
214
|
+
rubygems_version: 1.3.7
|
218
215
|
signing_key:
|
219
216
|
specification_version: 3
|
220
217
|
summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -233,9 +230,8 @@ test_files:
|
|
233
230
|
- test/rbbt/util/test_excel2tsv.rb
|
234
231
|
- test/rbbt/util/test_simpleopt.rb
|
235
232
|
- test/rbbt/util/test_task.rb
|
236
|
-
- test/rbbt/util/test_bed.rb
|
237
|
-
- test/rbbt/util/test_rake.rb
|
238
233
|
- test/rbbt/util/test_persistence.rb
|
234
|
+
- test/rbbt/util/test_rake.rb
|
239
235
|
- test/rbbt/util/test_workflow.rb
|
240
236
|
- test/rbbt/util/tsv/test_parse.rb
|
241
237
|
- test/rbbt/util/tsv/test_accessor.rb
|
data/bin/workflow.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rbbt-util'
|
4
|
-
require 'rbbt/util/simpleopt'
|
5
|
-
require 'rbbt/util/workflow'
|
6
|
-
|
7
|
-
options = SOPT.get "-h--help:-t--target*:-n--name*"
|
8
|
-
|
9
|
-
raise "No target" unless options[:target]
|
10
|
-
|
11
|
-
file = ARGV.shift
|
12
|
-
|
13
|
-
if ARGV.empty?
|
14
|
-
data = STDIN.read
|
15
|
-
else
|
16
|
-
data = ARGV.shift
|
17
|
-
end
|
18
|
-
|
19
|
-
job = options[:name] || "Job"
|
20
|
-
|
21
|
-
puts "Runing WorkFlow in #{file} for target #{options[:target]}. Job: #{job}"
|
22
|
-
WorkFlow.load file, File.join(options[:target], job), data
|
23
|
-
puts
|
24
|
-
puts "WorkFlow done. Please find results in: #{File.join(options[:target], job)}"
|
data/lib/rbbt/util/bed.rb
DELETED
@@ -1,325 +0,0 @@
|
|
1
|
-
require 'rbbt/util/tsv'
|
2
|
-
require 'rbbt/util/misc'
|
3
|
-
require 'rbbt/util/log'
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
class Bed
|
7
|
-
|
8
|
-
class Entry < Struct.new( :value, :start, :end, :overlap); end
|
9
|
-
|
10
|
-
class FixWidthTable
|
11
|
-
SERIALIZER = Marshal
|
12
|
-
def self.serialize(entry)
|
13
|
-
SERIALIZER.dump(entry)
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.deserialise(entry)
|
17
|
-
SERIALIZER.load(entry)
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.format(entry, index, record_size)
|
21
|
-
data = serialize(entry)
|
22
|
-
padding = record_size - data.length
|
23
|
-
[index, data + "\0" * padding].pack("la#{record_size}")
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.index(format)
|
27
|
-
format.unpack("s").first
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.unformat(format, record_size)
|
31
|
-
index, data = format.unpack("la#{record_size}")
|
32
|
-
deserialise(data)
|
33
|
-
end
|
34
|
-
|
35
|
-
def self.get_record_size(entries)
|
36
|
-
max = 0
|
37
|
-
entries.each do |entry|
|
38
|
-
size = serialize(entry).length
|
39
|
-
max = size if size > max
|
40
|
-
end
|
41
|
-
|
42
|
-
max
|
43
|
-
end
|
44
|
-
|
45
|
-
attr_accessor :size, :range
|
46
|
-
def initialize(file, record_size = nil, range = false, rewrite = false)
|
47
|
-
@filename = file
|
48
|
-
@range = range
|
49
|
-
|
50
|
-
if rewrite or not File.exists? file
|
51
|
-
Log.debug("Opening FixWidthTable in #{ file } writing. Record size: #{record_size}")
|
52
|
-
@file = File.open(@filename, 'wb')
|
53
|
-
@record_size = record_size
|
54
|
-
@file.write [record_size].pack("L")
|
55
|
-
@file.write [@range ? 1 : 0 ].pack("C")
|
56
|
-
@size = 0
|
57
|
-
else
|
58
|
-
Log.debug("Opening FixWidthTable in #{ file } for reading")
|
59
|
-
@file = File.open(@filename, 'rb')
|
60
|
-
@record_size = @file.read(4).unpack("L").first
|
61
|
-
@range = @file.read(1).unpack("C").first == 1
|
62
|
-
@size = (File.size(@filename) - 5) / (@record_size + 4)
|
63
|
-
Log.debug("Record size #{@record_size}")
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
def length
|
68
|
-
@size / @record_size
|
69
|
-
end
|
70
|
-
|
71
|
-
def read
|
72
|
-
@file.close
|
73
|
-
@file = File.open(@filename, 'rb')
|
74
|
-
end
|
75
|
-
|
76
|
-
def add(entry, index)
|
77
|
-
@size += 1
|
78
|
-
format = FixWidthTable.format(entry, index, @record_size)
|
79
|
-
@file.write format
|
80
|
-
end
|
81
|
-
|
82
|
-
def index(index)
|
83
|
-
return nil if index < 0 or index >= size
|
84
|
-
@file.seek(5 + (@record_size + 4) * index, IO::SEEK_SET)
|
85
|
-
|
86
|
-
format = @file.read(@record_size + 4)
|
87
|
-
FixWidthTable.index(format)
|
88
|
-
end
|
89
|
-
|
90
|
-
def [](index)
|
91
|
-
return nil if index < 0 or index >= size
|
92
|
-
@file.seek(5 + (@record_size + 4) * index, IO::SEEK_SET)
|
93
|
-
|
94
|
-
format = @file.read(@record_size + 4)
|
95
|
-
FixWidthTable.unformat(format, @record_size)
|
96
|
-
end
|
97
|
-
|
98
|
-
def closest(pos)
|
99
|
-
upper = size - 1
|
100
|
-
lower = 0
|
101
|
-
|
102
|
-
return -1 if upper < lower
|
103
|
-
|
104
|
-
while(upper >= lower) do
|
105
|
-
idx = lower + (upper - lower) / 2
|
106
|
-
comp = pos <=> index(idx)
|
107
|
-
|
108
|
-
if comp == 0
|
109
|
-
break
|
110
|
-
elsif comp > 0
|
111
|
-
lower = idx + 1
|
112
|
-
else
|
113
|
-
upper = idx - 1
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
if index(idx) > pos
|
118
|
-
idx = idx - 1
|
119
|
-
end
|
120
|
-
|
121
|
-
idx
|
122
|
-
end
|
123
|
-
|
124
|
-
|
125
|
-
end
|
126
|
-
|
127
|
-
#{{{ Persistence
|
128
|
-
|
129
|
-
CACHEDIR="/tmp/bed_persistent_cache"
|
130
|
-
FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
|
131
|
-
|
132
|
-
def self.cachedir=(cachedir)
|
133
|
-
CACHEDIR.replace cachedir
|
134
|
-
FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
|
135
|
-
end
|
136
|
-
|
137
|
-
def self.cachedir
|
138
|
-
CACHEDIR
|
139
|
-
end
|
140
|
-
|
141
|
-
def self.get_persistence_file(file, prefix, options = {})
|
142
|
-
File.join(CACHEDIR, prefix.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
|
143
|
-
end
|
144
|
-
|
145
|
-
|
146
|
-
attr_accessor :index, :range, :persistence_file
|
147
|
-
def initialize(tsv, options = {})
|
148
|
-
options = Misc.add_defaults options, :range => nil, :key => 0, :value => 1, :persistence => false, :persistence_file => nil, :tsv => {}
|
149
|
-
|
150
|
-
options[:persistence] = true if options[:persistence].nil? and options[:persistence_file]
|
151
|
-
|
152
|
-
filename = nil
|
153
|
-
case
|
154
|
-
when TSV === tsv
|
155
|
-
filename = tsv.filename
|
156
|
-
when (String === tsv and File.exists? tsv.sub(/#.*/,''))
|
157
|
-
filename = tsv
|
158
|
-
else
|
159
|
-
filename = "None"
|
160
|
-
end
|
161
|
-
|
162
|
-
|
163
|
-
if options[:range]
|
164
|
-
options[:key] = options[:range].first
|
165
|
-
options[:value] = [options[:value], options[:range].last]
|
166
|
-
@range = true
|
167
|
-
else
|
168
|
-
@range = false
|
169
|
-
end
|
170
|
-
|
171
|
-
if options[:persistence] and options[:persistence_file].nil?
|
172
|
-
options[:persistence_file] = Bed.get_persistence_file(filename, (options[:range].nil? ? "Point:#{filename}" : "Range:#{filename}"), options)
|
173
|
-
end
|
174
|
-
|
175
|
-
@persistence_file = options[:persistence_file]
|
176
|
-
|
177
|
-
if @persistence_file and File.exists?(@persistence_file)
|
178
|
-
Log.low("Loading Persistence Bed File: #{ @persistence_file }")
|
179
|
-
|
180
|
-
@index = FixWidthTable.new @persistence_file
|
181
|
-
@range = @index.range
|
182
|
-
return
|
183
|
-
end
|
184
|
-
|
185
|
-
tsv = TSV.new(tsv, options[:tsv]) unless TSV === tsv
|
186
|
-
|
187
|
-
@index = []
|
188
|
-
entry = nil
|
189
|
-
tsv.through options[:key], options[:value] do |key, values|
|
190
|
-
if @range
|
191
|
-
entry = Entry.new(values[0], key.to_i, values[1].to_i, nil)
|
192
|
-
else
|
193
|
-
entry = Entry.new(values[0], key.to_i, nil, nil)
|
194
|
-
end
|
195
|
-
@index << entry
|
196
|
-
end
|
197
|
-
|
198
|
-
@index.sort!{|a,b| a.start <=> b.start}
|
199
|
-
|
200
|
-
if range
|
201
|
-
latest = []
|
202
|
-
@index.each do |entry|
|
203
|
-
while latest.any? and latest[0] < entry.start
|
204
|
-
latest.shift
|
205
|
-
end
|
206
|
-
|
207
|
-
entry.overlap = latest.length
|
208
|
-
latest << entry.end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
if @persistence_file
|
213
|
-
record_size = FixWidthTable.get_record_size(@index)
|
214
|
-
|
215
|
-
table = FixWidthTable.new @persistence_file, record_size, @range
|
216
|
-
@index.each do |entry| table.add entry, entry.start end
|
217
|
-
table.read
|
218
|
-
|
219
|
-
@index = table
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
|
-
def closest(pos)
|
224
|
-
upper = @index.size - 1
|
225
|
-
lower = 0
|
226
|
-
|
227
|
-
return -1 if upper < lower
|
228
|
-
|
229
|
-
while(upper >= lower) do
|
230
|
-
idx = lower + (upper - lower) / 2
|
231
|
-
comp = pos <=> @index[idx].start
|
232
|
-
|
233
|
-
if comp == 0
|
234
|
-
break
|
235
|
-
elsif comp > 0
|
236
|
-
lower = idx + 1
|
237
|
-
else
|
238
|
-
upper = idx - 1
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
if @index[idx].start > pos
|
243
|
-
idx = idx - 1
|
244
|
-
end
|
245
|
-
|
246
|
-
idx
|
247
|
-
end
|
248
|
-
|
249
|
-
def get_range(pos)
|
250
|
-
if Range === pos
|
251
|
-
r_start = pos.begin
|
252
|
-
r_end = pos.end
|
253
|
-
else
|
254
|
-
r_start = pos.to_i
|
255
|
-
r_end = pos.to_i
|
256
|
-
end
|
257
|
-
|
258
|
-
if FixWidthTable === @index
|
259
|
-
idx = @index.closest(r_start)
|
260
|
-
else
|
261
|
-
idx = closest(r_start)
|
262
|
-
end
|
263
|
-
|
264
|
-
return [] if idx >= @index.size
|
265
|
-
return [] if idx <0 and r_start == r_end
|
266
|
-
|
267
|
-
idx = 0 if idx < 0
|
268
|
-
|
269
|
-
idx -= @index[idx].overlap if @index[idx].overlap
|
270
|
-
|
271
|
-
values = []
|
272
|
-
l = @index[idx]
|
273
|
-
while l.start <= r_end
|
274
|
-
values << l.value if l.end >= r_start
|
275
|
-
idx += 1
|
276
|
-
break if idx >= @index.size
|
277
|
-
l = @index[idx]
|
278
|
-
end
|
279
|
-
|
280
|
-
values
|
281
|
-
end
|
282
|
-
|
283
|
-
def get_point(pos)
|
284
|
-
if Range === pos
|
285
|
-
r_start = pos.begin
|
286
|
-
r_end = pos.end
|
287
|
-
else
|
288
|
-
r_start = pos.to_i
|
289
|
-
r_end = pos.to_i
|
290
|
-
end
|
291
|
-
|
292
|
-
if FixWidthTable === @index
|
293
|
-
idx = @index.closest(r_start)
|
294
|
-
else
|
295
|
-
idx = closest(r_start)
|
296
|
-
end
|
297
|
-
|
298
|
-
return [] if idx >= @index.size
|
299
|
-
return [] if idx <0 and r_start == r_end
|
300
|
-
|
301
|
-
idx = 0 if idx < 0
|
302
|
-
|
303
|
-
idx += 1 unless @index[idx].start >= r_start
|
304
|
-
|
305
|
-
values = []
|
306
|
-
l = @index[idx]
|
307
|
-
while l.start <= r_end
|
308
|
-
values << l.value
|
309
|
-
idx += 1
|
310
|
-
break if idx >= @index.size
|
311
|
-
l = @index[idx]
|
312
|
-
end
|
313
|
-
|
314
|
-
values
|
315
|
-
end
|
316
|
-
|
317
|
-
def [](pos)
|
318
|
-
if range
|
319
|
-
get_range(pos)
|
320
|
-
else
|
321
|
-
get_point(pos)
|
322
|
-
end
|
323
|
-
end
|
324
|
-
|
325
|
-
end
|
data/lib/rbbt/util/cachehelper.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
require 'digest'
|
2
|
-
|
3
|
-
module CacheHelper
|
4
|
-
CACHE_DIR = '/tmp/cachehelper'
|
5
|
-
FileUtils.mkdir_p(CACHE_DIR) unless File.exist?(CACHE_DIR)
|
6
|
-
|
7
|
-
LOG_TIME = false
|
8
|
-
class CacheLocked < Exception; end
|
9
|
-
|
10
|
-
def self.time(id)
|
11
|
-
t = Time.now
|
12
|
-
data = block.call
|
13
|
-
STDERR.puts "#{ id } time: #{Time.now - t}"
|
14
|
-
data
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.cachedir=(dir)
|
18
|
-
@@cachedir=dir
|
19
|
-
FileUtils.mkdir_p(dir) unless File.exist?(dir)
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.cachedir
|
23
|
-
@@cachedir ||= CACHE_DIR
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
def self.reset
|
28
|
-
FileUtils.rm Dir.glob(cachedir + '*')
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.reset_locks
|
32
|
-
FileUtils.rm Dir.glob(cachedir + '*.lock')
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
def self.build_filename(name, key)
|
37
|
-
File.join(cachedir, name + ": " + Digest::MD5.hexdigest(key.to_s))
|
38
|
-
end
|
39
|
-
|
40
|
-
def self.do(filename, block)
|
41
|
-
FileUtils.touch(filename + '.lock')
|
42
|
-
|
43
|
-
if LOG_TIME
|
44
|
-
data = time do
|
45
|
-
block.call
|
46
|
-
end
|
47
|
-
else
|
48
|
-
data = block.call
|
49
|
-
end
|
50
|
-
|
51
|
-
File.open(filename, 'w'){|f| f.write data}
|
52
|
-
FileUtils.rm(filename + '.lock')
|
53
|
-
return data
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.clean(name)
|
57
|
-
FileUtils.rm Dir.glob(File.join(cachedir, "#{ name }*"))
|
58
|
-
end
|
59
|
-
|
60
|
-
def self.cache_ready?(name, key)
|
61
|
-
filename = CacheHelper.build_filename(name, key)
|
62
|
-
File.exist?(filename)
|
63
|
-
end
|
64
|
-
|
65
|
-
def self.cache(name, key = [], wait = nil, &block)
|
66
|
-
filename = CacheHelper.build_filename(name, key)
|
67
|
-
begin
|
68
|
-
case
|
69
|
-
when File.exist?(filename)
|
70
|
-
return File.open(filename){|f| f.read}
|
71
|
-
when File.exist?(filename + '.lock')
|
72
|
-
raise CacheLocked
|
73
|
-
else
|
74
|
-
if wait.nil?
|
75
|
-
CacheHelper.do(filename, block)
|
76
|
-
else
|
77
|
-
Thread.new{CacheHelper.do(filename, block)}
|
78
|
-
return wait
|
79
|
-
end
|
80
|
-
|
81
|
-
end
|
82
|
-
rescue CacheLocked
|
83
|
-
if wait.nil?
|
84
|
-
sleep 30
|
85
|
-
retry
|
86
|
-
else
|
87
|
-
return wait
|
88
|
-
end
|
89
|
-
rescue Exception
|
90
|
-
FileUtils.rm(filename + '.lock') if File.exist?(filename + '.lock')
|
91
|
-
raise $!
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.marshal_cache(name, key = [])
|
96
|
-
Marshal::load( cache(name, key) do
|
97
|
-
Marshal::dump(yield)
|
98
|
-
end)
|
99
|
-
end
|
100
|
-
end
|