rbbt-util 3.2.0 → 3.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/run_workflow.rb +118 -0
- data/lib/rbbt-util.rb +0 -3
- data/lib/rbbt/util/fix_width_table.rb +7 -0
- data/lib/rbbt/util/misc.rb +20 -4
- data/lib/rbbt/util/persistence.rb +51 -28
- data/lib/rbbt/util/resource.rb +1 -0
- data/lib/rbbt/util/task.rb +11 -0
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/util/tc_hash.rb +89 -20
- data/lib/rbbt/util/tsv.rb +23 -8
- data/lib/rbbt/util/tsv/accessor.rb +8 -5
- data/lib/rbbt/util/tsv/filters.rb +68 -20
- data/lib/rbbt/util/tsv/index.rb +115 -92
- data/lib/rbbt/util/tsv/manipulate.rb +10 -13
- data/lib/rbbt/util/tsv/parse.rb +1 -1
- data/lib/rbbt/util/tsv/resource.rb +8 -0
- data/lib/rbbt/util/workflow.rb +26 -8
- data/share/lib/R/util.R +71 -1
- data/test/rbbt/util/test_misc.rb +17 -0
- data/test/rbbt/util/test_persistence.rb +32 -0
- data/test/rbbt/util/test_resource.rb +1 -0
- data/test/rbbt/util/test_task.rb +14 -0
- data/test/rbbt/util/test_tc_hash.rb +16 -0
- data/test/rbbt/util/tsv/test_accessor.rb +0 -1
- data/test/rbbt/util/tsv/test_filters.rb +26 -0
- data/test/rbbt/util/tsv/test_index.rb +78 -26
- data/test/rbbt/util/tsv/test_manipulate.rb +1 -1
- metadata +10 -14
- data/bin/workflow.rb +0 -24
- data/lib/rbbt/util/bed.rb +0 -325
- data/lib/rbbt/util/cachehelper.rb +0 -100
- data/test/rbbt/util/test_bed.rb +0 -136
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 13
|
5
|
+
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 3
|
8
8
|
- 2
|
9
|
-
- 0
|
10
|
-
version: 3.2.0
|
9
|
+
- 1
|
10
|
+
version: 3.2.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-07-05 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -107,7 +107,7 @@ email: miguel.vazquez@fdi.ucm.es
|
|
107
107
|
executables:
|
108
108
|
- tsv.rb
|
109
109
|
- tchash.rb
|
110
|
-
- workflow.rb
|
110
|
+
- run_workflow.rb
|
111
111
|
- rbbt_query.rb
|
112
112
|
- rbbt_exec.rb
|
113
113
|
- rbbt_Rutil.rb
|
@@ -120,8 +120,6 @@ files:
|
|
120
120
|
- lib/rbbt-util.rb
|
121
121
|
- lib/rbbt.rb
|
122
122
|
- lib/rbbt/util/R.rb
|
123
|
-
- lib/rbbt/util/bed.rb
|
124
|
-
- lib/rbbt/util/cachehelper.rb
|
125
123
|
- lib/rbbt/util/cmd.rb
|
126
124
|
- lib/rbbt/util/excel2tsv.rb
|
127
125
|
- lib/rbbt/util/filecache.rb
|
@@ -165,9 +163,8 @@ files:
|
|
165
163
|
- test/rbbt/util/test_excel2tsv.rb
|
166
164
|
- test/rbbt/util/test_simpleopt.rb
|
167
165
|
- test/rbbt/util/test_task.rb
|
168
|
-
- test/rbbt/util/test_bed.rb
|
169
|
-
- test/rbbt/util/test_rake.rb
|
170
166
|
- test/rbbt/util/test_persistence.rb
|
167
|
+
- test/rbbt/util/test_rake.rb
|
171
168
|
- test/rbbt/util/test_workflow.rb
|
172
169
|
- test/rbbt/util/tsv/test_parse.rb
|
173
170
|
- test/rbbt/util/tsv/test_accessor.rb
|
@@ -180,7 +177,7 @@ files:
|
|
180
177
|
- test/test_rbbt.rb
|
181
178
|
- bin/tsv.rb
|
182
179
|
- bin/tchash.rb
|
183
|
-
- bin/workflow.rb
|
180
|
+
- bin/run_workflow.rb
|
184
181
|
- bin/rbbt_query.rb
|
185
182
|
- bin/rbbt_exec.rb
|
186
183
|
- bin/rbbt_Rutil.rb
|
@@ -214,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
214
211
|
requirements: []
|
215
212
|
|
216
213
|
rubyforge_project:
|
217
|
-
rubygems_version: 1.
|
214
|
+
rubygems_version: 1.3.7
|
218
215
|
signing_key:
|
219
216
|
specification_version: 3
|
220
217
|
summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)
|
@@ -233,9 +230,8 @@ test_files:
|
|
233
230
|
- test/rbbt/util/test_excel2tsv.rb
|
234
231
|
- test/rbbt/util/test_simpleopt.rb
|
235
232
|
- test/rbbt/util/test_task.rb
|
236
|
-
- test/rbbt/util/test_bed.rb
|
237
|
-
- test/rbbt/util/test_rake.rb
|
238
233
|
- test/rbbt/util/test_persistence.rb
|
234
|
+
- test/rbbt/util/test_rake.rb
|
239
235
|
- test/rbbt/util/test_workflow.rb
|
240
236
|
- test/rbbt/util/tsv/test_parse.rb
|
241
237
|
- test/rbbt/util/tsv/test_accessor.rb
|
data/bin/workflow.rb
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'rbbt-util'
|
4
|
-
require 'rbbt/util/simpleopt'
|
5
|
-
require 'rbbt/util/workflow'
|
6
|
-
|
7
|
-
options = SOPT.get "-h--help:-t--target*:-n--name*"
|
8
|
-
|
9
|
-
raise "No target" unless options[:target]
|
10
|
-
|
11
|
-
file = ARGV.shift
|
12
|
-
|
13
|
-
if ARGV.empty?
|
14
|
-
data = STDIN.read
|
15
|
-
else
|
16
|
-
data = ARGV.shift
|
17
|
-
end
|
18
|
-
|
19
|
-
job = options[:name] || "Job"
|
20
|
-
|
21
|
-
puts "Runing WorkFlow in #{file} for target #{options[:target]}. Job: #{job}"
|
22
|
-
WorkFlow.load file, File.join(options[:target], job), data
|
23
|
-
puts
|
24
|
-
puts "WorkFlow done. Please find results in: #{File.join(options[:target], job)}"
|
data/lib/rbbt/util/bed.rb
DELETED
@@ -1,325 +0,0 @@
|
|
1
|
-
require 'rbbt/util/tsv'
|
2
|
-
require 'rbbt/util/misc'
|
3
|
-
require 'rbbt/util/log'
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
class Bed
|
7
|
-
|
8
|
-
class Entry < Struct.new( :value, :start, :end, :overlap); end
|
9
|
-
|
10
|
-
class FixWidthTable
|
11
|
-
SERIALIZER = Marshal
|
12
|
-
def self.serialize(entry)
|
13
|
-
SERIALIZER.dump(entry)
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.deserialise(entry)
|
17
|
-
SERIALIZER.load(entry)
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.format(entry, index, record_size)
|
21
|
-
data = serialize(entry)
|
22
|
-
padding = record_size - data.length
|
23
|
-
[index, data + "\0" * padding].pack("la#{record_size}")
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.index(format)
|
27
|
-
format.unpack("s").first
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.unformat(format, record_size)
|
31
|
-
index, data = format.unpack("la#{record_size}")
|
32
|
-
deserialise(data)
|
33
|
-
end
|
34
|
-
|
35
|
-
def self.get_record_size(entries)
|
36
|
-
max = 0
|
37
|
-
entries.each do |entry|
|
38
|
-
size = serialize(entry).length
|
39
|
-
max = size if size > max
|
40
|
-
end
|
41
|
-
|
42
|
-
max
|
43
|
-
end
|
44
|
-
|
45
|
-
attr_accessor :size, :range
|
46
|
-
def initialize(file, record_size = nil, range = false, rewrite = false)
|
47
|
-
@filename = file
|
48
|
-
@range = range
|
49
|
-
|
50
|
-
if rewrite or not File.exists? file
|
51
|
-
Log.debug("Opening FixWidthTable in #{ file } writing. Record size: #{record_size}")
|
52
|
-
@file = File.open(@filename, 'wb')
|
53
|
-
@record_size = record_size
|
54
|
-
@file.write [record_size].pack("L")
|
55
|
-
@file.write [@range ? 1 : 0 ].pack("C")
|
56
|
-
@size = 0
|
57
|
-
else
|
58
|
-
Log.debug("Opening FixWidthTable in #{ file } for reading")
|
59
|
-
@file = File.open(@filename, 'rb')
|
60
|
-
@record_size = @file.read(4).unpack("L").first
|
61
|
-
@range = @file.read(1).unpack("C").first == 1
|
62
|
-
@size = (File.size(@filename) - 5) / (@record_size + 4)
|
63
|
-
Log.debug("Record size #{@record_size}")
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
def length
|
68
|
-
@size / @record_size
|
69
|
-
end
|
70
|
-
|
71
|
-
def read
|
72
|
-
@file.close
|
73
|
-
@file = File.open(@filename, 'rb')
|
74
|
-
end
|
75
|
-
|
76
|
-
def add(entry, index)
|
77
|
-
@size += 1
|
78
|
-
format = FixWidthTable.format(entry, index, @record_size)
|
79
|
-
@file.write format
|
80
|
-
end
|
81
|
-
|
82
|
-
def index(index)
|
83
|
-
return nil if index < 0 or index >= size
|
84
|
-
@file.seek(5 + (@record_size + 4) * index, IO::SEEK_SET)
|
85
|
-
|
86
|
-
format = @file.read(@record_size + 4)
|
87
|
-
FixWidthTable.index(format)
|
88
|
-
end
|
89
|
-
|
90
|
-
def [](index)
|
91
|
-
return nil if index < 0 or index >= size
|
92
|
-
@file.seek(5 + (@record_size + 4) * index, IO::SEEK_SET)
|
93
|
-
|
94
|
-
format = @file.read(@record_size + 4)
|
95
|
-
FixWidthTable.unformat(format, @record_size)
|
96
|
-
end
|
97
|
-
|
98
|
-
def closest(pos)
|
99
|
-
upper = size - 1
|
100
|
-
lower = 0
|
101
|
-
|
102
|
-
return -1 if upper < lower
|
103
|
-
|
104
|
-
while(upper >= lower) do
|
105
|
-
idx = lower + (upper - lower) / 2
|
106
|
-
comp = pos <=> index(idx)
|
107
|
-
|
108
|
-
if comp == 0
|
109
|
-
break
|
110
|
-
elsif comp > 0
|
111
|
-
lower = idx + 1
|
112
|
-
else
|
113
|
-
upper = idx - 1
|
114
|
-
end
|
115
|
-
end
|
116
|
-
|
117
|
-
if index(idx) > pos
|
118
|
-
idx = idx - 1
|
119
|
-
end
|
120
|
-
|
121
|
-
idx
|
122
|
-
end
|
123
|
-
|
124
|
-
|
125
|
-
end
|
126
|
-
|
127
|
-
#{{{ Persistence
|
128
|
-
|
129
|
-
CACHEDIR="/tmp/bed_persistent_cache"
|
130
|
-
FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
|
131
|
-
|
132
|
-
def self.cachedir=(cachedir)
|
133
|
-
CACHEDIR.replace cachedir
|
134
|
-
FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
|
135
|
-
end
|
136
|
-
|
137
|
-
def self.cachedir
|
138
|
-
CACHEDIR
|
139
|
-
end
|
140
|
-
|
141
|
-
def self.get_persistence_file(file, prefix, options = {})
|
142
|
-
File.join(CACHEDIR, prefix.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
|
143
|
-
end
|
144
|
-
|
145
|
-
|
146
|
-
attr_accessor :index, :range, :persistence_file
|
147
|
-
def initialize(tsv, options = {})
|
148
|
-
options = Misc.add_defaults options, :range => nil, :key => 0, :value => 1, :persistence => false, :persistence_file => nil, :tsv => {}
|
149
|
-
|
150
|
-
options[:persistence] = true if options[:persistence].nil? and options[:persistence_file]
|
151
|
-
|
152
|
-
filename = nil
|
153
|
-
case
|
154
|
-
when TSV === tsv
|
155
|
-
filename = tsv.filename
|
156
|
-
when (String === tsv and File.exists? tsv.sub(/#.*/,''))
|
157
|
-
filename = tsv
|
158
|
-
else
|
159
|
-
filename = "None"
|
160
|
-
end
|
161
|
-
|
162
|
-
|
163
|
-
if options[:range]
|
164
|
-
options[:key] = options[:range].first
|
165
|
-
options[:value] = [options[:value], options[:range].last]
|
166
|
-
@range = true
|
167
|
-
else
|
168
|
-
@range = false
|
169
|
-
end
|
170
|
-
|
171
|
-
if options[:persistence] and options[:persistence_file].nil?
|
172
|
-
options[:persistence_file] = Bed.get_persistence_file(filename, (options[:range].nil? ? "Point:#{filename}" : "Range:#{filename}"), options)
|
173
|
-
end
|
174
|
-
|
175
|
-
@persistence_file = options[:persistence_file]
|
176
|
-
|
177
|
-
if @persistence_file and File.exists?(@persistence_file)
|
178
|
-
Log.low("Loading Persistence Bed File: #{ @persistence_file }")
|
179
|
-
|
180
|
-
@index = FixWidthTable.new @persistence_file
|
181
|
-
@range = @index.range
|
182
|
-
return
|
183
|
-
end
|
184
|
-
|
185
|
-
tsv = TSV.new(tsv, options[:tsv]) unless TSV === tsv
|
186
|
-
|
187
|
-
@index = []
|
188
|
-
entry = nil
|
189
|
-
tsv.through options[:key], options[:value] do |key, values|
|
190
|
-
if @range
|
191
|
-
entry = Entry.new(values[0], key.to_i, values[1].to_i, nil)
|
192
|
-
else
|
193
|
-
entry = Entry.new(values[0], key.to_i, nil, nil)
|
194
|
-
end
|
195
|
-
@index << entry
|
196
|
-
end
|
197
|
-
|
198
|
-
@index.sort!{|a,b| a.start <=> b.start}
|
199
|
-
|
200
|
-
if range
|
201
|
-
latest = []
|
202
|
-
@index.each do |entry|
|
203
|
-
while latest.any? and latest[0] < entry.start
|
204
|
-
latest.shift
|
205
|
-
end
|
206
|
-
|
207
|
-
entry.overlap = latest.length
|
208
|
-
latest << entry.end
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
if @persistence_file
|
213
|
-
record_size = FixWidthTable.get_record_size(@index)
|
214
|
-
|
215
|
-
table = FixWidthTable.new @persistence_file, record_size, @range
|
216
|
-
@index.each do |entry| table.add entry, entry.start end
|
217
|
-
table.read
|
218
|
-
|
219
|
-
@index = table
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
|
-
def closest(pos)
|
224
|
-
upper = @index.size - 1
|
225
|
-
lower = 0
|
226
|
-
|
227
|
-
return -1 if upper < lower
|
228
|
-
|
229
|
-
while(upper >= lower) do
|
230
|
-
idx = lower + (upper - lower) / 2
|
231
|
-
comp = pos <=> @index[idx].start
|
232
|
-
|
233
|
-
if comp == 0
|
234
|
-
break
|
235
|
-
elsif comp > 0
|
236
|
-
lower = idx + 1
|
237
|
-
else
|
238
|
-
upper = idx - 1
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
if @index[idx].start > pos
|
243
|
-
idx = idx - 1
|
244
|
-
end
|
245
|
-
|
246
|
-
idx
|
247
|
-
end
|
248
|
-
|
249
|
-
def get_range(pos)
|
250
|
-
if Range === pos
|
251
|
-
r_start = pos.begin
|
252
|
-
r_end = pos.end
|
253
|
-
else
|
254
|
-
r_start = pos.to_i
|
255
|
-
r_end = pos.to_i
|
256
|
-
end
|
257
|
-
|
258
|
-
if FixWidthTable === @index
|
259
|
-
idx = @index.closest(r_start)
|
260
|
-
else
|
261
|
-
idx = closest(r_start)
|
262
|
-
end
|
263
|
-
|
264
|
-
return [] if idx >= @index.size
|
265
|
-
return [] if idx <0 and r_start == r_end
|
266
|
-
|
267
|
-
idx = 0 if idx < 0
|
268
|
-
|
269
|
-
idx -= @index[idx].overlap if @index[idx].overlap
|
270
|
-
|
271
|
-
values = []
|
272
|
-
l = @index[idx]
|
273
|
-
while l.start <= r_end
|
274
|
-
values << l.value if l.end >= r_start
|
275
|
-
idx += 1
|
276
|
-
break if idx >= @index.size
|
277
|
-
l = @index[idx]
|
278
|
-
end
|
279
|
-
|
280
|
-
values
|
281
|
-
end
|
282
|
-
|
283
|
-
def get_point(pos)
|
284
|
-
if Range === pos
|
285
|
-
r_start = pos.begin
|
286
|
-
r_end = pos.end
|
287
|
-
else
|
288
|
-
r_start = pos.to_i
|
289
|
-
r_end = pos.to_i
|
290
|
-
end
|
291
|
-
|
292
|
-
if FixWidthTable === @index
|
293
|
-
idx = @index.closest(r_start)
|
294
|
-
else
|
295
|
-
idx = closest(r_start)
|
296
|
-
end
|
297
|
-
|
298
|
-
return [] if idx >= @index.size
|
299
|
-
return [] if idx <0 and r_start == r_end
|
300
|
-
|
301
|
-
idx = 0 if idx < 0
|
302
|
-
|
303
|
-
idx += 1 unless @index[idx].start >= r_start
|
304
|
-
|
305
|
-
values = []
|
306
|
-
l = @index[idx]
|
307
|
-
while l.start <= r_end
|
308
|
-
values << l.value
|
309
|
-
idx += 1
|
310
|
-
break if idx >= @index.size
|
311
|
-
l = @index[idx]
|
312
|
-
end
|
313
|
-
|
314
|
-
values
|
315
|
-
end
|
316
|
-
|
317
|
-
def [](pos)
|
318
|
-
if range
|
319
|
-
get_range(pos)
|
320
|
-
else
|
321
|
-
get_point(pos)
|
322
|
-
end
|
323
|
-
end
|
324
|
-
|
325
|
-
end
|
data/lib/rbbt/util/cachehelper.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
require 'digest'
|
2
|
-
|
3
|
-
module CacheHelper
|
4
|
-
CACHE_DIR = '/tmp/cachehelper'
|
5
|
-
FileUtils.mkdir_p(CACHE_DIR) unless File.exist?(CACHE_DIR)
|
6
|
-
|
7
|
-
LOG_TIME = false
|
8
|
-
class CacheLocked < Exception; end
|
9
|
-
|
10
|
-
def self.time(id)
|
11
|
-
t = Time.now
|
12
|
-
data = block.call
|
13
|
-
STDERR.puts "#{ id } time: #{Time.now - t}"
|
14
|
-
data
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.cachedir=(dir)
|
18
|
-
@@cachedir=dir
|
19
|
-
FileUtils.mkdir_p(dir) unless File.exist?(dir)
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.cachedir
|
23
|
-
@@cachedir ||= CACHE_DIR
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
def self.reset
|
28
|
-
FileUtils.rm Dir.glob(cachedir + '*')
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.reset_locks
|
32
|
-
FileUtils.rm Dir.glob(cachedir + '*.lock')
|
33
|
-
end
|
34
|
-
|
35
|
-
|
36
|
-
def self.build_filename(name, key)
|
37
|
-
File.join(cachedir, name + ": " + Digest::MD5.hexdigest(key.to_s))
|
38
|
-
end
|
39
|
-
|
40
|
-
def self.do(filename, block)
|
41
|
-
FileUtils.touch(filename + '.lock')
|
42
|
-
|
43
|
-
if LOG_TIME
|
44
|
-
data = time do
|
45
|
-
block.call
|
46
|
-
end
|
47
|
-
else
|
48
|
-
data = block.call
|
49
|
-
end
|
50
|
-
|
51
|
-
File.open(filename, 'w'){|f| f.write data}
|
52
|
-
FileUtils.rm(filename + '.lock')
|
53
|
-
return data
|
54
|
-
end
|
55
|
-
|
56
|
-
def self.clean(name)
|
57
|
-
FileUtils.rm Dir.glob(File.join(cachedir, "#{ name }*"))
|
58
|
-
end
|
59
|
-
|
60
|
-
def self.cache_ready?(name, key)
|
61
|
-
filename = CacheHelper.build_filename(name, key)
|
62
|
-
File.exist?(filename)
|
63
|
-
end
|
64
|
-
|
65
|
-
def self.cache(name, key = [], wait = nil, &block)
|
66
|
-
filename = CacheHelper.build_filename(name, key)
|
67
|
-
begin
|
68
|
-
case
|
69
|
-
when File.exist?(filename)
|
70
|
-
return File.open(filename){|f| f.read}
|
71
|
-
when File.exist?(filename + '.lock')
|
72
|
-
raise CacheLocked
|
73
|
-
else
|
74
|
-
if wait.nil?
|
75
|
-
CacheHelper.do(filename, block)
|
76
|
-
else
|
77
|
-
Thread.new{CacheHelper.do(filename, block)}
|
78
|
-
return wait
|
79
|
-
end
|
80
|
-
|
81
|
-
end
|
82
|
-
rescue CacheLocked
|
83
|
-
if wait.nil?
|
84
|
-
sleep 30
|
85
|
-
retry
|
86
|
-
else
|
87
|
-
return wait
|
88
|
-
end
|
89
|
-
rescue Exception
|
90
|
-
FileUtils.rm(filename + '.lock') if File.exist?(filename + '.lock')
|
91
|
-
raise $!
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
def self.marshal_cache(name, key = [])
|
96
|
-
Marshal::load( cache(name, key) do
|
97
|
-
Marshal::dump(yield)
|
98
|
-
end)
|
99
|
-
end
|
100
|
-
end
|