rbbt-util 3.2.0 → 3.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/run_workflow.rb +118 -0
- data/lib/rbbt-util.rb +0 -3
- data/lib/rbbt/util/fix_width_table.rb +7 -0
- data/lib/rbbt/util/misc.rb +20 -4
- data/lib/rbbt/util/persistence.rb +51 -28
- data/lib/rbbt/util/resource.rb +1 -0
- data/lib/rbbt/util/task.rb +11 -0
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/util/tc_hash.rb +89 -20
- data/lib/rbbt/util/tsv.rb +23 -8
- data/lib/rbbt/util/tsv/accessor.rb +8 -5
- data/lib/rbbt/util/tsv/filters.rb +68 -20
- data/lib/rbbt/util/tsv/index.rb +115 -92
- data/lib/rbbt/util/tsv/manipulate.rb +10 -13
- data/lib/rbbt/util/tsv/parse.rb +1 -1
- data/lib/rbbt/util/tsv/resource.rb +8 -0
- data/lib/rbbt/util/workflow.rb +26 -8
- data/share/lib/R/util.R +71 -1
- data/test/rbbt/util/test_misc.rb +17 -0
- data/test/rbbt/util/test_persistence.rb +32 -0
- data/test/rbbt/util/test_resource.rb +1 -0
- data/test/rbbt/util/test_task.rb +14 -0
- data/test/rbbt/util/test_tc_hash.rb +16 -0
- data/test/rbbt/util/tsv/test_accessor.rb +0 -1
- data/test/rbbt/util/tsv/test_filters.rb +26 -0
- data/test/rbbt/util/tsv/test_index.rb +78 -26
- data/test/rbbt/util/tsv/test_manipulate.rb +1 -1
- metadata +10 -14
- data/bin/workflow.rb +0 -24
- data/lib/rbbt/util/bed.rb +0 -325
- data/lib/rbbt/util/cachehelper.rb +0 -100
- data/test/rbbt/util/test_bed.rb +0 -136
@@ -119,22 +119,19 @@ class TSV
|
|
119
119
|
new_fields = options[:new_fields]
|
120
120
|
|
121
121
|
new = {}
|
122
|
-
new_key_field, new_fields = through new_key_field, new_fields do |
|
123
|
-
if Array ===
|
124
|
-
keys
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
new_values = keys.each do |key|
|
130
|
-
if new[key].nil?
|
131
|
-
new[key] = values
|
132
|
-
else
|
133
|
-
if type == :double
|
134
|
-
new[key] = new[key].zip(values).collect{|v| v.flatten}
|
122
|
+
new_key_field, new_fields = through new_key_field, new_fields do |keys, values|
|
123
|
+
if Array === keys
|
124
|
+
keys.each do |key|
|
125
|
+
if new[key].nil? or not type == :double
|
126
|
+
new[key] = values.collect{|l| l.dup}
|
127
|
+
else
|
128
|
+
new[key] = new[key].zip(values).collect{|old_list, new_list| old_list.concat new_list}
|
135
129
|
end
|
136
130
|
end
|
131
|
+
else
|
132
|
+
new[keys] = values
|
137
133
|
end
|
134
|
+
nil
|
138
135
|
end
|
139
136
|
|
140
137
|
new = TSV.new new
|
data/lib/rbbt/util/tsv/parse.rb
CHANGED
@@ -318,7 +318,7 @@ class TSV
|
|
318
318
|
|
319
319
|
fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
|
320
320
|
fields ||= other_fields
|
321
|
-
[data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :
|
321
|
+
[data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
|
322
322
|
end
|
323
323
|
|
324
324
|
end
|
@@ -26,6 +26,14 @@ module Resource
|
|
26
26
|
TSV.index self, options
|
27
27
|
end
|
28
28
|
|
29
|
+
def pos_index(pos_field, options = {})
|
30
|
+
TSV.pos_index self, pos_field, options
|
31
|
+
end
|
32
|
+
|
33
|
+
def range_index(start_field, end_field, options = {})
|
34
|
+
TSV.range_index self, start_field, end_field, options
|
35
|
+
end
|
36
|
+
|
29
37
|
def open(options = {})
|
30
38
|
produce
|
31
39
|
Open.open(self.find, options)
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -5,12 +5,29 @@ require 'rbbt/util/misc'
|
|
5
5
|
|
6
6
|
module WorkFlow
|
7
7
|
|
8
|
-
def self.require_workflow(
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
def self.require_workflow(wf_name, wf_dir = nil)
|
9
|
+
|
10
|
+
if File.exists? wf_name
|
11
|
+
require "./workflow.rb"
|
12
|
+
return
|
13
13
|
end
|
14
|
+
|
15
|
+
wf_dir ||= case
|
16
|
+
when File.exists?(File.join(File.dirname(Resource.caller_lib_dir), wf_name))
|
17
|
+
File.join(File.dirname(Resource.caller_lib_dir), wf_name)
|
18
|
+
when defined? Rbbt
|
19
|
+
if Rbbt.pkg.etc.workflow_dir.exists?
|
20
|
+
File.join(pkg.etc.workflow_dir.read.strip, wf_name)
|
21
|
+
else
|
22
|
+
Rbbt.workflows[wf_name]
|
23
|
+
end
|
24
|
+
else
|
25
|
+
File.join(ENV["HOME"], '.workflows')
|
26
|
+
end
|
27
|
+
|
28
|
+
wf_dir = Resource::Path.path(wf_dir)
|
29
|
+
$LOAD_PATH.unshift(File.join(File.dirname(wf_dir["workflow.rb"].find), 'lib'))
|
30
|
+
require wf_dir["workflow.rb"].find
|
14
31
|
end
|
15
32
|
|
16
33
|
def self.extended(base)
|
@@ -74,9 +91,9 @@ module WorkFlow
|
|
74
91
|
def process_dangling
|
75
92
|
res = [
|
76
93
|
@dangling_options,
|
77
|
-
|
78
|
-
|
79
|
-
|
94
|
+
Misc.merge2hash(@dangling_options, @dangling_option_descriptions.values_at(*@dangling_options)),
|
95
|
+
Misc.merge2hash(@dangling_options, @dangling_option_types.values_at(*@dangling_options)),
|
96
|
+
Misc.merge2hash(@dangling_options, @dangling_option_defaults.values_at(*@dangling_options)),
|
80
97
|
(@dangling_dependencies || [@last_task]).compact,
|
81
98
|
@dangling_description,
|
82
99
|
]
|
@@ -103,6 +120,7 @@ module WorkFlow
|
|
103
120
|
end
|
104
121
|
|
105
122
|
def job(task, jobname = "Default", *args)
|
123
|
+
raise "Task '#{ task }' unkown" if tasks[task].nil?
|
106
124
|
tasks[task].job(jobname, *args)
|
107
125
|
end
|
108
126
|
|
data/share/lib/R/util.R
CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
26
26
|
}
|
27
27
|
|
28
28
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
|
29
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
|
30
30
|
f = file(filename, 'r');
|
31
31
|
headers = readLines(f, 1);
|
32
32
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -139,3 +139,73 @@ rbbt.run <- function(filename){
|
|
139
139
|
}
|
140
140
|
|
141
141
|
|
142
|
+
# UTILITIES
|
143
|
+
|
144
|
+
# Adapted from http://www.phaget4.org/R/image_matrix.html
|
145
|
+
rbbt.plot.matrix <- function(x, ...){
|
146
|
+
min <- min(x);
|
147
|
+
max <- max(x);
|
148
|
+
yLabels <- rownames(x);
|
149
|
+
xLabels <- colnames(x);
|
150
|
+
title <-c();
|
151
|
+
# check for additional function arguments
|
152
|
+
if( length(list(...)) ){
|
153
|
+
Lst <- list(...);
|
154
|
+
if( !is.null(Lst$zlim) ){
|
155
|
+
min <- Lst$zlim[1];
|
156
|
+
max <- Lst$zlim[2];
|
157
|
+
}
|
158
|
+
if( !is.null(Lst$yLabels) ){
|
159
|
+
yLabels <- c(Lst$yLabels);
|
160
|
+
}
|
161
|
+
if( !is.null(Lst$xLabels) ){
|
162
|
+
xLabels <- c(Lst$xLabels);
|
163
|
+
}
|
164
|
+
if( !is.null(Lst$title) ){
|
165
|
+
title <- Lst$title;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
# check for null values
|
169
|
+
if( is.null(xLabels) ){
|
170
|
+
xLabels <- c(1:ncol(x));
|
171
|
+
}
|
172
|
+
if( is.null(yLabels) ){
|
173
|
+
yLabels <- c(1:nrow(x));
|
174
|
+
}
|
175
|
+
|
176
|
+
layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(4,1), heights=c(1,1));
|
177
|
+
|
178
|
+
# Red and green range from 0 to 1 while Blue ranges from 1 to 0
|
179
|
+
ColorRamp <- rgb( seq(0,1,length=256), # Red
|
180
|
+
seq(0,1,length=256), # Green
|
181
|
+
seq(1,0,length=256)) # Blue
|
182
|
+
ColorLevels <- seq(min, max, length=length(ColorRamp));
|
183
|
+
|
184
|
+
# Reverse Y axis
|
185
|
+
reverse <- nrow(x) : 1;
|
186
|
+
yLabels <- yLabels[reverse];
|
187
|
+
x <- x[reverse,];
|
188
|
+
|
189
|
+
# Data Map
|
190
|
+
par(mar = c(3,5,2.5,2));
|
191
|
+
image(1:length(xLabels), 1:length(yLabels), t(x), col=ColorRamp, xlab="",
|
192
|
+
ylab="", axes=FALSE, zlim=c(min,max));
|
193
|
+
if( !is.null(title) ){
|
194
|
+
title(main=title);
|
195
|
+
}
|
196
|
+
axis(BELOW<-1, at=1:length(xLabels), labels=xLabels, cex.axis=0.7);
|
197
|
+
axis(LEFT <-2, at=1:length(yLabels), labels=yLabels, las= HORIZONTAL<-1,
|
198
|
+
cex.axis=0.7);
|
199
|
+
|
200
|
+
# Color Scale
|
201
|
+
par(mar = c(3,2.5,2.5,2));
|
202
|
+
image(1, ColorLevels,
|
203
|
+
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
|
204
|
+
col=ColorRamp,
|
205
|
+
xlab="",ylab="",
|
206
|
+
xaxt="n");
|
207
|
+
|
208
|
+
layout(1);
|
209
|
+
}
|
210
|
+
|
211
|
+
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -142,4 +142,21 @@ This is an example file. Entries are separated by Entry
|
|
142
142
|
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
143
143
|
end
|
144
144
|
|
145
|
+
def test_intersect_sorted_arrays
|
146
|
+
assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
|
147
|
+
end
|
148
|
+
|
149
|
+
|
150
|
+
def test_in_dir
|
151
|
+
TmpFile.with_file do |dir|
|
152
|
+
FileUtils.mkdir_p dir
|
153
|
+
Open.write(File.join(dir, 'test_file_in_dir'), 'test_file_in_dir')
|
154
|
+
Misc.in_dir(dir) do
|
155
|
+
assert Dir.glob("*").include? 'test_file_in_dir'
|
156
|
+
end
|
157
|
+
assert Dir.glob(File.join(dir, "*")).include?(File.join(dir, 'test_file_in_dir'))
|
158
|
+
assert(! Dir.glob("*").include?('test_file_in_dir'))
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
145
162
|
end
|
@@ -164,6 +164,38 @@ row2 2 4 6 8
|
|
164
164
|
Process.wait pid
|
165
165
|
end
|
166
166
|
|
167
|
+
def test_persist_fwt
|
168
|
+
content =<<-EOF
|
169
|
+
#: :sep=/\\s+/
|
170
|
+
#Id Start End
|
171
|
+
row1 1 10
|
172
|
+
row2 20 30
|
173
|
+
EOF
|
174
|
+
|
175
|
+
TmpFile.with_file(content) do |filename|
|
176
|
+
fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
|
177
|
+
fwt = TSV.new(filename).range_index("Start", "End", :persistence => false)
|
178
|
+
assert fwt[15].empty?
|
179
|
+
assert fwt[25].include? "row2"
|
180
|
+
fwt
|
181
|
+
end
|
182
|
+
|
183
|
+
assert fwt[5].include? "row1"
|
184
|
+
assert fwt[(5..25)].include? "row1"
|
185
|
+
assert fwt[(5..25)].include? "row2"
|
186
|
+
|
187
|
+
fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
|
188
|
+
assert false
|
189
|
+
end
|
190
|
+
|
191
|
+
assert fwt[5].include? "row1"
|
192
|
+
assert fwt[(5..25)].include? "row1"
|
193
|
+
assert fwt[(5..25)].include? "row2"
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
167
199
|
|
168
200
|
end
|
169
201
|
|
@@ -67,6 +67,7 @@ class TestResource < Test::Unit::TestCase
|
|
67
67
|
begin
|
68
68
|
assert_equal "Test String", Rbbt.tmp.test_string.read
|
69
69
|
assert_equal "Test String", Rbbt.tmp.work.foo.read
|
70
|
+
rescue
|
70
71
|
ensure
|
71
72
|
FileUtils.rm Rbbt.tmp.test_string.find if File.exists? Rbbt.tmp.test_string.find
|
72
73
|
FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
|
data/test/rbbt/util/test_task.rb
CHANGED
@@ -37,6 +37,20 @@ class TestTask < Test::Unit::TestCase
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
+
def test_task_options_from_hash
|
41
|
+
TmpFile.with_file do |f|
|
42
|
+
task = Task.new(:test_task, nil, :name) do |name| Open.write(f, name) end
|
43
|
+
job = task.job(:job1, :name => "TestName")
|
44
|
+
assert_equal "job1" << "_" << Misc.hash2md5(:name => "TestName"), job.id
|
45
|
+
job.fork
|
46
|
+
job.join
|
47
|
+
|
48
|
+
assert File.exists? f
|
49
|
+
assert_equal "TestName", File.open(f).read
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
40
54
|
def test_task_result
|
41
55
|
task = Task.new(:test_task, nil, :name) do |name| name end
|
42
56
|
job = task.job(:job1, "TestName")
|
@@ -54,6 +54,22 @@ class TestTCHash < Test::Unit::TestCase
|
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
|
+
def test_stringDoubleArraySerializer
|
58
|
+
TmpFile.with_file do |f|
|
59
|
+
t = TCHash.get f, true, TCHash::TSVSerializer
|
60
|
+
tsv = TSV.new({})
|
61
|
+
tsv["1"] = [[1],[2]]
|
62
|
+
tsv["2"] = [[3],[4,5]]
|
63
|
+
t["TSV"] = tsv
|
64
|
+
|
65
|
+
t = TCHash.get f
|
66
|
+
t.collect do |k,v|
|
67
|
+
assert_equal [["3"],["4","5"]], t["TSV"]["2"]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
57
73
|
def test_serializer_alias
|
58
74
|
TmpFile.with_file do |f|
|
59
75
|
t = TCHash.get f, true, :double
|
@@ -135,7 +135,33 @@ row2 A B
|
|
135
135
|
end
|
136
136
|
end
|
137
137
|
|
138
|
+
def test_delete
|
139
|
+
content =<<-EOF
|
140
|
+
#ID ValueA ValueB Comment
|
141
|
+
row1 a b c
|
142
|
+
row2 A B C
|
143
|
+
EOF
|
144
|
+
|
145
|
+
TmpFile.with_file(content) do |filename|
|
146
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
|
147
|
+
assert_equal 2, tsv.keys.length
|
148
|
+
tsv.delete "row2"
|
149
|
+
assert_equal 1, tsv.keys.length
|
150
|
+
|
151
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
|
152
|
+
tsv.filter
|
153
|
+
tsv.add_filter "field:ValueA", ["A"]
|
138
154
|
|
155
|
+
assert_equal 1, tsv.keys.length
|
156
|
+
assert_equal ["row2"], tsv.keys
|
157
|
+
|
158
|
+
tsv.delete "row2"
|
159
|
+
assert_equal 0, tsv.keys.length
|
160
|
+
|
161
|
+
tsv.pop_filter
|
162
|
+
assert_equal ["row1"], tsv.keys
|
163
|
+
end
|
164
|
+
end
|
139
165
|
|
140
166
|
end
|
141
167
|
|
@@ -128,7 +128,39 @@ row2 A B
|
|
128
128
|
tsv
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def test_pos_index
|
132
|
+
content =<<-EOF
|
133
|
+
#Id ValueA ValueB Pos
|
134
|
+
row1 a|aa|aaa b 0|10
|
135
|
+
row2 A B 30
|
136
|
+
EOF
|
137
|
+
|
138
|
+
TmpFile.with_file(content) do |filename|
|
139
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
|
140
|
+
index = tsv.pos_index("Pos")
|
141
|
+
assert_equal ["row1"], index[10]
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
def test_range_index
|
147
|
+
content =<<-EOF
|
148
|
+
#Id ValueA ValueB Pos1 Pos2
|
149
|
+
row1 a|aa|aaa b 0|10 10|30
|
150
|
+
row2 A B 30 35
|
151
|
+
EOF
|
152
|
+
|
153
|
+
TmpFile.with_file(content) do |filename|
|
154
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
|
155
|
+
index = tsv.pos_index("Pos1")
|
156
|
+
assert_equal ["row1"], index[10]
|
157
|
+
|
158
|
+
index = tsv.range_index("Pos1", "Pos2")
|
159
|
+
assert_equal ["row1"], index[20]
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def test_range_index2
|
132
164
|
data =<<-EOF
|
133
165
|
#ID:Range
|
134
166
|
#:012345678901234567890
|
@@ -142,7 +174,7 @@ g: ____
|
|
142
174
|
EOF
|
143
175
|
TmpFile.with_file(data) do |datafile|
|
144
176
|
tsv = load_data(datafile)
|
145
|
-
f = tsv.
|
177
|
+
f = tsv.range_index("Start", "End")
|
146
178
|
|
147
179
|
assert_equal %w(), f[0].sort
|
148
180
|
assert_equal %w(b), f[1].sort
|
@@ -153,35 +185,55 @@ g: ____
|
|
153
185
|
end
|
154
186
|
end
|
155
187
|
|
156
|
-
def
|
157
|
-
|
158
|
-
#
|
159
|
-
|
160
|
-
|
188
|
+
def test_range_index_persistent
|
189
|
+
data =<<-EOF
|
190
|
+
#ID:Range
|
191
|
+
#:012345678901234567890
|
192
|
+
a: ______
|
193
|
+
b: ______
|
194
|
+
c: _______
|
195
|
+
d: ____
|
196
|
+
e: ______
|
197
|
+
f: ___
|
198
|
+
g: ____
|
161
199
|
EOF
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
200
|
+
TmpFile.with_file(data) do |datafile|
|
201
|
+
TmpFile.with_file(load_data(datafile)) do |tsvfile|
|
202
|
+
f = TSV.range_index(tsvfile, "Start", "End", :persistence => true)
|
203
|
+
|
204
|
+
assert_equal %w(), f[0].sort
|
205
|
+
assert_equal %w(b), f[1].sort
|
206
|
+
assert_equal %w(), f[20].sort
|
207
|
+
assert_equal %w(), f[(20..100)].sort
|
208
|
+
assert_equal %w(a b d), f[3].sort
|
209
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
210
|
+
end
|
167
211
|
end
|
168
212
|
end
|
169
213
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
214
|
+
def test_range_index_persistent_with_filter
|
215
|
+
data =<<-EOF
|
216
|
+
#ID:Range
|
217
|
+
#:012345678901234567890
|
218
|
+
a: ______
|
219
|
+
b: ______
|
220
|
+
c: _______
|
221
|
+
d: ____
|
222
|
+
e: ______
|
223
|
+
f: ___
|
224
|
+
g: ____
|
176
225
|
EOF
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
226
|
+
TmpFile.with_file(data) do |datafile|
|
227
|
+
TmpFile.with_file(load_data(datafile)) do |tsvfile|
|
228
|
+
f = TSV.range_index(tsvfile, "Start", "End", :filters => [["field:Start", "3"]])
|
229
|
+
|
230
|
+
assert_equal %w(), f[0].sort
|
231
|
+
assert_equal %w(), f[1].sort
|
232
|
+
assert_equal %w(), f[20].sort
|
233
|
+
assert_equal %w(), f[(20..100)].sort
|
234
|
+
assert_equal %w(a), f[3].sort
|
235
|
+
assert_equal %w(a), f[(3..4)].sort
|
236
|
+
end
|
185
237
|
end
|
186
238
|
end
|
187
239
|
|