rbbt-util 3.2.0 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/run_workflow.rb +118 -0
- data/lib/rbbt-util.rb +0 -3
- data/lib/rbbt/util/fix_width_table.rb +7 -0
- data/lib/rbbt/util/misc.rb +20 -4
- data/lib/rbbt/util/persistence.rb +51 -28
- data/lib/rbbt/util/resource.rb +1 -0
- data/lib/rbbt/util/task.rb +11 -0
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/util/tc_hash.rb +89 -20
- data/lib/rbbt/util/tsv.rb +23 -8
- data/lib/rbbt/util/tsv/accessor.rb +8 -5
- data/lib/rbbt/util/tsv/filters.rb +68 -20
- data/lib/rbbt/util/tsv/index.rb +115 -92
- data/lib/rbbt/util/tsv/manipulate.rb +10 -13
- data/lib/rbbt/util/tsv/parse.rb +1 -1
- data/lib/rbbt/util/tsv/resource.rb +8 -0
- data/lib/rbbt/util/workflow.rb +26 -8
- data/share/lib/R/util.R +71 -1
- data/test/rbbt/util/test_misc.rb +17 -0
- data/test/rbbt/util/test_persistence.rb +32 -0
- data/test/rbbt/util/test_resource.rb +1 -0
- data/test/rbbt/util/test_task.rb +14 -0
- data/test/rbbt/util/test_tc_hash.rb +16 -0
- data/test/rbbt/util/tsv/test_accessor.rb +0 -1
- data/test/rbbt/util/tsv/test_filters.rb +26 -0
- data/test/rbbt/util/tsv/test_index.rb +78 -26
- data/test/rbbt/util/tsv/test_manipulate.rb +1 -1
- metadata +10 -14
- data/bin/workflow.rb +0 -24
- data/lib/rbbt/util/bed.rb +0 -325
- data/lib/rbbt/util/cachehelper.rb +0 -100
- data/test/rbbt/util/test_bed.rb +0 -136
@@ -119,22 +119,19 @@ class TSV
|
|
119
119
|
new_fields = options[:new_fields]
|
120
120
|
|
121
121
|
new = {}
|
122
|
-
new_key_field, new_fields = through new_key_field, new_fields do |
|
123
|
-
if Array ===
|
124
|
-
keys
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
new_values = keys.each do |key|
|
130
|
-
if new[key].nil?
|
131
|
-
new[key] = values
|
132
|
-
else
|
133
|
-
if type == :double
|
134
|
-
new[key] = new[key].zip(values).collect{|v| v.flatten}
|
122
|
+
new_key_field, new_fields = through new_key_field, new_fields do |keys, values|
|
123
|
+
if Array === keys
|
124
|
+
keys.each do |key|
|
125
|
+
if new[key].nil? or not type == :double
|
126
|
+
new[key] = values.collect{|l| l.dup}
|
127
|
+
else
|
128
|
+
new[key] = new[key].zip(values).collect{|old_list, new_list| old_list.concat new_list}
|
135
129
|
end
|
136
130
|
end
|
131
|
+
else
|
132
|
+
new[keys] = values
|
137
133
|
end
|
134
|
+
nil
|
138
135
|
end
|
139
136
|
|
140
137
|
new = TSV.new new
|
data/lib/rbbt/util/tsv/parse.rb
CHANGED
@@ -318,7 +318,7 @@ class TSV
|
|
318
318
|
|
319
319
|
fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
|
320
320
|
fields ||= other_fields
|
321
|
-
[data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :
|
321
|
+
[data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
|
322
322
|
end
|
323
323
|
|
324
324
|
end
|
@@ -26,6 +26,14 @@ module Resource
|
|
26
26
|
TSV.index self, options
|
27
27
|
end
|
28
28
|
|
29
|
+
def pos_index(pos_field, options = {})
|
30
|
+
TSV.pos_index self, pos_field, options
|
31
|
+
end
|
32
|
+
|
33
|
+
def range_index(start_field, end_field, options = {})
|
34
|
+
TSV.range_index self, start_field, end_field, options
|
35
|
+
end
|
36
|
+
|
29
37
|
def open(options = {})
|
30
38
|
produce
|
31
39
|
Open.open(self.find, options)
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -5,12 +5,29 @@ require 'rbbt/util/misc'
|
|
5
5
|
|
6
6
|
module WorkFlow
|
7
7
|
|
8
|
-
def self.require_workflow(
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
def self.require_workflow(wf_name, wf_dir = nil)
|
9
|
+
|
10
|
+
if File.exists? wf_name
|
11
|
+
require "./workflow.rb"
|
12
|
+
return
|
13
13
|
end
|
14
|
+
|
15
|
+
wf_dir ||= case
|
16
|
+
when File.exists?(File.join(File.dirname(Resource.caller_lib_dir), wf_name))
|
17
|
+
File.join(File.dirname(Resource.caller_lib_dir), wf_name)
|
18
|
+
when defined? Rbbt
|
19
|
+
if Rbbt.pkg.etc.workflow_dir.exists?
|
20
|
+
File.join(pkg.etc.workflow_dir.read.strip, wf_name)
|
21
|
+
else
|
22
|
+
Rbbt.workflows[wf_name]
|
23
|
+
end
|
24
|
+
else
|
25
|
+
File.join(ENV["HOME"], '.workflows')
|
26
|
+
end
|
27
|
+
|
28
|
+
wf_dir = Resource::Path.path(wf_dir)
|
29
|
+
$LOAD_PATH.unshift(File.join(File.dirname(wf_dir["workflow.rb"].find), 'lib'))
|
30
|
+
require wf_dir["workflow.rb"].find
|
14
31
|
end
|
15
32
|
|
16
33
|
def self.extended(base)
|
@@ -74,9 +91,9 @@ module WorkFlow
|
|
74
91
|
def process_dangling
|
75
92
|
res = [
|
76
93
|
@dangling_options,
|
77
|
-
|
78
|
-
|
79
|
-
|
94
|
+
Misc.merge2hash(@dangling_options, @dangling_option_descriptions.values_at(*@dangling_options)),
|
95
|
+
Misc.merge2hash(@dangling_options, @dangling_option_types.values_at(*@dangling_options)),
|
96
|
+
Misc.merge2hash(@dangling_options, @dangling_option_defaults.values_at(*@dangling_options)),
|
80
97
|
(@dangling_dependencies || [@last_task]).compact,
|
81
98
|
@dangling_description,
|
82
99
|
]
|
@@ -103,6 +120,7 @@ module WorkFlow
|
|
103
120
|
end
|
104
121
|
|
105
122
|
def job(task, jobname = "Default", *args)
|
123
|
+
raise "Task '#{ task }' unkown" if tasks[task].nil?
|
106
124
|
tasks[task].job(jobname, *args)
|
107
125
|
end
|
108
126
|
|
data/share/lib/R/util.R
CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
26
26
|
}
|
27
27
|
|
28
28
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
|
29
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
|
30
30
|
f = file(filename, 'r');
|
31
31
|
headers = readLines(f, 1);
|
32
32
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -139,3 +139,73 @@ rbbt.run <- function(filename){
|
|
139
139
|
}
|
140
140
|
|
141
141
|
|
142
|
+
# UTILITIES
|
143
|
+
|
144
|
+
# Adapted from http://www.phaget4.org/R/image_matrix.html
|
145
|
+
rbbt.plot.matrix <- function(x, ...){
|
146
|
+
min <- min(x);
|
147
|
+
max <- max(x);
|
148
|
+
yLabels <- rownames(x);
|
149
|
+
xLabels <- colnames(x);
|
150
|
+
title <-c();
|
151
|
+
# check for additional function arguments
|
152
|
+
if( length(list(...)) ){
|
153
|
+
Lst <- list(...);
|
154
|
+
if( !is.null(Lst$zlim) ){
|
155
|
+
min <- Lst$zlim[1];
|
156
|
+
max <- Lst$zlim[2];
|
157
|
+
}
|
158
|
+
if( !is.null(Lst$yLabels) ){
|
159
|
+
yLabels <- c(Lst$yLabels);
|
160
|
+
}
|
161
|
+
if( !is.null(Lst$xLabels) ){
|
162
|
+
xLabels <- c(Lst$xLabels);
|
163
|
+
}
|
164
|
+
if( !is.null(Lst$title) ){
|
165
|
+
title <- Lst$title;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
# check for null values
|
169
|
+
if( is.null(xLabels) ){
|
170
|
+
xLabels <- c(1:ncol(x));
|
171
|
+
}
|
172
|
+
if( is.null(yLabels) ){
|
173
|
+
yLabels <- c(1:nrow(x));
|
174
|
+
}
|
175
|
+
|
176
|
+
layout(matrix(data=c(1,2), nrow=1, ncol=2), widths=c(4,1), heights=c(1,1));
|
177
|
+
|
178
|
+
# Red and green range from 0 to 1 while Blue ranges from 1 to 0
|
179
|
+
ColorRamp <- rgb( seq(0,1,length=256), # Red
|
180
|
+
seq(0,1,length=256), # Green
|
181
|
+
seq(1,0,length=256)) # Blue
|
182
|
+
ColorLevels <- seq(min, max, length=length(ColorRamp));
|
183
|
+
|
184
|
+
# Reverse Y axis
|
185
|
+
reverse <- nrow(x) : 1;
|
186
|
+
yLabels <- yLabels[reverse];
|
187
|
+
x <- x[reverse,];
|
188
|
+
|
189
|
+
# Data Map
|
190
|
+
par(mar = c(3,5,2.5,2));
|
191
|
+
image(1:length(xLabels), 1:length(yLabels), t(x), col=ColorRamp, xlab="",
|
192
|
+
ylab="", axes=FALSE, zlim=c(min,max));
|
193
|
+
if( !is.null(title) ){
|
194
|
+
title(main=title);
|
195
|
+
}
|
196
|
+
axis(BELOW<-1, at=1:length(xLabels), labels=xLabels, cex.axis=0.7);
|
197
|
+
axis(LEFT <-2, at=1:length(yLabels), labels=yLabels, las= HORIZONTAL<-1,
|
198
|
+
cex.axis=0.7);
|
199
|
+
|
200
|
+
# Color Scale
|
201
|
+
par(mar = c(3,2.5,2.5,2));
|
202
|
+
image(1, ColorLevels,
|
203
|
+
matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1),
|
204
|
+
col=ColorRamp,
|
205
|
+
xlab="",ylab="",
|
206
|
+
xaxt="n");
|
207
|
+
|
208
|
+
layout(1);
|
209
|
+
}
|
210
|
+
|
211
|
+
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -142,4 +142,21 @@ This is an example file. Entries are separated by Entry
|
|
142
142
|
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
143
143
|
end
|
144
144
|
|
145
|
+
def test_intersect_sorted_arrays
|
146
|
+
assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
|
147
|
+
end
|
148
|
+
|
149
|
+
|
150
|
+
def test_in_dir
|
151
|
+
TmpFile.with_file do |dir|
|
152
|
+
FileUtils.mkdir_p dir
|
153
|
+
Open.write(File.join(dir, 'test_file_in_dir'), 'test_file_in_dir')
|
154
|
+
Misc.in_dir(dir) do
|
155
|
+
assert Dir.glob("*").include? 'test_file_in_dir'
|
156
|
+
end
|
157
|
+
assert Dir.glob(File.join(dir, "*")).include?(File.join(dir, 'test_file_in_dir'))
|
158
|
+
assert(! Dir.glob("*").include?('test_file_in_dir'))
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
145
162
|
end
|
@@ -164,6 +164,38 @@ row2 2 4 6 8
|
|
164
164
|
Process.wait pid
|
165
165
|
end
|
166
166
|
|
167
|
+
def test_persist_fwt
|
168
|
+
content =<<-EOF
|
169
|
+
#: :sep=/\\s+/
|
170
|
+
#Id Start End
|
171
|
+
row1 1 10
|
172
|
+
row2 20 30
|
173
|
+
EOF
|
174
|
+
|
175
|
+
TmpFile.with_file(content) do |filename|
|
176
|
+
fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
|
177
|
+
fwt = TSV.new(filename).range_index("Start", "End", :persistence => false)
|
178
|
+
assert fwt[15].empty?
|
179
|
+
assert fwt[25].include? "row2"
|
180
|
+
fwt
|
181
|
+
end
|
182
|
+
|
183
|
+
assert fwt[5].include? "row1"
|
184
|
+
assert fwt[(5..25)].include? "row1"
|
185
|
+
assert fwt[(5..25)].include? "row2"
|
186
|
+
|
187
|
+
fwt = Persistence.persist(filename, :Range, :fwt, :persistence_dir => Rbbt.tmp.test.persistence) do
|
188
|
+
assert false
|
189
|
+
end
|
190
|
+
|
191
|
+
assert fwt[5].include? "row1"
|
192
|
+
assert fwt[(5..25)].include? "row1"
|
193
|
+
assert fwt[(5..25)].include? "row2"
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
167
199
|
|
168
200
|
end
|
169
201
|
|
@@ -67,6 +67,7 @@ class TestResource < Test::Unit::TestCase
|
|
67
67
|
begin
|
68
68
|
assert_equal "Test String", Rbbt.tmp.test_string.read
|
69
69
|
assert_equal "Test String", Rbbt.tmp.work.foo.read
|
70
|
+
rescue
|
70
71
|
ensure
|
71
72
|
FileUtils.rm Rbbt.tmp.test_string.find if File.exists? Rbbt.tmp.test_string.find
|
72
73
|
FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
|
data/test/rbbt/util/test_task.rb
CHANGED
@@ -37,6 +37,20 @@ class TestTask < Test::Unit::TestCase
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
+
def test_task_options_from_hash
|
41
|
+
TmpFile.with_file do |f|
|
42
|
+
task = Task.new(:test_task, nil, :name) do |name| Open.write(f, name) end
|
43
|
+
job = task.job(:job1, :name => "TestName")
|
44
|
+
assert_equal "job1" << "_" << Misc.hash2md5(:name => "TestName"), job.id
|
45
|
+
job.fork
|
46
|
+
job.join
|
47
|
+
|
48
|
+
assert File.exists? f
|
49
|
+
assert_equal "TestName", File.open(f).read
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
40
54
|
def test_task_result
|
41
55
|
task = Task.new(:test_task, nil, :name) do |name| name end
|
42
56
|
job = task.job(:job1, "TestName")
|
@@ -54,6 +54,22 @@ class TestTCHash < Test::Unit::TestCase
|
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
57
|
+
def test_stringDoubleArraySerializer
|
58
|
+
TmpFile.with_file do |f|
|
59
|
+
t = TCHash.get f, true, TCHash::TSVSerializer
|
60
|
+
tsv = TSV.new({})
|
61
|
+
tsv["1"] = [[1],[2]]
|
62
|
+
tsv["2"] = [[3],[4,5]]
|
63
|
+
t["TSV"] = tsv
|
64
|
+
|
65
|
+
t = TCHash.get f
|
66
|
+
t.collect do |k,v|
|
67
|
+
assert_equal [["3"],["4","5"]], t["TSV"]["2"]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
|
57
73
|
def test_serializer_alias
|
58
74
|
TmpFile.with_file do |f|
|
59
75
|
t = TCHash.get f, true, :double
|
@@ -135,7 +135,33 @@ row2 A B
|
|
135
135
|
end
|
136
136
|
end
|
137
137
|
|
138
|
+
def test_delete
|
139
|
+
content =<<-EOF
|
140
|
+
#ID ValueA ValueB Comment
|
141
|
+
row1 a b c
|
142
|
+
row2 A B C
|
143
|
+
EOF
|
144
|
+
|
145
|
+
TmpFile.with_file(content) do |filename|
|
146
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
|
147
|
+
assert_equal 2, tsv.keys.length
|
148
|
+
tsv.delete "row2"
|
149
|
+
assert_equal 1, tsv.keys.length
|
150
|
+
|
151
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
|
152
|
+
tsv.filter
|
153
|
+
tsv.add_filter "field:ValueA", ["A"]
|
138
154
|
|
155
|
+
assert_equal 1, tsv.keys.length
|
156
|
+
assert_equal ["row2"], tsv.keys
|
157
|
+
|
158
|
+
tsv.delete "row2"
|
159
|
+
assert_equal 0, tsv.keys.length
|
160
|
+
|
161
|
+
tsv.pop_filter
|
162
|
+
assert_equal ["row1"], tsv.keys
|
163
|
+
end
|
164
|
+
end
|
139
165
|
|
140
166
|
end
|
141
167
|
|
@@ -128,7 +128,39 @@ row2 A B
|
|
128
128
|
tsv
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def test_pos_index
|
132
|
+
content =<<-EOF
|
133
|
+
#Id ValueA ValueB Pos
|
134
|
+
row1 a|aa|aaa b 0|10
|
135
|
+
row2 A B 30
|
136
|
+
EOF
|
137
|
+
|
138
|
+
TmpFile.with_file(content) do |filename|
|
139
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
|
140
|
+
index = tsv.pos_index("Pos")
|
141
|
+
assert_equal ["row1"], index[10]
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
|
146
|
+
def test_range_index
|
147
|
+
content =<<-EOF
|
148
|
+
#Id ValueA ValueB Pos1 Pos2
|
149
|
+
row1 a|aa|aaa b 0|10 10|30
|
150
|
+
row2 A B 30 35
|
151
|
+
EOF
|
152
|
+
|
153
|
+
TmpFile.with_file(content) do |filename|
|
154
|
+
tsv = TSV.new(File.open(filename), :double, :sep => /\s+/)
|
155
|
+
index = tsv.pos_index("Pos1")
|
156
|
+
assert_equal ["row1"], index[10]
|
157
|
+
|
158
|
+
index = tsv.range_index("Pos1", "Pos2")
|
159
|
+
assert_equal ["row1"], index[20]
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def test_range_index2
|
132
164
|
data =<<-EOF
|
133
165
|
#ID:Range
|
134
166
|
#:012345678901234567890
|
@@ -142,7 +174,7 @@ g: ____
|
|
142
174
|
EOF
|
143
175
|
TmpFile.with_file(data) do |datafile|
|
144
176
|
tsv = load_data(datafile)
|
145
|
-
f = tsv.
|
177
|
+
f = tsv.range_index("Start", "End")
|
146
178
|
|
147
179
|
assert_equal %w(), f[0].sort
|
148
180
|
assert_equal %w(b), f[1].sort
|
@@ -153,35 +185,55 @@ g: ____
|
|
153
185
|
end
|
154
186
|
end
|
155
187
|
|
156
|
-
def
|
157
|
-
|
158
|
-
#
|
159
|
-
|
160
|
-
|
188
|
+
def test_range_index_persistent
|
189
|
+
data =<<-EOF
|
190
|
+
#ID:Range
|
191
|
+
#:012345678901234567890
|
192
|
+
a: ______
|
193
|
+
b: ______
|
194
|
+
c: _______
|
195
|
+
d: ____
|
196
|
+
e: ______
|
197
|
+
f: ___
|
198
|
+
g: ____
|
161
199
|
EOF
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
200
|
+
TmpFile.with_file(data) do |datafile|
|
201
|
+
TmpFile.with_file(load_data(datafile)) do |tsvfile|
|
202
|
+
f = TSV.range_index(tsvfile, "Start", "End", :persistence => true)
|
203
|
+
|
204
|
+
assert_equal %w(), f[0].sort
|
205
|
+
assert_equal %w(b), f[1].sort
|
206
|
+
assert_equal %w(), f[20].sort
|
207
|
+
assert_equal %w(), f[(20..100)].sort
|
208
|
+
assert_equal %w(a b d), f[3].sort
|
209
|
+
assert_equal %w(a b c d e), f[(3..4)].sort
|
210
|
+
end
|
167
211
|
end
|
168
212
|
end
|
169
213
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
214
|
+
def test_range_index_persistent_with_filter
|
215
|
+
data =<<-EOF
|
216
|
+
#ID:Range
|
217
|
+
#:012345678901234567890
|
218
|
+
a: ______
|
219
|
+
b: ______
|
220
|
+
c: _______
|
221
|
+
d: ____
|
222
|
+
e: ______
|
223
|
+
f: ___
|
224
|
+
g: ____
|
176
225
|
EOF
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
226
|
+
TmpFile.with_file(data) do |datafile|
|
227
|
+
TmpFile.with_file(load_data(datafile)) do |tsvfile|
|
228
|
+
f = TSV.range_index(tsvfile, "Start", "End", :filters => [["field:Start", "3"]])
|
229
|
+
|
230
|
+
assert_equal %w(), f[0].sort
|
231
|
+
assert_equal %w(), f[1].sort
|
232
|
+
assert_equal %w(), f[20].sort
|
233
|
+
assert_equal %w(), f[(20..100)].sort
|
234
|
+
assert_equal %w(a), f[3].sort
|
235
|
+
assert_equal %w(a), f[(3..4)].sort
|
236
|
+
end
|
185
237
|
end
|
186
238
|
end
|
187
239
|
|