rbbt-util 4.1.0 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/run_workflow.rb +4 -1
- data/lib/rbbt/annotations.rb +138 -53
- data/lib/rbbt/persist.rb +30 -12
- data/lib/rbbt/persist/tsv.rb +1 -1
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/tsv.rb +7 -1
- data/lib/rbbt/tsv/accessor.rb +21 -16
- data/lib/rbbt/tsv/attach/util.rb +1 -10
- data/lib/rbbt/tsv/manipulate.rb +21 -3
- data/lib/rbbt/tsv/parser.rb +24 -5
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/util/chain_methods.rb +12 -23
- data/lib/rbbt/util/misc.rb +133 -144
- data/lib/rbbt/util/named_array.rb +113 -0
- data/lib/rbbt/util/open.rb +17 -10
- data/lib/rbbt/workflow/accessor.rb +9 -1
- data/lib/rbbt/workflow/step.rb +9 -7
- data/share/lib/R/util.R +28 -6
- data/test/rbbt/test_annotations.rb +16 -2
- data/test/rbbt/test_persist.rb +37 -1
- data/test/rbbt/test_tsv.rb +12 -0
- data/test/rbbt/tsv/test_attach.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +1 -1
- data/test/rbbt/util/test_misc.rb +10 -0
- metadata +5 -4
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'rbbt/util/chain_methods'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
|
4
|
+
# Mixin that lets an Array be addressed by field name as well as by position.
#
# The +named_array_*+ methods follow the ChainMethods naming scheme selected
# through +chain_prefix+. The +named_array_clean_*+ counterparts called below
# are not defined in this file; presumably ChainMethods generates them as the
# untouched Array implementations (TODO confirm against chain_methods.rb).
module NamedArray
  extend ChainMethods

  self.chain_prefix = :named_array

  # Field names, one per position of the array.
  attr_accessor :fields
  # Value given as +key+ to NamedArray.setup; not used elsewhere in this module.
  attr_accessor :key
  # Passed to Entity formats as both :namespace and :organism when set.
  attr_accessor :namespace

  # Extends +array+ with NamedArray (unless it already is one), stores
  # +fields+, +key+ and +namespace+ on it, and returns the same array.
  def self.setup(array, fields, key = nil, namespace = nil)
    array.extend NamedArray unless NamedArray === array
    array.fields = fields
    array.key = key
    array.namespace = namespace
    array
  end

  # Combines this array with +array+ position by position and returns the
  # combined copy (created with +dup+).
  #
  # When the first element of +array+ is an Array, each position becomes
  # <tt>own + other</tt>; otherwise the other value is appended with <tt><<</tt>.
  def merge(array)
    double = Array === array.first
    merged = self.dup
    length.times do |i|
      if double
        merged[i] = merged[i] + array[i]
      else
        # NOTE(review): << mutates the element object itself; dup is a shallow
        # copy, so that object is shared with the receiver.
        merged[i] << array[i]
      end
    end
    merged
  end

  # Resolves +fields+ through Misc.field_position against this array's field
  # names. An Array of fields yields an Array of results; anything else is
  # resolved as a single field.
  def positions(fields)
    # Array === fields is the class-membership test. The previous
    # `Array == fields` compared the class object itself with the argument, so
    # the per-field branch was never taken for real arrays.
    if Array === fields
      fields.collect{|field|
        Misc.field_position(@fields, field)
      }
    else
      Misc.field_position(@fields, fields)
    end
  end

  # Element lookup by field: +key+ is resolved with Misc.field_position and
  # the element is fetched with the clean accessor. When Entity is loaded and
  # has a format registered for the field at that position, the element is
  # set up with that format (a frozen element is dup'ed first) before being
  # returned.
  def named_array_get_brackets(key)
    pos = Misc.field_position(fields, key)
    elem = named_array_clean_get_brackets(pos)

    return elem if @fields.nil? or @fields.empty?

    # @fields may itself be a NamedArray; use its clean accessor in that case.
    field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]
    elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
    elem
  end

  # Iteration. When Entity is defined and fields are set, each element is
  # paired with its field, set up with the matching Entity format when one is
  # registered, and yielded. Otherwise the clean +each+ is called with the
  # given block.
  def named_array_each(&block)
    if defined?(Entity) and not @fields.nil? and not @fields.empty?
      @fields.zip(self).each do |field,elem|
        elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), (namespace ? {:namespace => namespace, :organism => namespace} : {}).merge({:format => field})) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
        yield(elem)
        elem
      end
    else
      named_array_clean_each(&block)
    end
  end

  # Collects the elements produced by named_array_each into a new Array,
  # mapping them through the block when one is given.
  def named_array_collect
    res = []

    named_array_each do |elem|
      if block_given?
        res << yield(elem)
      else
        res << elem
      end
    end

    res
  end

  # Element assignment by field: +key+ is resolved with Misc.field_position
  # before delegating to the clean setter.
  def named_array_set_brackets(key,value)
    named_array_clean_set_brackets(Misc.field_position(fields, key), value)
  end

  # values_at by field: every key is resolved with Misc.field_position before
  # delegating to the clean values_at.
  def named_array_values_at(*keys)
    keys = keys.collect{|k| Misc.field_position(fields, k) }
    named_array_clean_values_at(*keys)
  end

  # Passes this array through Misc.zip_fields and sets up every resulting
  # entry as a NamedArray with the same fields. Returns [] when empty.
  def zip_fields
    return [] if self.empty?
    zipped = Misc.zip_fields(self)
    zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
    zipped
  end

  # Selects the positions whose field +fullname+ also appears among
  # +file.fields+, builds a NamedArray from those values and fields, and
  # returns its zip_fields. Both field lists must respond to +fullname+.
  def detach(file)
    file_fields = file.fields.collect{|field| field.fullname}
    detached_fields = []
    self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
    detached_names = self.fields.values_at(*detached_fields)
    values = self.values_at(*detached_fields)
    # NamedArray.setup is the factory defined in this module. The previous
    # NamedArray.name(values, fields) resolves to Module#name, which accepts
    # no arguments.
    values = NamedArray.setup(values, detached_names)
    values.zip_fields
  end

  # Renders one "field: value" line per position, joining Array values with
  # "|" and the lines with newlines.
  def report
    fields.zip(self).collect do |field,value|
      "#{ field }: #{ Array === value ? value * "|" : value }"
    end * "\n"
  end

end
|
113
|
+
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -3,7 +3,6 @@ require 'rbbt/util/misc'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
|
5
5
|
require 'zlib'
|
6
|
-
require 'digest/md5'
|
7
6
|
|
8
7
|
module Open
|
9
8
|
class OpenURLError < StandardError; end
|
@@ -77,12 +76,14 @@ module Open
|
|
77
76
|
end
|
78
77
|
end
|
79
78
|
|
79
|
+
# Builds the digest that identifies +url+ together with its POST payload.
#
# The digest covers the url, the values stored under the "--post-data" and
# "--post-data=" option keys, and, when a "--post-file" option is present,
# the contents of that file. The file's lines are sorted first, so files that
# differ only in line order produce the same digest.
#
# Returns whatever Misc.digest returns for the inspected parameter list.
def self.digest_url(url, options = {})
  post_data = options.values_at("--post-data", "--post-data=")

  post_file = if options.include?("--post-file")
                Open.read(options["--post-file"]).split("\n").sort * "\n"
              else
                ""
              end

  Misc.digest([url, post_data, post_file].inspect)
end
|
80
83
|
# Cache
|
81
84
|
#
|
82
85
|
def self.in_cache(url, options = {})
|
83
|
-
|
84
|
-
|
85
|
-
filename = File.join(REMOTE_CACHEDIR, digest)
|
86
|
+
filename = File.join(REMOTE_CACHEDIR, digest_url(url, options))
|
86
87
|
if File.exists? filename
|
87
88
|
return filename
|
88
89
|
else
|
@@ -91,7 +92,7 @@ module Open
|
|
91
92
|
end
|
92
93
|
|
93
94
|
def self.remove_from_cache(url, options = {})
|
94
|
-
digest =
|
95
|
+
digest = Misc.digest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
|
95
96
|
|
96
97
|
filename = File.join(REMOTE_CACHEDIR, digest)
|
97
98
|
if File.exists? filename
|
@@ -102,8 +103,10 @@ module Open
|
|
102
103
|
end
|
103
104
|
|
104
105
|
def self.add_cache(url, data, options = {})
|
105
|
-
|
106
|
-
Misc.
|
106
|
+
file = File.join(REMOTE_CACHEDIR, digest_url(url, options))
|
107
|
+
Misc.lock(file) do
|
108
|
+
Misc.sensiblewrite(file, data)
|
109
|
+
end
|
107
110
|
end
|
108
111
|
|
109
112
|
# Grep
|
@@ -168,16 +171,20 @@ module Open
|
|
168
171
|
wget_options[:nice_key] = options.delete(:nice_key)
|
169
172
|
wget_options[:quiet] = options.delete(:quiet)
|
170
173
|
wget_options["--post-data="] = options.delete(:post) if options.include? :post
|
174
|
+
wget_options["--post-file"] = options.delete("--post-file") if options.include? "--post-file"
|
175
|
+
wget_options["--post-file="] = options.delete("--post-file=") if options.include? "--post-file="
|
171
176
|
wget_options[:cookies] = options.delete(:cookies)
|
172
177
|
|
173
178
|
io = case
|
174
179
|
when (not remote?(url))
|
175
180
|
file_open(url, options[:grep])
|
176
|
-
when options[:nocache]
|
181
|
+
when (options[:nocache] and options[:nocache] != :update)
|
177
182
|
# What about grep?
|
178
183
|
wget(url, wget_options)
|
179
|
-
when in_cache(url, wget_options)
|
180
|
-
|
184
|
+
when (options[:nocache] != :update and in_cache(url, wget_options))
|
185
|
+
Misc.lock(in_cache(url, wget_options)) do
|
186
|
+
file_open(in_cache(url, wget_options), options[:grep])
|
187
|
+
end
|
181
188
|
else
|
182
189
|
io = wget(url, wget_options)
|
183
190
|
add_cache(url, io, wget_options)
|
@@ -26,7 +26,7 @@ class Step
|
|
26
26
|
Misc.lock(info_file) do
|
27
27
|
i = info
|
28
28
|
i[key] = value
|
29
|
-
Open.write(info_file, i
|
29
|
+
Open.write(info_file, YAML.dump(i))
|
30
30
|
value
|
31
31
|
end
|
32
32
|
end
|
@@ -140,6 +140,14 @@ class Step
|
|
140
140
|
end
|
141
141
|
|
142
142
|
module Workflow
|
143
|
+
# Sends a low-priority log line through Log.low: "status: message" when a
# message is given, otherwise just the status.
def log(status, message = nil)
  text = message ? "#{ status }: #{ message }" : "#{ status }"
  Log.low text
end
|
150
|
+
|
143
151
|
def task_info(name)
|
144
152
|
task = tasks[name]
|
145
153
|
description = task.description
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -23,7 +23,7 @@ class Step
|
|
23
23
|
@inputs = inputs || []
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def prepare_result(value, description = nil, info = {})
|
27
27
|
return value if description.nil?
|
28
28
|
Entity.formats[description].setup(value, info.merge(:format => description)) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? description
|
29
29
|
value
|
@@ -31,7 +31,7 @@ class Step
|
|
31
31
|
|
32
32
|
def exec
|
33
33
|
result = @task.exec_in self, *@inputs
|
34
|
-
|
34
|
+
prepare_result result, @task.result_description
|
35
35
|
end
|
36
36
|
|
37
37
|
def join
|
@@ -41,17 +41,18 @@ class Step
|
|
41
41
|
sleep 5
|
42
42
|
end
|
43
43
|
else
|
44
|
+
Log.debug "Waiting for pid: #{@pid}"
|
44
45
|
Process.waitpid @pid
|
45
46
|
@pid = nil
|
46
47
|
end
|
47
48
|
self
|
48
49
|
end
|
49
50
|
|
50
|
-
def run
|
51
|
-
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq do
|
51
|
+
def run(no_load = false)
|
52
|
+
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq, :no_load => no_load do
|
52
53
|
log task.name, "Starting task: #{ name }"
|
53
54
|
set_info :dependencies, @dependencies.collect{|dep| [dep.task.name, dep.name]}
|
54
|
-
@dependencies.each{|dependency| dependency.run}
|
55
|
+
@dependencies.each{|dependency| dependency.run true}
|
55
56
|
set_info :status, :start
|
56
57
|
set_info :inputs, Misc.zip2hash(task.inputs, @inputs) unless task.inputs.nil?
|
57
58
|
res = exec
|
@@ -59,7 +60,7 @@ class Step
|
|
59
60
|
res
|
60
61
|
end
|
61
62
|
|
62
|
-
|
63
|
+
prepare_result result, @task.result_description, info
|
63
64
|
end
|
64
65
|
|
65
66
|
def fork
|
@@ -90,7 +91,7 @@ class Step
|
|
90
91
|
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path} do
|
91
92
|
exec
|
92
93
|
end
|
93
|
-
|
94
|
+
prepare_result result, @task.result_description, info
|
94
95
|
end
|
95
96
|
|
96
97
|
def clean
|
@@ -108,6 +109,7 @@ class Step
|
|
108
109
|
# Returns the flattened rec_dependencies of every direct dependency,
# followed by the direct dependencies themselves.
def rec_dependencies
  inherited = @dependencies.collect { |dependency| dependency.rec_dependencies }
  inherited.flatten + @dependencies
end
|
112
|
+
|
111
113
|
# Returns the first entry of rec_dependencies whose task name matches +name+
# (both compared as symbols), or nil when none does.
def step(name)
  matches = rec_dependencies.select do |candidate|
    candidate.task.name.to_sym == name.to_sym
  end
  matches.first
end
|
data/share/lib/R/util.R
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
rbbt.ruby <- function(code, load = TRUE){
|
1
|
+
rbbt.ruby <- function(code, load = TRUE, flat = FALSE){
|
2
2
|
file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
|
3
3
|
if (load){
|
4
|
-
|
4
|
+
if(flat){
|
5
|
+
data = rbbt.flat.tsv(file);
|
6
|
+
}else{
|
7
|
+
data = rbbt.tsv(file);
|
8
|
+
}
|
5
9
|
rm(file);
|
6
10
|
return(data);
|
7
11
|
}else{
|
@@ -25,8 +29,26 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
25
29
|
return(data);
|
26
30
|
}
|
27
31
|
|
32
|
+
# Reads a flat TSV file into a named list: one entry per data row, named by
# the row's first column and holding the remaining columns as a character
# vector.
#
# The first line is always consumed as the header; when it starts with "#: "
# the following line is consumed as the header instead. `comment.char` and
# `...` are accepted but not used by this function.
rbbt.flat.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
  f = file(filename, 'r');
  # Close the connection even if reading fails part-way through.
  on.exit(close(f));

  headers = readLines(f, 1);
  if (length(grep("^#: ", headers)) > 0){
    headers = readLines(f, 1);
  }

  result = list();
  while( TRUE ){
    line = readLines(f, 1);
    if (length(line) == 0){ break;}

    parts = unlist(strsplit(line, sep, fixed = TRUE));
    # A blank line splits into nothing and has no id to store under.
    if (length(parts) == 0){ next;}

    id = parts[1];
    # parts[-1] drops the id. The previous parts[2:length(parts)] counted
    # down (2:1) for key-only rows and produced c(NA, id).
    result[[id]] = parts[-1];
  }

  return(result);
}
|
49
|
+
|
28
50
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE,
|
51
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
|
30
52
|
f = file(filename, 'r');
|
31
53
|
headers = readLines(f, 1);
|
32
54
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -148,8 +170,8 @@ rbbt.run <- function(filename){
|
|
148
170
|
|
149
171
|
# Adapted from http://www.phaget4.org/R/image_matrix.html
|
150
172
|
rbbt.plot.matrix <- function(x, ...){
|
151
|
-
min <- min(x);
|
152
|
-
max <- max(x);
|
173
|
+
min <- min(x, na.rm=T);
|
174
|
+
max <- max(x, na.rm=T);
|
153
175
|
yLabels <- rownames(x);
|
154
176
|
xLabels <- colnames(x);
|
155
177
|
title <-c();
|
@@ -184,7 +206,7 @@ rbbt.plot.matrix <- function(x, ...){
|
|
184
206
|
ColorRamp <- rgb( seq(0,1,length=256), # Red
|
185
207
|
seq(0,1,length=256), # Green
|
186
208
|
seq(1,0,length=256)) # Blue
|
187
|
-
|
209
|
+
ColorLevels <- seq(min, max, length=length(ColorRamp));
|
188
210
|
|
189
211
|
# Reverse Y axis
|
190
212
|
reverse <- nrow(x) : 1;
|
@@ -35,7 +35,7 @@ class TestAnnotations < Test::Unit::TestCase
|
|
35
35
|
def test_array
|
36
36
|
ary = ["string"]
|
37
37
|
annotation_str = "Annotation String"
|
38
|
-
AnnotatedArray
|
38
|
+
ary.extend AnnotatedArray
|
39
39
|
AnnotatedString.setup(ary, annotation_str)
|
40
40
|
assert_equal [AnnotatedString], ary.annotation_types
|
41
41
|
assert_equal annotation_str, ary.annotation_str
|
@@ -87,6 +87,20 @@ class TestAnnotations < Test::Unit::TestCase
|
|
87
87
|
assert_equal str1, Annotated.load_tsv(Annotated.tsv([str1, str2], :literal, :JSON)).sort.first
|
88
88
|
end
|
89
89
|
|
90
|
+
# An annotated array dumped with Annotated.tsv and reloaded with
# Annotated.load_tsv keeps its annotation (:all dump) and its contents
# (:literal / :JSON dump).
def test_load_array_tsv
  annotation_str = "Annotation String 2"
  first = "string1"
  second = "string2"
  strings = [first, second]

  AnnotatedString.setup(strings, annotation_str)
  strings.extend AnnotatedArray

  reloaded = Annotated.load_tsv(Annotated.tsv(strings, :all))
  assert_equal annotation_str, reloaded.annotation_str

  reloaded_literal = Annotated.load_tsv(Annotated.tsv(strings, :literal, :JSON))
  assert_equal first, reloaded_literal.sort.first
end
|
103
|
+
|
90
104
|
def test_inheritance
|
91
105
|
str = "string1"
|
92
106
|
annotation_str1 = "Annotation String 1"
|
@@ -104,7 +118,7 @@ class TestAnnotations < Test::Unit::TestCase
|
|
104
118
|
assert_equal str + annotation_str, str.add_annot
|
105
119
|
end
|
106
120
|
|
107
|
-
|
121
|
+
def test_annotation_positional2hash
|
108
122
|
str = "string"
|
109
123
|
annotation_str = "Annotation String"
|
110
124
|
AnnotatedString.setup(str, :annotation_str => annotation_str)
|
data/test/rbbt/test_persist.rb
CHANGED
@@ -3,7 +3,43 @@ require 'rbbt/persist'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
|
+
# Minimal annotation module used by the persistence tests; it declares a
# single annotation named :test_annotation.
module TestAnnotation
  extend Annotation

  annotation :test_annotation
end
|
6
11
|
class TestPersist < Test::Unit::TestCase
|
12
|
+
|
13
|
+
# Persists two annotated strings with the :annotations type and checks that
# both the strings and their test_annotation values come back, first from
# the initial Persist.persist call and then from a second call on the same
# file.
def test_annotation_persist
  TmpFile.with_file do |tmp|
    first = "Entity 1"
    second = "Entity 2"

    TestAnnotation.setup(first, :test_annotation => "1")
    TestAnnotation.setup(second, :test_annotation => "2")

    entities = [first, second]

    persisted = Persist.persist("Test", :annotations, :file => tmp) { entities }

    assert_equal "Entity 1", persisted.first
    assert_equal "Entity 2", persisted.last
    assert_equal "1", persisted.first.test_annotation
    assert_equal "2", persisted.last.test_annotation

    persisted = Persist.persist("Test", :annotations, :file => tmp) { entities }

    assert_equal "Entity 1", persisted.sort.first
    assert_equal "Entity 2", persisted.sort.last
    assert_equal "1", persisted.sort.first.test_annotation
    assert_equal "2", persisted.sort.last.test_annotation
  end
end
|
42
|
+
|
7
43
|
def test_array_persist
|
8
44
|
TmpFile.with_file do |tmp|
|
9
45
|
10.times do
|
@@ -12,7 +48,7 @@ class TestPersist < Test::Unit::TestCase
|
|
12
48
|
end)
|
13
49
|
end
|
14
50
|
end
|
15
|
-
|
51
|
+
|
16
52
|
TmpFile.with_file do |tmp|
|
17
53
|
10.times do
|
18
54
|
assert_equal [],(Persist.persist("Test", :array, :file => tmp) do
|
data/test/rbbt/test_tsv.rb
CHANGED
@@ -458,5 +458,17 @@ row2 A|AA|AAA
|
|
458
458
|
|
459
459
|
end
|
460
460
|
|
461
|
+
# Opens a file declared as :type=:flat with :type => :single and "Value" as
# the key field, and checks that one of the flat values ("aaa") ends up as a
# key.
def test_flat2single
  content =<<-EOF
#: :type=:flat
#Id Value
row1 a aa aaa
row2 A AA AAA
  EOF

  TmpFile.with_file(content) do |filename|
    tsv = TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value")
    assert tsv.include?("aaa")
  end

end
|
462
474
|
end
|
@@ -221,7 +221,7 @@ row2 E
|
|
221
221
|
|
222
222
|
tsv1.attach tsv2, :fields => ["OtherID"], :persist_input => true
|
223
223
|
|
224
|
-
assert_equal tsv1.fields
|
224
|
+
assert_equal tsv1.fields, %w(ValueA ValueB OtherID)
|
225
225
|
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
226
226
|
end
|
227
227
|
|