rbbt-util 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/run_workflow.rb +4 -1
- data/lib/rbbt/annotations.rb +138 -53
- data/lib/rbbt/persist.rb +30 -12
- data/lib/rbbt/persist/tsv.rb +1 -1
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/tsv.rb +7 -1
- data/lib/rbbt/tsv/accessor.rb +21 -16
- data/lib/rbbt/tsv/attach/util.rb +1 -10
- data/lib/rbbt/tsv/manipulate.rb +21 -3
- data/lib/rbbt/tsv/parser.rb +24 -5
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/util/chain_methods.rb +12 -23
- data/lib/rbbt/util/misc.rb +133 -144
- data/lib/rbbt/util/named_array.rb +113 -0
- data/lib/rbbt/util/open.rb +17 -10
- data/lib/rbbt/workflow/accessor.rb +9 -1
- data/lib/rbbt/workflow/step.rb +9 -7
- data/share/lib/R/util.R +28 -6
- data/test/rbbt/test_annotations.rb +16 -2
- data/test/rbbt/test_persist.rb +37 -1
- data/test/rbbt/test_tsv.rb +12 -0
- data/test/rbbt/tsv/test_attach.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +1 -1
- data/test/rbbt/util/test_misc.rb +10 -0
- metadata +5 -4
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'rbbt/util/chain_methods'
|
2
|
+
require 'rbbt/util/misc'
|
3
|
+
|
4
|
+
# Mixin that lets an Array be addressed by field name as well as by position.
#
# NamedArray.setup extends an array with this module and attaches three
# attributes to it: +fields+, +key+ and +namespace+. The methods prefixed with
# +named_array_+ are written for ChainMethods under the :named_array prefix;
# the +named_array_clean_*+ counterparts they call are not defined in this
# module -- presumably ChainMethods provides them as the untouched Array
# originals (TODO confirm in rbbt/util/chain_methods).
#
# No helper methods are added here on purpose: the set of methods defined in
# this module is inspected by ChainMethods, so the Entity wrapping code is
# kept inline in both places that need it.
module NamedArray
  extend ChainMethods

  self.chain_prefix = :named_array
  attr_accessor :fields
  attr_accessor :key
  attr_accessor :namespace

  # Extends +array+ with NamedArray (unless it already is one), assigns
  # +fields+, +key+ and +namespace+ to it, and returns the same array.
  def self.setup(array, fields, key = nil, namespace = nil)
    array.extend NamedArray unless NamedArray === array
    array.fields = fields
    array.key = key
    array.namespace = namespace
    array
  end

  # Returns a copy of self (made with #dup) in which every position +i+ has
  # been combined with +array[i]+.
  #
  # When +array.first+ is an Array, position +i+ becomes
  # <tt>copy[i] + array[i]</tt>; otherwise +array[i]+ is appended to the
  # existing element with <tt><<</tt>.
  #
  # NOTE(review): #dup is shallow, so in the append case the element objects
  # are shared with self and are modified in place. Kept as is because callers
  # may rely on it -- confirm before changing.
  def merge(array)
    double = Array === array.first
    merged = self.dup
    length.times do |i|
      if double
        merged[i] = merged[i] + array[i]
      else
        merged[i] << array[i]
      end
    end
    merged
  end

  # Resolves +fields+ against @fields using Misc.field_position.
  #
  # fields - a single field, or an Array of fields.
  #
  # Returns one result per entry for an Array argument, a single result
  # otherwise.
  def positions(fields)
    # `Array === fields` asks "is fields an Array?". The previous
    # `Array == fields` compared against the Array class object itself, so an
    # Array of fields never reached this branch.
    if Array === fields
      fields.collect{|field|
        Misc.field_position(@fields, field)
      }
    else
      Misc.field_position(@fields, fields)
    end
  end

  # Element lookup by field: resolves +key+ with Misc.field_position and reads
  # that position through named_array_clean_get_brackets.
  #
  # When @fields is nil or empty the raw element is returned. Otherwise, if an
  # Entity constant is defined, responds to +formats+ and lists the field at
  # that position, the element is passed through that format's +setup+
  # (duplicated first when frozen) with :format, and with :namespace and
  # :organism when a namespace is set. An element equal to its own field name
  # is never wrapped.
  def named_array_get_brackets(key)
    pos = Misc.field_position(fields, key)
    elem = named_array_clean_get_brackets(pos)

    return elem if @fields.nil? or @fields.empty?

    # @fields may itself be a NamedArray; read it through the clean accessor
    # so the lookup is purely positional.
    field = NamedArray === @fields ? @fields.named_array_clean_get_brackets(pos) : @fields[pos]

    if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
      options = namespace ? {:namespace => namespace, :organism => namespace} : {}
      elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), options.merge({:format => field}))
    end

    elem
  end

  # Iterates over the elements, yielding each one after the same Entity
  # wrapping that named_array_get_brackets applies.
  #
  # Falls back to named_array_clean_each when Entity is not defined or when
  # @fields is nil or empty.
  def named_array_each(&block)
    if defined?(Entity) and not @fields.nil? and not @fields.empty?
      @fields.zip(self).each do |field,elem|
        if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include?(field) and not field == elem
          options = namespace ? {:namespace => namespace, :organism => namespace} : {}
          elem = Entity.formats[field].setup((elem.frozen? ? elem.dup : elem), options.merge({:format => field}))
        end
        yield(elem)
        elem
      end
    else
      named_array_clean_each(&block)
    end
  end

  # Builds a new Array from named_array_each: the block's result for each
  # element when a block is given, the (possibly wrapped) element otherwise.
  def named_array_collect
    res = []

    named_array_each do |elem|
      if block_given?
        res << yield(elem)
      else
        res << elem
      end
    end

    res
  end

  # Element assignment by field: resolves +key+ with Misc.field_position and
  # writes +value+ through named_array_clean_set_brackets.
  def named_array_set_brackets(key,value)
    named_array_clean_set_brackets(Misc.field_position(fields, key), value)
  end

  # values_at by field: every key is resolved with Misc.field_position before
  # delegating to named_array_clean_values_at.
  def named_array_values_at(*keys)
    keys = keys.collect{|k| Misc.field_position(fields, k) }
    named_array_clean_values_at(*keys)
  end

  # Returns the result of Misc.zip_fields(self) with every entry set up as a
  # NamedArray carrying this array's fields; [] when self is empty.
  def zip_fields
    return [] if self.empty?
    zipped = Misc.zip_fields(self)
    zipped = zipped.collect{|v| NamedArray.setup(v, fields)}
    zipped
  end

  # Selects the positions whose field +fullname+ also appears among
  # +file.fields+, and returns those values zipped (see #zip_fields).
  #
  # file - object responding to #fields; its entries and this array's fields
  #        must respond to #fullname.
  def detach(file)
    file_fields = file.fields.collect{|field| field.fullname}
    detached_fields = []
    self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
    fields = self.fields.values_at(*detached_fields)
    values = self.values_at(*detached_fields)
    # Was `NamedArray.name(values, fields)`: this module defines no such
    # method and Module#name accepts no arguments. NamedArray.setup is the
    # constructor defined above.
    values = NamedArray.setup(values, fields)
    values.zip_fields
  end

  # Returns one "field: value" line per position, joined with newlines; Array
  # values are joined with "|".
  def report
    fields.zip(self).collect do |field,value|
      "#{ field }: #{ Array === value ? value * "|" : value }"
    end * "\n"
  end

end
|
113
|
+
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -3,7 +3,6 @@ require 'rbbt/util/misc'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
|
5
5
|
require 'zlib'
|
6
|
-
require 'digest/md5'
|
7
6
|
|
8
7
|
module Open
|
9
8
|
class OpenURLError < StandardError; end
|
@@ -77,12 +76,14 @@ module Open
|
|
77
76
|
end
|
78
77
|
end
|
79
78
|
|
79
|
+
# Digest identifying a request, built from the url, the values stored under
# the "--post-data" and "--post-data=" options, and the content of the
# "--post-file" file when that option is present.
#
# The lines of the posted file are sorted before hashing, so the order of the
# lines does not affect the digest.
def self.digest_url(url, options = {})
  post_data = options.values_at("--post-data", "--post-data=")

  post_file = ""
  if options.include?("--post-file")
    post_file = Open.read(options["--post-file"]).split("\n").sort * "\n"
  end

  Misc.digest([url, post_data, post_file].inspect)
end
|
80
83
|
# Cache
|
81
84
|
#
|
82
85
|
def self.in_cache(url, options = {})
|
83
|
-
|
84
|
-
|
85
|
-
filename = File.join(REMOTE_CACHEDIR, digest)
|
86
|
+
filename = File.join(REMOTE_CACHEDIR, digest_url(url, options))
|
86
87
|
if File.exists? filename
|
87
88
|
return filename
|
88
89
|
else
|
@@ -91,7 +92,7 @@ module Open
|
|
91
92
|
end
|
92
93
|
|
93
94
|
def self.remove_from_cache(url, options = {})
|
94
|
-
digest =
|
95
|
+
digest = Misc.digest([url, options.values_at("--post-data", "--post-data="), (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
|
95
96
|
|
96
97
|
filename = File.join(REMOTE_CACHEDIR, digest)
|
97
98
|
if File.exists? filename
|
@@ -102,8 +103,10 @@ module Open
|
|
102
103
|
end
|
103
104
|
|
104
105
|
# Stores +data+ for +url+ in REMOTE_CACHEDIR, in a file named after
# digest_url(url, options). The write goes through Misc.sensiblewrite inside
# a Misc.lock block on that same file.
def self.add_cache(url, data, options = {})
  cache_path = File.join(REMOTE_CACHEDIR, digest_url(url, options))

  Misc.lock(cache_path) { Misc.sensiblewrite(cache_path, data) }
end
|
108
111
|
|
109
112
|
# Grep
|
@@ -168,16 +171,20 @@ module Open
|
|
168
171
|
wget_options[:nice_key] = options.delete(:nice_key)
|
169
172
|
wget_options[:quiet] = options.delete(:quiet)
|
170
173
|
wget_options["--post-data="] = options.delete(:post) if options.include? :post
|
174
|
+
wget_options["--post-file"] = options.delete("--post-file") if options.include? "--post-file"
|
175
|
+
wget_options["--post-file="] = options.delete("--post-file=") if options.include? "--post-file="
|
171
176
|
wget_options[:cookies] = options.delete(:cookies)
|
172
177
|
|
173
178
|
io = case
|
174
179
|
when (not remote?(url))
|
175
180
|
file_open(url, options[:grep])
|
176
|
-
when options[:nocache]
|
181
|
+
when (options[:nocache] and options[:nocache] != :update)
|
177
182
|
# What about grep?
|
178
183
|
wget(url, wget_options)
|
179
|
-
when in_cache(url, wget_options)
|
180
|
-
|
184
|
+
when (options[:nocache] != :update and in_cache(url, wget_options))
|
185
|
+
Misc.lock(in_cache(url, wget_options)) do
|
186
|
+
file_open(in_cache(url, wget_options), options[:grep])
|
187
|
+
end
|
181
188
|
else
|
182
189
|
io = wget(url, wget_options)
|
183
190
|
add_cache(url, io, wget_options)
|
@@ -26,7 +26,7 @@ class Step
|
|
26
26
|
Misc.lock(info_file) do
|
27
27
|
i = info
|
28
28
|
i[key] = value
|
29
|
-
Open.write(info_file, i
|
29
|
+
Open.write(info_file, YAML.dump(i))
|
30
30
|
value
|
31
31
|
end
|
32
32
|
end
|
@@ -140,6 +140,14 @@ class Step
|
|
140
140
|
end
|
141
141
|
|
142
142
|
module Workflow
|
143
|
+
# Sends "status: message" to Log.low, or just the status when no message is
# given (nil or false).
def log(status, message = nil)
  text = message ? "#{ status }: #{ message }" : "#{ status }"
  Log.low text
end
|
150
|
+
|
143
151
|
def task_info(name)
|
144
152
|
task = tasks[name]
|
145
153
|
description = task.description
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -23,7 +23,7 @@ class Step
|
|
23
23
|
@inputs = inputs || []
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
26
|
+
def prepare_result(value, description = nil, info = {})
|
27
27
|
return value if description.nil?
|
28
28
|
Entity.formats[description].setup(value, info.merge(:format => description)) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? description
|
29
29
|
value
|
@@ -31,7 +31,7 @@ class Step
|
|
31
31
|
|
32
32
|
# Runs the task in the context of this step with the stored inputs and passes
# the outcome, together with the task's result description, through
# prepare_result.
def exec
  prepare_result(@task.exec_in(self, *@inputs), @task.result_description)
end
|
36
36
|
|
37
37
|
def join
|
@@ -41,17 +41,18 @@ class Step
|
|
41
41
|
sleep 5
|
42
42
|
end
|
43
43
|
else
|
44
|
+
Log.debug "Waiting for pid: #{@pid}"
|
44
45
|
Process.waitpid @pid
|
45
46
|
@pid = nil
|
46
47
|
end
|
47
48
|
self
|
48
49
|
end
|
49
50
|
|
50
|
-
def run
|
51
|
-
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq do
|
51
|
+
def run(no_load = false)
|
52
|
+
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path}.uniq, :no_load => no_load do
|
52
53
|
log task.name, "Starting task: #{ name }"
|
53
54
|
set_info :dependencies, @dependencies.collect{|dep| [dep.task.name, dep.name]}
|
54
|
-
@dependencies.each{|dependency| dependency.run}
|
55
|
+
@dependencies.each{|dependency| dependency.run true}
|
55
56
|
set_info :status, :start
|
56
57
|
set_info :inputs, Misc.zip2hash(task.inputs, @inputs) unless task.inputs.nil?
|
57
58
|
res = exec
|
@@ -59,7 +60,7 @@ class Step
|
|
59
60
|
res
|
60
61
|
end
|
61
62
|
|
62
|
-
|
63
|
+
prepare_result result, @task.result_description, info
|
63
64
|
end
|
64
65
|
|
65
66
|
def fork
|
@@ -90,7 +91,7 @@ class Step
|
|
90
91
|
result = Persist.persist "Job", @task.result_type, :file => @path, :check => rec_dependencies.collect{|dependency| dependency.path} do
|
91
92
|
exec
|
92
93
|
end
|
93
|
-
|
94
|
+
prepare_result result, @task.result_description, info
|
94
95
|
end
|
95
96
|
|
96
97
|
def clean
|
@@ -108,6 +109,7 @@ class Step
|
|
108
109
|
# All dependencies of this step: first the (flattened) rec_dependencies of
# every direct dependency, then the direct dependencies themselves.
def rec_dependencies
  inherited = @dependencies.map { |dependency| dependency.rec_dependencies }
  inherited.flatten.concat(@dependencies)
end
|
112
|
+
|
111
113
|
# First entry of rec_dependencies whose task name matches +name+; both sides
# are compared as symbols. Returns nil when nothing matches.
def step(name)
  matching = rec_dependencies.select do |candidate|
    candidate.task.name.to_sym == name.to_sym
  end

  matching.first
end
|
data/share/lib/R/util.R
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
rbbt.ruby <- function(code, load = TRUE){
|
1
|
+
rbbt.ruby <- function(code, load = TRUE, flat = FALSE){
|
2
2
|
file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
|
3
3
|
if (load){
|
4
|
-
|
4
|
+
if(flat){
|
5
|
+
data = rbbt.flat.tsv(file);
|
6
|
+
}else{
|
7
|
+
data = rbbt.tsv(file);
|
8
|
+
}
|
5
9
|
rm(file);
|
6
10
|
return(data);
|
7
11
|
}else{
|
@@ -25,8 +29,26 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
25
29
|
return(data);
|
26
30
|
}
|
27
31
|
|
32
|
+
# Read a "flat" TSV file into a named list.
#
# The first line is skipped when it starts with "#: "; the next line is then
# read as the header and discarded. Every following line is split on `sep`
# (literal match): the first piece names the list entry and the remaining
# pieces become its character vector.
#
# filename      path of the file to read
# sep           field separator, matched literally (fixed = TRUE)
# comment.char  accepted for signature parity with rbbt.tsv; not used here
# ...           ignored
#
# Returns a list indexed by row id. A row holding only an id maps to
# character(0); blank lines are skipped.
rbbt.flat.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
  f = file(filename, 'r');
  # Close the connection even when reading stops with an error.
  on.exit(close(f), add = TRUE);

  headers = readLines(f, 1);
  if (length(grep("^#: ", headers)) > 0){
    headers = readLines(f, 1);
  }

  result = list();
  while( TRUE ){
    line = readLines(f, 1);
    if (length(line) == 0){ break;}

    parts = unlist(strsplit(line, sep, fixed = TRUE));
    # A blank line splits into nothing; it has no id to store under.
    if (length(parts) == 0){ next;}

    id = parts[1];
    # parts[-1] drops the id. The former parts[2:length(parts)] counted
    # backwards (2:1) for id-only rows and returned c(NA, id).
    result[[id]] = parts[-1];
  }

  return(result);
}
|
49
|
+
|
28
50
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE,
|
51
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, quote='', row.names= row.names, comment.char = comment.char, ...);
|
30
52
|
f = file(filename, 'r');
|
31
53
|
headers = readLines(f, 1);
|
32
54
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -148,8 +170,8 @@ rbbt.run <- function(filename){
|
|
148
170
|
|
149
171
|
# Addapted from http://www.phaget4.org/R/image_matrix.html
|
150
172
|
rbbt.plot.matrix <- function(x, ...){
|
151
|
-
min <- min(x);
|
152
|
-
max <- max(x);
|
173
|
+
min <- min(x, na.rm=T);
|
174
|
+
max <- max(x, na.rm=T);
|
153
175
|
yLabels <- rownames(x);
|
154
176
|
xLabels <- colnames(x);
|
155
177
|
title <-c();
|
@@ -184,7 +206,7 @@ rbbt.plot.matrix <- function(x, ...){
|
|
184
206
|
ColorRamp <- rgb( seq(0,1,length=256), # Red
|
185
207
|
seq(0,1,length=256), # Green
|
186
208
|
seq(1,0,length=256)) # Blue
|
187
|
-
|
209
|
+
ColorLevels <- seq(min, max, length=length(ColorRamp));
|
188
210
|
|
189
211
|
# Reverse Y axis
|
190
212
|
reverse <- nrow(x) : 1;
|
@@ -35,7 +35,7 @@ class TestAnnotations < Test::Unit::TestCase
|
|
35
35
|
def test_array
|
36
36
|
ary = ["string"]
|
37
37
|
annotation_str = "Annotation String"
|
38
|
-
AnnotatedArray
|
38
|
+
ary.extend AnnotatedArray
|
39
39
|
AnnotatedString.setup(ary, annotation_str)
|
40
40
|
assert_equal [AnnotatedString], ary.annotation_types
|
41
41
|
assert_equal annotation_str, ary.annotation_str
|
@@ -87,6 +87,20 @@ class TestAnnotations < Test::Unit::TestCase
|
|
87
87
|
assert_equal str1, Annotated.load_tsv(Annotated.tsv([str1, str2], :literal, :JSON)).sort.first
|
88
88
|
end
|
89
89
|
|
90
|
+
# An array annotated as a whole (AnnotatedString.setup on the array, then
# extended with AnnotatedArray) is run through Annotated.tsv and
# Annotated.load_tsv; the annotation and the first string must come back.
def test_load_array_tsv
  first = "string1"
  second = "string2"
  strings = [first, second]
  annotation_str = "Annotation String 2"

  AnnotatedString.setup(strings, annotation_str)
  strings.extend AnnotatedArray

  # Round trip with all fields.
  reloaded = Annotated.load_tsv(Annotated.tsv(strings, :all))
  assert_equal annotation_str, reloaded.annotation_str

  # Round trip with only the :literal and :JSON fields.
  reloaded = Annotated.load_tsv(Annotated.tsv(strings, :literal, :JSON))
  assert_equal first, reloaded.sort.first
end
|
103
|
+
|
90
104
|
def test_inheritance
|
91
105
|
str = "string1"
|
92
106
|
annotation_str1 = "Annotation String 1"
|
@@ -104,7 +118,7 @@ class TestAnnotations < Test::Unit::TestCase
|
|
104
118
|
assert_equal str + annotation_str, str.add_annot
|
105
119
|
end
|
106
120
|
|
107
|
-
|
121
|
+
def test_annotation_positional2hash
|
108
122
|
str = "string"
|
109
123
|
annotation_str = "Annotation String"
|
110
124
|
AnnotatedString.setup(str, :annotation_str => annotation_str)
|
data/test/rbbt/test_persist.rb
CHANGED
@@ -3,7 +3,43 @@ require 'rbbt/persist'
|
|
3
3
|
require 'rbbt/util/tmpfile'
|
4
4
|
require 'test/unit'
|
5
5
|
|
6
|
+
# Annotation module used by the persistence tests below; it declares a single
# annotation, :test_annotation.
module TestAnnotation
  extend Annotation

  self.annotation(:test_annotation)
end
|
6
11
|
class TestPersist < Test::Unit::TestCase
|
12
|
+
|
13
|
+
# Persists two annotated strings with type :annotations and checks that both
# the strings and their :test_annotation values are available from the value
# returned by Persist.persist, on a first and on a second call against the
# same file.
def test_annotation_persist
  TmpFile.with_file do |tmp|
    first = "Entity 1"
    second = "Entity 2"

    TestAnnotation.setup(first, :test_annotation => "1")
    TestAnnotation.setup(second, :test_annotation => "2")

    entities = [first, second]

    # First call against the temporary file.
    stored = Persist.persist("Test", :annotations, :file => tmp) { entities }

    assert_equal "Entity 1", stored.first
    assert_equal "Entity 2", stored.last
    assert_equal "1", stored.first.test_annotation
    assert_equal "2", stored.last.test_annotation

    # Second call against the same file. Results are sorted before checking;
    # presumably the reloaded order is not guaranteed -- confirm against
    # Persist.
    reloaded = Persist.persist("Test", :annotations, :file => tmp) { entities }

    assert_equal "Entity 1", reloaded.sort.first
    assert_equal "Entity 2", reloaded.sort.last
    assert_equal "1", reloaded.sort.first.test_annotation
    assert_equal "2", reloaded.sort.last.test_annotation
  end
end
|
42
|
+
|
7
43
|
def test_array_persist
|
8
44
|
TmpFile.with_file do |tmp|
|
9
45
|
10.times do
|
@@ -12,7 +48,7 @@ class TestPersist < Test::Unit::TestCase
|
|
12
48
|
end)
|
13
49
|
end
|
14
50
|
end
|
15
|
-
|
51
|
+
|
16
52
|
TmpFile.with_file do |tmp|
|
17
53
|
10.times do
|
18
54
|
assert_equal [],(Persist.persist("Test", :array, :file => tmp) do
|
data/test/rbbt/test_tsv.rb
CHANGED
@@ -458,5 +458,17 @@ row2 A|AA|AAA
|
|
458
458
|
|
459
459
|
end
|
460
460
|
|
461
|
+
# Opens a :flat file as :single keyed by the "Value" field and checks that a
# value of the original rows ("aaa") is present as a key.
def test_flat2single
  content =<<-EOF
#: :type=:flat
#Id Value
row1 a aa aaa
row2 A AA AAA
  EOF

  TmpFile.with_file(content) do |filename|
    tsv = TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value")
    assert tsv.include?("aaa")
  end
end
|
462
474
|
end
|
@@ -221,7 +221,7 @@ row2 E
|
|
221
221
|
|
222
222
|
tsv1.attach tsv2, :fields => ["OtherID"], :persist_input => true
|
223
223
|
|
224
|
-
assert_equal tsv1.fields
|
224
|
+
assert_equal tsv1.fields, %w(ValueA ValueB OtherID)
|
225
225
|
assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
|
226
226
|
end
|
227
227
|
|