rbbt-util 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/rbbt_Rutil.rb +4 -0
- data/bin/rbbt_exec.rb +33 -0
- data/lib/rbbt/util/R.rb +3 -2
- data/lib/rbbt/util/cmd.rb +13 -2
- data/lib/rbbt/util/fix_width_table.rb +2 -0
- data/lib/rbbt/util/misc.rb +8 -1
- data/lib/rbbt/util/open.rb +2 -2
- data/lib/rbbt/util/persistence.rb +27 -18
- data/lib/rbbt/util/resource.rb +3 -2
- data/lib/rbbt/util/task.rb +77 -5
- data/lib/rbbt/util/task/job.rb +20 -4
- data/lib/rbbt/util/tc_hash.rb +2 -1
- data/lib/rbbt/util/tsv.rb +59 -33
- data/lib/rbbt/util/tsv/accessor.rb +27 -2
- data/lib/rbbt/util/tsv/attach.rb +48 -121
- data/lib/rbbt/util/tsv/index.rb +4 -0
- data/lib/rbbt/util/tsv/manipulate.rb +25 -3
- data/lib/rbbt/util/tsv/misc.rb +31 -0
- data/lib/rbbt/util/tsv/parse.rb +27 -4
- data/lib/rbbt/util/tsv/resource.rb +6 -0
- data/lib/rbbt/util/workflow.rb +1 -1
- data/lib/rbbt/util/workflow/soap.rb +117 -0
- data/share/lib/R/util.R +52 -2
- data/test/rbbt/util/test_misc.rb +11 -11
- data/test/rbbt/util/test_persistence.rb +13 -0
- data/test/rbbt/util/test_tc_hash.rb +4 -2
- data/test/rbbt/util/test_tsv.rb +31 -4
- data/test/rbbt/util/test_workflow.rb +11 -3
- data/test/rbbt/util/tsv/test_attach.rb +35 -1
- data/test/rbbt/util/tsv/test_index.rb +1 -3
- metadata +12 -6
@@ -1,5 +1,7 @@
|
|
1
1
|
class TSV
|
2
2
|
|
3
|
+
attr_accessor :monitor
|
4
|
+
|
3
5
|
def through(new_key_field = :key, new_fields = nil, &block)
|
4
6
|
|
5
7
|
# Get positions
|
@@ -43,11 +45,23 @@ class TSV
|
|
43
45
|
end if fields
|
44
46
|
|
45
47
|
# Cycle through
|
48
|
+
if monitor
|
49
|
+
desc = "Parsing Stream"
|
50
|
+
step = 100
|
51
|
+
if Hash === monitor
|
52
|
+
desc = monitor[:desc] if monitor.include? :desc
|
53
|
+
step = monitor[:step] if monitor.include? :step
|
54
|
+
end
|
55
|
+
progress_monitor = Progress::Bar.new(size, 0, step, desc)
|
56
|
+
else
|
57
|
+
progress_monitor = nil
|
58
|
+
end
|
46
59
|
|
47
60
|
if new_key_position == :key and ( new_fields.nil? or new_fields == fields)
|
48
|
-
each do |key, fields| yield key, fields end
|
61
|
+
each do |key, fields| progress_monitor.tick if progress_monitor; yield key, fields end
|
49
62
|
else
|
50
63
|
each do |key, fields|
|
64
|
+
progress_monitor.tick if progress_monitor;
|
51
65
|
new_key_value = case
|
52
66
|
when (new_key_position.nil? or new_key_position == :key)
|
53
67
|
key
|
@@ -190,8 +204,14 @@ class TSV
|
|
190
204
|
new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
|
191
205
|
end
|
192
206
|
when String === method
|
193
|
-
|
194
|
-
|
207
|
+
if block_given?
|
208
|
+
through do |key, values|
|
209
|
+
new[key] = values if yield((method == key_field or method == :key)? key : values[method])
|
210
|
+
end
|
211
|
+
else
|
212
|
+
through do |key, values|
|
213
|
+
new[key] = values if [key,values].flatten.select{|v| v == method}.any?
|
214
|
+
end
|
195
215
|
end
|
196
216
|
when Hash === method
|
197
217
|
key = method.keys.first
|
@@ -255,6 +275,8 @@ class TSV
|
|
255
275
|
end
|
256
276
|
|
257
277
|
self.fields = self.fields + [name] if fields != nil and name != nil
|
278
|
+
|
279
|
+
self
|
258
280
|
end
|
259
281
|
|
260
282
|
def add_fields(names = nil)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'rbbt/util/tsv'
|
2
|
+
|
3
|
+
class TSV
|
4
|
+
def self.keys(file, sep = "\t")
|
5
|
+
CMD.cmd("cut -f 1 -d'#{sep}' '#{file}'|grep -v ^#").read.split("\n")
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.field_match_counts(file, values)
|
9
|
+
key_field, fields = TSV.parse_header(Open.open(file))
|
10
|
+
fields.unshift key_field
|
11
|
+
|
12
|
+
counts = {}
|
13
|
+
TmpFile.with_file do |tmpfile|
|
14
|
+
if Array === values
|
15
|
+
Open.write(tmpfile, values * "\n")
|
16
|
+
else
|
17
|
+
FileUtils.ln_s values, tmpfile
|
18
|
+
end
|
19
|
+
|
20
|
+
fields.each_with_index do |field,i|
|
21
|
+
counts[field] = begin
|
22
|
+
CMD.cmd("cat #{ file } |grep -v ^#|cut -f #{i + 1}|tr '|' '\\n' |sort -u |grep [[:alpha:]]|grep -f #{tmpfile} -F -w").read.count("\n")
|
23
|
+
rescue
|
24
|
+
0
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
counts
|
30
|
+
end
|
31
|
+
end
|
data/lib/rbbt/util/tsv/parse.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
|
+
require 'progress-bar'
|
2
3
|
class TSV
|
3
4
|
|
4
5
|
def self.parse_fields(io, delimiter = "\t")
|
@@ -7,7 +8,6 @@ class TSV
|
|
7
8
|
## split with delimiter, do not remove empty
|
8
9
|
fields = io.split(delimiter, -1)
|
9
10
|
|
10
|
-
|
11
11
|
fields
|
12
12
|
end
|
13
13
|
|
@@ -58,6 +58,7 @@ class TSV
|
|
58
58
|
options = Misc.add_defaults options, more_options
|
59
59
|
|
60
60
|
options = Misc.add_defaults options,
|
61
|
+
:monitor => false,
|
61
62
|
:case_insensitive => false,
|
62
63
|
:type => :double,
|
63
64
|
:namespace => nil,
|
@@ -78,6 +79,8 @@ class TSV
|
|
78
79
|
:exclude => nil,
|
79
80
|
:select => nil,
|
80
81
|
:grep => nil
|
82
|
+
|
83
|
+
monitor = Misc.process_options options, :monitor
|
81
84
|
|
82
85
|
header_hash, sep, sep2 =
|
83
86
|
Misc.process_options options, :header_hash, :sep, :sep2
|
@@ -119,13 +122,33 @@ class TSV
|
|
119
122
|
|
120
123
|
exclude ||= Misc.process_options options, :reject if options.include? :reject
|
121
124
|
|
125
|
+
if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
|
126
|
+
size = case
|
127
|
+
when stream.respond_to?(:size)
|
128
|
+
stream.size
|
129
|
+
else
|
130
|
+
stream.stat.size
|
131
|
+
end
|
132
|
+
desc = "Parsing Stream"
|
133
|
+
step = 100
|
134
|
+
if Hash === monitor
|
135
|
+
desc = monitor[:desc] if monitor.include? :desc
|
136
|
+
step = monitor[:step] if monitor.include? :step
|
137
|
+
end
|
138
|
+
progress_monitor = Progress::Bar.new(size, 0, step, desc)
|
139
|
+
else
|
140
|
+
progress_monitor = nil
|
141
|
+
end
|
142
|
+
|
122
143
|
#{{{ Process rest
|
123
|
-
data = {}
|
144
|
+
data = options[:persistence_data] || {}
|
124
145
|
single = type.to_sym != :double
|
125
146
|
max_cols = 0
|
126
147
|
while line do
|
127
148
|
line.chomp!
|
128
149
|
|
150
|
+
progress_monitor.tick(stream.pos) if progress_monitor
|
151
|
+
|
129
152
|
if line.empty? or
|
130
153
|
(exclude and exclude.call(line)) or
|
131
154
|
(select and not select.call(line))
|
@@ -187,7 +210,7 @@ class TSV
|
|
187
210
|
data[id] = extra.first
|
188
211
|
when type == :flat
|
189
212
|
if data.include? id
|
190
|
-
data[id]
|
213
|
+
data[id] = data[id] + extra
|
191
214
|
else
|
192
215
|
data[id] = extra
|
193
216
|
end
|
@@ -261,7 +284,7 @@ class TSV
|
|
261
284
|
end
|
262
285
|
end
|
263
286
|
|
264
|
-
if keep_empty and max_cols > 0
|
287
|
+
if keep_empty and max_cols > 0 and not Persistence::TSV === data
|
265
288
|
data.each do |key, values|
|
266
289
|
next if values =~ /__Ref:/
|
267
290
|
new_values = values
|
@@ -16,6 +16,12 @@ module Resource
|
|
16
16
|
namespace || File.basename(File.dirname(self))
|
17
17
|
end
|
18
18
|
|
19
|
+
def to_yaml(opts = {})
|
20
|
+
YAML.quick_emit( nil, opts ) { |out|
|
21
|
+
out.scalar( taguri, self, :plain )
|
22
|
+
}
|
23
|
+
end
|
24
|
+
|
19
25
|
def index(options = {})
|
20
26
|
TSV.index self, options
|
21
27
|
end
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -55,7 +55,7 @@ module WorkFlow
|
|
55
55
|
|
56
56
|
def task_dependencies(dependencies)
|
57
57
|
dependencies = [dependencies] unless Array === dependencies
|
58
|
-
@dangling_dependencies = dependencies
|
58
|
+
@dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
|
59
59
|
end
|
60
60
|
|
61
61
|
def task_description(description)
|
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'simplews'
|
2
|
+
require 'rbbt/util/workflow'
|
3
|
+
require 'base64'
|
4
|
+
|
5
|
+
class WorkFlowWS < SimpleWS
|
6
|
+
|
7
|
+
def self.klass=(klass)
|
8
|
+
@klass = klass
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.klass
|
12
|
+
@klass || self
|
13
|
+
end
|
14
|
+
|
15
|
+
def task(name)
|
16
|
+
self.class.klass.tasks[name]
|
17
|
+
end
|
18
|
+
|
19
|
+
def export(name)
|
20
|
+
task = self.class.klass.tasks[name]
|
21
|
+
|
22
|
+
options, optional_options = task.option_summary
|
23
|
+
|
24
|
+
desc task.description
|
25
|
+
options.each do |option|
|
26
|
+
param_desc option[:name] => option[:description] if option[:description]
|
27
|
+
end
|
28
|
+
param_desc :return => "Job Identifier"
|
29
|
+
option_names = [:name] + options.collect{|option| option[:name]}
|
30
|
+
option_types = Hash[*option_names.zip([ :string] + options.collect{|option| option[:type] || :string}).flatten]
|
31
|
+
serve name, option_names, option_types do |*args|
|
32
|
+
task(name).job(*args).fork.id
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
param_desc :task => "Task name"
|
37
|
+
param_desc :id => "Job id"
|
38
|
+
serve :abort, %w(task id), {:return => false, :task => :string, :id => :string} do |task, id|
|
39
|
+
task(task).load(id).abort
|
40
|
+
nil
|
41
|
+
end
|
42
|
+
|
43
|
+
param_desc :task => "Task name"
|
44
|
+
param_desc :id => "Job id"
|
45
|
+
param_desc :return => "Info hash in yaml"
|
46
|
+
serve :info, %w(task id), {:task => :string, :id => :string} do |task, id|
|
47
|
+
Open.read(task(task).load(id).info_file)
|
48
|
+
end
|
49
|
+
|
50
|
+
param_desc :task => "Task name"
|
51
|
+
param_desc :id => "Job id"
|
52
|
+
param_desc :return => "Step"
|
53
|
+
serve :step, %w(task id), {:task => :string, :id => :string} do |task, id|
|
54
|
+
task(task).load(id).step.to_s
|
55
|
+
end
|
56
|
+
|
57
|
+
param_desc :task => "Task name"
|
58
|
+
param_desc :id => "Job id"
|
59
|
+
param_desc :return => "True if job is done (error or not)"
|
60
|
+
serve :done, %w(task id), {:return => :boolean, :task => :string, :id => :string} do |task, id|
|
61
|
+
task(task).load(id).done?
|
62
|
+
end
|
63
|
+
|
64
|
+
param_desc :task => "Task name"
|
65
|
+
param_desc :id => "Job id"
|
66
|
+
param_desc :return => "True if job finished with error. Error message is the last of the messages (see 'messages' method)."
|
67
|
+
serve :error, %w(task id), {:return => :boolean, :task => :string, :id => :string} do |task, id|
|
68
|
+
task(task).load(id).error?
|
69
|
+
end
|
70
|
+
|
71
|
+
param_desc :task => "Task name"
|
72
|
+
param_desc :id => "Job id"
|
73
|
+
param_desc :return => "Messages"
|
74
|
+
serve :messages, %w(task id), {:return => :array, :task => :string, :id => :string} do |task, id|
|
75
|
+
task(task).load(id).messages
|
76
|
+
end
|
77
|
+
|
78
|
+
param_desc :task => "Task name"
|
79
|
+
param_desc :id => "Job id"
|
80
|
+
param_desc :return => "Job result in Base64"
|
81
|
+
serve :load, %w(task id), {:return => :binary, :task => :string, :id => :string} do |task, id|
|
82
|
+
Base64.encode64(task(task).load(id).read)
|
83
|
+
end
|
84
|
+
|
85
|
+
param_desc :task => "Task name"
|
86
|
+
param_desc :id => "Job id"
|
87
|
+
param_desc :return => "File names"
|
88
|
+
serve :files, %w(task id), {:return => :array, :task => :string, :id => :string} do |task, id|
|
89
|
+
task(task).load(id).files
|
90
|
+
end
|
91
|
+
|
92
|
+
param_desc :task => "Task name"
|
93
|
+
param_desc :id => "Job id"
|
94
|
+
param_desc :file => "File name"
|
95
|
+
param_desc :return => "File contents in Base64"
|
96
|
+
serve :file, %w(task id file), {:return => :array, :task => :string, :id => :string, :file => :string} do |task, id|
|
97
|
+
Base64.encode64(task(task).load(id).files(file).read)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
if __FILE__ == $0
|
103
|
+
|
104
|
+
require 'rbbt/sources/organism/sequence'
|
105
|
+
class SequenceWF < WorkFlowWS
|
106
|
+
self.klass = Organism
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
wf = SequenceWF.new
|
111
|
+
wf.export :genomic_mutations_to_genes
|
112
|
+
Open.write('/tmp/foo.wsdl', wf.wsdl)
|
113
|
+
wf.start
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
|
data/share/lib/R/util.R
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
rbbt.ruby <- function(code, load = TRUE){
|
2
|
+
file = system('rbbt_exec.rb - file', input = code, intern=TRUE);
|
3
|
+
if (load){
|
4
|
+
data = rbbt.tsv(file);
|
5
|
+
rm(file);
|
6
|
+
return(data);
|
7
|
+
}else{
|
8
|
+
return(file);
|
9
|
+
}
|
10
|
+
}
|
11
|
+
|
1
12
|
rbbt.glob <- function(d, pattern){
|
2
13
|
d=sub("/$", '', d);
|
3
14
|
sapply(dir(d, pattern), function(file){paste(d,file,sep="/")});
|
@@ -14,8 +25,8 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
14
25
|
return(data);
|
15
26
|
}
|
16
27
|
|
17
|
-
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
|
18
|
-
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=
|
28
|
+
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
|
19
30
|
f = file(filename, 'r');
|
20
31
|
headers = readLines(f, 1);
|
21
32
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -30,6 +41,13 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
|
|
30
41
|
return(data);
|
31
42
|
}
|
32
43
|
|
44
|
+
rbbt.tsv2matrix <- function(data){
|
45
|
+
new <- data.matrix(data);
|
46
|
+
colnames(new) <- colnames(data);
|
47
|
+
rownames(new) <- rownames(data);
|
48
|
+
return(new);
|
49
|
+
}
|
50
|
+
|
33
51
|
rbbt.tsv.write <- function(filename, data, key.field = NULL){
|
34
52
|
if (is.null(key.field)){ key.field = "ID";}
|
35
53
|
|
@@ -55,6 +73,14 @@ rbbt.percent <- function(values){
|
|
55
73
|
values=values/sum(values);
|
56
74
|
}
|
57
75
|
|
76
|
+
rbbt.split <- function(string){
|
77
|
+
return(unlist(strsplit(string, "\\|")));
|
78
|
+
}
|
79
|
+
|
80
|
+
rbbt.last <-function(data){
|
81
|
+
data[length(data)];
|
82
|
+
}
|
83
|
+
|
58
84
|
rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
|
59
85
|
if (is.numeric){
|
60
86
|
field.data=as.numeric(data[,field]);
|
@@ -89,3 +115,27 @@ rbbt.init <- function(data, new){
|
|
89
115
|
}
|
90
116
|
}
|
91
117
|
|
118
|
+
rbbt.this.script = system("rbbt_Rutil.rb", intern =T)
|
119
|
+
|
120
|
+
rbbt.reload <- function (){
|
121
|
+
source(rbbt.this.script)
|
122
|
+
}
|
123
|
+
|
124
|
+
rbbt.parse <- function(filename){
|
125
|
+
f <- file(filename, open='r');
|
126
|
+
lines <- readLines(f);
|
127
|
+
close(f);
|
128
|
+
|
129
|
+
from = match(1,as.vector(sapply(lines, function(x){grep('#[[:space:]]*START',x,ignore.case=TRUE)})));
|
130
|
+
to = match(1,as.vector(sapply(lines, function(x){grep('#[[:space:]]*END',x,ignore.case=TRUE)})));
|
131
|
+
if (is.na(from)){from = 1}
|
132
|
+
if (is.na(to)){to = length(lines)}
|
133
|
+
return(parse(text=paste(lines[from:to],sep="\n")));
|
134
|
+
}
|
135
|
+
|
136
|
+
rbbt.run <- function(filename){
|
137
|
+
rbbt.reload();
|
138
|
+
eval(rbbt.parse(filename), envir=globalenv());
|
139
|
+
}
|
140
|
+
|
141
|
+
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -4,19 +4,19 @@ require 'test/unit'
|
|
4
4
|
|
5
5
|
class TestMisc < Test::Unit::TestCase
|
6
6
|
|
7
|
-
def
|
7
|
+
def test_pdf2text_example
|
8
8
|
assert PDF2Text.pdf2text(test_datafile('example.pdf')).read =~ /An Example Paper/i
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
11
|
+
def test_pdf2text_EPAR
|
12
12
|
assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
15
|
+
def test_pdf2text_wrong
|
16
16
|
assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
|
17
17
|
end
|
18
18
|
|
19
|
-
def
|
19
|
+
def test_string2hash
|
20
20
|
assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
|
21
21
|
assert(Misc.string2hash(":true")[:true] == true)
|
22
22
|
assert(Misc.string2hash("true")["true"] == true)
|
@@ -27,17 +27,17 @@ class TestMisc < Test::Unit::TestCase
|
|
27
27
|
assert(Misc.string2hash("a=b#c=d#:h=:j")[:h] == :j)
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def test_named_array
|
31
31
|
a = NamedArray.name([1,2,3,4], %w(a b c d))
|
32
32
|
assert_equal(1, a['a'])
|
33
33
|
end
|
34
34
|
|
35
|
-
def
|
35
|
+
def test_path_relative_to
|
36
36
|
assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
|
37
37
|
end
|
38
38
|
|
39
|
-
def
|
40
|
-
|
39
|
+
def test_chunk
|
40
|
+
test =<<-EOF
|
41
41
|
This is an example file. Entries are separated by Entry
|
42
42
|
-- Entry
|
43
43
|
1
|
@@ -52,7 +52,7 @@ This is an example file. Entries are separated by Entry
|
|
52
52
|
assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
|
53
53
|
end
|
54
54
|
|
55
|
-
def
|
55
|
+
def test_hash2string
|
56
56
|
hash = {}
|
57
57
|
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
58
58
|
|
@@ -73,14 +73,14 @@ This is an example file. Entries are separated by Entry
|
|
73
73
|
|
74
74
|
end
|
75
75
|
|
76
|
-
def
|
76
|
+
def test_merge
|
77
77
|
a = [[1],[2]]
|
78
78
|
a = NamedArray.name a, %w(1 2)
|
79
79
|
a.merge [3,4]
|
80
80
|
assert_equal [1,3], a[0]
|
81
81
|
end
|
82
82
|
|
83
|
-
def
|
83
|
+
def test_indiferent_hash
|
84
84
|
a = {:a => 1, "b" => 2}
|
85
85
|
a.extend IndiferentHash
|
86
86
|
|