rbbt-util 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rbbt_query.rb +1 -1
- data/lib/rbbt/util/cmd.rb +115 -67
- data/lib/rbbt/util/fix_width_table.rb +18 -3
- data/lib/rbbt/util/misc.rb +106 -6
- data/lib/rbbt/util/open.rb +9 -7
- data/lib/rbbt/util/persistence.rb +17 -14
- data/lib/rbbt/util/resource.rb +10 -3
- data/lib/rbbt/util/task.rb +2 -2
- data/lib/rbbt/util/task/job.rb +16 -3
- data/lib/rbbt/util/tc_hash.rb +64 -27
- data/lib/rbbt/util/tsv.rb +44 -21
- data/lib/rbbt/util/tsv/accessor.rb +8 -6
- data/lib/rbbt/util/tsv/attach.rb +19 -28
- data/lib/rbbt/util/tsv/filters.rb +193 -0
- data/lib/rbbt/util/tsv/index.rb +80 -8
- data/lib/rbbt/util/tsv/manipulate.rb +17 -6
- data/lib/rbbt/util/tsv/misc.rb +10 -0
- data/lib/rbbt/util/tsv/parse.rb +18 -1
- data/lib/rbbt/util/workflow.rb +12 -3
- data/lib/rbbt/util/workflow/soap.rb +0 -1
- data/share/install/software/lib/install_helpers +0 -2
- data/share/lib/R/util.R +3 -3
- data/test/rbbt/util/test_cmd.rb +23 -0
- data/test/rbbt/util/test_excel2tsv.rb +1 -1
- data/test/rbbt/util/test_misc.rb +41 -11
- data/test/rbbt/util/test_open.rb +2 -2
- data/test/rbbt/util/test_persistence.rb +2 -2
- data/test/rbbt/util/test_resource.rb +4 -20
- data/test/rbbt/util/test_tc_hash.rb +38 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/util/test_tsv.rb +6 -0
- data/test/rbbt/util/test_workflow.rb +14 -10
- data/test/rbbt/util/tsv/test_accessor.rb +42 -0
- data/test/rbbt/util/tsv/test_filters.rb +141 -0
- data/test/rbbt/util/tsv/test_index.rb +32 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
- data/test/test_helper.rb +3 -1
- metadata +41 -38
@@ -12,7 +12,7 @@ class TSV
|
|
12
12
|
when Integer === new_fields
|
13
13
|
[new_fields]
|
14
14
|
when String === new_fields
|
15
|
-
[identify_field
|
15
|
+
[identify_field(new_fields)]
|
16
16
|
when Array === new_fields
|
17
17
|
new_fields.collect{|new_field| identify_field new_field}
|
18
18
|
when new_fields == :key
|
@@ -46,7 +46,7 @@ class TSV
|
|
46
46
|
|
47
47
|
# Cycle through
|
48
48
|
if monitor
|
49
|
-
desc = "
|
49
|
+
desc = "Iterating TSV"
|
50
50
|
step = 100
|
51
51
|
if Hash === monitor
|
52
52
|
desc = monitor[:desc] if monitor.include? :desc
|
@@ -84,14 +84,22 @@ class TSV
|
|
84
84
|
f
|
85
85
|
else
|
86
86
|
f = fields.dup
|
87
|
-
|
87
|
+
case
|
88
|
+
when type == :single
|
89
|
+
f = [f,key]
|
90
|
+
when type == :double
|
88
91
|
f.push [key]
|
89
92
|
else
|
90
93
|
f.push key
|
91
94
|
end
|
92
95
|
f.values_at *new_field_positions.collect{|pos| pos == :key ? -1 : pos }
|
93
96
|
end
|
94
|
-
|
97
|
+
|
98
|
+
if type == :single
|
99
|
+
new_field_values = new_field_values.first
|
100
|
+
else
|
101
|
+
new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
|
102
|
+
end
|
95
103
|
|
96
104
|
next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
|
97
105
|
yield new_key_value, new_field_values
|
@@ -221,14 +229,17 @@ class TSV
|
|
221
229
|
method.each{|item| new[item] = self[item] if self.include? item}
|
222
230
|
when Array === method
|
223
231
|
through :key, key do |key, values|
|
232
|
+
values = [values] if type == :single
|
224
233
|
new[key] = self[key] if (values.flatten & method).any?
|
225
234
|
end
|
226
235
|
when Regexp === method
|
227
236
|
through :key, key do |key, values|
|
237
|
+
values = [values] if type == :single
|
228
238
|
new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
|
229
239
|
end
|
230
240
|
when String === method
|
231
241
|
through :key, key do |key, values|
|
242
|
+
values = [values] if type == :single
|
232
243
|
new[key] = self[key] if values.flatten.select{|v| v == method}.any?
|
233
244
|
end
|
234
245
|
end
|
@@ -267,7 +278,7 @@ class TSV
|
|
267
278
|
end
|
268
279
|
|
269
280
|
def add_field(name = nil)
|
270
|
-
|
281
|
+
through do |key, values|
|
271
282
|
new_values = yield(key, values)
|
272
283
|
new_values = [new_values] if type == :double and not Array === new_values
|
273
284
|
|
@@ -280,7 +291,7 @@ class TSV
|
|
280
291
|
end
|
281
292
|
|
282
293
|
def add_fields(names = nil)
|
283
|
-
|
294
|
+
through do |key, values|
|
284
295
|
new_values = yield(key, values)
|
285
296
|
new_values = [new_values] if type == :double and not Array == new_values
|
286
297
|
|
data/lib/rbbt/util/tsv/misc.rb
CHANGED
data/lib/rbbt/util/tsv/parse.rb
CHANGED
@@ -142,12 +142,29 @@ class TSV
|
|
142
142
|
|
143
143
|
#{{{ Process rest
|
144
144
|
data = options[:persistence_data] || {}
|
145
|
+
if Persistence::TSV === data
|
146
|
+
serializer = case
|
147
|
+
when ((cast == "to_i" or cast == :to_i) and type == :single)
|
148
|
+
:integer
|
149
|
+
when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
|
150
|
+
:integer_array
|
151
|
+
when (type == :list or type == :flat)
|
152
|
+
:list
|
153
|
+
when type == :single
|
154
|
+
:single
|
155
|
+
else
|
156
|
+
:double
|
157
|
+
end
|
158
|
+
data.serializer = serializer
|
159
|
+
end
|
160
|
+
|
161
|
+
|
145
162
|
single = type.to_sym != :double
|
146
163
|
max_cols = 0
|
147
164
|
while line do
|
148
165
|
line.chomp!
|
149
166
|
|
150
|
-
progress_monitor.tick(stream.pos) if progress_monitor
|
167
|
+
progress_monitor.tick(stream.pos) if progress_monitor
|
151
168
|
|
152
169
|
if line.empty? or
|
153
170
|
(exclude and exclude.call(line)) or
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -4,6 +4,15 @@ require 'rbbt/util/persistence'
|
|
4
4
|
require 'rbbt/util/misc'
|
5
5
|
|
6
6
|
module WorkFlow
|
7
|
+
|
8
|
+
def self.require_workflow(path)
|
9
|
+
if Rbbt.etc.workflow_dir.exists?
|
10
|
+
require Dir.glob(File.join(Rbbt.etc.workflow_dir.read.strip, '*', path + '.rb')).first
|
11
|
+
else
|
12
|
+
require Dir.glob(File.join(Rbbt.share.workflows.find, '*', path + '.rb')).first
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
7
16
|
def self.extended(base)
|
8
17
|
class << base
|
9
18
|
attr_accessor :tasks, :jobdir, :dangling_options, :dangling_option_descriptions,
|
@@ -53,8 +62,8 @@ module WorkFlow
|
|
53
62
|
@dangling_option_defaults[name] = default if default
|
54
63
|
end
|
55
64
|
|
56
|
-
def task_dependencies(dependencies)
|
57
|
-
dependencies =
|
65
|
+
def task_dependencies(*dependencies)
|
66
|
+
dependencies = dependencies.flatten
|
58
67
|
@dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
|
59
68
|
end
|
60
69
|
|
@@ -93,7 +102,7 @@ module WorkFlow
|
|
93
102
|
@last_task = task
|
94
103
|
end
|
95
104
|
|
96
|
-
def job(task, jobname, *args)
|
105
|
+
def job(task, jobname = "Default", *args)
|
97
106
|
tasks[task].job(jobname, *args)
|
98
107
|
end
|
99
108
|
|
@@ -8,8 +8,6 @@ OPT_SCM_DIR="$SOFTWARE_DIR/scm"; [ -d $OPT_SCM_DIR ] || mkdir -p $OPT_SCM_DIR
|
|
8
8
|
OPT_JAR_DIR="$OPT_DIR/jars"; [ -d $OPT_JAR_DIR ] || mkdir -p $OPT_JAR_DIR
|
9
9
|
OPT_BUILD_DIR="$SOFTWARE_DIR/.build"; [ -d $OPT_BUILD_DIR ] || mkdir -p $OPT_BUILD_DIR
|
10
10
|
|
11
|
-
#source "$HOME/config/bash/_utility_functions"
|
12
|
-
|
13
11
|
function expand_path(){
|
14
12
|
name=$(basename $1)
|
15
13
|
dir=$(dirname $1)
|
data/share/lib/R/util.R
CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
26
26
|
}
|
27
27
|
|
28
28
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
|
29
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
|
30
30
|
f = file(filename, 'r');
|
31
31
|
headers = readLines(f, 1);
|
32
32
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -93,7 +93,7 @@ rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
|
|
93
93
|
|
94
94
|
rbbt.add <- function(data, new){
|
95
95
|
if (is.null(data)){
|
96
|
-
return(new);
|
96
|
+
return(c(new));
|
97
97
|
}else{
|
98
98
|
return(c(data, new));
|
99
99
|
}
|
@@ -101,7 +101,7 @@ rbbt.add <- function(data, new){
|
|
101
101
|
|
102
102
|
rbbt.acc <- function(data, new){
|
103
103
|
if (is.null(data)){
|
104
|
-
return(new);
|
104
|
+
return(c(new));
|
105
105
|
}else{
|
106
106
|
return(unique(c(data, new)));
|
107
107
|
}
|
data/test/rbbt/util/test_cmd.rb
CHANGED
@@ -22,6 +22,7 @@ class TestCmd < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_pipe
|
25
|
+
assert_equal("test\n", CMD.cmd("echo test", :pipe => true).read)
|
25
26
|
assert_equal("test\n", CMD.cmd("echo '{opt}' test", :pipe => true).read)
|
26
27
|
assert_equal("test", CMD.cmd("echo '{opt}' test", "-n" => true, :pipe => true).read)
|
27
28
|
assert_equal("test2\n", CMD.cmd("cut", "-f" => 2, "-d" => '" "', :in => "test1 test2", :pipe => true).read)
|
@@ -41,4 +42,26 @@ class TestCmd < Test::Unit::TestCase
|
|
41
42
|
assert_raise CMD::CMDError do CMD.cmd('ls -fake_option', :stderr => true, :pipe => true).read end
|
42
43
|
end
|
43
44
|
|
45
|
+
def test_pipes
|
46
|
+
text = <<-EOF
|
47
|
+
line1
|
48
|
+
line2
|
49
|
+
line3
|
50
|
+
line11
|
51
|
+
line22
|
52
|
+
line33
|
53
|
+
EOF
|
54
|
+
|
55
|
+
TmpFile.with_file(text * 100) do |file|
|
56
|
+
CMD.cmd("gzip #{ file }")
|
57
|
+
|
58
|
+
gz = CMD.cmd("gunzip", :in => File.open(file + '.gz'), :pipe => true)
|
59
|
+
io = CMD.cmd('tail -n 10', :in => gz, :pipe => true)
|
60
|
+
assert_equal 10, io.read.split(/\n/).length
|
61
|
+
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
44
67
|
end
|
@@ -3,7 +3,7 @@ require 'rbbt/util/excel2tsv'
|
|
3
3
|
|
4
4
|
class TestTSV < Test::Unit::TestCase
|
5
5
|
def test_tsv2excel
|
6
|
-
tsv = TSV.excel2tsv(
|
6
|
+
tsv = TSV.excel2tsv(datafile_test('Test.xls'), :header => true)
|
7
7
|
assert_equal 'Id', tsv.key_field
|
8
8
|
end
|
9
9
|
end
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -5,7 +5,7 @@ require 'test/unit'
|
|
5
5
|
class TestMisc < Test::Unit::TestCase
|
6
6
|
|
7
7
|
def test_pdf2text_example
|
8
|
-
assert PDF2Text.pdf2text(
|
8
|
+
assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_pdf2text_EPAR
|
@@ -13,7 +13,7 @@ class TestMisc < Test::Unit::TestCase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_pdf2text_wrong
|
16
|
-
assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
|
16
|
+
assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_string2hash
|
@@ -84,32 +84,62 @@ This is an example file. Entries are separated by Entry
|
|
84
84
|
a = {:a => 1, "b" => 2}
|
85
85
|
a.extend IndiferentHash
|
86
86
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
87
|
+
assert_equal 1, a["a"]
|
88
|
+
assert_equal 1, a[:a]
|
89
|
+
assert_equal 2, a["b"]
|
90
|
+
assert_equal 2, a[:b]
|
91
91
|
end
|
92
92
|
|
93
93
|
def test_lockfile
|
94
|
+
|
94
95
|
TmpFile.with_file do |tmpfile|
|
95
96
|
pids = []
|
96
|
-
|
97
|
+
4.times do |i|
|
97
98
|
pids << Process.fork do
|
98
|
-
pid = pid.to_s
|
99
|
-
Misc.lock(tmpfile, pid) do |f, val|
|
99
|
+
pid = Process.pid().to_s
|
100
|
+
status = Misc.lock(tmpfile, pid) do |f, val|
|
100
101
|
Open.write(f, val)
|
101
102
|
sleep rand * 2
|
102
103
|
if pid == Open.read(tmpfile)
|
103
|
-
|
104
|
+
0
|
104
105
|
else
|
105
|
-
|
106
|
+
1
|
106
107
|
end
|
107
108
|
end
|
109
|
+
exit(status)
|
108
110
|
end
|
111
|
+
|
109
112
|
end
|
110
113
|
pids.each do |pid| Process.waitpid pid; assert $?.success? end
|
111
114
|
end
|
112
115
|
|
113
116
|
end
|
114
117
|
|
118
|
+
def test_divide
|
119
|
+
assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_process_to_hash
|
123
|
+
list = [1,2,3,4]
|
124
|
+
assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
|
125
|
+
end
|
126
|
+
|
127
|
+
def test_add_method
|
128
|
+
a = "Test"
|
129
|
+
Misc.add_method a, :invert do self.reverse end
|
130
|
+
assert_equal "Test".reverse, a.invert
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_redefine_method
|
134
|
+
a = "Test"
|
135
|
+
worked = false
|
136
|
+
Misc.redefine_method a, :reverse, :old_reverse do worked = true; self.old_reverse end
|
137
|
+
assert_equal "Test".reverse, a.reverse
|
138
|
+
assert worked
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_merge_sorted_arrays
|
142
|
+
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
143
|
+
end
|
144
|
+
|
115
145
|
end
|
data/test/rbbt/util/test_open.rb
CHANGED
@@ -49,7 +49,7 @@ class TestOpen < Test::Unit::TestCase
|
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
52
|
-
|
52
|
+
def test_read_grep
|
53
53
|
content =<<-EOF
|
54
54
|
1
|
55
55
|
2
|
@@ -67,7 +67,7 @@ class TestOpen < Test::Unit::TestCase
|
|
67
67
|
Open.read(file, :grep => ["1","3"]) do |line| sum += line.to_i end
|
68
68
|
assert_equal(1 + 3, sum)
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
end
|
72
72
|
|
73
73
|
def test_gzip
|
@@ -153,11 +153,11 @@ row2 2 4 6 8
|
|
153
153
|
end
|
154
154
|
|
155
155
|
def test_non_blocking
|
156
|
-
$a = TSV.new
|
156
|
+
$a = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
|
157
157
|
$a.data.read
|
158
158
|
|
159
159
|
pid = Process.fork do
|
160
|
-
$b = TSV.new
|
160
|
+
$b = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
|
161
161
|
$b.data.close
|
162
162
|
end
|
163
163
|
|
@@ -13,18 +13,6 @@ file 'foo' do |t|
|
|
13
13
|
end
|
14
14
|
EOF
|
15
15
|
|
16
|
-
tmp.work.define_as_rake tmp.Rakefile.find.produce
|
17
|
-
|
18
|
-
tmp.test.install.xclip.define_as_string <<-EOF
|
19
|
-
name="xclip:0.12"
|
20
|
-
url="http://downloads.sourceforge.net/project/xclip/xclip/0.12/xclip-0.12.tar.gz?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fxclip%2F&ts=1286472387&use_mirror=sunet"
|
21
|
-
|
22
|
-
install_src "$name" "$url"
|
23
|
-
EOF
|
24
|
-
|
25
|
-
FileUtils.chmod 0770, tmp.test.install.xclip.produce
|
26
|
-
|
27
|
-
software.opt.xclip.define_as_install tmp.test.install.xclip.find
|
28
16
|
end
|
29
17
|
|
30
18
|
Open.cachedir = Rbbt.tmp.cache.find :user
|
@@ -36,10 +24,10 @@ end
|
|
36
24
|
|
37
25
|
class TestResource < Test::Unit::TestCase
|
38
26
|
def test_methods
|
39
|
-
assert Resource.methods.include?("resources")
|
40
|
-
assert ! Resource.methods.include?("pkgdir")
|
41
|
-
assert ! Phgx.methods.include?("resources")
|
42
|
-
assert Phgx.methods.include?("pkgdir")
|
27
|
+
assert Resource.methods.collect{|m| m.to_s}.include?("resources")
|
28
|
+
assert ! Resource.methods.collect{|m| m.to_s}.include?("pkgdir")
|
29
|
+
assert ! Phgx.methods.collect{|m| m.to_s}.include?("resources")
|
30
|
+
assert Phgx.methods.collect{|m| m.to_s}.include?("pkgdir")
|
43
31
|
|
44
32
|
end
|
45
33
|
def test_resolve
|
@@ -84,9 +72,5 @@ class TestResource < Test::Unit::TestCase
|
|
84
72
|
FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
|
85
73
|
end
|
86
74
|
end
|
87
|
-
|
88
|
-
def test_install
|
89
|
-
assert File.exists?(Rbbt.software.opt.xclip.produce)
|
90
|
-
end
|
91
75
|
end
|
92
76
|
|
@@ -86,5 +86,43 @@ class TestTCHash < Test::Unit::TestCase
|
|
86
86
|
Process.wait pid
|
87
87
|
end
|
88
88
|
end
|
89
|
+
|
90
|
+
def test_serializer_alias
|
91
|
+
TmpFile.with_file do |f|
|
92
|
+
t = TCHash.get f, true, :double
|
93
|
+
t["1"] = [[1],[2]]
|
94
|
+
t["2"] = [[3],[4,5]]
|
95
|
+
|
96
|
+
t = TCHash.get f
|
97
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
98
|
+
|
99
|
+
t.close
|
100
|
+
TCHash::CONNECTIONS.clear
|
101
|
+
|
102
|
+
t = TCHash.get f
|
103
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_serializer_reload
|
108
|
+
TmpFile.with_file do |f|
|
109
|
+
t = TCHash.get f, true, :double
|
110
|
+
t["1"] = [[1],[2]]
|
111
|
+
t["2"] = [[3],[4,5]]
|
112
|
+
|
113
|
+
t = TCHash.get f
|
114
|
+
assert_equal TCHash::StringDoubleArraySerializer, t.serializer
|
115
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
116
|
+
|
117
|
+
t.close
|
118
|
+
TCHash::CONNECTIONS.clear
|
119
|
+
|
120
|
+
t = TCHash.get f
|
121
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
|
126
|
+
|
89
127
|
end
|
90
128
|
|