rbbt-util 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/rbbt_query.rb +1 -1
- data/lib/rbbt/util/cmd.rb +115 -67
- data/lib/rbbt/util/fix_width_table.rb +18 -3
- data/lib/rbbt/util/misc.rb +106 -6
- data/lib/rbbt/util/open.rb +9 -7
- data/lib/rbbt/util/persistence.rb +17 -14
- data/lib/rbbt/util/resource.rb +10 -3
- data/lib/rbbt/util/task.rb +2 -2
- data/lib/rbbt/util/task/job.rb +16 -3
- data/lib/rbbt/util/tc_hash.rb +64 -27
- data/lib/rbbt/util/tsv.rb +44 -21
- data/lib/rbbt/util/tsv/accessor.rb +8 -6
- data/lib/rbbt/util/tsv/attach.rb +19 -28
- data/lib/rbbt/util/tsv/filters.rb +193 -0
- data/lib/rbbt/util/tsv/index.rb +80 -8
- data/lib/rbbt/util/tsv/manipulate.rb +17 -6
- data/lib/rbbt/util/tsv/misc.rb +10 -0
- data/lib/rbbt/util/tsv/parse.rb +18 -1
- data/lib/rbbt/util/workflow.rb +12 -3
- data/lib/rbbt/util/workflow/soap.rb +0 -1
- data/share/install/software/lib/install_helpers +0 -2
- data/share/lib/R/util.R +3 -3
- data/test/rbbt/util/test_cmd.rb +23 -0
- data/test/rbbt/util/test_excel2tsv.rb +1 -1
- data/test/rbbt/util/test_misc.rb +41 -11
- data/test/rbbt/util/test_open.rb +2 -2
- data/test/rbbt/util/test_persistence.rb +2 -2
- data/test/rbbt/util/test_resource.rb +4 -20
- data/test/rbbt/util/test_tc_hash.rb +38 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/util/test_tsv.rb +6 -0
- data/test/rbbt/util/test_workflow.rb +14 -10
- data/test/rbbt/util/tsv/test_accessor.rb +42 -0
- data/test/rbbt/util/tsv/test_filters.rb +141 -0
- data/test/rbbt/util/tsv/test_index.rb +32 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
- data/test/test_helper.rb +3 -1
- metadata +41 -38
@@ -12,7 +12,7 @@ class TSV
|
|
12
12
|
when Integer === new_fields
|
13
13
|
[new_fields]
|
14
14
|
when String === new_fields
|
15
|
-
[identify_field
|
15
|
+
[identify_field(new_fields)]
|
16
16
|
when Array === new_fields
|
17
17
|
new_fields.collect{|new_field| identify_field new_field}
|
18
18
|
when new_fields == :key
|
@@ -46,7 +46,7 @@ class TSV
|
|
46
46
|
|
47
47
|
# Cycle through
|
48
48
|
if monitor
|
49
|
-
desc = "
|
49
|
+
desc = "Iterating TSV"
|
50
50
|
step = 100
|
51
51
|
if Hash === monitor
|
52
52
|
desc = monitor[:desc] if monitor.include? :desc
|
@@ -84,14 +84,22 @@ class TSV
|
|
84
84
|
f
|
85
85
|
else
|
86
86
|
f = fields.dup
|
87
|
-
|
87
|
+
case
|
88
|
+
when type == :single
|
89
|
+
f = [f,key]
|
90
|
+
when type == :double
|
88
91
|
f.push [key]
|
89
92
|
else
|
90
93
|
f.push key
|
91
94
|
end
|
92
95
|
f.values_at *new_field_positions.collect{|pos| pos == :key ? -1 : pos }
|
93
96
|
end
|
94
|
-
|
97
|
+
|
98
|
+
if type == :single
|
99
|
+
new_field_values = new_field_values.first
|
100
|
+
else
|
101
|
+
new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
|
102
|
+
end
|
95
103
|
|
96
104
|
next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
|
97
105
|
yield new_key_value, new_field_values
|
@@ -221,14 +229,17 @@ class TSV
|
|
221
229
|
method.each{|item| new[item] = self[item] if self.include? item}
|
222
230
|
when Array === method
|
223
231
|
through :key, key do |key, values|
|
232
|
+
values = [values] if type == :single
|
224
233
|
new[key] = self[key] if (values.flatten & method).any?
|
225
234
|
end
|
226
235
|
when Regexp === method
|
227
236
|
through :key, key do |key, values|
|
237
|
+
values = [values] if type == :single
|
228
238
|
new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
|
229
239
|
end
|
230
240
|
when String === method
|
231
241
|
through :key, key do |key, values|
|
242
|
+
values = [values] if type == :single
|
232
243
|
new[key] = self[key] if values.flatten.select{|v| v == method}.any?
|
233
244
|
end
|
234
245
|
end
|
@@ -267,7 +278,7 @@ class TSV
|
|
267
278
|
end
|
268
279
|
|
269
280
|
def add_field(name = nil)
|
270
|
-
|
281
|
+
through do |key, values|
|
271
282
|
new_values = yield(key, values)
|
272
283
|
new_values = [new_values] if type == :double and not Array === new_values
|
273
284
|
|
@@ -280,7 +291,7 @@ class TSV
|
|
280
291
|
end
|
281
292
|
|
282
293
|
def add_fields(names = nil)
|
283
|
-
|
294
|
+
through do |key, values|
|
284
295
|
new_values = yield(key, values)
|
285
296
|
new_values = [new_values] if type == :double and not Array == new_values
|
286
297
|
|
data/lib/rbbt/util/tsv/misc.rb
CHANGED
data/lib/rbbt/util/tsv/parse.rb
CHANGED
@@ -142,12 +142,29 @@ class TSV
|
|
142
142
|
|
143
143
|
#{{{ Process rest
|
144
144
|
data = options[:persistence_data] || {}
|
145
|
+
if Persistence::TSV === data
|
146
|
+
serializer = case
|
147
|
+
when ((cast == "to_i" or cast == :to_i) and type == :single)
|
148
|
+
:integer
|
149
|
+
when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
|
150
|
+
:integer_array
|
151
|
+
when (type == :list or type == :flat)
|
152
|
+
:list
|
153
|
+
when type == :single
|
154
|
+
:single
|
155
|
+
else
|
156
|
+
:double
|
157
|
+
end
|
158
|
+
data.serializer = serializer
|
159
|
+
end
|
160
|
+
|
161
|
+
|
145
162
|
single = type.to_sym != :double
|
146
163
|
max_cols = 0
|
147
164
|
while line do
|
148
165
|
line.chomp!
|
149
166
|
|
150
|
-
progress_monitor.tick(stream.pos) if progress_monitor
|
167
|
+
progress_monitor.tick(stream.pos) if progress_monitor
|
151
168
|
|
152
169
|
if line.empty? or
|
153
170
|
(exclude and exclude.call(line)) or
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -4,6 +4,15 @@ require 'rbbt/util/persistence'
|
|
4
4
|
require 'rbbt/util/misc'
|
5
5
|
|
6
6
|
module WorkFlow
|
7
|
+
|
8
|
+
def self.require_workflow(path)
|
9
|
+
if Rbbt.etc.workflow_dir.exists?
|
10
|
+
require Dir.glob(File.join(Rbbt.etc.workflow_dir.read.strip, '*', path + '.rb')).first
|
11
|
+
else
|
12
|
+
require Dir.glob(File.join(Rbbt.share.workflows.find, '*', path + '.rb')).first
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
7
16
|
def self.extended(base)
|
8
17
|
class << base
|
9
18
|
attr_accessor :tasks, :jobdir, :dangling_options, :dangling_option_descriptions,
|
@@ -53,8 +62,8 @@ module WorkFlow
|
|
53
62
|
@dangling_option_defaults[name] = default if default
|
54
63
|
end
|
55
64
|
|
56
|
-
def task_dependencies(dependencies)
|
57
|
-
dependencies =
|
65
|
+
def task_dependencies(*dependencies)
|
66
|
+
dependencies = dependencies.flatten
|
58
67
|
@dangling_dependencies = dependencies.collect{|dep| Symbol === dep ? tasks[dep] : dep }
|
59
68
|
end
|
60
69
|
|
@@ -93,7 +102,7 @@ module WorkFlow
|
|
93
102
|
@last_task = task
|
94
103
|
end
|
95
104
|
|
96
|
-
def job(task, jobname, *args)
|
105
|
+
def job(task, jobname = "Default", *args)
|
97
106
|
tasks[task].job(jobname, *args)
|
98
107
|
end
|
99
108
|
|
@@ -8,8 +8,6 @@ OPT_SCM_DIR="$SOFTWARE_DIR/scm"; [ -d $OPT_SCM_DIR ] || mkdir -p $OPT_SCM_DIR
|
|
8
8
|
OPT_JAR_DIR="$OPT_DIR/jars"; [ -d $OPT_JAR_DIR ] || mkdir -p $OPT_JAR_DIR
|
9
9
|
OPT_BUILD_DIR="$SOFTWARE_DIR/.build"; [ -d $OPT_BUILD_DIR ] || mkdir -p $OPT_BUILD_DIR
|
10
10
|
|
11
|
-
#source "$HOME/config/bash/_utility_functions"
|
12
|
-
|
13
11
|
function expand_path(){
|
14
12
|
name=$(basename $1)
|
15
13
|
dir=$(dirname $1)
|
data/share/lib/R/util.R
CHANGED
@@ -26,7 +26,7 @@ rbbt.load.data <- function(filename, sep = "\t", ...){
|
|
26
26
|
}
|
27
27
|
|
28
28
|
rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", row.names=1, ...){
|
29
|
-
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=row.names, comment.char = comment.char, ...);
|
29
|
+
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names= row.names, comment.char = comment.char, ...);
|
30
30
|
f = file(filename, 'r');
|
31
31
|
headers = readLines(f, 1);
|
32
32
|
if (length(grep("^#: ", headers)) > 0){
|
@@ -93,7 +93,7 @@ rbbt.sort_by_field <- function(data, field, is.numeric=TRUE){
|
|
93
93
|
|
94
94
|
rbbt.add <- function(data, new){
|
95
95
|
if (is.null(data)){
|
96
|
-
return(new);
|
96
|
+
return(c(new));
|
97
97
|
}else{
|
98
98
|
return(c(data, new));
|
99
99
|
}
|
@@ -101,7 +101,7 @@ rbbt.add <- function(data, new){
|
|
101
101
|
|
102
102
|
rbbt.acc <- function(data, new){
|
103
103
|
if (is.null(data)){
|
104
|
-
return(new);
|
104
|
+
return(c(new));
|
105
105
|
}else{
|
106
106
|
return(unique(c(data, new)));
|
107
107
|
}
|
data/test/rbbt/util/test_cmd.rb
CHANGED
@@ -22,6 +22,7 @@ class TestCmd < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def test_pipe
|
25
|
+
assert_equal("test\n", CMD.cmd("echo test", :pipe => true).read)
|
25
26
|
assert_equal("test\n", CMD.cmd("echo '{opt}' test", :pipe => true).read)
|
26
27
|
assert_equal("test", CMD.cmd("echo '{opt}' test", "-n" => true, :pipe => true).read)
|
27
28
|
assert_equal("test2\n", CMD.cmd("cut", "-f" => 2, "-d" => '" "', :in => "test1 test2", :pipe => true).read)
|
@@ -41,4 +42,26 @@ class TestCmd < Test::Unit::TestCase
|
|
41
42
|
assert_raise CMD::CMDError do CMD.cmd('ls -fake_option', :stderr => true, :pipe => true).read end
|
42
43
|
end
|
43
44
|
|
45
|
+
def test_pipes
|
46
|
+
text = <<-EOF
|
47
|
+
line1
|
48
|
+
line2
|
49
|
+
line3
|
50
|
+
line11
|
51
|
+
line22
|
52
|
+
line33
|
53
|
+
EOF
|
54
|
+
|
55
|
+
TmpFile.with_file(text * 100) do |file|
|
56
|
+
CMD.cmd("gzip #{ file }")
|
57
|
+
|
58
|
+
gz = CMD.cmd("gunzip", :in => File.open(file + '.gz'), :pipe => true)
|
59
|
+
io = CMD.cmd('tail -n 10', :in => gz, :pipe => true)
|
60
|
+
assert_equal 10, io.read.split(/\n/).length
|
61
|
+
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
44
67
|
end
|
@@ -3,7 +3,7 @@ require 'rbbt/util/excel2tsv'
|
|
3
3
|
|
4
4
|
class TestTSV < Test::Unit::TestCase
|
5
5
|
def test_tsv2excel
|
6
|
-
tsv = TSV.excel2tsv(
|
6
|
+
tsv = TSV.excel2tsv(datafile_test('Test.xls'), :header => true)
|
7
7
|
assert_equal 'Id', tsv.key_field
|
8
8
|
end
|
9
9
|
end
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -5,7 +5,7 @@ require 'test/unit'
|
|
5
5
|
class TestMisc < Test::Unit::TestCase
|
6
6
|
|
7
7
|
def test_pdf2text_example
|
8
|
-
assert PDF2Text.pdf2text(
|
8
|
+
assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_pdf2text_EPAR
|
@@ -13,7 +13,7 @@ class TestMisc < Test::Unit::TestCase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_pdf2text_wrong
|
16
|
-
assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#") end
|
16
|
+
assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_string2hash
|
@@ -84,32 +84,62 @@ This is an example file. Entries are separated by Entry
|
|
84
84
|
a = {:a => 1, "b" => 2}
|
85
85
|
a.extend IndiferentHash
|
86
86
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
87
|
+
assert_equal 1, a["a"]
|
88
|
+
assert_equal 1, a[:a]
|
89
|
+
assert_equal 2, a["b"]
|
90
|
+
assert_equal 2, a[:b]
|
91
91
|
end
|
92
92
|
|
93
93
|
def test_lockfile
|
94
|
+
|
94
95
|
TmpFile.with_file do |tmpfile|
|
95
96
|
pids = []
|
96
|
-
|
97
|
+
4.times do |i|
|
97
98
|
pids << Process.fork do
|
98
|
-
pid = pid.to_s
|
99
|
-
Misc.lock(tmpfile, pid) do |f, val|
|
99
|
+
pid = Process.pid().to_s
|
100
|
+
status = Misc.lock(tmpfile, pid) do |f, val|
|
100
101
|
Open.write(f, val)
|
101
102
|
sleep rand * 2
|
102
103
|
if pid == Open.read(tmpfile)
|
103
|
-
|
104
|
+
0
|
104
105
|
else
|
105
|
-
|
106
|
+
1
|
106
107
|
end
|
107
108
|
end
|
109
|
+
exit(status)
|
108
110
|
end
|
111
|
+
|
109
112
|
end
|
110
113
|
pids.each do |pid| Process.waitpid pid; assert $?.success? end
|
111
114
|
end
|
112
115
|
|
113
116
|
end
|
114
117
|
|
118
|
+
def test_divide
|
119
|
+
assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
120
|
+
end
|
121
|
+
|
122
|
+
def test_process_to_hash
|
123
|
+
list = [1,2,3,4]
|
124
|
+
assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
|
125
|
+
end
|
126
|
+
|
127
|
+
def test_add_method
|
128
|
+
a = "Test"
|
129
|
+
Misc.add_method a, :invert do self.reverse end
|
130
|
+
assert_equal "Test".reverse, a.invert
|
131
|
+
end
|
132
|
+
|
133
|
+
def test_redefine_method
|
134
|
+
a = "Test"
|
135
|
+
worked = false
|
136
|
+
Misc.redefine_method a, :reverse, :old_reverse do worked = true; self.old_reverse end
|
137
|
+
assert_equal "Test".reverse, a.reverse
|
138
|
+
assert worked
|
139
|
+
end
|
140
|
+
|
141
|
+
def test_merge_sorted_arrays
|
142
|
+
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
143
|
+
end
|
144
|
+
|
115
145
|
end
|
data/test/rbbt/util/test_open.rb
CHANGED
@@ -49,7 +49,7 @@ class TestOpen < Test::Unit::TestCase
|
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
52
|
-
|
52
|
+
def test_read_grep
|
53
53
|
content =<<-EOF
|
54
54
|
1
|
55
55
|
2
|
@@ -67,7 +67,7 @@ class TestOpen < Test::Unit::TestCase
|
|
67
67
|
Open.read(file, :grep => ["1","3"]) do |line| sum += line.to_i end
|
68
68
|
assert_equal(1 + 3, sum)
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
end
|
72
72
|
|
73
73
|
def test_gzip
|
@@ -153,11 +153,11 @@ row2 2 4 6 8
|
|
153
153
|
end
|
154
154
|
|
155
155
|
def test_non_blocking
|
156
|
-
$a = TSV.new
|
156
|
+
$a = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
|
157
157
|
$a.data.read
|
158
158
|
|
159
159
|
pid = Process.fork do
|
160
|
-
$b = TSV.new
|
160
|
+
$b = TSV.new datafile_test('test.tsv'), :persistence => true, :persistence_dir => Rbbt.tmp.test.persistence
|
161
161
|
$b.data.close
|
162
162
|
end
|
163
163
|
|
@@ -13,18 +13,6 @@ file 'foo' do |t|
|
|
13
13
|
end
|
14
14
|
EOF
|
15
15
|
|
16
|
-
tmp.work.define_as_rake tmp.Rakefile.find.produce
|
17
|
-
|
18
|
-
tmp.test.install.xclip.define_as_string <<-EOF
|
19
|
-
name="xclip:0.12"
|
20
|
-
url="http://downloads.sourceforge.net/project/xclip/xclip/0.12/xclip-0.12.tar.gz?r=http%3A%2F%2Fsourceforge.net%2Fprojects%2Fxclip%2F&ts=1286472387&use_mirror=sunet"
|
21
|
-
|
22
|
-
install_src "$name" "$url"
|
23
|
-
EOF
|
24
|
-
|
25
|
-
FileUtils.chmod 0770, tmp.test.install.xclip.produce
|
26
|
-
|
27
|
-
software.opt.xclip.define_as_install tmp.test.install.xclip.find
|
28
16
|
end
|
29
17
|
|
30
18
|
Open.cachedir = Rbbt.tmp.cache.find :user
|
@@ -36,10 +24,10 @@ end
|
|
36
24
|
|
37
25
|
class TestResource < Test::Unit::TestCase
|
38
26
|
def test_methods
|
39
|
-
assert Resource.methods.include?("resources")
|
40
|
-
assert ! Resource.methods.include?("pkgdir")
|
41
|
-
assert ! Phgx.methods.include?("resources")
|
42
|
-
assert Phgx.methods.include?("pkgdir")
|
27
|
+
assert Resource.methods.collect{|m| m.to_s}.include?("resources")
|
28
|
+
assert ! Resource.methods.collect{|m| m.to_s}.include?("pkgdir")
|
29
|
+
assert ! Phgx.methods.collect{|m| m.to_s}.include?("resources")
|
30
|
+
assert Phgx.methods.collect{|m| m.to_s}.include?("pkgdir")
|
43
31
|
|
44
32
|
end
|
45
33
|
def test_resolve
|
@@ -84,9 +72,5 @@ class TestResource < Test::Unit::TestCase
|
|
84
72
|
FileUtils.rm Rbbt.tmp.url.find if File.exists? Rbbt.tmp.url.find
|
85
73
|
end
|
86
74
|
end
|
87
|
-
|
88
|
-
def test_install
|
89
|
-
assert File.exists?(Rbbt.software.opt.xclip.produce)
|
90
|
-
end
|
91
75
|
end
|
92
76
|
|
@@ -86,5 +86,43 @@ class TestTCHash < Test::Unit::TestCase
|
|
86
86
|
Process.wait pid
|
87
87
|
end
|
88
88
|
end
|
89
|
+
|
90
|
+
def test_serializer_alias
|
91
|
+
TmpFile.with_file do |f|
|
92
|
+
t = TCHash.get f, true, :double
|
93
|
+
t["1"] = [[1],[2]]
|
94
|
+
t["2"] = [[3],[4,5]]
|
95
|
+
|
96
|
+
t = TCHash.get f
|
97
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
98
|
+
|
99
|
+
t.close
|
100
|
+
TCHash::CONNECTIONS.clear
|
101
|
+
|
102
|
+
t = TCHash.get f
|
103
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_serializer_reload
|
108
|
+
TmpFile.with_file do |f|
|
109
|
+
t = TCHash.get f, true, :double
|
110
|
+
t["1"] = [[1],[2]]
|
111
|
+
t["2"] = [[3],[4,5]]
|
112
|
+
|
113
|
+
t = TCHash.get f
|
114
|
+
assert_equal TCHash::StringDoubleArraySerializer, t.serializer
|
115
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
116
|
+
|
117
|
+
t.close
|
118
|
+
TCHash::CONNECTIONS.clear
|
119
|
+
|
120
|
+
t = TCHash.get f
|
121
|
+
assert_equal [["3"],["4","5"]], t["2"]
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
|
126
|
+
|
89
127
|
end
|
90
128
|
|