rbbt-util 1.2.1 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt-util.rb +2 -1
- data/lib/rbbt/util/R.rb +18 -1
- data/lib/rbbt/util/cmd.rb +7 -6
- data/lib/rbbt/util/data_module.rb +31 -11
- data/lib/rbbt/util/fix_width_table.rb +209 -0
- data/lib/rbbt/util/log.rb +12 -2
- data/lib/rbbt/util/misc.rb +91 -12
- data/lib/rbbt/util/open.rb +18 -9
- data/lib/rbbt/util/path.rb +152 -0
- data/lib/rbbt/util/persistence.rb +282 -75
- data/lib/rbbt/util/pkg_data.rb +16 -59
- data/lib/rbbt/util/pkg_software.rb +15 -1
- data/lib/rbbt/util/rake.rb +5 -1
- data/lib/rbbt/util/tc_hash.rb +129 -59
- data/lib/rbbt/util/tsv.rb +109 -1284
- data/lib/rbbt/util/tsv/accessor.rb +273 -0
- data/lib/rbbt/util/tsv/attach.rb +228 -0
- data/lib/rbbt/util/tsv/index.rb +303 -0
- data/lib/rbbt/util/tsv/manipulate.rb +271 -0
- data/lib/rbbt/util/tsv/parse.rb +258 -0
- data/share/lib/R/util.R +5 -3
- data/test/rbbt/util/test_R.rb +9 -1
- data/test/rbbt/util/test_data_module.rb +5 -0
- data/test/rbbt/util/test_fix_width_table.rb +107 -0
- data/test/rbbt/util/test_misc.rb +43 -0
- data/test/rbbt/util/test_open.rb +0 -1
- data/test/rbbt/util/test_path.rb +10 -0
- data/test/rbbt/util/test_persistence.rb +63 -2
- data/test/rbbt/util/test_pkg_data.rb +29 -8
- data/test/rbbt/util/test_tc_hash.rb +52 -0
- data/test/rbbt/util/test_tsv.rb +55 -678
- data/test/rbbt/util/tsv/test_accessor.rb +109 -0
- data/test/rbbt/util/tsv/test_attach.rb +271 -0
- data/test/rbbt/util/tsv/test_index.rb +158 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
- data/test/rbbt/util/tsv/test_parse.rb +72 -0
- data/test/test_helper.rb +1 -0
- metadata +25 -4
@@ -0,0 +1,258 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
# Parsing half of TSV: header detection plus conversion of the data rows of a
# delimited text stream into a Hash keyed by the id column.
class TSV

  # Splits one line (or one cell) into its parts.
  #
  # io        - the String to split (despite the name, not a stream); nil
  #             yields an empty Array.
  # delimiter - String or Regexp handed to String#split.
  #
  # Returns an Array of Strings. The -1 limit keeps trailing empty fields.
  def self.parse_fields(io, delimiter = "\t")
    return [] if io.nil?

    io.split(delimiter, -1)
  end

  # Reads the optional header lines from the start of +stream+.
  #
  # Up to two header lines are consumed:
  #   1. an options line, "<header_hash>: <text>", whose text is decoded with
  #      Misc.string2hash;
  #   2. a fields line starting with <header_hash>, split with the separator.
  #
  # stream      - object responding to #gets.
  # sep         - field separator (default /\t/); a :sep entry in the options
  #               line takes precedence.
  # header_hash - literal prefix marking header lines (default "#"). An empty
  #               String makes the first line count as the fields line.
  #
  # Returns [key_field, fields, options, line] where +line+ is the first
  # unconsumed line (nil at end of stream). key_field and fields are nil when
  # no fields line was found.
  # Raises RuntimeError ("Empty content") when the stream has no lines.
  def self.parse_header(stream, sep = nil, header_hash = nil)
    sep = /\t/ if sep.nil?
    header_hash = "#" if header_hash.nil?

    fields, key_field = nil
    options = {}

    line = stream.gets
    raise "Empty content" if line.nil?

    # header_hash is a literal prefix (its length is used below to strip it),
    # so it must not be interpreted as regular expression syntax.
    prefix = Regexp.escape(header_hash)

    # Options line
    if line =~ /^#{prefix}: (.*)/
      options = Misc.string2hash $1
      line = stream.gets
    end

    # A separator declared in the file overrides the one passed in
    sep = options[:sep] if options[:sep]

    # Fields line
    if line and line =~ /^#{prefix}/
      line.chomp!
      fields = parse_fields(line, sep)
      key_field = fields.shift
      # Remove the initial hash marker; key_field is nil when the line was empty
      key_field = key_field[header_hash.length..-1] if key_field
      line = stream.gets
    end

    return key_field, fields, options, line
  end

  # Parses a whole stream into a Hash of id => values.
  #
  # stream  - object responding to #gets.
  # options - Hash; entries from the file's options line are added as
  #           defaults, followed by the defaults listed in the body. Used here:
  #           :type             - :double (default; each value is an Array of
  #                               Arrays), :list (Array of first sub-values),
  #                               :flat (one flattened Array) or :single (first
  #                               value only). Normalized with #to_sym.
  #           :key, :fields     - key column and value columns, resolved
  #                               through Misc.field_position when a fields
  #                               line exists; without one :key is used as the
  #                               column position and every other column is a
  #                               value.
  #           :sep, :sep2       - field separator and in-cell separator.
  #           :header_hash      - prefix of header/comment lines; data lines
  #                               starting with it are skipped unless it is
  #                               empty.
  #           :case_insensitive - downcase ids.
  #           :merge            - (:double only) append the values of repeated
  #                               ids instead of keeping the first row.
  #           :keep_empty       - pad rows up to the widest row seen.
  #           :cast             - method name (String or Symbol) sent to each
  #                               value, or a Proc called with it.
  #           :exclude, :select - callables receiving the raw line; decide
  #                               whether it is skipped / kept.
  #           :fix              - callable rewriting the line; a nil/false
  #                               result stops parsing.
  #           NOTE(review): Misc.add_defaults / Misc.process_options are
  #           defined elsewhere; the latter presumably returns the values for
  #           the given keys -- confirm against Misc.
  #
  # Extra ids in the key cell (split by :sep2) are stored as "__Ref:<id>"
  # Strings pointing at the first id.
  #
  # Returns [data, info] where info carries :key_field, :fields, :type,
  # :case_insensitive, :namespace, :datadir, :identifiers and :cast (boolean).
  def self.parse(stream, options = {})

    # Prepare options

    key_field, other_fields, more_options, line = TSV.parse_header(stream, options[:sep], options[:header_hash])

    options = Misc.add_defaults options, more_options

    options = Misc.add_defaults options,
      :case_insensitive => false,
      :type             => :double,
      :namespace        => nil,
      :identifiers      => nil,

      :merge            => false,
      :keep_empty       => (options[:type] != :flat and options[:type] != :single),
      :cast             => nil,

      :header_hash      => '#',
      :sep              => "\t",
      :sep2             => "|",

      :key              => 0,
      :fields           => nil,

      :fix              => nil,
      :exclude          => nil,
      :select           => nil,
      :grep             => nil

    header_hash, sep, sep2 =
      Misc.process_options options, :header_hash, :sep, :sep2

    key, fields =
      Misc.process_options options, :key, :fields

    if key_field.nil?
      # No fields line: the key is a plain position and value columns are
      # worked out per row inside the loop.
      key_pos = key
      key_field, fields = nil
    else
      all_fields = [key_field].concat other_fields

      key_pos = Misc.field_position(all_fields, key)

      if String === fields or Symbol === fields
        fields = [fields]
      end

      if fields.nil?
        other_pos = (0..(all_fields.length - 1)).to_a
        other_pos.delete key_pos
      else
        if Array === fields
          other_pos = fields.collect{|field| Misc.field_position(all_fields, field)}
        else
          other_pos = Misc.field_position(all_fields, fields)
        end
      end

      key_field = all_fields[key_pos]
      fields    = all_fields.values_at *other_pos
    end

    case_insensitive, type, namespace, merge, keep_empty, cast =
      Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
    fix, exclude, select, grep =
      Misc.process_options options, :fix, :exclude, :select, :grep

    # Every comparison below is against Symbols, so normalize once.
    type = type.to_sym

    # Data lines that look like header lines are skipped; an empty prefix
    # would match everything, so it disables the check.
    comment_line = nil
    if header_hash and not header_hash.empty?
      comment_line = /^#{Regexp.escape(header_hash)}/
    end

    #{{{ Process rest
    data     = {}
    single   = type != :double
    max_cols = 0
    while line do
      line.chomp!

      if line.empty? or
        (exclude and exclude.call(line)) or
        (select and not select.call(line))

        line = stream.gets
        next
      end

      line = fix.call line if fix
      break if not line

      if comment_line and line =~ comment_line
        line = stream.gets
        next
      end

      # Chunk fields
      parts = parse_fields(line, sep)

      # Get next line
      line = stream.gets

      # Get id field
      next if parts[key_pos].nil? || parts[key_pos].empty?

      ids = parse_fields(parts[key_pos], sep2)
      ids.collect!{|id| id.downcase} if case_insensitive

      # The first id owns the row; the remaining ones become references to it
      id = ids.shift
      ids.each do |id2| data[id2] = "__Ref:#{id}" end

      if single
        # Only :flat accumulates over repeated ids; the rest keep the first row
        next if data.include?(id) and type != :flat

        if key_field.nil?
          other_pos = (0..(parts.length - 1)).to_a
          other_pos.delete key_pos
        end

        if type == :flat
          extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}.flatten
        else
          extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
        end

        extra.collect! do |elem|
          case cast
          when String, Symbol
            elem.send(cast)
          when Proc
            cast.call elem
          end
        end if cast

        case type
        when :single
          data[id] = extra.first
        when :flat
          if data.include? id
            data[id].concat extra
          else
            data[id] = extra
          end
        else
          data[id] = extra
        end

        max_cols = extra.size if extra.size > max_cols unless type == :flat
      else
        if key_field.nil?
          other_pos = (0..(parts.length - 1)).to_a
          other_pos.delete key_pos
        end

        extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
        extra.collect! do |list|
          case cast
          when String, Symbol
            list.collect{|elem| elem.send(cast)}
          when Proc
            list.collect{|elem| cast.call elem}
          end
        end if cast

        max_cols = extra.size if extra.size > max_cols
        if not merge
          data[id] = extra unless data.include? id
        else
          if not data.include? id
            data[id] = extra
          else
            # Follow references down to the row that holds the values. Only
            # Strings can be references; rows are Arrays and do not respond
            # to =~ on Ruby >= 3.2.
            entry = data[id]
            while String === entry and entry =~ /__Ref:(.*)/ do entry = data[$1] end
            extra.each_with_index do |f, i|
              if f.empty?
                next unless keep_empty
                f = [""]
              end
              entry[i] ||= []
              entry[i] = entry[i].concat f
            end
            data[id] = entry
          end
        end
      end
    end

    if keep_empty and max_cols > 0
      data.each do |key, values|
        # Only rows are padded; "__Ref:" Strings and :single scalars are not rows
        next unless Array === values
        max_cols.times do |i|
          if type == :double
            values[i] = [""] if values[i].nil? or values[i].empty?
          else
            values[i] = "" if values[i].nil?
          end
        end
      end
    end

    [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
  end

end
|
data/share/lib/R/util.R
CHANGED
@@ -18,12 +18,15 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
|
|
18
18
|
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=1, comment.char = comment.char, ...);
|
19
19
|
f = file(filename, 'r');
|
20
20
|
headers = readLines(f, 1);
|
21
|
-
|
22
|
-
|
21
|
+
if (length(grep("^#: ", headers)) > 0){
|
22
|
+
headers = readLines(f, 1);
|
23
|
+
}
|
24
|
+
if (length(grep("^#", headers)) > 0){
|
23
25
|
fields = strsplit(headers, sep)[[1]];
|
24
26
|
fields = fields[2:length(fields)];
|
25
27
|
names(data) <- fields;
|
26
28
|
}
|
29
|
+
close(f);
|
27
30
|
return(data);
|
28
31
|
}
|
29
32
|
|
@@ -36,7 +39,6 @@ rbbt.tsv.write <- function(filename, data, key.field = NULL){
|
|
36
39
|
for (name in colnames(data)){ header = paste(header, name, sep="\t");}
|
37
40
|
header = paste(header, "\n", sep="");
|
38
41
|
cat(header, file=f);
|
39
|
-
cat(header, file=stderr());
|
40
42
|
|
41
43
|
close(f);
|
42
44
|
|
data/test/rbbt/util/test_R.rb
CHANGED
@@ -3,7 +3,15 @@ require 'rbbt/util/R'
|
|
3
3
|
|
4
4
|
class TestR < Test::Unit::TestCase
|
5
5
|
def test_sum
|
6
|
-
assert_equal "6", R.run('cat(3+3)').read
|
6
|
+
assert_equal "6", R.run('cat(3+3)').read.split(/\n/).last
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_tsv_R
|
10
|
+
tsv = TSV.new({:a => 1, :b => 2})
|
11
|
+
tsv2 = tsv.R <<-EOF
|
12
|
+
data = data + 1
|
13
|
+
EOF
|
14
|
+
puts tsv2.to_s
|
7
15
|
end
|
8
16
|
end
|
9
17
|
|
@@ -8,6 +8,7 @@ SHAREDIR = File.join(PKGData.sharedir_for_file(__FILE__), 'install/DataTest')
|
|
8
8
|
FileUtils.mkdir_p SHAREDIR
|
9
9
|
File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
|
10
10
|
f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
|
11
|
+
f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
|
11
12
|
end
|
12
13
|
|
13
14
|
module DataTest
|
@@ -27,14 +28,18 @@ class TestDataModule < Test::Unit::TestCase
|
|
27
28
|
FileUtils.mkdir_p SHAREDIR
|
28
29
|
File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
|
29
30
|
f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
|
31
|
+
f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
|
30
32
|
end
|
31
33
|
end
|
32
34
|
|
33
35
|
def test_rakefile
|
36
|
+
assert_equal Rbbt.files.DataTest, DataTest.datadir
|
34
37
|
assert_equal "File 1", Rbbt.files.DataTest.file1.read
|
35
38
|
assert_equal "Hello world", DataTest.salute("world")
|
36
39
|
assert_equal "Hello world", DataTest::with_key("world").salute
|
37
40
|
assert_equal "Hello world", DataTest::World.salute
|
41
|
+
assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.namespace
|
42
|
+
assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.tsv.namespace
|
38
43
|
FileUtils.rm_rf File.join(Rbbt.datadir, 'DataTest')
|
39
44
|
end
|
40
45
|
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/util/fix_width_table'
|
3
|
+
require 'rbbt/util/tsv'
|
4
|
+
|
5
|
+
# Tests for FixWidthTable: constructor options, raw record accessors, and
# point / range queries over tables filled from TSV fixtures.
class TestFixWidthTable < Test::Unit::TestCase
  # Builds the TSV used by test_range from a ':'-separated fixture file.
  #
  # Every value is cast to a Range: the count of leading whitespace characters
  # is the start and the run of underscores that follows gives the length.
  # "Start" and "End" columns are then derived from the "Range" column, and
  # only those two columns are kept.
  def load_data(data)
    Log.debug("Data:\n#{Open.read(data)}")
    tsv = TSV.new(data, :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
    tsv.add_field "Start" do |key, values|
      values["Range"].first
    end
    tsv.add_field "End" do |key, values|
      values["Range"].last
    end

    tsv = tsv.slice ["Start", "End"]

    tsv
  end

  # A table created with the third argument true still reports range == true
  # when the same file is reopened with false.
  # NOTE(review): presumably the flag is persisted in the file -- confirm
  # against FixWidthTable.
  def test_options
    TmpFile.with_file do |filename|
      f = FixWidthTable.new filename, 100, true
      f.close

      f1 = FixWidthTable.new filename, 100, false

      assert_equal true, f1.range
    end
  end

  # Records added as [pos, pos_end, overlap] plus a value can be read back by
  # index through the matching accessors.
  def test_add
    TmpFile.with_file do |filename|
      f = FixWidthTable.new filename, 100, true
      f.add [1,2,0], "test1"
      f.add [3,4,0], "test2"
      f.read

      assert_equal 1, f.pos(0)
      assert_equal 3, f.pos(1)
      assert_equal 2, f.pos_end(0)
      assert_equal 4, f.pos_end(1)
      assert_equal 0, f.overlap(0)
      assert_equal 0, f.overlap(1)
      assert_equal "test1", f.value(0)
      assert_equal "test2", f.value(1)
    end

  end

  # Point table: ids are looked up by a single position or by a range of
  # positions.
  def test_point
    data =<<-EOF
#: :sep=/\\s+/#:type=:single#:cast=to_i
#ID Pos
a 1
b 10
c 20
d 12
e 26
f 11
g 25
    EOF
    TmpFile.with_file(data) do |datafile|
      tsv = TSV.new datafile
      TmpFile.with_file do |filename|
        f = FixWidthTable.new filename, 100, false
        f.add_point tsv
        f.read

        assert_equal %w(), f[0].sort
        assert_equal %w(b), f[10].sort
        assert_equal %w(a b c d f), f[(0..20)].sort
      end
    end
  end

  # Range table: each fixture row draws its interval with underscores; the
  # second '#' line is a position ruler for the reader.
  # NOTE(review): load_data derives each interval from the whitespace before
  # the underscores, so the column alignment of the fixture is significant --
  # verify the spacing below against the upstream file before relying on it.
  def test_range
    data =<<-EOF
#ID:Range
#:012345678901234567890
a: ______
b: ______
c: _______
d: ____
e: ______
f: ___
g: ____
    EOF
    TmpFile.with_file(data) do |datafile|
      tsv = TSV.new load_data(datafile)
      TmpFile.with_file do |filename|
        f = FixWidthTable.new filename, 100, true
        f.add_range tsv
        f.read

        assert_equal %w(), f[0].sort
        assert_equal %w(b), f[1].sort
        assert_equal %w(), f[20].sort
        assert_equal %w(), f[(20..100)].sort
        assert_equal %w(a b d), f[3].sort
        assert_equal %w(a b c d e), f[(3..4)].sort
      end
    end
  end
end
|
107
|
+
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -32,4 +32,47 @@ class TestMisc < Test::Unit::TestCase
|
|
32
32
|
assert_equal(1, a['a'])
|
33
33
|
end
|
34
34
|
|
35
|
+
def test_path_relative_to
|
36
|
+
assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_chunk
|
40
|
+
test =<<-EOF
|
41
|
+
This is an example file. Entries are separated by Entry
|
42
|
+
-- Entry
|
43
|
+
1
|
44
|
+
2
|
45
|
+
3
|
46
|
+
-- Entry
|
47
|
+
4
|
48
|
+
5
|
49
|
+
6
|
50
|
+
EOF
|
51
|
+
|
52
|
+
assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_hash2string
|
56
|
+
hash = {}
|
57
|
+
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
58
|
+
|
59
|
+
hash = {:a => 1}
|
60
|
+
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
61
|
+
|
62
|
+
hash = {:a => true}
|
63
|
+
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
64
|
+
|
65
|
+
hash = {:a => Misc}
|
66
|
+
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
67
|
+
|
68
|
+
hash = {:a => :b}
|
69
|
+
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
70
|
+
|
71
|
+
hash = {:a => /test/}
|
72
|
+
assert_equal({}, Misc.string2hash(Misc.hash2string(hash)))
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
end
|
77
|
+
|
35
78
|
end
|