rbbt-util 1.2.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt-util.rb +2 -1
- data/lib/rbbt/util/R.rb +18 -1
- data/lib/rbbt/util/cmd.rb +7 -6
- data/lib/rbbt/util/data_module.rb +31 -11
- data/lib/rbbt/util/fix_width_table.rb +209 -0
- data/lib/rbbt/util/log.rb +12 -2
- data/lib/rbbt/util/misc.rb +91 -12
- data/lib/rbbt/util/open.rb +18 -9
- data/lib/rbbt/util/path.rb +152 -0
- data/lib/rbbt/util/persistence.rb +282 -75
- data/lib/rbbt/util/pkg_data.rb +16 -59
- data/lib/rbbt/util/pkg_software.rb +15 -1
- data/lib/rbbt/util/rake.rb +5 -1
- data/lib/rbbt/util/tc_hash.rb +129 -59
- data/lib/rbbt/util/tsv.rb +109 -1284
- data/lib/rbbt/util/tsv/accessor.rb +273 -0
- data/lib/rbbt/util/tsv/attach.rb +228 -0
- data/lib/rbbt/util/tsv/index.rb +303 -0
- data/lib/rbbt/util/tsv/manipulate.rb +271 -0
- data/lib/rbbt/util/tsv/parse.rb +258 -0
- data/share/lib/R/util.R +5 -3
- data/test/rbbt/util/test_R.rb +9 -1
- data/test/rbbt/util/test_data_module.rb +5 -0
- data/test/rbbt/util/test_fix_width_table.rb +107 -0
- data/test/rbbt/util/test_misc.rb +43 -0
- data/test/rbbt/util/test_open.rb +0 -1
- data/test/rbbt/util/test_path.rb +10 -0
- data/test/rbbt/util/test_persistence.rb +63 -2
- data/test/rbbt/util/test_pkg_data.rb +29 -8
- data/test/rbbt/util/test_tc_hash.rb +52 -0
- data/test/rbbt/util/test_tsv.rb +55 -678
- data/test/rbbt/util/tsv/test_accessor.rb +109 -0
- data/test/rbbt/util/tsv/test_attach.rb +271 -0
- data/test/rbbt/util/tsv/test_index.rb +158 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
- data/test/rbbt/util/tsv/test_parse.rb +72 -0
- data/test/test_helper.rb +1 -0
- metadata +25 -4
@@ -0,0 +1,258 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
class TSV

  # Splits a single line (or a single cell) of text into its fields.
  #
  # io        - the text to split. Despite the name this is a String (it is
  #             only ever sent #split); nil is accepted and yields [].
  # delimiter - String or Regexp handed to String#split (default: tab).
  #
  # Returns an Array of Strings. The -1 limit makes split keep trailing empty
  # fields, so "a\t\t" yields ["a", "", ""].
  def self.parse_fields(io, delimiter = "\t")
    return [] if io.nil?

    io.split(delimiter, -1)
  end

  # Consumes the optional header lines at the top of +stream+.
  #
  # Up to two header lines are read, in this order:
  #
  # 1. an options line, "<header_hash>: <text>", whose text is handed to
  #    Misc.string2hash;
  # 2. a fields line, i.e. a line starting with <header_hash>, which is split
  #    with the separator into the key field name and the other field names.
  #
  # stream      - object responding to #gets.
  # sep         - field separator (default: /\t/). A :sep entry found in the
  #               options line replaces it.
  # header_hash - literal String prefix that marks header lines (default "#").
  #               It is escaped before being used in a regular expression. An
  #               empty prefix matches every line, so the first line is then
  #               always taken as the fields line.
  #
  # Returns [key_field, fields, options, line]. key_field and fields are nil
  # when there is no fields line, options is {} when there is no options line,
  # and line is the first line that was read but not consumed (nil at the end
  # of the stream).
  #
  # Raises RuntimeError ("Empty content") when the stream has no lines.
  def self.parse_header(stream, sep = nil, header_hash = nil)
    sep = /\t/ if sep.nil?
    header_hash = "#" if header_hash.nil?

    # header_hash is a literal prefix (its length is used below to strip it),
    # so it must not be read as regular expression syntax.
    prefix = Regexp.quote(header_hash)

    fields, key_field = nil
    options = {}

    line = stream.gets
    raise "Empty content" if line.nil?

    # Options line
    if line =~ /^#{prefix}: (.*)/
      options = Misc.string2hash $1
      line = stream.gets
    end

    # The options line may redefine the separator used by the fields line
    sep = options[:sep] if options[:sep]

    # Fields line
    if line and line =~ /^#{prefix}/
      line.chomp!
      fields = parse_fields(line, sep)
      key_field = fields.shift
      key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
      line = stream.gets
    end

    return key_field, fields, options, line
  end

  # Parses a delimited text stream into a Hash of key => values.
  #
  # stream  - object responding to #gets.
  # options - Hash of options; the defaults are listed in the body below.
  #           Options found in the header of the stream are merged in through
  #           Misc.add_defaults.
  #           NOTE(review): presumably Misc.add_defaults only fills in keys
  #           that are missing, so explicitly passed options win over the ones
  #           in the header -- confirm against Misc.
  #
  # Options read by this method:
  #
  # :type             - :double (each value is a list of sub-values, the
  #                     default), :flat (all sub-values of a row in one list),
  #                     :single (only the first value of the row); any other
  #                     type keeps the first sub-value of each field in a
  #                     list. A String is accepted and converted to a Symbol.
  # :sep, :sep2       - separators for fields and for sub-values in a field.
  # :header_hash      - prefix of header and comment lines; data lines that
  #                     start with it are skipped unless it is empty.
  # :key, :fields     - key field and value fields. They are resolved with
  #                     Misc.field_position when the stream has a fields
  #                     line; otherwise :key is used directly as a column
  #                     position and every other column is a value.
  # :case_insensitive - downcase the keys.
  # :merge            - for :double, append the values of repeated keys
  #                     instead of keeping only the first row.
  # :keep_empty       - pad short rows with empty values.
  # :cast             - method name (String or Symbol) sent to each value, or
  #                     a Proc called with it.
  # :fix              - Proc called with each line; its result replaces the
  #                     line, and a false result stops the parsing.
  # :exclude, :select - Procs called with each line to drop or keep it.
  #
  # Extra identifiers in the key cell (split by :sep2) are stored as
  # "__Ref:<first id>" strings pointing at the main entry.
  #
  # Returns [data, info], where info is a Hash with :key_field, :fields,
  # :type, :case_insensitive, :namespace, :datadir, :identifiers and :cast
  # (true or false).
  def self.parse(stream, options = {})

    # Prepare options

    key_field, other_fields, more_options, line =
      TSV.parse_header(stream, options[:sep], options[:header_hash])

    options = Misc.add_defaults options, more_options

    # Compared as text so that "flat" and :flat select the same default
    keep_empty_default = !%w(flat single).include?(options[:type].to_s)

    options = Misc.add_defaults options,
      :case_insensitive => false,
      :type             => :double,
      :namespace        => nil,
      :identifiers      => nil,

      :merge            => false,
      :keep_empty       => keep_empty_default,
      :cast             => nil,

      :header_hash      => '#',
      :sep              => "\t",
      :sep2             => "|",

      :key              => 0,
      :fields           => nil,

      :fix              => nil,
      :exclude          => nil,
      :select           => nil,
      :grep             => nil

    header_hash, sep, sep2 =
      Misc.process_options options, :header_hash, :sep, :sep2

    key, fields =
      Misc.process_options options, :key, :fields

    other_pos = nil
    if key_field.nil?
      # No fields line: the key is a plain column position and the value
      # columns are worked out for each line inside the loop.
      key_pos = key
      key_field, fields = nil
    else
      all_fields = [key_field].concat other_fields

      key_pos = Misc.field_position(all_fields, key)

      fields = [fields] if String === fields or Symbol === fields

      other_pos =
        if fields.nil?
          (0..(all_fields.length - 1)).to_a - [key_pos]
        elsif Array === fields
          fields.collect{|field| Misc.field_position(all_fields, field)}
        else
          Misc.field_position(all_fields, fields)
        end

      key_field = all_fields[key_pos]
      fields = all_fields.values_at(*other_pos)
    end

    case_insensitive, type, namespace, merge, keep_empty, cast =
      Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
    # grep is extracted together with the others but is not used here
    fix, exclude, select, grep =
      Misc.process_options options, :fix, :exclude, :select, :grep

    # Every comparison below is against Symbols
    type = type.to_sym
    single = type != :double

    # Method names may be given as String or Symbol. Any other kind of cast
    # leaves the values untouched instead of turning them into nil.
    caster =
      case cast
      when String, Symbol then lambda{|value| value.send(cast)}
      when Proc then cast
      end

    # Data lines starting with the header prefix are comments
    comment_re =
      if header_hash and not header_hash.empty?
        /^#{Regexp.quote(header_hash)}/
      end

    #{{{ Process rest
    data = {}
    max_cols = 0
    while line do
      line.chomp!

      if line.empty? or
        (exclude and exclude.call(line)) or
        (select and not select.call(line))

        line = stream.gets
        next
      end

      line = fix.call line if fix
      break if not line

      if comment_re and line =~ comment_re
        line = stream.gets
        next
      end

      # Chunk fields
      parts = parse_fields(line, sep)

      # Get next line
      line = stream.gets

      # Get id field
      next if parts[key_pos].nil? || parts[key_pos].empty?

      ids = parse_fields(parts[key_pos], sep2)
      ids.collect!{|name| name.downcase} if case_insensitive

      id = ids.shift
      ids.each do |alias_id| data[alias_id] = "__Ref:#{id}" end

      if key_field.nil?
        other_pos = (0..(parts.length - 1)).to_a
        other_pos.delete key_pos
      end

      if single
        next if data.include?(id) and type != :flat

        if type == :flat
          extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}.flatten
        else
          extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
        end

        extra.collect!{|elem| caster.call(elem)} if caster

        case type
        when :single
          data[id] = extra.first
        when :flat
          if data.include? id
            data[id].concat extra
          else
            data[id] = extra
          end
        else
          data[id] = extra
        end

        max_cols = extra.size if type != :flat and extra.size > max_cols
      else
        extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
        extra.collect!{|list| list.collect{|elem| caster.call(elem)}} if caster

        max_cols = extra.size if extra.size > max_cols

        if not data.include? id
          data[id] = extra
        elsif merge
          # Follow references down to the real entry. Only Strings can be
          # references; entries are Arrays, which do not respond to =~ on
          # current Ruby versions.
          entry = data[id]
          while String === entry && entry =~ /__Ref:(.*)/
            entry = data[$1]
          end

          extra.each_with_index do |f, i|
            if f.empty?
              next unless keep_empty
              f = [""]
            end
            entry[i] ||= []
            entry[i] = entry[i].concat f
          end
          data[id] = entry
        end
      end
    end

    if keep_empty and max_cols > 0
      data.each do |entry_key, values|
        # Only rows stored as Arrays can be padded. This skips "__Ref:"
        # strings and the scalar values of :single tables.
        next unless Array === values

        max_cols.times do |i|
          if type == :double
            values[i] = [""] if values[i].nil? or values[i].empty?
          else
            values[i] = "" if values[i].nil?
          end
        end
      end
    end

    [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
  end

end
|
data/share/lib/R/util.R
CHANGED
@@ -18,12 +18,15 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
|
|
18
18
|
data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=1, comment.char = comment.char, ...);
|
19
19
|
f = file(filename, 'r');
|
20
20
|
headers = readLines(f, 1);
|
21
|
-
|
22
|
-
|
21
|
+
if (length(grep("^#: ", headers)) > 0){
|
22
|
+
headers = readLines(f, 1);
|
23
|
+
}
|
24
|
+
if (length(grep("^#", headers)) > 0){
|
23
25
|
fields = strsplit(headers, sep)[[1]];
|
24
26
|
fields = fields[2:length(fields)];
|
25
27
|
names(data) <- fields;
|
26
28
|
}
|
29
|
+
close(f);
|
27
30
|
return(data);
|
28
31
|
}
|
29
32
|
|
@@ -36,7 +39,6 @@ rbbt.tsv.write <- function(filename, data, key.field = NULL){
|
|
36
39
|
for (name in colnames(data)){ header = paste(header, name, sep="\t");}
|
37
40
|
header = paste(header, "\n", sep="");
|
38
41
|
cat(header, file=f);
|
39
|
-
cat(header, file=stderr());
|
40
42
|
|
41
43
|
close(f);
|
42
44
|
|
data/test/rbbt/util/test_R.rb
CHANGED
@@ -3,7 +3,15 @@ require 'rbbt/util/R'
|
|
3
3
|
|
4
4
|
class TestR < Test::Unit::TestCase
|
5
5
|
def test_sum
|
6
|
-
assert_equal "6", R.run('cat(3+3)').read
|
6
|
+
assert_equal "6", R.run('cat(3+3)').read.split(/\n/).last
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_tsv_R
|
10
|
+
tsv = TSV.new({:a => 1, :b => 2})
|
11
|
+
tsv2 = tsv.R <<-EOF
|
12
|
+
data = data + 1
|
13
|
+
EOF
|
14
|
+
puts tsv2.to_s
|
7
15
|
end
|
8
16
|
end
|
9
17
|
|
@@ -8,6 +8,7 @@ SHAREDIR = File.join(PKGData.sharedir_for_file(__FILE__), 'install/DataTest')
|
|
8
8
|
FileUtils.mkdir_p SHAREDIR
|
9
9
|
File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
|
10
10
|
f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
|
11
|
+
f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
|
11
12
|
end
|
12
13
|
|
13
14
|
module DataTest
|
@@ -27,14 +28,18 @@ class TestDataModule < Test::Unit::TestCase
|
|
27
28
|
FileUtils.mkdir_p SHAREDIR
|
28
29
|
File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
|
29
30
|
f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
|
31
|
+
f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
|
30
32
|
end
|
31
33
|
end
|
32
34
|
|
33
35
|
# Checks the DataTest module against the files declared in the Rakefile that
# the setup code writes into SHAREDIR: its datadir, the produced file, the
# salute helpers, and the namespace reported for the tsv file.
def test_rakefile
  assert_equal Rbbt.files.DataTest, DataTest.datadir
  assert_equal "File 1", Rbbt.files.DataTest.file1.read
  assert_equal "Hello world", DataTest.salute("world")
  assert_equal "Hello world", DataTest::with_key("world").salute
  assert_equal "Hello world", DataTest::World.salute
  assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.namespace
  assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.tsv.namespace
ensure
  # Remove the produced files even when an assertion fails, so a failed run
  # does not leave them behind for the next one.
  FileUtils.rm_rf File.join(Rbbt.datadir, 'DataTest')
end
|
40
45
|
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/util/fix_width_table'
|
3
|
+
require 'rbbt/util/tsv'
|
4
|
+
|
5
|
+
# Tests for FixWidthTable: reopening a table, adding raw entries, and looking
# up positions in tables filled from TSV data.
class TestFixWidthTable < Test::Unit::TestCase

  # Loads a ":"-separated file whose "Range" cells are drawn with underscores.
  # The cast turns each cell into a Range that starts at the number of leading
  # whitespace characters and spans the run of underscores that follows.
  #
  # Returns the TSV sliced down to the derived "Start" and "End" fields.
  def load_data(data)
    Log.debug("Data:\n#{Open.read(data)}")
    tsv = TSV.new(data, :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
    tsv.add_field "Start" do |key, values|
      values["Range"].first
    end
    tsv.add_field "End" do |key, values|
      values["Range"].last
    end

    tsv.slice ["Start", "End"]
  end

  # A table created with true as its third argument still reports range as
  # true when the same file is opened again with false.
  # NOTE(review): presumably the third argument is the range flag and it is
  # stored in the file -- confirm in FixWidthTable.
  def test_options
    TmpFile.with_file do |filename|
      f = FixWidthTable.new filename, 100, true
      f.close

      f1 = FixWidthTable.new filename, 100, false
      begin
        assert_equal true, f1.range
      ensure
        # Do not leave the reopened table open when the assertion fails
        f1.close
      end
    end
  end

  # Entries added one by one can be read back by index.
  def test_add
    TmpFile.with_file do |filename|
      f = FixWidthTable.new filename, 100, true
      f.add [1,2,0], "test1"
      f.add [3,4,0], "test2"
      f.read

      assert_equal 1, f.pos(0)
      assert_equal 3, f.pos(1)
      assert_equal 2, f.pos_end(0)
      assert_equal 4, f.pos_end(1)
      assert_equal 0, f.overlap(0)
      assert_equal 0, f.overlap(1)
      assert_equal "test1", f.value(0)
      assert_equal "test2", f.value(1)
    end

  end

  # Point lookups: the table is filled from a :single TSV of ID => position.
  def test_point
    data =<<-EOF
#: :sep=/\\s+/#:type=:single#:cast=to_i
#ID Pos
a 1
b 10
c 20
d 12
e 26
f 11
g 25
    EOF
    TmpFile.with_file(data) do |datafile|
      tsv = TSV.new datafile
      TmpFile.with_file do |filename|
        f = FixWidthTable.new filename, 100, false
        f.add_point tsv
        f.read

        assert_equal %w(), f[0].sort
        assert_equal %w(b), f[10].sort
        assert_equal %w(a b c d f), f[(0..20)].sort
      end
    end
  end

  # Range lookups: the table is filled from the Start/End TSV built by
  # load_data.
  def test_range
    data =<<-EOF
#ID:Range
#:012345678901234567890
a: ______
b: ______
c: _______
d: ____
e: ______
f: ___
g: ____
    EOF
    TmpFile.with_file(data) do |datafile|
      tsv = TSV.new load_data(datafile)
      TmpFile.with_file do |filename|
        f = FixWidthTable.new filename, 100, true
        f.add_range tsv
        f.read

        assert_equal %w(), f[0].sort
        assert_equal %w(b), f[1].sort
        assert_equal %w(), f[20].sort
        assert_equal %w(), f[(20..100)].sort
        assert_equal %w(a b d), f[3].sort
        assert_equal %w(a b c d e), f[(3..4)].sort
      end
    end
  end
end
|
107
|
+
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -32,4 +32,47 @@ class TestMisc < Test::Unit::TestCase
|
|
32
32
|
assert_equal(1, a['a'])
|
33
33
|
end
|
34
34
|
|
35
|
+
# Misc.path_relative_to('test/test/foo', 'test') is expected to give
# "test/foo".
def test_path_relative_to
  relative = Misc.path_relative_to('test/test/foo', 'test')

  assert_equal "test/foo", relative
end
|
38
|
+
|
39
|
+
# The first chunk that Misc.chunk returns for the "-- Entry" separator is
# expected to hold the lines 1, 2 and 3 once stripped.
def test_chunk
  text =<<-EOF
This is an example file. Entries are separated by Entry
-- Entry
1
2
3
-- Entry
4
5
6
  EOF

  first_chunk = Misc.chunk(text, /^-- Entry/).first

  assert_equal "1\n2\n3", first_chunk.strip
end
|
54
|
+
|
55
|
+
# Round trip through Misc.hash2string and Misc.string2hash.
def test_hash2string
  # These hashes are expected to come back unchanged
  [{}, {:a => 1}, {:a => true}, {:a => Misc}, {:a => :b}].each do |hash|
    assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
  end

  # An entry with a Regexp value is expected to be left out
  with_regexp = {:a => /test/}
  assert_equal({}, Misc.string2hash(Misc.hash2string(with_regexp)))
end
|
77
|
+
|
35
78
|
end
|