rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,258 @@
1
+ require 'rbbt/util/misc'
2
class TSV

  # Splits one line (or one multi-valued field) into its pieces.
  #
  # io        - the String to split (despite its name); nil yields [].
  # delimiter - String or Regexp handed to String#split.
  #
  # The -1 limit keeps trailing empty fields, so "a\t\t" gives ["a", "", ""].
  def self.parse_fields(io, delimiter = "\t")
    return [] if io.nil?

    io.split(delimiter, -1)
  end

  # Reads the optional preamble of a TSV stream.
  #
  # Up to two special lines are consumed from +stream+:
  #
  # 1. an options line, "<header_hash>: <text>", whose text is decoded with
  #    Misc.string2hash; a :sep entry found there replaces +sep+;
  # 2. a fields line starting with +header_hash+; its first field (minus the
  #    marker) is the key field name, the remaining ones the field names.
  #
  # stream      - object responding to #gets.
  # sep         - field separator (defaults to /\t/).
  # header_hash - marker that starts header lines (defaults to "#"). It is
  #               matched literally, so markers such as "*" or "." are safe.
  #
  # Returns [key_field, fields, options, line] where +line+ is the first line
  # that was read but not consumed (nil at end of stream). key_field and
  # fields are nil when no fields line is present.
  #
  # Raises RuntimeError ("Empty content") when the stream has no lines.
  def self.parse_header(stream, sep = nil, header_hash = nil)
    sep         = /\t/ if sep.nil?
    header_hash = "#"  if header_hash.nil?

    # Escape the marker: it is data, not a pattern
    marker = Regexp.escape(header_hash)

    fields, key_field = nil
    options = {}

    line = stream.gets
    raise "Empty content" if line.nil?

    # Options line
    if line =~ /^#{marker}: (.*)/
      options = Misc.string2hash Regexp.last_match(1)
      line = stream.gets
    end

    # The options line may override the separator used for the fields line
    sep = options[:sep] if options[:sep]

    # Fields line
    if line and line =~ /^#{marker}/
      fields    = parse_fields(line.chomp, sep)
      key_field = fields.shift
      # Drop the leading marker; key_field is nil only for an empty line
      # combined with an empty marker
      key_field = key_field[header_hash.length..-1] unless key_field.nil?
      line = stream.gets
    end

    return key_field, fields, options, line
  end

  # Parses a whole TSV stream into a Hash.
  #
  # stream  - object responding to #gets.
  # options - Hash; entries found in the file's options line are used as
  #           defaults for it. Recognized keys (with their defaults):
  #
  #   :type (:double)       - :double => key => [[v, ...], ...] (one list per field)
  #                           :flat   => key => [v, ...] (all fields flattened)
  #                           :single => key => first value of the first field
  #                           other   => key => [first value of each field]
  #                           A String is accepted and converted with #to_sym.
  #   :key (0), :fields (nil) - key / value columns, resolved through
  #                           Misc.field_position when the file has a header.
  #                           Without a header :key is used as a position and
  #                           every other column is kept.
  #   :sep ("\t"), :sep2 ("|") - field separator and in-field value separator.
  #   :header_hash ('#')    - data lines starting with it are skipped
  #                           (unless it is empty).
  #   :case_insensitive (false) - downcase keys.
  #   :merge (false)        - :double only; append values of repeated keys
  #                           instead of keeping the first occurrence.
  #   :keep_empty           - pad rows up to the widest row; defaults to true
  #                           except for :flat and :single.
  #   :cast (nil)           - method name (String or Symbol) sent to each
  #                           value, or a callable invoked with it.
  #   :fix, :exclude, :select - callables applied to each chomped line:
  #                           :exclude / :select filter lines, :fix rewrites
  #                           the line (a nil result stops parsing).
  #   :grep                 - accepted but not used by this method.
  #   :namespace, :identifiers, :datadir - passed through to the result.
  #
  # Additional keys of a multi-valued key column ("a|b") are stored as
  # "__Ref:<first key>" strings pointing at the main entry.
  #
  # Returns [data, info] where info holds :key_field, :fields, :type,
  # :case_insensitive, :namespace, :datadir, :identifiers and :cast
  # (true/false).
  def self.parse(stream, options = {})

    # Prepare options

    key_field, other_fields, more_options, line =
      parse_header(stream, options[:sep], options[:header_hash])

    options = Misc.add_defaults options, more_options

    # :type may be a String or a Symbol at this point; compare by name
    scalar_type = %w(flat single).include?(options[:type].to_s)

    options = Misc.add_defaults options,
      :case_insensitive => false,
      :type             => :double,
      :namespace        => nil,
      :identifiers      => nil,

      :merge            => false,
      :keep_empty       => !scalar_type,
      :cast             => nil,

      :header_hash      => '#',
      :sep              => "\t",
      :sep2             => "|",

      :key              => 0,
      :fields           => nil,

      :fix              => nil,
      :exclude          => nil,
      :select           => nil,
      :grep             => nil

    header_hash, sep, sep2 =
      Misc.process_options options, :header_hash, :sep, :sep2

    key, fields =
      Misc.process_options options, :key, :fields

    other_pos = nil
    if key_field.nil?
      # No header: the key is a plain position and the value columns are
      # worked out line by line
      key_pos = key
      fields  = nil
    else
      all_fields = [key_field].concat other_fields

      key_pos = Misc.field_position(all_fields, key)

      fields = [fields] if String === fields or Symbol === fields

      if fields.nil?
        other_pos = (0..(all_fields.length - 1)).to_a
        other_pos.delete key_pos
      elsif Array === fields
        other_pos = fields.collect { |field| Misc.field_position(all_fields, field) }
      else
        other_pos = Misc.field_position(all_fields, fields)
      end

      key_field = all_fields[key_pos]
      fields    = all_fields.values_at(*other_pos)
    end

    case_insensitive, type, namespace, merge, keep_empty, cast =
      Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
    fix, exclude, select, _grep =
      Misc.process_options options, :fix, :exclude, :select, :grep

    # Normalize once so that every comparison below sees a Symbol
    type = type.to_sym

    # Lines starting with the header marker are comments; build the pattern
    # once instead of once per line
    comment_re =
      if header_hash and not header_hash.empty?
        /^#{Regexp.escape(header_hash)}/
      end

    #{{{ Process rest
    data     = {}
    max_cols = 0
    while line
      line = line.chomp

      if line.empty? ||
          (exclude && exclude.call(line)) ||
          (select && !select.call(line))

        line = stream.gets
        next
      end

      line = fix.call(line) if fix
      break unless line

      if comment_re && line =~ comment_re
        line = stream.gets
        next
      end

      # Chunk fields
      parts = parse_fields(line, sep)

      # Get next line
      line = stream.gets

      # Get id field
      key_value = parts[key_pos]
      next if key_value.nil? || key_value.empty?

      ids = parse_fields(key_value, sep2)
      ids.collect! { |name| name.downcase } if case_insensitive

      # The first id owns the row, the others become references to it
      id = ids.shift
      ids.each { |alias_id| data[alias_id] = "__Ref:#{id}" }

      # Only :double (with :merge) and :flat accumulate repeated keys
      next if type != :double && type != :flat && data.include?(id)

      if key_field.nil?
        other_pos = (0..(parts.length - 1)).to_a
        other_pos.delete key_pos
      end

      columns = parts.values_at(*other_pos)

      if type == :double
        extra = columns.collect { |f| parse_fields(f, sep2) }
        if cast
          extra.collect! { |list| list.collect { |elem| parse_cast_value(elem, cast) } }
        end

        max_cols = extra.size if extra.size > max_cols

        if not data.include?(id)
          data[id] = extra
        elsif merge
          # Follow references to the entry that really holds the values.
          # Only Strings can be references; rows are Arrays.
          entry = data[id]
          while String === entry and entry =~ /__Ref:(.*)/
            entry = data[Regexp.last_match(1)]
          end

          extra.each_with_index do |f, i|
            if f.empty?
              next unless keep_empty
              f = [""]
            end
            entry[i] ||= []
            entry[i].concat f
          end
          data[id] = entry
        end
      else
        if type == :flat
          extra = columns.collect { |f| parse_fields(f, sep2) }.flatten
        else
          extra = columns.collect { |f| parse_fields(f, sep2).first }
        end

        extra.collect! { |elem| parse_cast_value(elem, cast) } if cast

        case type
        when :single
          data[id] = extra.first
        when :flat
          if data.include? id
            data[id].concat extra
          else
            data[id] = extra
          end
        else
          data[id] = extra
        end

        max_cols = extra.size if type != :flat && extra.size > max_cols
      end
    end

    # Pad every row up to the widest one
    if keep_empty and max_cols > 0
      data.each do |_key, values|
        # References are Strings and :single values are scalars: nothing to pad
        next unless Array === values

        max_cols.times do |i|
          if type == :double
            values[i] = [""] if values[i].nil? or values[i].empty?
          else
            values[i] = "" if values[i].nil?
          end
        end
      end
    end

    [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
  end

  # Applies the :cast option of TSV.parse to one value: a String or Symbol is
  # sent to the value as a method name, a callable is invoked with it, and
  # anything else leaves the value untouched (instead of turning it into nil).
  def self.parse_cast_value(value, cast)
    case cast
    when String, Symbol
      value.send(cast)
    else
      cast.respond_to?(:call) ? cast.call(value) : value
    end
  end
  private_class_method :parse_cast_value

end
data/share/lib/R/util.R CHANGED
@@ -18,12 +18,15 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
18
18
  data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=1, comment.char = comment.char, ...);
19
19
  f = file(filename, 'r');
20
20
  headers = readLines(f, 1);
21
- close(f);
22
- if (grep(paste("^", comment.char, sep=""), headers)){
21
+ if (length(grep("^#: ", headers)) > 0){
22
+ headers = readLines(f, 1);
23
+ }
24
+ if (length(grep("^#", headers)) > 0){
23
25
  fields = strsplit(headers, sep)[[1]];
24
26
  fields = fields[2:length(fields)];
25
27
  names(data) <- fields;
26
28
  }
29
+ close(f);
27
30
  return(data);
28
31
  }
29
32
 
@@ -36,7 +39,6 @@ rbbt.tsv.write <- function(filename, data, key.field = NULL){
36
39
  for (name in colnames(data)){ header = paste(header, name, sep="\t");}
37
40
  header = paste(header, "\n", sep="");
38
41
  cat(header, file=f);
39
- cat(header, file=stderr());
40
42
 
41
43
  close(f);
42
44
 
@@ -3,7 +3,15 @@ require 'rbbt/util/R'
3
3
 
4
4
  class TestR < Test::Unit::TestCase
5
5
  def test_sum
6
- assert_equal "6", R.run('cat(3+3)').read
6
+ assert_equal "6", R.run('cat(3+3)').read.split(/\n/).last
7
+ end
8
+
9
+ def test_tsv_R
10
+ tsv = TSV.new({:a => 1, :b => 2})
11
+ tsv2 = tsv.R <<-EOF
12
+ data = data + 1
13
+ EOF
14
+ puts tsv2.to_s
7
15
  end
8
16
  end
9
17
 
@@ -8,6 +8,7 @@ SHAREDIR = File.join(PKGData.sharedir_for_file(__FILE__), 'install/DataTest')
8
8
  FileUtils.mkdir_p SHAREDIR
9
9
  File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
10
10
  f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
11
+ f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
11
12
  end
12
13
 
13
14
  module DataTest
@@ -27,14 +28,18 @@ class TestDataModule < Test::Unit::TestCase
27
28
  FileUtils.mkdir_p SHAREDIR
28
29
  File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
29
30
  f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
31
+ f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
30
32
  end
31
33
  end
32
34
 
33
35
  def test_rakefile
36
+ assert_equal Rbbt.files.DataTest, DataTest.datadir
34
37
  assert_equal "File 1", Rbbt.files.DataTest.file1.read
35
38
  assert_equal "Hello world", DataTest.salute("world")
36
39
  assert_equal "Hello world", DataTest::with_key("world").salute
37
40
  assert_equal "Hello world", DataTest::World.salute
41
+ assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.namespace
42
+ assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.tsv.namespace
38
43
  FileUtils.rm_rf File.join(Rbbt.datadir, 'DataTest')
39
44
  end
40
45
 
@@ -0,0 +1,107 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/util/fix_width_table'
3
+ require 'rbbt/util/tsv'
4
+
5
# Tests for FixWidthTable: option persistence, adding entries, and point and
# range lookups built from TSV data.
# NOTE(review): this listing comes from a rendered diff; leading whitespace
# inside the heredoc fixtures below appears to have been collapsed, so the
# column positions the range test relies on are not visible here.
+ class TestFixWidthTable < Test::Unit::TestCase
6
# Helper: loads the file at +data+ as a ":"-separated :list TSV, casting each
# value to the Range of positions covered by its run of underscores (offset by
# the leading whitespace), then adds "Start" and "End" fields taken from the
# first and last element of "Range" and returns a slice with just those two.
+ def load_data(data)
7
+ Log.debug("Data:\n#{Open.read(data)}")
8
+ tsv = TSV.new(data, :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
9
+ tsv.add_field "Start" do |key, values|
10
+ values["Range"].first
11
+ end
12
+ tsv.add_field "End" do |key, values|
13
+ values["Range"].last
14
+ end
15
+
16
+ tsv = tsv.slice ["Start", "End"]
17
+
18
+ tsv
19
+ end
20
+
21
# A table created with range = true and then reopened with range = false is
# expected to still report range == true.
# NOTE(review): presumably the flag is stored in the file itself - confirm in
# FixWidthTable.
+ def test_options
22
+ TmpFile.with_file do |filename|
23
+ f = FixWidthTable.new filename, 100, true
24
+ f.close
25
+
26
+ f1 = FixWidthTable.new filename, 100, false
27
+
28
+ assert_equal true, f1.range
29
+ end
30
+ end
31
+
32
# Adds two entries given as three-element arrays plus a value and checks that
# pos, pos_end, overlap and value return the first, second, third element and
# the value for each index.
+ def test_add
33
+ TmpFile.with_file do |filename|
34
+ f = FixWidthTable.new filename, 100, true
35
+ f.add [1,2,0], "test1"
36
+ f.add [3,4,0], "test2"
37
+ f.read
38
+
39
+ assert_equal 1, f.pos(0)
40
+ assert_equal 3, f.pos(1)
41
+ assert_equal 2, f.pos_end(0)
42
+ assert_equal 4, f.pos_end(1)
43
+ assert_equal 0, f.overlap(0)
44
+ assert_equal 0, f.overlap(1)
45
+ assert_equal "test1", f.value(0)
46
+ assert_equal "test2", f.value(1)
47
+ end
48
+
49
+ end
50
+
51
# Point table (range = false): entries are loaded from a :single TSV whose
# options line sets the separator, type and cast; lookups by position and by
# range of positions return the matching keys.
# NOTE(review): `ddd tsv` looks like a leftover debugging call - confirm.
+ def test_point
52
+ data =<<-EOF
53
+ #: :sep=/\\s+/#:type=:single#:cast=to_i
54
+ #ID Pos
55
+ a 1
56
+ b 10
57
+ c 20
58
+ d 12
59
+ e 26
60
+ f 11
61
+ g 25
62
+ EOF
63
+ TmpFile.with_file(data) do |datafile|
64
+ tsv = TSV.new datafile
65
+ ddd tsv
66
+ TmpFile.with_file do |filename|
67
+ f = FixWidthTable.new filename, 100, false
68
+ f.add_point tsv
69
+ f.read
70
+
71
+ assert_equal %w(), f[0].sort
72
+ assert_equal %w(b), f[10].sort
73
+ assert_equal %w(a b c d f), f[(0..20)].sort
74
+ end
75
+ end
76
+ end
77
+
78
# Range table (range = true): each row of the fixture draws its range as a run
# of underscores, converted to Start/End by load_data; lookups by position and
# by range of positions return the keys of the ranges they hit.
+ def test_range
79
+ data =<<-EOF
80
+ #ID:Range
81
+ #:012345678901234567890
82
+ a: ______
83
+ b: ______
84
+ c: _______
85
+ d: ____
86
+ e: ______
87
+ f: ___
88
+ g: ____
89
+ EOF
90
+ TmpFile.with_file(data) do |datafile|
91
+ tsv = TSV.new load_data(datafile)
92
+ TmpFile.with_file do |filename|
93
+ f = FixWidthTable.new filename, 100, true
94
+ f.add_range tsv
95
+ f.read
96
+
97
+ assert_equal %w(), f[0].sort
98
+ assert_equal %w(b), f[1].sort
99
+ assert_equal %w(), f[20].sort
100
+ assert_equal %w(), f[(20..100)].sort
101
+ assert_equal %w(a b d), f[3].sort
102
+ assert_equal %w(a b c d e), f[(3..4)].sort
103
+ end
104
+ end
105
+ end
106
+ end
107
+
@@ -32,4 +32,47 @@ class TestMisc < Test::Unit::TestCase
32
32
  assert_equal(1, a['a'])
33
33
  end
34
34
 
35
# Misc.path_relative_to('test/test/foo', 'test') is expected to return
# "test/foo".
# NOTE(review): presumably only the first leading 'test' component is removed
# - confirm against Misc.path_relative_to.
+ def test_path_relative_to
36
+ assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
37
+ end
38
+
39
# Misc.chunk with the pattern /^-- Entry/ is expected to return, as its first
# chunk, the lines between the first and second "-- Entry" markers
# ("1\n2\n3" once stripped).
+ def test_chunk
40
+ test =<<-EOF
41
+ This is an example file. Entries are separated by Entry
42
+ -- Entry
43
+ 1
44
+ 2
45
+ 3
46
+ -- Entry
47
+ 4
48
+ 5
49
+ 6
50
+ EOF
51
+
52
+ assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
53
+ end
54
+
55
# Round-trips hashes through Misc.hash2string and Misc.string2hash. An empty
# hash and hashes with an Integer, true, a module constant or a Symbol as the
# value are expected to come back unchanged; a Regexp value is expected to be
# dropped, leaving an empty hash.
+ def test_hash2string
56
+ hash = {}
57
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
58
+
59
+ hash = {:a => 1}
60
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
61
+
62
+ hash = {:a => true}
63
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
64
+
65
+ hash = {:a => Misc}
66
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
67
+
68
+ hash = {:a => :b}
69
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
70
+
71
+ hash = {:a => /test/}
72
+ assert_equal({}, Misc.string2hash(Misc.hash2string(hash)))
73
+
74
+
75
+
76
+ end
77
+
35
78
  end