rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,258 @@
1
+ require 'rbbt/util/misc'
2
+ class TSV
3
+
4
+ def self.parse_fields(io, delimiter = "\t")
5
+ return [] if io.nil?
6
+
7
+ ## split with delimiter, do not remove empty
8
+ fields = io.split(delimiter, -1)
9
+
10
+ fields
11
+ end
12
+
13
+ def self.parse_header(stream, sep = nil, header_hash = nil)
14
+ sep = /\t/ if sep.nil?
15
+ header_hash = "#" if header_hash.nil?
16
+
17
+ fields, key_field = nil
18
+ options = {}
19
+
20
+ # Get line
21
+
22
+ line = stream.gets
23
+ raise "Empty content" if line.nil?
24
+
25
+ # Process options line
26
+
27
+ if line and line =~ /^#{header_hash}: (.*)/
28
+ options = Misc.string2hash $1
29
+ line = stream.gets
30
+ end
31
+
32
+ # Determine separator
33
+
34
+ sep = options[:sep] if options[:sep]
35
+
36
+ # Process fields line
37
+
38
+ if line and line =~ /^#{header_hash}/
39
+ line.chomp!
40
+ fields = parse_fields(line, sep)
41
+ key_field = fields.shift
42
+ key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
43
+ line = stream.gets
44
+ end
45
+
46
+ # Return fields, options and first line
47
+
48
+ return key_field, fields, options, line
49
+ end
50
+
51
+ def self.parse(stream, options = {})
52
+
53
+ # Prepare options
54
+
55
+ key_field, other_fields, more_options, line = TSV.parse_header(stream, options[:sep], options[:header_hash])
56
+
57
+ options = Misc.add_defaults options, more_options
58
+
59
+ options = Misc.add_defaults options,
60
+ :case_insensitive => false,
61
+ :type => :double,
62
+ :namespace => nil,
63
+ :identifiers => nil,
64
+
65
+ :merge => false,
66
+ :keep_empty => (options[:type] != :flat and options[:type] != :single),
67
+ :cast => nil,
68
+
69
+ :header_hash => '#',
70
+ :sep => "\t",
71
+ :sep2 => "|",
72
+
73
+ :key => 0,
74
+ :fields => nil,
75
+
76
+ :fix => nil,
77
+ :exclude => nil,
78
+ :select => nil,
79
+ :grep => nil
80
+
81
+ header_hash, sep, sep2 =
82
+ Misc.process_options options, :header_hash, :sep, :sep2
83
+
84
+ key, fields =
85
+ Misc.process_options options, :key, :fields
86
+
87
+ if key_field.nil?
88
+ key_pos = key
89
+ key_field, fields = nil
90
+ else
91
+ all_fields = [key_field].concat other_fields
92
+
93
+ key_pos = Misc.field_position(all_fields, key)
94
+
95
+ if String === fields or Symbol === fields
96
+ fields = [fields]
97
+ end
98
+
99
+ if fields.nil?
100
+ other_pos = (0..(all_fields.length - 1)).to_a
101
+ other_pos.delete key_pos
102
+ else
103
+ if Array === fields
104
+ other_pos = fields.collect{|field| Misc.field_position(all_fields, field)}
105
+ else
106
+ other_pos = Misc.field_position(all_fields, fields)
107
+ end
108
+ end
109
+
110
+ key_field = all_fields[key_pos]
111
+ fields = all_fields.values_at *other_pos
112
+ end
113
+
114
+ case_insensitive, type, namespace, merge, keep_empty, cast =
115
+ Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
116
+ fix, exclude, select, grep =
117
+ Misc.process_options options, :fix, :exclude, :select, :grep
118
+
119
+ #{{{ Process rest
120
+ data = {}
121
+ single = type.to_sym != :double
122
+ max_cols = 0
123
+ while line do
124
+ line.chomp!
125
+
126
+ if line.empty? or
127
+ (exclude and exclude.call(line)) or
128
+ (select and not select.call(line))
129
+
130
+ line = stream.gets
131
+ next
132
+ end
133
+
134
+ line = fix.call line if fix
135
+ break if not line
136
+
137
+
138
+ if header_hash and not header_hash.empty? and line =~ /^#{header_hash}/
139
+ line = stream.gets
140
+ next
141
+ end
142
+
143
+ # Chunk fields
144
+ parts = parse_fields(line, sep)
145
+
146
+ # Get next line
147
+ line = stream.gets
148
+
149
+ # Get id field
150
+ next if parts[key_pos].nil? || parts[key_pos].empty?
151
+
152
+ if single
153
+ ids = parse_fields(parts[key_pos], sep2)
154
+ ids.collect!{|id| id.downcase} if case_insensitive
155
+
156
+ id = ids.shift
157
+ ids.each do |id2| data[id2] = "__Ref:#{id}" end
158
+
159
+ next if data.include?(id) and type != :flat
160
+
161
+ if key_field.nil?
162
+ other_pos = (0..(parts.length - 1)).to_a
163
+ other_pos.delete key_pos
164
+ end
165
+
166
+ if type == :flat
167
+ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}.flatten
168
+ else
169
+ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
170
+ end
171
+
172
+ extra.collect! do |elem|
173
+ case
174
+ when String === cast
175
+ elem.send(cast)
176
+ when Proc === cast
177
+ cast.call elem
178
+ end
179
+ end if cast
180
+
181
+ case
182
+ when type == :single
183
+ data[id] = extra.first
184
+ when type == :flat
185
+ if data.include? id
186
+ data[id].concat extra
187
+ else
188
+ data[id] = extra
189
+ end
190
+ else
191
+ data[id] = extra
192
+ end
193
+
194
+ max_cols = extra.size if extra.size > (max_cols || 0) unless type == :flat
195
+ else
196
+ ids = parse_fields(parts[key_pos], sep2)
197
+ ids.collect!{|id| id.downcase} if case_insensitive
198
+
199
+ id = ids.shift
200
+ ids.each do |id2| data[id2] = "__Ref:#{id}" end
201
+
202
+ if key_field.nil?
203
+ other_pos = (0..(parts.length - 1)).to_a
204
+ other_pos.delete key_pos
205
+ end
206
+
207
+ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
208
+ extra.collect! do |list|
209
+ case
210
+ when String === cast
211
+ list.collect{|elem| elem.send(cast)}
212
+ when Proc === cast
213
+ list.collect{|elem| cast.call elem}
214
+ end
215
+ end if cast
216
+
217
+ max_cols = extra.size if extra.size > (max_cols || 0)
218
+ if not merge
219
+ data[id] = extra unless data.include? id
220
+ else
221
+ if not data.include? id
222
+ data[id] = extra
223
+ else
224
+ entry = data[id]
225
+ while entry =~ /__Ref:(.*)/ do entry = data[$1] end
226
+ extra.each_with_index do |f, i|
227
+ if f.empty?
228
+ next unless keep_empty
229
+ f= [""]
230
+ end
231
+ entry[i] ||= []
232
+ entry[i] = entry[i].concat f
233
+ end
234
+ data[id] = entry
235
+ end
236
+ end
237
+ end
238
+ end
239
+
240
+ if keep_empty and max_cols > 0
241
+ data.each do |key, values|
242
+ next if values =~ /__Ref:/
243
+ new_values = values
244
+ max_cols.times do |i|
245
+ if type == :double
246
+ new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
247
+ else
248
+ new_values[i] = "" if new_values[i].nil?
249
+ end
250
+ end
251
+ data[key] = new_values
252
+ end
253
+ end
254
+
255
+ [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :datadir => options[:datadir], :identifiers => options[:identifiers], :cast => !!cast}]
256
+ end
257
+
258
+ end
data/share/lib/R/util.R CHANGED
@@ -18,12 +18,15 @@ rbbt.tsv <- function(filename, sep = "\t", comment.char ="#", ...){
18
18
  data=read.table(file=filename, sep=sep, fill=TRUE, as.is=TRUE, row.names=1, comment.char = comment.char, ...);
19
19
  f = file(filename, 'r');
20
20
  headers = readLines(f, 1);
21
- close(f);
22
- if (grep(paste("^", comment.char, sep=""), headers)){
21
+ if (length(grep("^#: ", headers)) > 0){
22
+ headers = readLines(f, 1);
23
+ }
24
+ if (length(grep("^#", headers)) > 0){
23
25
  fields = strsplit(headers, sep)[[1]];
24
26
  fields = fields[2:length(fields)];
25
27
  names(data) <- fields;
26
28
  }
29
+ close(f);
27
30
  return(data);
28
31
  }
29
32
 
@@ -36,7 +39,6 @@ rbbt.tsv.write <- function(filename, data, key.field = NULL){
36
39
  for (name in colnames(data)){ header = paste(header, name, sep="\t");}
37
40
  header = paste(header, "\n", sep="");
38
41
  cat(header, file=f);
39
- cat(header, file=stderr());
40
42
 
41
43
  close(f);
42
44
 
@@ -3,7 +3,15 @@ require 'rbbt/util/R'
3
3
 
4
4
  class TestR < Test::Unit::TestCase
5
5
  def test_sum
6
- assert_equal "6", R.run('cat(3+3)').read
6
+ assert_equal "6", R.run('cat(3+3)').read.split(/\n/).last
7
+ end
8
+
9
+ def test_tsv_R
10
+ tsv = TSV.new({:a => 1, :b => 2})
11
+ tsv2 = tsv.R <<-EOF
12
+ data = data + 1
13
+ EOF
14
+ puts tsv2.to_s
7
15
  end
8
16
  end
9
17
 
@@ -8,6 +8,7 @@ SHAREDIR = File.join(PKGData.sharedir_for_file(__FILE__), 'install/DataTest')
8
8
  FileUtils.mkdir_p SHAREDIR
9
9
  File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
10
10
  f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
11
+ f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
11
12
  end
12
13
 
13
14
  module DataTest
@@ -27,14 +28,18 @@ class TestDataModule < Test::Unit::TestCase
27
28
  FileUtils.mkdir_p SHAREDIR
28
29
  File.open(File.join(SHAREDIR, 'Rakefile'), 'w') do |f|
29
30
  f.puts "file :file1 do |t| File.open(t.name, 'w') do |f| f.write 'File 1' end end"
31
+ f.puts "file :tsv_file do |t| File.open(t.name, 'w') do |f| f.write 'a\t1\nb\t2\n' end end"
30
32
  end
31
33
  end
32
34
 
33
35
  def test_rakefile
36
+ assert_equal Rbbt.files.DataTest, DataTest.datadir
34
37
  assert_equal "File 1", Rbbt.files.DataTest.file1.read
35
38
  assert_equal "Hello world", DataTest.salute("world")
36
39
  assert_equal "Hello world", DataTest::with_key("world").salute
37
40
  assert_equal "Hello world", DataTest::World.salute
41
+ assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.namespace
42
+ assert_equal "DataTest", Rbbt.files.DataTest.tsv_file.tsv.namespace
38
43
  FileUtils.rm_rf File.join(Rbbt.datadir, 'DataTest')
39
44
  end
40
45
 
@@ -0,0 +1,107 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/util/fix_width_table'
3
+ require 'rbbt/util/tsv'
4
+
5
+ class TestFixWidthTable < Test::Unit::TestCase
6
+ def load_data(data)
7
+ Log.debug("Data:\n#{Open.read(data)}")
8
+ tsv = TSV.new(data, :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
9
+ tsv.add_field "Start" do |key, values|
10
+ values["Range"].first
11
+ end
12
+ tsv.add_field "End" do |key, values|
13
+ values["Range"].last
14
+ end
15
+
16
+ tsv = tsv.slice ["Start", "End"]
17
+
18
+ tsv
19
+ end
20
+
21
+ def test_options
22
+ TmpFile.with_file do |filename|
23
+ f = FixWidthTable.new filename, 100, true
24
+ f.close
25
+
26
+ f1 = FixWidthTable.new filename, 100, false
27
+
28
+ assert_equal true, f1.range
29
+ end
30
+ end
31
+
32
+ def test_add
33
+ TmpFile.with_file do |filename|
34
+ f = FixWidthTable.new filename, 100, true
35
+ f.add [1,2,0], "test1"
36
+ f.add [3,4,0], "test2"
37
+ f.read
38
+
39
+ assert_equal 1, f.pos(0)
40
+ assert_equal 3, f.pos(1)
41
+ assert_equal 2, f.pos_end(0)
42
+ assert_equal 4, f.pos_end(1)
43
+ assert_equal 0, f.overlap(0)
44
+ assert_equal 0, f.overlap(1)
45
+ assert_equal "test1", f.value(0)
46
+ assert_equal "test2", f.value(1)
47
+ end
48
+
49
+ end
50
+
51
+ def test_point
52
+ data =<<-EOF
53
+ #: :sep=/\\s+/#:type=:single#:cast=to_i
54
+ #ID Pos
55
+ a 1
56
+ b 10
57
+ c 20
58
+ d 12
59
+ e 26
60
+ f 11
61
+ g 25
62
+ EOF
63
+ TmpFile.with_file(data) do |datafile|
64
+ tsv = TSV.new datafile
65
+ ddd tsv
66
+ TmpFile.with_file do |filename|
67
+ f = FixWidthTable.new filename, 100, false
68
+ f.add_point tsv
69
+ f.read
70
+
71
+ assert_equal %w(), f[0].sort
72
+ assert_equal %w(b), f[10].sort
73
+ assert_equal %w(a b c d f), f[(0..20)].sort
74
+ end
75
+ end
76
+ end
77
+
78
+ def test_range
79
+ data =<<-EOF
80
+ #ID:Range
81
+ #:012345678901234567890
82
+ a: ______
83
+ b: ______
84
+ c: _______
85
+ d: ____
86
+ e: ______
87
+ f: ___
88
+ g: ____
89
+ EOF
90
+ TmpFile.with_file(data) do |datafile|
91
+ tsv = TSV.new load_data(datafile)
92
+ TmpFile.with_file do |filename|
93
+ f = FixWidthTable.new filename, 100, true
94
+ f.add_range tsv
95
+ f.read
96
+
97
+ assert_equal %w(), f[0].sort
98
+ assert_equal %w(b), f[1].sort
99
+ assert_equal %w(), f[20].sort
100
+ assert_equal %w(), f[(20..100)].sort
101
+ assert_equal %w(a b d), f[3].sort
102
+ assert_equal %w(a b c d e), f[(3..4)].sort
103
+ end
104
+ end
105
+ end
106
+ end
107
+
@@ -32,4 +32,47 @@ class TestMisc < Test::Unit::TestCase
32
32
  assert_equal(1, a['a'])
33
33
  end
34
34
 
35
+ def test_path_relative_to
36
+ assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
37
+ end
38
+
39
+ def test_chunk
40
+ test =<<-EOF
41
+ This is an example file. Entries are separated by Entry
42
+ -- Entry
43
+ 1
44
+ 2
45
+ 3
46
+ -- Entry
47
+ 4
48
+ 5
49
+ 6
50
+ EOF
51
+
52
+ assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
53
+ end
54
+
55
+ def test_hash2string
56
+ hash = {}
57
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
58
+
59
+ hash = {:a => 1}
60
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
61
+
62
+ hash = {:a => true}
63
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
64
+
65
+ hash = {:a => Misc}
66
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
67
+
68
+ hash = {:a => :b}
69
+ assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
70
+
71
+ hash = {:a => /test/}
72
+ assert_equal({}, Misc.string2hash(Misc.hash2string(hash)))
73
+
74
+
75
+
76
+ end
77
+
35
78
  end