rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,109 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv/accessor'
3
+ require 'rbbt/util/tsv'
4
+
5
# Unit tests for the TSV accessor features loaded from
# 'rbbt/util/tsv/accessor': TSV.zip_fields, #values_at, #to_s,
# #fields lookups and #identify_field.
#
# NOTE(review): the heredoc fixtures below are written with one space between
# columns. The to_s tests compare the fixture text verbatim against the
# serialized table, so the exact separator matters -- confirm the fixture
# whitespace against the repository copy of this file.
class TestTSVAcessor < Test::Unit::TestCase

  # zip_fields applied twice must give back the original nested array.
  def test_zip_fields
    a = [%w(1 2), %w(a b)]
    assert_equal a, TSV.zip_fields(TSV.zip_fields(a))
  end

  # With the table keyed by "OtherID", values_at("Id1", "Id3") returns one
  # entry per requested key; the last entry's first value must be "row2".
  def test_values_at
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :list, :sep => /\s+/, :key => "OtherID", :persistence => true)
      assert_equal "row2", tsv.values_at("Id1", "Id3").last.first
    end
  end

  # Serializing a freshly parsed table must reproduce the input text once the
  # "#: " line is removed.
  def test_to_s
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/)
      assert_equal content, strip_preamble(tsv.to_s)
    end
  end

  # to_s accepts an explicit list of keys and must emit the rows in exactly
  # that order.
  def test_to_s_ordered
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
    EOF

    content2 =<<-EOF
#Id ValueA ValueB OtherID
row2 A B Id3
row1 a|aa|aaa b Id1|Id2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/)
      assert_equal content, strip_preamble(tsv.to_s(%w(row1 row2)))
      assert_not_equal content, strip_preamble(tsv.to_s(%w(row2 row1)))
      assert_equal content2, strip_preamble(tsv.to_s(%w(row2 row1)))
    end
  end

  # Header names of the form "Name:LetterValue" must still be found when the
  # fields are asked for "LetterValue". The separator option is passed here
  # as a '#:sep=...' suffix appended to the filename.
  def test_field_compare
    content =<<-EOF
#Id Letter:LetterValue Other:LetterValue OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
row3 a C Id4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(filename + '#:sep=/\s+/')

      assert tsv.fields.include?("LetterValue")
    end
  end

  # identify_field must report the key column's name as :key.
  def test_indentify_fields
    content =<<-EOF
#ID ValueA ValueB Comment
row1 a b c
row2 A B C
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
      assert_equal :key, tsv.identify_field("ID")
    end
  end

  # The fields collection can be indexed by field name and returns that name.
  def test_named_fields
    content =<<-EOF
#ID ValueA ValueB Comment
row1 a b c
row2 A B C
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :double, :sep => /\s/)

      assert_equal "ValueA", tsv.fields["ValueA"]
    end
  end

  private

  # Returns +text+ without its first line that starts with "#: ", so the
  # remainder can be compared with a fixture that has no such line.
  #
  # The pattern deliberately carries no trailing flag: in Ruby a trailing `s`
  # on a regexp literal selects an encoding rather than changing how the
  # pattern matches, and this pattern needs neither.
  def strip_preamble(text)
    text.sub(/^#: [^\n]*\n/, '')
  end
end
109
+
@@ -0,0 +1,271 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tsv/attach'
4
+ require 'rbbt'
5
+
6
# Unit tests for the TSV attach family loaded from 'rbbt/util/tsv/attach':
# attach_same_key, attach_source_key, attach_index and the general attach
# entry point, with and without identifier tables.
#
# NOTE(review): the heredoc fixtures below are written with one space between
# columns; every table is parsed with a whitespace separator, but confirm the
# fixture whitespace against the repository copy of this file.
class TestAttach < Test::Unit::TestCase

  # Two tables share the same key column; the requested field of the second
  # is appended to the first. Checked for a :double and a :list receiver.
  def test_attach_same_key
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#ID ValueB OtherID
row1 b Id1|Id2
row3 B Id3
    EOF

    tsv1 = load_tsv(content1, :double, :sep => /\s+/)
    tsv2 = load_tsv(content2, :double, :sep => /\s+/)

    tsv1.attach_same_key tsv2, "OtherID"

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]

    # Same attach without naming the field to bring over.
    tsv1 = load_tsv(content1, :double, :sep => /\s+/)

    tsv1.attach_same_key tsv2

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields

    # A :list receiver ends up with a single value instead of an array.
    tsv1 = load_tsv(content1, :list, :sep => /\s+/)
    tsv2 = load_tsv(content2, :double, :sep => /\s+/)

    tsv1.attach_same_key tsv2, "OtherID"

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal "Id1", tsv1["row1"]["OtherID"]
  end

  # The second table is keyed by a field ("ValueB") of the first one.
  def test_attach_source_field
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#ValueB OtherID
b Id1|Id2
B Id3
    EOF

    tsv1 = load_tsv(content1, :double, :sep => /\s+/)
    tsv2 = load_tsv(content2, :double, :sep => /\s+/)

    tsv1.attach_source_key tsv2, "ValueB"

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]

    tsv1 = load_tsv(content1, :list, :sep => /\s+/)

    tsv1.attach_source_key tsv2, "ValueB"

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal "Id1", tsv1["row1"]["OtherID"]
  end

  # The two tables share no column; a third :flat table (Id -> ValueE) is
  # passed to attach_index to connect them.
  def test_attach_index
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#ValueE OtherID
e Id1|Id2
E Id3
    EOF

    content_index =<<-EOF
#Id ValueE
row1 e
row2 E
    EOF

    tsv1  = load_tsv(content1, :double, :sep => /\s+/)
    tsv2  = load_tsv(content2, :double, :sep => /\s+/)
    index = load_tsv(content_index, :flat, :sep => /\s+/)

    tsv1.attach_index tsv2, index

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]

    tsv1 = load_tsv(content1, :list, :sep => /\s+/)

    tsv1.attach_index tsv2, index

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal "Id1", tsv1["row1"]["OtherID"]
  end

  # The general attach entry point, once against a table sharing the key
  # column and once against a table keyed by "ValueB".
  def test_attach
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#Id ValueB OtherID
row1 b Id1|Id2
row3 B Id3
    EOF

    content3 =<<-EOF
#ValueB OtherID
b Id1|Id2
B Id3
    EOF

    tsv1 = load_tsv(content1, :double, :sep => /\s+/)
    tsv2 = load_tsv(content2, :double, :sep => /\s+/)
    tsv3 = load_tsv(content3, :double, :sep => /\s+/)

    tsv1.attach tsv2, "OtherID"

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]

    tsv1 = load_tsv(content1, :double, :sep => /\s+/)

    tsv1.attach tsv3

    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
  end

  # attach between tables registered through Rbbt.claim, where the second
  # table is given an identifiers file to connect it to the first.
  def test_attach_using_index
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#ValueE OtherID
e Id1|Id2
E Id3
    EOF

    content_index =<<-EOF
#: :sep=/\\s+/
#Id ValueE
row1 e
row2 E
    EOF

    require 'rbbt/sources/organism'

    Rbbt.claim "data", StringIO.new(content1), "Test1"
    Rbbt.claim "data", StringIO.new(content2), "Test2"
    Rbbt.claim "identifiers", StringIO.new(content_index), "Test2"

    tsv1 = Rbbt.files.Test1.data.tsv :double, :sep => /\s+/
    tsv2 = Rbbt.files.Test2.data.tsv :double, :sep => /\s+/

    tsv2.identifiers = Rbbt.files.Test2.identifiers

    tsv1.attach tsv2, "OtherID", :in_namespace => false

    # Expected value goes first so a failure message labels the values
    # correctly.
    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
  end

  # attach with no field named: the receiver carries an identifiers table
  # (Id -> ValueE) and must end up with the "ValueE" field from the other
  # table. Parsing options come from the "#: " line inside each fixture.
  def test_find_path
    content1 =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#OtherID ValueE
Id1|Id2 e
Id3 E
    EOF

    content_identifiers =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#Id ValueE
row1 e
row2 E
    EOF

    tsv1        = load_tsv(content1, :key => "Id")
    tsv2        = load_tsv(content2, :double)
    identifiers = load_tsv(content_identifiers, :flat, :sep => /\s+/)

    tsv1.identifiers = identifiers
    tsv1.attach tsv2

    assert_equal %w(ValueA ValueB ValueE), tsv1.fields
  end

  private

  # Builds a TSV from fixture text.
  #
  # +content+ is handed to TmpFile.with_file; the filename it yields is opened
  # and passed to TSV.new followed by +args+ (type symbol and/or options hash,
  # exactly as they would be written after the file argument of TSV.new).
  # Returns the TSV created inside the block.
  def load_tsv(content, *args)
    tsv = nil
    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), *args)
    end
    tsv
  end
end
271
+
@@ -0,0 +1,158 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tsv/index'
4
+
5
# Unit tests for TSV#index and TSV#sorted_index, loaded from
# 'rbbt/util/tsv/index'.
#
# NOTE(review): the class is called TestTSVManipulate although it requires and
# exercises the index code; the name looks copied from the manipulate tests.
# It is left unchanged here -- confirm whether it should be renamed.
#
# NOTE(review): the heredoc fixtures below are written with one space between
# columns; confirm the fixture whitespace against the repository copy of this
# file (see also the note in test_sorted_index).
class TestTSVManipulate < Test::Unit::TestCase

  # Index of a table keyed by "OtherID": looking up "row1" must lead to "Id1"
  # and the index's first field must be "OtherID".
  def test_index
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => false)
      index = tsv.index(:case_insensitive => true, :persistence => true)
      assert index["row1"].include?("Id1")
      assert_equal "OtherID", index.fields.first
    end

    # TmpFile.with_file(content) do |filename|
    #   tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID")
    #   index = tsv.index(:case_insensitive => true)
    #   assert index["row1"].include? "Id1"
    #   assert_equal "OtherID", index.fields.first
    # end
  end

  # Same lookup on a file without a header line; the target column is given
  # by position (:target => 2).
  def test_index_headerless
    content =<<-EOF
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/)
      index = tsv.index(:case_insensitive => true, :target => 2)
      assert index["row1"].include?("Id1")
    end
  end

  # With :order => true, a value that appears under several keys must list
  # "Id1" first for 'a' and "Id3" first for 'A'.
  def test_best_index
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b|A Id1
row2 A a|B Id3
row3 A a|B Id4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true)
      index = tsv.index(:case_insensitive => false, :order => true)
      assert_equal "Id1", index['a'].first
      assert_equal "Id3", index['A'].first
      assert_equal "OtherID", index.fields.first
    end

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID")
      index = tsv.index(:case_insensitive => true)
      assert index["row1"].include?("Id1")
      assert_equal "OtherID", index.fields.first
    end
  end

  #{{{ Test Attach

  # Not picked up by the test runner: the name does not start with "test".
  def ___test_smart_merge_single
    content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    content2 =<<-EOF
#ValueC ValueB OtherID
c|cc|ccc b Id1|Id2
C B Id3
    EOF

    tsv1 = tsv2 = nil
    TmpFile.with_file(content1) do |filename|
      tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
    end

    TmpFile.with_file(content2) do |filename|
      tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
    end

    tsv1 = tsv1.smart_merge tsv2, "ValueB"

    assert_equal "C", tsv1["row2"]["ValueC"]
    assert %w(c cc ccc).include?(tsv1["row1"]["ValueC"])
    assert_equal "Id1", tsv1["row1"]["OtherID"]
  end

  # Loads a table re-keyed by "ValueA".
  #
  # NOTE(review): this test makes no assertions; it only checks that the
  # constructor call does not raise. Confirm what it was meant to verify.
  def test_index_to_key
    content =<<-EOF
#: :sep=/\\s+/
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
    EOF

    tsv1 = nil
    TmpFile.with_file(content) do |filename|
      tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/, :key => "ValueA", :case_insensitive => true)
    end
  end

  # {{{ Test sorted index

  # Reads a ":"-separated file whose "Range" column is a run of underscores
  # preceded by optional whitespace. Each value is cast to the Range
  # (leading-whitespace length)..(that length + underscore count - 1); the
  # "Start" and "End" fields are then added from it and the table is reduced
  # to just those two fields.
  #
  # data:: file to load (also read once for the debug log).
  # Returns the sliced TSV.
  def load_data(data)
    Log.debug("Data:\n#{Open.read(data)}")

    to_range = proc do |e|
      e =~ /(\s*)(_*)/
      ($1.length..($1.length + $2.length - 1))
    end

    tsv = TSV.new(data, :list, :sep=>":", :cast => to_range)
    tsv.add_field "Start" do |key, values|
      values["Range"].first
    end
    tsv.add_field "End" do |key, values|
      values["Range"].last
    end

    tsv.slice ["Start", "End"]
  end

  # sorted_index must answer, for a position or a range of positions, which
  # entries cover it.
  #
  # NOTE(review): load_data derives each entry's start from the amount of
  # whitespace before the underscores, and the assertions expect the entries
  # to start at different positions. The fixture below has exactly one space
  # on every row, so the original spacing appears to have been lost --
  # restore it from the repository copy before running this test.
  def test_sorted_index
    data =<<-EOF
#ID:Range
#:012345678901234567890
a: ______
b: ______
c: _______
d: ____
e: ______
f: ___
g: ____
    EOF
    TmpFile.with_file(data) do |datafile|
      tsv = load_data(datafile)
      f = tsv.sorted_index

      assert_equal %w(), f[0].sort
      assert_equal %w(b), f[1].sort
      assert_equal %w(), f[20].sort
      assert_equal %w(), f[(20..100)].sort
      assert_equal %w(a b d), f[3].sort
      assert_equal %w(a b c d e), f[(3..4)].sort
    end
  end

end
158
+