rbbt-util 1.2.1 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,109 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv/accessor'
3
+ require 'rbbt/util/tsv'
4
+
5
+ class TestTSVAcessor < Test::Unit::TestCase
6
+
7
+ def test_zip_fields
8
+ a = [%w(1 2), %w(a b)]
9
+ assert_equal a, TSV.zip_fields(TSV.zip_fields(a))
10
+ end
11
+
12
+ def test_values_at
13
+ content =<<-EOF
14
+ #Id ValueA ValueB OtherID
15
+ row1 a|aa|aaa b Id1|Id2
16
+ row2 A B Id3
17
+ EOF
18
+
19
+ TmpFile.with_file(content) do |filename|
20
+ tsv = TSV.new(File.open(filename), :list, :sep => /\s+/, :key => "OtherID", :persistence => true)
21
+ assert_equal "row2", tsv.values_at("Id1", "Id3").last.first
22
+ end
23
+ end
24
+
25
+ def test_to_s
26
+ content =<<-EOF
27
+ #Id ValueA ValueB OtherID
28
+ row1 a|aa|aaa b Id1|Id2
29
+ row2 A B Id3
30
+ EOF
31
+
32
+ content2 =<<-EOF
33
+ #Id ValueA ValueB OtherID
34
+ row1 a|aa|aaa b Id1|Id2
35
+ row2 A B Id3
36
+ EOF
37
+
38
+ TmpFile.with_file(content) do |filename|
39
+ tsv = TSV.new(File.open(filename), :sep => /\s+/)
40
+ assert_equal content, tsv.to_s.sub(/^#: [^\n]*\n/s,'')
41
+ end
42
+ end
43
+
44
+ def test_to_s_ordered
45
+ content =<<-EOF
46
+ #Id ValueA ValueB OtherID
47
+ row1 a|aa|aaa b Id1|Id2
48
+ row2 A B Id3
49
+ EOF
50
+
51
+ content2 =<<-EOF
52
+ #Id ValueA ValueB OtherID
53
+ row2 A B Id3
54
+ row1 a|aa|aaa b Id1|Id2
55
+ EOF
56
+
57
+
58
+ TmpFile.with_file(content) do |filename|
59
+ tsv = TSV.new(File.open(filename), :sep => /\s+/)
60
+ assert_equal content, tsv.to_s(%w(row1 row2)).sub(/^#: [^\n]*\n/s,'')
61
+ assert_not_equal content, tsv.to_s(%w(row2 row1)).sub(/^#: [^\n]*\n/s,'')
62
+ assert_equal content2, tsv.to_s(%w(row2 row1)).sub(/^#: [^\n]*\n/s,'')
63
+ end
64
+ end
65
+
66
+ def test_field_compare
67
+ content =<<-EOF
68
+ #Id Letter:LetterValue Other:LetterValue OtherID
69
+ row1 a|aa|aaa b Id1|Id2
70
+ row2 A B Id3
71
+ row3 a C Id4
72
+ EOF
73
+
74
+ TmpFile.with_file(content) do |filename|
75
+ tsv = TSV.new(filename + '#:sep=/\s+/')
76
+
77
+ assert tsv.fields.include?("LetterValue")
78
+ end
79
+ end
80
+
81
+ def test_indentify_fields
82
+ content =<<-EOF
83
+ #ID ValueA ValueB Comment
84
+ row1 a b c
85
+ row2 A B C
86
+ EOF
87
+
88
+ TmpFile.with_file(content) do |filename|
89
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
90
+ assert_equal :key, tsv.identify_field("ID")
91
+ end
92
+ end
93
+
94
+ def test_named_fields
95
+ content =<<-EOF
96
+ #ID ValueA ValueB Comment
97
+ row1 a b c
98
+ row2 A B C
99
+ EOF
100
+
101
+ TmpFile.with_file(content) do |filename|
102
+ tsv = TSV.new(File.open(filename), :double, :sep => /\s/)
103
+
104
+ assert_equal "ValueA", tsv.fields["ValueA"]
105
+ end
106
+ end
107
+
108
+ end
109
+
@@ -0,0 +1,271 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tsv/attach'
4
+ require 'rbbt'
5
+
6
+ class TestAttach < Test::Unit::TestCase
7
+ def test_attach_same_key
8
+ content1 =<<-EOF
9
+ #Id ValueA ValueB
10
+ row1 a|aa|aaa b
11
+ row2 A B
12
+ EOF
13
+
14
+ content2 =<<-EOF
15
+ #ID ValueB OtherID
16
+ row1 b Id1|Id2
17
+ row3 B Id3
18
+ EOF
19
+
20
+ tsv1 = tsv2 = nil
21
+ TmpFile.with_file(content1) do |filename|
22
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
23
+ end
24
+
25
+ TmpFile.with_file(content2) do |filename|
26
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
27
+ end
28
+
29
+ tsv1.attach_same_key tsv2, "OtherID"
30
+
31
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
32
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
33
+
34
+ TmpFile.with_file(content1) do |filename|
35
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
36
+ end
37
+
38
+ tsv1.attach_same_key tsv2
39
+
40
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
41
+
42
+ tsv1 = tsv2 = nil
43
+ TmpFile.with_file(content1) do |filename|
44
+ tsv1 = TSV.new(File.open(filename), :list, :sep => /\s+/)
45
+ end
46
+
47
+ TmpFile.with_file(content2) do |filename|
48
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
49
+ end
50
+
51
+ tsv1.attach_same_key tsv2, "OtherID"
52
+
53
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
54
+ assert_equal "Id1", tsv1["row1"]["OtherID"]
55
+ end
56
+
57
+ def test_attach_source_field
58
+ content1 =<<-EOF
59
+ #Id ValueA ValueB
60
+ row1 a|aa|aaa b
61
+ row2 A B
62
+ EOF
63
+
64
+ content2 =<<-EOF
65
+ #ValueB OtherID
66
+ b Id1|Id2
67
+ B Id3
68
+ EOF
69
+
70
+ tsv1 = tsv2 = nil
71
+ TmpFile.with_file(content1) do |filename|
72
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
73
+ end
74
+
75
+ TmpFile.with_file(content2) do |filename|
76
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
77
+ end
78
+
79
+ tsv1.attach_source_key tsv2, "ValueB"
80
+
81
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
82
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
83
+
84
+ TmpFile.with_file(content1) do |filename|
85
+ tsv1 = TSV.new(File.open(filename), :list, :sep => /\s+/)
86
+ end
87
+
88
+ tsv1.attach_source_key tsv2, "ValueB"
89
+
90
+
91
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
92
+ assert_equal "Id1", tsv1["row1"]["OtherID"]
93
+ end
94
+
95
+ def test_attach_index
96
+ content1 =<<-EOF
97
+ #Id ValueA ValueB
98
+ row1 a|aa|aaa b
99
+ row2 A B
100
+ EOF
101
+
102
+ content2 =<<-EOF
103
+ #ValueE OtherID
104
+ e Id1|Id2
105
+ E Id3
106
+ EOF
107
+
108
+ content_index =<<-EOF
109
+ #Id ValueE
110
+ row1 e
111
+ row2 E
112
+ EOF
113
+
114
+ tsv1 = tsv2 = index = nil
115
+ TmpFile.with_file(content1) do |filename|
116
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
117
+ end
118
+
119
+ TmpFile.with_file(content2) do |filename|
120
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
121
+ end
122
+
123
+ TmpFile.with_file(content_index) do |filename|
124
+ index = TSV.new(File.open(filename), :flat, :sep => /\s+/)
125
+ end
126
+
127
+ tsv1.attach_index tsv2, index
128
+
129
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
130
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
131
+
132
+ TmpFile.with_file(content1) do |filename|
133
+ tsv1 = TSV.new(File.open(filename), :list, :sep => /\s+/)
134
+ end
135
+
136
+ tsv1.attach_index tsv2, index
137
+
138
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
139
+ assert_equal "Id1", tsv1["row1"]["OtherID"]
140
+ end
141
+
142
+ def test_attach
143
+ content1 =<<-EOF
144
+ #Id ValueA ValueB
145
+ row1 a|aa|aaa b
146
+ row2 A B
147
+ EOF
148
+
149
+ content2 =<<-EOF
150
+ #Id ValueB OtherID
151
+ row1 b Id1|Id2
152
+ row3 B Id3
153
+ EOF
154
+
155
+ content3 =<<-EOF
156
+ #ValueB OtherID
157
+ b Id1|Id2
158
+ B Id3
159
+ EOF
160
+ tsv1 = tsv2 = tsv3 = nil
161
+ TmpFile.with_file(content1) do |filename|
162
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
163
+ end
164
+
165
+ TmpFile.with_file(content2) do |filename|
166
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
167
+ end
168
+
169
+ TmpFile.with_file(content3) do |filename|
170
+ tsv3 = TSV.new(File.open(filename), :double, :sep => /\s+/)
171
+ end
172
+
173
+ tsv1.attach tsv2, "OtherID"
174
+
175
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
176
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
177
+
178
+ TmpFile.with_file(content1) do |filename|
179
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
180
+ end
181
+
182
+ tsv1.attach tsv3
183
+
184
+ assert_equal %w(ValueA ValueB OtherID), tsv1.fields
185
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
186
+
187
+ end
188
+
189
+ def test_attach_using_index
190
+ content1 =<<-EOF
191
+ #Id ValueA ValueB
192
+ row1 a|aa|aaa b
193
+ row2 A B
194
+ EOF
195
+
196
+ content2 =<<-EOF
197
+ #ValueE OtherID
198
+ e Id1|Id2
199
+ E Id3
200
+ EOF
201
+
202
+ content_index =<<-EOF
203
+ #: :sep=/\\s+/
204
+ #Id ValueE
205
+ row1 e
206
+ row2 E
207
+ EOF
208
+
209
+
210
+ require 'rbbt/sources/organism'
211
+
212
+ Rbbt.claim "data", StringIO.new(content1), "Test1"
213
+ Rbbt.claim "data", StringIO.new(content2), "Test2"
214
+ Rbbt.claim "identifiers", StringIO.new(content_index), "Test2"
215
+
216
+ tsv1 = tsv2 = nil
217
+
218
+ tsv1 = Rbbt.files.Test1.data.tsv :double, :sep => /\s+/
219
+ tsv2 = Rbbt.files.Test2.data.tsv :double, :sep => /\s+/
220
+
221
+ tsv2.identifiers = Rbbt.files.Test2.identifiers
222
+
223
+ tsv1.attach tsv2, "OtherID", :in_namespace => false
224
+
225
+ assert_equal tsv1.fields,%w(ValueA ValueB OtherID)
226
+ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
227
+
228
+ end
229
+
230
+ def test_find_path
231
+ content1 =<<-EOF
232
+ #: :sep=/\\s+/#:case_insensitive=false
233
+ #Id ValueA ValueB
234
+ row1 a|aa|aaa b
235
+ row2 A B
236
+ EOF
237
+
238
+ content2 =<<-EOF
239
+ #: :sep=/\\s+/#:case_insensitive=false
240
+ #OtherID ValueE
241
+ Id1|Id2 e
242
+ Id3 E
243
+ EOF
244
+
245
+ content_identifiers =<<-EOF
246
+ #: :sep=/\\s+/#:case_insensitive=false
247
+ #Id ValueE
248
+ row1 e
249
+ row2 E
250
+ EOF
251
+
252
+ tsv1 = tsv2 = identifiers = nil
253
+ TmpFile.with_file(content1) do |filename|
254
+ tsv1 = TSV.new(File.open(filename), :key => "Id")
255
+ end
256
+
257
+ TmpFile.with_file(content2) do |filename|
258
+ tsv2 = TSV.new(File.open(filename), :double)
259
+ end
260
+
261
+ TmpFile.with_file(content_identifiers) do |filename|
262
+ identifiers = TSV.new(File.open(filename), :flat, :sep => /\s+/)
263
+ end
264
+
265
+ tsv1.identifiers = identifiers
266
+ tsv1.attach tsv2
267
+
268
+ assert_equal %w(ValueA ValueB ValueE), tsv1.fields
269
+ end
270
+ end
271
+
@@ -0,0 +1,158 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/tsv'
3
+ require 'rbbt/util/tsv/index'
4
+
5
+ class TestTSVManipulate < Test::Unit::TestCase
6
+
7
+ def test_index
8
+ content =<<-EOF
9
+ #Id ValueA ValueB OtherID
10
+ row1 a|aa|aaa b Id1|Id2
11
+ row2 A B Id3
12
+ EOF
13
+
14
+ TmpFile.with_file(content) do |filename|
15
+ tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => false)
16
+ index = tsv.index(:case_insensitive => true, :persistence => true)
17
+ assert index["row1"].include? "Id1"
18
+ assert_equal "OtherID", index.fields.first
19
+ end
20
+
21
+ # TmpFile.with_file(content) do |filename|
22
+ # tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID")
23
+ # index = tsv.index(:case_insensitive => true)
24
+ # assert index["row1"].include? "Id1"
25
+ # assert_equal "OtherID", index.fields.first
26
+ # end
27
+ end
28
+
29
+ def test_index_headerless
30
+ content =<<-EOF
31
+ row1 a|aa|aaa b Id1|Id2
32
+ row2 A B Id3
33
+ EOF
34
+
35
+ TmpFile.with_file(content) do |filename|
36
+ tsv = TSV.new(File.open(filename), :sep => /\s+/)
37
+ index = tsv.index(:case_insensitive => true, :target => 2)
38
+ assert index["row1"].include? "Id1"
39
+ end
40
+ end
41
+
42
+
43
+ def test_best_index
44
+ content =<<-EOF
45
+ #Id ValueA ValueB OtherID
46
+ row1 a|aa|aaa b|A Id1
47
+ row2 A a|B Id3
48
+ row3 A a|B Id4
49
+ EOF
50
+
51
+ TmpFile.with_file(content) do |filename|
52
+ tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true)
53
+ index = tsv.index(:case_insensitive => false, :order => true)
54
+ ddd index
55
+ assert_equal "Id1", index['a'].first
56
+ assert_equal "Id3", index['A'].first
57
+ assert_equal "OtherID", index.fields.first
58
+ end
59
+
60
+ TmpFile.with_file(content) do |filename|
61
+ tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID")
62
+ index = tsv.index(:case_insensitive => true)
63
+ assert index["row1"].include? "Id1"
64
+ assert_equal "OtherID", index.fields.first
65
+ end
66
+ end
67
+
68
+ #{{{ Test Attach
69
+
70
+ def ___test_smart_merge_single
71
+ content1 =<<-EOF
72
+ #Id ValueA ValueB
73
+ row1 a|aa|aaa b
74
+ row2 A B
75
+ EOF
76
+
77
+ content2 =<<-EOF
78
+ #ValueC ValueB OtherID
79
+ c|cc|ccc b Id1|Id2
80
+ C B Id3
81
+ EOF
82
+
83
+ tsv1 = tsv2 = nil
84
+ TmpFile.with_file(content1) do |filename|
85
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/)
86
+ end
87
+
88
+ TmpFile.with_file(content2) do |filename|
89
+ tsv2 = TSV.new(File.open(filename), :double, :sep => /\s+/)
90
+ end
91
+
92
+ tsv1 = tsv1.smart_merge tsv2, "ValueB"
93
+
94
+ assert_equal "C", tsv1["row2"]["ValueC"]
95
+ assert %w(c cc ccc).include? tsv1["row1"]["ValueC"]
96
+ assert_equal "Id1", tsv1["row1"]["OtherID"]
97
+ end
98
+
99
+ def test_index_to_key
100
+ content =<<-EOF
101
+ #: :sep=/\\s+/
102
+ #Id ValueA ValueB
103
+ row1 a|aa|aaa b
104
+ row2 A B
105
+ EOF
106
+
107
+ tsv1 = tsv2 = nil
108
+ TmpFile.with_file(content) do |filename|
109
+ tsv1 = TSV.new(File.open(filename), :double, :sep => /\s+/, :key => "ValueA", :case_insensitive => true)
110
+ end
111
+ end
112
+
113
+ # {{{ Test sorted index
114
+
115
+ def load_data(data)
116
+ Log.debug("Data:\n#{Open.read(data)}")
117
+ tsv = TSV.new(data, :list, :sep=>":", :cast => proc{|e| e =~ /(\s*)(_*)/; ($1.length..($1.length + $2.length - 1))})
118
+ tsv.add_field "Start" do |key, values|
119
+ values["Range"].first
120
+ end
121
+ tsv.add_field "End" do |key, values|
122
+ values["Range"].last
123
+ end
124
+
125
+ ddd tsv.fields
126
+ tsv = tsv.slice ["Start", "End"]
127
+ ddd tsv.fields
128
+
129
+ tsv
130
+ end
131
+
132
+ def test_sorted_index
133
+ data =<<-EOF
134
+ #ID:Range
135
+ #:012345678901234567890
136
+ a: ______
137
+ b: ______
138
+ c: _______
139
+ d: ____
140
+ e: ______
141
+ f: ___
142
+ g: ____
143
+ EOF
144
+ TmpFile.with_file(data) do |datafile|
145
+ tsv = load_data(datafile)
146
+ f = tsv.sorted_index
147
+
148
+ assert_equal %w(), f[0].sort
149
+ assert_equal %w(b), f[1].sort
150
+ assert_equal %w(), f[20].sort
151
+ assert_equal %w(), f[(20..100)].sort
152
+ assert_equal %w(a b d), f[3].sort
153
+ assert_equal %w(a b c d e), f[(3..4)].sort
154
+ end
155
+ end
156
+
157
+ end
158
+