bio 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. data/ChangeLog +1712 -0
  2. data/KNOWN_ISSUES.rdoc +11 -1
  3. data/README.rdoc +3 -2
  4. data/RELEASE_NOTES.rdoc +65 -127
  5. data/bioruby.gemspec +38 -2
  6. data/doc/RELEASE_NOTES-1.4.0.rdoc +167 -0
  7. data/doc/Tutorial.rd +74 -16
  8. data/doc/Tutorial.rd.html +68 -16
  9. data/lib/bio.rb +2 -0
  10. data/lib/bio/appl/clustalw/report.rb +18 -0
  11. data/lib/bio/appl/paml/codeml/report.rb +579 -21
  12. data/lib/bio/command.rb +149 -21
  13. data/lib/bio/db/aaindex.rb +11 -1
  14. data/lib/bio/db/embl/sptr.rb +1 -1
  15. data/lib/bio/db/fasta/defline.rb +7 -2
  16. data/lib/bio/db/fasta/qual.rb +24 -0
  17. data/lib/bio/db/fasta/qual_to_biosequence.rb +29 -0
  18. data/lib/bio/db/fastq.rb +15 -0
  19. data/lib/bio/db/go.rb +2 -2
  20. data/lib/bio/db/kegg/common.rb +109 -5
  21. data/lib/bio/db/kegg/genes.rb +61 -15
  22. data/lib/bio/db/kegg/genome.rb +43 -38
  23. data/lib/bio/db/kegg/module.rb +158 -0
  24. data/lib/bio/db/kegg/orthology.rb +40 -1
  25. data/lib/bio/db/kegg/pathway.rb +254 -0
  26. data/lib/bio/db/medline.rb +6 -2
  27. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  28. data/lib/bio/location.rb +39 -0
  29. data/lib/bio/reference.rb +24 -0
  30. data/lib/bio/sequence.rb +2 -0
  31. data/lib/bio/sequence/adapter.rb +1 -0
  32. data/lib/bio/sequence/format.rb +14 -0
  33. data/lib/bio/sequence/sequence_masker.rb +95 -0
  34. data/lib/bio/tree.rb +4 -4
  35. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +5 -0
  36. data/lib/bio/version.rb +1 -1
  37. data/setup.rb +5 -0
  38. data/test/data/KEGG/K02338.orthology +180 -52
  39. data/test/data/KEGG/M00118.module +44 -0
  40. data/test/data/KEGG/T00005.genome +140 -0
  41. data/test/data/KEGG/T00070.genome +34 -0
  42. data/test/data/KEGG/b0529.gene +47 -0
  43. data/test/data/KEGG/ec00072.pathway +23 -0
  44. data/test/data/KEGG/hsa00790.pathway +59 -0
  45. data/test/data/KEGG/ko00312.pathway +16 -0
  46. data/test/data/KEGG/map00030.pathway +37 -0
  47. data/test/data/KEGG/map00052.pathway +13 -0
  48. data/test/data/KEGG/rn00250.pathway +114 -0
  49. data/test/data/clustalw/example1.aln +58 -0
  50. data/test/data/go/selected_component.ontology +12 -0
  51. data/test/data/go/selected_gene_association.sgd +31 -0
  52. data/test/data/go/selected_wikipedia2go +13 -0
  53. data/test/data/medline/20146148_modified.medline +54 -0
  54. data/test/data/paml/codeml/models/aa.aln +26 -0
  55. data/test/data/paml/codeml/models/aa.dnd +13 -0
  56. data/test/data/paml/codeml/models/aa.ph +13 -0
  57. data/test/data/paml/codeml/models/alignment.phy +49 -0
  58. data/test/data/paml/codeml/models/results0-3.txt +312 -0
  59. data/test/data/paml/codeml/models/results7-8.txt +340 -0
  60. data/test/functional/bio/io/test_togows.rb +8 -8
  61. data/test/functional/bio/test_command.rb +7 -6
  62. data/test/unit/bio/appl/clustalw/test_report.rb +80 -0
  63. data/test/unit/bio/appl/paml/codeml/test_rates.rb +6 -6
  64. data/test/unit/bio/appl/paml/codeml/test_report.rb +231 -24
  65. data/test/unit/bio/appl/paml/codeml/test_report_single.rb +46 -0
  66. data/test/unit/bio/db/embl/test_sptr.rb +1 -1
  67. data/test/unit/bio/db/fasta/test_defline.rb +160 -0
  68. data/test/unit/bio/db/fasta/test_defline_misc.rb +490 -0
  69. data/test/unit/bio/db/kegg/test_genes.rb +281 -1
  70. data/test/unit/bio/db/kegg/test_genome.rb +408 -0
  71. data/test/unit/bio/db/kegg/test_module.rb +246 -0
  72. data/test/unit/bio/db/kegg/test_orthology.rb +95 -0
  73. data/test/unit/bio/db/kegg/test_pathway.rb +1250 -0
  74. data/test/unit/bio/db/test_aaindex.rb +8 -7
  75. data/test/unit/bio/db/test_fastq.rb +36 -0
  76. data/test/unit/bio/db/test_go.rb +171 -0
  77. data/test/unit/bio/db/test_medline.rb +148 -0
  78. data/test/unit/bio/db/test_qual.rb +9 -2
  79. data/test/unit/bio/sequence/test_sequence_masker.rb +169 -0
  80. data/test/unit/bio/test_tree.rb +260 -1
  81. data/test/unit/bio/util/test_contingency_table.rb +7 -7
  82. metadata +53 -6
@@ -0,0 +1,80 @@
1
+ #
2
+ # test/unit/bio/appl/clustalw/test_report.rb - Unit test for Bio::ClustalW::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Pjotr Prins <pjotr.prins@thebird.nl>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ require 'pathname'
9
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
10
+ 'bioruby_test_helper.rb')).cleanpath.to_s
11
+ require 'test/unit'
12
+ require 'bio/appl/clustalw/report'
13
+
14
+ module Bio
15
+
16
+ class TestClustalWReport < Test::Unit::TestCase
17
+
18
+ def setup
19
+ test_data_path = Pathname.new(File.join(BioRubyTestDataPath, 'clustalw')).cleanpath.to_s
20
+ aln_filename = File.join(test_data_path, 'example1.aln')
21
+ text = File.read(aln_filename)
22
+ @aln = Bio::ClustalW::Report.new(text)
23
+ end
24
+
25
+ # CLUSTAL 2.0.9 multiple sequence alignment
26
+ #
27
+ #
28
+ # The alignment reads like:
29
+ #
30
+ # query -MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQ
31
+ # gi|115023|sp|P10425| MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQ
32
+ # .: :
33
+ #
34
+ # query LNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIE
35
+ # gi|115023|sp|P10425| LNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIE
36
+ # *: . . **. . .. ::*: . * :
37
+
38
+ def test_header
39
+ assert_equal('CLUSTAL 2.0.9 multiple sequence alignment',@aln.header)
40
+ end
41
+
42
+ def test_sequences
43
+ seq = @aln.get_sequence(0)
44
+ assert_equal('query',seq.definition)
45
+ assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
46
+ seq = @aln.get_sequence(1)
47
+ assert_equal('gi|115023|sp|P10425|',seq.definition)
48
+ assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
49
+ end
50
+
51
+ def test_alignment
52
+ assert_equal("???????????SN?????????????D??????????L??????????????????H?H?D",@aln.alignment.consensus[60..120])
53
+ end
54
+
55
+ def test_match_line
56
+ assert_equal(" .: : *: . . **. . .. ::*: . * : : . .: .* * * * : * . : . . * : .: . .: .*: ::***:* .:* .* :: . . ::.: * : . " ,@aln.match_line)
57
+ end
58
+
59
+ end # class TestClustalWReport
60
+
61
+ class TestClustalWReportWith2ndArgument < Test::Unit::TestCase
62
+
63
+ def setup
64
+ aln_filename = File.join(BioRubyTestDataPath, 'clustalw',
65
+ 'example1.aln')
66
+ text = File.read(aln_filename)
67
+ @aln = Bio::ClustalW::Report.new(text, "PROTEIN")
68
+ end
69
+
70
+ def test_sequences
71
+ seq = @aln.get_sequence(0)
72
+ assert_equal('query',seq.definition)
73
+ assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
74
+ seq = @aln.get_sequence(1)
75
+ assert_equal('gi|115023|sp|P10425|',seq.definition)
76
+ assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
77
+ end
78
+
79
+ end #class TestClustalWReportWith2ndArgument
80
+ end
@@ -31,15 +31,15 @@ class TestCodemlRates < Test::Unit::TestCase
31
31
  end
32
32
 
33
33
  def test_rates_hundred_and_fiftieth_position
34
- assert('GGGG', @example_rates[149][:data])
35
- assert(0.828, @example_rates[149][:rate])
36
- assert(9, @example_rates[149][:freq])
34
+ assert_equal('GGGG', @example_rates[150][:data])
35
+ assert_equal(0.828, @example_rates[150][:rate])
36
+ assert_equal(8, @example_rates[150][:freq])
37
37
  end
38
38
 
39
39
  def test_rates_last_position
40
- assert('PHPP', @example_rates.last[:data])
41
- assert(1.752, @example_rates.last[:rate])
42
- assert(1, @example_rates.last[:freq])
40
+ assert_equal('PHPP', @example_rates.last[:data])
41
+ assert_equal(1.752, @example_rates.last[:rate])
42
+ assert_equal(1, @example_rates.last[:freq])
43
43
  end
44
44
  end
45
45
 
@@ -1,7 +1,10 @@
1
1
  #
2
- # test/unit/bio/appl/paml/codeml/test_report.rb - Unit test for Bio::PAML::Codeml::Report
2
+ # = test/unit/bio/appl/paml/codeml/test_report.rb - Unit tests for Codeml report parser
3
+ #
4
+ # Copyright:: Copyright (C) 2008-2010
5
+ # Michael D. Barton <mail@michaelbarton.me.uk>,
6
+ # Pjotr Prins <pjotr.prins@thebird.nl>
3
7
  #
4
- # Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
5
8
  # License:: The Ruby License
6
9
  #
7
10
 
@@ -14,33 +17,237 @@ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5,
14
17
  require 'test/unit'
15
18
  require 'bio/appl/paml/codeml/report'
16
19
 
17
- module Bio; module TestPAMLCodeml
18
- class TestCodemlReport < Test::Unit::TestCase
20
+ module Bio
21
+
22
+ # The test code is copied from the examples of Bio::PAML::Codeml::Report
23
+ # described in lib/bio/appl/paml/codeml/report.rb.
24
+ module TestPAMLCodemlReportWithModels
25
+
26
+ FILENAME_M0M3 = File.join(BioRubyTestDataPath,
27
+ 'paml/codeml/models/results0-3.txt')
28
+
29
+ class TestCodemlReportM0M3 < Test::Unit::TestCase
30
+
31
+ def setup
32
+ buf = File.read(FILENAME_M0M3)
33
+ @c = Bio::PAML::Codeml::Report.new(buf)
34
+ end
35
+
36
+ # Invoke Bioruby's PAML codeml parser, after having read the contents
37
+ # of the codeml result file into _buf_ (for example using File.read)
38
+ def test_initialize
39
+ assert_instance_of(Bio::PAML::Codeml::Report, @c)
40
+ end
41
+
42
+ # Do we have two models?
43
+ def test_models
44
+ assert_equal(2, @c.models.size)
45
+ assert_equal("M0", @c.models[0].name)
46
+ assert_equal("M3", @c.models[1].name)
47
+ end
48
+
49
+ # Check the general information
50
+ def test_num_sequences
51
+ assert_equal(6, @c.num_sequences)
52
+ end
53
+
54
+ def test_num_codons
55
+ assert_equal(134, @c.num_codons)
56
+ end
57
+
58
+ def test_descr
59
+ assert_equal("M0-3", @c.descr)
60
+ end
61
+
62
+ # Test whether the second model M3 is significant over M0
63
+ def test_significant
64
+ assert_equal(true, @c.significant)
65
+ end
66
+
67
+ # Next take the overall posterior analysis
68
+ def test_nb_sites
69
+ assert_equal(44, @c.nb_sites.size)
70
+ assert_equal([17, "I", 0.988, 3.293], @c.nb_sites[0].to_a)
71
+ end
72
+
73
+ # We also provide the raw buffers to adhere to the principle of
74
+ # unexpected use. Test the raw buffers for content:
75
+ def test_header
76
+ assert_equal(1, @c.header.to_s =~ /seed/)
77
+ end
78
+
79
+ def test_footer
80
+ assert_equal(16, @c.footer.to_s =~ /Bayes/)
81
+ end
82
+
83
+ end #class TestCodemlReportM0M3
84
+
85
+ class TestCodemlModelM0M3 < Test::Unit::TestCase
86
+
87
+ # Now fetch the results of the first model M0, and check its values
88
+ def setup
89
+ buf = File.read(FILENAME_M0M3)
90
+ c = Bio::PAML::Codeml::Report.new(buf)
91
+ @m0 = c.models[0]
92
+ @m3 = c.models[1]
93
+ end
94
+
95
+ def test_tree_length
96
+ assert_equal(1.90227, @m0.tree_length)
97
+ end
98
+
99
+ def test_lnL
100
+ assert_equal(-1125.800375, @m0.lnL)
101
+ end
102
+
103
+ def test_omega
104
+ assert_equal(0.58589, @m0.omega)
105
+ end
106
+
107
+ def test_dN_dS
108
+ assert_equal(0.58589, @m0.dN_dS)
109
+ end
110
+
111
+ def test_kappa
112
+ assert_equal(2.14311, @m0.kappa)
113
+ end
114
+
115
+ def test_alpha
116
+ assert_equal(nil, @m0.alpha)
117
+ end
118
+
119
+ # We also have a tree (as a string)
120
+ def test_tree
121
+ str = "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.400074): 0.000004, PITG_23257T0: 0.952614): 0.000004, PITG_23264T0: 0.445507): 0.000004, PITG_23267T0: 0.011814, PITG_23293T0: 0.092242);"
122
+ assert_equal(str, @m0.tree)
123
+ end
124
+
125
+ # Check the M3 and its specific values
126
+ def test_m3_lnL
127
+ assert_equal(-1070.964046, @m3.lnL)
128
+ end
129
+
130
+ def test_m3_classes
131
+ assert_equal(3, @m3.classes.size)
132
+ assert_equal({:w=>0.00928, :p=>0.56413}, @m3.classes[0])
133
+ end
134
+
135
+ def test_m3_tree
136
+ str = "((((PITG_23265T0: 0.000004, PITG_23253T0: 0.762597): 0.000004, PITG_23257T0: 2.721710): 0.000004, PITG_23264T0: 0.924326): 0.014562, PITG_23267T0: 0.000004, PITG_23293T0: 0.237433);"
137
+ assert_equal(str, @m3.tree)
138
+ end
139
+
140
+ def test_to_s
141
+ assert_equal(3, @m0.to_s =~ /one-ratio/)
142
+ end
143
+
144
+ def test_m3_to_s
145
+ assert_equal(3, @m3.to_s =~ /discrete/)
146
+ end
147
+
148
+ end #class TestCodemlModelM0M3
149
+
150
+ class TestCodemlPositiveSiteM0M3 < Test::Unit::TestCase
151
+
152
+ def setup
153
+ buf = File.read(FILENAME_M0M3)
154
+ c = Bio::PAML::Codeml::Report.new(buf)
155
+ @codon = c.nb_sites[0]
156
+ end
157
+
158
+ def test_position
159
+ assert_equal(17, @codon.position)
160
+ end
161
+
162
+ def test_probability
163
+ assert_equal(0.988, @codon.probability)
164
+ end
165
+
166
+ def test_dN_dS
167
+ assert_equal(3.293, @codon.dN_dS)
168
+ end
169
+
170
+ # with aliases
171
+ def test_p
172
+ assert_equal(0.988, @codon.p)
173
+ end
174
+
175
+ def test_w
176
+ assert_equal(3.293, @codon.w)
177
+ end
178
+
179
+ end #class TestCodemlPositiveSiteM0M3
180
+
181
+ class TestCodemlPositiveSitesM0M3 < Test::Unit::TestCase
182
+
183
+ def setup
184
+ buf = File.read(FILENAME_M0M3)
185
+ c = Bio::PAML::Codeml::Report.new(buf)
186
+ @nb_sites = c.nb_sites
187
+ end
188
+
189
+ # Now we generate special string 'graph' for positive selection. The
190
+ # following returns a string the length of the input alignment and
191
+ # shows the locations of positive selection:
192
+ def test_graph
193
+ str = " ** * * *"
194
+ assert_equal(str, @nb_sites.graph[0..32])
195
+ end
196
+
197
+ # And with dN/dS (high values are still an asterisk *)
198
+ def test_graph_omega
199
+ str = " 3* 6 6 2"
200
+ assert_equal(str, @nb_sites.graph_omega[0..32])
201
+ end
202
+ end #class TestCodemlPositiveSitesM0M3
203
+
204
+ # Finally we do a test on an M7+M8 run.
205
+ FILENAME_M7M8 = File.join(BioRubyTestDataPath,
206
+ 'paml/codeml/models/results7-8.txt')
207
+
19
208
 
20
- TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'paml', 'codeml')).cleanpath.to_s
209
+ class TestCodemlReportM7M8 < Test::Unit::TestCase
21
210
 
22
- def setup
23
- str = File.read(File.join(TEST_DATA, 'output.txt'))
24
- @example_report = Bio::PAML::Codeml::Report.new(str)
25
- end
211
+ def setup
212
+ buf = File.read(FILENAME_M7M8)
213
+ @c = Bio::PAML::Codeml::Report.new(buf)
214
+ end
26
215
 
27
- def test_tree_log_likelihood
28
- assert_equal(-1817.465211, @example_report.tree_log_likelihood)
29
- end
216
+ # Do we have two models?
217
+ def test_models
218
+ assert_equal(2, @c.models.size)
219
+ assert_equal("M7", @c.models[0].name)
220
+ assert_equal("M8", @c.models[1].name)
221
+ end
30
222
 
31
- def test_tree_length
32
- assert_equal(0.77902, @example_report.tree_length)
33
- end
223
+ # Assert the results are significant
224
+ def test_significant
225
+ assert_equal(true, @c.significant)
226
+ end
34
227
 
35
- def test_alpha
36
- assert_equal(0.58871, @example_report.alpha)
37
- end
228
+ # Compared to M0/M3 there are some differences. The important ones
229
+ # are the parameters and the full Bayesian result available for M7/M8.
230
+ # This is the naive Bayesian result:
231
+ def test_nb_sites
232
+ assert_equal(10, @c.nb_sites.size)
233
+ end
38
234
 
39
- def test_tree
40
- tree = "(((rabbit: 0.082889, rat: 0.187866): 0.038008, human: 0.055050): 0.033639, goat-cow: 0.096992, marsupial: 0.284574);"
41
- assert_equal(tree, @example_report.tree)
42
- end
235
+ # And this is the full Bayesian result:
236
+ def test_sites
237
+ assert_equal(30, @c.sites.size)
238
+ array = [17, "I", 0.672, 2.847]
239
+ assert_equal(array, @c.sites[0].to_a)
240
+ str = " ** * * *"
241
+ assert_equal(str, @c.sites.graph[0..32])
242
+
243
+ # Note the differences of omega with earlier M0-M3 naive Bayesian
244
+ # analysis:
245
+ str2 = " 24 3 3 2"
246
+ assert_equal(str2, @c.sites.graph_omega[0..32])
247
+ # The locations are the same, but the omega differs.
248
+ end
249
+ end #class TestCodemlReportM7M8
43
250
 
44
- end
251
+ end #module TestPAMLCodemlReportWithModels
252
+ end #module Bio
45
253
 
46
- end; end #module TestPAMLCodeml; module Bio
@@ -0,0 +1,46 @@
1
+ #
2
+ # test/unit/bio/appl/paml/codeml/test_report_single.rb - Unit test for Bio::PAML::Codeml::Report
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Michael D. Barton <mail@michaelbarton.me.uk>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/appl/paml/codeml/report'
16
+
17
+ module Bio; module TestPAMLCodeml
18
+ class TestCodemlReport < Test::Unit::TestCase
19
+
20
+ TEST_DATA = Pathname.new(File.join(BioRubyTestDataPath, 'paml', 'codeml')).cleanpath.to_s
21
+
22
+ def setup
23
+ str = File.read(File.join(TEST_DATA, 'output.txt'))
24
+ @example_report = Bio::PAML::Codeml::Report.new(str)
25
+ end
26
+
27
+ def test_tree_log_likelihood
28
+ assert_equal(-1817.465211, @example_report.tree_log_likelihood)
29
+ end
30
+
31
+ def test_tree_length
32
+ assert_equal(0.77902, @example_report.tree_length)
33
+ end
34
+
35
+ def test_alpha
36
+ assert_equal(0.58871, @example_report.alpha)
37
+ end
38
+
39
+ def test_tree
40
+ tree = "(((rabbit: 0.082889, rat: 0.187866): 0.038008, human: 0.055050): 0.033639, goat-cow: 0.096992, marsupial: 0.284574);"
41
+ assert_equal(tree, @example_report.tree)
42
+ end
43
+
44
+ end
45
+
46
+ end; end #module TestPAMLCodeml; module Bio
@@ -1660,7 +1660,7 @@ CC -----------------------------------------------------------------------"
1660
1660
  " MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200,
1661
1661
  "//\n"].join
1662
1662
  sp = SPTR.new(data)
1663
- assert(12000, sp.seq.size)
1663
+ assert_equal(12000, sp.seq.size)
1664
1664
  end
1665
1665
  end
1666
1666
 
@@ -0,0 +1,160 @@
1
+ #
2
+ # test/unit/bio/db/fasta/test_defline.rb - Unit test for Bio::FastaDefline
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi <k.hayashi.info@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/db/fasta/defline.rb'
16
+
17
+ module Bio
18
+ class TestBioFastaDefline < Test::Unit::TestCase
19
+
20
+ def setup
21
+ #test for all the patterns.
22
+ @rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
23
+ @ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]") #from an example in the class file
24
+ @sce = Bio::FastaDefline.new(">sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]") #from an example in the class file
25
+ @emb = Bio::FastaDefline.new(">emb:CACDC28 [X80034] C.albicans CDC28 gene") #from an example in the class file
26
+ @abc = Bio::FastaDefline.new(">ABC12345 this is test") #from an example in the class file
27
+ @etc = Bio::FastaDefline.new(">fasta1") # In this case, the defline has only an ID-like string?
28
+ #test for the other formats
29
+ end
30
+
31
+ def test_entry_id
32
+ assert_equal("gi|671595", @rub.entry_id)
33
+ assert_equal("gi|2495000", @ckr.entry_id)
34
+ assert_equal("sce:YBR160W", @sce.entry_id)
35
+ assert_equal("emb:CACDC28", @emb.entry_id)
36
+ assert_equal("ABC12345", @abc.entry_id)
37
+ assert_equal("fasta1", @etc.entry_id)
38
+ end
39
+
40
+ def test_to_s
41
+ assert_equal("gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]", @rub.to_s)
42
+ assert_equal("gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]", @ckr.to_s)
43
+ assert_equal("sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @sce.to_s)
44
+ assert_equal("emb:CACDC28 [X80034] C.albicans CDC28 gene", @emb.to_s)
45
+ assert_equal("ABC12345 this is test", @abc.to_s)
46
+ assert_equal("fasta1", @etc.to_s)
47
+ end
48
+
49
+ def test_description
50
+ assert_equal("rubisco large subunit [Perovskia abrotanoides]", @rub.description)
51
+ assert_equal("CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", @ckr.description)
52
+ assert_equal("CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]", @sce.description)
53
+ assert_equal("[X80034] C.albicans CDC28 gene", @emb.description)
54
+ assert_equal("this is test", @abc.description)
55
+ assert_equal("", @etc.description)
56
+ end
57
+
58
+ def test_descriptions
59
+ assert_equal(["rubisco large subunit [Perovskia abrotanoides]"], @rub.descriptions)
60
+ assert_equal(["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)", "cholecystokinin A receptor - guinea pig", "cholecystokinin A receptor; CCK-A receptor [Cavia]"], @ckr.descriptions)
61
+ assert_equal(["CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]"], @sce.descriptions)
62
+ assert_equal(["[X80034] C.albicans CDC28 gene"], @emb.descriptions)
63
+ assert_equal("this is test", @abc.description)
64
+ assert_equal("", @etc.description) #is it correct that this returns a string?
65
+ end
66
+
67
+ def test_id_strings
68
+ assert_equal(["671595", "CAA85678.1"], @rub.id_strings)
69
+ assert_equal(["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898", "544724", "AAB29504.1", "Cavia"], @ckr.id_strings)
70
+ assert_equal(["YBR160W", "2.7.1.-", "CC28_YEAST", "CC28_YEAST", "CDC28"], @sce.id_strings)
71
+ assert_equal(["CACDC28", "X80034", "CDC28", "X80034"] , @emb.id_strings) #is it correct that "X80034" is returned twice?
72
+ assert_equal(["ABC12345"], @abc.id_strings)
73
+ assert_equal(["fasta1"], @etc.id_strings)
74
+ end
75
+
76
+ def test_words
77
+ assert_equal(["abrotanoides", "large", "perovskia", "rubisco", "subunit"], @rub.words)
78
+ assert_equal(["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig", "receptor", "type"], @ckr.words)
79
+ assert_equal(["catalytic", "cyclin-dependent", "kinase", "srm5", "subunit"], @sce.words)
80
+ assert_equal(["albicans"], @emb.words) #this result that return "X80034" twice is correct?
81
+ assert_equal(["test"], @abc.words)
82
+ assert_equal([], @etc.words)
83
+ assert_equal(["CCK-A", "CCK-AR", "CHOLECYSTOKININ", "Cavia", "RECEPTOR", "TYPE", "cholecystokinin", "guinea", "pig", "receptor"], @ckr.words(true)) #case sensitive
84
+ #probably there is no need to test variations of the second and third arguments.
85
+ end
86
+ def test_get
87
+ #get each db from each pattern except the duplicate.
88
+ assert_equal("671595", @rub.get("gi"))
89
+ assert_equal("CCKR_CAVPO", @ckr.get("sp"))
90
+ assert_equal("I51898", @ckr.get("pir"))
91
+ assert_equal("AAB29504.1", @ckr.get("gb"))
92
+ assert_equal("YBR160W", @sce.get("sce"))
93
+ assert_equal("2.7.1.-", @sce.get("EC"))
94
+ assert_equal("CC28_YEAST", @sce.get("SP"))
95
+ assert_equal("CACDC28", @emb.get("emb"))
96
+ #the other dbs
97
+ end
98
+ def test_get_by_type
99
+ #specify each type in each pattern while referring to NSIDs.
100
+ assert_equal("671595", @rub.get_by_type("gi"))
101
+ assert_equal("CAA85678.1", @rub.get_by_type("acc_version"))
102
+ assert_equal(nil, @rub.get_by_type("locus"))
103
+ assert_equal("Q63931", @ckr.get_by_type("accession"))
104
+ assert_equal("CCKR_CAVPO", @ckr.get_by_type("entry_id"))
105
+ end
106
+
107
+ def test_get_all_by_type
108
+ #specify each type in each pattern while referring to NSIDs.
109
+ assert_equal(["671595", "CAA85678.1"], @rub.get_all_by_type("gi","acc_version","locus"))
110
+ assert_equal(["Q63931", "CCKR_CAVPO", "I51898"], @ckr.get_all_by_type("accession","entry_id"))
111
+ end
112
+ def test_locus
113
+ #None of the examples have locus information ...
114
+ assert_equal(nil, @rub.locus)
115
+
116
+ end
117
+ def test_gi
118
+ assert_equal("671595", @rub.gi)
119
+ assert_equal("2495000", @ckr.gi)
120
+ assert_equal(nil, @sce.gi) #sce doesn't have "gi" in the type.
121
+ end
122
+ def test_acc_version
123
+ assert_equal("CAA85678.1", @rub.acc_version)
124
+ assert_equal("AAB29504.1", @ckr.acc_version)
125
+ assert_equal("CACDC28", @emb.acc_version)
126
+ end
127
+
128
+ def test_accessions
129
+ assert_equal(["CACDC28"], @emb.accessions)
130
+ assert_equal(["CAA85678"], @rub.accessions)
131
+ assert_equal(["Q63931", "AAB29504"], @ckr.accessions)
132
+ assert_raise(RuntimeError){@sce.accesions} #sce doesn't have "accession" in the type.
133
+ end
134
+ def test_accession
135
+ assert_equal("CACDC28", @emb.accession)
136
+ assert_equal("CAA85678", @rub.accession)
137
+ assert_equal("AAB29504", @ckr.accession)
138
+ assert_raise(RuntimeError){@sce.accesion} #sce doesn't have "accession" in the type.
139
+
140
+ # to cover the else statement
141
+ ckr2 = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)") #from an example in the class file
142
+ assert_equal("Q63931", ckr2.accession)
143
+
144
+ end
145
+
146
+
147
+ def test_method_missing
148
+ #Methods specified with the types are tested only in this test method.
149
+ assert_equal("CCKR_CAVPO", @ckr.sp)
150
+ assert_equal("I51898", @ckr.pir)
151
+ assert_equal("AAB29504.1", @ckr.gb)
152
+ assert_equal("YBR160W", @sce.sce)
153
+ assert_equal("2.7.1.-", @sce.EC)
154
+ assert_equal("CC28_YEAST", @sce.SP)
155
+ assert_equal("CACDC28", @emb.emb)
156
+ end
157
+
158
+ end #class TestBioFastaDefline
159
+ end #module Bio
160
+