bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,87 @@
1
+ #
2
+ # test/unit/bio/io/test_ddbjxml.rb - Unit test for DDBJ XML.
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_ddbjxml.rb,v 1.1 2005/12/11 14:59:25 nakao Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+
28
+ require 'test/unit'
29
+ require 'bio/io/ddbjxml'
30
+
31
+ module Bio
32
+
33
+ class TestDDBJXMLConstants < Test::Unit::TestCase
34
+
35
+ def test_constants
36
+ constants = ["DDBJ", "TxSearch", "ClustalW", "PML", "Gib", "Fasta", "BASE_URI", "SRS", "Gtop", "GetEntry", "Blast"].sort
37
+ assert_equal(constants, Bio::DDBJ::XML.constants.sort)
38
+ end
39
+
40
+ def test_base_url
41
+ assert_equal("http://xml.nig.ac.jp/wsdl/", Bio::DDBJ::XML::BASE_URI)
42
+ end
43
+
44
+ def test_blast_server_rul
45
+ assert_equal("http://xml.nig.ac.jp/wsdl/Blast.wsdl", Bio::DDBJ::XML::Blast::SERVER_URI)
46
+ end
47
+
48
+ def test_clustalw_server_url
49
+ assert_equal("http://xml.nig.ac.jp/wsdl/ClustalW.wsdl", Bio::DDBJ::XML::ClustalW::SERVER_URI)
50
+ end
51
+
52
+ def test_ddbj_server_url
53
+ assert_equal("http://xml.nig.ac.jp/wsdl/DDBJ.wsdl", Bio::DDBJ::XML::DDBJ::SERVER_URI)
54
+ end
55
+
56
+ def test_fasta_server_url
57
+ assert_equal("http://xml.nig.ac.jp/wsdl/Fasta.wsdl", Bio::DDBJ::XML::Fasta::SERVER_URI)
58
+ end
59
+
60
+ def test_getentry_server_url
61
+ assert_equal("http://xml.nig.ac.jp/wsdl/GetEntry.wsdl", Bio::DDBJ::XML::GetEntry::SERVER_URI)
62
+ end
63
+
64
+ def test_gib_server_url
65
+ assert_equal("http://xml.nig.ac.jp/wsdl/Gib.wsdl", Bio::DDBJ::XML::Gib::SERVER_URI)
66
+ end
67
+
68
+ def test_gtop_server_url
69
+ assert_equal("http://xml.nig.ac.jp/wsdl/Gtop.wsdl", Bio::DDBJ::XML::Gtop::SERVER_URI)
70
+ end
71
+
72
+ def test_pml_server_url
73
+ assert_equal("http://xml.nig.ac.jp/wsdl/PML.wsdl", Bio::DDBJ::XML::PML::SERVER_URI)
74
+ end
75
+
76
+ def test_srs_server_url
77
+ assert_equal("http://xml.nig.ac.jp/wsdl/SRS.wsdl", Bio::DDBJ::XML::SRS::SERVER_URI)
78
+ end
79
+
80
+ def test_txsearch_server_url
81
+ assert_equal("http://xml.nig.ac.jp/wsdl/TxSearch.wsdl", Bio::DDBJ::XML::TxSearch::SERVER_URI)
82
+ end
83
+
84
+ end
85
+
86
+
87
+ end
@@ -0,0 +1,45 @@
1
+ #
2
+ # test/unit/bio/io/test_soapwsdl.rb - Unit test for SOAP/WSDL
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_soapwsdl.rb,v 1.1 2005/12/18 17:09:53 nakao Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+
28
+ require 'test/unit'
29
+ require 'bio/io/soapwsdl'
30
+
31
+ module Bio
32
+
33
+ class TestSOAPWSDL < Test::Unit::TestCase
34
+
35
+ def setup
36
+ @obj = Bio::SOAPWSDL
37
+ end
38
+
39
+ def test_methods
40
+ methods = ['wsdl', 'wsdl=', 'log', 'log=']
41
+ assert_equal(methods.sort, (@obj.instance_methods - Object.methods).sort)
42
+ end
43
+
44
+ end
45
+ end
@@ -0,0 +1,175 @@
1
+ #
2
+ # test/unit/bio/shell/plugin/test_seq.rb - Unit test for Bio::Shell plugin for biological sequence manipulations
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_seq.rb,v 1.5 2005/12/19 02:44:03 k Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+ require 'test/unit'
28
+ require 'bioruby'
29
+
30
+ module Bio
31
+ class TestShellPluginSeq < Test::Unit::TestCase
32
+
33
+ # include Bio::Shell
34
+ # Bio::Shell.instance_variable_set :@config, {}
35
+
36
+ def test_naseq
37
+ str = 'ACGT'
38
+ assert_equal(Bio::Sequence::NA, seq(str).class)
39
+ assert_equal(Bio::Sequence::NA.new(str), seq(str))
40
+ assert_equal('acgt', seq(str))
41
+ end
42
+
43
+ def test_aaseq
44
+ str = 'WD'
45
+ assert_equal(Bio::Sequence::AA, seq(str).class)
46
+ assert_equal(Bio::Sequence::AA.new('WD'), seq(str))
47
+ assert_equal('WD', seq(str))
48
+ end
49
+
50
+ def test_na_seqstat
51
+ naseq = 'atgcatgcatgc'
52
+ output =<<END
53
+
54
+ * * * Sequence statistics * * *
55
+
56
+ 5'->3' sequence : atgcatgcatgc
57
+ 3'->5' sequence : gcatgcatgcat
58
+ Translation 1 : MHAC
59
+ Translation 2 : CMH
60
+ Translation 3 : ACM
61
+ Translation -1 : ACMH
62
+ Translation -2 : HAC
63
+ Translation -3 : MHA
64
+ Length : 12 bp
65
+ GC percent : 50 %
66
+ Composition : a - 3 ( 25.00 %)
67
+ c - 3 ( 25.00 %)
68
+ g - 3 ( 25.00 %)
69
+ t - 3 ( 25.00 %)
70
+ Codon usage :
71
+
72
+ *---------------------------------------------*
73
+ | | 2nd | |
74
+ | 1st |-------------------------------| 3rd |
75
+ | | U | C | A | G | |
76
+ |-------+-------+-------+-------+-------+-----|
77
+ | U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| u |
78
+ | U U |F 0.0%|S 0.0%|Y 0.0%|C 25.0%| c |
79
+ | U U |L 0.0%|S 0.0%|* 0.0%|* 0.0%| a |
80
+ | UUU |L 0.0%|S 0.0%|* 0.0%|W 0.0%| g |
81
+ |-------+-------+-------+-------+-------+-----|
82
+ | CCCC |L 0.0%|P 0.0%|H 25.0%|R 0.0%| u |
83
+ | C |L 0.0%|P 0.0%|H 0.0%|R 0.0%| c |
84
+ | C |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| a |
85
+ | CCCC |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| g |
86
+ |-------+-------+-------+-------+-------+-----|
87
+ | A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| u |
88
+ | A A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| c |
89
+ | AAAAA |I 0.0%|T 0.0%|K 0.0%|R 0.0%| a |
90
+ | A A |M 25.0%|T 0.0%|K 0.0%|R 0.0%| g |
91
+ |-------+-------+-------+-------+-------+-----|
92
+ | GGGG |V 0.0%|A 0.0%|D 0.0%|G 0.0%| u |
93
+ | G |V 0.0%|A 0.0%|D 0.0%|G 0.0%| c |
94
+ | G GGG |V 0.0%|A 25.0%|E 0.0%|G 0.0%| a |
95
+ | GG G |V 0.0%|A 0.0%|E 0.0%|G 0.0%| g |
96
+ *---------------------------------------------*
97
+
98
+ Molecular weight : 3701.61444
99
+ Protein weight : 460.565
100
+ //
101
+ END
102
+ $str = ''
103
+ alias puts_orig puts
104
+ def puts(*args)
105
+ args.each do |obj|
106
+ $str << obj.to_s
107
+ end
108
+ end
109
+ seqstat(naseq)
110
+ undef puts
111
+ alias puts puts_orig
112
+ assert_equal(output, $str)
113
+ end
114
+
115
+ def test_aa_seqstat
116
+ aaseq = 'WD'
117
+ output =<<END
118
+
119
+ * * * Sequence statistics * * *
120
+
121
+ N->C sequence : WD
122
+ Length : 2 aa
123
+ Composition : D Asp - 1 ( 50.00 %) aspartic acid
124
+ W Trp - 1 ( 50.00 %) tryptophan
125
+ Protein weight : 319.315
126
+ //
127
+ END
128
+ $str = ''
129
+ alias puts_orig puts
130
+ def puts(*args)
131
+ args.each do |obj|
132
+ $str << obj.to_s
133
+ end
134
+ end
135
+ seqstat(aaseq)
136
+ undef puts
137
+ alias puts puts_orig
138
+ assert_equal(output, $str)
139
+ end
140
+
141
+ def test_doublehelix
142
+ seq = 'ACGTACGTACGTACGT'
143
+ output = <<END
144
+ at
145
+ c--g
146
+ g---c
147
+ t----a
148
+ a----t
149
+ c---g
150
+ g--c
151
+ ta
152
+ ta
153
+ g--c
154
+ c---g
155
+ a----t
156
+ t----a
157
+ g---c
158
+ c--g
159
+ at
160
+ END
161
+ $str = ''
162
+ alias puts_orig puts
163
+ def puts(*args)
164
+ args.each do |obj|
165
+ $str << obj.to_s
166
+ end
167
+ end
168
+ doublehelix(seq)
169
+ undef puts
170
+ alias puts puts_orig
171
+ assert_equal(output, $str)
172
+ end
173
+
174
+ end
175
+ end
@@ -0,0 +1,1028 @@
1
+ #
2
+ # test/unit/bio/test_alignment.rb - Unit test for Bio::Alignment
3
+ #
4
+ # Copyright (C) 2004 Moses Hohman <mmhohman@northwestern.edu>
5
+ # 2005 Naohisa Goto <ng@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: test_alignment.rb,v 1.6 2005/12/02 13:01:49 ngoto Exp $
22
+ #
23
+
24
+ require 'pathname'
25
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s
26
+ $:.unshift(libpath) unless $:.include?(libpath)
27
+
28
+ require 'test/unit'
29
+ require 'bio/alignment'
30
+
31
+ module Bio
32
+
33
+ class TestAlignmentPropertyMethods < Test::Unit::TestCase
34
+
35
+ def setup
36
+ @obj = Object.new
37
+ @obj.extend(Alignment::PropertyMethods)
38
+ end
39
+
40
+ def test_is_gap_default_false
41
+ assert_equal(false, @obj.is_gap?('a'), "\"a\" isn't a gap")
42
+ end
43
+
44
+ def test_is_gap_default_true
45
+ assert_equal(true, @obj.is_gap?('-'), '"-" is a gap')
46
+ end
47
+
48
+ def test_gap_regexp
49
+ assert_not_nil(@obj.gap_regexp)
50
+ end
51
+
52
+ def test_gap_regexp_never_nil
53
+ @obj.gap_regexp = nil
54
+ assert_not_nil(@obj.gap_regexp)
55
+ end
56
+
57
+ def test_gap_regexp=()
58
+ @obj.gap_regexp = /[^a-zA-Z0-9]/
59
+ assert_equal(/[^a-zA-Z0-9]/, @obj.gap_regexp)
60
+ end
61
+
62
+ def test_is_gap_nodefault_false
63
+ @obj.gap_regexp = /[^a-zA-Z0-9]/
64
+ assert_equal(false, @obj.is_gap?('3'))
65
+ end
66
+
67
+ def test_is_gap_nodefault_true
68
+ @obj.gap_regexp = /[^atgc]/
69
+ assert_equal(true, @obj.is_gap?('z'))
70
+ end
71
+
72
+ def test_gap_char_default
73
+ assert_not_nil(@obj.gap_char)
74
+ end
75
+
76
+ def test_gap_char_never_nil
77
+ @obj.gap_char = nil
78
+ assert_not_nil(@obj.gap_char)
79
+ end
80
+
81
+ def test_gap_char=()
82
+ @obj.gap_char = '#'
83
+ assert_equal('#', @obj.gap_char)
84
+ end
85
+
86
+ def test_missing_char_default
87
+ assert_not_nil(@obj.missing_char)
88
+ end
89
+
90
+ def test_missing_char_never_nil
91
+ @obj.missing_char = nil
92
+ assert_not_nil(@obj.missing_char)
93
+ end
94
+
95
+ def test_missing_char=()
96
+ @obj.missing_char = '_'
97
+ assert_equal('_', @obj.missing_char)
98
+ end
99
+
100
+ def test_seqclass_default
101
+ assert_not_nil(@obj.seqclass)
102
+ end
103
+
104
+ def test_seqclass_never_nil
105
+ @obj.seqclass = nil
106
+ assert_not_nil(@obj.seqclass)
107
+ end
108
+
109
+ def test_seqclass=()
110
+ @obj.seqclass = Sequence::NA
111
+ assert_equal(Sequence::NA, @obj.seqclass)
112
+ end
113
+
114
+ def test_get_all_property_default
115
+ assert_equal({}, @obj.get_all_property)
116
+ end
117
+
118
+ def test_get_all_property_nodefault
119
+ @obj.gap_regexp = /[^acgt]/
120
+ @obj.gap_char = '#'
121
+ @obj.missing_char = '_'
122
+ @obj.seqclass = Sequence::NA
123
+ assert_equal({ :gap_regexp => /[^acgt]/,
124
+ :gap_char => '#',
125
+ :missing_char => '_',
126
+ :seqclass => Sequence::NA },
127
+ @obj.get_all_property)
128
+ end
129
+
130
+ def test_set_all_property
131
+ h = { :gap_regexp => /[^acgt]/,
132
+ :gap_char => '#',
133
+ :missing_char => '_',
134
+ :seqclass => Sequence::NA }
135
+ @obj.set_all_property(h)
136
+ assert_equal(h, @obj.get_all_property)
137
+ end
138
+ end #class TestAlignmentPropertyMethods
139
+
140
+ # This is a unit test of the Bio::Alignment::Site class and
141
+ # the Bio::Alignment::SiteMethods module.
142
+ # Since Bio::Alignment::Site includes Bio::Alignment::SiteMethods,
143
+ # we can test both at the same time.
144
+ class TestAlignmentSite < Test::Unit::TestCase
145
+
146
+ def test_has_gap_true
147
+ site = Alignment::Site[ 'a', '-', 'c', 'g', 't' ]
148
+ assert_equal(true, site.has_gap?)
149
+ end
150
+
151
+ def test_has_gap_false
152
+ site = Alignment::Site[ 'a', 'c', 'g', 't' ]
153
+ assert_equal(false, site.has_gap?)
154
+ end
155
+
156
+ def test_remove_gaps!
157
+ site = Alignment::Site[ 'a', '-', 'c', '-' ]
158
+ assert_equal(Alignment::Site['a', 'c'], site.remove_gaps!)
159
+ end
160
+
161
+ def test_remove_gaps_bang_not_removed
162
+ site = Alignment::Site[ 'a', 'c']
163
+ assert_equal(nil, site.remove_gaps!)
164
+ end
165
+
166
+ def test_consensus_string_default
167
+ site = Alignment::Site[ 'a', 'a', 'a', 'a']
168
+ assert_equal('a', site.consensus_string)
169
+ end
170
+
171
+ def test_consensus_string_default_nil
172
+ site = Alignment::Site[ 'a', 'a', 'a', 'c']
173
+ assert_nil(site.consensus_string)
174
+ end
175
+
176
+ def test_consensus_string_50percent
177
+ site = Alignment::Site[ 'a', 'a', 'c', 'g']
178
+ assert_equal('a', site.consensus_string(0.5))
179
+ end
180
+
181
+ def test_consensus_string_50percent_nil
182
+ site = Alignment::Site[ 'a', 'c', 'g', 't']
183
+ assert_nil(site.consensus_string(0.5))
184
+ end
185
+
186
+ def test_consensus_iupac
187
+ data = {
188
+ 'a' => [ 'a' ],
189
+ 'c' => [ 'c' ],
190
+ 'g' => [ 'g' ],
191
+ 't' => [ 't' ],
192
+ 't' => [ 't', 'u' ],
193
+ 'm' => [ 'a', 'c' ],
194
+ 'r' => [ 'a', 'g' ],
195
+ 'w' => [ 'a', 't' ],
196
+ 's' => [ 'c', 'g' ],
197
+ 'y' => [ 'c', 't' ],
198
+ 'k' => [ 'g', 't' ],
199
+ 'v' => [ 'a', 'c', 'g' ],
200
+ 'h' => [ 'a', 'c', 't' ],
201
+ 'd' => [ 'a', 'g', 't' ],
202
+ 'b' => [ 'c', 'g', 't' ],
203
+ 'n' => [ 'a', 'c', 'g', 't' ],
204
+ nil => [ 'z', 'a' ]
205
+ }
206
+ data.each do |cons, testdata|
207
+ site = Alignment::Site[ *testdata ]
208
+ assert_equal(cons, site.consensus_iupac,
209
+ "IUPAC consensus of #{testdata.join(',')} is #{cons}")
210
+ end
211
+ end
212
+
213
+ def test_match_line_amino_missing
214
+ site = Alignment::Site[ 'P', 'Q', 'R', 'S' ]
215
+ assert_equal(' ', site.match_line_amino)
216
+ end
217
+
218
+ def test_match_line_amino_100percent
219
+ site = Alignment::Site[ 'M', 'M', 'M', 'M' ]
220
+ assert_equal('*', site.match_line_amino)
221
+ end
222
+
223
+ def test_match_line_amino_strong
224
+ site = Alignment::Site[ 'N', 'E', 'Q', 'K' ]
225
+ assert_equal(':', site.match_line_amino)
226
+ end
227
+
228
+ def test_match_line_amino_weak
229
+ site = Alignment::Site[ 'S', 'G', 'N', 'D' ]
230
+ assert_equal('.', site.match_line_amino)
231
+ end
232
+
233
+ def test_match_line_nuc_missing
234
+ site = Alignment::Site[ 'A', 'C', 'G', 'T' ]
235
+ assert_equal(' ', site.match_line_nuc)
236
+ end
237
+
238
+ def test_match_line_nuc_100percent
239
+ site = Alignment::Site[ 'G', 'G', 'G', 'G' ]
240
+ assert_equal('*', site.match_line_nuc)
241
+ end
242
+ end #class TestAlignmentSite
243
+
244
+ # This is a sample class for testing Bio::Alignment::EnumerableExtension.
245
+ class A < Array
246
+ include Alignment::EnumerableExtension
247
+ end
248
+
249
+ class TestAlignmentEnumerableExtension < Test::Unit::TestCase
250
+ def test_each_seq
251
+ expected_results = [ 'atg', 'aag', 'acg' ]
252
+ a = A[ *expected_results ]
253
+ a.each_seq do |x|
254
+ assert_equal(expected_results.shift, x)
255
+ end
256
+ assert(expected_results.empty?)
257
+ end
258
+
259
+ def test_seqclass_default
260
+ a = A.new
261
+ assert_equal(String, a.seqclass)
262
+ end
263
+
264
+ def test_seqclass
265
+ a = A[ Bio::Sequence::NA.new('atg') ]
266
+ assert_equal(Bio::Sequence::NA, a.seqclass)
267
+ end
268
+
269
+ def test_seqclass=()
270
+ a = A.new
271
+ assert_equal(String, a.seqclass)
272
+ a << Bio::Sequence::NA.new('a')
273
+ assert_equal(Bio::Sequence::NA, a.seqclass)
274
+ a.seqclass = Bio::Sequence::AA
275
+ assert_equal(Bio::Sequence::AA, a.seqclass)
276
+ end
277
+
278
+ def test_alignment_length
279
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
280
+ assert_equal(4, a.alignment_length)
281
+ end
282
+
283
+ def test_private_alignment_site
284
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
285
+ assert_equal(Alignment::Site[ '-', 't', 't', 't', '-' ],
286
+ a.instance_eval { _alignment_site(1) })
287
+ end
288
+
289
+ def test_alignment_site
290
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
291
+ assert_equal(Alignment::Site[ '-', 't', 't', 't', '-' ],
292
+ a.__send__(:_alignment_site, 1))
293
+ end
294
+
295
+ def test_each_site
296
+ expected_results = [
297
+ Alignment::Site[ 'a', 'a', 'a', 'a', '-' ],
298
+ Alignment::Site[ '-', 't', 't', 't', '-' ],
299
+ Alignment::Site[ '-', '-', 'g', 'g', '-' ],
300
+ Alignment::Site[ '-', '-', 'c', '-', '-' ]
301
+ ]
302
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
303
+ a.each_site do |site|
304
+ assert_equal(expected_results.shift, site)
305
+ end
306
+ assert(expected_results.empty?)
307
+ end
308
+
309
+ def test_each_site_step
310
+ expected_results = [
311
+ Alignment::Site[ '-', 't', 't', 't', '-' ], # site 1
312
+ Alignment::Site[ '-', 'a', 'g', 't', '-' ], # site 3
313
+ ]
314
+ a = A[ 'a', 'atgatc', 'atggcc', 'atgtga', '' ]
315
+ a.each_site_step(1, 4, 2) do |site|
316
+ assert_equal(expected_results.shift, site)
317
+ end
318
+ assert(expected_results.empty?)
319
+ end
320
+
321
+ def test_alignment_collect
322
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
323
+ assert_equal(Alignment::SequenceArray[ 'a', 'au', 'augc', 'aug', '' ],
324
+ a.alignment_collect { |x| x.gsub(/t/, 'u') })
325
+ end
326
+
327
+ def test_alignment_window
328
+ a = A[ 'a', 'at', 'atgca', 'atg', '' ]
329
+ assert_equal(Alignment::SequenceArray[ '', 't', 'tgc', 'tg', '' ],
330
+ a.alignment_window(1, 3))
331
+ end
332
+
333
+ def test_each_window
334
+ expected_results = [
335
+ Alignment::SequenceArray[ 'atg', 'tcg', '' ], # 0..2
336
+ Alignment::SequenceArray[ 'gca', 'gat', '' ], # 2..4
337
+ Alignment::SequenceArray[ 'atg', 'tgc', '' ], # 4..6
338
+ Alignment::SequenceArray[ 'c', 'a', '' ] # 7..7
339
+ ]
340
+ a = A[ 'atgcatgc', 'tcgatgca', '' ]
341
+ r = a.each_window(3, 2) do |x|
342
+ assert_equal(expected_results.shift, x)
343
+ end
344
+ assert_equal(expected_results.shift, r)
345
+ assert(expected_results.empty?)
346
+ end
347
+
348
+ def test_collect_each_site
349
+ a = A[ 'a', 'at', 'atgc', 'atg', '' ]
350
+ assert_equal(["aaaa-", "-ttt-", "--gg-", "--c--" ],
351
+ a.collect_each_site { |x| x.join('') })
352
+ end
353
+
354
+ def test_consensus_each_site_default
355
+ expected_results = [
356
+ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
357
+ Alignment::Site[ 'a', 'c', 'g', 't', '-' ]
358
+ ]
359
+
360
+ a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
361
+ result = a.consensus_each_site do |site|
362
+ assert_equal(expected_results.shift, site)
363
+ 'x'
364
+ end
365
+ assert_equal('xx', result)
366
+ assert(expected_results.empty?)
367
+ end
368
+
369
+ def test_consensus_each_site_gap_mode_1
370
+ expected_results = [
371
+ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ]
372
+ ]
373
+
374
+ a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
375
+ result = a.consensus_each_site(:gap_mode => 1) do |site|
376
+ assert_equal(expected_results.shift, site)
377
+ 'x'
378
+ end
379
+ assert_equal('x-', result)
380
+ assert(expected_results.empty?)
381
+ end
382
+
383
+ def test_consensus_each_site_gap_mode_minus1
384
+ expected_results = [
385
+ Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
386
+ Alignment::Site[ 'a', 'c', 'g', 't' ]
387
+ ]
388
+
389
+ a = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
390
+ result = a.consensus_each_site(:gap_mode => -1) do |site|
391
+ assert_equal(expected_results.shift, site)
392
+ 'x'
393
+ end
394
+ assert_equal('xx', result)
395
+ assert(expected_results.empty?)
396
+ end
397
+
398
+ def test_consensus_string_default
399
+ a = A[ 'ata', 'aac', 'aag', 'aat' ]
400
+ assert_equal('a??', a.consensus_string)
401
+ end
402
+
403
+ def test_consensus_string_half
404
+ a = A[ 'ata', 'aac', 'aag', 'aat' ]
405
+ assert_equal('aa?', a.consensus_string(0.5))
406
+ end
407
+
408
+ def test_consensus_iupac
409
+ a = A[
410
+ 'acgtaaaccgaaacaz',
411
+ 'acgtaaaccgccggcz',
412
+ 'acgtcgtgttgtttgz',
413
+ 'acgtcgtgttaaactz'
414
+ ]
415
+ assert_equal('acgtmrwsykvhdbn?', a.consensus_iupac)
416
+ end
417
+
418
+ def test_match_line_amino
419
+ a = A[
420
+ 'M-SNNNQMMHF-CASSSSSNNFH-AW',
421
+ 'M-TEHDHIIYY-STATTGNDEVF-FW',
422
+ 'M-AQQERLLHW-AVGNPNDEQLY-HW',
423
+ 'M-SKKQKVFYF-CASKADEQHIH-LW',
424
+ 'M-TNNNQMMHY-STASSSQHRMF-QW',
425
+ 'M-AEHDHIIYW-AVGTTGKKKFY-YW'
426
+ #* ::::::::: ........... *
427
+ ]
428
+ assert_equal('* ::::::::: ........... *', a.match_line_amino)
429
+ end
430
+
431
+ def test_match_line_nuc
432
+ a = A[ 'aaa', 'aa-','aac', 'at-' ]
433
+ assert_equal('* ', a.match_line_nuc)
434
+ end
435
+
436
# match_line with no options and with :type => :aa is expected to give the
# amino-acid style line; :type => :na is expected to give the nucleotide
# style line. Every expected string has one character per column (four
# columns here), so the :na result is '*' followed by three blanks.
def test_match_line
  a = A[
    Sequence::AA.new('MNSA'),
    Sequence::AA.new('MHTL'),
    Sequence::AA.new('MQNV'),
    Sequence::AA.new('MKKW'),
  ]
  assert_equal('*:. ', a.match_line)
  assert_equal('*:. ', a.match_line(:type => :aa))
  assert_equal('*   ', a.match_line(:type => :na))
end
447
+
448
# convert_match is expected to modify the receiver in place: residues that
# equal the first sequence's residue in the same column become '.'.
def test_convert_match
  alignment = A[ 'aaaa', 'accc', 'acac', 'actc' ]
  expected = A[ 'aaaa', '.ccc', '.c.c', '.ctc' ]
  alignment.convert_match
  assert_equal(expected, alignment)
end
458
+
459
# convert_unmatch is expected to modify the receiver in place: each '.' is
# replaced by the first sequence's residue in the same column.
def test_convert_unmatch
  alignment = A[ 'aaaa', '.ccc', '.c.c', '.ctc' ]
  expected = A[ 'aaaa', 'accc', 'acac', 'actc' ]
  alignment.convert_unmatch
  assert_equal(expected, alignment)
end
464
+
465
# alignment_normalize! is expected to pad every sequence with '-' up to the
# length of the longest one (an empty sequence becomes all gaps).
def test_alignment_normalize!
  alignment = A[ 'a', 'atg', 'atgc', '' ]
  alignment.alignment_normalize!
  padded = A[ 'a---', 'atg-', 'atgc', '----']
  assert_equal(padded, alignment)
end
470
+
471
# alignment_rstrip! is expected to return a true value and to drop the
# trailing columns from the receiver, leaving the leading gaps untouched.
def test_alignment_rstrip!
  alignment = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = alignment.alignment_rstrip!
  assert(changed)
  stripped = A[ '--aaa', '--t-t', '---g-', '--t' ]
  assert_equal(stripped, alignment)
end
476
+
477
# When there is nothing to strip, alignment_rstrip! is expected to return nil
# and leave the receiver unchanged.
def test_alignment_rstrip_nil
  alignment = A[ 'aa', '-a', 'a-' ]
  outcome = alignment.alignment_rstrip!
  assert_nil(outcome)
  untouched = A[ 'aa', '-a', 'a-' ]
  assert_equal(untouched, alignment)
end
482
+
483
# alignment_lstrip! is expected to return a true value and to drop the
# leading columns from the receiver, leaving the trailing gaps untouched.
def test_alignment_lstrip!
  alignment = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = alignment.alignment_lstrip!
  assert(changed)
  stripped = A[ 'aaa--', 't-t--', '-g---', 't' ]
  assert_equal(stripped, alignment)
end
488
+
489
# When there is nothing to strip, alignment_lstrip! is expected to return nil
# and leave the receiver unchanged.
def test_alignment_lstrip_nil
  alignment = A[ 'aa', '-a', 'a-' ]
  outcome = alignment.alignment_lstrip!
  assert_nil(outcome)
  untouched = A[ 'aa', '-a', 'a-' ]
  assert_equal(untouched, alignment)
end
494
+
495
# alignment_strip! is expected to return a true value and to drop both the
# leading and the trailing columns from the receiver.
def test_alignment_strip!
  alignment = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = alignment.alignment_strip!
  assert(changed)
  stripped = A[ 'aaa', 't-t', '-g-', 't' ]
  assert_equal(stripped, alignment)
end
500
+
501
# When there is nothing to strip, alignment_strip! is expected to return nil
# and leave the receiver unchanged.
def test_alignment_strip_nil
  alignment = A[ 'aa', '-a', 'a-' ]
  outcome = alignment.alignment_strip!
  assert_nil(outcome)
  untouched = A[ 'aa', '-a', 'a-' ]
  assert_equal(untouched, alignment)
end
506
+
507
# remove_all_gaps! is expected to return a true value and to delete every
# '-' from every sequence of the receiver.
def test_remove_all_gaps!
  alignment = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = alignment.remove_all_gaps!
  assert(changed)
  ungapped = A[ 'aaa', 'tt', 'g', 't' ]
  assert_equal(ungapped, alignment)
end
512
+
513
+ # test of alignment_slice.
514
+ # Please also refer to alignment_window.
515
# alignment_slice(1, 3) is expected to return a SequenceArray holding the
# slice of each sequence; a sequence too short to reach the start offset is
# expected to give '' or nil, as listed below.
def test_alignment_slice
  alignment = A[ 'a', 'at', 'atgca', 'atg', '' ]
  expected = Alignment::SequenceArray[ '', 't', 'tgc', 'tg', nil ]
  assert_equal(expected, alignment.alignment_slice(1, 3))
end
520
+
521
# alignment_subseq(2, 4) is expected to return a SequenceArray holding the
# sub-sequence of each Sequence, with nil for the empty sequence.
def test_alignment_subseq
  alignment = A[
    Sequence.new('a'),
    Sequence.new('at'),
    Sequence.new('atgca'),
    Sequence.new('atg'),
    Sequence.new('')
  ]
  expected = Alignment::SequenceArray[
    Sequence.new(''),
    Sequence.new('t'),
    Sequence.new('tgc'),
    Sequence.new('tg'),
    nil
  ]
  assert_equal(expected, alignment.alignment_subseq(2,4))
end
529
+
530
# alignment_concat is expected to append the i-th element of the argument to
# the i-th sequence of the receiver. Surplus elements in the argument are
# expected to be ignored, and a shorter argument only extends the leading
# sequences. A plain Array is accepted as well as an alignment.
def test_alignment_concat
  alignment = A[ 'aaa', 'c', 'gg', 't' ]

  alignment.alignment_concat(A[ 'ttt', 'gg', 'aa', 'cc', 'aa' ])
  after_first = A[ 'aaattt', 'cgg', 'ggaa', 'tcc' ]
  assert_equal(after_first, alignment)

  alignment.alignment_concat([ 'c', 't' ])
  after_second = A[ 'aaatttc', 'cggt', 'ggaa', 'tcc' ]
  assert_equal(after_second, alignment)
end
537
+ end #class TestAlignmentEnumerableExtension
538
+
539
# Tests for the helper methods of Alignment::ClustalWFormatter. The module is
# mixed into a bare Object and the helpers are invoked through instance_eval,
# so they can be reached even if they are not public.
class TestAlignmentClustalWFormatter < Test::Unit::TestCase
  def setup
    @obj = Object.new.extend(Alignment::ClustalWFormatter)
  end

  # Two names sharing the first word: the indexes of the clashing names are
  # expected to be returned.
  def test_have_same_name_true
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    clashing = @obj.instance_eval { have_same_name?(names) }
    assert_equal([ 0, 1 ], clashing)
  end

  # No clashing names: false is expected.
  def test_have_same_name_false
    names = [ 'GTP ATG', 'ATP ATA', 'BBB' ]
    clashing = @obj.instance_eval { have_same_name?(names) }
    assert_equal(false, clashing)
  end

  # Clashing names that differ after the blank are expected to be made
  # distinct by replacing the blank with '_'.
  def test_avoid_same_name
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    renamed = @obj.instance_eval { avoid_same_name(names) }
    assert_equal([ 'ATP_ATG', 'ATP_ATA', 'BBB' ], renamed)
  end

  # Fully identical names are expected to be made distinct by prefixing
  # every name with its index.
  def test_avoid_same_name_numbering
    names = [ 'ATP', 'ATP', 'BBB' ]
    renamed = @obj.instance_eval { avoid_same_name(names) }
    assert_equal([ '0_ATP', '1_ATP', '2_BBB' ], renamed)
  end

end #class TestAlignmentClustalWFormatter
567
+
568
+
569
# Tests for Alignment used as a keyed collection of sequences: storing,
# sequence-class detection, gap/missing characters, site iteration, add_seq
# key and sequence extraction, consensus strings, match conversion and
# normalization.
class TestAlignment < Test::Unit::TestCase

  # testing helper method: builds an Alignment by appending each given string
  # as a Sequence::NA (relies on Alignment#<< returning the alignment).
  def build_na_alignment(*sequences)
    sequences.inject(Alignment.new) { |alignment, sequence| alignment << Sequence::NA.new(sequence) }
  end
  private :build_na_alignment

  def test_equals
    alignment1 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
    alignment2 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
    assert_equal(alignment1, alignment2)
  end

  # Alignment#store

  # Storing under an existing key is expected to keep the first sequence.
  def test_store_cannot_override_key
    alignment = Alignment.new
    alignment.store("Cat DNA", Sequence::NA.new("cat"))
    alignment.store("Cat DNA", Sequence::NA.new("gcat"))
    assert_equal("cat", alignment["Cat DNA"])
  end

  def test_store_with_nil_key_uses_next_number_for_key
    alignment = Alignment.new
    alignment.store(nil, Sequence::NA.new("cat"))
    alignment.store(nil, Sequence::NA.new("gat"))
    alignment.store(nil, Sequence::NA.new("tat"))
    assert_equal({0=>"cat",1=>"gat",2=>"tat"}, alignment.to_hash)
  end

  # A nil key is expected to become the position of the sequence in the
  # alignment (1 and 3 here), not a separate counter of unnamed sequences.
  def test_store_with_default_keys_and_user_defined_keys
    alignment = Alignment.new
    alignment.store("cat key", Sequence::NA.new("cat"))
    alignment.store(nil, Sequence::NA.new("cag"))
    alignment.store("gat key", Sequence::NA.new("gat"))
    alignment.store(nil, Sequence::NA.new("gag"))
    assert_equal({"gat key"=>"gat",1=>"cag",3=>"gag","cat key"=>"cat"}, alignment.to_hash)
  end

  # Test append operator

  # NOTE: despite its name this test checks the content stored by <<,
  # not seqclass.
  def test_seqclass_when_sequence_used
    alignment = Alignment.new
    alignment << Sequence::NA.new("cat")
    assert_equal({0=>"cat"}, alignment.to_hash)
  end

  # Test seqclass

  def test_seqclass_when_sequence_used_no_seqclass_set
    alignment = Alignment.new
    alignment << Sequence::NA.new("cat")
    assert_equal(Sequence::NA, alignment.seqclass)
  end

  def test_seqclass_String_seq_not_present_no_seqclass_set
    alignment = Alignment.new
    alignment << nil
    assert_equal(String, alignment.seqclass)
  end

  # An explicitly assigned seqclass is expected to win over the class of the
  # stored sequences. Integer is used as the arbitrary class because Fixnum
  # is not defined on current Ruby versions.
  def test_seqclass_when_seqclass_set
    alignment = Alignment.new
    alignment.seqclass = Integer
    alignment << "this doesn't really make sense"
    assert_equal(Integer, alignment.seqclass)
  end

  # Alignment#gap_char

  def test_default_gap_char
    alignment = Alignment.new
    assert_equal("-", alignment.gap_char)
  end

  def test_set_and_get_gap_char
    alignment = Alignment.new
    alignment.gap_char = "+"
    assert_equal("+", alignment.gap_char)
  end

  # Alignment#gap_regexp

  def test_default_gap_regexp_matches_default_gap_char
    alignment = Alignment.new
    assert(alignment.gap_regexp.match(alignment.gap_char))
  end

  # Alignment#missing_char

  def test_default_missing_char
    alignment = Alignment.new
    assert_equal("?", alignment.missing_char)
  end

  # Alignment#seq_length

  def test_seq_length_when_one_sequence
    alignment = build_na_alignment("agt")
    assert_equal(3, alignment.seq_length)
  end

  def test_seq_length_is_max_seq_length
    alignment = build_na_alignment("agt", "agtaa", "agta")
    assert_equal(5, alignment.seq_length)
  end

  # Alignment#each_site

  def test_each_site_equal_length
    alignment = build_na_alignment("acg", "gta")
    expected_sites = [["a", "g"], ["c", "t"], ["g", "a"]]
    alignment.each_site do |site|
      assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
    end
    # without this check the test would pass if each_site yielded nothing
    assert(expected_sites.empty?, "not all sites were yielded")
  end

  # Shorter sequences are expected to be padded with the gap character.
  def test_each_site_unequal_length
    alignment = build_na_alignment("ac", "gta")
    expected_sites = [["a", "g"], ["c", "t"], ["-", "a"]]
    alignment.each_site do |site|
      assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
    end
    # without this check the test would pass if each_site yielded nothing
    assert(expected_sites.empty?, "not all sites were yielded")
  end

  #TODO: Lots of stuff needing tests here

  # Alignment#add_seq
  #
  # The tests below attach singleton methods (seq, naseq, aaseq, definition,
  # entry_id) to a plain String and check which of them add_seq uses to
  # obtain the sequence and the key.

  def test_add_seq_no_key
    alignment = Alignment.new
    alignment.add_seq("agct")
    assert_equal(String, alignment.seqclass, "wrong class")
    assert_equal({0=>"agct"}, alignment.to_hash, "wrong hash")
  end

  def test_add_seq_using_seq_with_seq_method
    seq = "agtc"
    class <<seq
      def seq
        Sequence::NA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::NA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"agtc"}, alignment.to_hash, "wrong hash")
  end

  def test_add_seq_using_seq_with_naseq_method
    seq = "agtc"
    class <<seq
      def naseq
        Sequence::NA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::NA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"agtc"}, alignment.to_hash, "wrong hash")
  end

  def test_add_seq_using_seq_with_aaseq_method
    seq = "AVGR"
    class <<seq
      def aaseq
        Sequence::AA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::AA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"AVGR"}, alignment.to_hash, "wrong hash")
  end

  def test_add_seq_using_seq_with_definition_method
    seq = "atgc"
    class <<seq
      def definition
        "this is the key"
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq)
    assert_equal({"this is the key"=>"atgc"}, alignment.to_hash, "wrong hash")
  end

  def test_add_seq_using_seq_with_entry_id_method
    seq = "atgc"
    class <<seq
      def entry_id
        271828
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq)
    assert_equal({271828=>"atgc"}, alignment.to_hash, "wrong hash")
  end

  # Alignment#consensus_string

  def test_consensus_string_no_gaps
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    assert_equal("??tcgat???", alignment.consensus_string)
  end

  def test_consensus_threshold_two_sequences
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    # the threshold is the fraction of sequences in which a symbol must
    # occur at a given position to be considered the consensus symbol
    assert_equal("agtcgattaa", alignment.consensus(0.5))
    assert_equal("??tcgat???", alignment.consensus(0.500000001))
  end

  def test_consensus_threshold_four_sequences
    alignment = build_na_alignment("agtg",
                                   "ttag",
                                   "actc",
                                   "tatc")
    # ties go to the symbol that occurs in the earliest sequence
    assert_equal("agtg", alignment.consensus(0.25))
    assert_equal("a?tg", alignment.consensus(0.26))
  end

  def test_consensus_opt_gap_mode
    alignment = build_na_alignment("gt-gt-a",
                                   "ttcggc-",
                                   "ttcggc-")
    # using threshold = 0.5, that is a symbol must occur >= half the time in order to be consensus
    # gap_mode -1 means gaps are ignored
    assert_equal("ttcggca", alignment.consensus(0.5, :gap_mode => -1), "gap mode -1")
    # gap_mode 0 means gaps are treated like regular symbols, yielding a gap in the last position
    assert_equal("ttcggc-", alignment.consensus(0.5, :gap_mode => 0), "gap mode 0")
    # gap_mode 1 means gaps take precedence over any other symbol, yielding two more gaps
    assert_equal("tt-gg--", alignment.consensus(0.5, :gap_mode => 1), "gap mode 1")
  end

  def test_consensus_opt_missing_char
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    assert_equal("**tcgat***", alignment.consensus(1, :missing_char => "*"))
  end

  # Alignment#consensus_iupac

  def test_consensus_iupac_no_gaps
    alignment = build_na_alignment("agtcgattaa", "tttcgatgcc")
    assert_equal("wktcgatkmm", alignment.consensus_iupac)
  end

  def test_consensus_iupac_of_ambiguous_bases
    alignment = build_na_alignment("tmrwsykvhdbnd", "uaaaccgaaacab")
    assert_equal("tmrwsykvhdbnn", alignment.consensus_iupac)
  end

  def test_consensus_iupac_gap_modes
    alignment = build_na_alignment("a-t", "acc")
    # gap_mode -1 means gaps are ignored
    assert_equal("acy", alignment.consensus_iupac(:gap_mode => -1))
    # gap_mode 0 means gaps are treated as normal characters, yielding a missing symbol
    assert_equal("a?y", alignment.consensus_iupac(:gap_mode => 0))
    # gap_mode 1 means gaps take precedence over everything, yielding a gap
    assert_equal("a-y", alignment.consensus_iupac(:gap_mode => 1))
  end

  # Checks, for every ambiguity code, several single-column alignments that
  # are expected to collapse to that code. Each entry is
  # [expected code, list of alignments]; a failure is labelled
  # "<code> #<n>" where n is the 1-based position in the list.
  def test_consensus_iupac_yields_correct_ambiguous_bases
    assert_equal "t", build_na_alignment("t", "u").consensus_iupac # not really IUPAC

    cases = [
      # m = a c
      ["m", [%w[a c], %w[m c], %w[a m], %w[m a c]]],
      # r = a g
      ["r", [%w[a g], %w[r g], %w[a r], %w[a r g]]],
      # w = a t/u
      ["w", [%w[a t], %w[a u], %w[w a], %w[t w], %w[w u],
             %w[u t a], %w[w u t a]]],
      # s = c g
      ["s", [%w[c g], %w[s g], %w[c s], %w[c s g]]],
      # y = c t/u
      ["y", [%w[c t], %w[c u], %w[y c], %w[t y], %w[y u],
             %w[u t c], %w[y u t c]]],
      # k = g t/u
      ["k", [%w[g t], %w[g u], %w[k g], %w[t k], %w[k u],
             %w[u t g], %w[k u t g]]],
      # v = a c g m r s
      ["v", [%w[a c g], %w[g m], %w[a s], %w[c r], %w[m s], %w[m r], %w[s r],
             %w[s r m], %w[s r m a c g], %w[v g]]], # alright, enough
      # b = t/u c g s y k
      ["b", [%w[t c g], %w[g y], %w[t s], %w[c k], %w[y s], %w[y k], %w[s k],
             %w[s k y], %w[s k y u c g], %w[b g]]],
      # h = t/u c a y w m
      # (case #7 is w + m: the three pairwise combinations of y, w and m are
      # covered by #5, #6 and #7, mirroring the v, b and d groups)
      ["h", [%w[t c a], %w[a y], %w[c w], %w[u m], %w[y w], %w[y m], %w[w m],
             %w[w m y], %w[w m y t c a], %w[h t]]],
      # d = t/u g a r w k
      ["d", [%w[t g a], %w[r t], %w[w g], %w[k a], %w[k r], %w[k w], %w[r w],
             %w[r w k], %w[k r w t g a], %w[d t]]],
      # n = anything
      ["n", [%w[a g c t], %w[a g c u], %w[w s], %w[k m], %w[r y]]]
    ]

    cases.each do |code, alignments|
      alignments.each_with_index do |sequences, index|
        assert_equal code, build_na_alignment(*sequences).consensus_iupac, "#{code} ##{index + 1}"
      end
    end
  end

  def test_consensus_iupac_missing_char
    alignment = build_na_alignment("a??", "ac?")
    assert_equal("a??", alignment.consensus_iupac())
  end

  def test_consensus_iupac_missing_char_option
    alignment = build_na_alignment("a**t", "ac**")
    assert_equal("a***", alignment.consensus_iupac(:missing_char => "*"))
  end

  # Alignment#convert_match

  def test_convert_match
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tttcgatgcc")
    match = alignment.convert_match
    assert_equal(alignment[0], match[0], "first sequence altered")
    assert_equal("tt.....gcc", match[1], "wrong match")
  end

  # Alignment#convert_unmatch

  def test_convert_unmatch
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tt.....gcc")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgattaa", unmatched[0], "first changed")
    assert_equal("tttcgatgcc", unmatched[1], "second wrong")
  end

  def test_convert_unmatch_multiple_sequences
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tt.....gcc")
    alignment << Sequence::NA.new("c...c..g.c")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgattaa", unmatched[0], "first changed")
    assert_equal("tttcgatgcc", unmatched[1], "second wrong")
    assert_equal("cgtccatgac", unmatched[2], "third wrong")
  end

  def test_convert_unmatch_different_length_sequences_truncates_seq_if_last_matched
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgatta")
    alignment << Sequence::NA.new("tt.....gc.")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgatta", unmatched[0], "first changed")
    assert_equal("tttcgatgc", unmatched[1], "second wrong") #TODO: verify this is correct, and not . at end
  end

  def test_convert_unmatch_different_match_char
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcga")
    alignment << Sequence::NA.new("tt====")
    unmatched = alignment.convert_unmatch('=')
    assert_equal("agtcga", unmatched[0], "first changed")
    assert_equal("tttcga", unmatched[1], "second wrong")
  end

  # Alignment#match_line

  # The match line is expected to have one symbol per column. The alignment
  # is seven columns wide; columns 4 (M/N) and 5 (C/N) both map to a blank,
  # so there are two consecutive spaces before the final '*'.
  def test_match_line_protein
    alignment = Alignment.new
    alignment << Sequence::AA.new("AELFMCF")
    alignment << Sequence::AA.new("AKLVNNF")
    assert_equal "*:*.  *", alignment.match_line
  end

  #TODO: lots more on the consensus, match, etc.

  # Alignment#normalize

  def test_normalizebang_extends_sequences_with_gaps
    alignment = build_na_alignment("a", "ag", "agc", "agct")
    alignment.normalize!
    assert_equal({0=>"a---",1=>"ag--",2=>"agc-",3=>"agct"}, alignment.to_hash)
  end

  # Alignment#to_clustal
end
1028
+ end