bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,87 @@
1
+ #
2
+ # test/unit/bio/io/test_ddbjxml.rb - Unit test for DDBJ XML.
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_ddbjxml.rb,v 1.1 2005/12/11 14:59:25 nakao Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+
28
+ require 'test/unit'
29
+ require 'bio/io/ddbjxml'
30
+
31
+ module Bio
32
+
33
# Checks the constants exposed by Bio::DDBJ::XML: the set of constant names,
# the base URI, and the WSDL location advertised by each service class.
class TestDDBJXMLConstants < Test::Unit::TestCase

  # The namespace must define exactly these constants.
  # Names are converted with to_s before comparing because
  # Module#constants returns Strings on Ruby 1.8 but Symbols on 1.9+.
  def test_constants
    expected = ["DDBJ", "TxSearch", "ClustalW", "PML", "Gib", "Fasta", "BASE_URI", "SRS", "Gtop", "GetEntry", "Blast"].sort
    actual = Bio::DDBJ::XML.constants.map { |name| name.to_s }.sort
    assert_equal(expected, actual)
  end

  def test_base_url
    assert_equal("http://xml.nig.ac.jp/wsdl/", Bio::DDBJ::XML::BASE_URI)
  end

  # Previously misspelled as test_blast_server_rul.
  def test_blast_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Blast.wsdl", Bio::DDBJ::XML::Blast::SERVER_URI)
  end

  def test_clustalw_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/ClustalW.wsdl", Bio::DDBJ::XML::ClustalW::SERVER_URI)
  end

  def test_ddbj_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/DDBJ.wsdl", Bio::DDBJ::XML::DDBJ::SERVER_URI)
  end

  def test_fasta_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Fasta.wsdl", Bio::DDBJ::XML::Fasta::SERVER_URI)
  end

  def test_getentry_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/GetEntry.wsdl", Bio::DDBJ::XML::GetEntry::SERVER_URI)
  end

  def test_gib_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Gib.wsdl", Bio::DDBJ::XML::Gib::SERVER_URI)
  end

  def test_gtop_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Gtop.wsdl", Bio::DDBJ::XML::Gtop::SERVER_URI)
  end

  def test_pml_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/PML.wsdl", Bio::DDBJ::XML::PML::SERVER_URI)
  end

  def test_srs_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/SRS.wsdl", Bio::DDBJ::XML::SRS::SERVER_URI)
  end

  def test_txsearch_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/TxSearch.wsdl", Bio::DDBJ::XML::TxSearch::SERVER_URI)
  end

end
85
+
86
+
87
+ end
@@ -0,0 +1,45 @@
1
+ #
2
+ # test/unit/bio/io/test_soapwsdl.rb - Unit test for SOAP/WSDL
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_soapwsdl.rb,v 1.1 2005/12/18 17:09:53 nakao Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+
28
+ require 'test/unit'
29
+ require 'bio/io/soapwsdl'
30
+
31
+ module Bio
32
+
33
# Checks which public instance methods Bio::SOAPWSDL adds on top of Object.
class TestSOAPWSDL < Test::Unit::TestCase

  def setup
    @obj = Bio::SOAPWSDL
  end

  # Compares the instance methods of the class with those every Object
  # already has.  Object.instance_methods is subtracted (not Object.methods,
  # which lists the methods of the Object class object itself), and names
  # are converted with to_s because instance_methods returns Strings on
  # Ruby 1.8 but Symbols on 1.9+.
  def test_methods
    expected = ['wsdl', 'wsdl=', 'log', 'log='].sort
    added = @obj.instance_methods - Object.instance_methods
    assert_equal(expected, added.map { |name| name.to_s }.sort)
  end

end
45
+ end
@@ -0,0 +1,175 @@
1
+ #
2
+ # test/unit/bio/shell/plugin/test_seq.rb - Unit test for Bio::Shell plugin for biological sequence manipulations
3
+ #
4
+ # Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # $Id: test_seq.rb,v 1.5 2005/12/19 02:44:03 k Exp $
21
+ #
22
+
23
+ require 'pathname'
24
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
25
+ $:.unshift(libpath) unless $:.include?(libpath)
26
+
27
+ require 'test/unit'
28
+ require 'bioruby'
29
+
30
+ module Bio
31
# Tests for the sequence helpers of the shell plugin (seq, seqstat,
# doublehelix).  seqstat and doublehelix are checked through what they
# pass to puts; see capture_puts below.
#
# NOTE(review): the column alignment inside the expected reports below looks
# collapsed in this view of the file - confirm the heredoc whitespace against
# the released test file before relying on it.
class TestShellPluginSeq < Test::Unit::TestCase

  # include Bio::Shell
  # Bio::Shell.instance_variable_set :@config, {}

  def test_naseq
    str = 'ACGT'
    assert_equal(Bio::Sequence::NA, seq(str).class)
    assert_equal(Bio::Sequence::NA.new(str), seq(str))
    assert_equal('acgt', seq(str))
  end

  def test_aaseq
    str = 'WD'
    assert_equal(Bio::Sequence::AA, seq(str).class)
    assert_equal(Bio::Sequence::AA.new('WD'), seq(str))
    assert_equal('WD', seq(str))
  end

  def test_na_seqstat
    naseq = 'atgcatgcatgc'
    output =<<END

* * * Sequence statistics * * *

5'->3' sequence : atgcatgcatgc
3'->5' sequence : gcatgcatgcat
Translation 1 : MHAC
Translation 2 : CMH
Translation 3 : ACM
Translation -1 : ACMH
Translation -2 : HAC
Translation -3 : MHA
Length : 12 bp
GC percent : 50 %
Composition : a - 3 ( 25.00 %)
c - 3 ( 25.00 %)
g - 3 ( 25.00 %)
t - 3 ( 25.00 %)
Codon usage :

*---------------------------------------------*
| | 2nd | |
| 1st |-------------------------------| 3rd |
| | U | C | A | G | |
|-------+-------+-------+-------+-------+-----|
| U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| u |
| U U |F 0.0%|S 0.0%|Y 0.0%|C 25.0%| c |
| U U |L 0.0%|S 0.0%|* 0.0%|* 0.0%| a |
| UUU |L 0.0%|S 0.0%|* 0.0%|W 0.0%| g |
|-------+-------+-------+-------+-------+-----|
| CCCC |L 0.0%|P 0.0%|H 25.0%|R 0.0%| u |
| C |L 0.0%|P 0.0%|H 0.0%|R 0.0%| c |
| C |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| a |
| CCCC |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| g |
|-------+-------+-------+-------+-------+-----|
| A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| u |
| A A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| c |
| AAAAA |I 0.0%|T 0.0%|K 0.0%|R 0.0%| a |
| A A |M 25.0%|T 0.0%|K 0.0%|R 0.0%| g |
|-------+-------+-------+-------+-------+-----|
| GGGG |V 0.0%|A 0.0%|D 0.0%|G 0.0%| u |
| G |V 0.0%|A 0.0%|D 0.0%|G 0.0%| c |
| G GGG |V 0.0%|A 25.0%|E 0.0%|G 0.0%| a |
| GG G |V 0.0%|A 0.0%|E 0.0%|G 0.0%| g |
*---------------------------------------------*

Molecular weight : 3701.61444
Protein weight : 460.565
//
END
    assert_equal(output, capture_puts { seqstat(naseq) })
  end

  def test_aa_seqstat
    aaseq = 'WD'
    output =<<END

* * * Sequence statistics * * *

N->C sequence : WD
Length : 2 aa
Composition : D Asp - 1 ( 50.00 %) aspartic acid
W Trp - 1 ( 50.00 %) tryptophan
Protein weight : 319.315
//
END
    assert_equal(output, capture_puts { seqstat(aaseq) })
  end

  def test_doublehelix
    seq = 'ACGTACGTACGTACGT'
    output = <<END
at
c--g
g---c
t----a
a----t
c---g
g--c
ta
ta
g--c
c---g
a----t
t----a
g---c
c--g
at
END
    assert_equal(output, capture_puts { doublehelix(seq) })
  end

  private

  # Runs the given block while +puts+ called on this test instance is
  # replaced by a version that appends each argument's to_s (with no
  # added newline) to a buffer, and returns that buffer.
  #
  # The override is a singleton method on this instance and is removed in
  # an +ensure+ clause, so the normal +puts+ is restored even when the
  # block raises.  This replaces the former alias/undef dance and the
  # global $str that each test repeated inline.
  def capture_puts
    captured = ''
    singleton = class << self; self; end
    singleton.__send__(:define_method, :puts) do |*args|
      args.each { |obj| captured << obj.to_s }
    end
    begin
      yield
    ensure
      singleton.__send__(:remove_method, :puts)
    end
    captured
  end

end
175
+ end
@@ -0,0 +1,1028 @@
1
+ #
2
+ # test/unit/bio/test_alignment.rb - Unit test for Bio::Alignment
3
+ #
4
+ # Copyright (C) 2004 Moses Hohman <mmhohman@northwestern.edu>
5
+ # 2005 Naohisa Goto <ng@bioruby.org>
6
+ #
7
+ # This library is free software; you can redistribute it and/or
8
+ # modify it under the terms of the GNU Lesser General Public
9
+ # License as published by the Free Software Foundation; either
10
+ # version 2 of the License, or (at your option) any later version.
11
+ #
12
+ # This library is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ # Lesser General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Lesser General Public
18
+ # License along with this library; if not, write to the Free Software
19
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ #
21
+ # $Id: test_alignment.rb,v 1.6 2005/12/02 13:01:49 ngoto Exp $
22
+ #
23
+
24
+ require 'pathname'
25
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s
26
+ $:.unshift(libpath) unless $:.include?(libpath)
27
+
28
+ require 'test/unit'
29
+ require 'bio/alignment'
30
+
31
+ module Bio
32
+
33
# Exercises the accessors supplied by Alignment::PropertyMethods
# (gap_regexp, gap_char, missing_char, seqclass) and the bulk
# get_all_property / set_all_property pair.
class TestAlignmentPropertyMethods < Test::Unit::TestCase

  # Each test runs against a plain Object extended with the module.
  def setup
    @obj = Object.new.extend(Alignment::PropertyMethods)
  end

  def test_is_gap_default_false
    verdict = @obj.is_gap?('a')
    assert_equal(false, verdict, "\"a\" isn't a gap")
  end

  def test_is_gap_default_true
    verdict = @obj.is_gap?('-')
    assert_equal(true, verdict, '"-" is a gap')
  end

  def test_gap_regexp
    current = @obj.gap_regexp
    assert_not_nil(current)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_gap_regexp_never_nil
    @obj.gap_regexp = nil
    current = @obj.gap_regexp
    assert_not_nil(current)
  end

  def test_gap_regexp=()
    @obj.gap_regexp = /[^a-zA-Z0-9]/
    current = @obj.gap_regexp
    assert_equal(/[^a-zA-Z0-9]/, current)
  end

  def test_is_gap_nodefault_false
    @obj.gap_regexp = /[^a-zA-Z0-9]/
    verdict = @obj.is_gap?('3')
    assert_equal(false, verdict)
  end

  def test_is_gap_nodefault_true
    @obj.gap_regexp = /[^atgc]/
    verdict = @obj.is_gap?('z')
    assert_equal(true, verdict)
  end

  def test_gap_char_default
    current = @obj.gap_char
    assert_not_nil(current)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_gap_char_never_nil
    @obj.gap_char = nil
    current = @obj.gap_char
    assert_not_nil(current)
  end

  def test_gap_char=()
    @obj.gap_char = '#'
    current = @obj.gap_char
    assert_equal('#', current)
  end

  def test_missing_char_default
    current = @obj.missing_char
    assert_not_nil(current)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_missing_char_never_nil
    @obj.missing_char = nil
    current = @obj.missing_char
    assert_not_nil(current)
  end

  def test_missing_char=()
    @obj.missing_char = '_'
    current = @obj.missing_char
    assert_equal('_', current)
  end

  def test_seqclass_default
    current = @obj.seqclass
    assert_not_nil(current)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_seqclass_never_nil
    @obj.seqclass = nil
    current = @obj.seqclass
    assert_not_nil(current)
  end

  def test_seqclass=()
    @obj.seqclass = Sequence::NA
    current = @obj.seqclass
    assert_equal(Sequence::NA, current)
  end

  # With nothing assigned, the property hash is empty.
  def test_get_all_property_default
    properties = @obj.get_all_property
    assert_equal({}, properties)
  end

  # Every property assigned through its writer shows up in the hash.
  def test_get_all_property_nodefault
    @obj.gap_regexp = /[^acgt]/
    @obj.gap_char = '#'
    @obj.missing_char = '_'
    @obj.seqclass = Sequence::NA
    expected = {
      :gap_regexp   => /[^acgt]/,
      :gap_char     => '#',
      :missing_char => '_',
      :seqclass     => Sequence::NA
    }
    assert_equal(expected, @obj.get_all_property)
  end

  # A hash given to set_all_property is read back unchanged.
  def test_set_all_property
    properties = {
      :gap_regexp   => /[^acgt]/,
      :gap_char     => '#',
      :missing_char => '_',
      :seqclass     => Sequence::NA
    }
    @obj.set_all_property(properties)
    assert_equal(properties, @obj.get_all_property)
  end
end #class TestAlignmentPropertyMethods
139
+
140
+ # This is a unit test of Bio::Alignment::Site class and
141
+ # Bio::Alignment::SiteMethods module.
142
+ # Since Bio::Alignment::Site includes Bio::Alignment::SiteMethods,
143
+ # we can test both at a time.
144
# Tests for single alignment columns built with Alignment::Site[...]:
# gap detection and removal, consensus (string and IUPAC) and the
# per-site match-line characters.
class TestAlignmentSite < Test::Unit::TestCase

  def test_has_gap_true
    site = Alignment::Site[ 'a', '-', 'c', 'g', 't' ]
    assert_equal(true, site.has_gap?)
  end

  def test_has_gap_false
    site = Alignment::Site[ 'a', 'c', 'g', 't' ]
    assert_equal(false, site.has_gap?)
  end

  def test_remove_gaps!
    site = Alignment::Site[ 'a', '-', 'c', '-' ]
    assert_equal(Alignment::Site['a', 'c'], site.remove_gaps!)
  end

  # remove_gaps! is expected to return nil when there was nothing to remove.
  def test_remove_gaps_bang_not_removed
    site = Alignment::Site[ 'a', 'c']
    assert_nil(site.remove_gaps!)
  end

  def test_consensus_string_default
    site = Alignment::Site[ 'a', 'a', 'a', 'a']
    assert_equal('a', site.consensus_string)
  end

  def test_consensus_string_default_nil
    site = Alignment::Site[ 'a', 'a', 'a', 'c']
    assert_nil(site.consensus_string)
  end

  def test_consensus_string_50percent
    site = Alignment::Site[ 'a', 'a', 'c', 'g']
    assert_equal('a', site.consensus_string(0.5))
  end

  def test_consensus_string_50percent_nil
    site = Alignment::Site[ 'a', 'c', 'g', 't']
    assert_nil(site.consensus_string(0.5))
  end

  def test_consensus_iupac
    # [ expected consensus, residues at the site ] pairs.  An Array of
    # pairs is used instead of a Hash literal because 't' is the expected
    # result for two different inputs; as Hash keys the second entry
    # silently replaced the first, so the [ 't' ] case was never run.
    data = [
      [ 'a', [ 'a' ] ],
      [ 'c', [ 'c' ] ],
      [ 'g', [ 'g' ] ],
      [ 't', [ 't' ] ],
      [ 't', [ 't', 'u' ] ],
      [ 'm', [ 'a', 'c' ] ],
      [ 'r', [ 'a', 'g' ] ],
      [ 'w', [ 'a', 't' ] ],
      [ 's', [ 'c', 'g' ] ],
      [ 'y', [ 'c', 't' ] ],
      [ 'k', [ 'g', 't' ] ],
      [ 'v', [ 'a', 'c', 'g' ] ],
      [ 'h', [ 'a', 'c', 't' ] ],
      [ 'd', [ 'a', 'g', 't' ] ],
      [ 'b', [ 'c', 'g', 't' ] ],
      [ 'n', [ 'a', 'c', 'g', 't' ] ],
      [ nil, [ 'z', 'a' ] ]
    ]
    data.each do |cons, testdata|
      site = Alignment::Site[ *testdata ]
      assert_equal(cons, site.consensus_iupac,
                   "IUPAC consensus of #{testdata.join(',')} is #{cons}")
    end
  end

  def test_match_line_amino_missing
    site = Alignment::Site[ 'P', 'Q', 'R', 'S' ]
    assert_equal(' ', site.match_line_amino)
  end

  def test_match_line_amino_100percent
    site = Alignment::Site[ 'M', 'M', 'M', 'M' ]
    assert_equal('*', site.match_line_amino)
  end

  def test_match_line_amino_strong
    site = Alignment::Site[ 'N', 'E', 'Q', 'K' ]
    assert_equal(':', site.match_line_amino)
  end

  def test_match_line_amino_weak
    site = Alignment::Site[ 'S', 'G', 'N', 'D' ]
    assert_equal('.', site.match_line_amino)
  end

  def test_match_line_nuc_missing
    site = Alignment::Site[ 'A', 'C', 'G', 'T' ]
    assert_equal(' ', site.match_line_nuc)
  end

  def test_match_line_nuc_100percent
    site = Alignment::Site[ 'G', 'G', 'G', 'G' ]
    assert_equal('*', site.match_line_nuc)
  end
end #class TestAlignmentSite
243
+
244
+ # This is a sample class for testing Bio::Alignment::EnumerableExtension.
245
# Minimal Array subclass that mixes in the module under test; the
# EnumerableExtension tests build their fixtures with A[...] and A.new.
class A < Array
  include(Alignment::EnumerableExtension)
end
248
+
249
+ class TestAlignmentEnumerableExtension < Test::Unit::TestCase
250
# each_seq is expected to yield every stored sequence, in order.
def test_each_seq
  remaining = [ 'atg', 'aag', 'acg' ]
  alignment = A[ *remaining ]
  alignment.each_seq { |sequence| assert_equal(remaining.shift, sequence) }
  # every expected sequence must have been consumed
  assert(remaining.empty?)
end
258
+
259
# An empty collection is expected to report String as its sequence class.
def test_seqclass_default
  empty = A.new
  assert_equal(String, empty.seqclass)
end
263
+
264
# With a Bio::Sequence::NA element, seqclass is expected to be that class.
def test_seqclass
  na = Bio::Sequence::NA.new('atg')
  alignment = A[ na ]
  assert_equal(Bio::Sequence::NA, alignment.seqclass)
end
268
+
269
# Follows seqclass through three states: empty collection, after an NA
# sequence is appended, and after an explicit assignment.
def test_seqclass=()
  alignment = A.new
  assert_equal(String, alignment.seqclass)

  alignment << Bio::Sequence::NA.new('a')
  assert_equal(Bio::Sequence::NA, alignment.seqclass)

  alignment.seqclass = Bio::Sequence::AA
  assert_equal(Bio::Sequence::AA, alignment.seqclass)
end
277
+
278
# The longest member here is 'atgc', and 4 is the expected length.
def test_alignment_length
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  length = alignment.alignment_length
  assert_equal(4, length)
end
282
+
283
# Calls _alignment_site from inside instance_eval and checks the site
# returned for position 1.
def test_private_alignment_site
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::Site[ '-', 't', 't', 't', '-' ]
  actual = alignment.instance_eval { _alignment_site(1) }
  assert_equal(expected, actual)
end
288
+
289
# Checks the site returned for position 1.
# NOTE(review): despite its name this calls _alignment_site through
# __send__, i.e. the same method test_private_alignment_site covers -
# confirm whether a public alignment_site was meant to be exercised.
def test_alignment_site
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::Site[ '-', 't', 't', 't', '-' ]
  actual = alignment.__send__(:_alignment_site, 1)
  assert_equal(expected, actual)
end
294
+
295
# each_site is expected to yield one Site per column; the expected sites
# have '-' wherever a sequence is shorter than the column index.
def test_each_site
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', '-' ],
    Alignment::Site[ '-', 't', 't', 't', '-' ],
    Alignment::Site[ '-', '-', 'g', 'g', '-' ],
    Alignment::Site[ '-', '-', 'c', '-', '-' ]
  ]
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  alignment.each_site { |column| assert_equal(pending.shift, column) }
  assert(pending.empty?)
end
308
+
309
# each_site_step(1, 4, 2) is expected to yield the sites at positions
# 1 and 3 only.
def test_each_site_step
  pending = [
    Alignment::Site[ '-', 't', 't', 't', '-' ], # site 1
    Alignment::Site[ '-', 'a', 'g', 't', '-' ], # site 3
  ]
  alignment = A[ 'a', 'atgatc', 'atggcc', 'atgtga', '' ]
  alignment.each_site_step(1, 4, 2) { |column| assert_equal(pending.shift, column) }
  assert(pending.empty?)
end
320
+
321
# The block's result for each sequence is expected back in a SequenceArray.
def test_alignment_collect
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::SequenceArray[ 'a', 'au', 'augc', 'aug', '' ]
  actual = alignment.alignment_collect { |sequence| sequence.gsub(/t/, 'u') }
  assert_equal(expected, actual)
end
326
+
327
# alignment_window(1, 3) on sequences of differing length; sequences
# too short for the window contribute ''.
def test_alignment_window
  alignment = A[ 'a', 'at', 'atgca', 'atg', '' ]
  expected = Alignment::SequenceArray[ '', 't', 'tgc', 'tg', '' ]
  actual = alignment.alignment_window(1, 3)
  assert_equal(expected, actual)
end
332
+
333
# each_window(3, 2): the first three expected windows are compared with
# what the block receives, the fourth with the method's return value.
def test_each_window
  pending = [
    Alignment::SequenceArray[ 'atg', 'tcg', '' ], # 0..2
    Alignment::SequenceArray[ 'gca', 'gat', '' ], # 2..4
    Alignment::SequenceArray[ 'atg', 'tgc', '' ], # 4..6
    Alignment::SequenceArray[ 'c', 'a', '' ] # 7..7
  ]
  alignment = A[ 'atgcatgc', 'tcgatgca', '' ]
  leftover = alignment.each_window(3, 2) do |window|
    assert_equal(pending.shift, window)
  end
  assert_equal(pending.shift, leftover)
  assert(pending.empty?)
end
347
+
348
# Joins each yielded site into a string and checks the collected Array.
def test_collect_each_site
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  columns = alignment.collect_each_site { |site| site.join('') }
  assert_equal(["aaaa-", "-ttt-", "--gg-", "--c--" ], columns)
end
353
+
354
# Without options both columns are expected to reach the block (the
# second still containing its gap), and the block's return values are
# concatenated into the result.
def test_consensus_each_site_default
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
    Alignment::Site[ 'a', 'c', 'g', 't', '-' ]
  ]

  alignment = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  consensus = alignment.consensus_each_site do |column|
    assert_equal(pending.shift, column)
    'x'
  end
  assert_equal('xx', consensus)
  assert(pending.empty?)
end
368
+
369
# With :gap_mode => 1 only the gap-free column is expected to reach the
# block; the expected result has '-' for the column containing a gap.
def test_consensus_each_site_gap_mode_1
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ]
  ]

  alignment = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  consensus = alignment.consensus_each_site(:gap_mode => 1) do |column|
    assert_equal(pending.shift, column)
    'x'
  end
  assert_equal('x-', consensus)
  assert(pending.empty?)
end
382
+
383
# With :gap_mode => -1 the second column is expected to reach the block
# with its gap removed.
def test_consensus_each_site_gap_mode_minus1
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
    Alignment::Site[ 'a', 'c', 'g', 't' ]
  ]

  alignment = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  consensus = alignment.consensus_each_site(:gap_mode => -1) do |column|
    assert_equal(pending.shift, column)
    'x'
  end
  assert_equal('xx', consensus)
  assert(pending.empty?)
end
397
+
398
# Default threshold: only the unanimous first column yields a letter,
# the other columns are expected as '?'.
def test_consensus_string_default
  alignment = A[ 'ata', 'aac', 'aag', 'aat' ]
  consensus = alignment.consensus_string
  assert_equal('a??', consensus)
end
402
+
403
# Threshold 0.5: the second column (three 'a' of four) is now expected
# to yield 'a' as well.
def test_consensus_string_half
  alignment = A[ 'ata', 'aac', 'aag', 'aat' ]
  consensus = alignment.consensus_string(0.5)
  assert_equal('aa?', consensus)
end
407
+
408
# IUPAC consensus over a four-sequence alignment; the final column of
# 'z' characters is expected to come out as '?'.
def test_consensus_iupac
  rows = [
    'acgtaaaccgaaacaz',
    'acgtaaaccgccggcz',
    'acgtcgtgttgtttgz',
    'acgtcgtgttaaactz'
  ]
  alignment = A[ *rows ]
  assert_equal('acgtmrwsykvhdbn?', alignment.consensus_iupac)
end
417
+
418
# Match line for six amino acid sequences; the trailing comment in the
# fixture shows the expected line underneath the sequences.
def test_match_line_amino
  alignment = A[
    'M-SNNNQMMHF-CASSSSSNNFH-AW',
    'M-TEHDHIIYY-STATTGNDEVF-FW',
    'M-AQQERLLHW-AVGNPNDEQLY-HW',
    'M-SKKQKVFYF-CASKADEQHIH-LW',
    'M-TNNNQMMHY-STASSSQHRMF-QW',
    'M-AEHDHIIYW-AVGTTGKKKFY-YW'
    #* ::::::::: ........... *
  ]
  match_line = alignment.match_line_amino
  assert_equal('* ::::::::: ........... *', match_line)
end
430
+
431
# Nucleotide match line: only the first column is identical in every row.
def test_match_line_nuc
  alignment = A[ 'aaa', 'aa-','aac', 'at-' ]
  match_line = alignment.match_line_nuc
  assert_equal('* ', match_line)
end
435
+
436
# match_line on Sequence::AA members: called with no option, with
# :type => :aa and with :type => :na.
def test_match_line
  proteins = [
    Sequence::AA.new('MNSA'),
    Sequence::AA.new('MHTL'),
    Sequence::AA.new('MQNV'),
    Sequence::AA.new('MKKW'),
  ]
  alignment = A[ *proteins ]
  assert_equal('*:. ', alignment.match_line)
  assert_equal('*:. ', alignment.match_line(:type => :aa))
  assert_equal('* ', alignment.match_line(:type => :na))
end
447
+
448
# convert_match is expected to modify the receiver: residues of the later
# rows that equal the first row's residue in the same column become '.',
# and the first row itself stays untouched.
def test_convert_match
  alignment = A[*%w(aaaa accc acac actc)]
  masked = A[*%w(aaaa .ccc .c.c .ctc)]

  alignment.convert_match

  assert_equal(masked, alignment)
end
458
+
459
# convert_unmatch is expected to modify the receiver: every '.' in the
# later rows is replaced by the first row's residue in that column.
def test_convert_unmatch
  alignment = A[*%w(aaaa .ccc .c.c .ctc)]
  restored = A[*%w(aaaa accc acac actc)]

  alignment.convert_unmatch

  assert_equal(restored, alignment)
end
464
+
465
# alignment_normalize! is expected to pad every row in place with '-' up
# to the length of the longest row; an empty row becomes all gaps.
def test_alignment_normalize!
  ragged = A['a', 'atg', 'atgc', '']
  padded = A['a---', 'atg-', 'atgc', '----']

  ragged.alignment_normalize!

  assert_equal(padded, ragged)
end
470
+
471
# alignment_rstrip! is expected to return a true value and to drop the two
# trailing all-gap columns in place; the short row '--t' stays as it is.
def test_alignment_rstrip!
  alignment = A[*%w(--aaa-- --t-t-- ---g--- --t)]

  changed = alignment.alignment_rstrip!

  assert(changed)
  assert_equal(A[*%w(--aaa --t-t ---g- --t)], alignment)
end
476
+
477
# When the last column is not made up of gaps only, alignment_rstrip! is
# expected to return nil and leave the alignment unchanged.
def test_alignment_rstrip_nil
  alignment = A[*%w(aa -a a-)]

  outcome = alignment.alignment_rstrip!

  assert_nil(outcome)
  assert_equal(A[*%w(aa -a a-)], alignment)
end
482
+
483
# alignment_lstrip! is expected to return a true value and to drop the two
# leading all-gap columns in place.
def test_alignment_lstrip!
  alignment = A[*%w(--aaa-- --t-t-- ---g--- --t)]

  changed = alignment.alignment_lstrip!

  assert(changed)
  assert_equal(A[*%w(aaa-- t-t-- -g--- t)], alignment)
end
488
+
489
# When the first column is not made up of gaps only, alignment_lstrip! is
# expected to return nil and leave the alignment unchanged.
def test_alignment_lstrip_nil
  alignment = A[*%w(aa -a a-)]

  outcome = alignment.alignment_lstrip!

  assert_nil(outcome)
  assert_equal(A[*%w(aa -a a-)], alignment)
end
494
+
495
# alignment_strip! is expected to return a true value and to remove the
# all-gap columns on both edges in place.
def test_alignment_strip!
  alignment = A[*%w(--aaa-- --t-t-- ---g--- --t)]

  changed = alignment.alignment_strip!

  assert(changed)
  assert_equal(A[*%w(aaa t-t -g- t)], alignment)
end
500
+
501
# With no all-gap column on either edge, alignment_strip! is expected to
# return nil and leave the alignment unchanged.
def test_alignment_strip_nil
  alignment = A[*%w(aa -a a-)]

  outcome = alignment.alignment_strip!

  assert_nil(outcome)
  assert_equal(A[*%w(aa -a a-)], alignment)
end
506
+
507
# remove_all_gaps! is expected to return a true value and to delete every
# '-' from every row in place, so rows may end up with different lengths.
def test_remove_all_gaps!
  alignment = A[*%w(--aaa-- --t-t-- ---g--- --t)]

  changed = alignment.remove_all_gaps!

  assert(changed)
  assert_equal(A[*%w(aaa tt g t)], alignment)
end
512
+
513
+ # test of alignment_slice.
514
+ # Please also refer to alignment_window.
515
# alignment_slice(1, 3) is expected to take the (start, length) slice of
# every row and return the pieces as an Alignment::SequenceArray: a row
# that ends exactly at the start gives '', an empty row gives nil.
def test_alignment_slice
  alignment = A['a', 'at', 'atgca', 'atg', '']
  wanted = Alignment::SequenceArray['', 't', 'tgc', 'tg', nil]

  sliced = alignment.alignment_slice(1, 3)

  assert_equal(wanted, sliced)
end
520
+
521
# alignment_subseq(2, 4) on Sequence rows: the expected pieces correspond
# to residues 2..4 counted from 1. A row with a single residue is expected
# to give an empty Sequence and an empty row is expected to give nil.
def test_alignment_subseq
  # wraps a residue string in a Sequence, passing nil through unchanged
  to_seq = lambda { |residues| residues && Sequence.new(residues) }

  alignment = A[*['a', 'at', 'atgca', 'atg', ''].map(&to_seq)]
  wanted = Alignment::SequenceArray[*['', 't', 'tgc', 'tg', nil].map(&to_seq)]

  assert_equal(wanted, alignment.alignment_subseq(2, 4))
end
529
+
530
# alignment_concat is expected to append, row by row and in place, the
# rows of the argument: surplus rows of a longer argument are ignored and
# rows without a counterpart in a shorter argument stay unchanged. Both an
# A instance and a plain Array are used as the argument.
def test_alignment_concat
  alignment = A[*%w(aaa c gg t)]

  longer = A[*%w(ttt gg aa cc aa)]
  alignment.alignment_concat(longer)
  assert_equal(A[*%w(aaattt cgg ggaa tcc)], alignment)

  shorter = %w(c t)
  alignment.alignment_concat(shorter)
  assert_equal(A[*%w(aaatttc cggt ggaa tcc)], alignment)
end
537
+ end #class TestAlignmentEnumerableExtension
538
+
539
# Tests for the name helpers mixed in by Alignment::ClustalWFormatter.
# The helpers are invoked through instance_eval on a bare Object that has
# been extended with the module.
class TestAlignmentClustalWFormatter < Test::Unit::TestCase
  def setup
    @obj = Object.new.extend(Alignment::ClustalWFormatter)
  end

  # Two names sharing the first word: the indexes of the clashing entries
  # are expected back.
  def test_have_same_name_true
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    clash = @obj.instance_eval { have_same_name?(names) }
    assert_equal([ 0, 1 ], clash)
  end

  # No names share the first word: false is expected.
  def test_have_same_name_false
    names = [ 'GTP ATG', 'ATP ATA', 'BBB' ]
    clash = @obj.instance_eval { have_same_name?(names) }
    assert_equal(false, clash)
  end

  # Clashing first words: blanks are expected to be replaced by '_' so the
  # complete names become distinguishable.
  def test_avoid_same_name
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    renamed = @obj.instance_eval { avoid_same_name(names) }
    assert_equal([ 'ATP_ATG', 'ATP_ATA', 'BBB' ], renamed)
  end

  # Fully identical names: every name (including the unique one) is
  # expected to be prefixed with its index.
  def test_avoid_same_name_numbering
    names = [ 'ATP', 'ATP', 'BBB' ]
    renamed = @obj.instance_eval { avoid_same_name(names) }
    assert_equal([ '0_ATP', '1_ATP', '2_BBB' ], renamed)
  end

end #class TestAlignmentClustalWFormatter
567
+
568
+
569
# Tests for the Alignment container class: storing and keying sequences,
# seqclass detection, gap/missing characters, per-site iteration, add_seq,
# consensus / IUPAC consensus, match conversion, match_line and normalize!.
class TestAlignment < Test::Unit::TestCase

  # testing helper method
  # Builds an Alignment by wrapping each given string in Sequence::NA and
  # appending it with <<.
  def build_na_alignment(*sequences)
    sequences.inject(Alignment.new) { |alignment, sequence| alignment << Sequence::NA.new(sequence) }
  end
  private :build_na_alignment

  # Two alignments built from equal sequences compare equal.
  def test_equals
    alignment1 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
    alignment2 = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
    assert_equal(alignment1, alignment2)
  end

  # Alignment#store

  # Storing under an existing key keeps the first sequence.
  def test_store_cannot_override_key
    alignment = Alignment.new
    alignment.store("Cat DNA", Sequence::NA.new("cat"))
    alignment.store("Cat DNA", Sequence::NA.new("gcat"))
    assert_equal("cat", alignment["Cat DNA"])
  end

  # A nil key is replaced by a running number starting at 0.
  def test_store_with_nil_key_uses_next_number_for_key
    alignment = Alignment.new
    alignment.store(nil, Sequence::NA.new("cat"))
    alignment.store(nil, Sequence::NA.new("gat"))
    alignment.store(nil, Sequence::NA.new("tat"))
    assert_equal({0=>"cat",1=>"gat",2=>"tat"}, alignment.to_hash)
  end

  # Automatic keys are the position of the entry (1 and 3 here), not a
  # separate counter of nil-keyed entries.
  def test_store_with_default_keys_and_user_defined_keys
    alignment = Alignment.new
    alignment.store("cat key", Sequence::NA.new("cat"))
    alignment.store(nil, Sequence::NA.new("cag"))
    alignment.store("gat key", Sequence::NA.new("gat"))
    alignment.store(nil, Sequence::NA.new("gag"))
    assert_equal({"gat key"=>"gat",1=>"cag",3=>"gag","cat key"=>"cat"}, alignment.to_hash)
  end

  # Test append operator

  # << stores the sequence under the next numeric key.
  def test_seqclass_when_sequence_used
    alignment = Alignment.new
    alignment << Sequence::NA.new("cat")
    assert_equal({0=>"cat"}, alignment.to_hash)
  end

  # Test seqclass

  # Without an explicit seqclass, the class of the stored sequence is used.
  def test_seqclass_when_sequence_used_no_seqclass_set
    alignment = Alignment.new
    alignment << Sequence::NA.new("cat")
    assert_equal(Sequence::NA, alignment.seqclass)
  end

  # With only a nil entry, seqclass falls back to String.
  def test_seqclass_String_seq_not_present_no_seqclass_set
    alignment = Alignment.new
    alignment << nil
    assert_equal(String, alignment.seqclass)
  end

  # An explicitly assigned seqclass wins over the stored sequences.
  # Integer is used as the arbitrary class: Fixnum was deprecated in
  # Ruby 2.4 and removed in Ruby 3.2, while Integer exists in every version.
  def test_seqclass_when_seqclass_set
    alignment = Alignment.new
    alignment.seqclass = Integer
    alignment << "this doesn't really make sense"
    assert_equal(Integer, alignment.seqclass)
  end

  # Alignment#gap_char

  def test_default_gap_char
    alignment = Alignment.new
    assert_equal("-", alignment.gap_char)
  end

  def test_set_and_get_gap_char
    alignment = Alignment.new
    alignment.gap_char = "+"
    assert_equal("+", alignment.gap_char)
  end

  # Alignment#gap_regexp

  def test_default_gap_regexp_matches_default_gap_char
    alignment = Alignment.new
    assert(alignment.gap_regexp.match(alignment.gap_char))
  end

  # Alignment#missing_char

  def test_default_missing_char
    alignment = Alignment.new
    assert_equal("?", alignment.missing_char)
  end

  # Alignment#seq_length

  def test_seq_length_when_one_sequence
    alignment = build_na_alignment("agt")
    assert_equal(3, alignment.seq_length)
  end

  def test_seq_length_is_max_seq_length
    alignment = build_na_alignment("agt", "agtaa", "agta")
    assert_equal(5, alignment.seq_length)
  end

  # Alignment#each_site

  # Each column is yielded in order as an array of residues.
  def test_each_site_equal_length
    alignment = build_na_alignment("acg", "gta")
    expected_sites = [["a", "g"], ["c", "t"], ["g", "a"]]
    alignment.each_site do |site|
      assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
    end
    # guards against each_site yielding fewer sites than expected
    assert(expected_sites.empty?)
  end

  # Shorter sequences are padded with the gap character.
  def test_each_site_unequal_length
    alignment = build_na_alignment("ac", "gta")
    expected_sites = [["a", "g"], ["c", "t"], ["-", "a"]]
    alignment.each_site do |site|
      assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
    end
    # guards against each_site yielding fewer sites than expected
    assert(expected_sites.empty?)
  end

  #TODO: Lots of stuff needing tests here

  # Alignment#add_seq

  def test_add_seq_no_key
    alignment = Alignment.new
    alignment.add_seq("agct")
    assert_equal(String, alignment.seqclass, "wrong class")
    assert_equal({0=>"agct"}, alignment.to_hash, "wrong hash")
  end

  # An object responding to #seq is stored as the result of that method.
  def test_add_seq_using_seq_with_seq_method
    seq = "agtc"
    class <<seq
      def seq
        Sequence::NA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::NA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"agtc"}, alignment.to_hash, "wrong hash")
  end

  # An object responding to #naseq is stored as the result of that method.
  def test_add_seq_using_seq_with_naseq_method
    seq = "agtc"
    class <<seq
      def naseq
        Sequence::NA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::NA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"agtc"}, alignment.to_hash, "wrong hash")
  end

  # An object responding to #aaseq is stored as the result of that method.
  def test_add_seq_using_seq_with_aaseq_method
    seq = "AVGR"
    class <<seq
      def aaseq
        Sequence::AA.new(self)
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq, "key")
    assert_equal(Sequence::AA, alignment.seqclass, "wrong class")
    assert_equal({"key"=>"AVGR"}, alignment.to_hash, "wrong hash")
  end

  # Without an explicit key, #definition of the object is used as the key.
  def test_add_seq_using_seq_with_definition_method
    seq = "atgc"
    class <<seq
      def definition
        "this is the key"
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq)
    assert_equal({"this is the key"=>"atgc"}, alignment.to_hash, "wrong hash")
  end

  # Without an explicit key, #entry_id of the object is used as the key.
  def test_add_seq_using_seq_with_entry_id_method
    seq = "atgc"
    class <<seq
      def entry_id
        271828
      end
    end

    alignment = Alignment.new
    alignment.add_seq(seq)
    assert_equal({271828=>"atgc"}, alignment.to_hash, "wrong hash")
  end

  # Alignment#consensus_string

  def test_consensus_string_no_gaps
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    assert_equal("??tcgat???", alignment.consensus_string)
  end

  def test_consensus_threshold_two_sequences
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    # the threshold is the fraction of sequences in which a symbol must
    # occur at a given position to be considered the consensus symbol
    assert_equal("agtcgattaa", alignment.consensus(0.5))
    assert_equal("??tcgat???", alignment.consensus(0.500000001))
  end

  def test_consensus_threshold_four_sequences
    alignment = build_na_alignment("agtg",
                                   "ttag",
                                   "actc",
                                   "tatc")
    # ties go to the symbol that occurs in the earliest sequence
    assert_equal("agtg", alignment.consensus(0.25))
    assert_equal("a?tg", alignment.consensus(0.26))
  end

  def test_consensus_opt_gap_mode
    alignment = build_na_alignment("gt-gt-a",
                                   "ttcggc-",
                                   "ttcggc-")
    # using threshold = 0.5, that is a symbol must occur >= half the time in order to be consensus
    # gap_mode -1 means gaps are ignored
    assert_equal("ttcggca", alignment.consensus(0.5, :gap_mode => -1), "gap mode -1")
    # gap_mode 0 means gaps are treated like regular symbols, yielding a gap in the last position
    assert_equal("ttcggc-", alignment.consensus(0.5, :gap_mode => 0), "gap mode 0")
    # gap_mode 1 means gaps take precedence over any other symbol, yielding two more gaps
    assert_equal("tt-gg--", alignment.consensus(0.5, :gap_mode => 1), "gap mode 1")
  end

  def test_consensus_opt_missing_char
    alignment = build_na_alignment("agtcgattaa",
                                   "tttcgatgcc")
    assert_equal("**tcgat***", alignment.consensus(1, :missing_char => "*"))
  end

  # Alignment#consensus_iupac

  def test_consensus_iupac_no_gaps
    alignment = build_na_alignment("agtcgattaa", "tttcgatgcc")
    assert_equal("wktcgatkmm", alignment.consensus_iupac)
  end

  def test_consensus_iupac_of_ambiguous_bases
    alignment = build_na_alignment("tmrwsykvhdbnd", "uaaaccgaaacab")
    assert_equal("tmrwsykvhdbnn", alignment.consensus_iupac)
  end

  def test_consensus_iupac_gap_modes
    alignment = build_na_alignment("a-t", "acc")
    # gap_mode -1 means gaps are ignored
    assert_equal("acy", alignment.consensus_iupac(:gap_mode => -1))
    # gap_mode 0 means gaps are treated as normal characters, yielding a missing symbol
    assert_equal("a?y", alignment.consensus_iupac(:gap_mode => 0))
    # gap_mode 1 means gaps take precedence over everything, yielding a gap
    assert_equal("a-y", alignment.consensus_iupac(:gap_mode => 1))
  end

  # Exhaustive-ish table of base combinations and the ambiguity code each
  # one-column alignment is expected to collapse to.
  def test_consensus_iupac_yields_correct_ambiguous_bases
    assert_equal "t", build_na_alignment("t", "u").consensus_iupac # not really IUPAC

    # m = a c
    assert_equal "m", build_na_alignment("a", "c").consensus_iupac, "m #1"
    assert_equal "m", build_na_alignment("m", "c").consensus_iupac, "m #2"
    assert_equal "m", build_na_alignment("a", "m").consensus_iupac, "m #3"
    assert_equal "m", build_na_alignment("m", "a", "c").consensus_iupac, "m #4"

    # r = a g
    assert_equal "r", build_na_alignment("a", "g").consensus_iupac, "r #1"
    assert_equal "r", build_na_alignment("r", "g").consensus_iupac, "r #2"
    assert_equal "r", build_na_alignment("a", "r").consensus_iupac, "r #3"
    assert_equal "r", build_na_alignment("a", "r", "g").consensus_iupac, "r #4"

    # w = a t/u
    assert_equal "w", build_na_alignment("a", "t").consensus_iupac, "w #1"
    assert_equal "w", build_na_alignment("a", "u").consensus_iupac, "w #2"
    assert_equal "w", build_na_alignment("w", "a").consensus_iupac, "w #3"
    assert_equal "w", build_na_alignment("t", "w").consensus_iupac, "w #4"
    assert_equal "w", build_na_alignment("w", "u").consensus_iupac, "w #5"
    assert_equal "w", build_na_alignment("u", "t", "a").consensus_iupac, "w #6"
    assert_equal "w", build_na_alignment("w", "u", "t", "a").consensus_iupac, "w #7"

    # s = c g
    assert_equal "s", build_na_alignment("c", "g").consensus_iupac, "s #1"
    assert_equal "s", build_na_alignment("s", "g").consensus_iupac, "s #2"
    assert_equal "s", build_na_alignment("c", "s").consensus_iupac, "s #3"
    assert_equal "s", build_na_alignment("c", "s", "g").consensus_iupac, "s #4"

    # y = c t/u
    assert_equal "y", build_na_alignment("c", "t").consensus_iupac, "y #1"
    assert_equal "y", build_na_alignment("c", "u").consensus_iupac, "y #2"
    assert_equal "y", build_na_alignment("y", "c").consensus_iupac, "y #3"
    assert_equal "y", build_na_alignment("t", "y").consensus_iupac, "y #4"
    assert_equal "y", build_na_alignment("y", "u").consensus_iupac, "y #5"
    assert_equal "y", build_na_alignment("u", "t", "c").consensus_iupac, "y #6"
    assert_equal "y", build_na_alignment("y", "u", "t", "c").consensus_iupac, "y #7"

    # k = g t/u
    assert_equal "k", build_na_alignment("g", "t").consensus_iupac, "k #1"
    assert_equal "k", build_na_alignment("g", "u").consensus_iupac, "k #2"
    assert_equal "k", build_na_alignment("k", "g").consensus_iupac, "k #3"
    assert_equal "k", build_na_alignment("t", "k").consensus_iupac, "k #4"
    assert_equal "k", build_na_alignment("k", "u").consensus_iupac, "k #5"
    assert_equal "k", build_na_alignment("u", "t", "g").consensus_iupac, "k #6"
    assert_equal "k", build_na_alignment("k", "u", "t", "g").consensus_iupac, "k #7"

    # v = a c g m r s
    assert_equal "v", build_na_alignment("a", "c", "g").consensus_iupac, "v #1"
    assert_equal "v", build_na_alignment("g", "m").consensus_iupac, "v #2"
    assert_equal "v", build_na_alignment("a", "s").consensus_iupac, "v #3"
    assert_equal "v", build_na_alignment("c", "r").consensus_iupac, "v #4"
    assert_equal "v", build_na_alignment("m", "s").consensus_iupac, "v #5"
    assert_equal "v", build_na_alignment("m", "r").consensus_iupac, "v #6"
    assert_equal "v", build_na_alignment("s", "r").consensus_iupac, "v #7"
    assert_equal "v", build_na_alignment("s", "r", "m").consensus_iupac, "v #8"
    assert_equal "v", build_na_alignment("s", "r", "m", "a", "c", "g").consensus_iupac, "v #9"
    assert_equal "v", build_na_alignment("v", "g").consensus_iupac, "v #10" # alright, enough

    # b = t/u c g s y k
    assert_equal "b", build_na_alignment("t", "c", "g").consensus_iupac, "b #1"
    assert_equal "b", build_na_alignment("g", "y").consensus_iupac, "b #2"
    assert_equal "b", build_na_alignment("t", "s").consensus_iupac, "b #3"
    assert_equal "b", build_na_alignment("c", "k").consensus_iupac, "b #4"
    assert_equal "b", build_na_alignment("y", "s").consensus_iupac, "b #5"
    assert_equal "b", build_na_alignment("y", "k").consensus_iupac, "b #6"
    assert_equal "b", build_na_alignment("s", "k").consensus_iupac, "b #7"
    assert_equal "b", build_na_alignment("s", "k", "y").consensus_iupac, "b #8"
    assert_equal "b", build_na_alignment("s", "k", "y", "u", "c", "g").consensus_iupac, "b #9"
    assert_equal "b", build_na_alignment("b", "g").consensus_iupac, "b #10"

    # h = t/u c a y w m
    assert_equal "h", build_na_alignment("t", "c", "a").consensus_iupac, "h #1"
    assert_equal "h", build_na_alignment("a", "y").consensus_iupac, "h #2"
    assert_equal "h", build_na_alignment("c", "w").consensus_iupac, "h #3"
    assert_equal "h", build_na_alignment("u", "m").consensus_iupac, "h #4"
    assert_equal "h", build_na_alignment("y", "w").consensus_iupac, "h #5"
    assert_equal "h", build_na_alignment("y", "m").consensus_iupac, "h #6"
    # w + m completes the three pairings of y, w and m; this case used to
    # repeat the y + w pair already covered by "h #5"
    assert_equal "h", build_na_alignment("w", "m").consensus_iupac, "h #7"
    assert_equal "h", build_na_alignment("w", "m", "y").consensus_iupac, "h #8"
    assert_equal "h", build_na_alignment("w", "m", "y", "t", "c", "a").consensus_iupac, "h #9"
    assert_equal "h", build_na_alignment("h", "t").consensus_iupac, "h #10"

    # d = t/u g a r w k
    assert_equal "d", build_na_alignment("t", "g", "a").consensus_iupac, "d #1"
    assert_equal "d", build_na_alignment("r", "t").consensus_iupac, "d #2"
    assert_equal "d", build_na_alignment("w", "g").consensus_iupac, "d #3"
    assert_equal "d", build_na_alignment("k", "a").consensus_iupac, "d #4"
    assert_equal "d", build_na_alignment("k", "r").consensus_iupac, "d #5"
    assert_equal "d", build_na_alignment("k", "w").consensus_iupac, "d #6"
    assert_equal "d", build_na_alignment("r", "w").consensus_iupac, "d #7"
    assert_equal "d", build_na_alignment("r", "w", "k").consensus_iupac, "d #8"
    assert_equal "d", build_na_alignment("k", "r", "w", "t", "g", "a").consensus_iupac, "d #9"
    assert_equal "d", build_na_alignment("d", "t").consensus_iupac, "d #10"

    # n = anything
    assert_equal "n", build_na_alignment("a", "g", "c", "t").consensus_iupac, "n #1"
    assert_equal "n", build_na_alignment("a", "g", "c", "u").consensus_iupac, "n #2"
    assert_equal "n", build_na_alignment("w", "s").consensus_iupac, "n #3"
    assert_equal "n", build_na_alignment("k", "m").consensus_iupac, "n #4"
    assert_equal "n", build_na_alignment("r", "y").consensus_iupac, "n #5"
  end

  def test_consensus_iupac_missing_char
    alignment = build_na_alignment("a??", "ac?")
    assert_equal("a??", alignment.consensus_iupac())
  end

  def test_consensus_iupac_missing_char_option
    alignment = build_na_alignment("a**t", "ac**")
    assert_equal("a***", alignment.consensus_iupac(:missing_char => "*"))
  end

  # Alignment#convert_match

  # Residues of later sequences equal to the first sequence become '.';
  # the result is read from the returned alignment.
  def test_convert_match
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tttcgatgcc")
    match = alignment.convert_match
    assert_equal(alignment[0], match[0], "first sequence altered")
    assert_equal("tt.....gcc", match[1], "wrong match")
  end

  # Alignment#convert_unmatch

  def test_convert_unmatch
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tt.....gcc")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgattaa", unmatched[0], "first changed")
    assert_equal("tttcgatgcc", unmatched[1], "second wrong")
  end

  def test_convert_unmatch_multiple_sequences
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgattaa")
    alignment << Sequence::NA.new("tt.....gcc")
    alignment << Sequence::NA.new("c...c..g.c")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgattaa", unmatched[0], "first changed")
    assert_equal("tttcgatgcc", unmatched[1], "second wrong")
    assert_equal("cgtccatgac", unmatched[2], "third wrong")
  end

  def test_convert_unmatch_different_length_sequences_truncates_seq_if_last_matched
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcgatta")
    alignment << Sequence::NA.new("tt.....gc.")
    unmatched = alignment.convert_unmatch
    assert_equal("agtcgatta", unmatched[0], "first changed")
    assert_equal("tttcgatgc", unmatched[1], "second wrong") #TODO: verify this is correct, and not . at end
  end

  def test_convert_unmatch_different_match_char
    alignment = Alignment.new
    alignment << Sequence::NA.new("agtcga")
    alignment << Sequence::NA.new("tt====")
    unmatched = alignment.convert_unmatch('=')
    assert_equal("agtcga", unmatched[0], "first changed")
    assert_equal("tttcga", unmatched[1], "second wrong")
  end

  # Alignment#match_line

  # The match line has one character per column (7 here). The M/N and C/N
  # columns are both expected to be blank, so two spaces precede the
  # final '*'; the earlier 6-character literal had lost one of them.
  def test_match_line_protein
    alignment = Alignment.new
    alignment << Sequence::AA.new("AELFMCF")
    alignment << Sequence::AA.new("AKLVNNF")
    assert_equal "*:*.  *", alignment.match_line
  end

  #TODO: lots more on the consensus, match, etc.

  # Alignment#normalize!

  def test_normalizebang_extends_sequences_with_gaps
    alignment = build_na_alignment("a", "ag", "agc", "agct")
    alignment.normalize!
    assert_equal({0=>"a---",1=>"ag--",2=>"agc-",3=>"agct"}, alignment.to_hash)
  end

  # Alignment#to_clustal
end
1028
+ end