bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/io/test_ddbjxml.rb - Unit test for DDBJ XML.
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: test_ddbjxml.rb,v 1.1 2005/12/11 14:59:25 nakao Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'pathname'
|
|
24
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
|
25
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
require 'test/unit'
|
|
29
|
+
require 'bio/io/ddbjxml'
|
|
30
|
+
|
|
31
|
+
module Bio
|
|
32
|
+
|
|
33
|
+
# Checks the constants exposed under Bio::DDBJ::XML: the set of constant
# names, the base WSDL location, and the SERVER_URI of each service class.
class TestDDBJXMLConstants < Test::Unit::TestCase

  # Bio::DDBJ::XML must define exactly these constants.
  def test_constants
    constants = ["DDBJ", "TxSearch", "ClustalW", "PML", "Gib", "Fasta", "BASE_URI", "SRS", "Gtop", "GetEntry", "Blast"].sort
    # Module#constants returns Strings on Ruby 1.8 and Symbols on 1.9+;
    # convert to Strings so the comparison does not depend on the version.
    actual = Bio::DDBJ::XML.constants.collect { |c| c.to_s }.sort
    assert_equal(constants, actual)
  end

  def test_base_url
    assert_equal("http://xml.nig.ac.jp/wsdl/", Bio::DDBJ::XML::BASE_URI)
  end

  # Renamed from test_blast_server_rul (typo) to match the sibling tests.
  def test_blast_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Blast.wsdl", Bio::DDBJ::XML::Blast::SERVER_URI)
  end

  def test_clustalw_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/ClustalW.wsdl", Bio::DDBJ::XML::ClustalW::SERVER_URI)
  end

  def test_ddbj_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/DDBJ.wsdl", Bio::DDBJ::XML::DDBJ::SERVER_URI)
  end

  def test_fasta_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Fasta.wsdl", Bio::DDBJ::XML::Fasta::SERVER_URI)
  end

  def test_getentry_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/GetEntry.wsdl", Bio::DDBJ::XML::GetEntry::SERVER_URI)
  end

  def test_gib_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Gib.wsdl", Bio::DDBJ::XML::Gib::SERVER_URI)
  end

  def test_gtop_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/Gtop.wsdl", Bio::DDBJ::XML::Gtop::SERVER_URI)
  end

  def test_pml_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/PML.wsdl", Bio::DDBJ::XML::PML::SERVER_URI)
  end

  def test_srs_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/SRS.wsdl", Bio::DDBJ::XML::SRS::SERVER_URI)
  end

  def test_txsearch_server_url
    assert_equal("http://xml.nig.ac.jp/wsdl/TxSearch.wsdl", Bio::DDBJ::XML::TxSearch::SERVER_URI)
  end

end
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/io/test_soapwsdl.rb - Unit test for SOAP/WSDL
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: test_soapwsdl.rb,v 1.1 2005/12/18 17:09:53 nakao Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'pathname'
|
|
24
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
|
25
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
require 'test/unit'
|
|
29
|
+
require 'bio/io/soapwsdl'
|
|
30
|
+
|
|
31
|
+
module Bio
|
|
32
|
+
|
|
33
|
+
# Checks which public instance methods Bio::SOAPWSDL exposes.
class TestSOAPWSDL < Test::Unit::TestCase

  # The object under test is the class itself, not an instance.
  def setup
    @obj = Bio::SOAPWSDL
  end

  # After removing every name reported by Object.methods, only the wsdl/log
  # accessors are expected to remain.
  def test_methods
    methods = ['wsdl', 'wsdl=', 'log', 'log=']
    # NOTE(review): Object.methods lists the methods of the Object class
    # object, not Object's instance methods; instance_methods(false) may be
    # what was intended - confirm against Bio::SOAPWSDL before changing.
    own = @obj.instance_methods - Object.methods
    # instance_methods returns Strings on Ruby 1.8 and Symbols on 1.9+;
    # convert to Strings so the comparison does not depend on the version.
    actual = own.collect { |m| m.to_s }
    assert_equal(methods.sort, actual.sort)
  end

end
|
|
45
|
+
end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/shell/plugin/test_seq.rb - Unit test for Bio::Shell plugin for biological sequence manipulations
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: test_seq.rb,v 1.5 2005/12/19 02:44:03 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'pathname'
|
|
24
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
|
|
25
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
26
|
+
|
|
27
|
+
require 'test/unit'
|
|
28
|
+
require 'bioruby'
|
|
29
|
+
|
|
30
|
+
module Bio
|
|
31
|
+
class TestShellPluginSeq < Test::Unit::TestCase
|
|
32
|
+
|
|
33
|
+
# include Bio::Shell
|
|
34
|
+
# Bio::Shell.instance_variable_set :@config, {}
|
|
35
|
+
|
|
36
|
+
# seq() on 'ACGT' must give a Bio::Sequence::NA that equals both
# NA.new('ACGT') and the plain string 'acgt'.
def test_naseq
  bases = 'ACGT'
  na_class = Bio::Sequence::NA
  assert_equal(na_class, seq(bases).class)
  assert_equal(na_class.new(bases), seq(bases))
  assert_equal('acgt', seq(bases))
end
|
|
42
|
+
|
|
43
|
+
# seq() on 'WD' must give a Bio::Sequence::AA that equals both
# AA.new('WD') and the plain string 'WD'.
def test_aaseq
  residues = 'WD'
  aa_class = Bio::Sequence::AA
  assert_equal(aa_class, seq(residues).class)
  assert_equal(aa_class.new('WD'), seq(residues))
  assert_equal('WD', seq(residues))
end
|
|
49
|
+
|
|
50
|
+
def test_na_seqstat
|
|
51
|
+
naseq = 'atgcatgcatgc'
|
|
52
|
+
output =<<END
|
|
53
|
+
|
|
54
|
+
* * * Sequence statistics * * *
|
|
55
|
+
|
|
56
|
+
5'->3' sequence : atgcatgcatgc
|
|
57
|
+
3'->5' sequence : gcatgcatgcat
|
|
58
|
+
Translation 1 : MHAC
|
|
59
|
+
Translation 2 : CMH
|
|
60
|
+
Translation 3 : ACM
|
|
61
|
+
Translation -1 : ACMH
|
|
62
|
+
Translation -2 : HAC
|
|
63
|
+
Translation -3 : MHA
|
|
64
|
+
Length : 12 bp
|
|
65
|
+
GC percent : 50 %
|
|
66
|
+
Composition : a - 3 ( 25.00 %)
|
|
67
|
+
c - 3 ( 25.00 %)
|
|
68
|
+
g - 3 ( 25.00 %)
|
|
69
|
+
t - 3 ( 25.00 %)
|
|
70
|
+
Codon usage :
|
|
71
|
+
|
|
72
|
+
*---------------------------------------------*
|
|
73
|
+
| | 2nd | |
|
|
74
|
+
| 1st |-------------------------------| 3rd |
|
|
75
|
+
| | U | C | A | G | |
|
|
76
|
+
|-------+-------+-------+-------+-------+-----|
|
|
77
|
+
| U U |F 0.0%|S 0.0%|Y 0.0%|C 0.0%| u |
|
|
78
|
+
| U U |F 0.0%|S 0.0%|Y 0.0%|C 25.0%| c |
|
|
79
|
+
| U U |L 0.0%|S 0.0%|* 0.0%|* 0.0%| a |
|
|
80
|
+
| UUU |L 0.0%|S 0.0%|* 0.0%|W 0.0%| g |
|
|
81
|
+
|-------+-------+-------+-------+-------+-----|
|
|
82
|
+
| CCCC |L 0.0%|P 0.0%|H 25.0%|R 0.0%| u |
|
|
83
|
+
| C |L 0.0%|P 0.0%|H 0.0%|R 0.0%| c |
|
|
84
|
+
| C |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| a |
|
|
85
|
+
| CCCC |L 0.0%|P 0.0%|Q 0.0%|R 0.0%| g |
|
|
86
|
+
|-------+-------+-------+-------+-------+-----|
|
|
87
|
+
| A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| u |
|
|
88
|
+
| A A |I 0.0%|T 0.0%|N 0.0%|S 0.0%| c |
|
|
89
|
+
| AAAAA |I 0.0%|T 0.0%|K 0.0%|R 0.0%| a |
|
|
90
|
+
| A A |M 25.0%|T 0.0%|K 0.0%|R 0.0%| g |
|
|
91
|
+
|-------+-------+-------+-------+-------+-----|
|
|
92
|
+
| GGGG |V 0.0%|A 0.0%|D 0.0%|G 0.0%| u |
|
|
93
|
+
| G |V 0.0%|A 0.0%|D 0.0%|G 0.0%| c |
|
|
94
|
+
| G GGG |V 0.0%|A 25.0%|E 0.0%|G 0.0%| a |
|
|
95
|
+
| GG G |V 0.0%|A 0.0%|E 0.0%|G 0.0%| g |
|
|
96
|
+
*---------------------------------------------*
|
|
97
|
+
|
|
98
|
+
Molecular weight : 3701.61444
|
|
99
|
+
Protein weight : 460.565
|
|
100
|
+
//
|
|
101
|
+
END
|
|
102
|
+
$str = ''
|
|
103
|
+
alias puts_orig puts
|
|
104
|
+
def puts(*args)
|
|
105
|
+
args.each do |obj|
|
|
106
|
+
$str << obj.to_s
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
seqstat(naseq)
|
|
110
|
+
undef puts
|
|
111
|
+
alias puts puts_orig
|
|
112
|
+
assert_equal(output, $str)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def test_aa_seqstat
|
|
116
|
+
aaseq = 'WD'
|
|
117
|
+
output =<<END
|
|
118
|
+
|
|
119
|
+
* * * Sequence statistics * * *
|
|
120
|
+
|
|
121
|
+
N->C sequence : WD
|
|
122
|
+
Length : 2 aa
|
|
123
|
+
Composition : D Asp - 1 ( 50.00 %) aspartic acid
|
|
124
|
+
W Trp - 1 ( 50.00 %) tryptophan
|
|
125
|
+
Protein weight : 319.315
|
|
126
|
+
//
|
|
127
|
+
END
|
|
128
|
+
$str = ''
|
|
129
|
+
alias puts_orig puts
|
|
130
|
+
def puts(*args)
|
|
131
|
+
args.each do |obj|
|
|
132
|
+
$str << obj.to_s
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
seqstat(aaseq)
|
|
136
|
+
undef puts
|
|
137
|
+
alias puts puts_orig
|
|
138
|
+
assert_equal(output, $str)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def test_doublehelix
|
|
142
|
+
seq = 'ACGTACGTACGTACGT'
|
|
143
|
+
output = <<END
|
|
144
|
+
at
|
|
145
|
+
c--g
|
|
146
|
+
g---c
|
|
147
|
+
t----a
|
|
148
|
+
a----t
|
|
149
|
+
c---g
|
|
150
|
+
g--c
|
|
151
|
+
ta
|
|
152
|
+
ta
|
|
153
|
+
g--c
|
|
154
|
+
c---g
|
|
155
|
+
a----t
|
|
156
|
+
t----a
|
|
157
|
+
g---c
|
|
158
|
+
c--g
|
|
159
|
+
at
|
|
160
|
+
END
|
|
161
|
+
$str = ''
|
|
162
|
+
alias puts_orig puts
|
|
163
|
+
def puts(*args)
|
|
164
|
+
args.each do |obj|
|
|
165
|
+
$str << obj.to_s
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
doublehelix(seq)
|
|
169
|
+
undef puts
|
|
170
|
+
alias puts puts_orig
|
|
171
|
+
assert_equal(output, $str)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
end
|
|
175
|
+
end
|
|
@@ -0,0 +1,1028 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/test_alignment.rb - Unit test for Bio::Alignment
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2004 Moses Hohman <mmhohman@northwestern.edu>
|
|
5
|
+
# 2005 Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: test_alignment.rb,v 1.6 2005/12/02 13:01:49 ngoto Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'pathname'
|
|
25
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s
|
|
26
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
27
|
+
|
|
28
|
+
require 'test/unit'
|
|
29
|
+
require 'bio/alignment'
|
|
30
|
+
|
|
31
|
+
module Bio
|
|
32
|
+
|
|
33
|
+
# Exercises Alignment::PropertyMethods through a bare Object that has been
# extended with the module.
class TestAlignmentPropertyMethods < Test::Unit::TestCase

  # Every test gets a fresh host object carrying the mixin.
  def setup
    @host = Object.new.extend(Alignment::PropertyMethods)
  end

  def test_is_gap_default_false
    answer = @host.is_gap?('a')
    assert_equal(false, answer, "\"a\" isn't a gap")
  end

  def test_is_gap_default_true
    answer = @host.is_gap?('-')
    assert_equal(true, answer, '"-" is a gap')
  end

  def test_gap_regexp
    assert_not_nil(@host.gap_regexp)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_gap_regexp_never_nil
    @host.gap_regexp = nil
    assert_not_nil(@host.gap_regexp)
  end

  def test_gap_regexp=()
    pattern = /[^a-zA-Z0-9]/
    @host.gap_regexp = /[^a-zA-Z0-9]/
    assert_equal(pattern, @host.gap_regexp)
  end

  def test_is_gap_nodefault_false
    @host.gap_regexp = /[^a-zA-Z0-9]/
    answer = @host.is_gap?('3')
    assert_equal(false, answer)
  end

  def test_is_gap_nodefault_true
    @host.gap_regexp = /[^atgc]/
    answer = @host.is_gap?('z')
    assert_equal(true, answer)
  end

  def test_gap_char_default
    assert_not_nil(@host.gap_char)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_gap_char_never_nil
    @host.gap_char = nil
    assert_not_nil(@host.gap_char)
  end

  def test_gap_char=()
    @host.gap_char = '#'
    assert_equal('#', @host.gap_char)
  end

  def test_missing_char_default
    assert_not_nil(@host.missing_char)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_missing_char_never_nil
    @host.missing_char = nil
    assert_not_nil(@host.missing_char)
  end

  def test_missing_char=()
    @host.missing_char = '_'
    assert_equal('_', @host.missing_char)
  end

  def test_seqclass_default
    assert_not_nil(@host.seqclass)
  end

  # Assigning nil must not leave the reader returning nil.
  def test_seqclass_never_nil
    @host.seqclass = nil
    assert_not_nil(@host.seqclass)
  end

  def test_seqclass=()
    @host.seqclass = Sequence::NA
    assert_equal(Sequence::NA, @host.seqclass)
  end

  # With nothing assigned, get_all_property is expected to be an empty Hash.
  def test_get_all_property_default
    assert_equal({}, @host.get_all_property)
  end

  # Values set through the individual writers must all show up in
  # get_all_property.
  def test_get_all_property_nodefault
    @host.gap_regexp = /[^acgt]/
    @host.gap_char = '#'
    @host.missing_char = '_'
    @host.seqclass = Sequence::NA
    expected = {
      :gap_regexp   => /[^acgt]/,
      :gap_char     => '#',
      :missing_char => '_',
      :seqclass     => Sequence::NA
    }
    assert_equal(expected, @host.get_all_property)
  end

  # A Hash passed to set_all_property must come back from get_all_property.
  def test_set_all_property
    props = {
      :gap_regexp   => /[^acgt]/,
      :gap_char     => '#',
      :missing_char => '_',
      :seqclass     => Sequence::NA
    }
    @host.set_all_property(props)
    assert_equal(props, @host.get_all_property)
  end
end #class TestAlignmentPropertyMethods
|
|
139
|
+
|
|
140
|
+
# This is a unit test of Bio::Alignment::Site class and
|
|
141
|
+
# Bio::Alignment::SiteMethods module.
|
|
142
|
+
# Since Bio::Alignment::Site includes Bio::Alignment::SiteMethods,
|
|
143
|
+
# we can test both at the same time.
|
|
144
|
+
# Tests for Alignment::Site: gap detection and removal, consensus
# calculation, and the match-line symbols for amino acid and nucleotide sites.
class TestAlignmentSite < Test::Unit::TestCase

  def test_has_gap_true
    site = Alignment::Site[ 'a', '-', 'c', 'g', 't' ]
    assert_equal(true, site.has_gap?)
  end

  def test_has_gap_false
    site = Alignment::Site[ 'a', 'c', 'g', 't' ]
    assert_equal(false, site.has_gap?)
  end

  def test_remove_gaps!
    site = Alignment::Site[ 'a', '-', 'c', '-' ]
    assert_equal(Alignment::Site['a', 'c'], site.remove_gaps!)
  end

  # remove_gaps! is expected to return nil when there was nothing to remove.
  def test_remove_gaps_bang_not_removed
    site = Alignment::Site[ 'a', 'c']
    assert_nil(site.remove_gaps!)
  end

  def test_consensus_string_default
    site = Alignment::Site[ 'a', 'a', 'a', 'a']
    assert_equal('a', site.consensus_string)
  end

  def test_consensus_string_default_nil
    site = Alignment::Site[ 'a', 'a', 'a', 'c']
    assert_nil(site.consensus_string)
  end

  def test_consensus_string_50percent
    site = Alignment::Site[ 'a', 'a', 'c', 'g']
    assert_equal('a', site.consensus_string(0.5))
  end

  def test_consensus_string_50percent_nil
    site = Alignment::Site[ 'a', 'c', 'g', 't']
    assert_nil(site.consensus_string(0.5))
  end

  # Each entry is [expected consensus, site contents].
  # A list of pairs is used instead of a Hash because the expected value 't'
  # occurs twice: as a Hash key the second entry would silently replace the
  # first, and the [ 't' ] case would never be checked.
  def test_consensus_iupac
    data = [
      [ 'a', [ 'a' ] ],
      [ 'c', [ 'c' ] ],
      [ 'g', [ 'g' ] ],
      [ 't', [ 't' ] ],
      [ 't', [ 't', 'u' ] ],
      [ 'm', [ 'a', 'c' ] ],
      [ 'r', [ 'a', 'g' ] ],
      [ 'w', [ 'a', 't' ] ],
      [ 's', [ 'c', 'g' ] ],
      [ 'y', [ 'c', 't' ] ],
      [ 'k', [ 'g', 't' ] ],
      [ 'v', [ 'a', 'c', 'g' ] ],
      [ 'h', [ 'a', 'c', 't' ] ],
      [ 'd', [ 'a', 'g', 't' ] ],
      [ 'b', [ 'c', 'g', 't' ] ],
      [ 'n', [ 'a', 'c', 'g', 't' ] ],
      [ nil, [ 'z', 'a' ] ]
    ]
    data.each do |cons, testdata|
      site = Alignment::Site[ *testdata ]
      assert_equal(cons, site.consensus_iupac,
                   "IUPAC consensus of #{testdata.join(',')} is #{cons}")
    end
  end

  def test_match_line_amino_missing
    site = Alignment::Site[ 'P', 'Q', 'R', 'S' ]
    assert_equal(' ', site.match_line_amino)
  end

  def test_match_line_amino_100percent
    site = Alignment::Site[ 'M', 'M', 'M', 'M' ]
    assert_equal('*', site.match_line_amino)
  end

  def test_match_line_amino_strong
    site = Alignment::Site[ 'N', 'E', 'Q', 'K' ]
    assert_equal(':', site.match_line_amino)
  end

  def test_match_line_amino_weak
    site = Alignment::Site[ 'S', 'G', 'N', 'D' ]
    assert_equal('.', site.match_line_amino)
  end

  def test_match_line_nuc_missing
    site = Alignment::Site[ 'A', 'C', 'G', 'T' ]
    assert_equal(' ', site.match_line_nuc)
  end

  def test_match_line_nuc_100percent
    site = Alignment::Site[ 'G', 'G', 'G', 'G' ]
    assert_equal('*', site.match_line_nuc)
  end
end #class TestAlignmentSite
|
|
243
|
+
|
|
244
|
+
# This is a sample class for testing Bio::Alignment::EnumerableExtension.
|
|
245
|
+
# Array subclass whose only addition is the Alignment::EnumerableExtension
# mixin; the tests in this file build their fixtures with A[...] and A.new.
class A < Array
|
|
246
|
+
include Alignment::EnumerableExtension
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
class TestAlignmentEnumerableExtension < Test::Unit::TestCase
|
|
250
|
+
# each_seq must yield the stored sequences one by one, in order.
def test_each_seq
  remaining = [ 'atg', 'aag', 'acg' ]
  alignment = A[ *remaining ]
  alignment.each_seq { |seq| assert_equal(remaining.shift, seq) }
  # Every expected sequence must have been consumed.
  assert(remaining.empty?)
end
|
|
258
|
+
|
|
259
|
+
# An empty A reports String as its sequence class.
def test_seqclass_default
  assert_equal(String, A.new.seqclass)
end
|
|
263
|
+
|
|
264
|
+
# An A holding a Bio::Sequence::NA reports Bio::Sequence::NA.
def test_seqclass
  na_class = Bio::Sequence::NA
  alignment = A[ na_class.new('atg') ]
  assert_equal(na_class, alignment.seqclass)
end
|
|
268
|
+
|
|
269
|
+
# seqclass is String when empty, follows the stored element once one is
# appended, and an explicit assignment takes precedence afterwards.
def test_seqclass=()
  alignment = A.new
  assert_equal(String, alignment.seqclass)

  alignment << Bio::Sequence::NA.new('a')
  assert_equal(Bio::Sequence::NA, alignment.seqclass)

  alignment.seqclass = Bio::Sequence::AA
  assert_equal(Bio::Sequence::AA, alignment.seqclass)
end
|
|
277
|
+
|
|
278
|
+
# The fixture's longest sequence is 'atgc', and the expected length is 4.
def test_alignment_length
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  length = alignment.alignment_length
  assert_equal(4, length)
end
|
|
282
|
+
|
|
283
|
+
# Calls _alignment_site from inside the object via instance_eval; sequences
# with no character at position 1 are expected to contribute '-'.
def test_private_alignment_site
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::Site[ '-', 't', 't', 't', '-' ]
  actual = alignment.instance_eval { _alignment_site(1) }
  assert_equal(expected, actual)
end
|
|
288
|
+
|
|
289
|
+
# Same check as test_private_alignment_site, but reaching _alignment_site
# through __send__.
# NOTE(review): both tests call the same helper; a public alignment_site
# method may have been the intended target - confirm.
def test_alignment_site
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::Site[ '-', 't', 't', 't', '-' ]
  actual = alignment.__send__(:_alignment_site, 1)
  assert_equal(expected, actual)
end
|
|
294
|
+
|
|
295
|
+
# each_site must yield one Site per column, in order, with '-' for the
# sequences that have no character in that column.
def test_each_site
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', '-' ],
    Alignment::Site[ '-', 't', 't', 't', '-' ],
    Alignment::Site[ '-', '-', 'g', 'g', '-' ],
    Alignment::Site[ '-', '-', 'c', '-', '-' ]
  ]
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  alignment.each_site { |column| assert_equal(pending.shift, column) }
  # Every expected column must have been consumed.
  assert(pending.empty?)
end
|
|
308
|
+
|
|
309
|
+
# each_site_step(1, 4, 2) is expected to yield sites 1 and 3 only.
def test_each_site_step
  pending = [
    Alignment::Site[ '-', 't', 't', 't', '-' ], # site 1
    Alignment::Site[ '-', 'a', 'g', 't', '-' ], # site 3
  ]
  alignment = A[ 'a', 'atgatc', 'atggcc', 'atgtga', '' ]
  alignment.each_site_step(1, 4, 2) { |column| assert_equal(pending.shift, column) }
  # Every expected site must have been consumed.
  assert(pending.empty?)
end
|
|
320
|
+
|
|
321
|
+
# alignment_collect must return an Alignment::SequenceArray holding the
# block's result for each sequence (here t replaced by u).
def test_alignment_collect
  alignment = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = Alignment::SequenceArray[ 'a', 'au', 'augc', 'aug', '' ]
  actual = alignment.alignment_collect { |seq| seq.gsub(/t/, 'u') }
  assert_equal(expected, actual)
end
|
|
326
|
+
|
|
327
|
+
# alignment_window(1, 3) is expected to return up to three characters of
# each sequence starting at position 1, as an Alignment::SequenceArray.
def test_alignment_window
  alignment = A[ 'a', 'at', 'atgca', 'atg', '' ]
  expected = Alignment::SequenceArray[ '', 't', 'tgc', 'tg', '' ]
  actual = alignment.alignment_window(1, 3)
  assert_equal(expected, actual)
end
|
|
332
|
+
|
|
333
|
+
# each_window(3, 2) yields windows of width 3 at a step of 2; the value it
# returns is compared against the last expected entry (columns 7..7).
def test_each_window
  seqs = A[ 'atgcatgc', 'tcgatgca', '' ]
  pending = [
    Alignment::SequenceArray[ 'atg', 'tcg', '' ], # 0..2
    Alignment::SequenceArray[ 'gca', 'gat', '' ], # 2..4
    Alignment::SequenceArray[ 'atg', 'tgc', '' ], # 4..6
    Alignment::SequenceArray[ 'c', 'a', '' ] # 7..7
  ]
  leftover = seqs.each_window(3, 2) { |window| assert_equal(pending.shift, window) }
  assert_equal(pending.shift, leftover)
  assert(pending.empty?)
end
|
|
347
|
+
|
|
348
|
+
# collect_each_site gathers the block result for every column; joining each
# site gives one string per column.
def test_collect_each_site
  seqs = A[ 'a', 'at', 'atgc', 'atg', '' ]
  expected = ["aaaa-", "-ttt-", "--gg-", "--c--" ]
  columns = seqs.collect_each_site { |site| site.join('') }
  assert_equal(expected, columns)
end
|
|
353
|
+
|
|
354
|
+
# With default options consensus_each_site yields every column (gaps
# included) and concatenates the block results into a string.
def test_consensus_each_site_default
  seqs = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
    Alignment::Site[ 'a', 'c', 'g', 't', '-' ]
  ]

  consensus = seqs.consensus_each_site do |column|
    assert_equal(pending.shift, column)
    'x'
  end

  assert_equal('xx', consensus)
  assert(pending.empty?)
end
|
|
368
|
+
|
|
369
|
+
# With :gap_mode => 1 only the gap-free column is expected to reach the
# block; the column containing a gap is expected to appear as '-'.
def test_consensus_each_site_gap_mode_1
  seqs = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ]
  ]

  consensus = seqs.consensus_each_site(:gap_mode => 1) do |column|
    assert_equal(pending.shift, column)
    'x'
  end

  assert_equal('x-', consensus)
  assert(pending.empty?)
end
|
|
382
|
+
|
|
383
|
+
# With :gap_mode => -1 the gap is expected to be dropped from the yielded
# site, so the second column arrives with four elements instead of five.
def test_consensus_each_site_gap_mode_minus1
  seqs = A[ 'aa', 'ac', 'ag', 'at', 'a-' ]
  pending = [
    Alignment::Site[ 'a', 'a', 'a', 'a', 'a' ],
    Alignment::Site[ 'a', 'c', 'g', 't' ]
  ]

  consensus = seqs.consensus_each_site(:gap_mode => -1) do |column|
    assert_equal(pending.shift, column)
    'x'
  end

  assert_equal('xx', consensus)
  assert(pending.empty?)
end
|
|
397
|
+
|
|
398
|
+
# Without an explicit threshold only the unanimous first column is expected
# to produce a consensus symbol; the others become '?'.
def test_consensus_string_default
  seqs = A[ 'ata', 'aac', 'aag', 'aat' ]
  consensus = seqs.consensus_string
  assert_equal('a??', consensus)
end
|
|
402
|
+
|
|
403
|
+
# With a threshold of 0.5 the second column (three 'a' out of four) is also
# expected to yield a consensus symbol.
def test_consensus_string_half
  seqs = A[ 'ata', 'aac', 'aag', 'aat' ]
  consensus = seqs.consensus_string(0.5)
  assert_equal('aa?', consensus)
end
|
|
407
|
+
|
|
408
|
+
# consensus_iupac is expected to merge each column into its IUPAC ambiguity
# code; the final column of 'z' characters is expected to give '?'.
def test_consensus_iupac
  rows = [
    'acgtaaaccgaaacaz',
    'acgtaaaccgccggcz',
    'acgtcgtgttgtttgz',
    'acgtcgtgttaaactz'
  ]
  seqs = A[ *rows ]
  assert_equal('acgtmrwsykvhdbn?', seqs.consensus_iupac)
end
|
|
417
|
+
|
|
418
|
+
# match_line_amino produces one marker per alignment column, so for these
# 26-column sequences the expected line must be 26 characters long.
# Column 23 is all gaps and column 24 (A/F/H/L/Q/Y) is expected to share no
# conservation group, so two consecutive blanks precede the final '*'.
# The previous expected literal had that run collapsed to a single blank
# (25 characters) and could never equal a 26-column match line.
def test_match_line_amino
  a = A[
    'M-SNNNQMMHF-CASSSSSNNFH-AW',
    'M-TEHDHIIYY-STATTGNDEVF-FW',
    'M-AQQERLLHW-AVGNPNDEQLY-HW',
    'M-SKKQKVFYF-CASKADEQHIH-LW',
    'M-TNNNQMMHY-STASSSQHRMF-QW',
    'M-AEHDHIIYW-AVGTTGKKKFY-YW'
    #* ::::::::: ...........  *
  ]
  assert_equal('* ::::::::: ...........  *', a.match_line_amino)
end
|
|
430
|
+
|
|
431
|
+
# match_line_nuc produces one marker per alignment column. Only the first
# of the three columns is identical in every sequence, so the expected line
# is '*' followed by two blanks. The previous literal held a single blank
# (2 characters) and could not match a 3-column alignment.
def test_match_line_nuc
  a = A[ 'aaa', 'aa-','aac', 'at-' ]
  assert_equal('*  ', a.match_line_nuc)
end
|
|
435
|
+
|
|
436
|
+
# match_line is checked with its default type (expected to match the
# explicit :aa result for Sequence::AA input), with :type => :aa, and with
# :type => :na.
# Every expected line has one marker per column (4 columns). For :na only
# the first column is identical everywhere, so the line is '*' plus three
# blanks; the previous literal had those blanks collapsed to one.
def test_match_line
  a = A[
    Sequence::AA.new('MNSA'),
    Sequence::AA.new('MHTL'),
    Sequence::AA.new('MQNV'),
    Sequence::AA.new('MKKW'),
  ]
  assert_equal('*:. ', a.match_line)
  assert_equal('*:. ', a.match_line(:type => :aa))
  assert_equal('*   ', a.match_line(:type => :na))
end
|
|
447
|
+
|
|
448
|
+
# convert_match is expected to rewrite, in place, every character that
# equals the first sequence's character as '.'.
def test_convert_match
  seqs = A[ 'aaaa', 'accc', 'acac', 'actc' ]
  seqs.convert_match
  expected = A[ 'aaaa', '.ccc', '.c.c', '.ctc' ]
  assert_equal(expected, seqs)
end
|
|
458
|
+
|
|
459
|
+
# convert_unmatch is expected to replace, in place, each '.' with the
# first sequence's character at that position.
def test_convert_unmatch
  seqs = A[ 'aaaa', '.ccc', '.c.c', '.ctc' ]
  seqs.convert_unmatch
  expected = A[ 'aaaa', 'accc', 'acac', 'actc' ]
  assert_equal(expected, seqs)
end
|
|
464
|
+
|
|
465
|
+
# alignment_normalize! is expected to pad every sequence with '-' up to the
# length of the longest one.
def test_alignment_normalize!
  seqs = A[ 'a', 'atg', 'atgc', '' ]
  seqs.alignment_normalize!
  expected = A[ 'a---', 'atg-', 'atgc', '----']
  assert_equal(expected, seqs)
end
|
|
470
|
+
|
|
471
|
+
# alignment_rstrip! is expected to return a truthy value and drop the
# trailing all-gap columns.
def test_alignment_rstrip!
  seqs = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = seqs.alignment_rstrip!
  assert(changed)
  assert_equal(A[ '--aaa', '--t-t', '---g-', '--t' ], seqs)
end
|
|
476
|
+
|
|
477
|
+
# When nothing can be stripped, alignment_rstrip! is expected to return nil
# and leave the sequences untouched.
def test_alignment_rstrip_nil
  seqs = A[ 'aa', '-a', 'a-' ]
  outcome = seqs.alignment_rstrip!
  assert_nil(outcome)
  assert_equal(A[ 'aa', '-a', 'a-' ], seqs)
end
|
|
482
|
+
|
|
483
|
+
# alignment_lstrip! is expected to return a truthy value and drop the
# leading all-gap columns.
def test_alignment_lstrip!
  seqs = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = seqs.alignment_lstrip!
  assert(changed)
  assert_equal(A[ 'aaa--', 't-t--', '-g---', 't' ], seqs)
end
|
|
488
|
+
|
|
489
|
+
# When nothing can be stripped, alignment_lstrip! is expected to return nil
# and leave the sequences untouched.
def test_alignment_lstrip_nil
  seqs = A[ 'aa', '-a', 'a-' ]
  outcome = seqs.alignment_lstrip!
  assert_nil(outcome)
  assert_equal(A[ 'aa', '-a', 'a-' ], seqs)
end
|
|
494
|
+
|
|
495
|
+
# alignment_strip! is expected to return a truthy value and drop all-gap
# columns from both ends.
def test_alignment_strip!
  seqs = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = seqs.alignment_strip!
  assert(changed)
  assert_equal(A[ 'aaa', 't-t', '-g-', 't' ], seqs)
end
|
|
500
|
+
|
|
501
|
+
# When nothing can be stripped, alignment_strip! is expected to return nil
# and leave the sequences untouched.
def test_alignment_strip_nil
  seqs = A[ 'aa', '-a', 'a-' ]
  outcome = seqs.alignment_strip!
  assert_nil(outcome)
  assert_equal(A[ 'aa', '-a', 'a-' ], seqs)
end
|
|
506
|
+
|
|
507
|
+
# remove_all_gaps! is expected to return a truthy value and delete every
# '-' from every sequence.
def test_remove_all_gaps!
  seqs = A[ '--aaa--', '--t-t--', '---g---', '--t' ]
  changed = seqs.remove_all_gaps!
  assert(changed)
  assert_equal(A[ 'aaa', 'tt', 'g', 't' ], seqs)
end
|
|
512
|
+
|
|
513
|
+
# test of alignment_slice.
|
|
514
|
+
# Please also refer to alignment_window.
|
|
515
|
+
# alignment_slice(1, 3) is expected to slice each sequence individually;
# the empty sequence is expected to give nil rather than ''.
def test_alignment_slice
  seqs = A[ 'a', 'at', 'atgca', 'atg', '' ]
  expected = Alignment::SequenceArray[ '', 't', 'tgc', 'tg', nil ]
  sliced = seqs.alignment_slice(1, 3)
  assert_equal(expected, sliced)
end
|
|
520
|
+
|
|
521
|
+
# alignment_subseq(2, 4) is expected to return positions 2..4 of each
# Sequence, with nil for the empty sequence.
def test_alignment_subseq
  seqs = A[
    Sequence.new('a'),
    Sequence.new('at'),
    Sequence.new('atgca'),
    Sequence.new('atg'),
    Sequence.new('')
  ]
  expected = Alignment::SequenceArray[
    Sequence.new(''),
    Sequence.new('t'),
    Sequence.new('tgc'),
    Sequence.new('tg'),
    nil
  ]
  assert_equal(expected, seqs.alignment_subseq(2,4))
end
|
|
529
|
+
|
|
530
|
+
# alignment_concat is expected to append the i-th given sequence to the
# i-th sequence of the receiver; surplus or missing entries leave the
# remaining sequences as they are.
def test_alignment_concat
  seqs = A[ 'aaa', 'c', 'gg', 't' ]

  # The argument is longer than the receiver.
  seqs.alignment_concat(A[ 'ttt', 'gg', 'aa', 'cc', 'aa' ])
  assert_equal(A[ 'aaattt', 'cgg', 'ggaa', 'tcc' ], seqs)

  # The argument is a plain Array shorter than the receiver.
  seqs.alignment_concat([ 'c', 't' ])
  assert_equal(A[ 'aaatttc', 'cggt', 'ggaa', 'tcc' ], seqs)
end
|
|
537
|
+
end #class TestAlignmentEnumerableExtension
|
|
538
|
+
|
|
539
|
+
# Tests for the name helpers of Alignment::ClustalWFormatter. The helpers
# are invoked through instance_eval on a bare Object extended with the module.
class TestAlignmentClustalWFormatter < Test::Unit::TestCase
  def setup
    @obj = Object.new.extend(Alignment::ClustalWFormatter)
  end

  # The first two names both begin with 'ATP'; indices [0, 1] are expected.
  def test_have_same_name_true
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    assert_equal([ 0, 1 ], @obj.instance_eval { have_same_name?(names) })
  end

  # No two names begin with the same word; false is expected.
  def test_have_same_name_false
    names = [ 'GTP ATG', 'ATP ATA', 'BBB' ]
    assert_equal(false, @obj.instance_eval { have_same_name?(names) })
  end

  # Blanks are expected to be replaced by '_' in the returned names.
  def test_avoid_same_name
    names = [ 'ATP ATG', 'ATP ATA', 'BBB' ]
    expected = [ 'ATP_ATG', 'ATP_ATA', 'BBB' ]
    assert_equal(expected, @obj.instance_eval { avoid_same_name(names) })
  end

  # Identical names are expected to be given an index prefix.
  def test_avoid_same_name_numbering
    names = [ 'ATP', 'ATP', 'BBB' ]
    expected = [ '0_ATP', '1_ATP', '2_BBB' ]
    assert_equal(expected, @obj.instance_eval { avoid_same_name(names) })
  end

end #class TestAlignmentClustalWFormatter
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
class TestAlignment < Test::Unit::TestCase
|
|
570
|
+
|
|
571
|
+
# testing helper method
|
|
572
|
+
# Test helper: folds the given strings into a fresh Alignment, wrapping
# each one in Sequence::NA and appending it with <<.
def build_na_alignment(*sequences)
  sequences.inject(Alignment.new) do |acc, raw|
    acc << Sequence::NA.new(raw)
  end
end
private :build_na_alignment
|
|
576
|
+
|
|
577
|
+
# Two alignments built from equal sequences must compare equal.
def test_equals
  first = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
  second = Alignment.new([Sequence::NA.new("agct"), Sequence::NA.new("tagc")])
  assert_equal(first, second)
end
|
|
582
|
+
|
|
583
|
+
# Alignment#store
|
|
584
|
+
|
|
585
|
+
# Storing under a key that is already present is expected to keep the
# first value.
def test_store_cannot_override_key
  aln = Alignment.new
  [ "cat", "gcat" ].each do |bases|
    aln.store("Cat DNA", Sequence::NA.new(bases))
  end
  assert_equal("cat", aln["Cat DNA"])
end
|
|
591
|
+
|
|
592
|
+
# Storing with a nil key is expected to assign consecutive integer keys
# starting at 0.
def test_store_with_nil_key_uses_next_number_for_key
  aln = Alignment.new
  [ "cat", "gat", "tat" ].each do |bases|
    aln.store(nil, Sequence::NA.new(bases))
  end
  expected = { 0 => "cat", 1 => "gat", 2 => "tat" }
  assert_equal(expected, aln.to_hash)
end
|
|
599
|
+
|
|
600
|
+
# When explicit and nil keys are mixed, a nil key is expected to become the
# position of the entry (1 and 3 here), not a separate counter.
def test_store_with_default_keys_and_user_defined_keys
  aln = Alignment.new
  aln.store("cat key", Sequence::NA.new("cat"))
  aln.store(nil, Sequence::NA.new("cag"))
  aln.store("gat key", Sequence::NA.new("gat"))
  aln.store(nil, Sequence::NA.new("gag"))

  expected = {
    "gat key" => "gat",
    1 => "cag",
    3 => "gag",
    "cat key" => "cat"
  }
  assert_equal(expected, aln.to_hash)
end
|
|
608
|
+
|
|
609
|
+
# Test append operator
|
|
610
|
+
|
|
611
|
+
# Appending with << is expected to store the sequence under key 0.
def test_seqclass_when_sequence_used
  aln = Alignment.new
  aln << Sequence::NA.new("cat")
  expected = { 0 => "cat" }
  assert_equal(expected, aln.to_hash)
end
|
|
616
|
+
|
|
617
|
+
# Test seqclass
|
|
618
|
+
|
|
619
|
+
# With no explicit seqclass, seqclass is expected to report the class of
# the stored sequence.
def test_seqclass_when_sequence_used_no_seqclass_set
  aln = Alignment.new
  aln << Sequence::NA.new("cat")
  detected = aln.seqclass
  assert_equal(Sequence::NA, detected)
end
|
|
624
|
+
|
|
625
|
+
# With only nil appended and no explicit seqclass, seqclass is expected to
# fall back to String.
def test_seqclass_String_seq_not_present_no_seqclass_set
  aln = Alignment.new
  aln << nil
  detected = aln.seqclass
  assert_equal(String, detected)
end
|
|
630
|
+
|
|
631
|
+
# An explicitly assigned seqclass must be reported back unchanged, even
# when the stored sequence is of a different class.
#
# Integer is used as the arbitrary, deliberately unrelated class: the
# former Fixnum constant was deprecated in Ruby 2.4 and removed in 3.2,
# where referencing it raises NameError before the assertion is reached.
# Integer exists on every Ruby version.
def test_seqclass_when_seqclass_set
  alignment = Alignment.new
  alignment.seqclass = Integer
  alignment << "this doesn't really make sense"
  assert_equal(Integer, alignment.seqclass)
end
|
|
637
|
+
|
|
638
|
+
# Alignment#gap_char
|
|
639
|
+
|
|
640
|
+
# A new Alignment is expected to use "-" as its gap character.
def test_default_gap_char
  gap = Alignment.new.gap_char
  assert_equal("-", gap)
end
|
|
644
|
+
|
|
645
|
+
# gap_char= must be reflected by the gap_char reader.
def test_set_and_get_gap_char
  aln = Alignment.new
  aln.gap_char = "+"
  current = aln.gap_char
  assert_equal("+", current)
end
|
|
650
|
+
|
|
651
|
+
# Alignment#gap_regexp
|
|
652
|
+
|
|
653
|
+
# The default gap_regexp must match the default gap_char.
def test_default_gap_regexp_matches_default_gap_char
  aln = Alignment.new
  pattern = aln.gap_regexp
  assert(pattern.match(aln.gap_char))
end
|
|
657
|
+
|
|
658
|
+
# Alignment#missing_char
|
|
659
|
+
|
|
660
|
+
# A new Alignment is expected to use "?" as its missing character.
def test_default_missing_char
  missing = Alignment.new.missing_char
  assert_equal("?", missing)
end
|
|
664
|
+
|
|
665
|
+
# Alignment#seq_length
|
|
666
|
+
|
|
667
|
+
# With a single sequence, seq_length is that sequence's length.
def test_seq_length_when_one_sequence
  aln = build_na_alignment("agt")
  length = aln.seq_length
  assert_equal(3, length)
end
|
|
671
|
+
|
|
672
|
+
# With sequences of different lengths, seq_length is the longest one.
def test_seq_length_is_max_seq_length
  aln = build_na_alignment("agt", "agtaa", "agta")
  length = aln.seq_length
  assert_equal(5, length)
end
|
|
676
|
+
|
|
677
|
+
# Alignment#each_site
|
|
678
|
+
|
|
679
|
+
# each_site must yield every column of an equal-length alignment, in order.
#
# The final assertion checks that all expected sites were consumed;
# without it the test would pass even if each_site yielded fewer columns
# than expected (or none at all).
def test_each_site_equal_length
  alignment = build_na_alignment("acg", "gta")
  expected_sites = [["a", "g"], ["c", "t"], ["g", "a"]]
  alignment.each_site do |site|
    # shift runs before the message is built, so the message is 1-based.
    assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
  end
  assert(expected_sites.empty?)
end
|
|
686
|
+
|
|
687
|
+
# each_site must yield every column up to the longest sequence, with "-"
# standing in for the shorter sequence in the last column.
#
# The final assertion checks that all expected sites were consumed;
# without it the test would pass even if each_site stopped at the shorter
# sequence or yielded nothing.
def test_each_site_unequal_length
  alignment = build_na_alignment("ac", "gta")
  expected_sites = [["a", "g"], ["c", "t"], ["-", "a"]]
  alignment.each_site do |site|
    # shift runs before the message is built, so the message is 1-based.
    assert_equal expected_sites.shift, site, "site ##{3-expected_sites.size} wrong"
  end
  assert(expected_sites.empty?)
end
|
|
694
|
+
|
|
695
|
+
#TODO: Lots of stuff needing tests here
|
|
696
|
+
|
|
697
|
+
# Alignment#add_seq
|
|
698
|
+
|
|
699
|
+
# add_seq with a plain String and no key: seqclass is expected to be String
# and the sequence is expected to be stored under key 0.
def test_add_seq_no_key
  aln = Alignment.new
  aln.add_seq("agct")
  assert_equal(String, aln.seqclass, "wrong class")
  expected = { 0 => "agct" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
705
|
+
|
|
706
|
+
# add_seq with an object that responds to #seq: the alignment's seqclass is
# expected to be the class of what #seq returns.
def test_add_seq_using_seq_with_seq_method
  source = "agtc"
  # Singleton method: only this one string gains #seq.
  def source.seq
    Sequence::NA.new(self)
  end

  aln = Alignment.new
  aln.add_seq(source, "key")
  assert_equal(Sequence::NA, aln.seqclass, "wrong class")
  expected = { "key" => "agtc" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
719
|
+
|
|
720
|
+
# add_seq with an object that responds to #naseq: the alignment's seqclass
# is expected to be Sequence::NA.
def test_add_seq_using_seq_with_naseq_method
  source = "agtc"
  # Singleton method: only this one string gains #naseq.
  def source.naseq
    Sequence::NA.new(self)
  end

  aln = Alignment.new
  aln.add_seq(source, "key")
  assert_equal(Sequence::NA, aln.seqclass, "wrong class")
  expected = { "key" => "agtc" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
733
|
+
|
|
734
|
+
# add_seq with an object that responds to #aaseq: the alignment's seqclass
# is expected to be Sequence::AA.
def test_add_seq_using_seq_with_aaseq_method
  source = "AVGR"
  # Singleton method: only this one string gains #aaseq.
  def source.aaseq
    Sequence::AA.new(self)
  end

  aln = Alignment.new
  aln.add_seq(source, "key")
  assert_equal(Sequence::AA, aln.seqclass, "wrong class")
  expected = { "key" => "AVGR" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
747
|
+
|
|
748
|
+
# add_seq without a key, given an object that responds to #definition: the
# definition is expected to be used as the key.
def test_add_seq_using_seq_with_definition_method
  source = "atgc"
  # Singleton method: only this one string gains #definition.
  def source.definition
    "this is the key"
  end

  aln = Alignment.new
  aln.add_seq(source)
  expected = { "this is the key" => "atgc" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
760
|
+
|
|
761
|
+
# add_seq without a key, given an object that responds to #entry_id: the
# entry id is expected to be used as the key.
def test_add_seq_using_seq_with_entry_id_method
  source = "atgc"
  # Singleton method: only this one string gains #entry_id.
  def source.entry_id
    271828
  end

  aln = Alignment.new
  aln.add_seq(source)
  expected = { 271828 => "atgc" }
  assert_equal(expected, aln.to_hash, "wrong hash")
end
|
|
773
|
+
|
|
774
|
+
# Alignment#consensus_string
|
|
775
|
+
|
|
776
|
+
# With the default threshold, only columns where both sequences agree are
# expected to produce a symbol; the rest become "?".
def test_consensus_string_no_gaps
  aln = build_na_alignment("agtcgattaa",
                           "tttcgatgcc")
  consensus = aln.consensus_string
  assert_equal("??tcgat???", consensus)
end
|
|
781
|
+
|
|
782
|
+
# Threshold boundary with two sequences: the threshold is the fraction of
# sequences in which a symbol must occur at a position to count as the
# consensus symbol.
def test_consensus_threshold_two_sequences
  aln = build_na_alignment("agtcgattaa",
                           "tttcgatgcc")
  # Exactly half is enough; the first sequence's symbol is expected.
  at_half = aln.consensus(0.5)
  assert_equal("agtcgattaa", at_half)
  # Just above half: only columns where both agree qualify.
  above_half = aln.consensus(0.500000001)
  assert_equal("??tcgat???", above_half)
end
|
|
790
|
+
|
|
791
|
+
# Threshold boundary with four sequences.
def test_consensus_threshold_four_sequences
  aln = build_na_alignment("agtg", "ttag", "actc", "tatc")
  # ties go to the symbol that occurs in the earliest sequence
  at_quarter = aln.consensus(0.25)
  assert_equal("agtg", at_quarter)
  above_quarter = aln.consensus(0.26)
  assert_equal("a?tg", above_quarter)
end
|
|
800
|
+
|
|
801
|
+
# Checks the three :gap_mode settings of consensus at threshold 0.5, i.e. a
# symbol must occur in at least half of the sequences to be the consensus.
def test_consensus_opt_gap_mode
  aln = build_na_alignment("gt-gt-a",
                           "ttcggc-",
                           "ttcggc-")

  # gap_mode -1 means gaps are ignored
  ignoring_gaps = aln.consensus(0.5, :gap_mode => -1)
  assert_equal("ttcggca", ignoring_gaps, "gap mode -1")

  # gap_mode 0 means gaps are treated like regular symbols, yielding a gap
  # in the last position
  gaps_as_symbols = aln.consensus(0.5, :gap_mode => 0)
  assert_equal("ttcggc-", gaps_as_symbols, "gap mode 0")

  # gap_mode 1 means gaps take precedence over any other symbol, yielding
  # two more gaps
  gaps_first = aln.consensus(0.5, :gap_mode => 1)
  assert_equal("tt-gg--", gaps_first, "gap mode 1")
end
|
|
813
|
+
|
|
814
|
+
# The :missing_char option replaces the default symbol in columns that have
# no consensus.
def test_consensus_opt_missing_char
  aln = build_na_alignment("agtcgattaa",
                           "tttcgatgcc")
  consensus = aln.consensus(1, :missing_char => "*")
  assert_equal("**tcgat***", consensus)
end
|
|
819
|
+
|
|
820
|
+
# Alignment#consensus_iupac
|
|
821
|
+
|
|
822
|
+
# Columns that disagree are expected to be merged into IUPAC ambiguity
# codes; agreeing columns keep their base.
def test_consensus_iupac_no_gaps
  aln = build_na_alignment("agtcgattaa", "tttcgatgcc")
  consensus = aln.consensus_iupac
  assert_equal("wktcgatkmm", consensus)
end
|
|
826
|
+
|
|
827
|
+
# Input that already contains ambiguity codes: each column is expected to
# give the code that covers both symbols.
def test_consensus_iupac_of_ambiguous_bases
  aln = build_na_alignment("tmrwsykvhdbnd", "uaaaccgaaacab")
  consensus = aln.consensus_iupac
  assert_equal("tmrwsykvhdbnn", consensus)
end
|
|
831
|
+
|
|
832
|
+
# Checks the three :gap_mode settings of consensus_iupac on a column that
# holds one gap and one base.
def test_consensus_iupac_gap_modes
  aln = build_na_alignment("a-t", "acc")

  # gap_mode -1 means gaps are ignored
  ignoring_gaps = aln.consensus_iupac(:gap_mode => -1)
  assert_equal("acy", ignoring_gaps)

  # gap_mode 0 means gaps are treated as normal characters, yielding a
  # missing symbol
  gaps_as_symbols = aln.consensus_iupac(:gap_mode => 0)
  assert_equal("a?y", gaps_as_symbols)

  # gap_mode 1 means gaps take precedence over everything, yielding a gap
  gaps_first = aln.consensus_iupac(:gap_mode => 1)
  assert_equal("a-y", gaps_first)
end
|
|
841
|
+
|
|
842
|
+
# Table-style check that consensus_iupac maps each combination of bases
# and/or ambiguity codes in a single column to the expected IUPAC code.
# Each group is headed by a comment listing the symbols that make up the
# code; the trailing message ("m #1", ...) identifies the failing case.
#
# For the three-base codes (v, b, h, d) cases #5..#7 cover the three
# pairings of the two-base codes involved. "h #7" used to repeat "h #5"
# ("y", "w"), leaving the "w"/"m" pairing untested; it now uses ("w", "m").
def test_consensus_iupac_yields_correct_ambiguous_bases
  assert_equal "t", build_na_alignment("t", "u").consensus_iupac # not really IUPAC

  # m = a c
  assert_equal "m", build_na_alignment("a", "c").consensus_iupac, "m #1"
  assert_equal "m", build_na_alignment("m", "c").consensus_iupac, "m #2"
  assert_equal "m", build_na_alignment("a", "m").consensus_iupac, "m #3"
  assert_equal "m", build_na_alignment("m", "a", "c").consensus_iupac, "m #4"

  # r = a g
  assert_equal "r", build_na_alignment("a", "g").consensus_iupac, "r #1"
  assert_equal "r", build_na_alignment("r", "g").consensus_iupac, "r #2"
  assert_equal "r", build_na_alignment("a", "r").consensus_iupac, "r #3"
  assert_equal "r", build_na_alignment("a", "r", "g").consensus_iupac, "r #4"

  # w = a t/u
  assert_equal "w", build_na_alignment("a", "t").consensus_iupac, "w #1"
  assert_equal "w", build_na_alignment("a", "u").consensus_iupac, "w #2"
  assert_equal "w", build_na_alignment("w", "a").consensus_iupac, "w #3"
  assert_equal "w", build_na_alignment("t", "w").consensus_iupac, "w #4"
  assert_equal "w", build_na_alignment("w", "u").consensus_iupac, "w #5"
  assert_equal "w", build_na_alignment("u", "t", "a").consensus_iupac, "w #6"
  assert_equal "w", build_na_alignment("w", "u", "t", "a").consensus_iupac, "w #7"

  # s = c g
  assert_equal "s", build_na_alignment("c", "g").consensus_iupac, "s #1"
  assert_equal "s", build_na_alignment("s", "g").consensus_iupac, "s #2"
  assert_equal "s", build_na_alignment("c", "s").consensus_iupac, "s #3"
  assert_equal "s", build_na_alignment("c", "s", "g").consensus_iupac, "s #4"

  # y = c t/u
  assert_equal "y", build_na_alignment("c", "t").consensus_iupac, "y #1"
  assert_equal "y", build_na_alignment("c", "u").consensus_iupac, "y #2"
  assert_equal "y", build_na_alignment("y", "c").consensus_iupac, "y #3"
  assert_equal "y", build_na_alignment("t", "y").consensus_iupac, "y #4"
  assert_equal "y", build_na_alignment("y", "u").consensus_iupac, "y #5"
  assert_equal "y", build_na_alignment("u", "t", "c").consensus_iupac, "y #6"
  assert_equal "y", build_na_alignment("y", "u", "t", "c").consensus_iupac, "y #7"

  # k = g t/u
  assert_equal "k", build_na_alignment("g", "t").consensus_iupac, "k #1"
  assert_equal "k", build_na_alignment("g", "u").consensus_iupac, "k #2"
  assert_equal "k", build_na_alignment("k", "g").consensus_iupac, "k #3"
  assert_equal "k", build_na_alignment("t", "k").consensus_iupac, "k #4"
  assert_equal "k", build_na_alignment("k", "u").consensus_iupac, "k #5"
  assert_equal "k", build_na_alignment("u", "t", "g").consensus_iupac, "k #6"
  assert_equal "k", build_na_alignment("k", "u", "t", "g").consensus_iupac, "k #7"

  # v = a c g m r s
  assert_equal "v", build_na_alignment("a", "c", "g").consensus_iupac, "v #1"
  assert_equal "v", build_na_alignment("g", "m").consensus_iupac, "v #2"
  assert_equal "v", build_na_alignment("a", "s").consensus_iupac, "v #3"
  assert_equal "v", build_na_alignment("c", "r").consensus_iupac, "v #4"
  assert_equal "v", build_na_alignment("m", "s").consensus_iupac, "v #5"
  assert_equal "v", build_na_alignment("m", "r").consensus_iupac, "v #6"
  assert_equal "v", build_na_alignment("s", "r").consensus_iupac, "v #7"
  assert_equal "v", build_na_alignment("s", "r", "m").consensus_iupac, "v #8"
  assert_equal "v", build_na_alignment("s", "r", "m", "a", "c", "g").consensus_iupac, "v #9"
  assert_equal "v", build_na_alignment("v", "g").consensus_iupac, "v #10" # alright, enough

  # b = t/u c g s y k
  assert_equal "b", build_na_alignment("t", "c", "g").consensus_iupac, "b #1"
  assert_equal "b", build_na_alignment("g", "y").consensus_iupac, "b #2"
  assert_equal "b", build_na_alignment("t", "s").consensus_iupac, "b #3"
  assert_equal "b", build_na_alignment("c", "k").consensus_iupac, "b #4"
  assert_equal "b", build_na_alignment("y", "s").consensus_iupac, "b #5"
  assert_equal "b", build_na_alignment("y", "k").consensus_iupac, "b #6"
  assert_equal "b", build_na_alignment("s", "k").consensus_iupac, "b #7"
  assert_equal "b", build_na_alignment("s", "k", "y").consensus_iupac, "b #8"
  assert_equal "b", build_na_alignment("s", "k", "y", "u", "c", "g").consensus_iupac, "b #9"
  assert_equal "b", build_na_alignment("b", "g").consensus_iupac, "b #10"

  # h = t/u c a y w m
  assert_equal "h", build_na_alignment("t", "c", "a").consensus_iupac, "h #1"
  assert_equal "h", build_na_alignment("a", "y").consensus_iupac, "h #2"
  assert_equal "h", build_na_alignment("c", "w").consensus_iupac, "h #3"
  assert_equal "h", build_na_alignment("u", "m").consensus_iupac, "h #4"
  assert_equal "h", build_na_alignment("y", "w").consensus_iupac, "h #5"
  assert_equal "h", build_na_alignment("y", "m").consensus_iupac, "h #6"
  # w (a, t) with m (a, c) covers a, c, t = h
  assert_equal "h", build_na_alignment("w", "m").consensus_iupac, "h #7"
  assert_equal "h", build_na_alignment("w", "m", "y").consensus_iupac, "h #8"
  assert_equal "h", build_na_alignment("w", "m", "y", "t", "c", "a").consensus_iupac, "h #9"
  assert_equal "h", build_na_alignment("h", "t").consensus_iupac, "h #10"

  # d = t/u g a r w k
  assert_equal "d", build_na_alignment("t", "g", "a").consensus_iupac, "d #1"
  assert_equal "d", build_na_alignment("r", "t").consensus_iupac, "d #2"
  assert_equal "d", build_na_alignment("w", "g").consensus_iupac, "d #3"
  assert_equal "d", build_na_alignment("k", "a").consensus_iupac, "d #4"
  assert_equal "d", build_na_alignment("k", "r").consensus_iupac, "d #5"
  assert_equal "d", build_na_alignment("k", "w").consensus_iupac, "d #6"
  assert_equal "d", build_na_alignment("r", "w").consensus_iupac, "d #7"
  assert_equal "d", build_na_alignment("r", "w", "k").consensus_iupac, "d #8"
  assert_equal "d", build_na_alignment("k", "r", "w", "t", "g", "a").consensus_iupac, "d #9"
  assert_equal "d", build_na_alignment("d", "t").consensus_iupac, "d #10"

  # n = anything
  assert_equal "n", build_na_alignment("a", "g", "c", "t").consensus_iupac, "n #1"
  assert_equal "n", build_na_alignment("a", "g", "c", "u").consensus_iupac, "n #2"
  assert_equal "n", build_na_alignment("w", "s").consensus_iupac, "n #3"
  assert_equal "n", build_na_alignment("k", "m").consensus_iupac, "n #4"
  assert_equal "n", build_na_alignment("r", "y").consensus_iupac, "n #5"
end
|
|
945
|
+
|
|
946
|
+
# A column containing the default missing character "?" is expected to give
# "?" in the consensus.
def test_consensus_iupac_missing_char
  aln = build_na_alignment("a??", "ac?")
  consensus = aln.consensus_iupac()
  assert_equal("a??", consensus)
end
|
|
950
|
+
|
|
951
|
+
# With :missing_char => "*", every column containing "*" is expected to
# give "*" in the consensus.
def test_consensus_iupac_missing_char_option
  aln = build_na_alignment("a**t", "ac**")
  consensus = aln.consensus_iupac(:missing_char => "*")
  assert_equal("a***", consensus)
end
|
|
955
|
+
|
|
956
|
+
# Alignment#convert_match
|
|
957
|
+
|
|
958
|
+
# convert_match is expected to return an alignment whose first sequence is
# unchanged and whose later sequences show "." where they match the first.
def test_convert_match
  aln = Alignment.new
  [ "agtcgattaa", "tttcgatgcc" ].each do |bases|
    aln << Sequence::NA.new(bases)
  end
  converted = aln.convert_match
  assert_equal(aln[0], converted[0], "first sequence altered")
  assert_equal("tt.....gcc", converted[1], "wrong match")
end
|
|
966
|
+
|
|
967
|
+
# Alignment#convert_unmatch
|
|
968
|
+
|
|
969
|
+
# convert_unmatch is expected to expand each "." back to the first
# sequence's base at that position.
def test_convert_unmatch
  aln = Alignment.new
  [ "agtcgattaa", "tt.....gcc" ].each do |bases|
    aln << Sequence::NA.new(bases)
  end
  restored = aln.convert_unmatch
  assert_equal("agtcgattaa", restored[0], "first changed")
  assert_equal("tttcgatgcc", restored[1], "second wrong")
end
|
|
977
|
+
|
|
978
|
+
# convert_unmatch with more than two sequences: every later sequence is
# expected to be expanded against the first one.
def test_convert_unmatch_multiple_sequences
  aln = Alignment.new
  [ "agtcgattaa", "tt.....gcc", "c...c..g.c" ].each do |bases|
    aln << Sequence::NA.new(bases)
  end
  restored = aln.convert_unmatch
  assert_equal("agtcgattaa", restored[0], "first changed")
  assert_equal("tttcgatgcc", restored[1], "second wrong")
  assert_equal("cgtccatgac", restored[2], "third wrong")
end
|
|
988
|
+
|
|
989
|
+
# The second sequence is one character longer than the first and ends in a
# match character; the expanded result is expected to be truncated to the
# first sequence's length.
def test_convert_unmatch_different_length_sequences_truncates_seq_if_last_matched
  aln = Alignment.new
  [ "agtcgatta", "tt.....gc." ].each do |bases|
    aln << Sequence::NA.new(bases)
  end
  restored = aln.convert_unmatch
  assert_equal("agtcgatta", restored[0], "first changed")
  assert_equal("tttcgatgc", restored[1], "second wrong") #TODO: verify this is correct, and not . at end
end
|
|
997
|
+
|
|
998
|
+
# convert_unmatch accepts a custom match character ('=' here) in place of
# the default.
def test_convert_unmatch_different_match_char
  aln = Alignment.new
  [ "agtcga", "tt====" ].each do |bases|
    aln << Sequence::NA.new(bases)
  end
  restored = aln.convert_unmatch('=')
  assert_equal("agtcga", restored[0], "first changed")
  assert_equal("tttcga", restored[1], "second wrong")
end
|
|
1006
|
+
|
|
1007
|
+
# Alignment#match_line
|
|
1008
|
+
|
|
1009
|
+
# match_line on two 7-residue proteins must return one marker per column
# (7 characters). Expected per column:
#   A/A '*', E/K ':', L/L '*', F/V '.', M/N ' ', C/N ' ', F/F '*'
# The previous expected literal had the two adjacent blanks collapsed into
# one (6 characters) and could never match a 7-column alignment.
def test_match_line_protein
  alignment = Alignment.new
  alignment << Sequence::AA.new("AELFMCF")
  alignment << Sequence::AA.new("AKLVNNF")
  assert_equal "*:*.  *", alignment.match_line
end
|
|
1015
|
+
|
|
1016
|
+
#TODO: lots more on the consensus, match, etc.
|
|
1017
|
+
|
|
1018
|
+
# Alignment#normalize
|
|
1019
|
+
|
|
1020
|
+
# normalize! is expected to pad every sequence with "-" up to the length of
# the longest one.
def test_normalizebang_extends_sequences_with_gaps
  aln = build_na_alignment("a", "ag", "agc", "agct")
  aln.normalize!
  expected = { 0 => "a---", 1 => "ag--", 2 => "agc-", 3 => "agct" }
  assert_equal(expected, aln.to_hash)
end
|
|
1025
|
+
|
|
1026
|
+
# Alignment#to_clustal
|
|
1027
|
+
end
|
|
1028
|
+
end
|