bio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id: test_embl.rb,v 1.5
|
|
7
|
+
# $Id: test_embl.rb,v 1.5.2.1 2008/02/20 09:56:22 aerts Exp $
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'pathname'
|
|
@@ -150,7 +150,7 @@ module Bio
|
|
|
150
150
|
|
|
151
151
|
# Bio::EMBLDB::COMMON#references
|
|
152
152
|
def test_references
|
|
153
|
-
assert_equal(
|
|
153
|
+
assert_equal(Array, @obj.references.class)
|
|
154
154
|
end
|
|
155
155
|
|
|
156
156
|
# Bio::EMBLDB::COMMON#dr
|
|
@@ -163,7 +163,7 @@ module Bio
|
|
|
163
163
|
end
|
|
164
164
|
|
|
165
165
|
def test_ft
|
|
166
|
-
assert_equal(
|
|
166
|
+
assert_equal(Array, @obj.ft.class)
|
|
167
167
|
end
|
|
168
168
|
|
|
169
169
|
def test_ft_iterator
|
|
@@ -173,7 +173,7 @@ module Bio
|
|
|
173
173
|
end
|
|
174
174
|
|
|
175
175
|
def test_ft_accessor
|
|
176
|
-
assert_equal('CDS', @obj.ft
|
|
176
|
+
assert_equal('CDS', @obj.ft[1].feature)
|
|
177
177
|
end
|
|
178
178
|
|
|
179
179
|
def test_each_cds
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright:: Copyright (C) 2007 Mitsuteru Nakao <n@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id: test_embl_rel89.rb,v 1.2
|
|
7
|
+
# $Id: test_embl_rel89.rb,v 1.2.2.1 2008/02/20 09:56:22 aerts Exp $
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'pathname'
|
|
@@ -16,7 +16,7 @@ require 'test/unit'
|
|
|
16
16
|
require 'bio/db/embl/embl'
|
|
17
17
|
|
|
18
18
|
module Bio
|
|
19
|
-
class
|
|
19
|
+
class TestEMBL89 < Test::Unit::TestCase
|
|
20
20
|
|
|
21
21
|
def setup
|
|
22
22
|
bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
|
|
@@ -155,7 +155,7 @@ module Bio
|
|
|
155
155
|
|
|
156
156
|
# Bio::EMBLDB::COMMON#references
|
|
157
157
|
def test_references
|
|
158
|
-
assert_equal(
|
|
158
|
+
assert_equal(Array, @obj.references.class)
|
|
159
159
|
end
|
|
160
160
|
|
|
161
161
|
# Bio::EMBLDB::COMMON#dr
|
|
@@ -168,7 +168,7 @@ module Bio
|
|
|
168
168
|
end
|
|
169
169
|
|
|
170
170
|
def test_ft
|
|
171
|
-
assert_equal(
|
|
171
|
+
assert_equal(Array, @obj.ft.class)
|
|
172
172
|
end
|
|
173
173
|
|
|
174
174
|
def test_ft_iterator
|
|
@@ -178,7 +178,7 @@ module Bio
|
|
|
178
178
|
end
|
|
179
179
|
|
|
180
180
|
def test_ft_accessor
|
|
181
|
-
assert_equal('CDS', @obj.ft
|
|
181
|
+
assert_equal('CDS', @obj.ft[1].feature)
|
|
182
182
|
end
|
|
183
183
|
|
|
184
184
|
def test_each_cds
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/db/embl/test_embl.rb - Unit test for Bio::EMBL
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005, 2008
|
|
5
|
+
# Mitsuteru Nakao <n@bioruby.org>
|
|
6
|
+
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
# $Id:$
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
require 'pathname'
|
|
13
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
|
|
14
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
15
|
+
|
|
16
|
+
require 'test/unit'
|
|
17
|
+
require 'bio'
|
|
18
|
+
require 'bio/db/embl/embl'
|
|
19
|
+
|
|
20
|
+
module Bio
|
|
21
|
+
class TestEMBLToBioSequence < Test::Unit::TestCase
|
|
22
|
+
|
|
23
|
+
def setup
|
|
24
|
+
bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
|
|
25
|
+
input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
|
|
26
|
+
embl_object = Bio::EMBL.new(input)
|
|
27
|
+
embl_object.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
|
|
28
|
+
@bio_seq = embl_object.to_biosequence
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_entry_id
|
|
32
|
+
assert_equal('AB090716', @bio_seq.entry_id)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def test_primary_accession
|
|
36
|
+
assert_equal('AB090716', @bio_seq.primary_accession)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def test_secondary_accessions
|
|
40
|
+
assert_equal([], @bio_seq.secondary_accessions)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def test_molecule_type
|
|
44
|
+
assert_equal('genomic DNA', @bio_seq.molecule_type)
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_definition
|
|
48
|
+
assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq.definition)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def test_topology
|
|
52
|
+
assert_equal('linear', @bio_seq.topology)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_date_created
|
|
56
|
+
# '25-OCT-2002 (Rel. 73, Created)'
|
|
57
|
+
assert_equal(Date.parse('25-OCT-2002'), @bio_seq.date_created)
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def test_date_modified
|
|
61
|
+
# '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
|
|
62
|
+
assert_equal(Date.parse('14-NOV-2006'), @bio_seq.date_modified)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_release_created
|
|
66
|
+
assert_equal('73', @bio_seq.release_created)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def test_release_modified
|
|
70
|
+
assert_equal('89', @bio_seq.release_modified)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def test_entry_version
|
|
74
|
+
assert_equal('3', @bio_seq.entry_version)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def test_division
|
|
78
|
+
assert_equal('VRT', @bio_seq.division)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def test_sequence_version
|
|
82
|
+
assert_equal(1, @bio_seq.sequence_version)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def test_keywords
|
|
86
|
+
assert_equal([], @bio_seq.keywords)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def test_species
|
|
90
|
+
assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq.species)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def test_classification
|
|
94
|
+
assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq.classification)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def test_references
|
|
100
|
+
assert_equal(2, @bio_seq.references.length)
|
|
101
|
+
assert_equal(Bio::Reference, @bio_seq.references[0].class)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def test_features
|
|
105
|
+
assert_equal(3, @bio_seq.features.length)
|
|
106
|
+
assert_equal(Bio::Feature, @bio_seq.features[0].class)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
# To really test the Bio::EMBL to Bio::Sequence conversion, we need to test if
|
|
112
|
+
# that Bio::Sequence can be made into a valid Bio::EMBL again.
|
|
113
|
+
class TestEMBLToBioSequenceRoundTrip < Test::Unit::TestCase
|
|
114
|
+
def setup
|
|
115
|
+
bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
|
|
116
|
+
input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
|
|
117
|
+
embl_object_1 = Bio::EMBL.new(input)
|
|
118
|
+
embl_object_1.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
|
|
119
|
+
@bio_seq_1 = embl_object_1.to_biosequence
|
|
120
|
+
embl_object_2 = Bio::EMBL.new(@bio_seq_1.output(:embl))
|
|
121
|
+
@bio_seq_2 = embl_object_2.to_biosequence
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def test_entry_id
|
|
125
|
+
assert_equal('AB090716', @bio_seq_2.entry_id)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def test_primary_accession
|
|
129
|
+
assert_equal('AB090716', @bio_seq_2.primary_accession)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def test_secondary_accessions
|
|
133
|
+
assert_equal([], @bio_seq_2.secondary_accessions)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def test_molecule_type
|
|
137
|
+
assert_equal('genomic DNA', @bio_seq_2.molecule_type)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def test_definition
|
|
141
|
+
assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq_2.definition)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def test_topology
|
|
145
|
+
assert_equal('linear', @bio_seq_2.topology)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def test_date_created
|
|
149
|
+
# '25-OCT-2002 (Rel. 73, Created)'
|
|
150
|
+
assert_equal(Date.parse('25-OCT-2002'), @bio_seq_2.date_created)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
def test_date_modified
|
|
154
|
+
# '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
|
|
155
|
+
assert_equal(Date.parse('14-NOV-2006'), @bio_seq_2.date_modified)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def test_release_created
|
|
159
|
+
assert_equal('73', @bio_seq_2.release_created)
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
def test_release_modified
|
|
163
|
+
assert_equal('89', @bio_seq_2.release_modified)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def test_entry_version
|
|
167
|
+
assert_equal('3', @bio_seq_2.entry_version)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
def test_division
|
|
171
|
+
assert_equal('VRT', @bio_seq_2.division)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def test_sequence_version
|
|
175
|
+
assert_equal(1, @bio_seq_2.sequence_version)
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def test_keywords
|
|
179
|
+
assert_equal([], @bio_seq_2.keywords)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def test_species
|
|
183
|
+
assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq_2.species)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
def test_classification
|
|
187
|
+
assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq_2.classification)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def test_references
|
|
193
|
+
assert_equal(2, @bio_seq_2.references.length)
|
|
194
|
+
assert_equal(Bio::Reference, @bio_seq_2.references[0].class)
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def test_features
|
|
198
|
+
assert_equal(3, @bio_seq_2.features.length)
|
|
199
|
+
assert_equal(Bio::Feature, @bio_seq_2.features[0].class)
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# Copyright::: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
|
5
5
|
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'pathname'
|
|
@@ -78,6 +78,43 @@ module Bio
|
|
|
78
78
|
assert_equal('P04637', @obj.accession)
|
|
79
79
|
end
|
|
80
80
|
|
|
81
|
+
def test_dr
|
|
82
|
+
assert_equal(17, @obj.dr.size)
|
|
83
|
+
assert_equal(27, @obj.dr['GO'].size)
|
|
84
|
+
assert_equal([["IPR002117", "P53"],
|
|
85
|
+
["IPR011615", "P53_DNA_bd"],
|
|
86
|
+
["IPR012346", "P53_RUNT_DNA_bd"],
|
|
87
|
+
["IPR010991", "p53_tetrameristn"]],
|
|
88
|
+
@obj.dr['InterPro'])
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def test_dr_with_key
|
|
92
|
+
pfam = [
|
|
93
|
+
{ " " => "1",
|
|
94
|
+
"Version" => "P53",
|
|
95
|
+
"Accession" => "PF00870",
|
|
96
|
+
"Molecular Type" => nil
|
|
97
|
+
},
|
|
98
|
+
{ " " => "1",
|
|
99
|
+
"Version" => "P53_tetramer",
|
|
100
|
+
"Accession" => "PF07710",
|
|
101
|
+
"Molecular Type" => nil
|
|
102
|
+
}
|
|
103
|
+
]
|
|
104
|
+
assert_equal(pfam, @obj.dr('Pfam'))
|
|
105
|
+
embl3 = {
|
|
106
|
+
" " => "JOINED",
|
|
107
|
+
"Version" => "AAA59987.1",
|
|
108
|
+
"Accession" => "M13113",
|
|
109
|
+
"Molecular Type" => "Genomic_DNA"
|
|
110
|
+
}
|
|
111
|
+
assert_equal(embl3, @obj.dr('EMBL')[3])
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
def test_dr_with_key_empty
|
|
115
|
+
assert_equal([], @obj.dr('NOT_A_DATABASE'))
|
|
116
|
+
end
|
|
117
|
+
|
|
81
118
|
def test_dt
|
|
82
119
|
assert(@obj.dt)
|
|
83
120
|
end
|
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
#
|
|
7
7
|
# License:: The Ruby License
|
|
8
8
|
#
|
|
9
|
-
# $Id
|
|
9
|
+
# $Id:$
|
|
10
10
|
#
|
|
11
11
|
|
|
12
12
|
require 'pathname'
|
|
13
|
-
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] *
|
|
13
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
|
|
14
14
|
$:.unshift(libpath) unless $:.include?(libpath)
|
|
15
15
|
|
|
16
16
|
require 'test/unit'
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
#
|
|
2
2
|
# test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF
|
|
3
3
|
#
|
|
4
|
-
# Copyright:: Copyright (C) 2005
|
|
4
|
+
# Copyright:: Copyright (C) 2005, 2008
|
|
5
|
+
# Mitsuteru Nakao <n@bioruby.org>
|
|
6
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
5
7
|
# License:: The Ruby License
|
|
6
8
|
#
|
|
7
|
-
# $Id
|
|
9
|
+
# $Id:$
|
|
8
10
|
#
|
|
9
11
|
|
|
10
12
|
require 'pathname'
|
|
@@ -12,13 +14,14 @@ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cle
|
|
|
12
14
|
$:.unshift(libpath) unless $:.include?(libpath)
|
|
13
15
|
|
|
14
16
|
require 'test/unit'
|
|
17
|
+
require 'digest/sha1'
|
|
15
18
|
require 'bio/db/gff'
|
|
16
19
|
|
|
17
20
|
module Bio
|
|
18
21
|
class TestGFF < Test::Unit::TestCase
|
|
19
22
|
|
|
20
23
|
def setup
|
|
21
|
-
data = <<
|
|
24
|
+
data = <<END_OF_DATA
|
|
22
25
|
I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1\; Chromosome I Centromere"
|
|
23
26
|
I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
24
27
|
I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
@@ -27,7 +30,7 @@ I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3\; transcription factor
|
|
|
27
30
|
I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
28
31
|
I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
29
32
|
I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
30
|
-
|
|
33
|
+
END_OF_DATA
|
|
31
34
|
@obj = Bio::GFF.new(data)
|
|
32
35
|
end
|
|
33
36
|
|
|
@@ -41,27 +44,12 @@ END
|
|
|
41
44
|
|
|
42
45
|
end # class TestGFF
|
|
43
46
|
|
|
44
|
-
|
|
45
|
-
class TestGFF2 < Test::Unit::TestCase
|
|
46
|
-
def test_version
|
|
47
|
-
assert_equal(2, Bio::GFF::GFF2::VERSION)
|
|
48
|
-
end
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class TestGFF3 < Test::Unit::TestCase
|
|
53
|
-
def test_version
|
|
54
|
-
assert_equal(3, Bio::GFF::GFF3::VERSION)
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
|
|
59
47
|
class TestGFFRecord < Test::Unit::TestCase
|
|
60
48
|
|
|
61
49
|
def setup
|
|
62
|
-
data =<<
|
|
50
|
+
data =<<END_OF_DATA
|
|
63
51
|
I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"
|
|
64
|
-
|
|
52
|
+
END_OF_DATA
|
|
65
53
|
@obj = Bio::GFF::Record.new(data)
|
|
66
54
|
end
|
|
67
55
|
|
|
@@ -102,13 +90,12 @@ END
|
|
|
102
90
|
assert_equal(at, @obj.attributes)
|
|
103
91
|
end
|
|
104
92
|
|
|
105
|
-
def
|
|
106
|
-
assert_equal(nil, @obj.
|
|
93
|
+
def test_comment
|
|
94
|
+
assert_equal(nil, @obj.comment)
|
|
107
95
|
end
|
|
108
96
|
|
|
109
97
|
end # class TestGFFRecord
|
|
110
98
|
|
|
111
|
-
|
|
112
99
|
class TestGFFRecordConstruct < Test::Unit::TestCase
|
|
113
100
|
|
|
114
101
|
def setup
|
|
@@ -124,4 +111,1143 @@ END
|
|
|
124
111
|
end
|
|
125
112
|
|
|
126
113
|
end # class TestGFFRecordConstruct
|
|
127
|
-
|
|
114
|
+
|
|
115
|
+
class TestGFF2 < Test::Unit::TestCase
|
|
116
|
+
def setup
|
|
117
|
+
data = <<END_OF_DATA
|
|
118
|
+
##gff-version 2
|
|
119
|
+
##date 2008-09-22
|
|
120
|
+
I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1; Chromosome I Centromere"
|
|
121
|
+
I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
122
|
+
I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
123
|
+
I sgd gene 147591 151163 . - . Gene "TSV115" ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
124
|
+
I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
|
|
125
|
+
I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
126
|
+
I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
127
|
+
I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
128
|
+
END_OF_DATA
|
|
129
|
+
@obj = Bio::GFF::GFF2.new(data)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def test_const_version
|
|
133
|
+
assert_equal(2, Bio::GFF::GFF2::VERSION)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def test_gff_version
|
|
137
|
+
assert_equal('2', @obj.gff_version)
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def test_metadata_size
|
|
141
|
+
assert_equal(1, @obj.metadata.size)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def test_metadata
|
|
145
|
+
assert_equal(Bio::GFF::GFF2::MetaData.new('date', '2008-09-22'),
|
|
146
|
+
@obj.metadata[0])
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def test_records_size
|
|
150
|
+
assert_equal(8, @obj.records.size)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
def test_to_s
|
|
154
|
+
str = <<END_OF_DATA
|
|
155
|
+
##gff-version 2
|
|
156
|
+
##date 2008-09-22
|
|
157
|
+
I sgd CEN 151453 151591 . + . CEN CEN1 ; Note "CEN1; Chromosome I Centromere"
|
|
158
|
+
I sgd gene 147591 151163 . - . Gene TFC3 ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
159
|
+
I sgd gene 147591 151163 . - . Gene FUN24 ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
160
|
+
I sgd gene 147591 151163 . - . Gene TSV115 ; Note "transcription factor tau (TFIIIC) subunit 138"
|
|
161
|
+
I sgd ORF 147591 151163 . - . ORF YAL001C ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
|
|
162
|
+
I sgd gene 143998 147528 . + . Gene VPS8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
163
|
+
I sgd gene 143998 147528 . + . Gene FUN15 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
164
|
+
I sgd gene 143998 147528 . + . Gene VPT8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
|
|
165
|
+
END_OF_DATA
|
|
166
|
+
assert_equal(str, @obj.to_s)
|
|
167
|
+
end
|
|
168
|
+
end #class TestGFF2
|
|
169
|
+
|
|
170
|
+
class TestGFF2Record < Test::Unit::TestCase
|
|
171
|
+
def setup
|
|
172
|
+
str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget \"HBA_HUMAN\" 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\x1a\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\"; Misc IdString; Misc \"free text\"; Misc 5678 "
|
|
173
|
+
|
|
174
|
+
@obj = Bio::GFF::GFF2::Record.new(str)
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def test_to_s
|
|
178
|
+
str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget HBA_HUMAN 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\032\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\" ; Misc IdString ; Misc \"free text\" ; Misc 5678\n"
|
|
179
|
+
|
|
180
|
+
assert_equal(str, @obj.to_s)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def test_eqeq
|
|
184
|
+
obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
|
|
185
|
+
assert_equal(true, @obj == obj2)
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def test_eqeq_false
|
|
189
|
+
obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
|
|
190
|
+
obj2.seqname = 'seq2'
|
|
191
|
+
assert_equal(false, @obj == obj2)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
def test_comment_only?
|
|
195
|
+
assert_equal(false, @obj.comment_only?)
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def test_seqname
|
|
199
|
+
assert_equal('seq1', @obj.seqname)
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def test_source
|
|
203
|
+
assert_equal('BLASTX', @obj.source)
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def test_feature
|
|
207
|
+
assert_equal('similarity', @obj.feature)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def test_start
|
|
211
|
+
assert_equal(101, @obj.start)
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def test_end
|
|
215
|
+
assert_equal(235, @obj.end)
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
def test_score
|
|
219
|
+
assert_equal(87.1, @obj.score)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
def test_strand
|
|
223
|
+
assert_equal('+', @obj.strand)
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def test_frame
|
|
227
|
+
assert_equal(0, @obj.frame)
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
def test_attributes_to_hash
|
|
231
|
+
hash = {
|
|
232
|
+
'Target' =>
|
|
233
|
+
Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']),
|
|
234
|
+
'E_value' => '0.0003',
|
|
235
|
+
'Align' =>
|
|
236
|
+
Bio::GFF::GFF2::Record::Value.new(['101', '11']),
|
|
237
|
+
'Comment' =>
|
|
238
|
+
Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]),
|
|
239
|
+
'Note' => '',
|
|
240
|
+
'Misc' => 'IdString'
|
|
241
|
+
}
|
|
242
|
+
assert_equal(hash, @obj.attributes_to_hash)
|
|
243
|
+
end
|
|
244
|
+
|
|
245
|
+
def test_attributes
|
|
246
|
+
attributes =
|
|
247
|
+
[ [ 'Target',
|
|
248
|
+
Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ],
|
|
249
|
+
[ 'E_value', '0.0003' ],
|
|
250
|
+
[ 'Align',
|
|
251
|
+
Bio::GFF::GFF2::Record::Value.new(['101', '11']) ],
|
|
252
|
+
[ 'Align',
|
|
253
|
+
Bio::GFF::GFF2::Record::Value.new(['179', '36']) ],
|
|
254
|
+
[ 'Comment',
|
|
255
|
+
Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ],
|
|
256
|
+
[ 'Note', '' ],
|
|
257
|
+
[ 'Misc', 'IdString' ],
|
|
258
|
+
[ 'Misc', 'free text' ],
|
|
259
|
+
[ 'Misc', '5678' ]
|
|
260
|
+
]
|
|
261
|
+
assert_equal(attributes, @obj.attributes)
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
def test_attribute
|
|
265
|
+
val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
|
|
266
|
+
assert_equal(val_Target, @obj.attribute('Target'))
|
|
267
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
268
|
+
val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
|
|
269
|
+
val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
|
|
270
|
+
assert_equal(val_Align0, @obj.attribute('Align'))
|
|
271
|
+
val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
|
|
272
|
+
assert_equal(val_Comment, @obj.attribute('Comment'))
|
|
273
|
+
assert_equal('', @obj.attribute('Note'))
|
|
274
|
+
assert_equal('IdString', @obj.attribute('Misc'))
|
|
275
|
+
end
|
|
276
|
+
|
|
277
|
+
def test_attribute_nonexistent
|
|
278
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
def test_get_attribute
|
|
282
|
+
val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
|
|
283
|
+
assert_equal(val_Target, @obj.get_attribute('Target'))
|
|
284
|
+
assert_equal('0.0003', @obj.get_attribute('E_value'))
|
|
285
|
+
val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
|
|
286
|
+
val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
|
|
287
|
+
assert_equal(val_Align0, @obj.get_attribute('Align'))
|
|
288
|
+
val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
|
|
289
|
+
assert_equal(val_Comment, @obj.get_attribute('Comment'))
|
|
290
|
+
assert_equal('', @obj.get_attribute('Note'))
|
|
291
|
+
assert_equal('IdString', @obj.get_attribute('Misc'))
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
def test_get_attribute_nonexistent
|
|
295
|
+
assert_equal(nil, @obj.get_attribute('NonExistent'))
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def test_get_attributes
|
|
299
|
+
val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
|
|
300
|
+
assert_equal([ val_Target ], @obj.get_attributes('Target'))
|
|
301
|
+
assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
|
|
302
|
+
val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
|
|
303
|
+
val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
|
|
304
|
+
assert_equal([ val_Align0, val_Align1 ],
|
|
305
|
+
@obj.get_attributes('Align'))
|
|
306
|
+
val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
|
|
307
|
+
assert_equal([ val_Comment ], @obj.get_attributes('Comment'))
|
|
308
|
+
assert_equal([ '' ], @obj.get_attributes('Note'))
|
|
309
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
310
|
+
@obj.get_attributes('Misc'))
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
def test_get_attributes_nonexistent
|
|
314
|
+
assert_equal([], @obj.get_attributes('NonExistent'))
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def test_set_attribute
|
|
318
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
319
|
+
assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10'))
|
|
320
|
+
assert_equal('1e-10', @obj.attribute('E_value'))
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
def test_set_attribute_multiple
|
|
324
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
325
|
+
@obj.get_attributes('Misc'))
|
|
326
|
+
assert_equal('Replaced',
|
|
327
|
+
@obj.set_attribute('Misc', 'Replaced'))
|
|
328
|
+
assert_equal([ 'Replaced', 'free text', '5678' ],
|
|
329
|
+
@obj.get_attributes('Misc'))
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
def test_set_attribute_nonexistent
|
|
333
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
334
|
+
assert_equal('test', @obj.set_attribute('NonExistent', 'test'))
|
|
335
|
+
assert_equal('test', @obj.attribute('NonExistent'))
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
def test_replace_attributes
|
|
339
|
+
assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
|
|
340
|
+
assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10'))
|
|
341
|
+
assert_equal([ '1e-10' ], @obj.get_attributes('E_value'))
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
def test_replace_attributes_single_multiple
|
|
345
|
+
assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
|
|
346
|
+
assert_equal(@obj, @obj.replace_attributes('E_value',
|
|
347
|
+
'1e-10', '3.14', '2.718'))
|
|
348
|
+
assert_equal([ '1e-10', '3.14', '2.718' ],
|
|
349
|
+
@obj.get_attributes('E_value'))
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
def test_replace_attributes_multiple_single
|
|
353
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
354
|
+
@obj.get_attributes('Misc'))
|
|
355
|
+
assert_equal(@obj,
|
|
356
|
+
@obj.replace_attributes('Misc', 'Replaced_All'))
|
|
357
|
+
assert_equal([ 'Replaced_All' ],
|
|
358
|
+
@obj.get_attributes('Misc'))
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
def test_replace_attributes_multiple_multiple_two
|
|
362
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
363
|
+
@obj.get_attributes('Misc'))
|
|
364
|
+
assert_equal(@obj,
|
|
365
|
+
@obj.replace_attributes('Misc',
|
|
366
|
+
'Replaced', 'test2'))
|
|
367
|
+
assert_equal([ 'Replaced', 'test2' ],
|
|
368
|
+
@obj.get_attributes('Misc'))
|
|
369
|
+
end
|
|
370
|
+
|
|
371
|
+
def test_replace_attributes_multiple_multiple_same
|
|
372
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
373
|
+
@obj.get_attributes('Misc'))
|
|
374
|
+
assert_equal(@obj,
|
|
375
|
+
@obj.replace_attributes('Misc',
|
|
376
|
+
'Replaced', 'test2', 'test3'))
|
|
377
|
+
assert_equal([ 'Replaced', 'test2', 'test3' ],
|
|
378
|
+
@obj.get_attributes('Misc'))
|
|
379
|
+
end
|
|
380
|
+
|
|
381
|
+
def test_replace_attributes_multiple_multiple_over
|
|
382
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
383
|
+
@obj.get_attributes('Misc'))
|
|
384
|
+
assert_equal(@obj,
|
|
385
|
+
@obj.replace_attributes('Misc',
|
|
386
|
+
'Replaced', 'test2', 'test3', '4'))
|
|
387
|
+
assert_equal([ 'Replaced', 'test2', 'test3', '4' ],
|
|
388
|
+
@obj.get_attributes('Misc'))
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
def test_replace_attributes_nonexistent
|
|
392
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
393
|
+
assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test'))
|
|
394
|
+
assert_equal([ 'test' ], @obj.get_attributes('NonExistent'))
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
def test_replace_attributes_nonexistent_multiple
|
|
398
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
399
|
+
assert_equal(@obj,
|
|
400
|
+
@obj.replace_attributes('NonExistent',
|
|
401
|
+
'test', 'gff2', 'attr'))
|
|
402
|
+
assert_equal([ 'test', 'gff2', 'attr' ],
|
|
403
|
+
@obj.get_attributes('NonExistent'))
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
def test_delete_attribute
|
|
407
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
408
|
+
assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003'))
|
|
409
|
+
assert_equal(nil, @obj.attribute('E_value'))
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
def test_delete_attribute_nil
|
|
413
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
414
|
+
assert_equal(nil, @obj.delete_attribute('E_value', '3'))
|
|
415
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
def test_delete_attribute_multiple
|
|
419
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
420
|
+
@obj.get_attributes('Misc'))
|
|
421
|
+
assert_equal('free text',
|
|
422
|
+
@obj.delete_attribute('Misc', 'free text'))
|
|
423
|
+
assert_equal([ 'IdString', '5678' ],
|
|
424
|
+
@obj.get_attributes('Misc'))
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
def test_delete_attribute_multiple2
|
|
428
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
429
|
+
@obj.get_attributes('Misc'))
|
|
430
|
+
assert_equal('IdString',
|
|
431
|
+
@obj.delete_attribute('Misc', 'IdString'))
|
|
432
|
+
assert_equal([ 'free text', '5678' ],
|
|
433
|
+
@obj.get_attributes('Misc'))
|
|
434
|
+
assert_equal('5678',
|
|
435
|
+
@obj.delete_attribute('Misc', '5678'))
|
|
436
|
+
assert_equal([ 'free text' ],
|
|
437
|
+
@obj.get_attributes('Misc'))
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
def test_delete_attribute_multiple_nil
|
|
441
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
442
|
+
@obj.get_attributes('Misc'))
|
|
443
|
+
assert_equal(nil,
|
|
444
|
+
@obj.delete_attribute('Misc', 'test'))
|
|
445
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
446
|
+
@obj.get_attributes('Misc'))
|
|
447
|
+
end
|
|
448
|
+
|
|
449
|
+
def test_delete_attribute_nonexistent
|
|
450
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
451
|
+
assert_equal(nil, @obj.delete_attribute('NonExistent', 'test'))
|
|
452
|
+
assert_equal([], @obj.get_attributes('NonExistent'))
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def test_delete_attributes
|
|
456
|
+
assert_equal('0.0003', @obj.attribute('E_value'))
|
|
457
|
+
assert_equal(@obj, @obj.delete_attributes('E_value'))
|
|
458
|
+
assert_equal(nil, @obj.attribute('E_value'))
|
|
459
|
+
end
|
|
460
|
+
|
|
461
|
+
def test_delete_attributes_multiple
|
|
462
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
463
|
+
@obj.get_attributes('Misc'))
|
|
464
|
+
assert_equal(@obj, @obj.delete_attributes('Misc'))
|
|
465
|
+
assert_equal([], @obj.get_attributes('Misc'))
|
|
466
|
+
end
|
|
467
|
+
|
|
468
|
+
def test_delete_attributes_nonexistent
|
|
469
|
+
assert_equal(nil, @obj.attribute('NonExistent'))
|
|
470
|
+
assert_equal(nil, @obj.delete_attributes('NonExistent'))
|
|
471
|
+
assert_equal([], @obj.get_attributes('NonExistent'))
|
|
472
|
+
end
|
|
473
|
+
|
|
474
|
+
def test_sort_attributes_by_tag!
|
|
475
|
+
tags = %w( Comment Align E_value Note )
|
|
476
|
+
assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
|
|
477
|
+
assert_equal(%w( Comment Align Align E_value Note Target
|
|
478
|
+
Misc Misc Misc ),
|
|
479
|
+
@obj.attributes.collect { |x| x[0] })
|
|
480
|
+
# check that the order of the 'Misc' values is unchanged
|
|
481
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
482
|
+
@obj.get_attributes('Misc'))
|
|
483
|
+
end
|
|
484
|
+
|
|
485
|
+
def test_sort_attributes_by_tag_bang_test2
|
|
486
|
+
tags = %w( E_value Misc Note Target )
|
|
487
|
+
assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
|
|
488
|
+
assert_equal(%w( E_value Misc Misc Misc Note Target
|
|
489
|
+
Align Align Comment ),
|
|
490
|
+
@obj.attributes.collect { |x| x[0] })
|
|
491
|
+
# check that the order of the 'Misc' values is unchanged
|
|
492
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
493
|
+
@obj.get_attributes('Misc'))
|
|
494
|
+
end
|
|
495
|
+
|
|
496
|
+
def test_sort_attributes_by_tag_bang_with_block
|
|
497
|
+
assert_equal(@obj,
|
|
498
|
+
@obj.sort_attributes_by_tag! { |x, y|
|
|
499
|
+
x <=> y
|
|
500
|
+
})
|
|
501
|
+
assert_equal(%w( Align Align Comment E_value Misc Misc Misc
|
|
502
|
+
Note Target ),
|
|
503
|
+
@obj.attributes.collect { |x| x[0] })
|
|
504
|
+
# check that the order of the 'Misc' values is unchanged
|
|
505
|
+
assert_equal([ 'IdString', 'free text', '5678' ],
|
|
506
|
+
@obj.get_attributes('Misc'))
|
|
507
|
+
end
|
|
508
|
+
end #class TestGFF2Record
|
|
509
|
+
|
|
510
|
+
class TestGFF2RecordEmpty < Test::Unit::TestCase
|
|
511
|
+
def setup
|
|
512
|
+
@obj = Bio::GFF::GFF2::Record.new('# test comment')
|
|
513
|
+
end
|
|
514
|
+
|
|
515
|
+
def test_comment_only?
|
|
516
|
+
assert_equal(true, @obj.comment_only?)
|
|
517
|
+
end
|
|
518
|
+
|
|
519
|
+
def test_comment_only_false
|
|
520
|
+
@obj.seqname = 'test'
|
|
521
|
+
assert_equal(false, @obj.comment_only?)
|
|
522
|
+
end
|
|
523
|
+
|
|
524
|
+
def test_to_s
|
|
525
|
+
assert_equal("# test comment\n", @obj.to_s)
|
|
526
|
+
end
|
|
527
|
+
|
|
528
|
+
def test_to_s_not_empty
|
|
529
|
+
@obj.seqname = 'test'
|
|
530
|
+
@obj.feature = 'region'
|
|
531
|
+
@obj.start = 1
|
|
532
|
+
@obj.end = 100
|
|
533
|
+
assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n",
|
|
534
|
+
@obj.to_s)
|
|
535
|
+
@obj.add_attribute('Gene', 'unknown')
|
|
536
|
+
assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n",
|
|
537
|
+
@obj.to_s)
|
|
538
|
+
end
|
|
539
|
+
|
|
540
|
+
def test_comment
|
|
541
|
+
assert_equal(' test comment', @obj.comment)
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
def test_comment_eq
|
|
545
|
+
assert_equal('changed the comment',
|
|
546
|
+
@obj.comment = 'changed the comment')
|
|
547
|
+
end
|
|
548
|
+
end #class TestGFF2RecordEmpty
|
|
549
|
+
|
|
550
|
+
class TestGFF2ComplexAttributes < Test::Unit::TestCase
|
|
551
|
+
|
|
552
|
+
# The test string comes from the Poplar genome annotation from the JGI.
|
|
553
|
+
# ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz
|
|
554
|
+
# Thanks to Tomoaki NISHIYAMA, who picked out the example line.
|
|
555
|
+
def test_attributes_case1
|
|
556
|
+
str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n"
|
|
557
|
+
|
|
558
|
+
attributes = [
|
|
559
|
+
[ "name", "grail3.0116000101" ],
|
|
560
|
+
[ "proteinId", "639579" ],
|
|
561
|
+
[ "exonNumber", "3" ]
|
|
562
|
+
]
|
|
563
|
+
record = Bio::GFF::GFF2::Record.new(str)
|
|
564
|
+
assert_equal(attributes, record.attributes)
|
|
565
|
+
end
|
|
566
|
+
|
|
567
|
+
# The test string is modified from that of test_attributes_case1.
|
|
568
|
+
def test_attributes_case2
|
|
569
|
+
str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n"
|
|
570
|
+
|
|
571
|
+
attributes = [
|
|
572
|
+
[ "name", "grail3.0116000101" ],
|
|
573
|
+
[ "proteinId", "639579" ],
|
|
574
|
+
[ "exonNumber", "3" ],
|
|
575
|
+
[ "Note", "Semicolons ; and ;, and quote \" can be OK" ],
|
|
576
|
+
[ "Comment", "This is the \"comment\"" ]
|
|
577
|
+
]
|
|
578
|
+
record = Bio::GFF::GFF2::Record.new(str)
|
|
579
|
+
assert_equal(attributes, record.attributes)
|
|
580
|
+
end
|
|
581
|
+
|
|
582
|
+
def test_attributes_incompatible_backslash_semicolon
|
|
583
|
+
# No special treatment for backslash-semicolon outside the free text.
|
|
584
|
+
str =<<END_OF_DATA
|
|
585
|
+
I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"; Semicolon a "b;c" d "e;f;g" h; Illegal a\\;b c d; Comment "a ; b"
|
|
586
|
+
END_OF_DATA
|
|
587
|
+
|
|
588
|
+
attributes = [
|
|
589
|
+
[ 'Gene', 'CEN1' ],
|
|
590
|
+
[ 'Note', 'Chromosome I Centromere' ],
|
|
591
|
+
[ 'Semicolon',
|
|
592
|
+
Bio::GFF::GFF2::Record::Value.new(['a', 'b;c', 'd', 'e;f;g', 'h']) ],
|
|
593
|
+
[ 'Illegal', "a\\" ],
|
|
594
|
+
[ 'b', Bio::GFF::GFF2::Record::Value.new(['c', 'd']) ],
|
|
595
|
+
[ 'Comment', 'a ; b' ]
|
|
596
|
+
]
|
|
597
|
+
record = Bio::GFF::GFF2::Record.new(str)
|
|
598
|
+
assert_equal(attributes, record.attributes)
|
|
599
|
+
end
|
|
600
|
+
|
|
601
|
+
end #class TestGFF2ComplexAttributes
|
|
602
|
+
|
|
603
|
+
class TestGFF2MetaData < Test::Unit::TestCase
|
|
604
|
+
def setup
|
|
605
|
+
@data =
|
|
606
|
+
Bio::GFF::GFF2::MetaData.new('date', '2008-09-22')
|
|
607
|
+
end
|
|
608
|
+
|
|
609
|
+
def test_parse
|
|
610
|
+
assert_equal(@data,
|
|
611
|
+
Bio::GFF::GFF2::MetaData.parse('##date 2008-09-22'))
|
|
612
|
+
end
|
|
613
|
+
|
|
614
|
+
def test_directive
|
|
615
|
+
assert_equal('date', @data.directive)
|
|
616
|
+
end
|
|
617
|
+
|
|
618
|
+
def test_data
|
|
619
|
+
assert_equal('2008-09-22', @data.data)
|
|
620
|
+
end
|
|
621
|
+
end #class TestGFF2MetaData
|
|
622
|
+
|
|
623
|
+
class TestGFF3 < Test::Unit::TestCase
|
|
624
|
+
def setup
|
|
625
|
+
@data =<<END_OF_DATA
|
|
626
|
+
##gff-version 3
|
|
627
|
+
##sequence-region test01 1 400
|
|
628
|
+
test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
|
|
629
|
+
test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
|
|
630
|
+
test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
|
|
631
|
+
test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
|
|
632
|
+
test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
|
|
633
|
+
test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
|
|
634
|
+
test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
|
|
635
|
+
##FASTA
|
|
636
|
+
>test01
|
|
637
|
+
ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
|
|
638
|
+
GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
|
|
639
|
+
CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
|
|
640
|
+
AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
|
|
641
|
+
GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
|
|
642
|
+
CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
|
|
643
|
+
END_OF_DATA
|
|
644
|
+
@gff3 = Bio::GFF::GFF3.new(@data)
|
|
645
|
+
end
|
|
646
|
+
|
|
647
|
+
def test_const_version
|
|
648
|
+
assert_equal(3, Bio::GFF::GFF3::VERSION)
|
|
649
|
+
end
|
|
650
|
+
|
|
651
|
+
def test_sequence_regions
|
|
652
|
+
region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400)
|
|
653
|
+
assert_equal([ region ], @gff3.sequence_regions)
|
|
654
|
+
end
|
|
655
|
+
|
|
656
|
+
def test_gff_version
|
|
657
|
+
assert_equal('3', @gff3.gff_version)
|
|
658
|
+
end
|
|
659
|
+
|
|
660
|
+
def test_records
|
|
661
|
+
assert_equal(7, @gff3.records.size)
|
|
662
|
+
r_test01 = Bio::GFF::GFF3::Record.new('test01',
|
|
663
|
+
'RANDOM',
|
|
664
|
+
'contig',
|
|
665
|
+
1, 400, nil, '+', nil,
|
|
666
|
+
[ ['ID', 'test01'],
|
|
667
|
+
['Note', 'this is test'] ])
|
|
668
|
+
r_mrna01 = Bio::GFF::GFF3::Record.new('test01',
|
|
669
|
+
nil,
|
|
670
|
+
'mRNA',
|
|
671
|
+
101, 230, nil, '+', nil,
|
|
672
|
+
[ ['ID', 'mrna01'],
|
|
673
|
+
['Name', 'testmRNA'],
|
|
674
|
+
['Note', 'this is test mRNA'] ])
|
|
675
|
+
r_exon01 = Bio::GFF::GFF3::Record.new('test01',
|
|
676
|
+
nil,
|
|
677
|
+
'exon',
|
|
678
|
+
101, 160, nil, '+', nil,
|
|
679
|
+
[ ['ID', 'exon01'],
|
|
680
|
+
['Name', 'exon01'],
|
|
681
|
+
['Alias', 'exon 1'],
|
|
682
|
+
['Parent', 'mrna01'],
|
|
683
|
+
['Parent', 'mrna01a'] ])
|
|
684
|
+
|
|
685
|
+
target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21)
|
|
686
|
+
gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
|
|
687
|
+
r_match01 =Bio::GFF::GFF3::Record.new('test01',
|
|
688
|
+
nil,
|
|
689
|
+
'Match',
|
|
690
|
+
101, 123, nil, nil, nil,
|
|
691
|
+
[ ['ID', 'match01'],
|
|
692
|
+
['Name', 'match01'],
|
|
693
|
+
['Target', target],
|
|
694
|
+
['Gap', gap] ])
|
|
695
|
+
assert_equal(r_test01, @gff3.records[0])
|
|
696
|
+
assert_equal(r_mrna01, @gff3.records[1])
|
|
697
|
+
assert_equal(r_exon01, @gff3.records[3])
|
|
698
|
+
assert_equal(r_match01, @gff3.records[6])
|
|
699
|
+
end
|
|
700
|
+
|
|
701
|
+
def test_sequences
|
|
702
|
+
assert_equal(1, @gff3.sequences.size)
|
|
703
|
+
assert_equal('test01', @gff3.sequences[0].entry_id)
|
|
704
|
+
assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4',
|
|
705
|
+
Digest::SHA1.hexdigest(@gff3.sequences[0].seq.to_s))
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
def test_to_s
|
|
709
|
+
assert_equal(@data, @gff3.to_s)
|
|
710
|
+
end
|
|
711
|
+
|
|
712
|
+
end #class TestGFF3
|
|
713
|
+
|
|
714
|
+
class TestGFF3Record < Test::Unit::TestCase
|
|
715
|
+
|
|
716
|
+
def setup
|
|
717
|
+
data =<<END_OF_DATA
|
|
718
|
+
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome%20I%20centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
|
|
719
|
+
END_OF_DATA
|
|
720
|
+
@obj = Bio::GFF::GFF3::Record.new(data)
|
|
721
|
+
end
|
|
722
|
+
|
|
723
|
+
def test_seqname
|
|
724
|
+
assert_equal('chrI', @obj.seqname)
|
|
725
|
+
end
|
|
726
|
+
|
|
727
|
+
def test_source
|
|
728
|
+
assert_equal('SGD', @obj.source)
|
|
729
|
+
end
|
|
730
|
+
|
|
731
|
+
def test_feature
|
|
732
|
+
assert_equal('centromere', @obj.feature)
|
|
733
|
+
end
|
|
734
|
+
|
|
735
|
+
def test_start
|
|
736
|
+
assert_equal(151467, @obj.start)
|
|
737
|
+
end
|
|
738
|
+
|
|
739
|
+
def test_end
|
|
740
|
+
assert_equal(151584, @obj.end)
|
|
741
|
+
end
|
|
742
|
+
|
|
743
|
+
def test_score
|
|
744
|
+
assert_equal(nil, @obj.score)
|
|
745
|
+
end
|
|
746
|
+
|
|
747
|
+
def test_strand
|
|
748
|
+
assert_equal('+', @obj.strand)
|
|
749
|
+
end
|
|
750
|
+
|
|
751
|
+
def test_frame
|
|
752
|
+
assert_equal(nil, @obj.frame)
|
|
753
|
+
end
|
|
754
|
+
|
|
755
|
+
def test_attributes
|
|
756
|
+
attr = [
|
|
757
|
+
['ID', 'CEN1'],
|
|
758
|
+
['Name', 'CEN1'],
|
|
759
|
+
['gene', 'CEN1'],
|
|
760
|
+
['Alias', 'CEN1'],
|
|
761
|
+
['Alias', 'test;0001'],
|
|
762
|
+
['Note', 'Chromosome I centromere'],
|
|
763
|
+
['dbxref', 'SGD:S000006463'],
|
|
764
|
+
['Target',
|
|
765
|
+
Bio::GFF::GFF3::Record::Target.new('test 02', 123, 456, '-')],
|
|
766
|
+
['Target',
|
|
767
|
+
Bio::GFF::GFF3::Record::Target.new('test,03', 159, 314)],
|
|
768
|
+
['memo=test;attr', "99.9%\tmatch"]
|
|
769
|
+
]
|
|
770
|
+
assert_equal(attr, @obj.attributes)
|
|
771
|
+
end
|
|
772
|
+
|
|
773
|
+
def test_id
|
|
774
|
+
assert_equal('CEN1', @obj.id)
|
|
775
|
+
end
|
|
776
|
+
|
|
777
|
+
def test_to_s
|
|
778
|
+
str =<<END_OF_DATA
|
|
779
|
+
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome I centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
|
|
780
|
+
END_OF_DATA
|
|
781
|
+
assert_equal(str, @obj.to_s)
|
|
782
|
+
end
|
|
783
|
+
|
|
784
|
+
def test_to_s_attr_order_changed
|
|
785
|
+
str = <<END_OF_STR
|
|
786
|
+
chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;Alias=CEN1,test%3B0001;Target=test%2002 123 456 -,test%2C03 159 314;Note=Chromosome I centromere;dbxref=SGD:S000006463;gene=CEN1;memo%3Dtest%3Battr=99.9%25%09match
|
|
787
|
+
END_OF_STR
|
|
788
|
+
|
|
789
|
+
keys = [ 'ID', 'Name', 'Alias', 'Target', 'Note', 'dbxref', 'gene' ]
|
|
790
|
+
@obj.sort_attributes_by_tag!(keys)
|
|
791
|
+
assert_equal(str, @obj.to_s)
|
|
792
|
+
end
|
|
793
|
+
end #class TestGFF3Record
|
|
794
|
+
|
|
795
|
+
class TestGFF3RecordMisc < Test::Unit::TestCase
|
|
796
|
+
def test_attributes_none
|
|
797
|
+
# test blank with tab
|
|
798
|
+
data =<<END_OF_DATA
|
|
799
|
+
I sgd gene 151453 151591 . + .
|
|
800
|
+
END_OF_DATA
|
|
801
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
802
|
+
assert_equal([], obj.attributes)
|
|
803
|
+
|
|
804
|
+
# test blank with no tab at end
|
|
805
|
+
data =<<END_OF_DATA
|
|
806
|
+
I sgd gene 151453 151591 . + .
|
|
807
|
+
END_OF_DATA
|
|
808
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
809
|
+
assert_equal([], obj.attributes)
|
|
810
|
+
end
|
|
811
|
+
|
|
812
|
+
def test_attributes_one
|
|
813
|
+
data =<<END_OF_DATA
|
|
814
|
+
I sgd gene 151453 151591 . + . ID=CEN1
|
|
815
|
+
END_OF_DATA
|
|
816
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
817
|
+
at = [ ["ID", 'CEN1'] ]
|
|
818
|
+
assert_equal(at, obj.attributes)
|
|
819
|
+
end
|
|
820
|
+
|
|
821
|
+
def test_attributes_with_escaping
|
|
822
|
+
data =<<END_OF_DATA
|
|
823
|
+
I sgd gene 151453 151591 . + . ID=CEN1;gene=CEN1%3Boh;Note=Chromosome I Centromere
|
|
824
|
+
END_OF_DATA
|
|
825
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
826
|
+
at = [ ['ID', 'CEN1'],
|
|
827
|
+
["gene", 'CEN1;oh'],
|
|
828
|
+
["Note", 'Chromosome I Centromere']
|
|
829
|
+
]
|
|
830
|
+
assert_equal(at, obj.attributes)
|
|
831
|
+
end
|
|
832
|
+
|
|
833
|
+
def test_score
|
|
834
|
+
data =<<END_OF_DATA
|
|
835
|
+
ctg123 src match 456 788 1e-10 - . ID=test01
|
|
836
|
+
END_OF_DATA
|
|
837
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
838
|
+
assert_equal(1e-10, obj.score)
|
|
839
|
+
obj.score = 0.5
|
|
840
|
+
assert_equal(0.5, obj.score)
|
|
841
|
+
end
|
|
842
|
+
|
|
843
|
+
def test_phase
|
|
844
|
+
data =<<END_OF_DATA
|
|
845
|
+
ctg123 src CDS 456 788 . - 2 ID=test02
|
|
846
|
+
END_OF_DATA
|
|
847
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
848
|
+
assert_equal(2, obj.phase)
|
|
849
|
+
assert_equal(2, obj.frame)
|
|
850
|
+
obj.phase = 1
|
|
851
|
+
assert_equal(1, obj.phase)
|
|
852
|
+
assert_equal(1, obj.frame)
|
|
853
|
+
end
|
|
854
|
+
|
|
855
|
+
def test_id_replace
|
|
856
|
+
data =<<END_OF_DATA
|
|
857
|
+
ctg123 src CDS 456 788 1e-10 - 2 ID=test03
|
|
858
|
+
END_OF_DATA
|
|
859
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
860
|
+
assert_equal('test03', obj.id)
|
|
861
|
+
assert_equal('test_id', obj.id = 'test_id')
|
|
862
|
+
assert_equal('test_id', obj.id)
|
|
863
|
+
end
|
|
864
|
+
|
|
865
|
+
def test_id_set
|
|
866
|
+
data =<<END_OF_DATA
|
|
867
|
+
ctg123 src CDS 456 788 1e-10 - 2 NAME=test03
|
|
868
|
+
END_OF_DATA
|
|
869
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
870
|
+
assert_nil(obj.id)
|
|
871
|
+
assert_equal('test_id', obj.id = 'test_id')
|
|
872
|
+
assert_equal('test_id', obj.id)
|
|
873
|
+
assert_equal('next_test', obj.id = 'next_test')
|
|
874
|
+
assert_equal('next_test', obj.id)
|
|
875
|
+
end
|
|
876
|
+
|
|
877
|
+
def test_id_multiple
|
|
878
|
+
# Note: Having two ID attributes in a record is illegal in GFF3.
|
|
879
|
+
data =<<END_OF_DATA
|
|
880
|
+
ctg123 src CDS 456 788 . - 2 ID=test03,test04
|
|
881
|
+
END_OF_DATA
|
|
882
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
883
|
+
assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
|
|
884
|
+
obj.attributes)
|
|
885
|
+
assert_equal('test03', obj.id)
|
|
886
|
+
assert_equal('test_id', obj.id = 'test_id')
|
|
887
|
+
assert_equal('test_id', obj.id)
|
|
888
|
+
assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
|
|
889
|
+
obj.attributes)
|
|
890
|
+
str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
|
|
891
|
+
assert_equal(str, obj.to_s)
|
|
892
|
+
end
|
|
893
|
+
|
|
894
|
+
def test_id_multiple2
|
|
895
|
+
# Note: Having two ID attributes in a record is illegal in GFF3.
|
|
896
|
+
data =<<END_OF_DATA
|
|
897
|
+
ctg123 src CDS 456 788 . - 2 ID=test03;ID=test04
|
|
898
|
+
END_OF_DATA
|
|
899
|
+
obj = Bio::GFF::GFF3::Record.new(data)
|
|
900
|
+
assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
|
|
901
|
+
obj.attributes)
|
|
902
|
+
assert_equal('test03', obj.id)
|
|
903
|
+
assert_equal('test_id', obj.id = 'test_id')
|
|
904
|
+
assert_equal('test_id', obj.id)
|
|
905
|
+
assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
|
|
906
|
+
obj.attributes)
|
|
907
|
+
|
|
908
|
+
# The "XXX=test03;XXX=test04" is automatically changed to
|
|
909
|
+
# "XXX=test03,test04", as defined in the GFF3 spec.
|
|
910
|
+
str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
|
|
911
|
+
assert_equal(str, obj.to_s)
|
|
912
|
+
end
|
|
913
|
+
|
|
914
|
+
def test_initialize_9
|
|
915
|
+
obj = Bio::GFF::GFF3::Record.new('test01',
|
|
916
|
+
'testsrc',
|
|
917
|
+
'exon',
|
|
918
|
+
1, 400, nil, '+', nil,
|
|
919
|
+
[ ['ID', 'test01'],
|
|
920
|
+
['Note', 'this is test'] ])
|
|
921
|
+
assert_equal('test01', obj.seqid)
|
|
922
|
+
end
|
|
923
|
+
|
|
924
|
+
def test_to_s_void
|
|
925
|
+
obj = Bio::GFF::GFF3::Record.new
|
|
926
|
+
assert_equal(".\t.\t.\t.\t.\t.\t.\t.\t.\n", obj.to_s)
|
|
927
|
+
end
|
|
928
|
+
|
|
929
|
+
end #class TestGFF3RecordMisc
|
|
930
|
+
|
|
931
|
+
# Tests for the escaping helpers provided by Bio::GFF::GFF3::Escape.
# The module is mixed into a bare object and every helper is invoked
# through __send__, so the helpers are reachable regardless of their
# visibility.
class TestGFF3RecordEscape < Test::Unit::TestCase
  def setup
    @obj = Object.new.extend(Bio::GFF::GFF3::Escape)
    @str = "A>B\tC=100%;d=e,f,g h"
  end

  # General escaping: only the tab and the percent sign are encoded
  # in this sample.
  def test_escape
    expected = 'A>B%09C=100%25;d=e,f,g h'
    assert_equal(expected, call_helper(:escape, @str))
  end

  # Attribute escaping additionally encodes "=", ";" and ",".
  def test_escape_attribute
    expected = 'A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h'
    assert_equal(expected, call_helper(:escape_attribute, @str))
  end

  # Seqid escaping additionally encodes ">" and the space.
  def test_escape_seqid
    expected = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h'
    assert_equal(expected, call_helper(:escape_seqid, @str))
  end

  # Unescaping the fully encoded form gives back the raw sample string.
  def test_unescape
    escaped_str = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h'
    assert_equal(@str, call_helper(:unescape, escaped_str))
  end

  private

  # Invokes the named Escape helper on the mixin host object.
  def call_helper(name, text)
    @obj.__send__(name, text)
  end
end #class TestGFF3RecordEscape
|
|
962
|
+
|
|
963
|
+
# Tests for Bio::GFF::GFF3::Record::Target: parsing, attribute readers
# and string output.  Three fixtures are used: a plain target with a
# strand, a target whose ID contains characters that need escaping, and
# a target whose fields are all nil.
class TestGFF3RecordTarget < Test::Unit::TestCase

  def setup
    @target =
      [ Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'),
        Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90),
        Bio::GFF::GFF3::Record::Target.new(nil, nil, nil),
      ]
  end

  # Each input string must parse into the fixture at the same index.
  def test_parse
    strings =
      [ 'ABCD1234 123 456 +',
        '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
        ''
      ]
    # zip pairs fixtures and inputs without consuming either array.
    @target.zip(strings) do |target, str|
      assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str))
    end
  end

  def test_target_id
    assert_equal('ABCD1234', @target[0].target_id)
    assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id)
    # assert_nil is used for nil expectations, like the other tests here.
    assert_nil(@target[2].target_id)
  end

  def test_start
    assert_equal(123, @target[0].start)
    assert_equal(78, @target[1].start)
    assert_nil(@target[2].start)
  end

  def test_end
    assert_equal(456, @target[0].end)
    assert_equal(90, @target[1].end)
    assert_nil(@target[2].end)
  end

  # The strand is optional; it is nil when not given.
  def test_strand
    assert_equal('+', @target[0].strand)
    assert_nil(@target[1].strand)
    assert_nil(@target[2].strand)
  end

  # The ID is percent-escaped on output and nil fields are printed as ".".
  def test_to_s
    assert_equal('ABCD1234 123 456 +', @target[0].to_s)
    assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s)
    assert_equal('. . .', @target[2].to_s)
  end

end #class TestGFF3RecordTarget
|
|
1016
|
+
|
|
1017
|
+
# Tests for Bio::GFF::GFF3::Record::Gap: string round trip, equality,
# applying a gap description to unaligned sequences, and deriving a gap
# description from aligned sequences.
#
# Fixture convention for nucleotide-vs-protein alignments: the two
# aligned strings of a pair always have the same length.  Every amino
# acid (or "-" placeholder) of the aligned protein occupies three
# columns, i.e. the letter followed by two spaces, while the frameshift
# marks "<" and ">" occupy a single column each.
class TestGFF3RecordGap < Test::Unit::TestCase
  def setup
    # examples taken from http://song.sourceforge.net/gff3.shtml
    @gaps_src = [ 'M8 D3 M6 I1 M6',
                  'M3 I1 M2 F1 M4',
                  'M3 I1 M2 R1 M4' ]
    @gaps = @gaps_src.map { |x| Bio::GFF::GFF3::Record::Gap.new(x) }
  end

  # Every gap must print back exactly the string it was created from.
  def test_to_s
    # zip keeps @gaps intact instead of consuming it with shift.
    @gaps_src.zip(@gaps) do |src, gap|
      assert_equal(src, gap.to_s)
    end
  end

  def test_eqeq
    gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
    assert(gap == @gaps[0])
    assert_equal(false, gap == @gaps[1])
  end

  # Nucleotide vs nucleotide: gaps are inserted as "-" in both sequences.
  def test_process_sequences_na
    ref = 'CAAGACCTAAACTGGATTCCAAT'
    tgt = 'CAAGACCTCTGGATATCCAAT'
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'
    assert_equal([ ref_aligned, tgt_aligned ],
                 @gaps[0].process_sequences_na(ref, tgt))
  end

  # Sequences shorter than the gap description requires must raise.
  def test_process_sequences_na_tooshort
    ref = 'CAAGACCTAAACTGGATTCCAA'
    tgt = 'CAAGACCTCTGGATATCCAA'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
    ref = 'c'
    tgt = 'c'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
  end

  # Nucleotide vs protein with a forward frameshift (F1 -> ">").
  def test_process_sequences_na_aa
    ref1 = 'atgaaggaggttattgaatgtcggcggt'
    tgt1 = 'MKEVVINVGG'
    ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt'
    # 31 columns, same as ref1_aligned.
    tgt1_aligned = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal([ ref1_aligned, tgt1_aligned ],
                 @gaps[1].process_sequences_na_aa(ref1, tgt1))
  end

  # Nucleotide vs protein with a reverse frameshift (R1 -> "<").
  def test_process_sequences_na_aa_reverse_frameshift
    ref2 = 'atgaaggaggttataatgtcggcggt'
    tgt2 = 'MKEVVINVGG'
    ref2_aligned = 'atgaaggag---gttat<aatgtcggcggt'
    # 30 columns, same as ref2_aligned.
    tgt2_aligned = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal([ ref2_aligned, tgt2_aligned ],
                 @gaps[2].process_sequences_na_aa(ref2, tgt2))
  end

  # A reverse frameshift of a whole codon (R3 -> "<<<").
  def test_process_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaagattaatgtc'
    tgt = 'MKIINV'
    ref_aligned = 'atgaag<<<attaatgtc'
    # 18 columns, same as ref_aligned.
    tgt_aligned = 'M  K  I  I  N  V  '
    assert_equal([ ref_aligned, tgt_aligned ],
                 gap.process_sequences_na_aa(ref, tgt))
  end

  def test_process_sequences_na_aa_tooshort
    ref2 = 'atgaaggaggttataatgtcggcgg'
    tgt2 = 'MKEVVINVG'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end

    ref2 = 'atg'
    tgt2 = 'M'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end
  end

  # __scan_gap is invoked via instance_eval so that it can be called
  # without an explicit receiver.  Runs of "-" become :I codes and
  # runs of other characters become :M codes.
  def test___scan_gap
    str1 = 'CAAGACCT---CTGGATATCCAAT'
    str2 = '-aaaaaaa-a-a---ggag--'
    c = Bio::GFF::GFF3::Record::Gap::Code
    data1 = [ c.new(:M, 8), c.new(:I, 3), c.new(:M, 13) ]
    data2 = [ c.new(:I, 1), c.new(:M, 7), c.new(:I, 1),
              c.new(:M, 1), c.new(:I, 1), c.new(:M, 1),
              c.new(:I, 3), c.new(:M, 4), c.new(:I, 2) ]

    assert_equal(data1, @gaps[0].instance_eval { __scan_gap(str1) })
    assert_equal(data2, @gaps[0].instance_eval { __scan_gap(str2) })
  end

  def test_new_from_sequences_na
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'

    assert_equal(@gaps[0],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na(ref_aligned,
                                                                   tgt_aligned))
  end

  def test_new_from_sequences_na_aa
    ref = 'atgaaggag---gttattgaatgtcggcggt'
    tgt = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal(@gaps[1],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  def test_new_from_sequences_na_aa_reverse_frameshift
    ref = 'atgaaggag---gttat<aatgtcggcggt'
    tgt = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal(@gaps[2],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  def test_new_from_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaag<<<attaatgtc'
    tgt = 'M  K  I  I  N  V  '
    assert_equal(gap,
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  # Gaps located at the very beginning or end of the alignment.
  # Each row is: expected gap string, aligned reference, aligned target.
  def test_new_from_sequences_na_aa_boundary_gap
    g = Bio::GFF::GFF3::Record::Gap
    cases = [
      [ 'I1 M2', '---atgatg', 'K  M  M  ' ],
      [ 'M2 I1', 'atgatg---', 'M  M  K  ' ],
      [ 'D1 M2', 'atgatgatg', '-  M  M  ' ],
      [ 'M2 D1', 'atgatgatg', 'M  M  -  ' ]
    ]
    cases.each do |expected, ref, tgt|
      assert_equal(g.new(expected), g.new_from_sequences_na_aa(ref, tgt))
    end
  end

  # Two different aligned representations that are both expected to
  # produce the same gap description.
  def test_new_from_sequences_na_aa_example
    gap = Bio::GFF::GFF3::Record::Gap.new('M2 R1 M1 F2 M1')
    ref1 = 'atgg-taagac-att'
    tgt1 = 'M  V  K  -  I  '
    ref2 = 'atggt<aagacatt'
    tgt2 = 'M  V  K  >>I  '
    gap1 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref1, tgt1)
    assert_equal(gap, gap1)
    gap2 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref2, tgt2)
    assert_equal(gap, gap2)
  end
end #class TestGFF3RecordGap
|
|
1179
|
+
|
|
1180
|
+
# Tests for Bio::GFF::GFF3::SequenceRegion, the object form of the
# "##sequence-region" directive.  Three fixtures are used: a plain
# region, a region whose seqid needs escaping, and a region whose
# fields are all nil.
class TestGFF3SequenceRegion < Test::Unit::TestCase

  def setup
    @data =
      [ Bio::GFF::GFF3::SequenceRegion.new('ABCD1234', 123, 456),
        Bio::GFF::GFF3::SequenceRegion.new(">X Y=Z;P%,Q\tR", 78, 90),
        Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil),
      ]
  end

  # Each directive line must parse into the fixture at the same index.
  def test_parse
    strings =
      [ '##sequence-region ABCD1234 123 456',
        '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
        '##sequence-region'
      ]
    # zip pairs fixtures and inputs without consuming either array.
    @data.zip(strings) do |reg, str|
      assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str))
    end
  end

  def test_seqid
    assert_equal('ABCD1234', @data[0].seqid)
    assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid)
    # assert_nil is used for nil expectations, like the other tests here.
    assert_nil(@data[2].seqid)
  end

  def test_start
    assert_equal(123, @data[0].start)
    assert_equal(78, @data[1].start)
    assert_nil(@data[2].start)
  end

  def test_end
    assert_equal(456, @data[0].end)
    assert_equal(90, @data[1].end)
    assert_nil(@data[2].end)
  end

  # The seqid is percent-escaped on output, nil fields are printed as
  # ".", and the line is newline-terminated.
  def test_to_s
    assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s)
    assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n",
                 @data[1].to_s)
    assert_equal("##sequence-region . . .\n", @data[2].to_s)
  end

end #class TestGFF3SequenceRegion
|
|
1228
|
+
|
|
1229
|
+
# Tests for Bio::GFF::GFF3::MetaData, which holds a "##directive data"
# line as a directive name and its data string.
class TestGFF3MetaData < Test::Unit::TestCase

  def setup
    @data = Bio::GFF::GFF3::MetaData.new('feature-ontology', sofa_url)
  end

  # Parsing the directive line gives an object equal to the fixture.
  def test_parse
    line = '##feature-ontology ' + sofa_url
    parsed = Bio::GFF::GFF3::MetaData.parse(line)
    assert_equal(@data, parsed)
  end

  def test_directive
    assert_equal('feature-ontology', @data.directive)
  end

  def test_data
    assert_equal(sofa_url, @data.data)
  end

  private

  # Returns a fresh copy of the ontology URL used as directive data.
  def sofa_url
    'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'
  end
end #class TestGFF3MetaData
|
|
1250
|
+
|
|
1251
|
+
end #module Bio
|
|
1252
|
+
|
|
1253
|
+
|