bio 1.4.3.0001 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
@@ -13,15 +13,9 @@ require 'bio/appl/clustalw/report'
|
|
13
13
|
|
14
14
|
module Bio
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
def setup
|
19
|
-
test_data_path = Pathname.new(File.join(BioRubyTestDataPath, 'clustalw')).cleanpath.to_s
|
20
|
-
aln_filename = File.join(test_data_path, 'example1.aln')
|
21
|
-
text = File.read(aln_filename)
|
22
|
-
@aln = Bio::ClustalW::Report.new(text)
|
23
|
-
end
|
16
|
+
module TestClustalWReport
|
24
17
|
|
18
|
+
#--
|
25
19
|
# CLUSTAL 2.0.9 multiple sequence alignment
|
26
20
|
#
|
27
21
|
#
|
@@ -34,47 +28,69 @@ module Bio
|
|
34
28
|
# query LNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIE
|
35
29
|
# gi|115023|sp|P10425| LNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIE
|
36
30
|
# *: . . **. . .. ::*: . * :
|
31
|
+
#++
|
32
|
+
|
33
|
+
module CommonTestMethods
|
34
|
+
|
35
|
+
def test_header
|
36
|
+
assert_equal('CLUSTAL 2.0.9 multiple sequence alignment',@aln.header)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_sequence0
|
40
|
+
seq = @aln.get_sequence(0)
|
41
|
+
assert_equal('query',seq.definition)
|
42
|
+
assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_sequence1
|
46
|
+
seq = @aln.get_sequence(1)
|
47
|
+
assert_equal('gi|115023|sp|P10425|',seq.definition)
|
48
|
+
assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_alignment
|
52
|
+
assert_equal("???????????SN?????????????D??????????L??????????????????H?H?D",@aln.alignment.consensus[60..120])
|
53
|
+
end
|
54
|
+
|
55
|
+
def test_match_line
|
56
|
+
assert_equal(" .: : *: . . **. . .. ::*: . * : : . .: .* * * * : * . : . . * : .: . .: .*: ::***:* .:* .* :: . . ::.: * : . " ,@aln.match_line)
|
57
|
+
end
|
58
|
+
|
59
|
+
end #module CommonTestMethods
|
60
|
+
|
61
|
+
class TestClustalWReport < Test::Unit::TestCase
|
62
|
+
include CommonTestMethods
|
63
|
+
|
64
|
+
def setup
|
65
|
+
aln_filename = File.join(BioRubyTestDataPath, 'clustalw',
|
66
|
+
'example1.aln')
|
67
|
+
text = File.read(aln_filename)
|
68
|
+
@aln = Bio::ClustalW::Report.new(text)
|
69
|
+
end
|
70
|
+
end # class TestClustalWReport
|
71
|
+
|
72
|
+
class TestClustalWReportWith2ndArgument < Test::Unit::TestCase
|
73
|
+
include CommonTestMethods
|
74
|
+
|
75
|
+
def setup
|
76
|
+
aln_filename = File.join(BioRubyTestDataPath, 'clustalw',
|
77
|
+
'example1.aln')
|
78
|
+
text = File.read(aln_filename)
|
79
|
+
@aln = Bio::ClustalW::Report.new(text, "PROTEIN")
|
80
|
+
end
|
81
|
+
end #class TestClustalWReportWith2ndArgument
|
82
|
+
|
83
|
+
class TestClustalWReportSeqnos < Test::Unit::TestCase
|
84
|
+
include CommonTestMethods
|
85
|
+
|
86
|
+
def setup
|
87
|
+
aln_filename = File.join(BioRubyTestDataPath, 'clustalw',
|
88
|
+
'example1-seqnos.aln')
|
89
|
+
text = File.read(aln_filename)
|
90
|
+
@aln = Bio::ClustalW::Report.new(text)
|
91
|
+
end
|
92
|
+
end #class TestClustalWReportSeqnos
|
93
|
+
|
94
|
+
end #module TestClustalWReport
|
37
95
|
|
38
|
-
|
39
|
-
assert_equal('CLUSTAL 2.0.9 multiple sequence alignment',@aln.header)
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_sequences
|
43
|
-
seq = @aln.get_sequence(0)
|
44
|
-
assert_equal('query',seq.definition)
|
45
|
-
assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
46
|
-
seq = @aln.get_sequence(1)
|
47
|
-
assert_equal('gi|115023|sp|P10425|',seq.definition)
|
48
|
-
assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_alignment
|
52
|
-
assert_equal("???????????SN?????????????D??????????L??????????????????H?H?D",@aln.alignment.consensus[60..120])
|
53
|
-
end
|
54
|
-
|
55
|
-
def test_match_line
|
56
|
-
assert_equal(" .: : *: . . **. . .. ::*: . * : : . .: .* * * * : * . : . . * : .: . .: .*: ::***:* .:* .* :: . . ::.: * : . " ,@aln.match_line)
|
57
|
-
end
|
58
|
-
|
59
|
-
end # class TestClustalwFormat
|
60
|
-
|
61
|
-
class TestClustalWReportWith2ndArgument < Test::Unit::TestCase
|
62
|
-
|
63
|
-
def setup
|
64
|
-
aln_filename = File.join(BioRubyTestDataPath, 'clustalw',
|
65
|
-
'example1.aln')
|
66
|
-
text = File.read(aln_filename)
|
67
|
-
@aln = Bio::ClustalW::Report.new(text, "PROTEIN")
|
68
|
-
end
|
69
|
-
|
70
|
-
def test_sequences
|
71
|
-
seq = @aln.get_sequence(0)
|
72
|
-
assert_equal('query',seq.definition)
|
73
|
-
assert_equal("-MKNTLLKLGVCVSLLGITPFVSTISSVQAERTVEHKVIKNETGTISISQLNKNVWVHTELGYFSG-EAVPSNGLVLNTSKGLVLVDSSWDDKLTKELIEMVEKKFKKRVTDVIITHAHADRIGGMKTLKERGIKAHSTALTAELAKKNG--------------------YEEPLGDLQSVTNLKFGN----MKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSASSKDLGNVADAYVNEWSTSIENVLKRYGNINLVVPGHGEVGDR-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
74
|
-
seq = @aln.get_sequence(1)
|
75
|
-
assert_equal('gi|115023|sp|P10425|',seq.definition)
|
76
|
-
assert_equal("MKKNTLLKVGLCVSLLGTTQFVSTISSVQASQKVEQIVIKNETGTISISQLNKNVWVHTELGYFNG-EAVPSNGLVLNTSKGLVLVDSSWDNKLTKELIEMVEKKFQKRVTDVIITHAHADRIGGITALKERGIKAHSTALTAELAKKSG--------------------YEEPLGDLQTVTNLKFGN----TKVETFYPGKGHTEDNIVVWLPQYQILAGGCLVKSAEAKNLGNVADAYVNEWSTSIENMLKRYRNINLVVPGHGKVGDK-----GLLLHTLDLLK---------------------------------------------------------------------",seq.to_s)
|
77
|
-
end
|
78
|
-
|
79
|
-
end #class TestClustalWReportWith2ndArgument
|
80
|
-
end
|
96
|
+
end #module Bio
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# Naohisa Goto <ng@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
# loading helper routine for testing bioruby
|
@@ -56,7 +55,7 @@ module Bio
|
|
56
55
|
end #class TestDataForSim4Report
|
57
56
|
|
58
57
|
|
59
|
-
|
58
|
+
module TemplateTestSim4Report
|
60
59
|
|
61
60
|
def setup
|
62
61
|
@sim4 = TestDataForSim4Report.report1
|
@@ -126,9 +125,9 @@ module Bio
|
|
126
125
|
assert_equal(94, @sim4.query_len)
|
127
126
|
end
|
128
127
|
|
129
|
-
end #
|
128
|
+
end #module TemplateTestSim4Report
|
130
129
|
|
131
|
-
|
130
|
+
module TemplateTestSim4ReportHit
|
132
131
|
|
133
132
|
def setup
|
134
133
|
@hit = TestDataForSim4Report.report1.hits.first
|
@@ -258,9 +257,9 @@ module Bio
|
|
258
257
|
def test_target_len
|
259
258
|
assert_equal(599, @hit.target_len)
|
260
259
|
end
|
261
|
-
end #
|
260
|
+
end #module TemplateTestSim4ReportHit
|
262
261
|
|
263
|
-
|
262
|
+
module TemplateTestSim4ReportSegmentPair_exon
|
264
263
|
def setup
|
265
264
|
@exon = TestDataForSim4Report.report1.hits[0].exons[1]
|
266
265
|
end
|
@@ -329,9 +328,9 @@ module Bio
|
|
329
328
|
assert_equal("TCTACACATCACTAGCCTGGGTGGGCGGAA GAGCAGCTCGCCACTTCAAGCTAA",
|
330
329
|
@exon.seq2.seq)
|
331
330
|
end
|
332
|
-
end #
|
331
|
+
end #module TemplateTestSim4ReportSegmentPair_exon
|
333
332
|
|
334
|
-
|
333
|
+
module TemplateTestSim4ReportSegmentPair_intron
|
335
334
|
def setup
|
336
335
|
@intron = TestDataForSim4Report.report1.hits[0].introns[0]
|
337
336
|
end
|
@@ -392,10 +391,28 @@ module Bio
|
|
392
391
|
assert_equal(403, @intron.seq2.to)
|
393
392
|
assert_equal("CTG...TAC", @intron.seq2.seq)
|
394
393
|
end
|
395
|
-
end #
|
394
|
+
end #module TemplateTestSim4ReportSegmentPair_intron
|
395
|
+
|
396
|
+
|
397
|
+
class TestSim4Report < Test::Unit::TestCase
|
398
|
+
include TemplateTestSim4Report
|
399
|
+
end
|
400
|
+
|
401
|
+
class TestSim4ReportHit < Test::Unit::TestCase
|
402
|
+
include TemplateTestSim4ReportHit
|
403
|
+
end
|
396
404
|
|
405
|
+
class TestSim4ReportSegmentPair_exon < Test::Unit::TestCase
|
406
|
+
include TemplateTestSim4ReportSegmentPair_exon
|
407
|
+
end
|
408
|
+
|
409
|
+
class TestSim4ReportSegmentPair_intron < Test::Unit::TestCase
|
410
|
+
include TemplateTestSim4ReportSegmentPair_intron
|
411
|
+
end
|
412
|
+
|
413
|
+
class TestSim4Report2 < Test::Unit::TestCase
|
414
|
+
include TemplateTestSim4Report
|
397
415
|
|
398
|
-
class TestSim4Report2 < TestSim4Report
|
399
416
|
def setup
|
400
417
|
@sim4 = TestDataForSim4Report.report2
|
401
418
|
end
|
@@ -410,7 +427,9 @@ module Bio
|
|
410
427
|
private :exec_test_seq1_len
|
411
428
|
end #class TestSim4Report2
|
412
429
|
|
413
|
-
class TestSim4ReportHit2 <
|
430
|
+
class TestSim4ReportHit2 < Test::Unit::TestCase
|
431
|
+
include TemplateTestSim4ReportHit
|
432
|
+
|
414
433
|
def setup
|
415
434
|
@hit = TestDataForSim4Report.report2.hits.first
|
416
435
|
end
|
@@ -441,7 +460,9 @@ module Bio
|
|
441
460
|
end #class TestSim4ReportHit2
|
442
461
|
|
443
462
|
|
444
|
-
class TestSim4ReportSegmentPair2_exon <
|
463
|
+
class TestSim4ReportSegmentPair2_exon < Test::Unit::TestCase
|
464
|
+
include TemplateTestSim4ReportSegmentPair_exon
|
465
|
+
|
445
466
|
def setup
|
446
467
|
@exon = TestDataForSim4Report.report2.hits[0].exons[1]
|
447
468
|
end
|
@@ -463,14 +484,17 @@ module Bio
|
|
463
484
|
end #class TestSim4ReportSegmentPair2_exon
|
464
485
|
|
465
486
|
|
466
|
-
class TestSim4ReportSegmentPair2_intron <
|
487
|
+
class TestSim4ReportSegmentPair2_intron < Test::Unit::TestCase
|
488
|
+
include TemplateTestSim4ReportSegmentPair_intron
|
489
|
+
|
467
490
|
def setup
|
468
491
|
@intron = TestDataForSim4Report.report2.hits[0].introns[0]
|
469
492
|
end
|
470
493
|
end #class TestSim4ReportSegmentPair2_intron
|
471
494
|
|
472
495
|
|
473
|
-
class TestSim4Report4 <
|
496
|
+
class TestSim4Report4 < Test::Unit::TestCase
|
497
|
+
include TemplateTestSim4Report
|
474
498
|
|
475
499
|
def setup
|
476
500
|
@sim4 = TestDataForSim4Report.report4
|
@@ -504,7 +528,8 @@ module Bio
|
|
504
528
|
|
505
529
|
end #class TestSim4Report4
|
506
530
|
|
507
|
-
class TestSim4ReportHit4 <
|
531
|
+
class TestSim4ReportHit4 < Test::Unit::TestCase
|
532
|
+
include TemplateTestSim4ReportHit
|
508
533
|
|
509
534
|
def setup
|
510
535
|
@hit = TestDataForSim4Report.report4.hits.first
|
@@ -632,7 +657,9 @@ module Bio
|
|
632
657
|
end
|
633
658
|
end #class TestSim4ReportHit4
|
634
659
|
|
635
|
-
class TestSim4ReportSegmentPair4_exon <
|
660
|
+
class TestSim4ReportSegmentPair4_exon < Test::Unit::TestCase
|
661
|
+
include TemplateTestSim4ReportSegmentPair_exon
|
662
|
+
|
636
663
|
def setup
|
637
664
|
@exon = TestDataForSim4Report.report4.hits[0].exons[1]
|
638
665
|
end
|
@@ -704,7 +731,9 @@ module Bio
|
|
704
731
|
end #class TestSim4ReportSegmentPair4_exon
|
705
732
|
|
706
733
|
|
707
|
-
class TestSim4ReportSegmentPair4_intron <
|
734
|
+
class TestSim4ReportSegmentPair4_intron < Test::Unit::TestCase
|
735
|
+
include TemplateTestSim4ReportSegmentPair_intron
|
736
|
+
|
708
737
|
def setup
|
709
738
|
@intron = TestDataForSim4Report.report4.hits[0].introns[0]
|
710
739
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
#
|
2
|
-
# test/unit/bio/db/embl/
|
2
|
+
# test/unit/bio/db/embl/test_uniprotkb.rb - Unit tests for Bio::UniProtKB
|
3
3
|
#
|
4
4
|
# Copyright::: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id:$
|
8
7
|
#
|
9
8
|
|
10
9
|
# loading helper routine for testing bioruby
|
@@ -14,15 +13,15 @@ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
|
|
14
13
|
|
15
14
|
# libraries needed for the tests
|
16
15
|
require 'test/unit'
|
17
|
-
require 'bio/db/embl/
|
16
|
+
require 'bio/db/embl/uniprotkb'
|
18
17
|
|
19
18
|
module Bio
|
20
|
-
class
|
19
|
+
class TestUniProtKB < Test::Unit::TestCase
|
21
20
|
|
22
21
|
def setup
|
23
22
|
data = File.read(File.join(BioRubyTestDataPath,
|
24
23
|
'uniprot', 'p53_human.uniprot'))
|
25
|
-
@obj = Bio::
|
24
|
+
@obj = Bio::UniProtKB.new(data)
|
26
25
|
end
|
27
26
|
|
28
27
|
def test_id_line
|
@@ -150,13 +149,11 @@ module Bio
|
|
150
149
|
end
|
151
150
|
|
152
151
|
def test_gn_uniprot_parser
|
153
|
-
gn_uniprot_data = ''
|
154
152
|
assert_equal([{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}],
|
155
153
|
@obj.instance_eval("gn_uniprot_parser"))
|
156
154
|
end
|
157
155
|
|
158
156
|
def test_gn_old_parser
|
159
|
-
gn_old_data = ''
|
160
157
|
assert_equal([["Name=TP53; Synonyms=P53;"]],
|
161
158
|
@obj.instance_eval("gn_old_parser"))
|
162
159
|
end
|
@@ -235,7 +232,7 @@ module Bio
|
|
235
232
|
assert_equal({"NCBI_TaxID"=>["9606"]}, @obj.ox)
|
236
233
|
end
|
237
234
|
|
238
|
-
def test_ref # Bio::
|
235
|
+
def test_ref # Bio::UniProtKB#ref
|
239
236
|
assert_equal(Array, @obj.ref.class)
|
240
237
|
end
|
241
238
|
|
@@ -326,15 +323,15 @@ module Bio
|
|
326
323
|
assert_equal(seq, @obj.aaseq)
|
327
324
|
end
|
328
325
|
|
329
|
-
end # class
|
326
|
+
end # class TestUniProtKB
|
330
327
|
|
331
328
|
|
332
329
|
|
333
|
-
class
|
330
|
+
class TestUniProtKB_CC < Test::Unit::TestCase
|
334
331
|
def test_allergen
|
335
332
|
# ALLERGEN Information relevant to allergenic proteins
|
336
333
|
data = 'CC -!- ALLERGEN: Causes an allergic reaction in human.'
|
337
|
-
sp = Bio::
|
334
|
+
sp = Bio::UniProtKB.new(data)
|
338
335
|
assert_equal(['Causes an allergic reaction in human.'],
|
339
336
|
sp.cc['ALLERGEN'])
|
340
337
|
assert_equal(['Causes an allergic reaction in human.'],
|
@@ -352,7 +349,7 @@ CC Note=Contains a N-acetylmethionine at position 1 (By
|
|
352
349
|
CC similarity);"
|
353
350
|
|
354
351
|
res = ["Event=Alternative initiation; Named isoforms=2; Name=Long; IsoId=P68250-1; Sequence=Displayed; Name=Short; IsoId=P68250-2; Sequence=VSP_018631; Note=Contains a N-acetylmethionine at position 1 (By similarity);"]
|
355
|
-
sp = Bio::
|
352
|
+
sp = Bio::UniProtKB.new(data)
|
356
353
|
assert_equal(res,
|
357
354
|
sp.cc['ALTERNATIVE PRODUCTS'])
|
358
355
|
end
|
@@ -369,7 +366,7 @@ CC IsoId=P68250-2; Sequence=VSP_018631;
|
|
369
366
|
CC Note=Contains a N-acetylmethionine at position 1 (By
|
370
367
|
CC similarity);"
|
371
368
|
|
372
|
-
sp = Bio::
|
369
|
+
sp = Bio::UniProtKB.new(data)
|
373
370
|
assert_equal({"Comment"=>"",
|
374
371
|
"Named isoforms"=>"2",
|
375
372
|
"Variants"=>
|
@@ -393,7 +390,7 @@ CC Name=2; Synonyms=I9RET;
|
|
393
390
|
CC IsoId=P04637-2; Sequence=VSP_006535, VSP_006536;
|
394
391
|
CC Note=Seems to be non-functional. Expressed in quiescent
|
395
392
|
CC lymphocytes;"
|
396
|
-
sp = Bio::
|
393
|
+
sp = Bio::UniProtKB.new(data)
|
397
394
|
assert_equal({"Comment"=>"",
|
398
395
|
"Named isoforms"=>"2",
|
399
396
|
"Variants"=>
|
@@ -425,7 +422,7 @@ CC Note=May be produced by alternative promoter usage;
|
|
425
422
|
CC Name=5; Synonyms=AAT1-beta, AAT1-gamma;
|
426
423
|
CC IsoId=Q7Z4T9-5; Sequence=VSP_014909;
|
427
424
|
CC Note=May be produced by alternative promoter usage;"
|
428
|
-
sp = Bio::
|
425
|
+
sp = Bio::UniProtKB.new(data)
|
429
426
|
assert_equal({"Comment"=>"Additional isoforms (AAT-1L and AAT-1S) may exist",
|
430
427
|
"Named isoforms"=>"5",
|
431
428
|
"Variants"=>
|
@@ -454,7 +451,7 @@ CC Note=May be produced by alternative promoter usage;"
|
|
454
451
|
end
|
455
452
|
def test_alternative_products_rf
|
456
453
|
data = ""
|
457
|
-
sp = Bio::
|
454
|
+
sp = Bio::UniProtKB.new(data)
|
458
455
|
assert_equal({},
|
459
456
|
sp.cc('ALTERNATIVE PRODUCTS'))
|
460
457
|
end
|
@@ -468,7 +465,7 @@ CC KM=45 uM for AdoMet;
|
|
468
465
|
CC Vmax=32 uM/h/mg enzyme;
|
469
466
|
CC pH dependence:
|
470
467
|
CC Optimum pH is 8.2;'
|
471
|
-
sp = Bio::
|
468
|
+
sp = Bio::UniProtKB.new(data)
|
472
469
|
assert_equal(["Kinetic parameters: KM=45 uM for AdoMet; Vmax=32 uM/h/mg enzyme; pH dependence: Optimum pH is 8.2;"],
|
473
470
|
sp.cc['BIOPHYSICOCHEMICAL PROPERTIES'])
|
474
471
|
assert_equal({"Redox potential" => "",
|
@@ -494,7 +491,7 @@ CC Redox potential:
|
|
494
491
|
CC free_text;
|
495
492
|
CC Temperature dependence:
|
496
493
|
CC free_text;"
|
497
|
-
sp = Bio::
|
494
|
+
sp = Bio::UniProtKB.new(data)
|
498
495
|
assert_equal({"Redox potential"=>"free_text",
|
499
496
|
"Temperature dependence"=>"free_text",
|
500
497
|
"Kinetic parameters"=>
|
@@ -514,7 +511,7 @@ CC improved ripening tomato by Monsanto. ACC is the immediate
|
|
514
511
|
CC precursor of the phytohormone ethylene which is involved in the
|
515
512
|
CC control of ripening. ACC deaminase reduces ethylene biosynthesis
|
516
513
|
CC and thus extends the shelf life of fruits and vegetables.'
|
517
|
-
sp = Bio::
|
514
|
+
sp = Bio::UniProtKB.new(data)
|
518
515
|
assert_equal(["Introduced by genetic manipulation and expressed in improved ripening tomato by Monsanto. ACC is the immediate precursor of the phytohormone ethylene which is involved in the control of ripening. ACC deaminase reduces ethylene biosynthesis and thus extends the shelf life of fruits and vegetables."],
|
519
516
|
sp.cc['BIOTECHNOLOGY'])
|
520
517
|
end
|
@@ -524,7 +521,7 @@ CC and thus extends the shelf life of fruits and vegetables.'
|
|
524
521
|
data = 'CC -!- CATALYTIC ACTIVITY: Hydrolysis of alkylated DNA, releasing 3-
|
525
522
|
CC methyladenine, 3-methylguanine, 7-methylguanine and 7-
|
526
523
|
CC methyladenine.'
|
527
|
-
sp = Bio::
|
524
|
+
sp = Bio::UniProtKB.new(data)
|
528
525
|
assert_equal(["Hydrolysis of alkylated DNA, releasing 3-methyladenine, 3-methylguanine, 7-methylguanine and 7-methyladenine."],
|
529
526
|
sp.cc['CATALYTIC ACTIVITY'])
|
530
527
|
end
|
@@ -534,7 +531,7 @@ CC methyladenine.'
|
|
534
531
|
data = 'CC -!- CAUTION: Ref.1 sequence differs from that shown due to a Leu codon
|
535
532
|
CC in position 480 which was translated as a stop codon to shorten
|
536
533
|
CC the sequence.'
|
537
|
-
sp = Bio::
|
534
|
+
sp = Bio::UniProtKB.new(data)
|
538
535
|
assert_equal(["Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence."],
|
539
536
|
sp.cc['CAUTION'])
|
540
537
|
assert_equal("Ref.1 sequence differs from that shown due to a Leu codon in position 480 which was translated as a stop codon to shorten the sequence.",
|
@@ -546,7 +543,7 @@ CC the sequence.'
|
|
546
543
|
# COFACTOR Description of any non-protein substance required by an enzyme for its catalytic activity
|
547
544
|
data = 'CC -!- COFACTOR: Cl(-). Is unique in requiring Cl(-) for its activity.
|
548
545
|
CC -!- COFACTOR: Mg(2+).'
|
549
|
-
sp = Bio::
|
546
|
+
sp = Bio::UniProtKB.new(data)
|
550
547
|
assert_equal(["Cl(-). Is unique in requiring Cl(-) for its activity.",
|
551
548
|
"Mg(2+)."],
|
552
549
|
sp.cc['COFACTOR'])
|
@@ -561,7 +558,7 @@ CC -!- COFACTOR: Mg(2+).'
|
|
561
558
|
data = 'CC -!- DEVELOPMENTAL STAGE: In females, isoform 1 is expressed at day 35
|
562
559
|
CC with higher levels detected at day 56. Isoform 1 is not detected
|
563
560
|
CC in males of any age.'
|
564
|
-
sp = Bio::
|
561
|
+
sp = Bio::UniProtKB.new(data)
|
565
562
|
assert_equal(["In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age."],
|
566
563
|
sp.cc['DEVELOPMENTAL STAGE'])
|
567
564
|
assert_equal("In females, isoform 1 is expressed at day 35 with higher levels detected at day 56. Isoform 1 is not detected in males of any age.",
|
@@ -580,7 +577,7 @@ CC and aggressiveness of the disease. The Iowa type demonstrated no
|
|
580
577
|
CC cerebral hemorrhaging but is characterized by progressive
|
581
578
|
CC cognitive decline. Beta-APP40 is the predominant form of
|
582
579
|
CC cerebrovascular amyloid.'
|
583
|
-
sp = Bio::
|
580
|
+
sp = Bio::UniProtKB.new(data)
|
584
581
|
assert_equal(["Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid."],
|
585
582
|
sp.cc['DISEASE'])
|
586
583
|
assert_equal("Defects in APP are a cause of hereditary cerebral hemorrhage with amyloidosis (HCHWAD) [MIM:609065, 104760]. This disorder is characterized by amyloid deposits in cerebral vessels. The principal clinical characteristics are recurring cerebral hemorrhages, sometimes preceded by migrainous headaches or mental cleavage. Various types of HCHWAD are known. They differ in onset and aggressiveness of the disease. The Iowa type demonstrated no cerebral hemorrhaging but is characterized by progressive cognitive decline. Beta-APP40 is the predominant form of cerebrovascular amyloid.",
|
@@ -601,7 +598,7 @@ CC require the YENPTY motif for full interaction. These interactions
|
|
601
598
|
CC are independent of phosphorylation on the terminal tyrosine
|
602
599
|
CC residue. The NPXY site is also involved in clathrin-mediated
|
603
600
|
CC endocytosis (By similarity).'
|
604
|
-
sp = Bio::
|
601
|
+
sp = Bio::UniProtKB.new(data)
|
605
602
|
assert_equal(["The basolateral sorting signal (BaSS) is required for sorting of membrane proteins to the basolateral surface of epithelial cells.",
|
606
603
|
"The NPXY sequence motif found in many tyrosine-phosphorylated proteins is required for the specific binding of the PID domain. However, additional amino acids either N-or C-terminal to the NPXY motif are often required for complete interaction. The PID domain-containing proteins which bind APP require the YENPTY motif for full interaction. These interactions are independent of phosphorylation on the terminal tyrosine residue. The NPXY site is also involved in clathrin-mediated endocytosis (By similarity)."],
|
607
604
|
sp.cc['DOMAIN'])
|
@@ -614,7 +611,7 @@ CC endocytosis (By similarity).'
|
|
614
611
|
# ENZYME REGULATION Description of an enzyme regulatory mechanism
|
615
612
|
data = 'CC -!- ENZYME REGULATION: Insensitive to calcium/calmodulin. Stimulated
|
616
613
|
CC by the G protein beta and gamma subunit complex.'
|
617
|
-
sp = Bio::
|
614
|
+
sp = Bio::UniProtKB.new(data)
|
618
615
|
assert_equal(["Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex."],
|
619
616
|
sp.cc['ENZYME REGULATION'])
|
620
617
|
assert_equal("Insensitive to calcium/calmodulin. Stimulated by the G protein beta and gamma subunit complex.",
|
@@ -627,7 +624,7 @@ CC by the G protein beta and gamma subunit complex.'
|
|
627
624
|
CC interplay between intracellular calcium and cAMP determines the
|
628
625
|
CC cellular function. May be a physiologically relevant docking site
|
629
626
|
CC for calcineurin (By similarity).'
|
630
|
-
sp = Bio::
|
627
|
+
sp = Bio::UniProtKB.new(data)
|
631
628
|
assert_equal(["May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity)."],
|
632
629
|
sp.cc['FUNCTION'])
|
633
630
|
assert_equal("May play a fundamental role in situations where fine interplay between intracellular calcium and cAMP determines the cellular function. May be a physiologically relevant docking site for calcineurin (By similarity).",
|
@@ -637,7 +634,7 @@ CC for calcineurin (By similarity).'
|
|
637
634
|
def test_induction
|
638
635
|
# INDUCTION Description of the compound(s) or condition(s) that regulate gene expression
|
639
636
|
data = 'CC -!- INDUCTION: By pheromone (alpha-factor).'
|
640
|
-
sp = Bio::
|
637
|
+
sp = Bio::UniProtKB.new(data)
|
641
638
|
assert_equal(["By pheromone (alpha-factor)."],
|
642
639
|
sp.cc['INDUCTION'])
|
643
640
|
assert_equal("By pheromone (alpha-factor).",
|
@@ -649,7 +646,7 @@ CC for calcineurin (By similarity).'
|
|
649
646
|
data = 'CC -!- INTERACTION:
|
650
647
|
CC P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435;
|
651
648
|
CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
|
652
|
-
sp = Bio::
|
649
|
+
sp = Bio::UniProtKB.new(data)
|
653
650
|
assert_equal(["P62158:CALM1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397435; P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;"],
|
654
651
|
sp.cc['INTERACTION'])
|
655
652
|
assert_equal([{'SP_Ac' => 'P62158',
|
@@ -671,7 +668,7 @@ CC P62155:calm1 (xeno); NbExp=1; IntAct=EBI-457011, EBI-397568;'
|
|
671
668
|
CC NOTE=Ref.1.
|
672
669
|
CC -!- MASS SPECTROMETRY: MW=2892.2; METHOD=Electrospray; RANGE=1-29;
|
673
670
|
CC NOTE=Ref.2."
|
674
|
-
sp = Bio::
|
671
|
+
sp = Bio::UniProtKB.new(data)
|
675
672
|
assert_equal(["MW=2894.9; MW_ERR=3; METHOD=MALDI; RANGE=1-29; NOTE=Ref.1.",
|
676
673
|
"MW=2892.2; METHOD=Electrospray; RANGE=1-29; NOTE=Ref.2."],
|
677
674
|
sp.cc['MASS SPECTROMETRY'])
|
@@ -692,7 +689,7 @@ CC NOTE=Ref.2."
|
|
692
689
|
# MISCELLANEOUS Any comment which does not belong to any of the other defined topics
|
693
690
|
data = 'CC -!- MISCELLANEOUS: There are two isozymes; a cytoplasmic one and a
|
694
691
|
CC mitochondrial one.'
|
695
|
-
sp = Bio::
|
692
|
+
sp = Bio::UniProtKB.new(data)
|
696
693
|
assert_equal(["There are two isozymes; a cytoplasmic one and a mitochondrial one."],
|
697
694
|
sp.cc['MISCELLANEOUS'])
|
698
695
|
end
|
@@ -701,7 +698,7 @@ CC mitochondrial one.'
|
|
701
698
|
# PATHWAY Description of the metabolic pathway(s) with which a protein is associated
|
702
699
|
data = 'CC -!- PATHWAY: Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-
|
703
700
|
CC phosphate and glycerone phosphate from D-glucose: step 4.'
|
704
|
-
sp = Bio::
|
701
|
+
sp = Bio::UniProtKB.new(data)
|
705
702
|
assert_equal(["Carbohydrate degradation; glycolysis; D-glyceraldehyde 3-phosphate and glycerone phosphate from D-glucose: step 4."],
|
706
703
|
sp.cc['PATHWAY'])
|
707
704
|
assert_equal(["Carbohydrate degradation",
|
@@ -718,7 +715,7 @@ CC phosphate and glycerone phosphate from D-glucose: step 4.'
|
|
718
715
|
CC Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm
|
719
716
|
CC (Serono). Used in evaluating hypothalamic-pituitary gonadotropic
|
720
717
|
CC function.'
|
721
|
-
sp = Bio::
|
718
|
+
sp = Bio::UniProtKB.new(data)
|
722
719
|
assert_equal(["Available under the names Factrel (Ayerst Labs), Lutrepulse or Lutrelef (Ferring Pharmaceuticals) and Relisorm (Serono). Used in evaluating hypothalamic-pituitary gonadotropic function."],
|
723
720
|
sp.cc['PHARMACEUTICAL'])
|
724
721
|
end
|
@@ -734,7 +731,7 @@ CC like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown
|
|
734
731
|
CC here) contains one repeat starting at position 415, allele C
|
735
732
|
CC contains two repeats, allele B contains three repeats and allele A
|
736
733
|
CC contains four repeats.'
|
737
|
-
sp = Bio::
|
734
|
+
sp = Bio::UniProtKB.new(data)
|
738
735
|
assert_equal(["Position 161 is associated with platelet-specific alloantigen Siba. Siba(-) has Thr-161 and Siba(+) has Met-161. Siba is involved in neonatal alloimmune thrombocytopenia (NATP).",
|
739
736
|
"Polymorphisms arise from a variable number of tandem 13-amino acid repeats of S-E-P-A-P-S-P-T-T-P-E-P-T in the mucin-like macroglycopeptide (Pro/Thr-rich) domain. Allele D (shown here) contains one repeat starting at position 415, allele C contains two repeats, allele B contains three repeats and allele A contains four repeats."],
|
740
737
|
sp.cc['POLYMORPHISM'])
|
@@ -745,7 +742,7 @@ CC contains four repeats.'
|
|
745
742
|
data = 'CC -!- PTM: N-glycosylated, contains approximately 8 kDa of N-linked
|
746
743
|
CC carbohydrate.
|
747
744
|
CC -!- PTM: Palmitoylated.'
|
748
|
-
sp = Bio::
|
745
|
+
sp = Bio::UniProtKB.new(data)
|
749
746
|
assert_equal(["N-glycosylated, contains approximately 8 kDa of N-linked carbohydrate.",
|
750
747
|
"Palmitoylated."],
|
751
748
|
sp.cc['PTM'])
|
@@ -762,7 +759,7 @@ CC positions 50, 78, 104, 260 and 264 are modified to sense codons.'
|
|
762
759
|
CC brain. Heteromerically expressed edited GLUR2 (R) receptor
|
763
760
|
CC complexes are impermeable to calcium, whereas the unedited (Q)
|
764
761
|
CC forms are highly permeable to divalent ions (By similarity).'
|
765
|
-
sp = Bio::
|
762
|
+
sp = Bio::UniProtKB.new(data)
|
766
763
|
assert_equal(["Modified_positions=607; Note=Fully edited in the brain. Heteromerically expressed edited GLUR2 (R) receptor complexes are impermeable to calcium, whereas the unedited (Q) forms are highly permeable to divalent ions (By similarity)."],
|
767
764
|
sp.cc['RNA EDITING'])
|
768
765
|
assert_equal({"Modified_positions" => ['607'],
|
@@ -774,7 +771,7 @@ CC forms are highly permeable to divalent ions (By similarity).'
|
|
774
771
|
# SIMILARITY Description of the similaritie(s) (sequence or structural) of a protein with other proteins
|
775
772
|
data = 'CC -!- SIMILARITY: Contains 1 protein kinase domain.
|
776
773
|
CC -!- SIMILARITY: Contains 1 RGS domain.'
|
777
|
-
sp = Bio::
|
774
|
+
sp = Bio::UniProtKB.new(data)
|
778
775
|
assert_equal(["Contains 1 protein kinase domain.", "Contains 1 RGS domain."],
|
779
776
|
sp.cc['SIMILARITY'])
|
780
777
|
end
|
@@ -803,7 +800,7 @@ CC endocytic compartment. Associates with lysosome membranes."
|
|
803
800
|
data = "CC -!- SUBCELLULAR LOCATION: Plastid; chloroplast; chloroplast membrane;
|
804
801
|
CC peripheral membrane protein. Plastid; chloroplast; chloroplast
|
805
802
|
CC stroma."
|
806
|
-
sp = Bio::
|
803
|
+
sp = Bio::UniProtKB.new(data)
|
807
804
|
assert_equal(["Plastid; chloroplast; chloroplast membrane; peripheral membrane protein. Plastid; chloroplast; chloroplast stroma."],
|
808
805
|
sp.cc['SUBCELLULAR LOCATION'])
|
809
806
|
assert_equal([["Plastid",
|
@@ -822,7 +819,7 @@ CC stroma."
|
|
822
819
|
CC MAPK9, MAPK10 and MAPK12.'
|
823
820
|
|
824
821
|
data = 'CC -!- SUBUNIT: Homotetramer.'
|
825
|
-
sp = Bio::
|
822
|
+
sp = Bio::UniProtKB.new(data)
|
826
823
|
assert_equal(["Homotetramer."],
|
827
824
|
sp.cc['SUBUNIT'])
|
828
825
|
end
|
@@ -838,7 +835,7 @@ CC thymus, testis, embryo and proliferating blood lymphocytes."
|
|
838
835
|
CC heart, spleen, kidney and blood. Isoform 2 is expressed (at
|
839
836
|
CC protein level) in the spleen, skeletal muscle and gastrointestinal
|
840
837
|
CC epithelia."
|
841
|
-
sp = Bio::
|
838
|
+
sp = Bio::UniProtKB.new(data)
|
842
839
|
assert_equal(["Isoform 2 is highly expressed in the brain, heart, spleen, kidney and blood. Isoform 2 is expressed (at protein level) in the spleen, skeletal muscle and gastrointestinal epithelia."],
|
843
840
|
sp.cc['TISSUE SPECIFICITY'])
|
844
841
|
end
|
@@ -846,7 +843,7 @@ CC epithelia."
|
|
846
843
|
def test_toxic_dose
|
847
844
|
# TOXIC DOSE Description of the lethal dose (LD), paralytic dose (PD) or effective dose of a protein
|
848
845
|
data = 'CC -!- TOXIC DOSE: LD(50) is 12 mg/kg by intraperitoneal injection.'
|
849
|
-
sp = Bio::
|
846
|
+
sp = Bio::UniProtKB.new(data)
|
850
847
|
assert_equal(["LD(50) is 12 mg/kg by intraperitoneal injection."],
|
851
848
|
sp.cc['TOXIC DOSE'])
|
852
849
|
end
|
@@ -859,7 +856,7 @@ CC -!- WEB RESOURCE: NAME=Connexin-deafness homepage;
|
|
859
856
|
CC URL="http://www.crg.es/deafness/".
|
860
857
|
CC -!- WEB RESOURCE: NAME=GeneReviews;
|
861
858
|
CC URL="http://www.genetests.org/query?gene=GJB1".'
|
862
|
-
sp = Bio::
|
859
|
+
sp = Bio::UniProtKB.new(data)
|
863
860
|
assert_equal(['NAME=Inherited peripheral neuropathies mutation db; URL="http://www.molgen.ua.ac.be/CMTMutations/".',
|
864
861
|
'NAME=Connexin-deafness homepage; URL="http://www.crg.es/deafness/".',
|
865
862
|
'NAME=GeneReviews; URL="http://www.genetests.org/query?gene=GJB1".'],
|
@@ -874,10 +871,10 @@ CC URL="http://www.genetests.org/query?gene=GJB1".'
|
|
874
871
|
|
875
872
|
end
|
876
873
|
|
877
|
-
end # class
|
874
|
+
end # class TestUniProtKB_CC
|
878
875
|
|
879
876
|
# http://br.expasy.org/sprot/userman.html#Ref_line
|
880
|
-
class
|
877
|
+
class TestUniProtKB_Ref < Test::Unit::TestCase
|
881
878
|
|
882
879
|
def setup
|
883
880
|
data = 'RN [1]
|
@@ -894,7 +891,7 @@ RT exceptionally basic N-terminal domains to capture and localize an
|
|
894
891
|
RT atypical protein kinase C: characterization of Caenorhabditis elegans
|
895
892
|
RT C kinase adapter 1, a protein that avidly binds protein kinase C3.";
|
896
893
|
RL J. Biol. Chem. 276:10463-10475(2001).'
|
897
|
-
@obj =
|
894
|
+
@obj = UniProtKB.new(data)
|
898
895
|
end
|
899
896
|
|
900
897
|
def test_ref
|
@@ -963,11 +960,11 @@ RL J. Biol. Chem. 276:10463-10475(2001).'
|
|
963
960
|
@obj.ref.first['RL'])
|
964
961
|
end
|
965
962
|
|
966
|
-
end # class
|
963
|
+
end # class TestUniProtKB_Ref
|
967
964
|
|
968
965
|
|
969
966
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.0
|
970
|
-
class
|
967
|
+
class TestUniProtKB_SwissProtRel41_0 < Test::Unit::TestCase
|
971
968
|
# Progress in the conversion of Swiss-Prot to mixed-case characters
|
972
969
|
|
973
970
|
# Multiple RP lines
|
@@ -975,7 +972,7 @@ RL J. Biol. Chem. 276:10463-10475(2001).'
|
|
975
972
|
data = "RN [1]
|
976
973
|
RP SEQUENCE FROM N.A., SEQUENCE OF 23-42 AND 351-365, AND
|
977
974
|
RP CHARACTERIZATION."
|
978
|
-
sp =
|
975
|
+
sp = UniProtKB.new(data)
|
979
976
|
assert_equal(['SEQUENCE FROM N.A.',
|
980
977
|
'SEQUENCE OF 23-42 AND 351-365',
|
981
978
|
'CHARACTERIZATION'],
|
@@ -985,7 +982,7 @@ RP CHARACTERIZATION."
|
|
985
982
|
|
986
983
|
|
987
984
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.1
|
988
|
-
class
|
985
|
+
class TestUniProtKB_SwissProtRel41_1 < Test::Unit::TestCase
|
989
986
|
# New syntax of the CC line topic ALTERNATIVE PRODUCTS
|
990
987
|
def test_alternative_products
|
991
988
|
data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
|
@@ -1004,7 +1001,7 @@ CC Sequence=VSP_identifier_1, VSP_identifier_2;
|
|
1004
1001
|
CC Note=Free text;
|
1005
1002
|
CC Event=Alternative initiation;
|
1006
1003
|
CC Comment=Free text;"
|
1007
|
-
sp =
|
1004
|
+
sp = UniProtKB.new(data)
|
1008
1005
|
res = {"Comment" => "Free text",
|
1009
1006
|
"Named isoforms" => "2",
|
1010
1007
|
"Variants" => [{"Name" => "Isoform_1",
|
@@ -1049,7 +1046,7 @@ FT VARSPLIC 1655 1705 Missing (in isoform 3A and isoform 3B).
|
|
1049
1046
|
FT /FTId=VSP_004794.
|
1050
1047
|
FT VARSPLIC 1790 1790 Missing (in isoform Del-1790).
|
1051
1048
|
FT /FTId=VSP_004795."
|
1052
|
-
sp =
|
1049
|
+
sp = UniProtKB.new(data)
|
1053
1050
|
|
1054
1051
|
assert_equal({"Comment" => "",
|
1055
1052
|
"Named isoforms" => "6",
|
@@ -1114,14 +1111,14 @@ FT /FTId=VSP_004795."
|
|
1114
1111
|
|
1115
1112
|
|
1116
1113
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.10
|
1117
|
-
class
|
1114
|
+
class TestUniProtKB_SwissProtRel41_10 < Test::Unit::TestCase
|
1118
1115
|
# Reference Comment (RC) line topics may span lines
|
1119
1116
|
def test_RC_lines
|
1120
1117
|
data = "RN [1]
|
1121
1118
|
RC STRAIN=AZ.026, DC.005, GA.039, GA2181, IL.014, IN.018, KY.172, KY2.37,
|
1122
1119
|
RC LA.013, MN.001, MNb027, MS.040, NY.016, OH.036, TN.173, TN2.38,
|
1123
1120
|
RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;"
|
1124
|
-
sp =
|
1121
|
+
sp = UniProtKB.new(data)
|
1125
1122
|
assert_equal([{"Text"=>"AZ.026", "Token"=>"STRAIN"},
|
1126
1123
|
{"Text"=>"DC.005", "Token"=>"STRAIN"},
|
1127
1124
|
{"Text"=>"GA.039", "Token"=>"STRAIN"},
|
@@ -1150,11 +1147,11 @@ RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;"
|
|
1150
1147
|
|
1151
1148
|
|
1152
1149
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel41.20
|
1153
|
-
class
|
1150
|
+
class TestUniProtKB_SwissProtRel41_20 < Test::Unit::TestCase
|
1154
1151
|
# Case and wording change for submissions to Swiss-Prot in reference location (RL) lines
|
1155
1152
|
def test_RL_lines
|
1156
1153
|
data = "RL Submitted (MAY-2002) to the SWISS-PROT data bank."
|
1157
|
-
sp =
|
1154
|
+
sp = UniProtKB.new(data)
|
1158
1155
|
assert_equal('',
|
1159
1156
|
sp.ref.first['RL'])
|
1160
1157
|
end
|
@@ -1164,7 +1161,7 @@ RC UT.002, AL.012, AZ.180, MI.035, VA.015, and IL2.17;"
|
|
1164
1161
|
data = "CC -!- ALLERGEN: Causes an allergic reaction in human. Binds IgE. It is a
|
1165
1162
|
CC partially heat-labile allergen that may cause both respiratory and
|
1166
1163
|
CC food-allergy symptoms in patients with the bird-egg syndrome."
|
1167
|
-
sp =
|
1164
|
+
sp = UniProtKB.new(data)
|
1168
1165
|
assert_equal(["Causes an allergic reaction in human. Binds IgE. It is a partially heat-labile allergen that may cause both respiratory and food-allergy symptoms in patients with the bird-egg syndrome."],
|
1169
1166
|
sp.cc("ALLERGEN"))
|
1170
1167
|
end
|
@@ -1172,11 +1169,11 @@ CC food-allergy symptoms in patients with the bird-egg syndrome."
|
|
1172
1169
|
|
1173
1170
|
|
1174
1171
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel42.6
|
1175
|
-
class
|
1172
|
+
class TestUniProtKB_SwissProtRel42_6 < Test::Unit::TestCase
|
1176
1173
|
# New comment line (CC) topic RNA EDITING
|
1177
1174
|
def test_CC_rna_editing
|
1178
1175
|
data = "CC -!- RNA EDITING: Modified_positions=393, 431, 452, 495."
|
1179
|
-
sp =
|
1176
|
+
sp = UniProtKB.new(data)
|
1180
1177
|
assert_equal({"Note"=>"",
|
1181
1178
|
"Modified_positions"=>['393', '431', '452', '495']},
|
1182
1179
|
sp.cc("RNA EDITING"))
|
@@ -1184,7 +1181,7 @@ CC food-allergy symptoms in patients with the bird-egg syndrome."
|
|
1184
1181
|
data = "CC -!- RNA EDITING: Modified_positions=59, 78, 94, 98, 102, 121; Note=The
|
1185
1182
|
CC stop codon at position 121 is created by RNA editing. The nonsense
|
1186
1183
|
CC codon at position 59 is modified to a sense codon."
|
1187
|
-
sp =
|
1184
|
+
sp = UniProtKB.new(data)
|
1188
1185
|
assert_equal({"Note"=>"The stop codon at position 121 is created by RNA editing. The nonsense codon at position 59 is modified to a sense codon.",
|
1189
1186
|
"Modified_positions"=>['59', '78', '94', '98', '102', '121']},
|
1190
1187
|
sp.cc("RNA EDITING"))
|
@@ -1192,7 +1189,7 @@ CC codon at position 59 is modified to a sense codon."
|
|
1192
1189
|
data = "CC -!- RNA EDITING: Modified_positions=Not_applicable; Note=Some
|
1193
1190
|
CC positions are modified by RNA editing via nucleotide insertion or
|
1194
1191
|
CC deletion. The initiator methionine is created by RNA editing."
|
1195
|
-
sp =
|
1192
|
+
sp = UniProtKB.new(data)
|
1196
1193
|
assert_equal({'Modified_positions' => ['Not_applicable'],
|
1197
1194
|
'Note' => "Some positions are modified by RNA editing via nucleotide insertion or deletion. The initiator methionine is created by RNA editing."},
|
1198
1195
|
sp.cc("RNA EDITING"))
|
@@ -1201,14 +1198,14 @@ CC deletion. The initiator methionine is created by RNA editing."
|
|
1201
1198
|
|
1202
1199
|
|
1203
1200
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel1_12
|
1204
|
-
class
|
1201
|
+
class TestUniProtKB_UniProtRel1_12 < Test::Unit::TestCase
|
1205
1202
|
# Digital Object Identifier (DOI) in the RX line
|
1206
1203
|
def test_DOI_in_RX_line
|
1207
1204
|
# RX [MEDLINE=Medline_identifier; ][PubMed=Pubmed_identifier; ][DOI=Digital_object_identifier;]
|
1208
1205
|
data = "
|
1209
1206
|
RN [1]
|
1210
1207
|
RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;"
|
1211
|
-
sp =
|
1208
|
+
sp = UniProtKB.new(data)
|
1212
1209
|
assert_equal({'MEDLINE' => '97291283',
|
1213
1210
|
'PubMed' => '9145897',
|
1214
1211
|
'DOI' => '10.1007/s00248-002-2038-4'},
|
@@ -1221,7 +1218,7 @@ RX MEDLINE=97291283; PubMed=9145897; DOI=10.1007/s00248-002-2038-4;"
|
|
1221
1218
|
RN [1]
|
1222
1219
|
RG The C. elegans sequencing consortium;
|
1223
1220
|
RG The Brazilian network for HIV isolation and characterization;"
|
1224
|
-
sp =
|
1221
|
+
sp = UniProtKB.new(data)
|
1225
1222
|
assert_equal(['The C. elegans sequencing consortium',
|
1226
1223
|
'The Brazilian network for HIV isolation and characterization'],
|
1227
1224
|
sp.ref.first['RG'])
|
@@ -1230,14 +1227,14 @@ RG The Brazilian network for HIV isolation and characterization;"
|
|
1230
1227
|
|
1231
1228
|
|
1232
1229
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_0
|
1233
|
-
class
|
1230
|
+
class TestUniProtKB_UniProtRel2_0 < Test::Unit::TestCase
|
1234
1231
|
# New format for the GN (Gene Name) line
|
1235
1232
|
# GN Name=<name>; Synonyms=<name1>[, <name2>...]; OrderedLocusNames=<name1>[, <name2>...];
|
1236
1233
|
# GN ORFNames=<name1>[, <name2>...];
|
1237
1234
|
def test_GN_line
|
1238
1235
|
data = "GN Name=atpG; Synonyms=uncG, papC;
|
1239
1236
|
GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"
|
1240
|
-
sp =
|
1237
|
+
sp = UniProtKB.new(data)
|
1241
1238
|
assert_equal([{:orfs => [],
|
1242
1239
|
:loci => ["b3733", "c4659", "z5231", "ECs4675", "SF3813", "S3955"],
|
1243
1240
|
:name => "atpG",
|
@@ -1245,7 +1242,7 @@ GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"
|
|
1245
1242
|
sp.gn)
|
1246
1243
|
|
1247
1244
|
data = "GN ORFNames=SPAC1834.11c;"
|
1248
|
-
sp =
|
1245
|
+
sp = UniProtKB.new(data)
|
1249
1246
|
assert_equal([{:orfs => ['SPAC1834.11c'],
|
1250
1247
|
:loci => [],
|
1251
1248
|
:name => '',
|
@@ -1256,7 +1253,7 @@ GN OrderedLocusNames=b3733, c4659, z5231, ECs4675, SF3813, S3955;"
|
|
1256
1253
|
GN ORFNames=MTCY164.27;
|
1257
1254
|
GN and
|
1258
1255
|
GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;"
|
1259
|
-
sp =
|
1256
|
+
sp = UniProtKB.new(data)
|
1260
1257
|
assert_equal([{:orfs => ["MTCY164.27"],
|
1261
1258
|
:loci => ["Rv3117", "MT3199"],
|
1262
1259
|
:name => "cysA1",
|
@@ -1270,12 +1267,12 @@ GN Name=cysA2; OrderedLocusNames=Rv0815c, MT0837; ORFNames=MTV043.07c;"
|
|
1270
1267
|
end
|
1271
1268
|
|
1272
1269
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_1
|
1273
|
-
class
|
1270
|
+
class TestUniProtKB_UniProtRel2_1 < Test::Unit::TestCase
|
1274
1271
|
# Format change in the comment line (CC) topic: MASS SPECTROMETRY
|
1275
1272
|
def test_CC_mass_spectrometry
|
1276
1273
|
data = "CC -!- MASS SPECTROMETRY: MW=32875.93; METHOD=MALDI;
|
1277
1274
|
CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."
|
1278
|
-
sp =
|
1275
|
+
sp = UniProtKB.new(data)
|
1279
1276
|
assert_equal([{"RANGE"=>"1-284",
|
1280
1277
|
"METHOD"=>"MALDI",
|
1281
1278
|
"MW_ERR"=>nil,
|
@@ -1287,11 +1284,11 @@ CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."
|
|
1287
1284
|
|
1288
1285
|
|
1289
1286
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel2_3
|
1290
|
-
class
|
1287
|
+
class TestUniProtKB_UniProtRel2_3 < Test::Unit::TestCase
|
1291
1288
|
# New RL line structure for electronic publications
|
1292
1289
|
def test_RL_line
|
1293
1290
|
data = "RL Submitted (XXX-YYYY) to the HIV data bank."
|
1294
|
-
sp =
|
1291
|
+
sp = UniProtKB.new(data)
|
1295
1292
|
assert_equal('',
|
1296
1293
|
sp.ref.first['RL'])
|
1297
1294
|
end
|
@@ -1299,7 +1296,7 @@ CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."
|
|
1299
1296
|
# Format change in the cross-reference to PDB
|
1300
1297
|
def test_DR_PDB
|
1301
1298
|
data = "DR PDB; 1NB3; X-ray; A/B/C/D=116-335, P/R/S/T=98-105."
|
1302
|
-
sp =
|
1299
|
+
sp = UniProtKB.new(data)
|
1303
1300
|
assert_equal([["1NB3", "X-ray", "A/B/C/D=116-335, P/R/S/T=98-105"]],
|
1304
1301
|
sp.dr['PDB'])
|
1305
1302
|
end
|
@@ -1307,7 +1304,7 @@ CC RANGE=1-284 (Isoform 3); NOTE=Ref.6."
|
|
1307
1304
|
|
1308
1305
|
|
1309
1306
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_4
|
1310
|
-
class
|
1307
|
+
class TestUniProtKB_UniProtRel3_4 < Test::Unit::TestCase
|
1311
1308
|
# Changes in the RP (Reference Position) line
|
1312
1309
|
def test_RP_line
|
1313
1310
|
data = "
|
@@ -1315,7 +1312,7 @@ RN [1]
|
|
1315
1312
|
RP NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1), PROTEIN SEQUENCE
|
1316
1313
|
RP OF 108-131; 220-231 AND 349-393, CHARACTERIZATION, AND MUTAGENESIS OF
|
1317
1314
|
RP ARG-336."
|
1318
|
-
sp =
|
1315
|
+
sp = UniProtKB.new(data)
|
1319
1316
|
assert_equal(['NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)',
|
1320
1317
|
'PROTEIN SEQUENCE OF 108-131; 220-231 AND 349-393',
|
1321
1318
|
'CHARACTERIZATION',
|
@@ -1325,7 +1322,7 @@ RP ARG-336."
|
|
1325
1322
|
data = "
|
1326
1323
|
RN [1]
|
1327
1324
|
RP NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]."
|
1328
|
-
sp =
|
1325
|
+
sp = UniProtKB.new(data)
|
1329
1326
|
assert_equal(['NUCLEOTIDE SEQUENCE [GENOMIC DNA / MRNA]'],
|
1330
1327
|
sp.ref.first['RP'])
|
1331
1328
|
end
|
@@ -1339,7 +1336,7 @@ CC Abs(max)=395 nm;
|
|
1339
1336
|
CC Note=Exhibits a smaller absorbance peak at 470 nm. The
|
1340
1337
|
CC fluorescence emission spectrum peaks at 509 nm with a shoulder
|
1341
1338
|
CC at 540 nm;"
|
1342
|
-
sp =
|
1339
|
+
sp = UniProtKB.new(data)
|
1343
1340
|
assert_equal({"Redox potential" => "",
|
1344
1341
|
"Temperature dependence" => "",
|
1345
1342
|
"Kinetic parameters" => {},
|
@@ -1357,7 +1354,7 @@ CC Vmax=0.11 mmol/min/mg enzyme with maltose as substrate;
|
|
1357
1354
|
CC Note=Acetylates glucose, maltose, mannose, galactose, and
|
1358
1355
|
CC fructose with a decreasing relative rate of 1, 0.55, 0.20, 0.07,
|
1359
1356
|
CC 0.04;"
|
1360
|
-
sp =
|
1357
|
+
sp = UniProtKB.new(data)
|
1361
1358
|
assert_equal({"Redox potential" => "",
|
1362
1359
|
"Temperature dependence" => "",
|
1363
1360
|
"Kinetic parameters" => {"KM" => "62 mM for glucose; KM=90 mM for maltose",
|
@@ -1375,7 +1372,7 @@ CC Optimum pH is 7.5. Active from pH 5.0 to 9.0;
|
|
1375
1372
|
CC Temperature dependence:
|
1376
1373
|
CC Optimum temperature is 45 degrees Celsius. Active from 30 to 60
|
1377
1374
|
CC degrees Celsius;"
|
1378
|
-
sp =
|
1375
|
+
sp = UniProtKB.new(data)
|
1379
1376
|
assert_equal({"Redox potential" => "",
|
1380
1377
|
"Temperature dependence" => "Optimum temperature is 45 degrees Celsius. Active from 30 to 60 degrees Celsius",
|
1381
1378
|
"Kinetic parameters" => {},
|
@@ -1387,7 +1384,7 @@ CC degrees Celsius;"
|
|
1387
1384
|
|
1388
1385
|
|
1389
1386
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel3_5
|
1390
|
-
class
|
1387
|
+
class TestUniProtKB_UniProtRel3_5 < Test::Unit::TestCase
|
1391
1388
|
# Extension of the Swiss-Prot entry name format
|
1392
1389
|
def test_entry_name_format
|
1393
1390
|
# TBD
|
@@ -1395,7 +1392,7 @@ CC degrees Celsius;"
|
|
1395
1392
|
end
|
1396
1393
|
|
1397
1394
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel4_0
|
1398
|
-
class
|
1395
|
+
class TestUniProtKB_UniProtRel4_0 < Test::Unit::TestCase
|
1399
1396
|
# Extension of the TrEMBL entry name format
|
1400
1397
|
|
1401
1398
|
# Change of the entry name in many Swiss-Prot entries
|
@@ -1404,7 +1401,7 @@ CC degrees Celsius;"
|
|
1404
1401
|
def test_CC_interaction
|
1405
1402
|
data = "CC -!- INTERACTION:
|
1406
1403
|
CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;"
|
1407
|
-
sp =
|
1404
|
+
sp = UniProtKB.new(data)
|
1408
1405
|
assert_equal([{"SP_Ac" => "P11450",
|
1409
1406
|
"identifier" => "fcp3c",
|
1410
1407
|
"optional_identifier" => nil,
|
@@ -1416,7 +1413,7 @@ CC P11450:fcp3c; NbExp=1; IntAct=EBI-126914, EBI-159556;"
|
|
1416
1413
|
def test_CC_interaction_isoform
|
1417
1414
|
data = "CC -!- INTERACTION:
|
1418
1415
|
CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;"
|
1419
|
-
sp =
|
1416
|
+
sp = UniProtKB.new(data)
|
1420
1417
|
assert_equal([{"SP_Ac" => 'Q9W1K5-1',
|
1421
1418
|
"identifier" => 'cg11299',
|
1422
1419
|
"optional_identifier" => nil,
|
@@ -1428,7 +1425,7 @@ CC Q9W1K5-1:cg11299; NbExp=1; IntAct=EBI-133844, EBI-212772;"
|
|
1428
1425
|
def test_CC_interaction_no_gene_name
|
1429
1426
|
data = "CC -!- INTERACTION:
|
1430
1427
|
CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;"
|
1431
|
-
sp =
|
1428
|
+
sp = UniProtKB.new(data)
|
1432
1429
|
assert_equal([{"SP_Ac" => 'Q8NI08',
|
1433
1430
|
"identifier" => '-',
|
1434
1431
|
"optional_identifier" => nil,
|
@@ -1441,7 +1438,7 @@ CC Q8NI08:-; NbExp=1; IntAct=EBI-80809, EBI-80799;"
|
|
1441
1438
|
data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
|
1442
1439
|
CC -!- INTERACTION:
|
1443
1440
|
CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;"
|
1444
|
-
sp =
|
1441
|
+
sp = UniProtKB.new(data)
|
1445
1442
|
assert_equal([{"SP_Ac" => 'TEST_ENTRY',
|
1446
1443
|
"identifier" => 'TEST_ENTRY',
|
1447
1444
|
"optional_identifier" => nil,
|
@@ -1453,7 +1450,7 @@ CC Self; NbExp=1; IntAct=EBI-123485, EBI-123485;"
|
|
1453
1450
|
def test_CC_interaction_The_source_organisms_of_the_interacting_proteins_are_different
|
1454
1451
|
data = "CC -!- INTERACTION:
|
1455
1452
|
CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;"
|
1456
|
-
sp =
|
1453
|
+
sp = UniProtKB.new(data)
|
1457
1454
|
assert_equal([{"SP_Ac" => 'Q8C1S0',
|
1458
1455
|
"identifier" => '2410018m14rik',
|
1459
1456
|
"optional_identifier" => '(xeno)',
|
@@ -1466,7 +1463,7 @@ CC Q8C1S0:2410018m14rik (xeno); NbExp=1; IntAct=EBI-394562, EBI-398761;"
|
|
1466
1463
|
data = "CC -!- INTERACTION:
|
1467
1464
|
CC P51617:irak1; NbExp=1; IntAct=EBI-448466, EBI-358664;
|
1468
1465
|
CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"
|
1469
|
-
sp =
|
1466
|
+
sp = UniProtKB.new(data)
|
1470
1467
|
assert_equal([{"SP_Ac" => "P51617",
|
1471
1468
|
"identifier" => "irak1",
|
1472
1469
|
"optional_identifier" => nil,
|
@@ -1483,7 +1480,7 @@ CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"
|
|
1483
1480
|
|
1484
1481
|
|
1485
1482
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_0
|
1486
|
-
class
|
1483
|
+
class TestUniProtKB_UniProtRel5_0 < Test::Unit::TestCase
|
1487
1484
|
# Format change in the DR line
|
1488
1485
|
# DR DATABASE_IDENTIFIER; PRIMARY_IDENTIFIER; SECONDARY_IDENTIFIER[; TERTIARY_IDENTIFIER][; QUATERNARY_IDENTIFIER].
|
1489
1486
|
def test_DR_line
|
@@ -1491,7 +1488,7 @@ CC P51617:irak1; NbExp=1; IntAct=EBI-448472, EBI-358664;"
|
|
1491
1488
|
DR EMBL; M68939; AAA26107.1; -; Genomic_DNA.
|
1492
1489
|
DR EMBL; U56386; AAB72034.1; -; mRNA."
|
1493
1490
|
|
1494
|
-
sp =
|
1491
|
+
sp = UniProtKB.new(data)
|
1495
1492
|
assert_equal([["M68939", "AAA26107.1", "-", "Genomic_DNA"],
|
1496
1493
|
["U56386", "AAB72034.1", "-", "mRNA"]],
|
1497
1494
|
sp.dr['EMBL'])
|
@@ -1512,12 +1509,12 @@ DR EMBL; U56386; AAB72034.1; -; mRNA."
|
|
1512
1509
|
|
1513
1510
|
|
1514
1511
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel5_4
|
1515
|
-
class
|
1512
|
+
class TestUniProtKB_UniProtRel5_4 < Test::Unit::TestCase
|
1516
1513
|
# Multiple comment line (CC) topics COFACTOR
|
1517
1514
|
def test_multiple_cofactors
|
1518
1515
|
data = "CC -!- COFACTOR: Binds 1 2Fe-2S cluster per subunit (By similarity).
|
1519
1516
|
CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."
|
1520
|
-
sp =
|
1517
|
+
sp = UniProtKB.new(data)
|
1521
1518
|
assert_equal(["Binds 1 2Fe-2S cluster per subunit (By similarity).",
|
1522
1519
|
"Binds 1 Fe(2+) ion per subunit (By similarity)."],
|
1523
1520
|
sp.cc['COFACTOR'])
|
@@ -1529,38 +1526,38 @@ CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."
|
|
1529
1526
|
|
1530
1527
|
|
1531
1528
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_0
|
1532
|
-
class
|
1529
|
+
class TestUniProtKB_UniProtRel6_0 < Test::Unit::TestCase
|
1533
1530
|
# Changes in the OG (OrGanelle) line
|
1534
1531
|
def test_OG_line
|
1535
1532
|
data = "OG Plastid."
|
1536
|
-
sp =
|
1533
|
+
sp = UniProtKB.new(data)
|
1537
1534
|
assert_equal(['Plastid'], sp.og)
|
1538
1535
|
|
1539
1536
|
data = "OG Plastid; Apicoplast."
|
1540
|
-
sp =
|
1537
|
+
sp = UniProtKB.new(data)
|
1541
1538
|
assert_equal(['Plastid', 'Apicoplast'], sp.og)
|
1542
1539
|
|
1543
1540
|
data = "OG Plastid; Chloroplast."
|
1544
|
-
sp =
|
1541
|
+
sp = UniProtKB.new(data)
|
1545
1542
|
assert_equal(['Plastid', 'Chloroplast'], sp.og)
|
1546
1543
|
|
1547
1544
|
data = "OG Plastid; Cyanelle."
|
1548
|
-
sp =
|
1545
|
+
sp = UniProtKB.new(data)
|
1549
1546
|
assert_equal(['Plastid', 'Cyanelle'], sp.og)
|
1550
1547
|
|
1551
1548
|
data = "OG Plastid; Non-photosynthetic plastid."
|
1552
|
-
sp =
|
1549
|
+
sp = UniProtKB.new(data)
|
1553
1550
|
assert_equal(['Plastid', 'Non-photosynthetic plastid'], sp.og)
|
1554
1551
|
end
|
1555
1552
|
end
|
1556
1553
|
|
1557
1554
|
|
1558
1555
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_1
|
1559
|
-
class
|
1556
|
+
class TestUniProtKB_UniProtRel6_1 < Test::Unit::TestCase
|
1560
1557
|
# Annotation changes concerning the feature key METAL
|
1561
1558
|
def test_FT_metal
|
1562
1559
|
old_data = "FT METAL 61 61 Copper and zinc."
|
1563
|
-
sp =
|
1560
|
+
sp = UniProtKB.new(old_data)
|
1564
1561
|
assert_equal([{'From' => 61,
|
1565
1562
|
'To' => 61,
|
1566
1563
|
'Description' => 'Copper and zinc.',
|
@@ -1571,7 +1568,7 @@ CC -!- COFACTOR: Binds 1 Fe(2+) ion per subunit (By similarity)."
|
|
1571
1568
|
|
1572
1569
|
new_data = "FT METAL 61 61 Copper.
|
1573
1570
|
FT METAL 61 61 Zinc."
|
1574
|
-
sp =
|
1571
|
+
sp = UniProtKB.new(new_data)
|
1575
1572
|
assert_equal([{"From" => 61,
|
1576
1573
|
"To" => 61,
|
1577
1574
|
"Description" => "Copper.",
|
@@ -1590,7 +1587,7 @@ FT METAL 61 61 Zinc."
|
|
1590
1587
|
|
1591
1588
|
|
1592
1589
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel6_5
|
1593
|
-
class
|
1590
|
+
class TestUniProtKB_UniProtRel6_5 < Test::Unit::TestCase
|
1594
1591
|
# Changes in the keywlist.txt file
|
1595
1592
|
# * Modification of the HI line format:
|
1596
1593
|
def test_HI_line
|
@@ -1599,7 +1596,7 @@ FT METAL 61 61 Zinc."
|
|
1599
1596
|
data = "HI Molecular function: Ionic channel; Calcium channel.
|
1600
1597
|
HI Biological process: Transport; Ion transport; Calcium transport; Calcium channel.
|
1601
1598
|
HI Ligand: Calcium; Calcium channel."
|
1602
|
-
sp =
|
1599
|
+
sp = UniProtKB.new(data)
|
1603
1600
|
assert_equal([{'Category' => 'Molecular function',
|
1604
1601
|
'Keywords' => ['Ionic channel'],
|
1605
1602
|
'Keyword' => 'Calcium channel'},
|
@@ -1615,13 +1612,13 @@ HI Ligand: Calcium; Calcium channel."
|
|
1615
1612
|
|
1616
1613
|
|
1617
1614
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.0
|
1618
|
-
class
|
1615
|
+
class TestUniProtKB_UniProtRel7_0 < Test::Unit::TestCase
|
1619
1616
|
# Changes concerning dates and versions numbers (DT lines)
|
1620
1617
|
def test_DT_line
|
1621
1618
|
up_sp_data = "DT 01-JAN-1998, integrated into UniProtKB/Swiss-Prot.
|
1622
1619
|
DT 15-OCT-2001, sequence version 3.
|
1623
1620
|
DT 01-APR-2004, entry version 14."
|
1624
|
-
sp =
|
1621
|
+
sp = UniProtKB.new(up_sp_data)
|
1625
1622
|
assert_equal({"sequence" => "15-OCT-2001, sequence version 3.",
|
1626
1623
|
"annotation" => "01-APR-2004, entry version 14.",
|
1627
1624
|
"created" => "01-JAN-1998, integrated into UniProtKB/Swiss-Prot."},
|
@@ -1630,7 +1627,7 @@ DT 01-APR-2004, entry version 14."
|
|
1630
1627
|
up_tr_data = "DT 01-FEB-1999, integrated into UniProtKB/TrEMBL.
|
1631
1628
|
DT 15-OCT-2000, sequence version 2.
|
1632
1629
|
DT 15-DEC-2004, entry version 5."
|
1633
|
-
sp =
|
1630
|
+
sp = UniProtKB.new(up_tr_data)
|
1634
1631
|
assert_equal({"sequence" => "15-OCT-2000, sequence version 2.",
|
1635
1632
|
"annotation" => "15-DEC-2004, entry version 5.",
|
1636
1633
|
"created" => "01-FEB-1999, integrated into UniProtKB/TrEMBL."},
|
@@ -1645,33 +1642,32 @@ DT 15-DEC-2004, entry version 5."
|
|
1645
1642
|
CC Copyrighted by the UniProt Consortium, see http://www.uniprot.org/terms
|
1646
1643
|
CC Distributed under the Creative Commons Attribution-NoDerivs License
|
1647
1644
|
CC -----------------------------------------------------------------------"
|
1648
|
-
sp =
|
1645
|
+
sp = UniProtKB.new(data)
|
1649
1646
|
assert_equal({}, sp.cc)
|
1650
1647
|
end
|
1651
1648
|
end
|
1652
1649
|
|
1653
1650
|
|
1654
1651
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel7.6
|
1655
|
-
class
|
1652
|
+
class TestUniProtKB_UniProtRel7_6 < Test::Unit::TestCase
|
1656
1653
|
# Sequences with over 10000 amino acids in UniProtKB/Swiss-Prot
|
1657
1654
|
def test_10000aa
|
1658
|
-
entry_id = 'Q09165'
|
1659
1655
|
data = ["SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64;\n",
|
1660
1656
|
" MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP\n" * 200,
|
1661
1657
|
"//\n"].join
|
1662
|
-
sp =
|
1658
|
+
sp = UniProtKB.new(data)
|
1663
1659
|
assert_equal(12000, sp.seq.size)
|
1664
1660
|
end
|
1665
1661
|
end
|
1666
1662
|
|
1667
1663
|
|
1668
1664
|
# Changes in http://br.expasy.org/sprot/relnotes/sp_news.html#rel8.0
|
1669
|
-
class
|
1665
|
+
class TestUniProtKB_UniProtRel8_0 < Test::Unit::TestCase
|
1670
1666
|
# Replacement of the feature key VARSPLIC by VAR_SEQ
|
1671
1667
|
def test_FT_VER_SEQ
|
1672
1668
|
data = "FT VAR_SEQ 1 34 Missing (in isoform 3).
|
1673
1669
|
FT /FTId=VSP_004099."
|
1674
|
-
sp =
|
1670
|
+
sp = UniProtKB.new(data)
|
1675
1671
|
res = [{'From' => 1,
|
1676
1672
|
'To' => 34,
|
1677
1673
|
'Description' => 'Missing (in isoform 3).',
|
@@ -1709,7 +1705,7 @@ CC Name=2; Synonyms=p19ARF;
|
|
1709
1705
|
CC IsoId=O77618-1; Sequence=External;
|
1710
1706
|
FT VAR_SEQ 1 34 Missing (in isoform 3).
|
1711
1707
|
FT /FTId=VSP_004099."
|
1712
|
-
sp =
|
1708
|
+
sp = UniProtKB.new(data)
|
1713
1709
|
assert_equal({"Comment" => "Isoform 1 and isoform 2 arise due to the use of two alternative first exons joined to a common exon 2 at the same acceptor site but in different reading frames, resulting in two completely different isoforms",
|
1714
1710
|
"Named isoforms" => "3",
|
1715
1711
|
"Variants" => [{"IsoId" => ["O77617-1"],
|
@@ -1779,14 +1775,14 @@ OH NCBI_TaxID=3603; Vitis.'
|
|
1779
1775
|
{'NCBI_TaxID' => '4113', 'HostName' => 'Solanum tuberosum (Potato)'},
|
1780
1776
|
{'NCBI_TaxID' => '13305', 'HostName' => 'Tulipa'},
|
1781
1777
|
{'NCBI_TaxID' => '3603', 'HostName' => 'Vitis'}]
|
1782
|
-
sp =
|
1778
|
+
sp = UniProtKB.new(data)
|
1783
1779
|
assert_equal(res, sp.oh)
|
1784
1780
|
end
|
1785
1781
|
|
1786
1782
|
def test_OH_line_exception
|
1787
1783
|
data = "ID TEST_ENTRY STANDARD; PRT; 393 AA.
|
1788
1784
|
OH NCBI_TaxID=23216x: Rubus (bramble)."
|
1789
|
-
sp =
|
1785
|
+
sp = UniProtKB.new(data)
|
1790
1786
|
assert_raise(ArgumentError) { sp.oh }
|
1791
1787
|
end
|
1792
1788
|
|
@@ -1795,13 +1791,13 @@ OH NCBI_TaxID=23216x: Rubus (bramble)."
|
|
1795
1791
|
class TestOSLine < Test::Unit::TestCase
|
1796
1792
|
def test_uncapitalized_letter_Q32725_9POAL
|
1797
1793
|
data = "OS unknown cyperaceous sp.\n"
|
1798
|
-
sp =
|
1794
|
+
sp = UniProtKB.new(data)
|
1799
1795
|
assert_equal('unknown cyperaceous sp.', sp.os.first['os'])
|
1800
1796
|
end
|
1801
1797
|
|
1802
1798
|
def test_period_trancation_O63147
|
1803
1799
|
data = "OS Hippotis sp. Clark and Watts 825.\n"
|
1804
|
-
sp =
|
1800
|
+
sp = UniProtKB.new(data)
|
1805
1801
|
assert_equal('Hippotis sp. Clark and Watts 825.', sp.os.first['os'])
|
1806
1802
|
end
|
1807
1803
|
end
|