bio 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/ChangeLog +954 -0
  2. data/KNOWN_ISSUES.rdoc +40 -5
  3. data/README.rdoc +36 -35
  4. data/RELEASE_NOTES.rdoc +87 -59
  5. data/bioruby.gemspec +24 -2
  6. data/doc/RELEASE_NOTES-1.4.1.rdoc +104 -0
  7. data/doc/Tutorial.rd +162 -200
  8. data/doc/Tutorial.rd.html +149 -146
  9. data/lib/bio.rb +1 -0
  10. data/lib/bio/appl/blast.rb +1 -1
  11. data/lib/bio/appl/blast/ddbj.rb +26 -34
  12. data/lib/bio/appl/blast/genomenet.rb +21 -11
  13. data/lib/bio/db/embl/sptr.rb +193 -21
  14. data/lib/bio/db/fasta.rb +1 -1
  15. data/lib/bio/db/fastq.rb +14 -0
  16. data/lib/bio/db/fastq/format_fastq.rb +2 -2
  17. data/lib/bio/db/genbank/ddbj.rb +1 -2
  18. data/lib/bio/db/genbank/format_genbank.rb +1 -1
  19. data/lib/bio/db/medline.rb +1 -0
  20. data/lib/bio/db/newick.rb +3 -1
  21. data/lib/bio/db/pdb/pdb.rb +9 -9
  22. data/lib/bio/db/pdb/residue.rb +2 -2
  23. data/lib/bio/io/ddbjrest.rb +344 -0
  24. data/lib/bio/io/ncbirest.rb +121 -1
  25. data/lib/bio/location.rb +2 -2
  26. data/lib/bio/reference.rb +3 -4
  27. data/lib/bio/shell/plugin/entry.rb +7 -3
  28. data/lib/bio/shell/plugin/ncbirest.rb +5 -1
  29. data/lib/bio/util/restriction_enzyme.rb +3 -0
  30. data/lib/bio/util/restriction_enzyme/dense_int_array.rb +195 -0
  31. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +7 -7
  32. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +57 -18
  33. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +2 -2
  34. data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +219 -0
  35. data/lib/bio/version.rb +1 -1
  36. data/sample/test_restriction_enzyme_long.rb +4403 -0
  37. data/test/data/fasta/EFTU_BACSU.fasta +8 -0
  38. data/test/data/genbank/CAA35997.gp +48 -0
  39. data/test/data/genbank/SCU49845.gb +167 -0
  40. data/test/data/litdb/1717226.litdb +13 -0
  41. data/test/data/pir/CRAB_ANAPL.pir +6 -0
  42. data/test/functional/bio/appl/blast/test_remote.rb +93 -0
  43. data/test/functional/bio/appl/test_blast.rb +61 -0
  44. data/test/functional/bio/io/test_ddbjrest.rb +47 -0
  45. data/test/functional/bio/test_command.rb +3 -3
  46. data/test/unit/bio/db/embl/test_sptr.rb +6 -6
  47. data/test/unit/bio/db/embl/test_uniprot_new_part.rb +208 -0
  48. data/test/unit/bio/db/genbank/test_common.rb +274 -0
  49. data/test/unit/bio/db/genbank/test_genbank.rb +401 -0
  50. data/test/unit/bio/db/genbank/test_genpept.rb +81 -0
  51. data/test/unit/bio/db/pdb/test_pdb.rb +3287 -11
  52. data/test/unit/bio/db/test_fasta.rb +34 -12
  53. data/test/unit/bio/db/test_fastq.rb +26 -0
  54. data/test/unit/bio/db/test_litdb.rb +95 -0
  55. data/test/unit/bio/db/test_medline.rb +1 -0
  56. data/test/unit/bio/db/test_nbrf.rb +82 -0
  57. data/test/unit/bio/db/test_newick.rb +22 -4
  58. data/test/unit/bio/test_reference.rb +35 -0
  59. data/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb +201 -0
  60. data/test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb +281 -0
  61. metadata +44 -38
@@ -0,0 +1,401 @@
1
+ #
2
+ # test/unit/bio/db/genbank/test_genbank.rb - Unit test for Bio::GenBank
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi <k.hayashi.info@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/sequence'
16
+ require 'bio/reference'
17
+ require 'bio/feature'
18
+ require 'bio/compat/features'
19
+ require 'bio/compat/references'
20
+ require 'bio/db/genbank/genbank'
21
+ require 'bio/db/genbank/genbank_to_biosequence'
22
+
23
+
24
+ module Bio
25
+ class TestBioGenBank < Test::Unit::TestCase
26
+
27
+ def setup
28
+ filename = File.join(BioRubyTestDataPath, 'genbank', 'SCU49845.gb')
29
+ @obj = Bio::GenBank.new(File.read(filename))
30
+ end
31
+
32
+ def test_locus_class
33
+ expected = Bio::GenBank::Locus
34
+ assert_equal(expected, @obj.locus.class)
35
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"#another type of LOCUS line.(release 126)
36
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
37
+ assert_equal(Bio::GenBank::Locus, obj_rel126.locus.class)
38
+ end
39
+ def test_locus_circular
40
+ expected = "linear"
41
+ assert_equal(expected, @obj.locus.circular)
42
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
43
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
44
+ assert_equal("circular", obj_rel126.locus.circular)
45
+ end
46
+ def test_locus_date
47
+ expected = "23-MAR-2010"
48
+ assert_equal(expected, @obj.locus.date)
49
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
50
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
51
+ assert_equal("05-FEB-1999", obj_rel126.locus.date)
52
+ end
53
+ def test_locus_division
54
+ expected = "PLN"
55
+ assert_equal(expected, @obj.locus.division)
56
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
57
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
58
+ assert_equal("VRL", obj_rel126.locus.division)
59
+ end
60
+ def test_locus_entry_id
61
+ expected = "SCU49845"
62
+ assert_equal(expected, @obj.locus.entry_id)
63
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
64
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
65
+ assert_equal("AB000383", obj_rel126.locus.entry_id)
66
+ end
67
+ def test_locus_length
68
+ expected = 5028
69
+ assert_equal(expected, @obj.locus.length)
70
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
71
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
72
+ assert_equal(5423, obj_rel126.locus.length)
73
+ end
74
+ def test_locus_natype
75
+ expected = "DNA"
76
+ assert_equal(expected, @obj.locus.natype)
77
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
78
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
79
+ assert_equal("DNA", obj_rel126.locus.natype)
80
+ end
81
+ def test_locus_strand
82
+ expected = ""
83
+ assert_equal(expected, @obj.locus.strand)
84
+ locus_rel126 = "LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999"
85
+ obj_rel126 = Bio::GenBank.new(locus_rel126)
86
+ assert_equal("", obj_rel126.locus.strand)
87
+ end
88
+ def test_entry_id
89
+ assert_equal("SCU49845", @obj.entry_id)
90
+ end
91
+
92
+ def test_length
93
+ assert_equal(5028, @obj.length)
94
+ end
95
+
96
+ def test_circular
97
+ assert_equal("linear", @obj.circular)
98
+ end
99
+
100
+ def test_division
101
+ assert_equal("PLN", @obj.division)
102
+ end
103
+
104
+ def test_date
105
+ assert_equal("23-MAR-2010", @obj.date)
106
+ end
107
+
108
+ def test_strand
109
+ assert_equal("", @obj.strand)
110
+ end
111
+
112
+ def test_natype
113
+ assert_equal("DNA", @obj.natype)
114
+ end
115
+
116
+ def test_each_cds_feature
117
+ @obj.each_cds do |feature|
118
+ assert_equal("CDS", feature.feature)
119
+ end
120
+ end
121
+ =begin
122
+ def test_each_cds_qualifiers
123
+ @obj.each_cds do |feature|
124
+ feature.qualifiers do |qualifier|
125
+ assert_equal(Bio::Feature::Qualifier, qualifier.class)
126
+ end
127
+ end
128
+ end
129
+ =end
130
+ def test_each_cds_qualifiers
131
+ expected = [[["codon_start", 3],
132
+ ["product", "TCP1-beta"],
133
+ ["protein_id", "AAA98665.1"],
134
+ ["db_xref", "GI:1293614"],
135
+ ["translation", "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"]],
136
+ [["gene", "AXL2"],
137
+ ["note", "plasma membrane glycoprotein"],
138
+ ["codon_start", 1],
139
+ ["product", "Axl2p"],
140
+ ["protein_id", "AAA98666.1"],
141
+ ["db_xref", "GI:1293615"],
142
+ ["translation", "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"]],
143
+ [["gene", "REV7"],
144
+ ["codon_start", 1],
145
+ ["product", "Rev7p"],
146
+ ["protein_id", "AAA98667.1"],
147
+ ["db_xref", "GI:1293616"],
148
+ ["translation", "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"]]]
149
+ actual = []
150
+ @obj.each_cds do |feature|
151
+ tmp = []
152
+ feature.qualifiers.each{|qualifier|
153
+ tmp << [qualifier.qualifier, qualifier.value]
154
+ }
155
+ actual << tmp
156
+ end
157
+ assert_equal(expected, actual)
158
+ end
159
+ def test_each_gene
160
+ expected_position = ["<687..>3158", "complement(<3300..>4037)"]
161
+ expected_gene = [["gene","AXL2"], ["gene","REV7"]]
162
+ actual_position = []
163
+ actual_gene = []
164
+ @obj.each_gene do |gene|
165
+ assert_equal("gene", gene.feature)
166
+ actual_position << gene.position
167
+ gene.qualifiers.each do |qualifier|
168
+ actual_gene << [qualifier.qualifier, qualifier.value]
169
+ end
170
+ end
171
+ assert_equal(expected_position,actual_position)
172
+ assert_equal(expected_gene, actual_gene)
173
+ end
174
+
175
+ def test_basecount
176
+ assert_equal({}, @obj.basecount)
177
+ end
178
+
179
+ def test_seq
180
+ expected = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatatt
ttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaatt
aatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc"
181
+ assert_equal(expected, @obj.seq)
182
+ end
183
+
184
+ def test_seq_len
185
+ assert_equal(5028, @obj.seq_len)
186
+ end
187
+
188
+ def test_date_modified
189
+ assert_equal(Date, @obj.date_modified.class)
190
+ assert_equal('2010-03-23', @obj.date_modified.to_s)
191
+ end
192
+
193
+ def test_classification
194
+ expected = ["Eukaryota",
195
+ "Fungi",
196
+ "Dikarya",
197
+ "Ascomycota",
198
+ "Saccharomyceta",
199
+ "Saccharomycotina",
200
+ "Saccharomycetes",
201
+ "Saccharomycetales",
202
+ "Saccharomycetaceae",
203
+ "Saccharomyces"]
204
+ assert_equal(expected, @obj.classification)
205
+ end
206
+
207
+ def test_strandedness
208
+ assert_equal(nil, @obj.strandedness)
209
+ end
210
+
211
+ # test for to_biosequence
212
+ def test_to_biosequence
213
+ seq = @obj.to_biosequence
214
+ expected_seq = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagcta
tattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatca
aattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc"
215
+ expected_id_namespace = "GenBank"
216
+ expected_entry_id = "SCU49845"
217
+ expected_primary_accession = "U49845"
218
+ expected_secondary_accessions = []
219
+ expected_other_seqids = ["1293613", "GI", []]
220
+ expected_molecule_type = "DNA"
221
+ expected_division = "PLN"
222
+ expected_topology = "linear"
223
+ expected_strandedness = nil
224
+ expected_keywords = []
225
+ expected_sequence_version = "1"
226
+ expected_date_modified = "2010-03-23"
227
+ expected_definition = "Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds."
228
+ expected_species = []
229
+ expected_classification= ["Eukaryota", "Fungi", "Dikarya", "Ascomycota", "Saccharomyceta", "Saccharomycotina", "Saccharomycetes", "Saccharomycetales", "Saccharomycetaceae", "Saccharomyces"]
230
+ expected_comments = ""
231
+ expected_references = [{
232
+ :abstract=>"",
233
+ :affiliations=>[],
234
+ :authors=>["Roemer, T.", "Madden, K.", "Chang, J.", "Snyder, M."],
235
+ :comments=>nil,
236
+ :doi=>nil,
237
+ :embl_gb_record_number=>1,
238
+ :issue=>"7",
239
+ :journal=>"Genes Dev.",
240
+ :medline=>"",
241
+ :mesh=>[],
242
+ :pages=>"777-793",
243
+ :pubmed=>"8846915",
244
+ :sequence_position=>"1-5028",
245
+ :title=>
246
+ "Selection of axial growth sites in yeast requires Axl2p, a novel plasma membrane glycoprotein",
247
+ :url=>nil,
248
+ :volume=>"10",
249
+ :year=>"1996"},
250
+
251
+ {:abstract=>"",
252
+ :affiliations=>[],
253
+ :authors=>["Roemer, T."],
254
+ :comments=>nil,
255
+ :doi=>nil,
256
+ :embl_gb_record_number=>2,
257
+ :issue=>"",
258
+ :journal=>
259
+ "Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT 06520, USA",
260
+ :medline=>"",
261
+ :mesh=>[],
262
+ :pages=>"",
263
+ :pubmed=>"",
264
+ :sequence_position=>"1-5028",
265
+ :title=>"Direct Submission",
266
+ :url=>nil,
267
+ :volume=>"",
268
+ :year=>""}]
269
+
270
+ expected_features = [
271
+ {:feature=>"source",
272
+ :position=>"1..5028",
273
+ :qualifiers=>
274
+ [{:qualifier=>"organism",
275
+ :value=>"Saccharomyces cerevisiae"},
276
+ {:qualifier=>"mol_type",
277
+ :value=>"genomic DNA"},
278
+ {:qualifier=>"db_xref",
279
+ :value=>"taxon:4932"},
280
+ {:qualifier=>"chromosome",
281
+ :value=>"IX"}]},
282
+ {:feature=>"mRNA",
283
+ :position=>"<1..>206",
284
+ :qualifiers=>
285
+ [{
286
+ :qualifier=>"product",
287
+ :value=>"TCP1-beta"}]},
288
+ {:feature=>"CDS",
289
+ :position=>"<1..206",
290
+ :qualifiers=> [{:qualifier=>"codon_start", :value=>3}, {:qualifier=>"product", :value=>"TCP1-beta"},
291
+ {:qualifier=>"protein_id",
292
+ :value=>"AAA98665.1"},
293
+ {:qualifier=>"db_xref",
294
+ :value=>"GI:1293614"},
295
+ {:qualifier=>"translation",
296
+ :value=>
297
+ "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"}]},
298
+ {:feature=>"gene",
299
+ :position=>"<687..>3158",
300
+ :qualifiers=>
301
+ [{:qualifier=>"gene", :value=>"AXL2"}]},
302
+ {:feature=>"mRNA",
303
+ :position=>"<687..>3158",
304
+ :qualifiers=>
305
+ [{:qualifier=>"gene", :value=>"AXL2"},
306
+ {:qualifier=>"product",
307
+ :value=>"Axl2p"}]},
308
+ {:feature=>"CDS",
309
+ :position=>"687..3158",
310
+ :qualifiers=>
311
+ [{:qualifier=>"gene", :value=>"AXL2"},
312
+ {:qualifier=>"note",
313
+ :value=>"plasma membrane glycoprotein"},
314
+ {:qualifier=>"codon_start", :value=>1}, {:qualifier=>"product",
315
+ :value=>"Axl2p"},
316
+ {:qualifier=>"protein_id",
317
+ :value=>"AAA98666.1"},
318
+ {:qualifier=>"db_xref",
319
+ :value=>"GI:1293615"},
320
+ {:qualifier=>"translation",
321
+ :value=>
322
+ "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"}]},
323
+ {:feature=>"gene",
324
+ :position=>"complement(<3300..>4037)",
325
+ :qualifiers=>
326
+ [{:qualifier=>"gene", :value=>"REV7"}]},
327
+ {:feature=>"mRNA",
328
+ :position=>"complement(<3300..>4037)",
329
+ :qualifiers=>
330
+ [{:qualifier=>"gene", :value=>"REV7"},
331
+ {:qualifier=>"product",
332
+ :value=>"Rev7p"}]},
333
+ {:feature=>"CDS",
334
+ :position=>"complement(3300..4037)",
335
+ :qualifiers=>
336
+ [{:qualifier=>"gene", :value=>"REV7"},
337
+ {:qualifier=>"codon_start", :value=>1},
338
+ {:qualifier=>"product",
339
+ :value=>"Rev7p"},
340
+ {:qualifier=>"protein_id",
341
+ :value=>"AAA98667.1"},
342
+ {:qualifier=>"db_xref",
343
+ :value=>"GI:1293616"},
344
+ {:qualifier=>"translation",
345
+ :value=>
346
+ "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"}]}]
347
+
348
+ assert_equal(expected_seq, seq.seq)
349
+ assert_equal(expected_id_namespace, seq.id_namespace)
350
+ assert_equal(expected_entry_id, seq.entry_id)
351
+ assert_equal(expected_primary_accession, seq.primary_accession)
352
+ assert_equal(expected_secondary_accessions, seq.secondary_accessions)
353
+ seqids = seq.other_seqids.first
354
+ actual_other_seqids = [seqids.id, seqids.database, seqids.secondary_ids]
355
+ assert_equal(expected_other_seqids, actual_other_seqids)
356
+ assert_equal(expected_division, seq.division)
357
+ assert_equal(expected_strandedness, seq.strandedness)
358
+ assert_equal(expected_keywords, seq.keywords)
359
+ assert_equal(expected_classification, seq.classification)
360
+ assert_equal(expected_comments, seq.comments)
361
+ refs = seq.references
362
+ actual_references = []
363
+ refs.each do |ref|
364
+ actual_references << {:abstract => ref.abstract,
365
+ :affiliations => ref.affiliations,
366
+ :authors => ref.authors,
367
+ :comments => ref.comments,
368
+ :doi => ref.doi,
369
+ :embl_gb_record_number => ref.embl_gb_record_number,
370
+ :issue => ref.issue,
371
+ :journal => ref.journal,
372
+ :medline => ref.medline,
373
+ :mesh => ref.mesh,
374
+ :pages => ref.pages,
375
+ :pubmed => ref.pubmed,
376
+ :sequence_position => ref.sequence_position,
377
+ :title => ref.title,
378
+ :url => ref.url,
379
+ :volume => ref.volume,
380
+ :year => ref.year}
381
+ end
382
+ assert_equal(expected_references, actual_references)
383
+ fets = seq.features
384
+ actual_features = []
385
+ fets.each do |fet|
386
+ feature = fet.feature
387
+ position = fet.position
388
+ quals = []
389
+ fet.qualifiers.each do |qual|
390
+ quals << {:qualifier => qual.qualifier, :value => qual.value}
391
+ end
392
+ actual_features << {:feature => feature, :position => position, :qualifiers => quals}
393
+ end
394
+ assert_equal(expected_features, actual_features) # skip
395
+
396
+
397
+ end
398
+
399
+ end #class TestBioGenBank
400
+ end #module Bio
401
+
@@ -0,0 +1,81 @@
1
+ #
2
+ # test/unit/bio/db/genbank/test_genpept.rb - Unit test for Bio::GenPept
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi <k.hayashi.info@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/db/genbank/genpept.rb'
16
+
17
+ #The coverage of this class is 100%
18
+ # It tests only the methods described in the source class. (It doesn't test the methods inherited from NCBIDB.)
19
+ module Bio
20
+ class TestBioGenPept < Test::Unit::TestCase
21
+
22
+ def setup
23
+ filename = File.join(BioRubyTestDataPath, 'genbank', 'CAA35997.gp')
24
+ @obj = Bio::GenPept.new(File.read(filename))
25
+ end
26
+
27
+ def test_locus
28
+ expected =
29
+ {:circular=>"linear",
30
+ :date=>"12-SEP-1993",
31
+ :division=>"MAM",
32
+ :entry_id=>"CAA35997",
33
+ :length=>100}
34
+ locus = @obj.locus
35
+ actual =
36
+ {:entry_id=>locus.entry_id,
37
+ :circular=>locus.circular,
38
+ :date=>locus.date,
39
+ :division=>locus.division,
40
+ :length=>locus.length}
41
+
42
+ assert_equal(expected, actual)
43
+ end
44
+
45
+ def test_entry_id
46
+ assert_equal("CAA35997", @obj.entry_id)
47
+ end
48
+
49
+ def test_length
50
+ assert_equal(100, @obj.length)
51
+ end
52
+
53
+ def test_circular
54
+ assert_equal("linear", @obj.circular)
55
+ end
56
+
57
+ def test_division
58
+ assert_equal("MAM", @obj.division)
59
+ end
60
+
61
+ def test_date
62
+ assert_equal("12-SEP-1993", @obj.date)
63
+ end
64
+
65
+ def test_seq
66
+ expected = "MRTPMLLALLALATLCLAGRADAKPGDAESGKGAAFVSKQEGSEVVKRLRRYLDHWLGAPAPYPDPLEPKREVCELNPDCDELADHIGFQEAYRRFYGPV"
67
+ assert_equal(expected, @obj.seq)
68
+ end
69
+
70
+ def test_seq_len
71
+ assert_equal(100, @obj.seq_len)
72
+ end
73
+
74
+ def test_dbsource
75
+ expected = "DBSOURCE embl accession X51700.1\n"
76
+ assert_equal(expected, @obj.dbsource)
77
+ end
78
+
79
+ end #class TestBioGenPept
80
+ end #module Bio
81
+