bio 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/ChangeLog +954 -0
  2. data/KNOWN_ISSUES.rdoc +40 -5
  3. data/README.rdoc +36 -35
  4. data/RELEASE_NOTES.rdoc +87 -59
  5. data/bioruby.gemspec +24 -2
  6. data/doc/RELEASE_NOTES-1.4.1.rdoc +104 -0
  7. data/doc/Tutorial.rd +162 -200
  8. data/doc/Tutorial.rd.html +149 -146
  9. data/lib/bio.rb +1 -0
  10. data/lib/bio/appl/blast.rb +1 -1
  11. data/lib/bio/appl/blast/ddbj.rb +26 -34
  12. data/lib/bio/appl/blast/genomenet.rb +21 -11
  13. data/lib/bio/db/embl/sptr.rb +193 -21
  14. data/lib/bio/db/fasta.rb +1 -1
  15. data/lib/bio/db/fastq.rb +14 -0
  16. data/lib/bio/db/fastq/format_fastq.rb +2 -2
  17. data/lib/bio/db/genbank/ddbj.rb +1 -2
  18. data/lib/bio/db/genbank/format_genbank.rb +1 -1
  19. data/lib/bio/db/medline.rb +1 -0
  20. data/lib/bio/db/newick.rb +3 -1
  21. data/lib/bio/db/pdb/pdb.rb +9 -9
  22. data/lib/bio/db/pdb/residue.rb +2 -2
  23. data/lib/bio/io/ddbjrest.rb +344 -0
  24. data/lib/bio/io/ncbirest.rb +121 -1
  25. data/lib/bio/location.rb +2 -2
  26. data/lib/bio/reference.rb +3 -4
  27. data/lib/bio/shell/plugin/entry.rb +7 -3
  28. data/lib/bio/shell/plugin/ncbirest.rb +5 -1
  29. data/lib/bio/util/restriction_enzyme.rb +3 -0
  30. data/lib/bio/util/restriction_enzyme/dense_int_array.rb +195 -0
  31. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +7 -7
  32. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +57 -18
  33. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +2 -2
  34. data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +219 -0
  35. data/lib/bio/version.rb +1 -1
  36. data/sample/test_restriction_enzyme_long.rb +4403 -0
  37. data/test/data/fasta/EFTU_BACSU.fasta +8 -0
  38. data/test/data/genbank/CAA35997.gp +48 -0
  39. data/test/data/genbank/SCU49845.gb +167 -0
  40. data/test/data/litdb/1717226.litdb +13 -0
  41. data/test/data/pir/CRAB_ANAPL.pir +6 -0
  42. data/test/functional/bio/appl/blast/test_remote.rb +93 -0
  43. data/test/functional/bio/appl/test_blast.rb +61 -0
  44. data/test/functional/bio/io/test_ddbjrest.rb +47 -0
  45. data/test/functional/bio/test_command.rb +3 -3
  46. data/test/unit/bio/db/embl/test_sptr.rb +6 -6
  47. data/test/unit/bio/db/embl/test_uniprot_new_part.rb +208 -0
  48. data/test/unit/bio/db/genbank/test_common.rb +274 -0
  49. data/test/unit/bio/db/genbank/test_genbank.rb +401 -0
  50. data/test/unit/bio/db/genbank/test_genpept.rb +81 -0
  51. data/test/unit/bio/db/pdb/test_pdb.rb +3287 -11
  52. data/test/unit/bio/db/test_fasta.rb +34 -12
  53. data/test/unit/bio/db/test_fastq.rb +26 -0
  54. data/test/unit/bio/db/test_litdb.rb +95 -0
  55. data/test/unit/bio/db/test_medline.rb +1 -0
  56. data/test/unit/bio/db/test_nbrf.rb +82 -0
  57. data/test/unit/bio/db/test_newick.rb +22 -4
  58. data/test/unit/bio/test_reference.rb +35 -0
  59. data/test/unit/bio/util/restriction_enzyme/test_dense_int_array.rb +201 -0
  60. data/test/unit/bio/util/restriction_enzyme/test_sorted_num_array.rb +281 -0
  61. metadata +44 -38
@@ -0,0 +1,401 @@
1
+ #
2
+ # test/unit/bio/db/genbank/test_genbank.rb - Unit test for Bio::GenBank
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi <k.hayashi.info@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/sequence'
16
+ require 'bio/reference'
17
+ require 'bio/feature'
18
+ require 'bio/compat/features'
19
+ require 'bio/compat/references'
20
+ require 'bio/db/genbank/genbank'
21
+ require 'bio/db/genbank/genbank_to_biosequence'
22
+
23
+
24
+ module Bio
25
+ class TestBioGenBank < Test::Unit::TestCase
26
+
27
# Reads the SCU49845 GenBank fixture and parses it into @obj before each test.
def setup
  fixture_path = File.join(BioRubyTestDataPath, 'genbank', 'SCU49845.gb')
  entry_text = File.read(fixture_path)
  @obj = Bio::GenBank.new(entry_text)
end
31
+
32
# The locus object must be a Bio::GenBank::Locus, both for the fixture entry
# and for an entry built from a bare LOCUS line.
def test_locus_class
  assert_equal(Bio::GenBank::Locus, @obj.locus.class)

  # Another type of LOCUS line (release 126).
  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal(Bio::GenBank::Locus, rel126_entry.locus.class)
end
39
# Checks locus.circular for the fixture entry ("linear") and for an entry
# built from a bare LOCUS line ("circular").
def test_locus_circular
  assert_equal("linear", @obj.locus.circular)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("circular", rel126_entry.locus.circular)
end
46
# Checks locus.date for the fixture entry and for an entry built from a bare
# LOCUS line.
def test_locus_date
  assert_equal("23-MAR-2010", @obj.locus.date)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("05-FEB-1999", rel126_entry.locus.date)
end
53
# Checks locus.division for the fixture entry and for an entry built from a
# bare LOCUS line.
def test_locus_division
  assert_equal("PLN", @obj.locus.division)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("VRL", rel126_entry.locus.division)
end
60
# Checks locus.entry_id for the fixture entry and for an entry built from a
# bare LOCUS line.
def test_locus_entry_id
  assert_equal("SCU49845", @obj.locus.entry_id)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("AB000383", rel126_entry.locus.entry_id)
end
67
# Checks locus.length for the fixture entry and for an entry built from a
# bare LOCUS line.
def test_locus_length
  assert_equal(5028, @obj.locus.length)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal(5423, rel126_entry.locus.length)
end
74
# Checks locus.natype for the fixture entry and for an entry built from a
# bare LOCUS line; both are expected to report "DNA".
def test_locus_natype
  assert_equal("DNA", @obj.locus.natype)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("DNA", rel126_entry.locus.natype)
end
81
# Checks locus.strand for the fixture entry and for an entry built from a
# bare LOCUS line; both are expected to report an empty string.
def test_locus_strand
  assert_equal("", @obj.locus.strand)

  rel126_entry = Bio::GenBank.new("LOCUS AB000383 5423 bp DNA circular VRL 05-FEB-1999")
  assert_equal("", rel126_entry.locus.strand)
end
88
# Checks the entry ID reported directly by the parsed fixture entry.
def test_entry_id
  expected = "SCU49845"
  assert_equal(expected, @obj.entry_id)
end
91
+
92
# Checks the length reported directly by the parsed fixture entry.
def test_length
  expected = 5028
  assert_equal(expected, @obj.length)
end
95
+
96
# Checks the value of #circular reported directly by the parsed fixture entry.
def test_circular
  expected = "linear"
  assert_equal(expected, @obj.circular)
end
99
+
100
# Checks the division code reported directly by the parsed fixture entry.
def test_division
  expected = "PLN"
  assert_equal(expected, @obj.division)
end
103
+
104
# Checks the date string reported directly by the parsed fixture entry.
def test_date
  expected = "23-MAR-2010"
  assert_equal(expected, @obj.date)
end
107
+
108
# Checks that the parsed fixture entry reports an empty strand string.
def test_strand
  expected = ""
  assert_equal(expected, @obj.strand)
end
111
+
112
# Checks the nucleic acid type reported directly by the parsed fixture entry.
def test_natype
  expected = "DNA"
  assert_equal(expected, @obj.natype)
end
115
+
116
# Every feature yielded by each_cds must be a "CDS" feature, and the fixture
# must yield exactly three of them. Counting the yielded features keeps the
# test from passing vacuously when each_cds yields nothing.
def test_each_cds_feature
  cds_count = 0
  @obj.each_cds do |feature|
    assert_equal("CDS", feature.feature)
    cds_count += 1
  end
  # The SCU49845 fixture carries three CDS features (TCP1-beta, AXL2, REV7).
  assert_equal(3, cds_count)
end
121
+ =begin
122
+ def test_each_cds_qualifiers
123
+ @obj.each_cds do |feature|
124
+ feature.qualifiers do |qualifier|
125
+ assert_equal(Bio::Feature::Qualifier, qualifier.class)
126
+ end
127
+ end
128
+ end
129
+ =end
130
# Collects [qualifier, value] pairs for every feature yielded by each_cds and
# compares them, in order, with the pairs expected for the fixture.
def test_each_cds_qualifiers
  tcp1_beta_pairs = [
    ["codon_start", 3],
    ["product", "TCP1-beta"],
    ["protein_id", "AAA98665.1"],
    ["db_xref", "GI:1293614"],
    ["translation", "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"]
  ]
  axl2_pairs = [
    ["gene", "AXL2"],
    ["note", "plasma membrane glycoprotein"],
    ["codon_start", 1],
    ["product", "Axl2p"],
    ["protein_id", "AAA98666.1"],
    ["db_xref", "GI:1293615"],
    ["translation", "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"]
  ]
  rev7_pairs = [
    ["gene", "REV7"],
    ["codon_start", 1],
    ["product", "Rev7p"],
    ["protein_id", "AAA98667.1"],
    ["db_xref", "GI:1293616"],
    ["translation", "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"]
  ]
  expected = [tcp1_beta_pairs, axl2_pairs, rev7_pairs]

  actual = []
  @obj.each_cds do |feature|
    pairs = []
    feature.qualifiers.each do |qualifier|
      pairs.push([qualifier.qualifier, qualifier.value])
    end
    actual.push(pairs)
  end

  assert_equal(expected, actual)
end
159
# Walks the features yielded by each_gene, checking that each one is a "gene"
# feature and collecting its position and [qualifier, value] pairs for
# comparison with the values expected for the fixture.
def test_each_gene
  positions = []
  qualifier_pairs = []

  @obj.each_gene do |gene|
    assert_equal("gene", gene.feature)
    positions.push(gene.position)
    gene.qualifiers.each { |qualifier| qualifier_pairs.push([qualifier.qualifier, qualifier.value]) }
  end

  assert_equal(["<687..>3158", "complement(<3300..>4037)"], positions)
  assert_equal([["gene", "AXL2"], ["gene", "REV7"]], qualifier_pairs)
end
174
+
175
# The fixture entry is expected to report an empty base-count hash.
def test_basecount
  expected = {}
  assert_equal(expected, @obj.basecount)
end
178
+
179
+ def test_seq
180
+ expected = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagctatatt
ttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatcaaatt
aatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc"
181
+ assert_equal(expected, @obj.seq)
182
+ end
183
+
184
# Checks the sequence length reported by the parsed fixture entry.
def test_seq_len
  expected = 5028
  assert_equal(expected, @obj.seq_len)
end
187
+
188
# date_modified must be a Date whose string form is the ISO date of the
# fixture's modification date.
def test_date_modified
  modified = @obj.date_modified.class
  assert_equal(Date, modified)

  iso_string = @obj.date_modified.to_s
  assert_equal('2010-03-23', iso_string)
end
192
+
193
# Checks the taxonomic lineage reported by the parsed fixture entry, from the
# broadest rank to the narrowest.
def test_classification
  lineage = %w[
    Eukaryota
    Fungi
    Dikarya
    Ascomycota
    Saccharomyceta
    Saccharomycotina
    Saccharomycetes
    Saccharomycetales
    Saccharomycetaceae
    Saccharomyces
  ]
  assert_equal(lineage, @obj.classification)
end
206
+
207
# The parsed fixture entry is expected to report no strandedness (nil).
def test_strandedness
  # assert_nil states the intent directly instead of comparing against nil
  # through assert_equal.
  assert_nil(@obj.strandedness)
end
210
+
211
+ # test for to_biosequence
212
+ def test_to_biosequence
213
+ seq = @obj.to_biosequence
214
+ expected_seq = "gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaagaaccgccaatagacaacatatgtaacatatttaggatatacctcgaaaataataaaccgccacactgtcattattataattagaaacagaacgcaaaaattatccactatataattcaaagacgcgaaaaaaaaagaacaacgcgtcatagaacttttggcaattcgcgtcacaaataaattttggcaacttatgtttcctcttcgagcagtactcgagccctgtctcaagaatgtaataatacccatcgtaggtatggttaaagatagcatctccacaacctcaaagctccttgccgagagtcgccctcctttgtcgagtaattttcacttttcatatgagaacttattttcttattctttactctcacatcctgtagtgattgacactgcaacagccaccatcactagaagaacagaacaattacttaatagaaaaattatatcttcctcgaaacgatttcctgcttccaacatctacgtatatcaagaagcattcacttaccatgacacagcttcagatttcattattgctgacagctactatatcactactccatctagtagtggccacgccctatgaggcatatcctatcggaaaacaataccccccagtggcaagagtcaatgaatcgtttacatttcaaatttccaatgatacctataaatcgtctgtagacaagacagctcaaataacatacaattgcttcgacttaccgagctggctttcgtttgactctagttctagaacgttctcaggtgaaccttcttctgacttactatctgatgcgaacaccacgttgtatttcaatgtaatactcgagggtacggactctgccgacagcacgtctttgaacaatacataccaatttgttgttacaaaccgtccatccatctcgctatcgtcagatttcaatctattggcgttgttaaaaaactatggttatactaacggcaaaaacgctctgaaactagatcctaatgaagtcttcaacgtgacttttgaccgttcaatgttcactaacgaagaatccattgtgtcgtattacggacgttctcagttgtataatgcgccgttacccaattggctgttcttcgattctggcgagttgaagtttactgggacggcaccggtgataaactcggcgattgctccagaaacaagctacagttttgtcatcatcgctacagacattgaaggattttctgccgttgaggtagaattcgaattagtcatcggggctcaccagttaactacctctattcaaaatagtttgataatcaacgttactgacacaggtaacgtttcatatgacttacctctaaactatgtttatctcgatgacgatcctatttcttctgataaattgggttctataaacttattggatgctccagactgggtggcattagataatgctaccatttccgggtctgtcccagatgaattactcggtaagaactccaatcctgccaatttttctgtgtccatttatgatacttatggtgatgtgatttatttcaacttcgaagttgtctccacaacggatttgtttgccattagttctcttcccaatattaacgctacaaggggtgaatggttctcctactattttttgccttctcagtttacagactacgtgaatacaaacgtttcattagagtttactaattcaagccaagaccatgactgggtgaaattccaatcatctaatttaacattagctggagaagtgcccaagaatttcgacaagctttcattaggtttgaaagcgaaccaaggttcacaatctcaagagcta
tattttaacatcattggcatggattcaaagataactcactcaaaccacagtgcgaatgcaacgtccacaagaagttctcaccactccacctcaacaagttcttacacatcttctacttacactgcaaaaatttcttctacctccgctgctgctacttcttctgctccagcagcgctgccagcagccaataaaacttcatctcacaataaaaaagcagtagcaattgcgtgcggtgttgctatcccattaggcgttatcctagtagctctcatttgcttcctaatattctggagacgcagaagggaaaatccagacgatgaaaacttaccgcatgctattagtggacctgatttgaataatcctgcaaataaaccaaatcaagaaaacgctacacctttgaacaacccctttgatgatgatgcttcctcgtacgatgatacttcaatagcaagaagattggctgctttgaacactttgaaattggataaccactctgccactgaatctgatatttccagcgtggatgaaaagagagattctctatcaggtatgaatacatacaatgatcagttccaatcccaaagtaaagaagaattattagcaaaacccccagtacagcctccagagagcccgttctttgacccacagaataggtcttcttctgtgtatatggatagtgaaccagcagtaaataaatcctggcgatatactggcaacctgtcaccagtctctgatattgtcagagacagttacggatcacaaaaaactgttgatacagaaaaacttttcgatttagaagcaccagagaaggaaaaacgtacgtcaagggatgtcactatgtcttcactggacccttggaacagcaatattagcccttctcccgtaagaaaatcagtaacaccatcaccatataacgtaacgaagcatcgtaaccgccacttacaaaatattcaagactctcaaagcggtaaaaacggaatcactcccacaacaatgtcaacttcatcttctgacgattttgttccggttaaagatggtgaaaatttttgctgggtccatagcatggaaccagacagaagaccaagtaagaaaaggttagtagatttttcaaataagagtaatgtcaatgttggtcaagttaaggacattcacggacgcatcccagaaatgctgtgattatacgcaacgatattttgcttaattttattttcctgttttattttttattagtggtttacagataccctatattttatttagtttttatacttagagacatttaattttaattccattcttcaaatttcatttttgcacttaaaacaaagatccaaaaatgctctcgccctcttcatattgagaatacactccattcaaaattttgtcgtcaccgctgattaatttttcactaaactgatgaataatcaaaggccccacgtcagaaccgactaaagaagtgagttttattttaggaggttgaaaaccattattgtctggtaaattttcatcttcttgacatttaacccagtttgaatccctttcaatttctgctttttcctccaaactatcgaccctcctgtttctgtccaacttatgtcctagttccaattcgatcgcattaataactgcttcaaatgttattgtgtcatcgttgactttaggtaatttctccaaatgcataatcaaactatttaaggaagatcggaattcgtcgaacacttcagtttccgtaatgatctgatcgtctttatccacatgttgtaattcactaaaatctaaaacgtatttttcaatgcataaatcgttctttttattaataatgcagatggaaaatctgtaaacgtgcgttaatttagaaagaacatccagtataagttcttctatatagtcaattaaagcaggatgcctattaatgggaacgaactgcggcaagttgaatgactggtaagtagtgtagtcgaatgactgaggtgggtatacatttctataaaataaaatca
aattaatgtagcattttaagtataccctcagccacttctctacccatctattcataaagctgacgcaacgattactattttttttttcttcttggatctcagtcgtcgcaaaaacgtataccttctttttccgaccttttttttagctttctggaaaagtttatattagttaaacagggtctagtcttagtgtgaaagctagtggtttcgattgactgatattaagaaagtggaaattaaattagtagtgtagacgtatatgcatatgtatttctcgcctgtttatgtttctacgtacttttgatttatagcaaggggaaaagaaatacatactattttttggtaaaggtgaaagcataatgtaaaagctagaataaaatggacgaaataaagagaggcttagttcatcttttttccaaaaagcacccaatgataataactaaaatgaaaaggatttgccatctgtcagcaacatcagttgtgtgagcaataataaaatcatcacctccgttgcctttagcgcgtttgtcgtttgtatcttccgtaattttagtcttatcaatgggaatcataaattttccaatgaattagcaatttcgtccaattctttttgagcttcttcatatttgctttggaattcttcgcacttcttttcccattcatctctttcttcttccaaagcaacgatccttctacccatttgctcagagttcaaatcggcctctttcagtttatccattgcttccttcagtttggcttcactgtcttctagctgttgttctagatcctggtttttcttggtgtagttctcattattagatctcaagttattggagtcttcagccaattgctttgtatcagacaattgactctctaacttctccacttcactgtcgagttgctcgtttttagcggacaaagatttaatctcgttttctttttcagtgttagattgctctaattctttgagctgttctctcagctcctcatatttttcttgccatgactcagattctaattttaagctattcaatttctctttgatc"
215
+ expected_id_namespace = "GenBank"
216
+ expected_entry_id = "SCU49845"
217
+ expected_primary_accession = "U49845"
218
+ expected_secondary_accessions = []
219
+ expected_other_seqids = ["1293613", "GI", []]
220
+ expected_molecule_type = "DNA"
221
+ expected_division = "PLN"
222
+ expected_topology = "linear"
223
+ expected_strandedness = nil
224
+ expected_keywords = []
225
+ expected_sequence_version = "1"
226
+ expected_date_modified = "2010-03-23"
227
+ expected_definition = "Saccharomyces cerevisiae TCP1-beta gene, partial cds; and Axl2p (AXL2) and Rev7p (REV7) genes, complete cds."
228
+ expected_species = []
229
+ expected_classification= ["Eukaryota", "Fungi", "Dikarya", "Ascomycota", "Saccharomyceta", "Saccharomycotina", "Saccharomycetes", "Saccharomycetales", "Saccharomycetaceae", "Saccharomyces"]
230
+ expected_comments = ""
231
+ expected_references = [{
232
+ :abstract=>"",
233
+ :affiliations=>[],
234
+ :authors=>["Roemer, T.", "Madden, K.", "Chang, J.", "Snyder, M."],
235
+ :comments=>nil,
236
+ :doi=>nil,
237
+ :embl_gb_record_number=>1,
238
+ :issue=>"7",
239
+ :journal=>"Genes Dev.",
240
+ :medline=>"",
241
+ :mesh=>[],
242
+ :pages=>"777-793",
243
+ :pubmed=>"8846915",
244
+ :sequence_position=>"1-5028",
245
+ :title=>
246
+ "Selection of axial growth sites in yeast requires Axl2p, a novel plasma membrane glycoprotein",
247
+ :url=>nil,
248
+ :volume=>"10",
249
+ :year=>"1996"},
250
+
251
+ {:abstract=>"",
252
+ :affiliations=>[],
253
+ :authors=>["Roemer, T."],
254
+ :comments=>nil,
255
+ :doi=>nil,
256
+ :embl_gb_record_number=>2,
257
+ :issue=>"",
258
+ :journal=>
259
+ "Submitted (22-FEB-1996) Biology, Yale University, New Haven, CT 06520, USA",
260
+ :medline=>"",
261
+ :mesh=>[],
262
+ :pages=>"",
263
+ :pubmed=>"",
264
+ :sequence_position=>"1-5028",
265
+ :title=>"Direct Submission",
266
+ :url=>nil,
267
+ :volume=>"",
268
+ :year=>""}]
269
+
270
+ expected_features = [
271
+ {:feature=>"source",
272
+ :position=>"1..5028",
273
+ :qualifiers=>
274
+ [{:qualifier=>"organism",
275
+ :value=>"Saccharomyces cerevisiae"},
276
+ {:qualifier=>"mol_type",
277
+ :value=>"genomic DNA"},
278
+ {:qualifier=>"db_xref",
279
+ :value=>"taxon:4932"},
280
+ {:qualifier=>"chromosome",
281
+ :value=>"IX"}]},
282
+ {:feature=>"mRNA",
283
+ :position=>"<1..>206",
284
+ :qualifiers=>
285
+ [{
286
+ :qualifier=>"product",
287
+ :value=>"TCP1-beta"}]},
288
+ {:feature=>"CDS",
289
+ :position=>"<1..206",
290
+ :qualifiers=> [{:qualifier=>"codon_start", :value=>3}, {:qualifier=>"product", :value=>"TCP1-beta"},
291
+ {:qualifier=>"protein_id",
292
+ :value=>"AAA98665.1"},
293
+ {:qualifier=>"db_xref",
294
+ :value=>"GI:1293614"},
295
+ {:qualifier=>"translation",
296
+ :value=>
297
+ "SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEAAEVLLRVDNIIRARPRTANRQHM"}]},
298
+ {:feature=>"gene",
299
+ :position=>"<687..>3158",
300
+ :qualifiers=>
301
+ [{:qualifier=>"gene", :value=>"AXL2"}]},
302
+ {:feature=>"mRNA",
303
+ :position=>"<687..>3158",
304
+ :qualifiers=>
305
+ [{:qualifier=>"gene", :value=>"AXL2"},
306
+ {:qualifier=>"product",
307
+ :value=>"Axl2p"}]},
308
+ {:feature=>"CDS",
309
+ :position=>"687..3158",
310
+ :qualifiers=>
311
+ [{:qualifier=>"gene", :value=>"AXL2"},
312
+ {:qualifier=>"note",
313
+ :value=>"plasma membrane glycoprotein"},
314
+ {:qualifier=>"codon_start", :value=>1}, {:qualifier=>"product",
315
+ :value=>"Axl2p"},
316
+ {:qualifier=>"protein_id",
317
+ :value=>"AAA98666.1"},
318
+ {:qualifier=>"db_xref",
319
+ :value=>"GI:1293615"},
320
+ {:qualifier=>"translation",
321
+ :value=>
322
+ "MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESFTFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFNVILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNEVFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPETSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYVYLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYGDVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQDHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSANATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIACGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLNNPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQSQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDSYGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTKHRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRLVDFSNKSNVNVGQVKDIHGRIPEML"}]},
323
+ {:feature=>"gene",
324
+ :position=>"complement(<3300..>4037)",
325
+ :qualifiers=>
326
+ [{:qualifier=>"gene", :value=>"REV7"}]},
327
+ {:feature=>"mRNA",
328
+ :position=>"complement(<3300..>4037)",
329
+ :qualifiers=>
330
+ [{:qualifier=>"gene", :value=>"REV7"},
331
+ {:qualifier=>"product",
332
+ :value=>"Rev7p"}]},
333
+ {:feature=>"CDS",
334
+ :position=>"complement(3300..4037)",
335
+ :qualifiers=>
336
+ [{:qualifier=>"gene", :value=>"REV7"},
337
+ {:qualifier=>"codon_start", :value=>1},
338
+ {:qualifier=>"product",
339
+ :value=>"Rev7p"},
340
+ {:qualifier=>"protein_id",
341
+ :value=>"AAA98667.1"},
342
+ {:qualifier=>"db_xref",
343
+ :value=>"GI:1293616"},
344
+ {:qualifier=>"translation",
345
+ :value=>
346
+ "MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESIFGSLF"}]}]
347
+
348
+ assert_equal(expected_seq, seq.seq)
349
+ assert_equal(expected_id_namespace, seq.id_namespace)
350
+ assert_equal(expected_entry_id, seq.entry_id)
351
+ assert_equal(expected_primary_accession, seq.primary_accession)
352
+ assert_equal(expected_secondary_accessions, seq.secondary_accessions)
353
+ seqids = seq.other_seqids.first
354
+ actual_other_seqids = [seqids.id, seqids.database, seqids.secondary_ids]
355
+ assert_equal(expected_other_seqids, actual_other_seqids)
356
+ assert_equal(expected_division, seq.division)
357
+ assert_equal(expected_strandedness, seq.strandedness)
358
+ assert_equal(expected_keywords, seq.keywords)
359
+ assert_equal(expected_classification, seq.classification)
360
+ assert_equal(expected_comments, seq.comments)
361
+ refs = seq.references
362
+ actual_references = []
363
+ refs.each do |ref|
364
+ actual_references << {:abstract => ref.abstract,
365
+ :affiliations => ref.affiliations,
366
+ :authors => ref.authors,
367
+ :comments => ref.comments,
368
+ :doi => ref.doi,
369
+ :embl_gb_record_number => ref.embl_gb_record_number,
370
+ :issue => ref.issue,
371
+ :journal => ref.journal,
372
+ :medline => ref.medline,
373
+ :mesh => ref.mesh,
374
+ :pages => ref.pages,
375
+ :pubmed => ref.pubmed,
376
+ :sequence_position => ref.sequence_position,
377
+ :title => ref.title,
378
+ :url => ref.url,
379
+ :volume => ref.volume,
380
+ :year => ref.year}
381
+ end
382
+ assert_equal(expected_references, actual_references)
383
+ fets = seq.features
384
+ actual_features = []
385
+ fets.each do |fet|
386
+ feature = fet.feature
387
+ position = fet.position
388
+ quals = []
389
+ fet.qualifiers.each do |qual|
390
+ quals << {:qualifier => qual.qualifier, :value => qual.value}
391
+ end
392
+ actual_features << {:feature => feature, :position => position, :qualifiers => quals}
393
+ end
394
+ assert_equal(expected_features, actual_features) # skip
395
+
396
+
397
+ end
398
+
399
+ end #class TestBioGenBank
400
+ end #module Bio
401
+
@@ -0,0 +1,81 @@
1
+ #
2
+ # test/unit/bio/db/genbank/test_genpept.rb - Unit test for Bio::GenPept
3
+ #
4
+ # Copyright:: Copyright (C) 2010 Kazuhiro Hayashi <k.hayashi.info@gmail.com>
5
+ # License:: The Ruby License
6
+ #
7
+
8
+ # loading helper routine for testing bioruby
9
+ require 'pathname'
10
+ load Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4,
11
+ 'bioruby_test_helper.rb')).cleanpath.to_s
12
+
13
+ # libraries needed for the tests
14
+ require 'test/unit'
15
+ require 'bio/db/genbank/genpept.rb'
16
+
17
+ #The coverage of this class is 100%
18
+ # It tests only the methods described in the source class. (It doesn't test the methods inherited from NCBIDB.)
19
module Bio
  # Unit tests for Bio::GenPept, run against the CAA35997.gp fixture.
  class TestBioGenPept < Test::Unit::TestCase

    # Reads the GenPept fixture and parses it into @obj before each test.
    def setup
      fixture_path = File.join(BioRubyTestDataPath, 'genbank', 'CAA35997.gp')
      entry_text = File.read(fixture_path)
      @obj = Bio::GenPept.new(entry_text)
    end

    # Gathers the locus fields into a hash and compares them in one assertion.
    def test_locus
      locus = @obj.locus
      actual = {
        :entry_id => locus.entry_id,
        :circular => locus.circular,
        :date     => locus.date,
        :division => locus.division,
        :length   => locus.length
      }
      expected = {
        :circular => "linear",
        :date     => "12-SEP-1993",
        :division => "MAM",
        :entry_id => "CAA35997",
        :length   => 100
      }

      assert_equal(expected, actual)
    end

    # Checks the entry ID reported directly by the parsed entry.
    def test_entry_id
      expected = "CAA35997"
      assert_equal(expected, @obj.entry_id)
    end

    # Checks the length reported directly by the parsed entry.
    def test_length
      expected = 100
      assert_equal(expected, @obj.length)
    end

    # Checks the value of #circular reported directly by the parsed entry.
    def test_circular
      expected = "linear"
      assert_equal(expected, @obj.circular)
    end

    # Checks the division code reported directly by the parsed entry.
    def test_division
      expected = "MAM"
      assert_equal(expected, @obj.division)
    end

    # Checks the date string reported directly by the parsed entry.
    def test_date
      expected = "12-SEP-1993"
      assert_equal(expected, @obj.date)
    end

    # Checks the amino acid sequence of the parsed entry.
    def test_seq
      residues = "MRTPMLLALLALATLCLAGRADAKPGDAESGKGAAFVSKQEGSEVVKRLRRYLDHWLGAPAPYPDPLEPKREVCELNPDCDELADHIGFQEAYRRFYGPV"
      assert_equal(residues, @obj.seq)
    end

    # Checks the sequence length reported by the parsed entry.
    def test_seq_len
      expected = 100
      assert_equal(expected, @obj.seq_len)
    end

    # Checks the raw DBSOURCE text, including its trailing newline.
    def test_dbsource
      dbsource_text = "DBSOURCE embl accession X51700.1\n"
      assert_equal(dbsource_text, @obj.dbsource)
    end

  end #class TestBioGenPept
end #module Bio
81
+