bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_embl.rb,v 1.5 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id: test_embl.rb,v 1.5.2.1 2008/02/20 09:56:22 aerts Exp $
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -150,7 +150,7 @@ module Bio
150
150
 
151
151
  # Bio::EMBLDB::COMMON#references
152
152
  def test_references
153
- assert_equal(Bio::References, @obj.references.class)
153
+ assert_equal(Array, @obj.references.class)
154
154
  end
155
155
 
156
156
  # Bio::EMBLDB::COMMON#dr
@@ -163,7 +163,7 @@ module Bio
163
163
  end
164
164
 
165
165
  def test_ft
166
- assert_equal(Bio::Features, @obj.ft.class)
166
+ assert_equal(Array, @obj.ft.class)
167
167
  end
168
168
 
169
169
  def test_ft_iterator
@@ -173,7 +173,7 @@ module Bio
173
173
  end
174
174
 
175
175
  def test_ft_accessor
176
- assert_equal('CDS', @obj.ft.features[1].feature)
176
+ assert_equal('CDS', @obj.ft[1].feature)
177
177
  end
178
178
 
179
179
  def test_each_cds
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2007 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_embl_rel89.rb,v 1.2 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id: test_embl_rel89.rb,v 1.2.2.1 2008/02/20 09:56:22 aerts Exp $
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -16,7 +16,7 @@ require 'test/unit'
16
16
  require 'bio/db/embl/embl'
17
17
 
18
18
  module Bio
19
- class TestEMBL < Test::Unit::TestCase
19
+ class TestEMBL89 < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
22
  bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
@@ -155,7 +155,7 @@ module Bio
155
155
 
156
156
  # Bio::EMBLDB::COMMON#references
157
157
  def test_references
158
- assert_equal(Bio::References, @obj.references.class)
158
+ assert_equal(Array, @obj.references.class)
159
159
  end
160
160
 
161
161
  # Bio::EMBLDB::COMMON#dr
@@ -168,7 +168,7 @@ module Bio
168
168
  end
169
169
 
170
170
  def test_ft
171
- assert_equal(Bio::Features, @obj.ft.class)
171
+ assert_equal(Array, @obj.ft.class)
172
172
  end
173
173
 
174
174
  def test_ft_iterator
@@ -178,7 +178,7 @@ module Bio
178
178
  end
179
179
 
180
180
  def test_ft_accessor
181
- assert_equal('CDS', @obj.ft.features[1].feature)
181
+ assert_equal('CDS', @obj.ft[1].feature)
182
182
  end
183
183
 
184
184
  def test_each_cds
@@ -0,0 +1,203 @@
1
+ #
2
+ # test/unit/bio/db/embl/test_embl.rb - Unit test for Bio::EMBL
3
+ #
4
+ # Copyright:: Copyright (C) 2005, 2008
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id:$
10
+ #
11
+
12
+ require 'pathname'
13
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
14
+ $:.unshift(libpath) unless $:.include?(libpath)
15
+
16
+ require 'test/unit'
17
+ require 'bio'
18
+ require 'bio/db/embl/embl'
19
+
20
+ module Bio
21
+ class TestEMBLToBioSequence < Test::Unit::TestCase
22
+
23
+ def setup
24
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
25
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
26
+ embl_object = Bio::EMBL.new(input)
27
+ embl_object.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
28
+ @bio_seq = embl_object.to_biosequence
29
+ end
30
+
31
+ def test_entry_id
32
+ assert_equal('AB090716', @bio_seq.entry_id)
33
+ end
34
+
35
+ def test_primary_accession
36
+ assert_equal('AB090716', @bio_seq.primary_accession)
37
+ end
38
+
39
+ def test_secondary_accessions
40
+ assert_equal([], @bio_seq.secondary_accessions)
41
+ end
42
+
43
+ def test_molecule_type
44
+ assert_equal('genomic DNA', @bio_seq.molecule_type)
45
+ end
46
+
47
+ def test_definition
48
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq.definition)
49
+ end
50
+
51
+ def test_topology
52
+ assert_equal('linear', @bio_seq.topology)
53
+ end
54
+
55
+ def test_date_created
56
+ # '25-OCT-2002 (Rel. 73, Created)'
57
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq.date_created)
58
+ end
59
+
60
+ def test_date_modified
61
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
62
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq.date_modified)
63
+ end
64
+
65
+ def test_release_created
66
+ assert_equal('73', @bio_seq.release_created)
67
+ end
68
+
69
+ def test_release_modified
70
+ assert_equal('89', @bio_seq.release_modified)
71
+ end
72
+
73
+ def test_entry_version
74
+ assert_equal('3', @bio_seq.entry_version)
75
+ end
76
+
77
+ def test_division
78
+ assert_equal('VRT', @bio_seq.division)
79
+ end
80
+
81
+ def test_sequence_version
82
+ assert_equal(1, @bio_seq.sequence_version)
83
+ end
84
+
85
+ def test_keywords
86
+ assert_equal([], @bio_seq.keywords)
87
+ end
88
+
89
+ def test_species
90
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq.species)
91
+ end
92
+
93
+ def test_classification
94
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq.classification)
95
+
96
+
97
+ end
98
+
99
+ def test_references
100
+ assert_equal(2, @bio_seq.references.length)
101
+ assert_equal(Bio::Reference, @bio_seq.references[0].class)
102
+ end
103
+
104
+ def test_features
105
+ assert_equal(3, @bio_seq.features.length)
106
+ assert_equal(Bio::Feature, @bio_seq.features[0].class)
107
+ end
108
+
109
+ end
110
+
111
+ # To really test the Bio::EMBL to Bio::Sequence conversion, we need to test if
112
+ # that Bio::Sequence can be made into a valid Bio::EMBL again.
113
+ class TestEMBLToBioSequenceRoundTrip < Test::Unit::TestCase
114
+ def setup
115
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
116
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
117
+ embl_object_1 = Bio::EMBL.new(input)
118
+ embl_object_1.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
119
+ @bio_seq_1 = embl_object_1.to_biosequence
120
+ embl_object_2 = Bio::EMBL.new(@bio_seq_1.output(:embl))
121
+ @bio_seq_2 = embl_object_2.to_biosequence
122
+ end
123
+
124
+ def test_entry_id
125
+ assert_equal('AB090716', @bio_seq_2.entry_id)
126
+ end
127
+
128
+ def test_primary_accession
129
+ assert_equal('AB090716', @bio_seq_2.primary_accession)
130
+ end
131
+
132
+ def test_secondary_accessions
133
+ assert_equal([], @bio_seq_2.secondary_accessions)
134
+ end
135
+
136
+ def test_molecule_type
137
+ assert_equal('genomic DNA', @bio_seq_2.molecule_type)
138
+ end
139
+
140
+ def test_definition
141
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq_2.definition)
142
+ end
143
+
144
+ def test_topology
145
+ assert_equal('linear', @bio_seq_2.topology)
146
+ end
147
+
148
+ def test_date_created
149
+ # '25-OCT-2002 (Rel. 73, Created)'
150
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq_2.date_created)
151
+ end
152
+
153
+ def test_date_modified
154
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
155
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq_2.date_modified)
156
+ end
157
+
158
+ def test_release_created
159
+ assert_equal('73', @bio_seq_2.release_created)
160
+ end
161
+
162
+ def test_release_modified
163
+ assert_equal('89', @bio_seq_2.release_modified)
164
+ end
165
+
166
+ def test_entry_version
167
+ assert_equal('3', @bio_seq_2.entry_version)
168
+ end
169
+
170
+ def test_division
171
+ assert_equal('VRT', @bio_seq_2.division)
172
+ end
173
+
174
+ def test_sequence_version
175
+ assert_equal(1, @bio_seq_2.sequence_version)
176
+ end
177
+
178
+ def test_keywords
179
+ assert_equal([], @bio_seq_2.keywords)
180
+ end
181
+
182
+ def test_species
183
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq_2.species)
184
+ end
185
+
186
+ def test_classification
187
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq_2.classification)
188
+
189
+
190
+ end
191
+
192
+ def test_references
193
+ assert_equal(2, @bio_seq_2.references.length)
194
+ assert_equal(Bio::Reference, @bio_seq_2.references[0].class)
195
+ end
196
+
197
+ def test_features
198
+ assert_equal(3, @bio_seq_2.features.length)
199
+ assert_equal(Bio::Feature, @bio_seq_2.features[0].class)
200
+ end
201
+ end
202
+ end
203
+
@@ -4,7 +4,7 @@
4
4
  # Copyright::: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_sptr.rb,v 1.7 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -78,6 +78,43 @@ module Bio
78
78
  assert_equal('P04637', @obj.accession)
79
79
  end
80
80
 
81
+ def test_dr
82
+ assert_equal(17, @obj.dr.size)
83
+ assert_equal(27, @obj.dr['GO'].size)
84
+ assert_equal([["IPR002117", "P53"],
85
+ ["IPR011615", "P53_DNA_bd"],
86
+ ["IPR012346", "P53_RUNT_DNA_bd"],
87
+ ["IPR010991", "p53_tetrameristn"]],
88
+ @obj.dr['InterPro'])
89
+ end
90
+
91
+ def test_dr_with_key
92
+ pfam = [
93
+ { " " => "1",
94
+ "Version" => "P53",
95
+ "Accession" => "PF00870",
96
+ "Molecular Type" => nil
97
+ },
98
+ { " " => "1",
99
+ "Version" => "P53_tetramer",
100
+ "Accession" => "PF07710",
101
+ "Molecular Type" => nil
102
+ }
103
+ ]
104
+ assert_equal(pfam, @obj.dr('Pfam'))
105
+ embl3 = {
106
+ " " => "JOINED",
107
+ "Version" => "AAA59987.1",
108
+ "Accession" => "M13113",
109
+ "Molecular Type" => "Genomic_DNA"
110
+ }
111
+ assert_equal(embl3, @obj.dr('EMBL')[3])
112
+ end
113
+
114
+ def test_dr_with_key_empty
115
+ assert_equal([], @obj.dr('NOT_A_DATABASE'))
116
+ end
117
+
81
118
  def test_dt
82
119
  assert(@obj.dt)
83
120
  end
@@ -6,11 +6,11 @@
6
6
  #
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: test_pdb.rb,v 1.3 2007/04/05 23:35:43 trevor Exp $
9
+ # $Id:$
10
10
  #
11
11
 
12
12
  require 'pathname'
13
- libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
13
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
14
14
  $:.unshift(libpath) unless $:.include?(libpath)
15
15
 
16
16
  require 'test/unit'
@@ -1,10 +1,12 @@
1
1
  #
2
2
  # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF
3
3
  #
4
- # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2005, 2008
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # Naohisa Goto <ng@bioruby.org>
5
7
  # License:: The Ruby License
6
8
  #
7
- # $Id: test_gff.rb,v 1.6 2007/04/05 23:35:43 trevor Exp $
9
+ # $Id:$
8
10
  #
9
11
 
10
12
  require 'pathname'
@@ -12,13 +14,14 @@ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cle
12
14
  $:.unshift(libpath) unless $:.include?(libpath)
13
15
 
14
16
  require 'test/unit'
17
+ require 'digest/sha1'
15
18
  require 'bio/db/gff'
16
19
 
17
20
  module Bio
18
21
  class TestGFF < Test::Unit::TestCase
19
22
 
20
23
  def setup
21
- data = <<END
24
+ data = <<END_OF_DATA
22
25
  I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1\; Chromosome I Centromere"
23
26
  I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
24
27
  I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
@@ -27,7 +30,7 @@ I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3\; transcription factor
27
30
  I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
28
31
  I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
29
32
  I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
30
- END
33
+ END_OF_DATA
31
34
  @obj = Bio::GFF.new(data)
32
35
  end
33
36
 
@@ -41,27 +44,12 @@ END
41
44
 
42
45
  end # class TestGFF
43
46
 
44
-
45
- class TestGFF2 < Test::Unit::TestCase
46
- def test_version
47
- assert_equal(2, Bio::GFF::GFF2::VERSION)
48
- end
49
- end
50
-
51
-
52
- class TestGFF3 < Test::Unit::TestCase
53
- def test_version
54
- assert_equal(3, Bio::GFF::GFF3::VERSION)
55
- end
56
- end
57
-
58
-
59
47
  class TestGFFRecord < Test::Unit::TestCase
60
48
 
61
49
  def setup
62
- data =<<END
50
+ data =<<END_OF_DATA
63
51
  I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"
64
- END
52
+ END_OF_DATA
65
53
  @obj = Bio::GFF::Record.new(data)
66
54
  end
67
55
 
@@ -102,13 +90,12 @@ END
102
90
  assert_equal(at, @obj.attributes)
103
91
  end
104
92
 
105
- def test_comments
106
- assert_equal(nil, @obj.comments)
93
+ def test_comment
94
+ assert_equal(nil, @obj.comment)
107
95
  end
108
96
 
109
97
  end # class TestGFFRecord
110
98
 
111
-
112
99
  class TestGFFRecordConstruct < Test::Unit::TestCase
113
100
 
114
101
  def setup
@@ -124,4 +111,1143 @@ END
124
111
  end
125
112
 
126
113
  end # class TestGFFRecordConstruct
127
- end
114
+
115
+ class TestGFF2 < Test::Unit::TestCase
116
+ def setup
117
+ data = <<END_OF_DATA
118
+ ##gff-version 2
119
+ ##date 2008-09-22
120
+ I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1; Chromosome I Centromere"
121
+ I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
122
+ I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
123
+ I sgd gene 147591 151163 . - . Gene "TSV115" ; Note "transcription factor tau (TFIIIC) subunit 138"
124
+ I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
125
+ I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
126
+ I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
127
+ I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
128
+ END_OF_DATA
129
+ @obj = Bio::GFF::GFF2.new(data)
130
+ end
131
+
132
+ def test_const_version
133
+ assert_equal(2, Bio::GFF::GFF2::VERSION)
134
+ end
135
+
136
+ def test_gff_version
137
+ assert_equal('2', @obj.gff_version)
138
+ end
139
+
140
+ def test_metadata_size
141
+ assert_equal(1, @obj.metadata.size)
142
+ end
143
+
144
+ def test_metadata
145
+ assert_equal(Bio::GFF::GFF2::MetaData.new('date', '2008-09-22'),
146
+ @obj.metadata[0])
147
+ end
148
+
149
+ def test_records_size
150
+ assert_equal(8, @obj.records.size)
151
+ end
152
+
153
+ def test_to_s
154
+ str = <<END_OF_DATA
155
+ ##gff-version 2
156
+ ##date 2008-09-22
157
+ I sgd CEN 151453 151591 . + . CEN CEN1 ; Note "CEN1; Chromosome I Centromere"
158
+ I sgd gene 147591 151163 . - . Gene TFC3 ; Note "transcription factor tau (TFIIIC) subunit 138"
159
+ I sgd gene 147591 151163 . - . Gene FUN24 ; Note "transcription factor tau (TFIIIC) subunit 138"
160
+ I sgd gene 147591 151163 . - . Gene TSV115 ; Note "transcription factor tau (TFIIIC) subunit 138"
161
+ I sgd ORF 147591 151163 . - . ORF YAL001C ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
162
+ I sgd gene 143998 147528 . + . Gene VPS8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
163
+ I sgd gene 143998 147528 . + . Gene FUN15 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
164
+ I sgd gene 143998 147528 . + . Gene VPT8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
165
+ END_OF_DATA
166
+ assert_equal(str, @obj.to_s)
167
+ end
168
+ end #class TestGFF2
169
+
170
+ class TestGFF2Record < Test::Unit::TestCase
171
+ def setup
172
+ str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget \"HBA_HUMAN\" 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\x1a\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\"; Misc IdString; Misc \"free text\"; Misc 5678 "
173
+
174
+ @obj = Bio::GFF::GFF2::Record.new(str)
175
+ end
176
+
177
+ def test_to_s
178
+ str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget HBA_HUMAN 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\032\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\" ; Misc IdString ; Misc \"free text\" ; Misc 5678\n"
179
+
180
+ assert_equal(str, @obj.to_s)
181
+ end
182
+
183
+ def test_eqeq
184
+ obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
185
+ assert_equal(true, @obj == obj2)
186
+ end
187
+
188
+ def test_eqeq_false
189
+ obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
190
+ obj2.seqname = 'seq2'
191
+ assert_equal(false, @obj == obj2)
192
+ end
193
+
194
+ def test_comment_only?
195
+ assert_equal(false, @obj.comment_only?)
196
+ end
197
+
198
+ def test_seqname
199
+ assert_equal('seq1', @obj.seqname)
200
+ end
201
+
202
+ def test_source
203
+ assert_equal('BLASTX', @obj.source)
204
+ end
205
+
206
+ def test_feature
207
+ assert_equal('similarity', @obj.feature)
208
+ end
209
+
210
+ def test_start
211
+ assert_equal(101, @obj.start)
212
+ end
213
+
214
+ def test_end
215
+ assert_equal(235, @obj.end)
216
+ end
217
+
218
+ def test_score
219
+ assert_equal(87.1, @obj.score)
220
+ end
221
+
222
+ def test_strand
223
+ assert_equal('+', @obj.strand)
224
+ end
225
+
226
+ def test_frame
227
+ assert_equal(0, @obj.frame)
228
+ end
229
+
230
+ def test_attributes_to_hash
231
+ hash = {
232
+ 'Target' =>
233
+ Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']),
234
+ 'E_value' => '0.0003',
235
+ 'Align' =>
236
+ Bio::GFF::GFF2::Record::Value.new(['101', '11']),
237
+ 'Comment' =>
238
+ Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]),
239
+ 'Note' => '',
240
+ 'Misc' => 'IdString'
241
+ }
242
+ assert_equal(hash, @obj.attributes_to_hash)
243
+ end
244
+
245
+ def test_attributes
246
+ attributes =
247
+ [ [ 'Target',
248
+ Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ],
249
+ [ 'E_value', '0.0003' ],
250
+ [ 'Align',
251
+ Bio::GFF::GFF2::Record::Value.new(['101', '11']) ],
252
+ [ 'Align',
253
+ Bio::GFF::GFF2::Record::Value.new(['179', '36']) ],
254
+ [ 'Comment',
255
+ Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ],
256
+ [ 'Note', '' ],
257
+ [ 'Misc', 'IdString' ],
258
+ [ 'Misc', 'free text' ],
259
+ [ 'Misc', '5678' ]
260
+ ]
261
+ assert_equal(attributes, @obj.attributes)
262
+ end
263
+
264
+ def test_attribute
265
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
266
+ assert_equal(val_Target, @obj.attribute('Target'))
267
+ assert_equal('0.0003', @obj.attribute('E_value'))
268
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
269
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
270
+ assert_equal(val_Align0, @obj.attribute('Align'))
271
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
272
+ assert_equal(val_Comment, @obj.attribute('Comment'))
273
+ assert_equal('', @obj.attribute('Note'))
274
+ assert_equal('IdString', @obj.attribute('Misc'))
275
+ end
276
+
277
+ def test_attribute_nonexistent
278
+ assert_equal(nil, @obj.attribute('NonExistent'))
279
+ end
280
+
281
+ def test_get_attribute
282
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
283
+ assert_equal(val_Target, @obj.get_attribute('Target'))
284
+ assert_equal('0.0003', @obj.get_attribute('E_value'))
285
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
286
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
287
+ assert_equal(val_Align0, @obj.get_attribute('Align'))
288
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
289
+ assert_equal(val_Comment, @obj.get_attribute('Comment'))
290
+ assert_equal('', @obj.get_attribute('Note'))
291
+ assert_equal('IdString', @obj.get_attribute('Misc'))
292
+ end
293
+
294
+ def test_get_attribute_nonexistent
295
+ assert_equal(nil, @obj.get_attribute('NonExistent'))
296
+ end
297
+
298
+ def test_get_attributes
299
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
300
+ assert_equal([ val_Target ], @obj.get_attributes('Target'))
301
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
302
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
303
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
304
+ assert_equal([ val_Align0, val_Align1 ],
305
+ @obj.get_attributes('Align'))
306
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
307
+ assert_equal([ val_Comment ], @obj.get_attributes('Comment'))
308
+ assert_equal([ '' ], @obj.get_attributes('Note'))
309
+ assert_equal([ 'IdString', 'free text', '5678' ],
310
+ @obj.get_attributes('Misc'))
311
+ end
312
+
313
+ def test_get_attributes_nonexistent
314
+ assert_equal([], @obj.get_attributes('NonExistent'))
315
+ end
316
+
317
+ def test_set_attribute
318
+ assert_equal('0.0003', @obj.attribute('E_value'))
319
+ assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10'))
320
+ assert_equal('1e-10', @obj.attribute('E_value'))
321
+ end
322
+
323
+ def test_set_attribute_multiple
324
+ assert_equal([ 'IdString', 'free text', '5678' ],
325
+ @obj.get_attributes('Misc'))
326
+ assert_equal('Replaced',
327
+ @obj.set_attribute('Misc', 'Replaced'))
328
+ assert_equal([ 'Replaced', 'free text', '5678' ],
329
+ @obj.get_attributes('Misc'))
330
+ end
331
+
332
+ def test_set_attribute_nonexistent
333
+ assert_equal(nil, @obj.attribute('NonExistent'))
334
+ assert_equal('test', @obj.set_attribute('NonExistent', 'test'))
335
+ assert_equal('test', @obj.attribute('NonExistent'))
336
+ end
337
+
338
+ def test_replace_attributes
339
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
340
+ assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10'))
341
+ assert_equal([ '1e-10' ], @obj.get_attributes('E_value'))
342
+ end
343
+
344
+ def test_replace_attributes_single_multiple
345
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
346
+ assert_equal(@obj, @obj.replace_attributes('E_value',
347
+ '1e-10', '3.14', '2.718'))
348
+ assert_equal([ '1e-10', '3.14', '2.718' ],
349
+ @obj.get_attributes('E_value'))
350
+ end
351
+
352
+ def test_replace_attributes_multiple_single
353
+ assert_equal([ 'IdString', 'free text', '5678' ],
354
+ @obj.get_attributes('Misc'))
355
+ assert_equal(@obj,
356
+ @obj.replace_attributes('Misc', 'Replaced_All'))
357
+ assert_equal([ 'Replaced_All' ],
358
+ @obj.get_attributes('Misc'))
359
+ end
360
+
361
+ def test_replace_attributes_multiple_multiple_two
362
+ assert_equal([ 'IdString', 'free text', '5678' ],
363
+ @obj.get_attributes('Misc'))
364
+ assert_equal(@obj,
365
+ @obj.replace_attributes('Misc',
366
+ 'Replaced', 'test2'))
367
+ assert_equal([ 'Replaced', 'test2' ],
368
+ @obj.get_attributes('Misc'))
369
+ end
370
+
371
+ def test_replace_attributes_multiple_multiple_same
372
+ assert_equal([ 'IdString', 'free text', '5678' ],
373
+ @obj.get_attributes('Misc'))
374
+ assert_equal(@obj,
375
+ @obj.replace_attributes('Misc',
376
+ 'Replaced', 'test2', 'test3'))
377
+ assert_equal([ 'Replaced', 'test2', 'test3' ],
378
+ @obj.get_attributes('Misc'))
379
+ end
380
+
381
+ def test_replace_attributes_multiple_multiple_over
382
+ assert_equal([ 'IdString', 'free text', '5678' ],
383
+ @obj.get_attributes('Misc'))
384
+ assert_equal(@obj,
385
+ @obj.replace_attributes('Misc',
386
+ 'Replaced', 'test2', 'test3', '4'))
387
+ assert_equal([ 'Replaced', 'test2', 'test3', '4' ],
388
+ @obj.get_attributes('Misc'))
389
+ end
390
+
391
+ def test_replace_attributes_nonexistent
392
+ assert_equal(nil, @obj.attribute('NonExistent'))
393
+ assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test'))
394
+ assert_equal([ 'test' ], @obj.get_attributes('NonExistent'))
395
+ end
396
+
397
+ def test_replace_attributes_nonexistent_multiple
398
+ assert_equal(nil, @obj.attribute('NonExistent'))
399
+ assert_equal(@obj,
400
+ @obj.replace_attributes('NonExistent',
401
+ 'test', 'gff2', 'attr'))
402
+ assert_equal([ 'test', 'gff2', 'attr' ],
403
+ @obj.get_attributes('NonExistent'))
404
+ end
405
+
406
+ def test_delete_attribute
407
+ assert_equal('0.0003', @obj.attribute('E_value'))
408
+ assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003'))
409
+ assert_equal(nil, @obj.attribute('E_value'))
410
+ end
411
+
412
+ def test_delete_attribute_nil
413
+ assert_equal('0.0003', @obj.attribute('E_value'))
414
+ assert_equal(nil, @obj.delete_attribute('E_value', '3'))
415
+ assert_equal('0.0003', @obj.attribute('E_value'))
416
+ end
417
+
418
+ def test_delete_attribute_multiple
419
+ assert_equal([ 'IdString', 'free text', '5678' ],
420
+ @obj.get_attributes('Misc'))
421
+ assert_equal('free text',
422
+ @obj.delete_attribute('Misc', 'free text'))
423
+ assert_equal([ 'IdString', '5678' ],
424
+ @obj.get_attributes('Misc'))
425
+ end
426
+
427
+ def test_delete_attribute_multiple2
428
+ assert_equal([ 'IdString', 'free text', '5678' ],
429
+ @obj.get_attributes('Misc'))
430
+ assert_equal('IdString',
431
+ @obj.delete_attribute('Misc', 'IdString'))
432
+ assert_equal([ 'free text', '5678' ],
433
+ @obj.get_attributes('Misc'))
434
+ assert_equal('5678',
435
+ @obj.delete_attribute('Misc', '5678'))
436
+ assert_equal([ 'free text' ],
437
+ @obj.get_attributes('Misc'))
438
+ end
439
+
440
+ def test_delete_attribute_multiple_nil
441
+ assert_equal([ 'IdString', 'free text', '5678' ],
442
+ @obj.get_attributes('Misc'))
443
+ assert_equal(nil,
444
+ @obj.delete_attribute('Misc', 'test'))
445
+ assert_equal([ 'IdString', 'free text', '5678' ],
446
+ @obj.get_attributes('Misc'))
447
+ end
448
+
449
+ def test_delete_attribute_nonexistent
450
+ assert_equal(nil, @obj.attribute('NonExistent'))
451
+ assert_equal(nil, @obj.delete_attribute('NonExistent', 'test'))
452
+ assert_equal([], @obj.get_attributes('NonExistent'))
453
+ end
454
+
455
+ def test_delete_attributes
456
+ assert_equal('0.0003', @obj.attribute('E_value'))
457
+ assert_equal(@obj, @obj.delete_attributes('E_value'))
458
+ assert_equal(nil, @obj.attribute('E_value'))
459
+ end
460
+
461
+ def test_delete_attributes_multiple
462
+ assert_equal([ 'IdString', 'free text', '5678' ],
463
+ @obj.get_attributes('Misc'))
464
+ assert_equal(@obj, @obj.delete_attributes('Misc'))
465
+ assert_equal([], @obj.get_attributes('Misc'))
466
+ end
467
+
468
+ def test_delete_attributes_nonexistent
469
+ assert_equal(nil, @obj.attribute('NonExistent'))
470
+ assert_equal(nil, @obj.delete_attributes('NonExistent'))
471
+ assert_equal([], @obj.get_attributes('NonExistent'))
472
+ end
473
+
474
+ def test_sort_attributes_by_tag!
475
+ tags = %w( Comment Align E_value Note )
476
+ assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
477
+ assert_equal(%w( Comment Align Align E_value Note Target
478
+ Misc Misc Misc ),
479
+ @obj.attributes.collect { |x| x[0] })
480
+ # check if the order of 'Misc' is not changed
481
+ assert_equal([ 'IdString', 'free text', '5678' ],
482
+ @obj.get_attributes('Misc'))
483
+ end
484
+
485
+ def test_sort_attributes_by_tag_bang_test2
486
+ tags = %w( E_value Misc Note Target )
487
+ assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
488
+ assert_equal(%w( E_value Misc Misc Misc Note Target
489
+ Align Align Comment ),
490
+ @obj.attributes.collect { |x| x[0] })
491
+ # check if the order of 'Misc' is not changed
492
+ assert_equal([ 'IdString', 'free text', '5678' ],
493
+ @obj.get_attributes('Misc'))
494
+ end
495
+
496
+ def test_sort_attributes_by_tag_bang_with_block
497
+ assert_equal(@obj,
498
+ @obj.sort_attributes_by_tag! { |x, y|
499
+ x <=> y
500
+ })
501
+ assert_equal(%w( Align Align Comment E_value Misc Misc Misc
502
+ Note Target ),
503
+ @obj.attributes.collect { |x| x[0] })
504
+ # check if the order of 'Misc' is not changed
505
+ assert_equal([ 'IdString', 'free text', '5678' ],
506
+ @obj.get_attributes('Misc'))
507
+ end
508
+ end #class TestGFF2Record
509
+
510
+ class TestGFF2RecordEmpty < Test::Unit::TestCase
511
+ def setup
512
+ @obj = Bio::GFF::GFF2::Record.new('# test comment')
513
+ end
514
+
515
+ def test_comment_only?
516
+ assert_equal(true, @obj.comment_only?)
517
+ end
518
+
519
+ def test_comment_only_false
520
+ @obj.seqname = 'test'
521
+ assert_equal(false, @obj.comment_only?)
522
+ end
523
+
524
+ def test_to_s
525
+ assert_equal("# test comment\n", @obj.to_s)
526
+ end
527
+
528
+ def test_to_s_not_empty
529
+ @obj.seqname = 'test'
530
+ @obj.feature = 'region'
531
+ @obj.start = 1
532
+ @obj.end = 100
533
+ assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n",
534
+ @obj.to_s)
535
+ @obj.add_attribute('Gene', 'unknown')
536
+ assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n",
537
+ @obj.to_s)
538
+ end
539
+
540
+ def test_comment
541
+ assert_equal(' test comment', @obj.comment)
542
+ end
543
+
544
+ def test_comment_eq
545
+ assert_equal('changed the comment',
546
+ @obj.comment = 'changed the comment')
547
+ end
548
+ end #class TestGFF2RecordEmpty
549
+
550
+ class TestGFF2ComplexAttributes < Test::Unit::TestCase
551
+
552
+ # The test string comes from the Poplar genome annotation from the JGI.
553
+ # ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz
554
+ # Thanks to Tomoaki NISHIYAMA, who picked out the example line.
555
+ def test_attributes_case1
556
+ str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n"
557
+
558
+ attributes = [
559
+ [ "name", "grail3.0116000101" ],
560
+ [ "proteinId", "639579" ],
561
+ [ "exonNumber", "3" ]
562
+ ]
563
+ record = Bio::GFF::GFF2::Record.new(str)
564
+ assert_equal(attributes, record.attributes)
565
+ end
566
+
567
+ # The test string is modified from that of test_attributes_case1.
568
+ def test_attributes_case2
569
+ str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n"
570
+
571
+ attributes = [
572
+ [ "name", "grail3.0116000101" ],
573
+ [ "proteinId", "639579" ],
574
+ [ "exonNumber", "3" ],
575
+ [ "Note", "Semicolons ; and ;, and quote \" can be OK" ],
576
+ [ "Comment", "This is the \"comment\"" ]
577
+ ]
578
+ record = Bio::GFF::GFF2::Record.new(str)
579
+ assert_equal(attributes, record.attributes)
580
+ end
581
+
582
+ def test_attributes_incompatible_backslash_semicolon
583
+ # No special treatments for backslash-semicolon outside the free text.
584
+ str =<<END_OF_DATA
585
+ I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"; Semicolon a "b;c" d "e;f;g" h; Illegal a\\;b c d; Comment "a ; b"
586
+ END_OF_DATA
587
+
588
+ attributes = [
589
+ [ 'Gene', 'CEN1' ],
590
+ [ 'Note', 'Chromosome I Centromere' ],
591
+ [ 'Semicolon',
592
+ Bio::GFF::GFF2::Record::Value.new(['a', 'b;c', 'd', 'e;f;g', 'h']) ],
593
+ [ 'Illegal', "a\\" ],
594
+ [ 'b', Bio::GFF::GFF2::Record::Value.new(['c', 'd']) ],
595
+ [ 'Comment', 'a ; b' ]
596
+ ]
597
+ record = Bio::GFF::GFF2::Record.new(str)
598
+ assert_equal(attributes, record.attributes)
599
+ end
600
+
601
+ end #class TestGFF2ComplexAttributes
602
+
603
+ class TestGFF2MetaData < Test::Unit::TestCase
604
+ def setup
605
+ @data =
606
+ Bio::GFF::GFF2::MetaData.new('date', '2008-09-22')
607
+ end
608
+
609
+ def test_parse
610
+ assert_equal(@data,
611
+ Bio::GFF::GFF2::MetaData.parse('##date 2008-09-22'))
612
+ end
613
+
614
+ def test_directive
615
+ assert_equal('date', @data.directive)
616
+ end
617
+
618
+ def test_data
619
+ assert_equal('2008-09-22', @data.data)
620
+ end
621
+ end #class TestGFF2MetaData
622
+
623
+ class TestGFF3 < Test::Unit::TestCase
624
+ def setup
625
+ @data =<<END_OF_DATA
626
+ ##gff-version 3
627
+ ##sequence-region test01 1 400
628
+ test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
629
+ test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
630
+ test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
631
+ test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
632
+ test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
633
+ test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
634
+ test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
635
+ ##FASTA
636
+ >test01
637
+ ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
638
+ GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
639
+ CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
640
+ AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
641
+ GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
642
+ CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
643
+ END_OF_DATA
644
+ @gff3 = Bio::GFF::GFF3.new(@data)
645
+ end
646
+
647
+ def test_const_version
648
+ assert_equal(3, Bio::GFF::GFF3::VERSION)
649
+ end
650
+
651
+ def test_sequence_regions
652
+ region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400)
653
+ assert_equal([ region ], @gff3.sequence_regions)
654
+ end
655
+
656
+ def test_gff_version
657
+ assert_equal('3', @gff3.gff_version)
658
+ end
659
+
660
+ def test_records
661
+ assert_equal(7, @gff3.records.size)
662
+ r_test01 = Bio::GFF::GFF3::Record.new('test01',
663
+ 'RANDOM',
664
+ 'contig',
665
+ 1, 400, nil, '+', nil,
666
+ [ ['ID', 'test01'],
667
+ ['Note', 'this is test'] ])
668
+ r_mrna01 = Bio::GFF::GFF3::Record.new('test01',
669
+ nil,
670
+ 'mRNA',
671
+ 101, 230, nil, '+', nil,
672
+ [ ['ID', 'mrna01'],
673
+ ['Name', 'testmRNA'],
674
+ ['Note', 'this is test mRNA'] ])
675
+ r_exon01 = Bio::GFF::GFF3::Record.new('test01',
676
+ nil,
677
+ 'exon',
678
+ 101, 160, nil, '+', nil,
679
+ [ ['ID', 'exon01'],
680
+ ['Name', 'exon01'],
681
+ ['Alias', 'exon 1'],
682
+ ['Parent', 'mrna01'],
683
+ ['Parent', 'mrna01a'] ])
684
+
685
+ target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21)
686
+ gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
687
+ r_match01 =Bio::GFF::GFF3::Record.new('test01',
688
+ nil,
689
+ 'Match',
690
+ 101, 123, nil, nil, nil,
691
+ [ ['ID', 'match01'],
692
+ ['Name', 'match01'],
693
+ ['Target', target],
694
+ ['Gap', gap] ])
695
+ assert_equal(r_test01, @gff3.records[0])
696
+ assert_equal(r_mrna01, @gff3.records[1])
697
+ assert_equal(r_exon01, @gff3.records[3])
698
+ assert_equal(r_match01, @gff3.records[6])
699
+ end
700
+
701
+ def test_sequences
702
+ assert_equal(1, @gff3.sequences.size)
703
+ assert_equal('test01', @gff3.sequences[0].entry_id)
704
+ assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4',
705
+ Digest::SHA1.hexdigest(@gff3.sequences[0].seq.to_s))
706
+ end
707
+
708
+ def test_to_s
709
+ assert_equal(@data, @gff3.to_s)
710
+ end
711
+
712
+ end #class TestGFF3
713
+
714
+ class TestGFF3Record < Test::Unit::TestCase
715
+
716
+ def setup
717
+ data =<<END_OF_DATA
718
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome%20I%20centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
719
+ END_OF_DATA
720
+ @obj = Bio::GFF::GFF3::Record.new(data)
721
+ end
722
+
723
+ def test_seqname
724
+ assert_equal('chrI', @obj.seqname)
725
+ end
726
+
727
+ def test_source
728
+ assert_equal('SGD', @obj.source)
729
+ end
730
+
731
+ def test_feature
732
+ assert_equal('centromere', @obj.feature)
733
+ end
734
+
735
+ def test_start
736
+ assert_equal(151467, @obj.start)
737
+ end
738
+
739
+ def test_end
740
+ assert_equal(151584, @obj.end)
741
+ end
742
+
743
+ def test_score
744
+ assert_equal(nil, @obj.score)
745
+ end
746
+
747
+ def test_strand
748
+ assert_equal('+', @obj.strand)
749
+ end
750
+
751
+ def test_frame
752
+ assert_equal(nil, @obj.frame)
753
+ end
754
+
755
+ def test_attributes
756
+ attr = [
757
+ ['ID', 'CEN1'],
758
+ ['Name', 'CEN1'],
759
+ ['gene', 'CEN1'],
760
+ ['Alias', 'CEN1'],
761
+ ['Alias', 'test;0001'],
762
+ ['Note', 'Chromosome I centromere'],
763
+ ['dbxref', 'SGD:S000006463'],
764
+ ['Target',
765
+ Bio::GFF::GFF3::Record::Target.new('test 02', 123, 456, '-')],
766
+ ['Target',
767
+ Bio::GFF::GFF3::Record::Target.new('test,03', 159, 314)],
768
+ ['memo=test;attr', "99.9%\tmatch"]
769
+ ]
770
+ assert_equal(attr, @obj.attributes)
771
+ end
772
+
773
+ def test_id
774
+ assert_equal('CEN1', @obj.id)
775
+ end
776
+
777
+ def test_to_s
778
+ str =<<END_OF_DATA
779
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome I centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
780
+ END_OF_DATA
781
+ assert_equal(str, @obj.to_s)
782
+ end
783
+
784
+ def test_to_s_attr_order_changed
785
+ str = <<END_OF_STR
786
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;Alias=CEN1,test%3B0001;Target=test%2002 123 456 -,test%2C03 159 314;Note=Chromosome I centromere;dbxref=SGD:S000006463;gene=CEN1;memo%3Dtest%3Battr=99.9%25%09match
787
+ END_OF_STR
788
+
789
+ keys = [ 'ID', 'Name', 'Alias', 'Target', 'Note', 'dbxref', 'gene' ]
790
+ @obj.sort_attributes_by_tag!(keys)
791
+ assert_equal(str, @obj.to_s)
792
+ end
793
+ end #class TestGFF3Record
794
+
795
+ class TestGFF3RecordMisc < Test::Unit::TestCase
796
+ def test_attributes_none
797
+ # test blank with tab
798
+ data =<<END_OF_DATA
799
+ I sgd gene 151453 151591 . + .
800
+ END_OF_DATA
801
+ obj = Bio::GFF::GFF3::Record.new(data)
802
+ assert_equal([], obj.attributes)
803
+
804
+ # test blank with no tab at end
805
+ data =<<END_OF_DATA
806
+ I sgd gene 151453 151591 . + .
807
+ END_OF_DATA
808
+ obj = Bio::GFF::GFF3::Record.new(data)
809
+ assert_equal([], obj.attributes)
810
+ end
811
+
812
+ def test_attributes_one
813
+ data =<<END_OF_DATA
814
+ I sgd gene 151453 151591 . + . ID=CEN1
815
+ END_OF_DATA
816
+ obj = Bio::GFF::GFF3::Record.new(data)
817
+ at = [ ["ID", 'CEN1'] ]
818
+ assert_equal(at, obj.attributes)
819
+ end
820
+
821
+ def test_attributes_with_escaping
822
+ data =<<END_OF_DATA
823
+ I sgd gene 151453 151591 . + . ID=CEN1;gene=CEN1%3Boh;Note=Chromosome I Centromere
824
+ END_OF_DATA
825
+ obj = Bio::GFF::GFF3::Record.new(data)
826
+ at = [ ['ID', 'CEN1'],
827
+ ["gene", 'CEN1;oh'],
828
+ ["Note", 'Chromosome I Centromere']
829
+ ]
830
+ assert_equal(at, obj.attributes)
831
+ end
832
+
833
+ def test_score
834
+ data =<<END_OF_DATA
835
+ ctg123 src match 456 788 1e-10 - . ID=test01
836
+ END_OF_DATA
837
+ obj = Bio::GFF::GFF3::Record.new(data)
838
+ assert_equal(1e-10, obj.score)
839
+ obj.score = 0.5
840
+ assert_equal(0.5, obj.score)
841
+ end
842
+
843
+ def test_phase
844
+ data =<<END_OF_DATA
845
+ ctg123 src CDS 456 788 . - 2 ID=test02
846
+ END_OF_DATA
847
+ obj = Bio::GFF::GFF3::Record.new(data)
848
+ assert_equal(2, obj.phase)
849
+ assert_equal(2, obj.frame)
850
+ obj.phase = 1
851
+ assert_equal(1, obj.phase)
852
+ assert_equal(1, obj.frame)
853
+ end
854
+
855
+ def test_id_replace
856
+ data =<<END_OF_DATA
857
+ ctg123 src CDS 456 788 1e-10 - 2 ID=test03
858
+ END_OF_DATA
859
+ obj = Bio::GFF::GFF3::Record.new(data)
860
+ assert_equal('test03', obj.id)
861
+ assert_equal('test_id', obj.id = 'test_id')
862
+ assert_equal('test_id', obj.id)
863
+ end
864
+
865
+ def test_id_set
866
+ data =<<END_OF_DATA
867
+ ctg123 src CDS 456 788 1e-10 - 2 NAME=test03
868
+ END_OF_DATA
869
+ obj = Bio::GFF::GFF3::Record.new(data)
870
+ assert_nil(obj.id)
871
+ assert_equal('test_id', obj.id = 'test_id')
872
+ assert_equal('test_id', obj.id)
873
+ assert_equal('next_test', obj.id = 'next_test')
874
+ assert_equal('next_test', obj.id)
875
+ end
876
+
877
+ def test_id_multiple
878
+ # Note: Having two ID attributes in a record is illegal in GFF3.
879
+ data =<<END_OF_DATA
880
+ ctg123 src CDS 456 788 . - 2 ID=test03,test04
881
+ END_OF_DATA
882
+ obj = Bio::GFF::GFF3::Record.new(data)
883
+ assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
884
+ obj.attributes)
885
+ assert_equal('test03', obj.id)
886
+ assert_equal('test_id', obj.id = 'test_id')
887
+ assert_equal('test_id', obj.id)
888
+ assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
889
+ obj.attributes)
890
+ str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
891
+ assert_equal(str, obj.to_s)
892
+ end
893
+
894
+ def test_id_multiple2
895
+ # Note: Having two ID attributes in a record is illegal in GFF3.
896
+ data =<<END_OF_DATA
897
+ ctg123 src CDS 456 788 . - 2 ID=test03;ID=test04
898
+ END_OF_DATA
899
+ obj = Bio::GFF::GFF3::Record.new(data)
900
+ assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
901
+ obj.attributes)
902
+ assert_equal('test03', obj.id)
903
+ assert_equal('test_id', obj.id = 'test_id')
904
+ assert_equal('test_id', obj.id)
905
+ assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
906
+ obj.attributes)
907
+
908
+ # The "XXX=test03;XXX=test04" is automatically changed to
909
+ # "XXX=test03,test04", as defined in the GFF3 spec.
910
+ str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
911
+ assert_equal(str, obj.to_s)
912
+ end
913
+
914
+ def test_initialize_9
915
+ obj = Bio::GFF::GFF3::Record.new('test01',
916
+ 'testsrc',
917
+ 'exon',
918
+ 1, 400, nil, '+', nil,
919
+ [ ['ID', 'test01'],
920
+ ['Note', 'this is test'] ])
921
+ assert_equal('test01', obj.seqid)
922
+ end
923
+
924
+ def test_to_s_void
925
+ obj = Bio::GFF::GFF3::Record.new
926
+ assert_equal(".\t.\t.\t.\t.\t.\t.\t.\t.\n", obj.to_s)
927
+ end
928
+
929
+ end #class TestGFF3RecordMisc
930
+
931
+ class TestGFF3RecordEscape < Test::Unit::TestCase
932
+ def setup
933
+ @obj = Object.new.extend(Bio::GFF::GFF3::Escape)
934
+ @str = "A>B\tC=100%;d=e,f,g h"
935
+ end
936
+
937
+ def test_escape
938
+ str = @str
939
+ assert_equal('A>B%09C=100%25;d=e,f,g h',
940
+ @obj.instance_eval { escape(str) })
941
+ end
942
+
943
+ def test_escape_attribute
944
+ str = @str
945
+ assert_equal('A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h',
946
+ @obj.instance_eval { escape_attribute(str) })
947
+ end
948
+
949
+ def test_escape_seqid
950
+ str = @str
951
+ assert_equal('A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h',
952
+ @obj.instance_eval { escape_seqid(str) })
953
+ end
954
+
955
+ def test_unescape
956
+ escaped_str = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h'
957
+ assert_equal(@str,
958
+ @obj.instance_eval {
959
+ unescape(escaped_str) })
960
+ end
961
+ end #class TestGFF3RecordEscape
962
+
963
# Tests for Bio::GFF::GFF3::Record::Target: construction, parsing from the
# attribute string form, the individual accessors, and serialization.
#
# Fixtures (see setup):
#   [0] plain identifier with start, end and strand
#   [1] identifier containing characters that appear percent-escaped in the
#       string form; no strand
#   [2] all fields nil
class TestGFF3RecordTarget < Test::Unit::TestCase

  def setup
    @target = [
      Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'),
      Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90),
      Bio::GFF::GFF3::Record::Target.new(nil, nil, nil)
    ]
  end

  # Each string must parse into an object equal to the matching fixture.
  def test_parse
    strings = [
      'ABCD1234 123 456 +',
      '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
      ''
    ]
    # Pair fixtures with their source strings instead of consuming the
    # string list destructively with shift.
    @target.zip(strings).each do |target, str|
      assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str))
    end
  end

  def test_target_id
    assert_equal('ABCD1234', @target[0].target_id)
    assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id)
    # assert_nil, to match the nil checks in the other accessor tests.
    assert_nil(@target[2].target_id)
  end

  def test_start
    assert_equal(123, @target[0].start)
    assert_equal(78, @target[1].start)
    assert_nil(@target[2].start)
  end

  def test_end
    assert_equal(456, @target[0].end)
    assert_equal(90, @target[1].end)
    assert_nil(@target[2].end)
  end

  def test_strand
    assert_equal('+', @target[0].strand)
    assert_nil(@target[1].strand)
    assert_nil(@target[2].strand)
  end

  # The expected strings show the identifier percent-escaped and nil
  # fields rendered as ".".
  def test_to_s
    assert_equal('ABCD1234 123 456 +', @target[0].to_s)
    assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s)
    assert_equal('. . .', @target[2].to_s)
  end

end #class TestGFF3RecordTarget
1016
+
1017
# Tests for Bio::GFF::GFF3::Record::Gap: parsing and printing of Gap
# attribute strings, applying a gap description to raw sequences, and
# deriving a gap description from already aligned sequences.
#
# Layout convention for the nucleotide/amino-acid tests: in the aligned
# amino-acid string every residue (or "-") occupies three columns, i.e. the
# letter followed by two spaces, so that it lines up with its codon in the
# aligned nucleotide string. Frameshift markers (">" in the amino-acid
# string, "<" in the nucleotide string) occupy one column each. Both
# aligned strings of a pair therefore have the same length.
class TestGFF3RecordGap < Test::Unit::TestCase
  def setup
    # examples taken from http://song.sourceforge.net/gff3.shtml
    @gaps_src = [ 'M8 D3 M6 I1 M6',
                  'M3 I1 M2 F1 M4',
                  'M3 I1 M2 R1 M4' ]
    @gaps = @gaps_src.map { |x| Bio::GFF::GFF3::Record::Gap.new(x) }
  end

  # to_s must reproduce the string the Gap was created from.
  def test_to_s
    # Pair sources with their Gap objects without draining @gaps.
    @gaps_src.zip(@gaps).each do |src, gap|
      assert_equal(src, gap.to_s)
    end
  end

  def test_eqeq
    gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
    assert(gap == @gaps[0])
    assert_equal(false, gap == @gaps[1])
  end

  def test_process_sequences_na
    ref = 'CAAGACCTAAACTGGATTCCAAT'
    tgt = 'CAAGACCTCTGGATATCCAAT'
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'
    assert_equal([ ref_aligned, tgt_aligned ],
                 @gaps[0].process_sequences_na(ref, tgt))
  end

  # Sequences shorter than the gap description requires must raise.
  def test_process_sequences_na_tooshort
    # one residue short on both sequences
    ref = 'CAAGACCTAAACTGGATTCCAA'
    tgt = 'CAAGACCTCTGGATATCCAA'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
    # far too short
    ref = 'c'
    tgt = 'c'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
  end

  def test_process_sequences_na_aa
    ref1 = 'atgaaggaggttattgaatgtcggcggt'
    tgt1 = 'MKEVVINVGG'
    ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt'
    tgt1_aligned = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal([ ref1_aligned, tgt1_aligned ],
                 @gaps[1].process_sequences_na_aa(ref1, tgt1))
  end

  def test_process_sequences_na_aa_reverse_frameshift
    ref2 = 'atgaaggaggttataatgtcggcggt'
    tgt2 = 'MKEVVINVGG'
    ref2_aligned = 'atgaaggag---gttat<aatgtcggcggt'
    tgt2_aligned = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal([ ref2_aligned, tgt2_aligned ],
                 @gaps[2].process_sequences_na_aa(ref2, tgt2))
  end

  def test_process_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaagattaatgtc'
    tgt = 'MKIINV'
    ref_aligned = 'atgaag<<<attaatgtc'
    tgt_aligned = 'M  K  I  I  N  V  '
    assert_equal([ ref_aligned, tgt_aligned ],
                 gap.process_sequences_na_aa(ref, tgt))
  end

  # Sequences shorter than the gap description requires must raise.
  def test_process_sequences_na_aa_tooshort
    # one nucleotide and one amino acid short
    ref2 = 'atgaaggaggttataatgtcggcgg'
    tgt2 = 'MKEVVINVG'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end

    # far too short
    ref2 = 'atg'
    tgt2 = 'M'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end
  end

  # __scan_gap is called through instance_eval; the expected data show it
  # turning runs of "-" into :I codes and all other runs into :M codes.
  def test___scan_gap
    str1 = 'CAAGACCT---CTGGATATCCAAT'
    str2 = '-aaaaaaa-a-a---ggag--'
    c = Bio::GFF::GFF3::Record::Gap::Code
    data1 = [ c.new(:M, 8), c.new(:I, 3), c.new(:M, 13) ]
    data2 = [ c.new(:I, 1), c.new(:M, 7), c.new(:I, 1),
              c.new(:M, 1), c.new(:I, 1), c.new(:M, 1),
              c.new(:I, 3), c.new(:M, 4), c.new(:I, 2) ]

    assert_equal(data1, @gaps[0].instance_eval { __scan_gap(str1) })
    assert_equal(data2, @gaps[0].instance_eval { __scan_gap(str2) })
  end

  def test_new_from_sequences_na
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'

    assert_equal(@gaps[0],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na(ref_aligned,
                                                                   tgt_aligned))
  end

  def test_new_from_sequences_na_aa
    ref = 'atgaaggag---gttattgaatgtcggcggt'
    tgt = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal(@gaps[1],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  def test_new_from_sequences_na_aa_reverse_frameshift
    ref = 'atgaaggag---gttat<aatgtcggcggt'
    tgt = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal(@gaps[2],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  def test_new_from_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaag<<<attaatgtc'
    tgt = 'M  K  I  I  N  V  '
    assert_equal(gap,
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  # Gaps located at the very beginning or end of either sequence.
  def test_new_from_sequences_na_aa_boundary_gap
    g = Bio::GFF::GFF3::Record::Gap

    # gap at the start of the nucleotide sequence
    ref = '---atgatg'
    tgt = 'K  M  M  '
    assert_equal(g.new('I1 M2'), g.new_from_sequences_na_aa(ref, tgt))

    # gap at the end of the nucleotide sequence
    ref = 'atgatg---'
    tgt = 'M  M  K  '
    assert_equal(g.new('M2 I1'), g.new_from_sequences_na_aa(ref, tgt))

    # gap at the start of the amino-acid sequence
    ref = 'atgatgatg'
    tgt = '-  M  M  '
    assert_equal(g.new('D1 M2'), g.new_from_sequences_na_aa(ref, tgt))

    # gap at the end of the amino-acid sequence
    ref = 'atgatgatg'
    tgt = 'M  M  -  '
    assert_equal(g.new('M2 D1'), g.new_from_sequences_na_aa(ref, tgt))
  end

  # Two different aligned representations must yield the same Gap.
  def test_new_from_sequences_na_aa_example
    gap = Bio::GFF::GFF3::Record::Gap.new('M2 R1 M1 F2 M1')
    ref1 = 'atgg-taagac-att'
    tgt1 = 'M  V  K  -  I  '
    ref2 = 'atggt<aagacatt'
    tgt2 = 'M  V  K  >>I  '
    gap1 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref1, tgt1)
    assert_equal(gap, gap1)
    gap2 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref2, tgt2)
    assert_equal(gap, gap2)
  end
end #class TestGFF3RecordGap
1179
+
1180
# Tests for Bio::GFF::GFF3::SequenceRegion, the object form of a
# "##sequence-region" line: parsing, accessors and serialization.
#
# Fixtures (see setup):
#   [0] plain seqid with start and end
#   [1] seqid containing characters that appear percent-escaped in the
#       string form
#   [2] all fields nil
class TestGFF3SequenceRegion < Test::Unit::TestCase

  def setup
    @data = [
      Bio::GFF::GFF3::SequenceRegion.new('ABCD1234', 123, 456),
      Bio::GFF::GFF3::SequenceRegion.new(">X Y=Z;P%,Q\tR", 78, 90),
      Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil)
    ]
  end

  # Each line must parse into an object equal to the matching fixture.
  def test_parse
    strings = [
      '##sequence-region ABCD1234 123 456',
      '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
      '##sequence-region'
    ]
    # Pair fixtures with their source lines instead of consuming the
    # string list destructively with shift.
    @data.zip(strings).each do |reg, str|
      assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str))
    end
  end

  def test_seqid
    assert_equal('ABCD1234', @data[0].seqid)
    assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid)
    # assert_nil, to match the nil checks in test_start and test_end.
    assert_nil(@data[2].seqid)
  end

  def test_start
    assert_equal(123, @data[0].start)
    assert_equal(78, @data[1].start)
    assert_nil(@data[2].start)
  end

  def test_end
    assert_equal(456, @data[0].end)
    assert_equal(90, @data[1].end)
    assert_nil(@data[2].end)
  end

  # The expected lines end with a newline, show the seqid percent-escaped,
  # and render nil fields as ".".
  def test_to_s
    assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s)
    assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n",
                 @data[1].to_s)
    assert_equal("##sequence-region . . .\n", @data[2].to_s)
  end

end #class TestGFF3SequenceRegion
1228
+
1229
# Tests for Bio::GFF::GFF3::MetaData, which holds a "##directive data"
# line as a directive name plus its data string.
class TestGFF3MetaData < Test::Unit::TestCase

  # Build the fixture: a feature-ontology directive pointing at a URL.
  def setup
    directive = 'feature-ontology'
    url = 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'
    @data = Bio::GFF::GFF3::MetaData.new(directive, url)
  end

  # Parsing the textual line must give an object equal to the fixture.
  def test_parse
    line = '##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'
    parsed = Bio::GFF::GFF3::MetaData.parse(line)
    assert_equal(@data, parsed)
  end

  # directive returns the name given to the constructor.
  def test_directive
    assert_equal('feature-ontology', @data.directive)
  end

  # data returns the data string given to the constructor.
  def test_data
    expected = 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'
    assert_equal(expected, @data.data)
  end
end #class TestGFF3MetaData
1250
+
1251
+ end #module Bio
1252
+
1253
+