bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_embl.rb,v 1.5 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id: test_embl.rb,v 1.5.2.1 2008/02/20 09:56:22 aerts Exp $
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -150,7 +150,7 @@ module Bio
150
150
 
151
151
  # Bio::EMBLDB::COMMON#references
152
152
  def test_references
153
- assert_equal(Bio::References, @obj.references.class)
153
+ assert_equal(Array, @obj.references.class)
154
154
  end
155
155
 
156
156
  # Bio::EMBLDB::COMMON#dr
@@ -163,7 +163,7 @@ module Bio
163
163
  end
164
164
 
165
165
  def test_ft
166
- assert_equal(Bio::Features, @obj.ft.class)
166
+ assert_equal(Array, @obj.ft.class)
167
167
  end
168
168
 
169
169
  def test_ft_iterator
@@ -173,7 +173,7 @@ module Bio
173
173
  end
174
174
 
175
175
  def test_ft_accessor
176
- assert_equal('CDS', @obj.ft.features[1].feature)
176
+ assert_equal('CDS', @obj.ft[1].feature)
177
177
  end
178
178
 
179
179
  def test_each_cds
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2007 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_embl_rel89.rb,v 1.2 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id: test_embl_rel89.rb,v 1.2.2.1 2008/02/20 09:56:22 aerts Exp $
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -16,7 +16,7 @@ require 'test/unit'
16
16
  require 'bio/db/embl/embl'
17
17
 
18
18
  module Bio
19
- class TestEMBL < Test::Unit::TestCase
19
+ class TestEMBL89 < Test::Unit::TestCase
20
20
 
21
21
  def setup
22
22
  bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
@@ -155,7 +155,7 @@ module Bio
155
155
 
156
156
  # Bio::EMBLDB::COMMON#references
157
157
  def test_references
158
- assert_equal(Bio::References, @obj.references.class)
158
+ assert_equal(Array, @obj.references.class)
159
159
  end
160
160
 
161
161
  # Bio::EMBLDB::COMMON#dr
@@ -168,7 +168,7 @@ module Bio
168
168
  end
169
169
 
170
170
  def test_ft
171
- assert_equal(Bio::Features, @obj.ft.class)
171
+ assert_equal(Array, @obj.ft.class)
172
172
  end
173
173
 
174
174
  def test_ft_iterator
@@ -178,7 +178,7 @@ module Bio
178
178
  end
179
179
 
180
180
  def test_ft_accessor
181
- assert_equal('CDS', @obj.ft.features[1].feature)
181
+ assert_equal('CDS', @obj.ft[1].feature)
182
182
  end
183
183
 
184
184
  def test_each_cds
@@ -0,0 +1,203 @@
1
+ #
2
+ # test/unit/bio/db/embl/test_embl_to_bioseq.rb - Unit test for Bio::EMBL to Bio::Sequence conversion
3
+ #
4
+ # Copyright:: Copyright (C) 2005, 2008
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id:$
10
+ #
11
+
12
+ require 'pathname'
13
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
14
+ $:.unshift(libpath) unless $:.include?(libpath)
15
+
16
+ require 'test/unit'
17
+ require 'bio'
18
+ require 'bio/db/embl/embl'
19
+
20
+ module Bio
21
+ class TestEMBLToBioSequence < Test::Unit::TestCase
22
+
23
+ def setup
24
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
25
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
26
+ embl_object = Bio::EMBL.new(input)
27
+ embl_object.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
28
+ @bio_seq = embl_object.to_biosequence
29
+ end
30
+
31
+ def test_entry_id
32
+ assert_equal('AB090716', @bio_seq.entry_id)
33
+ end
34
+
35
+ def test_primary_accession
36
+ assert_equal('AB090716', @bio_seq.primary_accession)
37
+ end
38
+
39
+ def test_secondary_accessions
40
+ assert_equal([], @bio_seq.secondary_accessions)
41
+ end
42
+
43
+ def test_molecule_type
44
+ assert_equal('genomic DNA', @bio_seq.molecule_type)
45
+ end
46
+
47
+ def test_definition
48
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq.definition)
49
+ end
50
+
51
+ def test_topology
52
+ assert_equal('linear', @bio_seq.topology)
53
+ end
54
+
55
+ def test_date_created
56
+ # '25-OCT-2002 (Rel. 73, Created)'
57
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq.date_created)
58
+ end
59
+
60
+ def test_date_modified
61
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
62
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq.date_modified)
63
+ end
64
+
65
+ def test_release_created
66
+ assert_equal('73', @bio_seq.release_created)
67
+ end
68
+
69
+ def test_release_modified
70
+ assert_equal('89', @bio_seq.release_modified)
71
+ end
72
+
73
+ def test_entry_version
74
+ assert_equal('3', @bio_seq.entry_version)
75
+ end
76
+
77
+ def test_division
78
+ assert_equal('VRT', @bio_seq.division)
79
+ end
80
+
81
+ def test_sequence_version
82
+ assert_equal(1, @bio_seq.sequence_version)
83
+ end
84
+
85
+ def test_keywords
86
+ assert_equal([], @bio_seq.keywords)
87
+ end
88
+
89
+ def test_species
90
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq.species)
91
+ end
92
+
93
+ def test_classification
94
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq.classification)
95
+
96
+
97
+ end
98
+
99
+ def test_references
100
+ assert_equal(2, @bio_seq.references.length)
101
+ assert_equal(Bio::Reference, @bio_seq.references[0].class)
102
+ end
103
+
104
+ def test_features
105
+ assert_equal(3, @bio_seq.features.length)
106
+ assert_equal(Bio::Feature, @bio_seq.features[0].class)
107
+ end
108
+
109
+ end
110
+
111
+ # To really test the Bio::EMBL to Bio::Sequence conversion, we need to test if
112
+ # that Bio::Sequence can be made into a valid Bio::EMBL again.
113
+ class TestEMBLToBioSequenceRoundTrip < Test::Unit::TestCase
114
+ def setup
115
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
116
+ input = File.open(File.join(bioruby_root, 'test', 'data', 'embl', 'AB090716.embl.rel89')).read
117
+ embl_object_1 = Bio::EMBL.new(input)
118
+ embl_object_1.instance_eval { @data['OS'] = "Haplochromis sp. 'muzu rukwa'" }
119
+ @bio_seq_1 = embl_object_1.to_biosequence
120
+ embl_object_2 = Bio::EMBL.new(@bio_seq_1.output(:embl))
121
+ @bio_seq_2 = embl_object_2.to_biosequence
122
+ end
123
+
124
+ def test_entry_id
125
+ assert_equal('AB090716', @bio_seq_2.entry_id)
126
+ end
127
+
128
+ def test_primary_accession
129
+ assert_equal('AB090716', @bio_seq_2.primary_accession)
130
+ end
131
+
132
+ def test_secondary_accessions
133
+ assert_equal([], @bio_seq_2.secondary_accessions)
134
+ end
135
+
136
+ def test_molecule_type
137
+ assert_equal('genomic DNA', @bio_seq_2.molecule_type)
138
+ end
139
+
140
+ def test_definition
141
+ assert_equal("Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.", @bio_seq_2.definition)
142
+ end
143
+
144
+ def test_topology
145
+ assert_equal('linear', @bio_seq_2.topology)
146
+ end
147
+
148
+ def test_date_created
149
+ # '25-OCT-2002 (Rel. 73, Created)'
150
+ assert_equal(Date.parse('25-OCT-2002'), @bio_seq_2.date_created)
151
+ end
152
+
153
+ def test_date_modified
154
+ # '14-NOV-2006 (Rel. 89, Last updated, Version 3)'
155
+ assert_equal(Date.parse('14-NOV-2006'), @bio_seq_2.date_modified)
156
+ end
157
+
158
+ def test_release_created
159
+ assert_equal('73', @bio_seq_2.release_created)
160
+ end
161
+
162
+ def test_release_modified
163
+ assert_equal('89', @bio_seq_2.release_modified)
164
+ end
165
+
166
+ def test_entry_version
167
+ assert_equal('3', @bio_seq_2.entry_version)
168
+ end
169
+
170
+ def test_division
171
+ assert_equal('VRT', @bio_seq_2.division)
172
+ end
173
+
174
+ def test_sequence_version
175
+ assert_equal(1, @bio_seq_2.sequence_version)
176
+ end
177
+
178
+ def test_keywords
179
+ assert_equal([], @bio_seq_2.keywords)
180
+ end
181
+
182
+ def test_species
183
+ assert_equal("Haplochromis sp. 'muzu, rukwa'", @bio_seq_2.species)
184
+ end
185
+
186
+ def test_classification
187
+ assert_equal(['Eukaryota','Metazoa','Chordata','Craniata','Vertebrata','Euteleostomi','Actinopterygii','Neopterygii','Teleostei','Euteleostei','Neoteleostei','Acanthomorpha','Acanthopterygii','Percomorpha','Perciformes','Labroidei','Cichlidae','African cichlids','Pseudocrenilabrinae','Haplochromini','Haplochromis'], @bio_seq_2.classification)
188
+
189
+
190
+ end
191
+
192
+ def test_references
193
+ assert_equal(2, @bio_seq_2.references.length)
194
+ assert_equal(Bio::Reference, @bio_seq_2.references[0].class)
195
+ end
196
+
197
+ def test_features
198
+ assert_equal(3, @bio_seq_2.features.length)
199
+ assert_equal(Bio::Feature, @bio_seq_2.features[0].class)
200
+ end
201
+ end
202
+ end
203
+
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: test_sptr.rb,v 1.7 2007/04/05 23:35:43 trevor Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'pathname'
@@ -78,6 +78,43 @@ module Bio
78
78
  assert_equal('P04637', @obj.accession)
79
79
  end
80
80
 
81
+ def test_dr
82
+ assert_equal(17, @obj.dr.size)
83
+ assert_equal(27, @obj.dr['GO'].size)
84
+ assert_equal([["IPR002117", "P53"],
85
+ ["IPR011615", "P53_DNA_bd"],
86
+ ["IPR012346", "P53_RUNT_DNA_bd"],
87
+ ["IPR010991", "p53_tetrameristn"]],
88
+ @obj.dr['InterPro'])
89
+ end
90
+
91
+ def test_dr_with_key
92
+ pfam = [
93
+ { " " => "1",
94
+ "Version" => "P53",
95
+ "Accession" => "PF00870",
96
+ "Molecular Type" => nil
97
+ },
98
+ { " " => "1",
99
+ "Version" => "P53_tetramer",
100
+ "Accession" => "PF07710",
101
+ "Molecular Type" => nil
102
+ }
103
+ ]
104
+ assert_equal(pfam, @obj.dr('Pfam'))
105
+ embl3 = {
106
+ " " => "JOINED",
107
+ "Version" => "AAA59987.1",
108
+ "Accession" => "M13113",
109
+ "Molecular Type" => "Genomic_DNA"
110
+ }
111
+ assert_equal(embl3, @obj.dr('EMBL')[3])
112
+ end
113
+
114
+ def test_dr_with_key_empty
115
+ assert_equal([], @obj.dr('NOT_A_DATABASE'))
116
+ end
117
+
81
118
  def test_dt
82
119
  assert(@obj.dt)
83
120
  end
@@ -6,11 +6,11 @@
6
6
  #
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: test_pdb.rb,v 1.3 2007/04/05 23:35:43 trevor Exp $
9
+ # $Id:$
10
10
  #
11
11
 
12
12
  require 'pathname'
13
- libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
13
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
14
14
  $:.unshift(libpath) unless $:.include?(libpath)
15
15
 
16
16
  require 'test/unit'
@@ -1,10 +1,12 @@
1
1
  #
2
2
  # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF
3
3
  #
4
- # Copyright:: Copyright (C) 2005 Mitsuteru Nakao <n@bioruby.org>
4
+ # Copyright:: Copyright (C) 2005, 2008
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # Naohisa Goto <ng@bioruby.org>
5
7
  # License:: The Ruby License
6
8
  #
7
- # $Id: test_gff.rb,v 1.6 2007/04/05 23:35:43 trevor Exp $
9
+ # $Id:$
8
10
  #
9
11
 
10
12
  require 'pathname'
@@ -12,13 +14,14 @@ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cle
12
14
  $:.unshift(libpath) unless $:.include?(libpath)
13
15
 
14
16
  require 'test/unit'
17
+ require 'digest/sha1'
15
18
  require 'bio/db/gff'
16
19
 
17
20
  module Bio
18
21
  class TestGFF < Test::Unit::TestCase
19
22
 
20
23
  def setup
21
- data = <<END
24
+ data = <<END_OF_DATA
22
25
  I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1\; Chromosome I Centromere"
23
26
  I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
24
27
  I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
@@ -27,7 +30,7 @@ I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3\; transcription factor
27
30
  I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
28
31
  I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
29
32
  I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
30
- END
33
+ END_OF_DATA
31
34
  @obj = Bio::GFF.new(data)
32
35
  end
33
36
 
@@ -41,27 +44,12 @@ END
41
44
 
42
45
  end # class TestGFF
43
46
 
44
-
45
- class TestGFF2 < Test::Unit::TestCase
46
- def test_version
47
- assert_equal(2, Bio::GFF::GFF2::VERSION)
48
- end
49
- end
50
-
51
-
52
- class TestGFF3 < Test::Unit::TestCase
53
- def test_version
54
- assert_equal(3, Bio::GFF::GFF3::VERSION)
55
- end
56
- end
57
-
58
-
59
47
  class TestGFFRecord < Test::Unit::TestCase
60
48
 
61
49
  def setup
62
- data =<<END
50
+ data =<<END_OF_DATA
63
51
  I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"
64
- END
52
+ END_OF_DATA
65
53
  @obj = Bio::GFF::Record.new(data)
66
54
  end
67
55
 
@@ -102,13 +90,12 @@ END
102
90
  assert_equal(at, @obj.attributes)
103
91
  end
104
92
 
105
- def test_comments
106
- assert_equal(nil, @obj.comments)
93
+ def test_comment
94
+ assert_equal(nil, @obj.comment)
107
95
  end
108
96
 
109
97
  end # class TestGFFRecord
110
98
 
111
-
112
99
  class TestGFFRecordConstruct < Test::Unit::TestCase
113
100
 
114
101
  def setup
@@ -124,4 +111,1143 @@ END
124
111
  end
125
112
 
126
113
  end # class TestGFFRecordConstruct
127
- end
114
+
115
+ class TestGFF2 < Test::Unit::TestCase
116
+ def setup
117
+ data = <<END_OF_DATA
118
+ ##gff-version 2
119
+ ##date 2008-09-22
120
+ I sgd CEN 151453 151591 . + . CEN "CEN1" ; Note "CEN1; Chromosome I Centromere"
121
+ I sgd gene 147591 151163 . - . Gene "TFC3" ; Note "transcription factor tau (TFIIIC) subunit 138"
122
+ I sgd gene 147591 151163 . - . Gene "FUN24" ; Note "transcription factor tau (TFIIIC) subunit 138"
123
+ I sgd gene 147591 151163 . - . Gene "TSV115" ; Note "transcription factor tau (TFIIIC) subunit 138"
124
+ I sgd ORF 147591 151163 . - . ORF "YAL001C" ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
125
+ I sgd gene 143998 147528 . + . Gene "VPS8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
126
+ I sgd gene 143998 147528 . + . Gene "FUN15" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
127
+ I sgd gene 143998 147528 . + . Gene "VPT8" ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
128
+ END_OF_DATA
129
+ @obj = Bio::GFF::GFF2.new(data)
130
+ end
131
+
132
+ def test_const_version
133
+ assert_equal(2, Bio::GFF::GFF2::VERSION)
134
+ end
135
+
136
+ def test_gff_version
137
+ assert_equal('2', @obj.gff_version)
138
+ end
139
+
140
+ def test_metadata_size
141
+ assert_equal(1, @obj.metadata.size)
142
+ end
143
+
144
+ def test_metadata
145
+ assert_equal(Bio::GFF::GFF2::MetaData.new('date', '2008-09-22'),
146
+ @obj.metadata[0])
147
+ end
148
+
149
+ def test_records_size
150
+ assert_equal(8, @obj.records.size)
151
+ end
152
+
153
+ def test_to_s
154
+ str = <<END_OF_DATA
155
+ ##gff-version 2
156
+ ##date 2008-09-22
157
+ I sgd CEN 151453 151591 . + . CEN CEN1 ; Note "CEN1; Chromosome I Centromere"
158
+ I sgd gene 147591 151163 . - . Gene TFC3 ; Note "transcription factor tau (TFIIIC) subunit 138"
159
+ I sgd gene 147591 151163 . - . Gene FUN24 ; Note "transcription factor tau (TFIIIC) subunit 138"
160
+ I sgd gene 147591 151163 . - . Gene TSV115 ; Note "transcription factor tau (TFIIIC) subunit 138"
161
+ I sgd ORF 147591 151163 . - . ORF YAL001C ; Note "TFC3; transcription factor tau (TFIIIC) subunit 138"
162
+ I sgd gene 143998 147528 . + . Gene VPS8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
163
+ I sgd gene 143998 147528 . + . Gene FUN15 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
164
+ I sgd gene 143998 147528 . + . Gene VPT8 ; Note "Vps8p is a membrane-associated hydrophilic protein which contains a C-terminal cysteine-rich region that conforms to the H2 variant of the RING finger Zn2+ binding motif."
165
+ END_OF_DATA
166
+ assert_equal(str, @obj.to_s)
167
+ end
168
+ end #class TestGFF2
169
+
170
+ class TestGFF2Record < Test::Unit::TestCase
171
+ def setup
172
+ str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget \"HBA_HUMAN\" 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\x1a\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\"; Misc IdString; Misc \"free text\"; Misc 5678 "
173
+
174
+ @obj = Bio::GFF::GFF2::Record.new(str)
175
+ end
176
+
177
+ def test_to_s
178
+ str = "seq1\tBLASTX\tsimilarity\t101\t235\t87.1\t+\t0\tTarget HBA_HUMAN 11 55 ; E_value 0.0003 ; Align 101 11 ; Align 179 36 ; Comment \"Please ignore this \\\"Comment\\\" attribute; Escape \\032\\037 and \\\\\\t\\r\\n\\f\\b\\a\\e\\v; This is test.\" 123 4.56e-34 \"Test for freetext\" ; Note \"\" ; Misc IdString ; Misc \"free text\" ; Misc 5678\n"
179
+
180
+ assert_equal(str, @obj.to_s)
181
+ end
182
+
183
+ def test_eqeq
184
+ obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
185
+ assert_equal(true, @obj == obj2)
186
+ end
187
+
188
+ def test_eqeq_false
189
+ obj2 = Bio::GFF::GFF2::Record.new(@obj.to_s)
190
+ obj2.seqname = 'seq2'
191
+ assert_equal(false, @obj == obj2)
192
+ end
193
+
194
+ def test_comment_only?
195
+ assert_equal(false, @obj.comment_only?)
196
+ end
197
+
198
+ def test_seqname
199
+ assert_equal('seq1', @obj.seqname)
200
+ end
201
+
202
+ def test_source
203
+ assert_equal('BLASTX', @obj.source)
204
+ end
205
+
206
+ def test_feature
207
+ assert_equal('similarity', @obj.feature)
208
+ end
209
+
210
+ def test_start
211
+ assert_equal(101, @obj.start)
212
+ end
213
+
214
+ def test_end
215
+ assert_equal(235, @obj.end)
216
+ end
217
+
218
+ def test_score
219
+ assert_equal(87.1, @obj.score)
220
+ end
221
+
222
+ def test_strand
223
+ assert_equal('+', @obj.strand)
224
+ end
225
+
226
+ def test_frame
227
+ assert_equal(0, @obj.frame)
228
+ end
229
+
230
+ def test_attributes_to_hash
231
+ hash = {
232
+ 'Target' =>
233
+ Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']),
234
+ 'E_value' => '0.0003',
235
+ 'Align' =>
236
+ Bio::GFF::GFF2::Record::Value.new(['101', '11']),
237
+ 'Comment' =>
238
+ Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]),
239
+ 'Note' => '',
240
+ 'Misc' => 'IdString'
241
+ }
242
+ assert_equal(hash, @obj.attributes_to_hash)
243
+ end
244
+
245
+ def test_attributes
246
+ attributes =
247
+ [ [ 'Target',
248
+ Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ],
249
+ [ 'E_value', '0.0003' ],
250
+ [ 'Align',
251
+ Bio::GFF::GFF2::Record::Value.new(['101', '11']) ],
252
+ [ 'Align',
253
+ Bio::GFF::GFF2::Record::Value.new(['179', '36']) ],
254
+ [ 'Comment',
255
+ Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ],
256
+ [ 'Note', '' ],
257
+ [ 'Misc', 'IdString' ],
258
+ [ 'Misc', 'free text' ],
259
+ [ 'Misc', '5678' ]
260
+ ]
261
+ assert_equal(attributes, @obj.attributes)
262
+ end
263
+
264
+ def test_attribute
265
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
266
+ assert_equal(val_Target, @obj.attribute('Target'))
267
+ assert_equal('0.0003', @obj.attribute('E_value'))
268
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
269
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
270
+ assert_equal(val_Align0, @obj.attribute('Align'))
271
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
272
+ assert_equal(val_Comment, @obj.attribute('Comment'))
273
+ assert_equal('', @obj.attribute('Note'))
274
+ assert_equal('IdString', @obj.attribute('Misc'))
275
+ end
276
+
277
+ def test_attribute_nonexistent
278
+ assert_equal(nil, @obj.attribute('NonExistent'))
279
+ end
280
+
281
+ def test_get_attribute
282
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
283
+ assert_equal(val_Target, @obj.get_attribute('Target'))
284
+ assert_equal('0.0003', @obj.get_attribute('E_value'))
285
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
286
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
287
+ assert_equal(val_Align0, @obj.get_attribute('Align'))
288
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
289
+ assert_equal(val_Comment, @obj.get_attribute('Comment'))
290
+ assert_equal('', @obj.get_attribute('Note'))
291
+ assert_equal('IdString', @obj.get_attribute('Misc'))
292
+ end
293
+
294
+ def test_get_attribute_nonexistent
295
+ assert_equal(nil, @obj.get_attribute('NonExistent'))
296
+ end
297
+
298
+ def test_get_attributes
299
+ val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55'])
300
+ assert_equal([ val_Target ], @obj.get_attributes('Target'))
301
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
302
+ val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11'])
303
+ val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36'])
304
+ assert_equal([ val_Align0, val_Align1 ],
305
+ @obj.get_attributes('Align'))
306
+ val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"])
307
+ assert_equal([ val_Comment ], @obj.get_attributes('Comment'))
308
+ assert_equal([ '' ], @obj.get_attributes('Note'))
309
+ assert_equal([ 'IdString', 'free text', '5678' ],
310
+ @obj.get_attributes('Misc'))
311
+ end
312
+
313
+ def test_get_attributes_nonexistent
314
+ assert_equal([], @obj.get_attributes('NonExistent'))
315
+ end
316
+
317
+ def test_set_attribute
318
+ assert_equal('0.0003', @obj.attribute('E_value'))
319
+ assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10'))
320
+ assert_equal('1e-10', @obj.attribute('E_value'))
321
+ end
322
+
323
+ def test_set_attribute_multiple
324
+ assert_equal([ 'IdString', 'free text', '5678' ],
325
+ @obj.get_attributes('Misc'))
326
+ assert_equal('Replaced',
327
+ @obj.set_attribute('Misc', 'Replaced'))
328
+ assert_equal([ 'Replaced', 'free text', '5678' ],
329
+ @obj.get_attributes('Misc'))
330
+ end
331
+
332
+ def test_set_attribute_nonexistent
333
+ assert_equal(nil, @obj.attribute('NonExistent'))
334
+ assert_equal('test', @obj.set_attribute('NonExistent', 'test'))
335
+ assert_equal('test', @obj.attribute('NonExistent'))
336
+ end
337
+
338
+ def test_replace_attributes
339
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
340
+ assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10'))
341
+ assert_equal([ '1e-10' ], @obj.get_attributes('E_value'))
342
+ end
343
+
344
+ def test_replace_attributes_single_multiple
345
+ assert_equal([ '0.0003' ], @obj.get_attributes('E_value'))
346
+ assert_equal(@obj, @obj.replace_attributes('E_value',
347
+ '1e-10', '3.14', '2.718'))
348
+ assert_equal([ '1e-10', '3.14', '2.718' ],
349
+ @obj.get_attributes('E_value'))
350
+ end
351
+
352
+ def test_replace_attributes_multiple_single
353
+ assert_equal([ 'IdString', 'free text', '5678' ],
354
+ @obj.get_attributes('Misc'))
355
+ assert_equal(@obj,
356
+ @obj.replace_attributes('Misc', 'Replaced_All'))
357
+ assert_equal([ 'Replaced_All' ],
358
+ @obj.get_attributes('Misc'))
359
+ end
360
+
361
+ def test_replace_attributes_multiple_multiple_two
362
+ assert_equal([ 'IdString', 'free text', '5678' ],
363
+ @obj.get_attributes('Misc'))
364
+ assert_equal(@obj,
365
+ @obj.replace_attributes('Misc',
366
+ 'Replaced', 'test2'))
367
+ assert_equal([ 'Replaced', 'test2' ],
368
+ @obj.get_attributes('Misc'))
369
+ end
370
+
371
+ def test_replace_attributes_multiple_multiple_same
372
+ assert_equal([ 'IdString', 'free text', '5678' ],
373
+ @obj.get_attributes('Misc'))
374
+ assert_equal(@obj,
375
+ @obj.replace_attributes('Misc',
376
+ 'Replaced', 'test2', 'test3'))
377
+ assert_equal([ 'Replaced', 'test2', 'test3' ],
378
+ @obj.get_attributes('Misc'))
379
+ end
380
+
381
+ def test_replace_attributes_multiple_multiple_over
382
+ assert_equal([ 'IdString', 'free text', '5678' ],
383
+ @obj.get_attributes('Misc'))
384
+ assert_equal(@obj,
385
+ @obj.replace_attributes('Misc',
386
+ 'Replaced', 'test2', 'test3', '4'))
387
+ assert_equal([ 'Replaced', 'test2', 'test3', '4' ],
388
+ @obj.get_attributes('Misc'))
389
+ end
390
+
391
+ def test_replace_attributes_nonexistent
392
+ assert_equal(nil, @obj.attribute('NonExistent'))
393
+ assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test'))
394
+ assert_equal([ 'test' ], @obj.get_attributes('NonExistent'))
395
+ end
396
+
397
+ def test_replace_attributes_nonexistent_multiple
398
+ assert_equal(nil, @obj.attribute('NonExistent'))
399
+ assert_equal(@obj,
400
+ @obj.replace_attributes('NonExistent',
401
+ 'test', 'gff2', 'attr'))
402
+ assert_equal([ 'test', 'gff2', 'attr' ],
403
+ @obj.get_attributes('NonExistent'))
404
+ end
405
+
406
+ def test_delete_attribute
407
+ assert_equal('0.0003', @obj.attribute('E_value'))
408
+ assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003'))
409
+ assert_equal(nil, @obj.attribute('E_value'))
410
+ end
411
+
412
+ def test_delete_attribute_nil
413
+ assert_equal('0.0003', @obj.attribute('E_value'))
414
+ assert_equal(nil, @obj.delete_attribute('E_value', '3'))
415
+ assert_equal('0.0003', @obj.attribute('E_value'))
416
+ end
417
+
418
+ def test_delete_attribute_multiple
419
+ assert_equal([ 'IdString', 'free text', '5678' ],
420
+ @obj.get_attributes('Misc'))
421
+ assert_equal('free text',
422
+ @obj.delete_attribute('Misc', 'free text'))
423
+ assert_equal([ 'IdString', '5678' ],
424
+ @obj.get_attributes('Misc'))
425
+ end
426
+
427
+ def test_delete_attribute_multiple2
428
+ assert_equal([ 'IdString', 'free text', '5678' ],
429
+ @obj.get_attributes('Misc'))
430
+ assert_equal('IdString',
431
+ @obj.delete_attribute('Misc', 'IdString'))
432
+ assert_equal([ 'free text', '5678' ],
433
+ @obj.get_attributes('Misc'))
434
+ assert_equal('5678',
435
+ @obj.delete_attribute('Misc', '5678'))
436
+ assert_equal([ 'free text' ],
437
+ @obj.get_attributes('Misc'))
438
+ end
439
+
440
+ def test_delete_attribute_multiple_nil
441
+ assert_equal([ 'IdString', 'free text', '5678' ],
442
+ @obj.get_attributes('Misc'))
443
+ assert_equal(nil,
444
+ @obj.delete_attribute('Misc', 'test'))
445
+ assert_equal([ 'IdString', 'free text', '5678' ],
446
+ @obj.get_attributes('Misc'))
447
+ end
448
+
449
+ def test_delete_attribute_nonexistent
450
+ assert_equal(nil, @obj.attribute('NonExistent'))
451
+ assert_equal(nil, @obj.delete_attribute('NonExistent', 'test'))
452
+ assert_equal([], @obj.get_attributes('NonExistent'))
453
+ end
454
+
455
+ def test_delete_attributes
456
+ assert_equal('0.0003', @obj.attribute('E_value'))
457
+ assert_equal(@obj, @obj.delete_attributes('E_value'))
458
+ assert_equal(nil, @obj.attribute('E_value'))
459
+ end
460
+
461
+ def test_delete_attributes_multiple
462
+ assert_equal([ 'IdString', 'free text', '5678' ],
463
+ @obj.get_attributes('Misc'))
464
+ assert_equal(@obj, @obj.delete_attributes('Misc'))
465
+ assert_equal([], @obj.get_attributes('Misc'))
466
+ end
467
+
468
+ def test_delete_attributes_nonexistent
469
+ assert_equal(nil, @obj.attribute('NonExistent'))
470
+ assert_equal(nil, @obj.delete_attributes('NonExistent'))
471
+ assert_equal([], @obj.get_attributes('NonExistent'))
472
+ end
473
+
474
+ def test_sort_attributes_by_tag!
475
+ tags = %w( Comment Align E_value Note )
476
+ assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
477
+ assert_equal(%w( Comment Align Align E_value Note Target
478
+ Misc Misc Misc ),
479
+ @obj.attributes.collect { |x| x[0] })
480
+ # check if the order of 'Misc' is not changed
481
+ assert_equal([ 'IdString', 'free text', '5678' ],
482
+ @obj.get_attributes('Misc'))
483
+ end
484
+
485
+ def test_sort_attributes_by_tag_bang_test2
486
+ tags = %w( E_value Misc Note Target )
487
+ assert_equal(@obj, @obj.sort_attributes_by_tag!(tags))
488
+ assert_equal(%w( E_value Misc Misc Misc Note Target
489
+ Align Align Comment ),
490
+ @obj.attributes.collect { |x| x[0] })
491
+ # check if the order of 'Misc' is not changed
492
+ assert_equal([ 'IdString', 'free text', '5678' ],
493
+ @obj.get_attributes('Misc'))
494
+ end
495
+
496
+ def test_sort_attributes_by_tag_bang_with_block
497
+ assert_equal(@obj,
498
+ @obj.sort_attributes_by_tag! { |x, y|
499
+ x <=> y
500
+ })
501
+ assert_equal(%w( Align Align Comment E_value Misc Misc Misc
502
+ Note Target ),
503
+ @obj.attributes.collect { |x| x[0] })
504
+ # check if the order of 'Misc' is not changed
505
+ assert_equal([ 'IdString', 'free text', '5678' ],
506
+ @obj.get_attributes('Misc'))
507
+ end
508
+ end #class TestGFF2Record
509
+
510
+ class TestGFF2RecordEmpty < Test::Unit::TestCase
511
+ def setup
512
+ @obj = Bio::GFF::GFF2::Record.new('# test comment')
513
+ end
514
+
515
+ def test_comment_only?
516
+ assert_equal(true, @obj.comment_only?)
517
+ end
518
+
519
+ def test_comment_only_false
520
+ @obj.seqname = 'test'
521
+ assert_equal(false, @obj.comment_only?)
522
+ end
523
+
524
+ def test_to_s
525
+ assert_equal("# test comment\n", @obj.to_s)
526
+ end
527
+
528
+ def test_to_s_not_empty
529
+ @obj.seqname = 'test'
530
+ @obj.feature = 'region'
531
+ @obj.start = 1
532
+ @obj.end = 100
533
+ assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n",
534
+ @obj.to_s)
535
+ @obj.add_attribute('Gene', 'unknown')
536
+ assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n",
537
+ @obj.to_s)
538
+ end
539
+
540
+ def test_comment
541
+ assert_equal(' test comment', @obj.comment)
542
+ end
543
+
544
+ def test_comment_eq
545
+ assert_equal('changed the comment',
546
+ @obj.comment = 'changed the comment')
547
+ end
548
+ end #class TestGFF2RecordEmpty
549
+
550
+ class TestGFF2ComplexAttributes < Test::Unit::TestCase
551
+
552
+ # The test string comes from the Poplar genome annotation from the JGI.
553
+ # ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz
554
+ # Thanks to Tomoaki NISHIYAMA, who picked out the example line.
555
+ def test_attributes_case1
556
+ str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n"
557
+
558
+ attributes = [
559
+ [ "name", "grail3.0116000101" ],
560
+ [ "proteinId", "639579" ],
561
+ [ "exonNumber", "3" ]
562
+ ]
563
+ record = Bio::GFF::GFF2::Record.new(str)
564
+ assert_equal(attributes, record.attributes)
565
+ end
566
+
567
+ # The test string is modified from that of test_attributes_case1.
568
+ def test_attributes_case2
569
+ str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n"
570
+
571
+ attributes = [
572
+ [ "name", "grail3.0116000101" ],
573
+ [ "proteinId", "639579" ],
574
+ [ "exonNumber", "3" ],
575
+ [ "Note", "Semicolons ; and ;, and quote \" can be OK" ],
576
+ [ "Comment", "This is the \"comment\"" ]
577
+ ]
578
+ record = Bio::GFF::GFF2::Record.new(str)
579
+ assert_equal(attributes, record.attributes)
580
+ end
581
+
582
+ def test_attributes_incompatible_backslash_semicolon
583
+ # No special treatments for backslash-semicolon outside the free text.
584
+ str =<<END_OF_DATA
585
+ I sgd gene 151453 151591 . + . Gene "CEN1" ; Note "Chromosome I Centromere"; Semicolon a "b;c" d "e;f;g" h; Illegal a\\;b c d; Comment "a ; b"
586
+ END_OF_DATA
587
+
588
+ attributes = [
589
+ [ 'Gene', 'CEN1' ],
590
+ [ 'Note', 'Chromosome I Centromere' ],
591
+ [ 'Semicolon',
592
+ Bio::GFF::GFF2::Record::Value.new(['a', 'b;c', 'd', 'e;f;g', 'h']) ],
593
+ [ 'Illegal', "a\\" ],
594
+ [ 'b', Bio::GFF::GFF2::Record::Value.new(['c', 'd']) ],
595
+ [ 'Comment', 'a ; b' ]
596
+ ]
597
+ record = Bio::GFF::GFF2::Record.new(str)
598
+ assert_equal(attributes, record.attributes)
599
+ end
600
+
601
+ end #class TestGFF2ComplexAttributes
602
+
603
+ class TestGFF2MetaData < Test::Unit::TestCase
604
+ def setup
605
+ @data =
606
+ Bio::GFF::GFF2::MetaData.new('date', '2008-09-22')
607
+ end
608
+
609
+ def test_parse
610
+ assert_equal(@data,
611
+ Bio::GFF::GFF2::MetaData.parse('##date 2008-09-22'))
612
+ end
613
+
614
+ def test_directive
615
+ assert_equal('date', @data.directive)
616
+ end
617
+
618
+ def test_data
619
+ assert_equal('2008-09-22', @data.data)
620
+ end
621
+ end #class TestGFF2MetaData
622
+
623
+ class TestGFF3 < Test::Unit::TestCase
624
+ def setup
625
+ @data =<<END_OF_DATA
626
+ ##gff-version 3
627
+ ##sequence-region test01 1 400
628
+ test01 RANDOM contig 1 400 . + . ID=test01;Note=this is test
629
+ test01 . mRNA 101 230 . + . ID=mrna01;Name=testmRNA;Note=this is test mRNA
630
+ test01 . mRNA 101 280 . + . ID=mrna01a;Name=testmRNAalterative;Note=test of alternative splicing variant
631
+ test01 . exon 101 160 . + . ID=exon01;Name=exon01;Alias=exon 1;Parent=mrna01,mrna01a
632
+ test01 . exon 201 230 . + . ID=exon02;Name=exon02;Alias=exon 2;Parent=mrna01
633
+ test01 . exon 251 280 . + . ID=exon02a;Name=exon02a;Alias=exon 2a;Parent=mrna01a
634
+ test01 . Match 101 123 . . . ID=match01;Name=match01;Target=EST101 1 21;Gap=M8 D3 M6 I1 M6
635
+ ##FASTA
636
+ >test01
637
+ ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA
638
+ GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA
639
+ CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT
640
+ AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT
641
+ GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC
642
+ CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC
643
+ END_OF_DATA
644
+ @gff3 = Bio::GFF::GFF3.new(@data)
645
+ end
646
+
647
+ def test_const_version
648
+ assert_equal(3, Bio::GFF::GFF3::VERSION)
649
+ end
650
+
651
+ def test_sequence_regions
652
+ region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400)
653
+ assert_equal([ region ], @gff3.sequence_regions)
654
+ end
655
+
656
+ def test_gff_version
657
+ assert_equal('3', @gff3.gff_version)
658
+ end
659
+
660
+ def test_records
661
+ assert_equal(7, @gff3.records.size)
662
+ r_test01 = Bio::GFF::GFF3::Record.new('test01',
663
+ 'RANDOM',
664
+ 'contig',
665
+ 1, 400, nil, '+', nil,
666
+ [ ['ID', 'test01'],
667
+ ['Note', 'this is test'] ])
668
+ r_mrna01 = Bio::GFF::GFF3::Record.new('test01',
669
+ nil,
670
+ 'mRNA',
671
+ 101, 230, nil, '+', nil,
672
+ [ ['ID', 'mrna01'],
673
+ ['Name', 'testmRNA'],
674
+ ['Note', 'this is test mRNA'] ])
675
+ r_exon01 = Bio::GFF::GFF3::Record.new('test01',
676
+ nil,
677
+ 'exon',
678
+ 101, 160, nil, '+', nil,
679
+ [ ['ID', 'exon01'],
680
+ ['Name', 'exon01'],
681
+ ['Alias', 'exon 1'],
682
+ ['Parent', 'mrna01'],
683
+ ['Parent', 'mrna01a'] ])
684
+
685
+ target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21)
686
+ gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
687
+ r_match01 =Bio::GFF::GFF3::Record.new('test01',
688
+ nil,
689
+ 'Match',
690
+ 101, 123, nil, nil, nil,
691
+ [ ['ID', 'match01'],
692
+ ['Name', 'match01'],
693
+ ['Target', target],
694
+ ['Gap', gap] ])
695
+ assert_equal(r_test01, @gff3.records[0])
696
+ assert_equal(r_mrna01, @gff3.records[1])
697
+ assert_equal(r_exon01, @gff3.records[3])
698
+ assert_equal(r_match01, @gff3.records[6])
699
+ end
700
+
701
+ def test_sequences
702
+ assert_equal(1, @gff3.sequences.size)
703
+ assert_equal('test01', @gff3.sequences[0].entry_id)
704
+ assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4',
705
+ Digest::SHA1.hexdigest(@gff3.sequences[0].seq.to_s))
706
+ end
707
+
708
+ def test_to_s
709
+ assert_equal(@data, @gff3.to_s)
710
+ end
711
+
712
+ end #class TestGFF3
713
+
714
+ class TestGFF3Record < Test::Unit::TestCase
715
+
716
+ def setup
717
+ data =<<END_OF_DATA
718
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome%20I%20centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
719
+ END_OF_DATA
720
+ @obj = Bio::GFF::GFF3::Record.new(data)
721
+ end
722
+
723
+ def test_seqname
724
+ assert_equal('chrI', @obj.seqname)
725
+ end
726
+
727
+ def test_source
728
+ assert_equal('SGD', @obj.source)
729
+ end
730
+
731
+ def test_feature
732
+ assert_equal('centromere', @obj.feature)
733
+ end
734
+
735
+ def test_start
736
+ assert_equal(151467, @obj.start)
737
+ end
738
+
739
+ def test_end
740
+ assert_equal(151584, @obj.end)
741
+ end
742
+
743
+ def test_score
744
+ assert_equal(nil, @obj.score)
745
+ end
746
+
747
+ def test_strand
748
+ assert_equal('+', @obj.strand)
749
+ end
750
+
751
+ def test_frame
752
+ assert_equal(nil, @obj.frame)
753
+ end
754
+
755
+ def test_attributes
756
+ attr = [
757
+ ['ID', 'CEN1'],
758
+ ['Name', 'CEN1'],
759
+ ['gene', 'CEN1'],
760
+ ['Alias', 'CEN1'],
761
+ ['Alias', 'test;0001'],
762
+ ['Note', 'Chromosome I centromere'],
763
+ ['dbxref', 'SGD:S000006463'],
764
+ ['Target',
765
+ Bio::GFF::GFF3::Record::Target.new('test 02', 123, 456, '-')],
766
+ ['Target',
767
+ Bio::GFF::GFF3::Record::Target.new('test,03', 159, 314)],
768
+ ['memo=test;attr', "99.9%\tmatch"]
769
+ ]
770
+ assert_equal(attr, @obj.attributes)
771
+ end
772
+
773
+ def test_id
774
+ assert_equal('CEN1', @obj.id)
775
+ end
776
+
777
+ def test_to_s
778
+ str =<<END_OF_DATA
779
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;gene=CEN1;Alias=CEN1,test%3B0001;Note=Chromosome I centromere;dbxref=SGD:S000006463;Target=test%2002 123 456 -,test%2C03 159 314;memo%3Dtest%3Battr=99.9%25%09match
780
+ END_OF_DATA
781
+ assert_equal(str, @obj.to_s)
782
+ end
783
+
784
+ def test_to_s_attr_order_changed
785
+ str = <<END_OF_STR
786
+ chrI SGD centromere 151467 151584 . + . ID=CEN1;Name=CEN1;Alias=CEN1,test%3B0001;Target=test%2002 123 456 -,test%2C03 159 314;Note=Chromosome I centromere;dbxref=SGD:S000006463;gene=CEN1;memo%3Dtest%3Battr=99.9%25%09match
787
+ END_OF_STR
788
+
789
+ keys = [ 'ID', 'Name', 'Alias', 'Target', 'Note', 'dbxref', 'gene' ]
790
+ @obj.sort_attributes_by_tag!(keys)
791
+ assert_equal(str, @obj.to_s)
792
+ end
793
+ end #class TestGFF3Record
794
+
795
+ class TestGFF3RecordMisc < Test::Unit::TestCase
796
+ def test_attributes_none
797
+ # test blank with tab
798
+ data =<<END_OF_DATA
799
+ I sgd gene 151453 151591 . + .
800
+ END_OF_DATA
801
+ obj = Bio::GFF::GFF3::Record.new(data)
802
+ assert_equal([], obj.attributes)
803
+
804
+ # test blank with no tab at end
805
+ data =<<END_OF_DATA
806
+ I sgd gene 151453 151591 . + .
807
+ END_OF_DATA
808
+ obj = Bio::GFF::GFF3::Record.new(data)
809
+ assert_equal([], obj.attributes)
810
+ end
811
+
812
+ def test_attributes_one
813
+ data =<<END_OF_DATA
814
+ I sgd gene 151453 151591 . + . ID=CEN1
815
+ END_OF_DATA
816
+ obj = Bio::GFF::GFF3::Record.new(data)
817
+ at = [ ["ID", 'CEN1'] ]
818
+ assert_equal(at, obj.attributes)
819
+ end
820
+
821
+ def test_attributes_with_escaping
822
+ data =<<END_OF_DATA
823
+ I sgd gene 151453 151591 . + . ID=CEN1;gene=CEN1%3Boh;Note=Chromosome I Centromere
824
+ END_OF_DATA
825
+ obj = Bio::GFF::GFF3::Record.new(data)
826
+ at = [ ['ID', 'CEN1'],
827
+ ["gene", 'CEN1;oh'],
828
+ ["Note", 'Chromosome I Centromere']
829
+ ]
830
+ assert_equal(at, obj.attributes)
831
+ end
832
+
833
+ def test_score
834
+ data =<<END_OF_DATA
835
+ ctg123 src match 456 788 1e-10 - . ID=test01
836
+ END_OF_DATA
837
+ obj = Bio::GFF::GFF3::Record.new(data)
838
+ assert_equal(1e-10, obj.score)
839
+ obj.score = 0.5
840
+ assert_equal(0.5, obj.score)
841
+ end
842
+
843
+ def test_phase
844
+ data =<<END_OF_DATA
845
+ ctg123 src CDS 456 788 . - 2 ID=test02
846
+ END_OF_DATA
847
+ obj = Bio::GFF::GFF3::Record.new(data)
848
+ assert_equal(2, obj.phase)
849
+ assert_equal(2, obj.frame)
850
+ obj.phase = 1
851
+ assert_equal(1, obj.phase)
852
+ assert_equal(1, obj.frame)
853
+ end
854
+
855
+ def test_id_replace
856
+ data =<<END_OF_DATA
857
+ ctg123 src CDS 456 788 1e-10 - 2 ID=test03
858
+ END_OF_DATA
859
+ obj = Bio::GFF::GFF3::Record.new(data)
860
+ assert_equal('test03', obj.id)
861
+ assert_equal('test_id', obj.id = 'test_id')
862
+ assert_equal('test_id', obj.id)
863
+ end
864
+
865
+ def test_id_set
866
+ data =<<END_OF_DATA
867
+ ctg123 src CDS 456 788 1e-10 - 2 NAME=test03
868
+ END_OF_DATA
869
+ obj = Bio::GFF::GFF3::Record.new(data)
870
+ assert_nil(obj.id)
871
+ assert_equal('test_id', obj.id = 'test_id')
872
+ assert_equal('test_id', obj.id)
873
+ assert_equal('next_test', obj.id = 'next_test')
874
+ assert_equal('next_test', obj.id)
875
+ end
876
+
877
+ def test_id_multiple
878
+ # Note: Having two ID attributes in a record is illegal in GFF3.
879
+ data =<<END_OF_DATA
880
+ ctg123 src CDS 456 788 . - 2 ID=test03,test04
881
+ END_OF_DATA
882
+ obj = Bio::GFF::GFF3::Record.new(data)
883
+ assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
884
+ obj.attributes)
885
+ assert_equal('test03', obj.id)
886
+ assert_equal('test_id', obj.id = 'test_id')
887
+ assert_equal('test_id', obj.id)
888
+ assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
889
+ obj.attributes)
890
+ str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
891
+ assert_equal(str, obj.to_s)
892
+ end
893
+
894
+ def test_id_multiple2
895
+ # Note: Having two ID attributes in a record is illegal in GFF3.
896
+ data =<<END_OF_DATA
897
+ ctg123 src CDS 456 788 . - 2 ID=test03;ID=test04
898
+ END_OF_DATA
899
+ obj = Bio::GFF::GFF3::Record.new(data)
900
+ assert_equal([ [ 'ID', 'test03' ], [ 'ID', 'test04' ] ],
901
+ obj.attributes)
902
+ assert_equal('test03', obj.id)
903
+ assert_equal('test_id', obj.id = 'test_id')
904
+ assert_equal('test_id', obj.id)
905
+ assert_equal([ [ 'ID', 'test_id' ], [ 'ID', 'test04' ] ],
906
+ obj.attributes)
907
+
908
+ # The "XXX=test03;XXX=test04" is automatically changed to
909
+ # "XXX=test03,test04", as defined in the GFF3 spec.
910
+ str = "ctg123\tsrc\tCDS\t456\t788\t.\t-\t2\tID=test_id,test04\n"
911
+ assert_equal(str, obj.to_s)
912
+ end
913
+
914
+ def test_initialize_9
915
+ obj = Bio::GFF::GFF3::Record.new('test01',
916
+ 'testsrc',
917
+ 'exon',
918
+ 1, 400, nil, '+', nil,
919
+ [ ['ID', 'test01'],
920
+ ['Note', 'this is test'] ])
921
+ assert_equal('test01', obj.seqid)
922
+ end
923
+
924
+ def test_to_s_void
925
+ obj = Bio::GFF::GFF3::Record.new
926
+ assert_equal(".\t.\t.\t.\t.\t.\t.\t.\t.\n", obj.to_s)
927
+ end
928
+
929
+ end #class TestGFF3RecordMisc
930
+
931
+ class TestGFF3RecordEscape < Test::Unit::TestCase
932
+ def setup
933
+ @obj = Object.new.extend(Bio::GFF::GFF3::Escape)
934
+ @str = "A>B\tC=100%;d=e,f,g h"
935
+ end
936
+
937
+ def test_escape
938
+ str = @str
939
+ assert_equal('A>B%09C=100%25;d=e,f,g h',
940
+ @obj.instance_eval { escape(str) })
941
+ end
942
+
943
+ def test_escape_attribute
944
+ str = @str
945
+ assert_equal('A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h',
946
+ @obj.instance_eval { escape_attribute(str) })
947
+ end
948
+
949
+ def test_escape_seqid
950
+ str = @str
951
+ assert_equal('A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h',
952
+ @obj.instance_eval { escape_seqid(str) })
953
+ end
954
+
955
+ def test_unescape
956
+ escaped_str = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h'
957
+ assert_equal(@str,
958
+ @obj.instance_eval {
959
+ unescape(escaped_str) })
960
+ end
961
+ end #class TestGFF3RecordEscape
962
+
963
# Tests for Bio::GFF::GFF3::Record::Target: parsing from text, the
# target_id/start/end/strand readers, and serialization with to_s.
class TestGFF3RecordTarget < Test::Unit::TestCase

  # Three fixtures: a plain target with a strand, a target whose ID
  # contains characters that need escaping, and an all-nil target.
  def setup
    @target =
      [ Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'),
        Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90),
        Bio::GFF::GFF3::Record::Target.new(nil, nil, nil),
      ]
  end

  # Each textual form must parse into the fixture at the same index.
  def test_parse
    strings =
      [ 'ABCD1234 123 456 +',
        '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
        ''
      ]
    # Pair fixtures with their source text instead of consuming the
    # list with shift, so the expectations stay intact while iterating.
    @target.zip(strings).each do |target, str|
      assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str))
    end
  end

  def test_target_id
    assert_equal('ABCD1234', @target[0].target_id)
    assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id)
    # assert_nil matches the nil checks used by the other tests here.
    assert_nil(@target[2].target_id)
  end

  def test_start
    assert_equal(123, @target[0].start)
    assert_equal(78, @target[1].start)
    assert_nil(@target[2].start)
  end

  def test_end
    assert_equal(456, @target[0].end)
    assert_equal(90, @target[1].end)
    assert_nil(@target[2].end)
  end

  # The strand is optional: only the first fixture was given one.
  def test_strand
    assert_equal('+', @target[0].strand)
    assert_nil(@target[1].strand)
    assert_nil(@target[2].strand)
  end

  # to_s escapes the target ID and prints "." for each nil field.
  def test_to_s
    assert_equal('ABCD1234 123 456 +', @target[0].to_s)
    assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s)
    assert_equal('. . .', @target[2].to_s)
  end

end #class TestGFF3RecordTarget
1016
+
1017
# Tests for Bio::GFF::GFF3::Record::Gap: round-tripping the Gap
# attribute text, applying a gap description to unaligned sequences,
# and deriving a gap description from already-aligned sequences.
#
# In the nucleotide-vs-amino-acid cases, every residue of the aligned
# target occupies three columns (the residue followed by two spaces),
# so the reference and target strings always have the same length.
# The "<" and ">" characters in the fixtures appear where the gap
# description contains an R or F code respectively.
class TestGFF3RecordGap < Test::Unit::TestCase
  def setup
    # examples taken from http://song.sourceforge.net/gff3.shtml
    @gaps_src = [ 'M8 D3 M6 I1 M6',
                  'M3 I1 M2 F1 M4',
                  'M3 I1 M2 R1 M4' ]
    @gaps = @gaps_src.collect { |x| Bio::GFF::GFF3::Record::Gap.new(x) }
  end

  # to_s must reproduce the text each Gap was created from.
  def test_to_s
    # zip instead of shifting @gaps, so the fixtures are not consumed.
    @gaps_src.zip(@gaps).each do |src, gap|
      assert_equal(src, gap.to_s)
    end
  end

  # == is true for gaps built from the same text and false otherwise.
  def test_eqeq
    gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6')
    assert(gap == @gaps[0])
    assert_equal(false, gap == @gaps[1])
  end

  # Nucleotide vs nucleotide: "-" fills the gap columns.
  def test_process_sequences_na
    ref = 'CAAGACCTAAACTGGATTCCAAT'
    tgt = 'CAAGACCTCTGGATATCCAAT'
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'
    assert_equal([ ref_aligned, tgt_aligned ],
                 @gaps[0].process_sequences_na(ref, tgt))
  end

  # Sequences shorter than the gap description requires must raise.
  def test_process_sequences_na_tooshort
    ref = 'CAAGACCTAAACTGGATTCCAA'
    tgt = 'CAAGACCTCTGGATATCCAA'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
    ref = 'c'
    tgt = 'c'
    assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) }
  end

  # Nucleotide vs amino acid with an F1 code ('M3 I1 M2 F1 M4').
  def test_process_sequences_na_aa
    ref1 = 'atgaaggaggttattgaatgtcggcggt'
    tgt1 = 'MKEVVINVGG'
    ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt'
    tgt1_aligned = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal([ ref1_aligned, tgt1_aligned ],
                 @gaps[1].process_sequences_na_aa(ref1, tgt1))
  end

  # Nucleotide vs amino acid with an R1 code ('M3 I1 M2 R1 M4').
  def test_process_sequences_na_aa_reverse_frameshift
    ref2 = 'atgaaggaggttataatgtcggcggt'
    tgt2 = 'MKEVVINVGG'
    ref2_aligned = 'atgaaggag---gttat<aatgtcggcggt'
    tgt2_aligned = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal([ ref2_aligned, tgt2_aligned ],
                 @gaps[2].process_sequences_na_aa(ref2, tgt2))
  end

  # An R code with a count of a whole codon (R3).
  def test_process_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaagattaatgtc'
    tgt = 'MKIINV'
    ref_aligned = 'atgaag<<<attaatgtc'
    tgt_aligned = 'M  K  I  I  N  V  '
    assert_equal([ ref_aligned, tgt_aligned ],
                 gap.process_sequences_na_aa(ref, tgt))
  end

  # Sequences shorter than the gap description requires must raise.
  def test_process_sequences_na_aa_tooshort
    ref2 = 'atgaaggaggttataatgtcggcgg'
    tgt2 = 'MKEVVINVG'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end

    ref2 = 'atg'
    tgt2 = 'M'
    assert_raise(RuntimeError) do
      @gaps[2].process_sequences_na_aa(ref2, tgt2)
    end
  end

  # __scan_gap is reached through instance_eval (presumably because it
  # is not public). Runs of "-" become :I codes and every other run
  # becomes an :M code.
  def test___scan_gap
    str1 = 'CAAGACCT---CTGGATATCCAAT'
    str2 = '-aaaaaaa-a-a---ggag--'
    c = Bio::GFF::GFF3::Record::Gap::Code
    data1 = [ c.new(:M, 8), c.new(:I, 3), c.new(:M, 13) ]
    data2 = [ c.new(:I, 1), c.new(:M, 7), c.new(:I, 1),
              c.new(:M, 1), c.new(:I, 1), c.new(:M, 1),
              c.new(:I, 3), c.new(:M, 4), c.new(:I, 2) ]

    assert_equal(data1, @gaps[0].instance_eval { __scan_gap(str1) })
    assert_equal(data2, @gaps[0].instance_eval { __scan_gap(str2) })
  end

  # Inverse of test_process_sequences_na: aligned strings -> Gap.
  def test_new_from_sequences_na
    ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT'
    tgt_aligned = 'CAAGACCT---CTGGATATCCAAT'

    assert_equal(@gaps[0],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na(ref_aligned,
                                                                   tgt_aligned))
  end

  # Inverse of test_process_sequences_na_aa.
  def test_new_from_sequences_na_aa
    ref = 'atgaaggag---gttattgaatgtcggcggt'
    tgt = 'M  K  E  V  V  I  >N  V  G  G  '
    assert_equal(@gaps[1],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  # Inverse of test_process_sequences_na_aa_reverse_frameshift.
  def test_new_from_sequences_na_aa_reverse_frameshift
    ref = 'atgaaggag---gttat<aatgtcggcggt'
    tgt = 'M  K  E  V  V  I  N  V  G  G  '
    assert_equal(@gaps[2],
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  # Inverse of test_process_sequences_na_aa_reverse_frameshift_more.
  def test_new_from_sequences_na_aa_reverse_frameshift_more
    gap = Bio::GFF::GFF3::Record::Gap.new("M3 R3 M3")
    ref = 'atgaag<<<attaatgtc'
    tgt = 'M  K  I  I  N  V  '
    assert_equal(gap,
                 Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref,
                                                                      tgt))
  end

  # Gaps located at the very beginning or end of either sequence.
  def test_new_from_sequences_na_aa_boundary_gap
    g = Bio::GFF::GFF3::Record::Gap

    # gap columns at the start of the reference
    ref = '---atgatg'
    tgt = 'K  M  M  '
    assert_equal(g.new('I1 M2'), g.new_from_sequences_na_aa(ref, tgt))

    # gap columns at the end of the reference
    ref = 'atgatg---'
    tgt = 'M  M  K  '
    assert_equal(g.new('M2 I1'), g.new_from_sequences_na_aa(ref, tgt))

    # gap at the start of the target
    ref = 'atgatgatg'
    tgt = '-  M  M  '
    assert_equal(g.new('D1 M2'), g.new_from_sequences_na_aa(ref, tgt))

    # gap at the end of the target
    ref = 'atgatgatg'
    tgt = 'M  M  -  '
    assert_equal(g.new('M2 D1'), g.new_from_sequences_na_aa(ref, tgt))
  end

  # Two different aligned representations must yield the same Gap.
  def test_new_from_sequences_na_aa_example
    gap = Bio::GFF::GFF3::Record::Gap.new('M2 R1 M1 F2 M1')
    ref1 = 'atgg-taagac-att'
    tgt1 = 'M  V  K  -  I  '
    ref2 = 'atggt<aagacatt'
    tgt2 = 'M  V  K  >>I  '
    gap1 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref1, tgt1)
    assert_equal(gap, gap1)
    gap2 = Bio::GFF::GFF3::Record::Gap.new_from_sequences_na_aa(ref2, tgt2)
    assert_equal(gap, gap2)
  end
end #class TestGFF3RecordGap
1179
+
1180
# Tests for Bio::GFF::GFF3::SequenceRegion: parsing a
# "##sequence-region" line, the seqid/start/end readers, and to_s.
class TestGFF3SequenceRegion < Test::Unit::TestCase

  # Three fixtures: a plain region, a region whose seqid contains
  # characters that need escaping, and an all-nil region.
  def setup
    @data =
      [ Bio::GFF::GFF3::SequenceRegion.new('ABCD1234', 123, 456),
        Bio::GFF::GFF3::SequenceRegion.new(">X Y=Z;P%,Q\tR", 78, 90),
        Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil),
      ]
  end

  # Each directive line must parse into the fixture at the same index.
  def test_parse
    strings =
      [ '##sequence-region ABCD1234 123 456',
        '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90',
        '##sequence-region'
      ]
    # Pair fixtures with their source text instead of consuming the
    # list with shift, so the expectations stay intact while iterating.
    @data.zip(strings).each do |reg, str|
      assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str))
    end
  end

  def test_seqid
    assert_equal('ABCD1234', @data[0].seqid)
    assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid)
    # assert_nil matches the nil checks used by the other tests here.
    assert_nil(@data[2].seqid)
  end

  def test_start
    assert_equal(123, @data[0].start)
    assert_equal(78, @data[1].start)
    assert_nil(@data[2].start)
  end

  def test_end
    assert_equal(456, @data[0].end)
    assert_equal(90, @data[1].end)
    assert_nil(@data[2].end)
  end

  # to_s escapes the seqid, prints "." for each nil field and appends
  # a newline.
  def test_to_s
    assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s)
    assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n",
                 @data[1].to_s)
    assert_equal("##sequence-region . . .\n", @data[2].to_s)
  end

end #class TestGFF3SequenceRegion
1228
+
1229
# Tests for Bio::GFF::GFF3::MetaData, using a "feature-ontology"
# directive whose data part is a URL.
class TestGFF3MetaData < Test::Unit::TestCase

  # Keeps the directive name and its data in instance variables so each
  # test can compare against the same values the fixture was built from.
  def setup
    @directive = 'feature-ontology'
    @url = 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12'
    @data = Bio::GFF::GFF3::MetaData.new(@directive, @url)
  end

  # A "##<directive> <data>" line must parse into an equal object.
  def test_parse
    line = '##' + @directive + ' ' + @url
    parsed = Bio::GFF::GFF3::MetaData.parse(line)
    assert_equal(@data, parsed)
  end

  def test_directive
    assert_equal(@directive, @data.directive)
  end

  def test_data
    assert_equal(@url, @data.data)
  end
end #class TestGFF3MetaData
1250
+
1251
+ end #module Bio
1252
+
1253
+