bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259)
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,251 @@
1
+ #
2
+ # = test/unit/bio/io/flatfile/test_buffer.rb - unit test for Bio::FlatFile::BufferedInputStream
3
+ #
4
+ # Copyright (C) 2006 Naohisa Goto <ng@bioruby.org>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio'
17
+ require 'stringio'
18
+
19
+ module Bio::TestFlatFileBufferedInputStream
20
+
21
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
22
+ TestDataPath = Pathname.new(File.join(bioruby_root, 'test', 'data')).cleanpath.to_s
23
+ TestDataFastaFormat01 = File.join(TestDataPath, 'fasta', 'example1.txt')
24
+
25
+ class TestBufferedInputStreamClassMethod < Test::Unit::TestCase
26
+
27
+ def test_self_for_io
28
+ io = File.open(TestDataFastaFormat01)
29
+ obj = Bio::FlatFile::BufferedInputStream.for_io(io)
30
+ assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
31
+ assert_equal(TestDataFastaFormat01, obj.path)
32
+ end
33
+
34
+ def test_self_open_file
35
+ obj = Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01)
36
+ assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
37
+ assert_equal(TestDataFastaFormat01, obj.path)
38
+ end
39
+
40
+ def test_self_open_file_with_block
41
+ obj2 = nil
42
+ Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01) do |obj|
43
+ assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
44
+ assert_equal(TestDataFastaFormat01, obj.path)
45
+ obj2 = obj
46
+ end
47
+ assert_raise(IOError) { obj2.close }
48
+ end
49
+ end #class TestBufferedInputStreamClassMethod
50
+
51
+ class TestBufferedInputStream < Test::Unit::TestCase
52
+ def setup
53
+ io = File.open(TestDataFastaFormat01)
54
+ path = TestDataFastaFormat01
55
+ @obj = Bio::FlatFile::BufferedInputStream.new(io, path)
56
+ end
57
+
58
+ def test_to_io
59
+ assert_kind_of(IO, @obj.to_io)
60
+ end
61
+
62
+ def test_close
63
+ assert_nil(@obj.close)
64
+ end
65
+
66
+ def test_rewind
67
+ @obj.prefetch_gets
68
+ @obj.rewind
69
+ assert_equal('', @obj.prefetch_buffer)
70
+ end
71
+
72
+ def test_pos
73
+ @obj.gets
74
+ @obj.gets
75
+ @obj.prefetch_gets
76
+ assert_equal(117, @obj.pos) #the number depends on original data
77
+ end
78
+
79
+ def test_pos=()
80
+ str = @obj.gets
81
+ assert_equal(0, @obj.pos = 0)
82
+ end
83
+
84
+ def test_eof_false_first
85
+ assert_equal(false, @obj.eof?)
86
+ end
87
+
88
+ def test_eof_false_after_prefetch
89
+ while @obj.prefetch_gets; nil; end
90
+ assert_equal(false, @obj.eof?)
91
+ end
92
+
93
+ def test_eof_true
94
+ while @obj.gets; nil; end
95
+ assert_equal(true, @obj.eof?)
96
+ end
97
+
98
+ def test_gets
99
+ @obj.gets
100
+ @obj.gets
101
+ assert_equal("gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc", @obj.gets.chomp)
102
+ end
103
+
104
+ def test_gets_equal_prefetch_gets
105
+ @obj.prefetch_gets
106
+ str = @obj.prefetch_gets
107
+ @obj.prefetch_gets
108
+ @obj.gets
109
+ assert_equal(@obj.gets, str)
110
+ end
111
+
112
+ def test_gets_rs
113
+ rs = 'tggtg'
114
+ str = <<__END_OF_STR__
115
+ aggcactagaattgagcagtgaa
116
+ gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
117
+ gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
118
+ agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtg
119
+ __END_OF_STR__
120
+ @obj.gets(rs)
121
+ @obj.gets(rs)
122
+ assert_equal(str.chomp, @obj.gets(rs))
123
+ end
124
+
125
+ def test_gets_rs_equal_prefetch_gets
126
+ rs = 'tggtg'
127
+ @obj.prefetch_gets(rs)
128
+ str = @obj.prefetch_gets(rs)
129
+ @obj.prefetch_gets(rs)
130
+ @obj.gets(rs)
131
+ assert_equal(@obj.gets(rs), str)
132
+ end
133
+
134
+ def test_gets_rs_within_buffer
135
+ rs = 'tggtg'
136
+ a = []
137
+ 20.times {a.push @obj.gets }
138
+ @obj.ungets(a.join(''))
139
+
140
+ assert_equal(">At1g02580 mRNA (2291 bp) UTR's and CDS\naggcgagtggttaatggagaaggaaaaccatgaggacgatggtg", @obj.gets(rs))
141
+
142
+ assert_equal('ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtg',
143
+ @obj.gets(rs).split(/\n/)[-1])
144
+
145
+ assert_equal('aggcactagaattgagcagtgaa',
146
+ @obj.gets(rs).split(/\n/)[0])
147
+
148
+ assert_equal('aggcttct', @obj.gets(rs).split(/\n/)[0])
149
+
150
+ assert_equal('agacacc', @obj.gets(rs).split(/\n/)[0])
151
+ end
152
+
153
+ def test_gets_paragraph_mode
154
+ @obj.gets('')
155
+ @obj.gets('')
156
+ assert_equal('>At1g65300: mRNA 837bp (shortened at end)',
157
+ @obj.gets('').split(/\n/)[0])
158
+ end
159
+
160
+ def test_gets_paragraph_mode_equal_prefetch_gets
161
+ rs = ''
162
+ @obj.prefetch_gets(rs)
163
+ str = @obj.prefetch_gets(rs)
164
+ @obj.prefetch_gets(rs)
165
+ @obj.gets(rs)
166
+ assert_equal(@obj.gets(rs), str)
167
+ end
168
+
169
+ def test_gets_paragraph_mode_within_buffer
170
+ @obj.gets('')
171
+ a = []
172
+ 20.times {a.push @obj.gets }
173
+ @obj.ungets(a.join(''))
174
+
175
+ assert_equal('>At1g65300: mRNA 837bp',
176
+ @obj.gets('').split(/\n/)[0])
177
+
178
+ assert_equal('>At1g65300: mRNA 837bp (shortened at end)',
179
+ @obj.gets('').split(/\n/)[0])
180
+
181
+ assert_equal('>At1g65300: mRNA 837bp (shortened from start)',
182
+ @obj.gets('').split(/\n/)[0])
183
+ end
184
+
185
+ def test_ungets
186
+ @obj.gets
187
+ @obj.gets
188
+ str1 = @obj.gets
189
+ str2 = @obj.gets
190
+ assert_nil(@obj.ungets(str2))
191
+ assert_nil(@obj.ungets(str1))
192
+ assert_equal(str1, @obj.gets)
193
+ assert_equal(str2, @obj.gets)
194
+ end
195
+
196
+ def test_getc
197
+ assert_equal(?>, @obj.getc)
198
+ end
199
+
200
+ def test_getc_after_prefetch
201
+ @obj.prefetch_gets
202
+ assert_equal(?>, @obj.getc)
203
+ end
204
+
205
+ def test_ungetc
206
+ c = @obj.getc
207
+ assert_nil(@obj.ungetc(c))
208
+ assert_equal(c, @obj.getc)
209
+ end
210
+
211
+ def test_ungetc_after_prefetch
212
+ str = @obj.prefetch_gets
213
+ c = @obj.getc
214
+ assert_nil(@obj.ungetc(c))
215
+ assert_equal(str, @obj.gets)
216
+ end
217
+
218
+ def test_prefetch_buffer
219
+ str = @obj.prefetch_gets
220
+ str += @obj.prefetch_gets
221
+ assert_equal(str, @obj.prefetch_buffer)
222
+ end
223
+
224
+ def test_prefetch_gets
225
+ @obj.prefetch_gets
226
+ @obj.prefetch_gets
227
+ @obj.gets
228
+ str = @obj.prefetch_gets
229
+ @obj.gets
230
+ assert_equal(str, @obj.gets)
231
+ end
232
+
233
+ def test_prefetch_gets_with_arg
234
+ # test @obj.gets
235
+ str = @obj.prefetch_gets("\n>")
236
+ assert_equal(str, @obj.gets("\n>"))
237
+ # test using IO object
238
+ io = @obj.to_io
239
+ io.rewind
240
+ assert_equal(str, io.gets("\n>"))
241
+ end
242
+
243
+ def test_skip_spaces
244
+ @obj.gets('CDS')
245
+ assert_nil(@obj.skip_spaces)
246
+ assert_equal(?a, @obj.getc)
247
+ end
248
+
249
+ end #class TestBufferedInputStream
250
+ end #module Bio::TestFlatFileBufferedInputStream
251
+
@@ -0,0 +1,369 @@
1
+ #
2
+ # = test/unit/bio/io/flatfile/test_splitter.rb - unit test for Bio::FlatFile::Splitter
3
+ #
4
+ # Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'stringio'
17
+
18
+ require 'bio'
19
+ require 'bio/io/flatfile/splitter'
20
+ require 'bio/io/flatfile/buffer'
21
+
22
+ module Bio::TestFlatFileSplitter
23
+
24
+ class TestDataClass
25
+ # Fake fasta format
26
+ DELIMITER = RS = "\n>"
27
+ DELIMITER_OVERRUN = 1 # '>'.length
28
+ FLATFILE_HEADER = '>'
29
+ def initialize(str)
30
+ @str = str
31
+ end
32
+ attr_reader :str
33
+ protected :str
34
+
35
+ def ==(other)
36
+ self.str == other.str
37
+ end
38
+ end #class TestDataClass
39
+
40
+ TestData01 = <<__END_OF_TESTDATA__
41
+
42
+ # This is test
43
+
44
+ >test1
45
+ aaagggtttcccaaagggtttccc
46
+ >testC
47
+ cccccccccccccccccccccccc
48
+ >testG
49
+ gggggggggggggggggggggggg
50
+ >test2
51
+ tttttttttttttttttttttttt
52
+ tttttttttttttttttttttttt
53
+
54
+ >test3
55
+ atatatatatatatatatatatat
56
+ __END_OF_TESTDATA__
57
+ TestData01.chomp!
58
+ # workaround for Windows
59
+ TestData01.gsub!(/\r\n/, "\n")
60
+
61
+ class TestTemplate < Test::Unit::TestCase
62
+ def setup
63
+ @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData01), 'TestData01')
64
+ @obj = Bio::FlatFile::Splitter::Template.new(TestDataClass, @stream)
65
+ end
66
+
67
+ def test_skip_leader
68
+ assert_raise(NotImplementedError) { @obj.skip_leader }
69
+ end
70
+
71
+ def test_get_entry
72
+ assert_raise(NotImplementedError) { @obj.get_entry }
73
+ end
74
+
75
+ def test_entry
76
+ assert_nothing_raised {
77
+ @obj.instance_eval { self.entry = 'test' } }
78
+ assert_equal('test', @obj.entry)
79
+ end
80
+
81
+ def test_entry_pos_flag
82
+ # default is nil or false
83
+ assert(!@obj.entry_pos_flag)
84
+
85
+ # set a value
86
+ assert_equal(true, @obj.entry_pos_flag = true)
87
+ assert_equal(true, @obj.entry_pos_flag)
88
+ end
89
+
90
+ def test_entry_start_pos
91
+ assert_nothing_raised {
92
+ @obj.instance_eval { self.entry_start_pos = 123 } }
93
+ assert_equal(123, @obj.entry_start_pos)
94
+ end
95
+
96
+ def test_entry_ended_pos
97
+ assert_nothing_raised {
98
+ @obj.instance_eval { self.entry_ended_pos = 456 } }
99
+ assert_equal(456, @obj.entry_ended_pos)
100
+ end
101
+
102
+ def test_stream
103
+ assert_equal(@stream, @obj.instance_eval { stream })
104
+ end
105
+
106
+ def test_dbclass
107
+ assert_equal(TestDataClass, @obj.instance_eval { dbclass })
108
+ end
109
+
110
+ def test_stream_pos
111
+ assert_nil(@obj.instance_eval { stream_pos })
112
+ @obj.entry_pos_flag = true
113
+ assert_equal(0, @obj.instance_eval { stream_pos })
114
+ @stream.gets
115
+ assert_not_equal(0, @obj.instance_eval { stream.pos })
116
+ end
117
+
118
+ def test_rewind
119
+ @obj.entry_pos_flag = true
120
+ @stream.gets
121
+ assert_not_equal(0, @stream.pos)
122
+ @obj.rewind
123
+ assert_equal(0, @stream.pos)
124
+ end
125
+
126
+ end #class TestTemplate
127
+
128
+ class TestDefault < TestTemplate # < Test::Unit::TestCase
129
+ def setup
130
+ @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData01), 'TestData01')
131
+ @obj = Bio::FlatFile::Splitter::Default.new(TestDataClass, @stream)
132
+ end
133
+
134
+ def test_delimiter
135
+ assert_equal("\n>", @obj.delimiter)
136
+ end
137
+
138
+ def test_header
139
+ assert_equal('>', @obj.header)
140
+ end
141
+
142
+ def test_delimiter_overrun
143
+ assert_equal(1, @obj.delimiter_overrun)
144
+ end
145
+
146
+ def test_skip_leader
147
+ assert_nothing_raised { @obj.skip_leader }
148
+ assert(@stream.pos > 0)
149
+ assert_equal('>test1', @stream.gets.chomp)
150
+ end
151
+
152
+ def test_skip_leader_without_header
153
+ @obj.header = nil
154
+ assert_nothing_raised { @obj.skip_leader }
155
+ assert(@stream.pos > 0)
156
+ assert_equal('# This is test', @stream.gets.chomp)
157
+ end
158
+
159
+ def test_get_entry
160
+ str0 = "\n # This is test\n\n"
161
+ str1 = ">test1\naaagggtttcccaaagggtttccc\n"
162
+ str2 = ">testC\ncccccccccccccccccccccccc\n"
163
+ str3 = ">testG\ngggggggggggggggggggggggg\n"
164
+ str4 = ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n"
165
+ str5 = ">test3\natatatatatatatatatatatat"
166
+ assert_equal(str0, @obj.get_entry)
167
+ assert_equal(str1, @obj.get_entry)
168
+ assert_equal(str2, @obj.get_entry)
169
+ assert_equal(str3, @obj.get_entry)
170
+ assert_equal(str4, @obj.get_entry)
171
+ assert_equal(str5, @obj.get_entry)
172
+ assert(@stream.eof?)
173
+ end
174
+
175
+ def test_get_parsed_entry
176
+ str1 = ">test1\naaagggtttcccaaagggtttccc\n"
177
+ str2 = ">testC\ncccccccccccccccccccccccc\n"
178
+ str3 = ">testG\ngggggggggggggggggggggggg\n"
179
+ str4 = ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n"
180
+ str5 = ">test3\natatatatatatatatatatatat"
181
+
182
+ @obj.skip_leader
183
+ assert_equal(TestDataClass.new(str1), @obj.get_parsed_entry)
184
+ assert_equal(TestDataClass.new(str2), @obj.get_parsed_entry)
185
+ assert_equal(TestDataClass.new(str3), @obj.get_parsed_entry)
186
+ assert_equal(TestDataClass.new(str4), @obj.get_parsed_entry)
187
+ assert_equal(TestDataClass.new(str5), @obj.get_parsed_entry)
188
+ assert(@stream.eof?)
189
+ end
190
+
191
+ def test_entry
192
+ str1 = ">test1\naaagggtttcccaaagggtttccc\n"
193
+ @obj.skip_leader
194
+ @obj.get_entry
195
+ assert_equal(str1, @obj.entry)
196
+ end
197
+
198
+ def test_entry_start_pos_default_nil
199
+ @obj.skip_leader
200
+ @obj.get_entry
201
+ assert_nil(@obj.entry_start_pos)
202
+ end
203
+
204
+ def test_entry_ended_pos_default_nil
205
+ @obj.skip_leader
206
+ @obj.get_entry
207
+ assert_nil(@obj.entry_ended_pos)
208
+ end
209
+
210
+ def test_entry_start_pos
211
+ @obj.entry_pos_flag = true
212
+ @obj.skip_leader
213
+ @obj.get_entry
214
+ assert_equal(25, @obj.entry_start_pos)
215
+ end
216
+
217
+ def test_entry_ended_pos
218
+ @obj.entry_pos_flag = true
219
+ @obj.skip_leader
220
+ @obj.get_entry
221
+ assert_equal(57, @obj.entry_ended_pos)
222
+ end
223
+
224
+ end #class TestDefault
225
+
226
+ class TestLineOriented < TestTemplate # < Test::Unit::TestCase
227
+ testdata02 = <<__END_OF_DATA__
228
+ #this is header line 1
229
+ #this is header line 2
230
+ test01 1 2 3
231
+ test02 4 5 6
232
+ test02 7 8 9
233
+ test02 10 11 12
234
+ test03 13 14 15
235
+
236
+ test03 16 17 18
237
+ __END_OF_DATA__
238
+ TestData02 = testdata02.gsub(/\r\n/, "\n")
239
+
240
+ class TestData02Class
241
+ FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented
242
+
243
+ LineData = Struct.new(:name, :data)
244
+
245
+ def initialize(str = '')
246
+ @headers = []
247
+ @lines = []
248
+ flag_header = true
249
+ str.each_line do |line|
250
+ if flag_header then
251
+ flag_header = add_header_line(line)
252
+ end
253
+ unless flag_header then
254
+ r = add_line(line)
255
+ end
256
+ end
257
+ end
258
+
259
+ attr_reader :headers
260
+ attr_reader :lines
261
+
262
+ def ==(other)
263
+ self.headers == other.headers and
264
+ self.lines == other.lines ? true : false
265
+ end
266
+
267
+ def add_header_line(line)
268
+ #puts "add_header_line: #{@headers.inspect} #{line.inspect}"
269
+ case line
270
+ when /\A\#/
271
+ @headers.push line
272
+ return self
273
+ else
274
+ return false
275
+ end
276
+ end
277
+
278
+ def add_line(line)
279
+ #puts "add_line: #{@lines.inspect} #{line.inspect}"
280
+ if /\A\s*\z/ =~ line then
281
+ return @lines.empty? ? self : false
282
+ end
283
+ parsed = parse_line(line)
284
+ if @lines.empty? or @lines.first.name == parsed.name then
285
+ @lines.push parsed
286
+ return self
287
+ else
288
+ return false
289
+ end
290
+ end
291
+
292
+ def parse_line(line)
293
+ LineData.new(*(line.chomp.split(/\s+/, 2)))
294
+ end
295
+ private :parse_line
296
+
297
+ end #class TestData02Class
298
+
299
+ def setup
300
+ @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData02), 'TestData02')
301
+ @obj = Bio::FlatFile::Splitter::LineOriented.new(TestData02Class, @stream)
302
+ @raw_entries =
303
+ [
304
+ "#this is header line 1\n#this is header line 2\ntest01 1 2 3\n",
305
+ "test02 4 5 6\ntest02 7 8 9\ntest02 10 11 12\n",
306
+ "test03 13 14 15\n",
307
+ "\ntest03 16 17 18\n",
308
+ ]
309
+ @entries = @raw_entries.collect do |str|
310
+ TestData02Class.new(str)
311
+ end
312
+ end
313
+
314
+ def test_get_parsed_entry
315
+ @entries.each do |ent|
316
+ assert_equal(ent, @obj.get_parsed_entry)
317
+ end
318
+ assert_nil(@obj.get_parsed_entry)
319
+ end
320
+
321
+ def test_get_entry
322
+ @raw_entries.each do |raw|
323
+ assert_equal(raw, @obj.get_entry)
324
+ end
325
+ assert_nil(@obj.get_entry)
326
+ end
327
+
328
+ def test_rewind
329
+ while @obj.get_parsed_entry; end
330
+ assert_equal(0, @obj.rewind)
331
+ end
332
+
333
+ def test_flag_to_fetch_header
334
+ assert(@obj.instance_eval { flag_to_fetch_header })
335
+ @obj.get_parsed_entry
336
+ assert(!@obj.instance_eval { flag_to_fetch_header })
337
+ @obj.rewind
338
+ assert(@obj.instance_eval { flag_to_fetch_header })
339
+ end
340
+
341
+ def test_skip_leader
342
+ assert_nil(@obj.skip_leader)
343
+ end
344
+
345
+ def test_dbclass
346
+ assert_equal(TestData02Class, @obj.instance_eval { dbclass })
347
+ end
348
+
349
+ def test_entry_start_pos
350
+ @obj.entry_pos_flag = true
351
+ @obj.skip_leader
352
+ @obj.get_entry
353
+ assert_equal(0, @obj.entry_start_pos)
354
+ @obj.get_entry
355
+ assert_equal(59, @obj.entry_start_pos)
356
+ end
357
+
358
+ def test_entry_ended_pos
359
+ @obj.entry_pos_flag = true
360
+ @obj.skip_leader
361
+ @obj.get_entry
362
+ assert_equal(59, @obj.entry_ended_pos)
363
+ @obj.get_entry
364
+ assert_equal(101, @obj.entry_ended_pos)
365
+ end
366
+
367
+ end #class TestLineOriented
368
+
369
+ end