bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,251 @@
1
+ #
2
+ # = test/unit/bio/io/flatfile/test_buffer.rb - unit test for Bio::FlatFile::BufferedInputStream
3
+ #
4
+ # Copyright (C) 2006 Naohisa Goto <ng@bioruby.org>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio'
17
+ require 'stringio'
18
+
19
+ module Bio::TestFlatFileBufferedInputStream
20
+
21
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
22
+ TestDataPath = Pathname.new(File.join(bioruby_root, 'test', 'data')).cleanpath.to_s
23
+ TestDataFastaFormat01 = File.join(TestDataPath, 'fasta', 'example1.txt')
24
+
25
# Tests for the class-level constructors of
# Bio::FlatFile::BufferedInputStream (for_io and open_file).
class TestBufferedInputStreamClassMethod < Test::Unit::TestCase

  # for_io wraps an already opened IO object; the resulting stream is
  # expected to report the path of the fixture file.
  def test_self_for_io
    # Block form of File.open so the file handle is always released,
    # even when one of the assertions fails.
    File.open(TestDataFastaFormat01) do |io|
      obj = Bio::FlatFile::BufferedInputStream.for_io(io)
      assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
      assert_equal(TestDataFastaFormat01, obj.path)
    end
  end

  # open_file without a block returns the stream object itself.
  def test_self_open_file
    obj = Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01)
    assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
    assert_equal(TestDataFastaFormat01, obj.path)
  ensure
    # The non-block form leaves closing to the caller.
    obj.close if obj
  end

  # open_file with a block yields the stream; closing the yielded
  # object again after the block has returned must raise IOError.
  def test_self_open_file_with_block
    obj2 = nil
    Bio::FlatFile::BufferedInputStream.open_file(TestDataFastaFormat01) do |obj|
      assert_instance_of(Bio::FlatFile::BufferedInputStream, obj)
      assert_equal(TestDataFastaFormat01, obj.path)
      obj2 = obj
    end
    assert_raise(IOError) { obj2.close }
  end
end #class TestBufferedInputStreamClassMethod
50
+
51
# Instance-level tests for Bio::FlatFile::BufferedInputStream.
# Every test works on a fresh stream over the FASTA fixture file
# TestDataFastaFormat01; several expected values depend on the
# contents of that file.
class TestBufferedInputStream < Test::Unit::TestCase
  # Opens the fixture and wraps it in a new BufferedInputStream.
  # The raw IO is kept in @io so that teardown can release it.
  def setup
    @io = File.open(TestDataFastaFormat01)
    path = TestDataFastaFormat01
    @obj = Bio::FlatFile::BufferedInputStream.new(@io, path)
  end

  # Releases the file handle opened in setup. Skipped when the test
  # itself already closed the underlying IO (e.g. test_close).
  def teardown
    @io.close if @io && !@io.closed?
  end

  def test_to_io
    assert_kind_of(IO, @obj.to_io)
  end

  def test_close
    assert_nil(@obj.close)
  end

  # After rewind, the prefetch buffer must be empty again.
  def test_rewind
    @obj.prefetch_gets
    @obj.rewind
    assert_equal('', @obj.prefetch_buffer)
  end

  # pos after two gets and one prefetch_gets.
  def test_pos
    @obj.gets
    @obj.gets
    @obj.prefetch_gets
    assert_equal(117, @obj.pos) #the number depends on original data
  end

  # Assigning pos after some data has been read.
  def test_pos=()
    @obj.gets
    assert_equal(0, @obj.pos = 0)
  end

  def test_eof_false_first
    assert_equal(false, @obj.eof?)
  end

  # Prefetching all lines must not make the stream report EOF.
  def test_eof_false_after_prefetch
    while @obj.prefetch_gets; nil; end
    assert_equal(false, @obj.eof?)
  end

  # Reading all lines with gets must make the stream report EOF.
  def test_eof_true
    while @obj.gets; nil; end
    assert_equal(true, @obj.eof?)
  end

  # Third line of the fixture.
  def test_gets
    @obj.gets
    @obj.gets
    assert_equal("gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc", @obj.gets.chomp)
  end

  # The second prefetched line must be the second line returned by gets.
  def test_gets_equal_prefetch_gets
    @obj.prefetch_gets
    str = @obj.prefetch_gets
    @obj.prefetch_gets
    @obj.gets
    assert_equal(@obj.gets, str)
  end

  # gets with a custom record separator: third record of the fixture.
  def test_gets_rs
    rs = 'tggtg'
    str = <<__END_OF_STR__
aggcactagaattgagcagtgaa
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtg
__END_OF_STR__
    @obj.gets(rs)
    @obj.gets(rs)
    assert_equal(str.chomp, @obj.gets(rs))
  end

  # Same as test_gets_equal_prefetch_gets, with a custom separator.
  def test_gets_rs_equal_prefetch_gets
    rs = 'tggtg'
    @obj.prefetch_gets(rs)
    str = @obj.prefetch_gets(rs)
    @obj.prefetch_gets(rs)
    @obj.gets(rs)
    assert_equal(@obj.gets(rs), str)
  end

  # gets with a custom separator when the data has been pushed back
  # with ungets beforehand.
  def test_gets_rs_within_buffer
    rs = 'tggtg'
    a = []
    20.times {a.push @obj.gets }
    @obj.ungets(a.join(''))

    assert_equal(">At1g02580 mRNA (2291 bp) UTR's and CDS\naggcgagtggttaatggagaaggaaaaccatgaggacgatggtg", @obj.gets(rs))

    assert_equal('ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtg',
                 @obj.gets(rs).split(/\n/)[-1])

    assert_equal('aggcactagaattgagcagtgaa',
                 @obj.gets(rs).split(/\n/)[0])

    assert_equal('aggcttct', @obj.gets(rs).split(/\n/)[0])

    assert_equal('agacacc', @obj.gets(rs).split(/\n/)[0])
  end

  # gets('') as used here: first line of the third chunk returned.
  def test_gets_paragraph_mode
    @obj.gets('')
    @obj.gets('')
    assert_equal('>At1g65300: mRNA 837bp (shortened at end)',
                 @obj.gets('').split(/\n/)[0])
  end

  # Same as test_gets_equal_prefetch_gets, with '' as separator.
  def test_gets_paragraph_mode_equal_prefetch_gets
    rs = ''
    @obj.prefetch_gets(rs)
    str = @obj.prefetch_gets(rs)
    @obj.prefetch_gets(rs)
    @obj.gets(rs)
    assert_equal(@obj.gets(rs), str)
  end

  # gets('') when the data has been pushed back with ungets beforehand.
  def test_gets_paragraph_mode_within_buffer
    @obj.gets('')
    a = []
    20.times {a.push @obj.gets }
    @obj.ungets(a.join(''))

    assert_equal('>At1g65300: mRNA 837bp',
                 @obj.gets('').split(/\n/)[0])

    assert_equal('>At1g65300: mRNA 837bp (shortened at end)',
                 @obj.gets('').split(/\n/)[0])

    assert_equal('>At1g65300: mRNA 837bp (shortened from start)',
                 @obj.gets('').split(/\n/)[0])
  end

  # Lines pushed back in reverse order are read again in original order.
  def test_ungets
    @obj.gets
    @obj.gets
    str1 = @obj.gets
    str2 = @obj.gets
    assert_nil(@obj.ungets(str2))
    assert_nil(@obj.ungets(str1))
    assert_equal(str1, @obj.gets)
    assert_equal(str2, @obj.gets)
  end

  def test_getc
    assert_equal(?>, @obj.getc)
  end

  # A prior prefetch must not change what getc returns.
  def test_getc_after_prefetch
    @obj.prefetch_gets
    assert_equal(?>, @obj.getc)
  end

  def test_ungetc
    c = @obj.getc
    assert_nil(@obj.ungetc(c))
    assert_equal(c, @obj.getc)
  end

  # getc followed by ungetc must leave a prefetched line intact.
  def test_ungetc_after_prefetch
    str = @obj.prefetch_gets
    c = @obj.getc
    assert_nil(@obj.ungetc(c))
    assert_equal(str, @obj.gets)
  end

  # prefetch_buffer holds the concatenation of the prefetched lines.
  def test_prefetch_buffer
    str = @obj.prefetch_gets
    str += @obj.prefetch_gets
    assert_equal(str, @obj.prefetch_buffer)
  end

  # Interleaving of prefetch_gets and gets.
  def test_prefetch_gets
    @obj.prefetch_gets
    @obj.prefetch_gets
    @obj.gets
    str = @obj.prefetch_gets
    @obj.gets
    assert_equal(str, @obj.gets)
  end

  def test_prefetch_gets_with_arg
    # test @obj.gets
    str = @obj.prefetch_gets("\n>")
    assert_equal(str, @obj.gets("\n>"))
    # test using IO object
    io = @obj.to_io
    io.rewind
    assert_equal(str, io.gets("\n>"))
  end

  # After skip_spaces the next character read is 'a'
  # (depends on the fixture data following the first 'CDS').
  def test_skip_spaces
    @obj.gets('CDS')
    assert_nil(@obj.skip_spaces)
    assert_equal(?a, @obj.getc)
  end

end #class TestBufferedInputStream
250
+ end #module Bio::TestFlatFileBufferedInputStream
251
+
@@ -0,0 +1,369 @@
1
+ #
2
+ # = test/unit/bio/io/flatfile/test_splitter.rb - unit test for Bio::FlatFile::Splitter
3
+ #
4
+ # Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'stringio'
17
+
18
+ require 'bio'
19
+ require 'bio/io/flatfile/splitter'
20
+ require 'bio/io/flatfile/buffer'
21
+
22
+ module Bio::TestFlatFileSplitter
23
+
24
# Minimal stand-in for a database entry class, mimicking a FASTA-like
# format. It only stores the raw entry string and supports equality.
class TestDataClass
  # Fake fasta format
  DELIMITER = RS = "\n>"
  DELIMITER_OVERRUN = 1 # '>'.length
  FLATFILE_HEADER = '>'

  # str:: raw text of the entry
  def initialize(str)
    @str = str
  end
  attr_reader :str
  protected :str

  # Two entries are equal when they wrap equal strings.
  # Anything that is not a TestDataClass (nil, a String, ...) is never
  # equal; without the type check such a comparison would raise
  # NoMethodError, turning an assertion failure into a test error.
  def ==(other)
    other.is_a?(TestDataClass) && str == other.str
  end
end #class TestDataClass
39
+
40
+ TestData01 = <<__END_OF_TESTDATA__
41
+
42
+ # This is test
43
+
44
+ >test1
45
+ aaagggtttcccaaagggtttccc
46
+ >testC
47
+ cccccccccccccccccccccccc
48
+ >testG
49
+ gggggggggggggggggggggggg
50
+ >test2
51
+ tttttttttttttttttttttttt
52
+ tttttttttttttttttttttttt
53
+
54
+ >test3
55
+ atatatatatatatatatatatat
56
+ __END_OF_TESTDATA__
57
+ TestData01.chomp!
58
+ # workaround for Windows
59
+ TestData01.gsub!(/\r\n/, "\n")
60
+
61
# Tests for Bio::FlatFile::Splitter::Template. The other splitter test
# classes in this file inherit from this class and therefore also run
# every test they do not override.
class TestTemplate < Test::Unit::TestCase
  # Builds a stream over TestData01 and a Template splitter on top of it.
  def setup
    @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData01), 'TestData01')
    @obj = Bio::FlatFile::Splitter::Template.new(TestDataClass, @stream)
  end

  # The template class itself does not implement skip_leader.
  def test_skip_leader
    assert_raise(NotImplementedError) { @obj.skip_leader }
  end

  # The template class itself does not implement get_entry.
  def test_get_entry
    assert_raise(NotImplementedError) { @obj.get_entry }
  end

  # The entry writer is invoked through instance_eval, i.e. from inside
  # the object; the value is then read back through the reader.
  def test_entry
    assert_nothing_raised {
      @obj.instance_eval { self.entry = 'test' } }
    assert_equal('test', @obj.entry)
  end

  def test_entry_pos_flag
    # default is nil or false
    assert(!@obj.entry_pos_flag)

    # set a value
    assert_equal(true, @obj.entry_pos_flag = true)
    assert_equal(true, @obj.entry_pos_flag)
  end

  def test_entry_start_pos
    assert_nothing_raised {
      @obj.instance_eval { self.entry_start_pos = 123 } }
    assert_equal(123, @obj.entry_start_pos)
  end

  def test_entry_ended_pos
    assert_nothing_raised {
      @obj.instance_eval { self.entry_ended_pos = 456 } }
    assert_equal(456, @obj.entry_ended_pos)
  end

  def test_stream
    assert_equal(@stream, @obj.instance_eval { stream })
  end

  def test_dbclass
    assert_equal(TestDataClass, @obj.instance_eval { dbclass })
  end

  # stream_pos is nil while entry_pos_flag is unset and reports a
  # position once the flag is set.
  def test_stream_pos
    assert_nil(@obj.instance_eval { stream_pos })
    @obj.entry_pos_flag = true
    assert_equal(0, @obj.instance_eval { stream_pos })
    @stream.gets
    # Check the method under test (stream_pos), not the raw stream,
    # after the stream has advanced.
    assert_not_equal(0, @obj.instance_eval { stream_pos })
  end

  # rewind on the splitter must bring the stream back to position 0.
  def test_rewind
    @obj.entry_pos_flag = true
    @stream.gets
    assert_not_equal(0, @stream.pos)
    @obj.rewind
    assert_equal(0, @stream.pos)
  end

end #class TestTemplate
127
+
128
# Tests for Bio::FlatFile::Splitter::Default, run against TestData01
# with TestDataClass as the entry class. Tests not overridden here are
# inherited from TestTemplate.
class TestDefault < TestTemplate # < Test::Unit::TestCase
  def setup
    source = StringIO.new(TestData01)
    @stream = Bio::FlatFile::BufferedInputStream.new(source, 'TestData01')
    @obj = Bio::FlatFile::Splitter::Default.new(TestDataClass, @stream)
  end

  # delimiter, header and delimiter_overrun are expected to equal the
  # corresponding constants of TestDataClass.
  def test_delimiter
    assert_equal("\n>", @obj.delimiter)
  end

  def test_header
    assert_equal('>', @obj.header)
  end

  def test_delimiter_overrun
    assert_equal(1, @obj.delimiter_overrun)
  end

  # With a header set, the stream is positioned at the first entry.
  def test_skip_leader
    assert_nothing_raised { @obj.skip_leader }
    assert(@stream.pos > 0)
    first_line = @stream.gets
    assert_equal('>test1', first_line.chomp)
  end

  # Without a header, the stream is positioned at the comment line.
  def test_skip_leader_without_header
    @obj.header = nil
    assert_nothing_raised { @obj.skip_leader }
    assert(@stream.pos > 0)
    first_line = @stream.gets
    assert_equal('# This is test', first_line.chomp)
  end

  # Without skip_leader, the leading text comes back as the first chunk.
  def test_get_entry
    expected_chunks = [
      "\n # This is test\n\n",
      ">test1\naaagggtttcccaaagggtttccc\n",
      ">testC\ncccccccccccccccccccccccc\n",
      ">testG\ngggggggggggggggggggggggg\n",
      ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n",
      ">test3\natatatatatatatatatatatat"
    ]
    expected_chunks.each do |chunk|
      assert_equal(chunk, @obj.get_entry)
    end
    assert(@stream.eof?)
  end

  # After skip_leader, every entry comes back as a TestDataClass object.
  def test_get_parsed_entry
    expected_chunks = [
      ">test1\naaagggtttcccaaagggtttccc\n",
      ">testC\ncccccccccccccccccccccccc\n",
      ">testG\ngggggggggggggggggggggggg\n",
      ">test2\ntttttttttttttttttttttttt\ntttttttttttttttttttttttt\n\n",
      ">test3\natatatatatatatatatatatat"
    ]

    @obj.skip_leader
    expected_chunks.each do |chunk|
      assert_equal(TestDataClass.new(chunk), @obj.get_parsed_entry)
    end
    assert(@stream.eof?)
  end

  # entry holds the text of the entry fetched last.
  def test_entry
    first_entry = ">test1\naaagggtttcccaaagggtttccc\n"
    @obj.skip_leader
    @obj.get_entry
    assert_equal(first_entry, @obj.entry)
  end

  # Positions are not recorded unless entry_pos_flag is set.
  def test_entry_start_pos_default_nil
    @obj.skip_leader
    @obj.get_entry
    assert_nil(@obj.entry_start_pos)
  end

  def test_entry_ended_pos_default_nil
    @obj.skip_leader
    @obj.get_entry
    assert_nil(@obj.entry_ended_pos)
  end

  # With entry_pos_flag set, start and end of the first entry are
  # recorded (the numbers depend on TestData01).
  def test_entry_start_pos
    @obj.entry_pos_flag = true
    @obj.skip_leader
    @obj.get_entry
    assert_equal(25, @obj.entry_start_pos)
  end

  def test_entry_ended_pos
    @obj.entry_pos_flag = true
    @obj.skip_leader
    @obj.get_entry
    assert_equal(57, @obj.entry_ended_pos)
  end

end #class TestDefault
225
+
226
# Tests for Bio::FlatFile::Splitter::LineOriented, using a small
# line-based format in which consecutive lines sharing the same name
# belong to one entry. Tests not overridden here are inherited from
# TestTemplate.
class TestLineOriented < TestTemplate # < Test::Unit::TestCase
  testdata02 = <<__END_OF_DATA__
#this is header line 1
#this is header line 2
test01 1 2 3
test02 4 5 6
test02 7 8 9
test02 10 11 12
test03 13 14 15

test03 16 17 18
__END_OF_DATA__
  TestData02 = testdata02.gsub(/\r\n/, "\n")

  # Entry class for the test format. An entry consists of optional
  # leading header lines (starting with '#') followed by data lines
  # that all share the same name (first whitespace-separated field).
  class TestData02Class
    FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented

    LineData = Struct.new(:name, :data)

    # Parses +str+ line by line: lines are offered to add_header_line
    # until it rejects one; that line and all following ones are
    # offered to add_line.
    def initialize(str = '')
      @headers = []
      @lines = []
      flag_header = true
      str.each_line do |line|
        if flag_header then
          flag_header = add_header_line(line)
        end
        unless flag_header then
          add_line(line)
        end
      end
    end

    attr_reader :headers
    attr_reader :lines

    # Entries are equal when both headers and parsed lines are equal.
    # Always returns true or false; objects of other classes (e.g. nil)
    # are never equal, instead of raising NoMethodError.
    def ==(other)
      other.is_a?(TestData02Class) &&
        headers == other.headers &&
        lines == other.lines
    end

    # Stores +line+ as a header line when it starts with '#'.
    # Returns self when the line was accepted, false otherwise.
    def add_header_line(line)
      #puts "add_header_line: #{@headers.inspect} #{line.inspect}"
      case line
      when /\A\#/
        @headers.push line
        return self
      else
        return false
      end
    end

    # Stores +line+ as a data line when it belongs to this entry, i.e.
    # when no data line is stored yet or its name equals the name of
    # the first stored line. A blank line is accepted (but not stored)
    # only while no data line is stored. Returns self when the line
    # was accepted, false otherwise.
    def add_line(line)
      #puts "add_line: #{@lines.inspect} #{line.inspect}"
      if /\A\s*\z/ =~ line then
        return @lines.empty? ? self : false
      end
      parsed = parse_line(line)
      if @lines.empty? or @lines.first.name == parsed.name then
        @lines.push parsed
        return self
      else
        return false
      end
    end

    # Splits a line into name and the remaining data.
    def parse_line(line)
      LineData.new(*(line.chomp.split(/\s+/, 2)))
    end
    private :parse_line

  end #class TestData02Class

  # Builds the splitter over TestData02 together with the expected raw
  # entries and their parsed counterparts.
  def setup
    @stream = Bio::FlatFile::BufferedInputStream.new(StringIO.new(TestData02), 'TestData02')
    @obj = Bio::FlatFile::Splitter::LineOriented.new(TestData02Class, @stream)
    @raw_entries =
      [
       "#this is header line 1\n#this is header line 2\ntest01 1 2 3\n",
       "test02 4 5 6\ntest02 7 8 9\ntest02 10 11 12\n",
       "test03 13 14 15\n",
       "\ntest03 16 17 18\n",
      ]
    @entries = @raw_entries.map do |str|
      TestData02Class.new(str)
    end
  end

  # All entries are returned in order; afterwards nil is returned.
  def test_get_parsed_entry
    @entries.each do |ent|
      assert_equal(ent, @obj.get_parsed_entry)
    end
    assert_nil(@obj.get_parsed_entry)
  end

  def test_get_entry
    @raw_entries.each do |raw|
      assert_equal(raw, @obj.get_entry)
    end
    assert_nil(@obj.get_entry)
  end

  def test_rewind
    while @obj.get_parsed_entry; end
    assert_equal(0, @obj.rewind)
  end

  # The flag is set initially, cleared after the first entry has been
  # fetched, and set again by rewind.
  def test_flag_to_fetch_header
    assert(@obj.instance_eval { flag_to_fetch_header })
    @obj.get_parsed_entry
    assert(!@obj.instance_eval { flag_to_fetch_header })
    @obj.rewind
    assert(@obj.instance_eval { flag_to_fetch_header })
  end

  def test_skip_leader
    assert_nil(@obj.skip_leader)
  end

  def test_dbclass
    assert_equal(TestData02Class, @obj.instance_eval { dbclass })
  end

  # Start positions of the first two entries
  # (the numbers depend on TestData02).
  def test_entry_start_pos
    @obj.entry_pos_flag = true
    @obj.skip_leader
    @obj.get_entry
    assert_equal(0, @obj.entry_start_pos)
    @obj.get_entry
    assert_equal(59, @obj.entry_start_pos)
  end

  # End positions of the first two entries
  # (the numbers depend on TestData02).
  def test_entry_ended_pos
    @obj.entry_pos_flag = true
    @obj.skip_leader
    @obj.get_entry
    assert_equal(59, @obj.entry_ended_pos)
    @obj.get_entry
    assert_equal(101, @obj.entry_ended_pos)
  end

end #class TestLineOriented
368
+
369
+ end