bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -0,0 +1,40 @@
1
+ #
2
+ # = bio/db/fastq/fastq_to_biosequence.rb - Bio::Fastq to Bio::Sequence adapter module
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ require 'bio/sequence'
10
+ require 'bio/sequence/adapter'
11
+
12
+ # Internal use only. Normal users should not use this module.
13
+ #
14
+ # Bio::Fastq to Bio::Sequence adapter module.
15
+ # It is internally used in Bio::Fastq#to_biosequence.
16
+ #
17
+ module Bio::Sequence::Adapter::Fastq
18
+
19
+ extend Bio::Sequence::Adapter
20
+
21
+ private
22
+
23
+ def_biosequence_adapter :seq
24
+
25
+ def_biosequence_adapter :entry_id
26
+
27
+ # primary accession
28
+ def_biosequence_adapter :primary_accession do |orig|
29
+ orig.entry_id
30
+ end
31
+
32
+ def_biosequence_adapter :definition
33
+
34
+ def_biosequence_adapter :quality_scores
35
+
36
+ def_biosequence_adapter :quality_score_type
37
+
38
+ def_biosequence_adapter :error_probabilities
39
+
40
+ end #module Bio::Sequence::Adapter::Fastq
@@ -0,0 +1,175 @@
1
+ #
2
+ # = bio/db/fastq/format_fastq.rb - FASTQ format generator
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+
9
+ require 'bio/db/fastq'
10
+ require 'bio/sequence/format'
11
+
12
+ module Bio::Sequence::Format::Formatter
13
+
14
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
15
+ #
16
+ # FASTQ format output class for Bio::Sequence.
17
+ #
18
+ # The default FASTQ format is fastq-sanger.
19
+ class Fastq < Bio::Sequence::Format::FormatterBase
20
+
21
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
22
+ #
23
+ # Creates a new FASTQ format generator object from the sequence.
24
+ #
25
+ # ---
26
+ # *Arguments*:
27
+ # * _sequence_: Bio::Sequence object
28
+ # * (optional) :repeat_title => (true or false) if true, repeating title in the "+" line; if not true, "+" only (default false)
29
+ # * (optional) :width => _width_: (Fixnum) width to wrap sequence and quality lines; nil to prevent wrapping (default 70)
30
+ # * (optional) :title => _title_: (String) completely replaces title line with the _title_ (default nil)
31
+ # * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities; false or nil means the lowest score, true means the highest score (default nil)
32
+ def initialize; end if false # dummy for RDoc
33
+
34
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
35
+ #
36
+ # Output the FASTQ format string of the sequence.
37
+ #
38
+ # Currently, this method is used in Bio::Sequence#output like so,
39
+ #
40
+ # s = Bio::Sequence.new('atgc')
41
+ # puts s.output(:fastq_sanger)
42
+ # ---
43
+ # *Returns*:: String object
44
+ def output
45
+ title = @options[:title]
46
+ width = @options.has_key?(:width) ? @options[:width] : 70
47
+ seq = @sequence.seq.to_s
48
+ entry_id = @sequence.entry_id ||
49
+ "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
50
+ definition = @sequence.definition
51
+ unless title then
52
+ title = definition.to_s
53
+ unless title[0, entry_id.length] == entry_id and
54
+ /\s/ =~ title[entry_id.length, 1].to_s then
55
+ title = "#{entry_id} #{title}"
56
+ end
57
+ end
58
+ title2 = @options[:repeat_title] ? title : ''
59
+ qstr = fastq_quality_string(seq, @options[:default_score])
60
+
61
+ "@#{title}\n" +
62
+ if width then
63
+ seq.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
64
+ else
65
+ seq + "\n"
66
+ end +
67
+ "+#{title2}\n" +
68
+ if width then
69
+ qstr.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
70
+ else
71
+ qstr + "\n"
72
+ end
73
+ end
74
+
75
+ private
76
+ def fastq_format_data
77
+ Bio::Fastq::FormatData::FASTQ_SANGER.instance
78
+ end
79
+
80
+ def fastq_quality_string(seq, default_score)
81
+ sc = fastq_quality_scores(seq)
82
+ if sc.size < seq.length then
83
+ if default_score == true then
84
+ # when true, the highest score
85
+ default_score = fastq_format_data.score_range.end
86
+ else
87
+ # when false or nil, the lowest score
88
+ default_score ||= fastq_format_data.score_range.begin
89
+ end
90
+ sc = sc + ([ default_score ] * (seq.length - sc.size))
91
+ end
92
+ fastq_format_data.scores2str(sc)
93
+ end
94
+
95
+ def fastq_quality_scores(seq)
96
+ return [] if seq.length <= 0
97
+ fmt = fastq_format_data
98
+ # checks quality_scores
99
+ qsc = @sequence.quality_scores
100
+ qsc_type = @sequence.quality_score_type
101
+ if qsc and qsc_type and
102
+ qsc_type == fmt.quality_score_type and
103
+ qsc.size >= seq.length then
104
+ return qsc
105
+ end
106
+
107
+ # checks error_probabilities
108
+ ep = @sequence.error_probabilities
109
+ if ep and ep.size >= seq.length then
110
+ return fmt.p2q(ep[0, seq.length])
111
+ end
112
+
113
+ # If the quality score type of the sequence is nil, it is regarded as :phred.
114
+ qsc_type ||= :phred
115
+
116
+ # checks if scores can be converted
117
+ if qsc and qsc.size >= seq.length then
118
+ case [ qsc_type, fmt.quality_score_type ]
119
+ when [ :phred, :solexa ]
120
+ return fmt.convert_scores_from_phred_to_solexa(qsc[0, seq.length])
121
+ when [ :solexa, :phred ]
122
+ return fmt.convert_scores_from_solexa_to_phred(qsc[0, seq.length])
123
+ end
124
+ end
125
+
126
+ # checks quality scores type
127
+ case qsc_type
128
+ when :phred, :solexa
129
+ #does nothing
130
+ else
131
+ qsc_type = nil
132
+ qsc = nil
133
+ end
134
+
135
+ # collects pieces of information: how much of the sequence each score source covers
136
+ qsc_cov = qsc ? qsc.size.quo(seq.length) : 0
137
+ ep_cov = ep ? ep.size.quo(seq.length) : 0
138
+ if qsc_cov > ep_cov then
139
+ case [ qsc_type, fmt.quality_score_type ]
140
+ when [ :phred, :phred ], [ :solexa, :solexa ]
141
+ return qsc
142
+ when [ :phred, :solexa ]
143
+ return fmt.convert_scores_from_phred_to_solexa(qsc)
144
+ when [ :solexa, :phred ]
145
+ return fmt.convert_scores_from_solexa_to_phred(qsc)
146
+ end
147
+ elsif ep_cov > qsc_cov then
148
+ return fmt.p2q(ep)
149
+ end
150
+
151
+ # if no information, returns empty array
152
+ return []
153
+ end
154
+ end #class Fastq
155
+
156
+ # class Fastq_sanger is the same as the Fastq class.
157
+ Fastq_sanger = Fastq
158
+
159
+ class Fastq_solexa < Fastq
160
+ private
161
+ def fastq_format_data
162
+ Bio::Fastq::FormatData::FASTQ_SOLEXA.instance
163
+ end
164
+ end #class Fastq_solexa
165
+
166
+ class Fastq_illumina < Fastq
167
+ private
168
+ def fastq_format_data
169
+ Bio::Fastq::FormatData::FASTQ_ILLUMINA.instance
170
+ end
171
+ end #class Fastq_illumina
172
+
173
+ end #module Bio::Sequence::Format::Formatter
174
+
175
+
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2000-2005 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: genbank.rb,v 0.40.2.4 2008/06/17 15:56:18 ngoto Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'date'
@@ -163,88 +163,3 @@ class GenBank < NCBIDB
163
163
  end # GenBank
164
164
  end # Bio
165
165
 
166
-
167
-
168
- if __FILE__ == $0
169
-
170
- begin
171
- require 'pp'
172
- alias p pp
173
- rescue LoadError
174
- end
175
-
176
- puts "### GenBank"
177
- if ARGV.size > 0
178
- gb = Bio::GenBank.new(ARGF.read)
179
- else
180
- require 'bio/io/fetch'
181
- gb = Bio::GenBank.new(Bio::Fetch.query('gb', 'LPATOVGNS'))
182
- end
183
-
184
- puts "## LOCUS"
185
- puts "# GenBank.locus"
186
- p gb.locus
187
- puts "# GenBank.entry_id"
188
- p gb.entry_id
189
- puts "# GenBank.nalen"
190
- p gb.nalen
191
- puts "# GenBank.strand"
192
- p gb.strand
193
- puts "# GenBank.natype"
194
- p gb.natype
195
- puts "# GenBank.circular"
196
- p gb.circular
197
- puts "# GenBank.division"
198
- p gb.division
199
- puts "# GenBank.date"
200
- p gb.date
201
-
202
- puts "## DEFINITION"
203
- p gb.definition
204
-
205
- puts "## ACCESSION"
206
- p gb.accession
207
-
208
- puts "## VERSION"
209
- p gb.versions
210
- p gb.version
211
- p gb.gi
212
-
213
- puts "## NID"
214
- p gb.nid
215
-
216
- puts "## KEYWORDS"
217
- p gb.keywords
218
-
219
- puts "## SEGMENT"
220
- p gb.segment
221
-
222
- puts "## SOURCE"
223
- p gb.source
224
- p gb.common_name
225
- p gb.vernacular_name
226
- p gb.organism
227
- p gb.taxonomy
228
-
229
- puts "## REFERENCE"
230
- p gb.references
231
-
232
- puts "## COMMENT"
233
- p gb.comment
234
-
235
- puts "## FEATURES"
236
- p gb.features
237
-
238
- puts "## BASE COUNT"
239
- p gb.basecount
240
- p gb.basecount('a')
241
- p gb.basecount('A')
242
-
243
- puts "## ORIGIN"
244
- p gb.origin
245
- p gb.naseq
246
-
247
- end
248
-
249
-
250
-
@@ -1827,20 +1827,3 @@ module Bio
1827
1827
 
1828
1828
  end # module Bio
1829
1829
 
1830
-
1831
- if __FILE__ == $0
1832
- begin
1833
- require 'pp'
1834
- alias p pp
1835
- rescue LoadError
1836
- end
1837
-
1838
- this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
1839
- this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
1840
- this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
1841
- this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
1842
- this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
1843
- this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
1844
- this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
1845
- p Bio::GFF.new(this_gff)
1846
- end
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: go.rb,v 1.11 2007/04/05 23:35:40 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Gene Ontology
11
11
  #
@@ -89,7 +89,7 @@ class GO
89
89
  stack = []
90
90
  adj_list = []
91
91
 
92
- str.each {|line|
92
+ str.each_line {|line|
93
93
  if /^!(.+?):\s+(\S.+)$/ =~ line # Parsing head lines
94
94
  tag = $1
95
95
  value = $2
@@ -198,13 +198,13 @@ class GO
198
198
  # Block is acceptable.
199
199
  def self.parser(str)
200
200
  if block_given?
201
- str.each(DELIMITER) {|line|
201
+ str.each_line(DELIMITER) {|line|
202
202
  next if /^!/ =~ line
203
203
  yield GeneAssociation.new(line)
204
204
  }
205
205
  else
206
206
  galist = []
207
- str.each(DELIMITER) {|line|
207
+ str.each_line(DELIMITER) {|line|
208
208
  next if /^!/ =~ line
209
209
  galist << GeneAssociation.new(line)
210
210
  }
@@ -411,71 +411,3 @@ end # module Bio
411
411
 
412
412
 
413
413
 
414
- if __FILE__ == $0
415
-
416
- require 'net/http'
417
-
418
- def wget(url)
419
- if /http:\/\/(.+?)\// =~ url
420
- host = $1
421
- path = url[(url.index(host) + host.size)..url.size]
422
- else
423
- raise ArgumentError, "Invalid URL\n#{url}"
424
- end
425
-
426
- result = Net::HTTP.new(host).get(path).body
427
- end
428
-
429
-
430
-
431
- go_c_url = 'http://www.geneontology.org/ontology/component.ontology'
432
- ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz'
433
- e2g_url = 'http://www.geneontology.org/external2go/spkw2go'
434
-
435
-
436
-
437
- puts "\n #==> Bio::GO::Ontology"
438
- p go_c_url
439
- component_ontology = wget(go_c_url)
440
- comp = Bio::GO::Ontology.new(component_ontology)
441
-
442
- [['0003673', '0005632'],
443
- ['0003673', '0005619'],
444
- ['0003673', '0004649']].each {|pair|
445
- puts
446
- p pair
447
- p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }]
448
- puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
449
- p comp.bfs_shortest_path(pair[0], pair[1])
450
- }
451
-
452
-
453
- puts "\n #==> Bio::GO::External2go"
454
- p e2g_url
455
- spkw2go = Bio::GO::External2go.new(wget(e2g_url))
456
-
457
- puts "\n #==> spkw2go.db"
458
- p spkw2go.db
459
-
460
- puts "\n #==> spkw2go[1]"
461
- p spkw2go[1]
462
-
463
-
464
-
465
- require 'zlib'
466
- puts "\n #==> Bio::GO::GeenAssociation"
467
- p ga_url
468
- ga = Zlib::Inflate.inflate(wget(ga_url))
469
- ga = Bio::GO::GeneAssociation.parser(ga)
470
-
471
- puts "\n #==> ga.size"
472
- p ga.size
473
-
474
- puts "\n #==> ga[100]"
475
- p ga[100]
476
-
477
-
478
-
479
-
480
-
481
- end
@@ -0,0 +1,112 @@
1
+ #
2
+ # = bio/db/kegg/common.rb - Common methods for KEGG database classes
3
+ #
4
+ # Copyright:: Copyright (C) 2003-2007 Toshiaki Katayama <k@bioruby.org>
5
+ # Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
6
+ # Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
7
+ # License:: The Ruby License
8
+ #
9
+ #
10
+ #
11
+ # == Description
12
+ #
13
+ # Note that the modules in this file are intended for Bio::KEGG::*
14
+ # internal use only.
15
+ #
16
+ # This file contains modules that implement methods commonly used from
17
+ # KEGG database parser classes.
18
+ #
19
+
20
+ module Bio
21
+ class KEGG
22
+
23
+ # Namespace for methods commonly used in the Bio::KEGG::* classes.
24
+ module Common
25
+
26
+ # The module providing the dblinks_as_hash method.
27
+ #
28
+ # Bio::KEGG::* internal use only.
29
+ module DblinksAsHash
30
+
31
+ # Returns a Hash of the DB name and an Array of entry IDs in
32
+ # DBLINKS field.
33
+ def dblinks_as_hash
34
+ unless defined? @dblinks_as_hash
35
+ hash = {}
36
+ dblinks_as_strings.each do |line|
37
+ db, ids = line.split(/\:\s*/, 2)
38
+ list = ids.split(/\s+/)
39
+ hash[db] = list
40
+ end
41
+ @dblinks_as_hash = hash
42
+ end
43
+ @dblinks_as_hash
44
+ end
45
+ end #module DblinksAsHash
46
+
47
+ # The module providing the pathways_as_hash method.
48
+ #
49
+ # Bio::KEGG::* internal use only.
50
+ module PathwaysAsHash
51
+
52
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
53
+ def pathways_as_hash
54
+ unless defined? @pathways_as_hash then
55
+ hash = {}
56
+ pathways_as_strings.each do |line|
57
+ sign, entry_id, name = line.split(/\s+/, 3)
58
+ hash[entry_id] = name
59
+ end
60
+ @pathways_as_hash = hash
61
+ end
62
+ @pathways_as_hash
63
+ end
64
+ end #module PathwaysAsHash
65
+
66
+ # This module provides the orthologs_as_hash method.
67
+ #
68
+ # Bio::KEGG::* internal use only.
69
+ module OrthologsAsHash
70
+
71
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
72
+ def orthologs_as_hash
73
+ unless defined? @orthologs_as_hash
74
+ kos = {}
75
+ orthologs_as_strings.each do |ko|
76
+ entry = ko.scan(/K[0-9]{5}/)[0]
77
+ sign, entry_id, definition = ko.split(/\s+/, 3)
78
+ kos[entry_id] = definition
79
+ end
80
+ @orthologs_as_hash = kos
81
+ end
82
+ @orthologs_as_hash
83
+ end
84
+ end #module OrthologsAsHash
85
+
86
+ # This module provides the genes_as_hash method.
87
+ #
88
+ # Bio::KEGG::* internal use only.
89
+ module GenesAsHash
90
+
91
+ # Returns a Hash of the organism ID and an Array of entry IDs in
92
+ # GENES field.
93
+ def genes_as_hash
94
+ unless defined? @genes_as_hash
95
+ hash = {}
96
+ genes_as_strings.each do |line|
97
+ name, *list = line.split(/\s+/)
98
+ org = name.downcase.sub(/:/, '')
99
+ genes = list.map {|x| x.sub(/\(.*\)/, '')}
100
+ #names = list.map {|x| x.scan(/.*\((.*)\)/)}
101
+ hash[org] = genes
102
+ end
103
+ @genes_as_hash = hash
104
+ end
105
+ @genes_as_hash
106
+ end
107
+ end #module GenesAsHash
108
+
109
+ end #module Common
110
+ end #class KEGG
111
+ end #module Bio
112
+