bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -787,174 +787,3 @@ end # Relation
787
787
 
788
788
  end # Bio
789
789
 
790
-
791
-
792
- if __FILE__ == $0
793
-
794
- puts "--- Test === method true/false"
795
- r1 = Bio::Relation.new('a', 'b', 1)
796
- r2 = Bio::Relation.new('b', 'a', 1)
797
- r3 = Bio::Relation.new('b', 'a', 2)
798
- r4 = Bio::Relation.new('a', 'b', 1)
799
- p r1 === r2
800
- p r1 === r3
801
- p r1 === r4
802
- p [ r1, r2, r3, r4 ].uniq
803
- p r1.eql?(r2)
804
- p r3.eql?(r2)
805
-
806
- # Sample Graph :
807
- # +----------------+
808
- # | |
809
- # v |
810
- # +---------(q)-->(t)------->(y)<----(r)
811
- # | | | ^ |
812
- # v | v | |
813
- # +--(s)<--+ | (x)<---+ (u)<-----+
814
- # | | | | |
815
- # v | | v |
816
- # (v)----->(w)<---+ (z)----+
817
-
818
- data = [
819
- [ 'q', 's', 1, ],
820
- [ 'q', 't', 1, ],
821
- [ 'q', 'w', 1, ],
822
- [ 'r', 'u', 1, ],
823
- [ 'r', 'y', 1, ],
824
- [ 's', 'v', 1, ],
825
- [ 't', 'x', 1, ],
826
- [ 't', 'y', 1, ],
827
- [ 'u', 'y', 1, ],
828
- [ 'v', 'w', 1, ],
829
- [ 'w', 's', 1, ],
830
- [ 'x', 'z', 1, ],
831
- [ 'y', 'q', 1, ],
832
- [ 'z', 'x', 1, ],
833
- ]
834
-
835
- ary = []
836
-
837
- puts "--- List of relations"
838
- data.each do |x|
839
- ary << Bio::Relation.new(*x)
840
- end
841
- p ary
842
-
843
- puts "--- Generate graph from list of relations"
844
- graph = Bio::Pathway.new(ary)
845
- p graph
846
-
847
- puts "--- Test to_matrix method"
848
- p graph.to_matrix
849
-
850
- puts "--- Test dump_matrix method"
851
- puts graph.dump_matrix(0)
852
-
853
- puts "--- Test dump_list method"
854
- puts graph.dump_list
855
-
856
- puts "--- Labeling some nodes"
857
- hash = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" }
858
- graph.label = hash
859
- p graph
860
-
861
- puts "--- Extract subgraph by label"
862
- p graph.subgraph
863
-
864
- puts "--- Extract subgraph by list"
865
- p graph.subgraph(['q', 't', 'x', 'y', 'z'])
866
-
867
- puts "--- Test cliquishness of the node 'q'"
868
- p graph.cliquishness('q')
869
-
870
- puts "--- Test cliquishness of the node 'q' (undirected)"
871
- u_graph = Bio::Pathway.new(ary, 'undirected')
872
- p u_graph.cliquishness('q')
873
-
874
- puts "--- Test small_world histgram"
875
- p graph.small_world
876
-
877
- puts "--- Test breadth_first_search method"
878
- distance, predecessor = graph.breadth_first_search('q')
879
- p distance
880
- p predecessor
881
-
882
- puts "--- Test bfs_shortest_path method"
883
- step, path = graph.bfs_shortest_path('y', 'w')
884
- p step
885
- p path
886
-
887
- puts "--- Test depth_first_search method"
888
- timestamp, tree, back, cross, forward = graph.depth_first_search
889
- p timestamp
890
- print "tree edges : "; p tree
891
- print "back edges : "; p back
892
- print "cross edges : "; p cross
893
- print "forward edges : "; p forward
894
-
895
- puts "--- Test dfs_topological_sort method"
896
- #
897
- # Professor Bumstead topologically sorts his clothing when getting dressed.
898
- #
899
- # "undershorts" "socks"
900
- # | | |
901
- # v | v "watch"
902
- # "pants" --+-------> "shoes"
903
- # |
904
- # v
905
- # "belt" <----- "shirt" ----> "tie" ----> "jacket"
906
- # | ^
907
- # `---------------------------------------'
908
- #
909
- dag = Bio::Pathway.new([
910
- Bio::Relation.new("undeershorts", "pants", true),
911
- Bio::Relation.new("undeershorts", "shoes", true),
912
- Bio::Relation.new("socks", "shoes", true),
913
- Bio::Relation.new("watch", "watch", true),
914
- Bio::Relation.new("pants", "belt", true),
915
- Bio::Relation.new("pants", "shoes", true),
916
- Bio::Relation.new("shirt", "belt", true),
917
- Bio::Relation.new("shirt", "tie", true),
918
- Bio::Relation.new("tie", "jacket", true),
919
- Bio::Relation.new("belt", "jacket", true),
920
- ])
921
- p dag.dfs_topological_sort
922
-
923
- puts "--- Test dijkstra method"
924
- distance, predecessor = graph.dijkstra('q')
925
- p distance
926
- p predecessor
927
-
928
- puts "--- Test dijkstra method by weighted graph"
929
- #
930
- # 'a' --> 'b'
931
- # | 1 | 3
932
- # |5 v
933
- # `----> 'c'
934
- #
935
- r1 = Bio::Relation.new('a', 'b', 1)
936
- r2 = Bio::Relation.new('a', 'c', 5)
937
- r3 = Bio::Relation.new('b', 'c', 3)
938
- w_graph = Bio::Pathway.new([r1, r2, r3])
939
- p w_graph
940
- p w_graph.dijkstra('a')
941
-
942
- puts "--- Test bellman_ford method by negative weighted graph"
943
- #
944
- # ,-- 'a' --> 'b'
945
- # | | 1 | 3
946
- # | |5 v
947
- # | `----> 'c'
948
- # | ^
949
- # |2 | -5
950
- # `--> 'd' ----'
951
- #
952
- r4 = Bio::Relation.new('a', 'd', 2)
953
- r5 = Bio::Relation.new('d', 'c', -5)
954
- w_graph.append(r4)
955
- w_graph.append(r5)
956
- p w_graph.bellman_ford('a')
957
- p graph.bellman_ford('q')
958
-
959
- end
960
-
@@ -9,7 +9,7 @@
9
9
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
10
10
  # License:: The Ruby License
11
11
  #
12
- # $Id: sequence.rb,v 0.58.2.12 2008/06/17 15:25:22 ngoto Exp $
12
+ # $Id:$
13
13
  #
14
14
 
15
15
  require 'bio/sequence/compat'
@@ -71,6 +71,7 @@ class Sequence
71
71
  autoload :Generic, 'bio/sequence/generic'
72
72
  autoload :Format, 'bio/sequence/format'
73
73
  autoload :Adapter, 'bio/sequence/adapter'
74
+ autoload :QualityScore, 'bio/sequence/quality_score'
74
75
 
75
76
  include Format
76
77
 
@@ -150,6 +151,22 @@ class Sequence
150
151
  # but could be a simple String
151
152
  attr_accessor :seq
152
153
 
154
+ # Quality scores of the bases/residues in the sequence.
155
+ # (Array containing Integer, or nil)
156
+ attr_accessor :quality_scores
157
+
158
+ # The meaning (calculation method) of the quality scores stored in
159
+ # the <tt>quality_scores</tt> attribute.
160
+ # May be one of :phred, :solexa, or nil.
161
+ #
162
+ # Note that if it is nil, and <tt>error_probabilities</tt> is empty,
163
+ # some methods implicitly assume that it is :phred (PHRED score).
164
+ attr_accessor :quality_score_type
165
+
166
+ # Error probabilities of the bases/residues in the sequence.
167
+ # (Array containing Float, or nil)
168
+ attr_accessor :error_probabilities
169
+
153
170
  #---
154
171
  # Attributes below have been added during BioHackathon2008
155
172
  #+++
@@ -23,6 +23,9 @@ module Bio::Sequence::Adapter
23
23
  autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
24
24
  autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
25
25
  autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
26
+ autoload :SangerChromatogram,
27
+ 'bio/db/sanger_chromatogram/chromatogram_to_biosequence'
28
+ autoload :Fastq, 'bio/db/fastq/fastq_to_biosequence'
26
29
 
27
30
  private
28
31
 
@@ -47,6 +47,22 @@ module Format
47
47
  # (resemble to EMBOSS "ncbi" format)
48
48
  autoload :Fasta_ncbi, 'bio/db/fasta/format_fasta'
49
49
 
50
+ # FASTQ "fastq-sanger" format generator
51
+ autoload :Fastq, 'bio/db/fastq/format_fastq'
52
+ # FASTQ "fastq-sanger" format generator
53
+ autoload :Fastq_sanger, 'bio/db/fastq/format_fastq'
54
+ # FASTQ "fastq-solexa" format generator
55
+ autoload :Fastq_solexa, 'bio/db/fastq/format_fastq'
56
+ # FASTQ "fastq-illumina" format generator
57
+ autoload :Fastq_illumina, 'bio/db/fastq/format_fastq'
58
+
59
+ # FastaNumericFormat format generator
60
+ autoload :Fasta_numeric, 'bio/db/fasta/format_qual'
61
+ # Qual format generator.
62
+ # Its format is the same as Fasta_numeric, but it can also convert
63
+ # quality scores or generate scores from error probabilities.
64
+ autoload :Qual, 'bio/db/fasta/format_qual'
65
+
50
66
  end #module Formatter
51
67
 
52
68
  # Repository of nucleotide sequence formatter classes
@@ -0,0 +1,205 @@
1
+ #
2
+ # = bio/sequence/quality_score.rb - Sequence quality score manipulation modules
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # Sequence quality score manipulation modules, mainly used by Bio::Fastq
11
+ # and related classes.
12
+ #
13
+ # == References
14
+ #
15
+ # * FASTQ format specification
16
+ # http://maq.sourceforge.net/fastq.shtml
17
+ #
18
+
19
+ module Bio
20
+
21
+ class Sequence
22
+
23
+ # Bio::Sequence::QualityScore is a name space for quality score modules.
24
+ # BioRuby internal use only (mainly from Bio::Fastq).
25
+ module QualityScore
26
+
27
+ # Converter methods between PHRED and Solexa quality scores.
28
+ module Converter
29
+
30
+ # Converts PHRED scores to Solexa scores.
31
+ #
32
+ # The values may be truncated or incorrect if overflows/underflows
33
+ # occurred during the calculation.
34
+ # ---
35
+ # *Arguments*:
36
+ # * (required) _scores_: (Array containing Integer) quality scores
37
+ # *Returns*:: (Array containing Integer) quality scores
38
+ def convert_scores_from_phred_to_solexa(scores)
39
+ sc = scores.collect do |q|
40
+ t = 10 ** (q / 10.0) - 1
41
+ t = Float::MIN if t < Float::MIN
42
+ r = 10 * Math.log10(t)
43
+ r.finite? ? r.round : r
44
+ end
45
+ sc
46
+ end
47
+
48
+ # Converts Solexa scores to PHRED scores.
49
+ #
50
+ # The values may be truncated if overflows/underflows occurred
51
+ # during the calculation.
52
+ # ---
53
+ # *Arguments*:
54
+ # * (required) _scores_: (Array containing Integer) quality scores
55
+ # *Returns*:: (Array containing Integer) quality scores
56
+ def convert_scores_from_solexa_to_phred(scores)
57
+ sc = scores.collect do |q|
58
+ r = 10 * Math.log10(10 ** (q / 10.0) + 1)
59
+ r.finite? ? r.round : r
60
+ end
61
+ sc
62
+ end
63
+
64
+ # Does nothing and simply returns the given argument.
65
+ #
66
+ # ---
67
+ # *Arguments*:
68
+ # * (required) _scores_: (Array containing Integer) quality scores
69
+ # *Returns*:: (Array containing Integer) quality scores
70
+ def convert_nothing(scores)
71
+ scores
72
+ end
73
+
74
+ end #module Converter
75
+
76
+ # Bio::Sequence::QualityScore::Phred is a module having quality calculation
77
+ # methods for the PHRED quality score.
78
+ #
79
+ # BioRuby internal use only (mainly from Bio::Fastq).
80
+ module Phred
81
+
82
+ include Converter
83
+
84
+ # Type of quality scores.
85
+ # ---
86
+ # *Returns*:: (Symbol) the type of quality score.
87
+ def quality_score_type
88
+ :phred
89
+ end
90
+
91
+ # PHRED score to probability conversion.
92
+ # ---
93
+ # *Arguments*:
94
+ # * (required) _scores_: (Array containing Integer) scores
95
+ # *Returns*:: (Array containing Float) probabilities (0<=p<=1)
96
+ def phred_q2p(scores)
97
+ scores.collect do |q|
98
+ r = 10 ** (- q / 10.0)
99
+ if r > 1.0 then
100
+ r = 1.0
101
+ #elsif r < 0.0 then
102
+ # r = 0.0
103
+ end
104
+ r
105
+ end
106
+ end
107
+ alias q2p phred_q2p
108
+ module_function :q2p
109
+ public :q2p
110
+
111
+ # Probability to PHRED score conversion.
112
+ #
113
+ # The values may be truncated or incorrect if overflows/underflows
114
+ # occurred during the calculation.
115
+ # ---
116
+ # *Arguments*:
117
+ # * (required) _probabilities_: (Array containing Float) probabilities
118
+ # *Returns*:: (Array containing Integer) rounded scores (non-finite values are returned as is)
119
+ def phred_p2q(probabilities)
120
+ probabilities.collect do |p|
121
+ p = Float::MIN if p < Float::MIN
122
+ q = -10 * Math.log10(p)
123
+ q.finite? ? q.round : q
124
+ end
125
+ end
126
+ alias p2q phred_p2q
127
+ module_function :p2q
128
+ public :p2q
129
+
130
+ alias convert_scores_from_phred convert_nothing
131
+ alias convert_scores_to_phred convert_nothing
132
+ alias convert_scores_from_solexa convert_scores_from_solexa_to_phred
133
+ alias convert_scores_to_solexa convert_scores_from_phred_to_solexa
134
+ module_function :convert_scores_to_solexa
135
+ public :convert_scores_to_solexa
136
+
137
+ end #module Phred
138
+
139
+ # Bio::Sequence::QualityScore::Solexa is a module having quality
140
+ # calculation methods for the Solexa quality score.
141
+ #
142
+ # BioRuby internal use only (mainly from Bio::Fastq).
143
+ module Solexa
144
+
145
+ include Converter
146
+
147
+ # Type of quality scores.
148
+ # ---
149
+ # *Returns*:: (Symbol) the type of quality score.
150
+ def quality_score_type
151
+ :solexa
152
+ end
153
+
154
+ # Solexa score to probability conversion.
155
+ # ---
156
+ # *Arguments*:
157
+ # * (required) _scores_: (Array containing Integer) scores
158
+ # *Returns*:: (Array containing Float) probabilities
159
+ def solexa_q2p(scores)
160
+ scores.collect do |q|
161
+ t = 10 ** (- q / 10.0)
162
+ t /= (1.0 + t)
163
+ if t > 1.0 then
164
+ t = 1.0
165
+ #elsif t < 0.0 then
166
+ # t = 0.0
167
+ end
168
+ t
169
+ end
170
+ end
171
+ alias q2p solexa_q2p
172
+ module_function :q2p
173
+ public :q2p
174
+
175
+ # Probability to Solexa score conversion.
176
+ # ---
177
+ # *Arguments*:
178
+ # * (required) _probabilities_: (Array containing Float) probabilities
179
+ # *Returns*:: (Array containing Integer) rounded scores (non-finite values are returned as is)
180
+ def solexa_p2q(probabilities)
181
+ probabilities.collect do |p|
182
+ t = p / (1.0 - p)
183
+ t = Float::MIN if t < Float::MIN
184
+ q = -10 * Math.log10(t)
185
+ q.finite? ? q.round : q
186
+ end
187
+ end
188
+ alias p2q solexa_p2q
189
+ module_function :p2q
190
+ public :p2q
191
+
192
+ alias convert_scores_from_solexa convert_nothing
193
+ alias convert_scores_to_solexa convert_nothing
194
+ alias convert_scores_from_phred convert_scores_from_phred_to_solexa
195
+ alias convert_scores_to_phred convert_scores_from_solexa_to_phred
196
+ module_function :convert_scores_to_phred
197
+ public :convert_scores_to_phred
198
+
199
+ end #module Solexa
200
+
201
+ end #module QualityScore
202
+
203
+ end #class Sequence
204
+
205
+ end #module Bio