bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303)
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: codontable.rb,v 0.18 2007/04/05 23:35:40 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Data source
11
11
  #
@@ -626,97 +626,3 @@ end # CodonTable
626
626
 
627
627
  end # module Bio
628
628
 
629
-
630
- if __FILE__ == $0
631
-
632
- begin
633
- require 'pp'
634
- alias p pp
635
- rescue LoadError
636
- end
637
-
638
- puts "### Bio::CodonTable[1]"
639
- p ct1 = Bio::CodonTable[1]
640
-
641
- puts ">>> Bio::CodonTable#table"
642
- p ct1.table
643
-
644
- puts ">>> Bio::CodonTable#each"
645
- ct1.each do |codon, aa|
646
- puts "#{codon} -- #{aa}"
647
- end
648
-
649
- puts ">>> Bio::CodonTable#definition"
650
- p ct1.definition
651
-
652
- puts ">>> Bio::CodonTable#['atg']"
653
- p ct1['atg']
654
-
655
- puts ">>> Bio::CodonTable#revtrans('A')"
656
- p ct1.revtrans('A')
657
-
658
- puts ">>> Bio::CodonTable#start_codon?('atg')"
659
- p ct1.start_codon?('atg')
660
-
661
- puts ">>> Bio::CodonTable#start_codon?('aaa')"
662
- p ct1.start_codon?('aaa')
663
-
664
- puts ">>> Bio::CodonTable#stop_codon?('tag')"
665
- p ct1.stop_codon?('tag')
666
-
667
- puts ">>> Bio::CodonTable#stop_codon?('aaa')"
668
- p ct1.stop_codon?('aaa')
669
-
670
- puts ">>> ct1_copy = Bio::CodonTable.copy(1)"
671
- p ct1_copy = Bio::CodonTable.copy(1)
672
- puts ">>> ct1_copy['tga'] = 'U'"
673
- p ct1_copy['tga'] = 'U'
674
- puts " orig : #{ct1['tga']}"
675
- puts " copy : #{ct1_copy['tga']}"
676
-
677
-
678
- puts "### ct = Bio::CodonTable.new(hash, definition)"
679
- hash = {
680
- 'ttt' => 'F', 'tct' => 'S', 'tat' => 'Y', 'tgt' => 'C',
681
- 'ttc' => 'F', 'tcc' => 'S', 'tac' => 'Y', 'tgc' => 'C',
682
- 'tta' => 'L', 'tca' => 'S', 'taa' => '*', 'tga' => 'U',
683
- 'ttg' => 'L', 'tcg' => 'S', 'tag' => '*', 'tgg' => 'W',
684
-
685
- 'ctt' => 'L', 'cct' => 'P', 'cat' => 'H', 'cgt' => 'R',
686
- 'ctc' => 'L', 'ccc' => 'P', 'cac' => 'H', 'cgc' => 'R',
687
- 'cta' => 'L', 'cca' => 'P', 'caa' => 'Q', 'cga' => 'R',
688
- 'ctg' => 'L', 'ccg' => 'P', 'cag' => 'Q', 'cgg' => 'R',
689
-
690
- 'att' => 'I', 'act' => 'T', 'aat' => 'N', 'agt' => 'S',
691
- 'atc' => 'I', 'acc' => 'T', 'aac' => 'N', 'agc' => 'S',
692
- 'ata' => 'I', 'aca' => 'T', 'aaa' => 'K', 'aga' => 'R',
693
- 'atg' => 'M', 'acg' => 'T', 'aag' => 'K', 'agg' => 'R',
694
-
695
- 'gtt' => 'V', 'gct' => 'A', 'gat' => 'D', 'ggt' => 'G',
696
- 'gtc' => 'V', 'gcc' => 'A', 'gac' => 'D', 'ggc' => 'G',
697
- 'gta' => 'V', 'gca' => 'A', 'gaa' => 'E', 'gga' => 'G',
698
- 'gtg' => 'V', 'gcg' => 'A', 'gag' => 'E', 'ggg' => 'G',
699
- }
700
- my_ct = Bio::CodonTable.new(hash, "my codon table")
701
-
702
- puts ">>> ct.definition"
703
- puts my_ct.definition
704
-
705
- puts ">>> ct.definition=(str)"
706
- my_ct.definition = "selenoproteins (Eukaryote)"
707
- puts my_ct.definition
708
-
709
- puts ">>> ct['tga']"
710
- puts my_ct['tga']
711
-
712
- puts ">>> ct.revtrans('U')"
713
- puts my_ct.revtrans('U')
714
-
715
- puts ">>> ct.stop_codon?('tga')"
716
- puts my_ct.stop_codon?('tga')
717
-
718
- puts ">>> ct.stop_codon?('tag')"
719
- puts my_ct.stop_codon?('tag')
720
-
721
- end
722
-
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: na.rb,v 0.23 2007/04/06 04:41:28 k Exp $
8
+ # $Id:$
9
9
  #
10
10
  # == Synopsis
11
11
  #
@@ -196,28 +196,3 @@ end
196
196
 
197
197
  end # module Bio
198
198
 
199
-
200
- if __FILE__ == $0
201
-
202
- puts "### na = Bio::NucleicAcid.new"
203
- na = Bio::NucleicAcid.new
204
-
205
- puts "# na.to_re('yrwskmbdhvnatgc')"
206
- p na.to_re('yrwskmbdhvnatgc')
207
-
208
- puts "# Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')"
209
- p Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')
210
-
211
- puts "# na.weight('A')"
212
- p na.weight('A')
213
-
214
- puts "# Bio::NucleicAcid.weight('A')"
215
- p Bio::NucleicAcid.weight('A')
216
-
217
- puts "# na.weight('atgc')"
218
- p na.weight('atgc')
219
-
220
- puts "# Bio::NucleicAcid.weight('atgc')"
221
- p Bio::NucleicAcid.weight('atgc')
222
-
223
- end
@@ -7,7 +7,7 @@
7
7
  # Mitsuteru C. Nakao <n@bioruby.org>
8
8
  # License:: The Ruby License
9
9
  #
10
- # $Id: aaindex.rb,v 1.20 2007/04/05 23:35:40 trevor Exp $
10
+ # $Id:$
11
11
  #
12
12
  # == Description
13
13
  #
@@ -318,40 +318,3 @@ module Bio
318
318
 
319
319
  end # module Bio
320
320
 
321
-
322
- if __FILE__ == $0
323
- require 'bio/io/fetch'
324
-
325
- puts "### AAindex1 (PRAM900102)"
326
- aax1 = Bio::AAindex1.new(Bio::Fetch.query('aaindex', 'PRAM900102', 'raw'))
327
- p aax1.entry_id
328
- p aax1.definition
329
- p aax1.dblinks
330
- p aax1.author
331
- p aax1.title
332
- p aax1.journal
333
- p aax1.comment
334
- p aax1.correlation_coefficient
335
- p aax1.index
336
- p aax1
337
- puts "### AAindex2 (DAYM780301)"
338
- aax2 = Bio::AAindex2.new(Bio::Fetch.query('aaindex', 'DAYM780301', 'raw'))
339
- p aax2.entry_id
340
- p aax2.definition
341
- p aax2.dblinks
342
- p aax2.author
343
- p aax2.title
344
- p aax2.journal
345
- p aax1.comment
346
- p aax2.rows
347
- p aax2.cols
348
- p aax2.matrix
349
- p aax2.matrix[2,2]
350
- p aax2.matrix[2,3]
351
- p aax2.matrix[4,3]
352
- p aax2.matrix.determinant
353
- p aax2.matrix.rank
354
- p aax2.matrix.transpose
355
- p aax2
356
- end
357
-
@@ -6,7 +6,7 @@
6
6
  # Toshiaki Katayama <k@bioruby.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: fasta.rb,v 1.28.2.3 2008/06/20 13:43:36 ngoto Exp $
9
+ # $Id:$
10
10
  #
11
11
  # == Description
12
12
  #
@@ -273,138 +273,5 @@ module Bio
273
273
 
274
274
  end #class FastaFormat
275
275
 
276
- # Treats a FASTA formatted numerical entry, such as:
277
- #
278
- # >id and/or some comments <== comment line
279
- # 24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
280
- # 22 17 15 25 27 32 26 32 29 29 25
281
- #
282
- # The precedent '>' can be omitted and the trailing '>' will be removed
283
- # automatically.
284
- #
285
- # --- Bio::FastaNumericFormat.new(entry)
286
- #
287
- # Stores the comment and the list of the numerical data.
288
- #
289
- # --- Bio::FastaNumericFormat#definition
290
- #
291
- # The comment line of the FASTA formatted data.
292
- #
293
- # * FASTA format (Wikipedia)
294
- # http://en.wikipedia.org/wiki/FASTA_format
295
- class FastaNumericFormat < FastaFormat
296
-
297
- # Returns the list of the numerical data (typically the quality score
298
- # of its corresponding sequence) as an Array.
299
- def data
300
- unless @list
301
- @list = @data.strip.split(/\s+/).map {|x| x.to_i}
302
- end
303
- @list
304
- end
305
-
306
- # Returns the number of elements in the numerical data.
307
- def length
308
- data.length
309
- end
310
-
311
- # Yields on each elements of the numerical data.
312
- def each
313
- data.each do |x|
314
- yield x
315
- end
316
- end
317
-
318
- # Returns the n-th element.
319
- def [](n)
320
- data[n]
321
- end
322
-
323
- undef query, blast, fasta, seq, naseq, nalen, aaseq, aalen
324
-
325
- end #class FastaNumericFormat
326
-
327
276
  end #module Bio
328
277
 
329
- if __FILE__ == $0
330
-
331
- f_str = <<END
332
- >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
333
- MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
334
- VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
335
- GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
336
- KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
337
- IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
338
- QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
339
- >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
340
- MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
341
- TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
342
- GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
343
- DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
344
- DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
345
- EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
346
- AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
347
- QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
348
- CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
349
- FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
350
- KTGDPLEWRRLFKKISTICRDIILIPN
351
- END
352
-
353
- f = Bio::FastaFormat.new(f_str)
354
- puts "### FastaFormat"
355
- puts "# entry"
356
- puts f.entry
357
- puts "# entry_id"
358
- p f.entry_id
359
- puts "# definition"
360
- p f.definition
361
- puts "# data"
362
- p f.data
363
- puts "# seq"
364
- p f.seq
365
- puts "# seq.type"
366
- p f.seq.type
367
- puts "# length"
368
- p f.length
369
- puts "# aaseq"
370
- p f.aaseq
371
- puts "# aaseq.type"
372
- p f.aaseq.type
373
- puts "# aaseq.composition"
374
- p f.aaseq.composition
375
- puts "# aalen"
376
- p f.aalen
377
-
378
- puts
379
-
380
- n_str = <<END
381
- >CRA3575282.F
382
- 24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26
383
- 32 29 29 25
384
- END
385
-
386
- n = Bio::FastaNumericFormat.new(n_str)
387
- puts "### FastaNumericFormat"
388
- puts "# entry"
389
- puts n.entry
390
- puts "# entry_id"
391
- p n.entry_id
392
- puts "# definition"
393
- p n.definition
394
- puts "# data"
395
- p n.data
396
- puts "# length"
397
- p n.length
398
- puts "# percent to ratio by yield"
399
- n.each do |x|
400
- p x/100.0
401
- end
402
- puts "# first three"
403
- p n[0]
404
- p n[1]
405
- p n[2]
406
- puts "# last one"
407
- p n[-1]
408
-
409
- end
410
-
@@ -0,0 +1,204 @@
1
+ #
2
+ # = bio/db/fasta/format_qual.rb - Qual format and FastaNumericFormat generator
3
+ #
4
+ # Copyright:: Copyright (C) 2009
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'bio/sequence/format'
12
+ require 'bio/sequence/quality_score'
13
+
14
+ module Bio::Sequence::Format::Formatter
15
+
16
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
17
+ # Simple FastaNumeric format output class for Bio::Sequence.
18
+ class Fasta_numeric < Bio::Sequence::Format::FormatterBase
19
+
20
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
21
+ #
22
+ # Creates a new FastaNumericFormat generator object from the sequence.
23
+ #
24
+ # It does not care whether the content of the quality score is
25
+ # consistent with the sequence or not, e.g. it does not check
26
+ # length of the quality score.
27
+ #
28
+ # ---
29
+ # *Arguments*:
30
+ # * _sequence_: Bio::Sequence object
31
+ # * (optional) :header => _header_: (String) (default nil)
32
+ # * (optional) :width => _width_: (Fixnum) (default 70)
33
+ def initialize; end if false # dummy for RDoc
34
+
35
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
36
+ #
37
+ # Output the FASTA format string of the sequence.
38
+ #
39
+ # Currently, this method is used in Bio::Sequence#output like so,
40
+ #
41
+ # s = Bio::Sequence.new('atgc')
42
+ # s.quality_scores = [ 70, 80, 90, 100 ]
43
+ # puts s.output(:fasta_numeric)
44
+ # ---
45
+ # *Returns*:: String object
46
+ def output
47
+ header = @options[:header]
48
+ width = @options.has_key?(:width) ? @options[:width] : 70
49
+ seq = @sequence.seq.to_s
50
+ entry_id = @sequence.entry_id ||
51
+ "#{@sequence.primary_accession}.#{@sequence.sequence_version}"
52
+ definition = @sequence.definition
53
+ header ||= "#{entry_id} #{definition}"
54
+
55
+ sc = fastanumeric_quality_scores(seq)
56
+ if width then
57
+ if width <= 0 then
58
+ main = sc.join("\n")
59
+ else
60
+ len = 0
61
+ main = sc.collect do |x|
62
+ str = (len == 0) ? "#{x}" : " #{x}"
63
+ len += str.size
64
+ if len > width then
65
+ len = "#{x}".size
66
+ str = "\n#{x}"
67
+ end
68
+ str
69
+ end.join('')
70
+ end
71
+ else
72
+ main = sc.join(' ')
73
+ end
74
+
75
+ ">#{header}\n#{main}\n"
76
+ end
77
+
78
+ private
79
+
80
+ def fastanumeric_quality_scores(seq)
81
+ @sequence.quality_scores || []
82
+ end
83
+
84
+ end #class Fasta_numeric
85
+
86
+ # INTERNAL USE ONLY, YOU SHOULD NOT USE THIS CLASS.
87
+ # Simple Qual format (sequence quality) output class for Bio::Sequence.
88
+ class Qual < Fasta_numeric
89
+
90
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD.
91
+ #
92
+ # Creates a new Qual format generator object from the sequence.
93
+ #
94
+ # The only difference from Fasta_numeric is that Qual outputs
95
+ # Phred score by default, and data conversion will be performed
96
+ # if needed. Output score type can be changed by the
97
+ # ":quality_score_type" option.
98
+ #
99
+ # If the sequence has no quality score type information
100
+ # and no error probabilities, but the score exists,
101
+ # the score is regarded as :phred (Phred score).
102
+ #
103
+ # ---
104
+ # *Arguments*:
105
+ # * _sequence_: Bio::Sequence object
106
+ # * (optional) :header => _header_: (String) (default nil)
107
+ # * (optional) :width => _width_: (Fixnum) (default 70)
108
+ # * (optional) :quality_score_type => _type_: (Symbol) (default nil)
109
+ # * (optional) :default_score => _score_: (Integer) default score for bases that have no valid quality scores or error probabilities (default 0)
110
+ def initialize; end if false # dummy for RDoc
111
+
112
+ private
113
+
114
+ def fastanumeric_quality_scores(seq)
115
+ qsc = qual_quality_scores(seq)
116
+ if qsc.size > seq.length then
117
+ qsc = qsc[0, seq.length]
118
+ elsif qsc.size < seq.length then
119
+ padding = @options[:default_score] || 0
120
+ psize = seq.length - qsc.size
121
+ qsc += Array.new(psize, padding)
122
+ end
123
+ qsc
124
+ end
125
+
126
+ def qual_quality_scores(seq)
127
+ return [] if seq.length <= 0
128
+
129
+ # get output quality score type
130
+ fmt = @options[:quality_score_type]
131
+
132
+ qsc = @sequence.quality_scores
133
+ qsc_type = @sequence.quality_score_type
134
+
135
+ # checks if no need to convert
136
+ if qsc and qsc_type == fmt and
137
+ qsc.size >= seq.length then
138
+ return qsc
139
+ end
140
+
141
+ # default output quality score type is :phred
142
+ fmt ||= :phred
143
+ # If quality score type of the sequence is nil, implicitly
144
+ # regarded as :phred.
145
+ qsc_type ||= :phred
146
+
147
+ # checks error_probabilities
148
+ ep = @sequence.error_probabilities
149
+ if ep and ep.size >= seq.length then
150
+ case fmt
151
+ when :phred
152
+ return Bio::Sequence::QualityScore::Phred.p2q(ep[0, seq.length])
153
+ when :solexa
154
+ return Bio::Sequence::QualityScore::Solexa.p2q(ep[0, seq.length])
155
+ end
156
+ end
157
+
158
+ # Checks if scores can be converted.
159
+ if qsc and qsc.size >= seq.length then
160
+ case [ qsc_type, fmt ]
161
+ when [ :phred, :solexa ]
162
+ return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc[0, seq.length])
163
+ when [ :solexa, :phred ]
164
+ return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc[0, seq.length])
165
+ end
166
+ end
167
+
168
+ # checks quality scores type
169
+ case qsc_type
170
+ when :phred, :solexa
171
+ #does nothing
172
+ else
173
+ qsc_type = nil
174
+ qsc = nil
175
+ end
176
+
177
+ # collects piece of information
178
+ qsc_cov = qsc ? qsc.size.quo(seq.length) : 0
179
+ ep_cov = ep ? ep.size.quo(seq.length) : 0
180
+ if qsc_cov > ep_cov then
181
+ case [ qsc_type, fmt ]
182
+ when [ :phred, :phred ], [ :solexa, :solexa ]
183
+ return qsc
184
+ when [ :phred, :solexa ]
185
+ return Bio::Sequence::QualityScore::Phred.convert_scores_to_solexa(qsc)
186
+ when [ :solexa, :phred ]
187
+ return Bio::Sequence::QualityScore::Solexa.convert_scores_to_phred(qsc)
188
+ end
189
+ elsif ep_cov > qsc_cov then
190
+ case fmt
191
+ when :phred
192
+ return Bio::Sequence::QualityScore::Phred.p2q(ep)
193
+ when :solexa
194
+ return Bio::Sequence::QualityScore::Solexa.p2q(ep)
195
+ end
196
+ end
197
+
198
+ # if no information, returns empty array
199
+ return []
200
+ end
201
+ end #class Qual
202
+
203
+ end #module Bio::Sequence::Format::Formatter
204
+