bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2001, 2002, 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: genome.rb,v 0.18 2007/06/28 11:27:24 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  require 'bio/db'
@@ -19,6 +19,8 @@ class KEGG
19
19
  # == References
20
20
  #
21
21
  # * ftp://ftp.genome.jp/pub/kegg/genomes/genome
22
+ # * http://www.genome.jp/dbget-bin/www_bfind?genome
23
+ # * http://www.genome.jp/kegg/catalog/org_list.html
22
24
  #
23
25
  class GENOME < KEGGDB
24
26
 
@@ -199,43 +201,3 @@ end # GENOME
199
201
  end # KEGG
200
202
  end # Bio
201
203
 
202
-
203
-
204
- if __FILE__ == $0
205
-
206
- begin
207
- require 'pp'
208
- def p(arg); pp(arg); end
209
- rescue LoadError
210
- end
211
-
212
- require 'bio/io/flatfile'
213
-
214
- ff = Bio::FlatFile.new(Bio::KEGG::GENOME, ARGF)
215
-
216
- ff.each do |genome|
217
-
218
- puts "### Tags"
219
- p genome.tags
220
-
221
- [
222
- %w( ENTRY entry_id ),
223
- %w( NAME name ),
224
- %w( DEFINITION definition ),
225
- %w( TAXONOMY taxonomy taxid lineage ),
226
- %w( REFERENCE references ),
227
- %w( CHROMOSOME chromosomes ),
228
- %w( PLASMID plasmids ),
229
- %w( STATISTICS statistics nalen num_gene num_rna ),
230
- ].each do |x|
231
- puts "### " + x.shift
232
- x.each do |m|
233
- p genome.send(m)
234
- end
235
- end
236
-
237
- end
238
-
239
- end
240
-
241
-
@@ -8,6 +8,7 @@
8
8
  #
9
9
 
10
10
  require 'bio/db'
11
+ require 'bio/db/kegg/common'
11
12
 
12
13
  module Bio
13
14
  class KEGG
@@ -17,6 +18,21 @@ class GLYCAN < KEGGDB
17
18
  DELIMITER = RS = "\n///\n"
18
19
  TAGSIZE = 12
19
20
 
21
+ include Common::DblinksAsHash
22
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
23
+ def dblinks_as_hash; super; end if false #dummy for RDoc
24
+ alias dblinks dblinks_as_hash
25
+
26
+ include Common::PathwaysAsHash
27
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
28
+ def pathways_as_hash; super; end if false #dummy for RDoc
29
+ alias pathways pathways_as_hash
30
+
31
+ include Common::OrthologsAsHash
32
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
33
+ def orthologs_as_hash; super; end if false #dummy for RDoc
34
+ alias orthologs orthologs_as_hash
35
+
20
36
  def initialize(entry)
21
37
  super(entry, TAGSIZE)
22
38
  end
@@ -73,7 +89,7 @@ class GLYCAN < KEGGDB
73
89
  end
74
90
 
75
91
  # PATHWAY
76
- def pathways
92
+ def pathways_as_strings
77
93
  lines_fetch('PATHWAY')
78
94
  end
79
95
 
@@ -91,7 +107,7 @@ class GLYCAN < KEGGDB
91
107
  end
92
108
 
93
109
  # ORTHOLOGY
94
- def orthologs
110
+ def orthologs_as_strings
95
111
  unless @data['ORTHOLOGY']
96
112
  @data['ORTHOLOGY'] = lines_fetch('ORTHOLOGY')
97
113
  end
@@ -126,7 +142,7 @@ class GLYCAN < KEGGDB
126
142
  end
127
143
 
128
144
  # DBLINKS
129
- def dblinks
145
+ def dblinks_as_strings
130
146
  unless @data['DBLINKS']
131
147
  @data['DBLINKS'] = lines_fetch('DBLINKS')
132
148
  end
@@ -143,24 +159,3 @@ end # GLYCAN
143
159
  end # KEGG
144
160
  end # Bio
145
161
 
146
-
147
- if __FILE__ == $0
148
- entry = ARGF.read # gl:G00024
149
- gl = Bio::KEGG::GLYCAN.new(entry)
150
- p gl.entry_id
151
- p gl.name
152
- p gl.composition
153
- p gl.mass
154
- p gl.keggclass
155
- p gl.bindings
156
- p gl.compounds
157
- p gl.reactions
158
- p gl.pathways
159
- p gl.enzymes
160
- p gl.orthologs
161
- p gl.references
162
- p gl.dblinks
163
- p gl.kcf
164
- end
165
-
166
-
@@ -5,10 +5,11 @@
5
5
  # Copyright:: Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: orthology.rb,v 1.10 2007/12/14 16:19:54 k Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
11
  require 'bio/db'
12
+ require 'bio/db/kegg/common'
12
13
 
13
14
  module Bio
14
15
  class KEGG
@@ -27,6 +28,16 @@ class ORTHOLOGY < KEGGDB
27
28
  DELIMITER = RS = "\n///\n"
28
29
  TAGSIZE = 12
29
30
 
31
+ include Common::DblinksAsHash
32
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
33
+ def dblinks_as_hash; super; end if false #dummy for RDoc
34
+ alias dblinks dblinks_as_hash
35
+
36
+ include Common::GenesAsHash
37
+ # Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
38
+ def genes_as_hash; super; end if false #dummy for RDoc
39
+ alias genes genes_as_hash
40
+
30
41
  # Reads a flat file format entry of the KO database.
31
42
  def initialize(entry)
32
43
  super(entry, TAGSIZE)
@@ -68,69 +79,18 @@ class ORTHOLOGY < KEGGDB
68
79
  end
69
80
 
70
81
  # Returns an Array of a database name and entry IDs in DBLINKS field.
71
- def dblinks
72
- unless @data['DBLINKS']
73
- @data['DBLINKS'] = lines_fetch('DBLINKS')
74
- end
75
- @data['DBLINKS']
76
- end
77
-
78
- # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
79
- def dblinks_as_hash
80
- hash = {}
81
- dblinks.each do |line|
82
- name, *list = line.split(/\s+/)
83
- db = name.downcase.sub(/:/, '')
84
- hash[db] = list
85
- end
86
- return hash
82
+ def dblinks_as_strings
83
+ lines_fetch('DBLINKS')
87
84
  end
88
85
 
89
86
  # Returns an Array of the organism ID and entry IDs in GENES field.
90
- def genes
91
- unless @data['GENES']
92
- @data['GENES'] = lines_fetch('GENES')
93
- end
94
- @data['GENES']
87
+ def genes_as_strings
88
+ lines_fetch('GENES')
95
89
  end
96
90
 
97
- # Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
98
- def genes_as_hash
99
- hash = {}
100
- genes.each do |line|
101
- name, *list = line.split(/\s+/)
102
- org = name.downcase.sub(/:/, '')
103
- genes = list.map {|x| x.sub(/\(.*\)/, '')}
104
- #names = list.map {|x| x.scan(/.*\((.*)\)/)}
105
- hash[org] = genes
106
- end
107
- return hash
108
- end
109
-
110
91
  end # ORTHOLOGY
111
92
 
112
93
  end # KEGG
113
94
  end # Bio
114
95
 
115
96
 
116
-
117
- if __FILE__ == $0
118
-
119
- require 'bio/io/fetch'
120
-
121
- flat = Bio::Fetch.query('ko', 'K00001')
122
- entry = Bio::KEGG::ORTHOLOGY.new(flat)
123
-
124
- p entry.entry_id
125
- p entry.name
126
- p entry.names
127
- p entry.definition
128
- p entry.keggclass
129
- p entry.keggclasses
130
- p entry.pathways
131
- p entry.dblinks
132
- p entry.genes
133
-
134
- end
135
-
136
-
@@ -2,12 +2,15 @@
2
2
  # = bio/db/kegg/reaction.rb - KEGG REACTION database class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
5
+ # Copyright:: Copyright (C) 2009 Kozo Nishida <kozo-ni@is.naist.jp>
5
6
  # License:: The Ruby License
6
7
  #
7
- # $Id: reaction.rb,v 1.6 2007/06/28 11:27:24 k Exp $
8
+ # $Id:$
8
9
  #
9
10
 
10
11
  require 'bio/db'
12
+ require 'bio/db/kegg/common'
13
+ require 'enumerator'
11
14
 
12
15
  module Bio
13
16
  class KEGG
@@ -17,44 +20,100 @@ class REACTION < KEGGDB
17
20
  DELIMITER = RS = "\n///\n"
18
21
  TAGSIZE = 12
19
22
 
23
+ include Common::PathwaysAsHash
24
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
25
+ def pathways_as_hash; super; end if false #dummy for RDoc
26
+ alias pathways pathways_as_hash
27
+
28
+ include Common::OrthologsAsHash
29
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
30
+ def orthologs_as_hash; super; end if false #dummy for RDoc
31
+ alias orthologs orthologs_as_hash
32
+
33
+ # Creates a new Bio::KEGG::REACTION object.
34
+ # ---
35
+ # *Arguments*:
36
+ # * (required) _entry_: (String) single entry as a string
37
+ # *Returns*:: Bio::KEGG::REACTION object
20
38
  def initialize(entry)
21
39
  super(entry, TAGSIZE)
22
40
  end
23
41
 
24
- # ENTRY
42
+ # ID of the entry, described in the ENTRY line.
43
+ # ---
44
+ # *Returns*:: String
25
45
  def entry_id
26
46
  field_fetch('ENTRY')[/\S+/]
27
47
  end
28
48
 
29
- # NAME
49
+ # Name of the reaction, described in the NAME line.
50
+ # ---
51
+ # *Returns*:: String
30
52
  def name
31
- field_fetch('NAME')
53
+ field_fetch('NAME')
32
54
  end
33
55
 
34
- # DEFINITION
56
+ # Definition of the reaction, described in the DEFINITION line.
57
+ # ---
58
+ # *Returns*:: String
35
59
  def definition
36
60
  field_fetch('DEFINITION')
37
61
  end
38
62
 
39
- # EQUATION
63
+ # Chemical equation, described in the EQUATION line.
64
+ # ---
65
+ # *Returns*:: String
40
66
  def equation
41
67
  field_fetch('EQUATION')
42
68
  end
43
69
 
44
- # RPAIR
45
- def rpairs
46
- unless @data['RPAIR']
47
- @data['RPAIR'] = fetch('RPAIR').split(/\s+/)
70
+ # KEGG RPAIR (ReactantPair) information, described in the RPAIR lines.
71
+ # ---
72
+ # *Returns*:: Array containing String
73
+ def rpairs_as_strings
74
+ lines_fetch('RPAIR')
75
+ end
76
+
77
+ # KEGG RPAIR (ReactantPair) information, described in the RPAIR lines.
78
+ # Returns a hash of RPair IDs and [ name, type ] information, for example,
79
+ # { "RP12733" => [ "C00022_C00900", "trans" ],
80
+ # "RP05698" => [ "C00011_C00022", "leave" ],
81
+ # "RP00440" => [ "C00022_C00900", "main" ]
82
+ # }
83
+ # ---
84
+ # *Returns*:: Hash
85
+ def rpairs_as_hash
86
+ unless defined? @rpairs_as_hash
87
+ rps = {}
88
+ rpairs_as_strings.each do |line|
89
+ namespace, entry_id, name, rptype = line.split(/\s+/)
90
+ rps[entry_id] = [ name, rptype ]
91
+ end
92
+ @rpairs_as_hash = rps
48
93
  end
49
- @data['RPAIR']
94
+ @rpairs_as_hash
95
+ end
96
+
97
+ alias rpairs rpairs_as_hash
98
+
99
+ # Returns the content of the RPAIR entry as tokens
100
+ # (RPair signature, RPair ID, RPair name, RPair type).
101
+ # ---
102
+ # *Returns*:: Array containing String
103
+ def rpairs_as_tokens
104
+ fetch('RPAIR').split(/\s+/)
50
105
  end
51
106
 
52
- # PATHWAY
53
- def pathways
54
- lines_fetch('PATHWAY')
107
+ # Pathway information, described in the PATHWAY lines.
108
+ # ---
109
+ # *Returns*:: Array containing String
110
+ def pathways_as_strings
111
+ lines_fetch('PATHWAY')
55
112
  end
56
113
 
57
- # ENZYME
114
+ # Enzymes described in the ENZYME line.
115
+ # ---
116
+ # *Returns*:: Array containing String
58
117
  def enzymes
59
118
  unless @data['ENZYME']
60
119
  @data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
@@ -62,21 +121,15 @@ class REACTION < KEGGDB
62
121
  @data['ENZYME']
63
122
  end
64
123
 
124
+ # Orthologs described in the ORTHOLOGY lines.
125
+ # ---
126
+ # *Returns*:: Array containing String
127
+ def orthologs_as_strings
128
+ lines_fetch('ORTHOLOGY')
129
+ end
130
+
65
131
  end # REACTION
66
132
 
67
133
  end # KEGG
68
134
  end # Bio
69
135
 
70
-
71
- if __FILE__ == $0
72
- entry = ARGF.read
73
- rn = Bio::KEGG::REACTION.new(entry)
74
- p rn.entry_id
75
- p rn.name
76
- p rn.definition
77
- p rn.equation
78
- p rn.rpairs
79
- p rn.pathways
80
- p rn.enzymes
81
- end
82
-
@@ -4,7 +4,7 @@
4
4
  # Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: taxonomy.rb,v 1.2 2007/07/09 10:29:16 k Exp $
7
+ # $Id:$
8
8
  #
9
9
 
10
10
  module Bio
@@ -278,54 +278,3 @@ end # Taxonomy
278
278
  end # KEGG
279
279
  end # Bio
280
280
 
281
-
282
-
283
- if __FILE__ == $0
284
-
285
- # Usage:
286
- # % wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
287
- # % ruby taxonomy.rb taxonomy | less -S
288
-
289
- taxonomy = ARGV.shift
290
- org_list = ARGV.shift || nil
291
-
292
- if org_list
293
- orgs = File.readlines(org_list).map{|x| x.strip}
294
- else
295
- orgs = nil
296
- end
297
-
298
- tree = Bio::KEGG::Taxonomy.new(taxonomy, orgs)
299
-
300
- puts ">>> tree - original"
301
- puts tree
302
-
303
- puts ">>> tree - after compact"
304
- tree.compact
305
- puts tree
306
-
307
- puts ">>> tree - after reduce"
308
- tree.reduce
309
- puts tree
310
-
311
- puts ">>> path - sorted"
312
- tree.path.sort.each do |path|
313
- puts path.join("/")
314
- end
315
-
316
- puts ">>> group : orgs"
317
- tree.dfs(tree.root) do |parent, children|
318
- if orgs = tree.organisms(parent)
319
- puts "#{parent.ljust(30)} (#{orgs.size})\t#{orgs.join(', ')}"
320
- end
321
- end
322
-
323
- puts ">>> group : subgroups"
324
- tree.dfs_with_level(tree.root) do |parent, children, level|
325
- subgroups = children.keys.sort
326
- indent = " " * level
327
- label = "#{indent} #{level} #{parent}"
328
- puts "#{label.ljust(35)}\t#{subgroups.join(', ')}"
329
- end
330
-
331
- end