bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,194 +1,151 @@
1
1
  #
2
- # bio/db/kegg/glycan.rb - KEGG GLYCAN database class
2
+ # = bio/db/kegg/glycan.rb - KEGG GLYCAN database class
3
3
  #
4
- # Copyright (C) 2004 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
5
+ # License:: The Ruby License
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: glycan.rb,v 1.2 2005/09/08 01:22:11 k Exp $
7
+ # $Id: glycan.rb,v 1.6 2007/06/28 11:27:24 k Exp $
21
8
  #
22
9
 
23
10
  require 'bio/db'
24
11
 
25
12
  module Bio
13
+ class KEGG
26
14
 
27
- class KEGG
28
-
29
- class GLYCAN < KEGGDB
15
+ class GLYCAN < KEGGDB
30
16
 
31
- DELIMITER = RS = "\n///\n"
32
- TAGSIZE = 12
17
+ DELIMITER = RS = "\n///\n"
18
+ TAGSIZE = 12
33
19
 
34
- def initialize(entry)
35
- super(entry, TAGSIZE)
36
- end
20
+ def initialize(entry)
21
+ super(entry, TAGSIZE)
22
+ end
37
23
 
38
- # ENTRY
39
- def entry_id
40
- unless @data['ENTRY']
41
- @data['ENTRY'] = fetch('ENTRY').split(/\s+/).first
42
- end
43
- @data['ENTRY']
44
- end
24
+ # ENTRY
25
+ def entry_id
26
+ field_fetch('ENTRY')[/\S+/]
27
+ end
45
28
 
46
- # NAME
47
- def name
48
- field_fetch('NAME')
49
- end
29
+ # NAME
30
+ def name
31
+ field_fetch('NAME')
32
+ end
50
33
 
51
- # COMPOSITION
52
- def composition
53
- unless @data['COMPOSITION']
54
- hash = Hash.new(0)
55
- fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val|
56
- hash[key] = val.to_i
57
- end
58
- @data['COMPOSITION'] = hash
59
- end
60
- @data['COMPOSITION']
34
+ # COMPOSITION
35
+ def composition
36
+ unless @data['COMPOSITION']
37
+ hash = Hash.new(0)
38
+ fetch('COMPOSITION').scan(/\((\S+)\)(\d+)/).each do |key, val|
39
+ hash[key] = val.to_i
61
40
  end
41
+ @data['COMPOSITION'] = hash
42
+ end
43
+ @data['COMPOSITION']
44
+ end
62
45
 
63
- # MASS
64
- def mass
65
- unless @data['MASS']
66
- hash = Hash.new
67
- fetch('MASS').scan(/(\S+)\s+\((\S+)\)/).each do |val, key|
68
- hash[key] = val.to_f
69
- end
70
- @data['MASS'] = hash
71
- end
72
- @data['MASS']
46
+ # MASS
47
+ def mass
48
+ unless @data['MASS']
49
+ hash = Hash.new
50
+ fetch('MASS').scan(/(\S+)\s+\((\S+)\)/).each do |val, key|
51
+ hash[key] = val.to_f
73
52
  end
53
+ @data['MASS'] = hash
54
+ end
55
+ @data['MASS']
56
+ end
74
57
 
75
- # CLASS
76
- def keggclass
77
- field_fetch('CLASS')
78
- end
58
+ # CLASS
59
+ def keggclass
60
+ field_fetch('CLASS')
61
+ end
79
62
 
80
- # BINDING
81
- def bindings
82
- unless @data['BINDING']
83
- ary = Array.new
84
- lines = lines_fetch('BINDING')
85
- lines.each do |line|
86
- if /^\S/.match(line)
87
- ary << line
88
- else
89
- ary.last << " #{line.strip}"
90
- end
91
- end
92
- @data['BINDING'] = ary
93
- end
94
- @data['BINDING']
95
- end
63
+ # COMPOUND
64
+ def compounds
65
+ unless @data['COMPOUND']
66
+ @data['COMPOUND'] = fetch('COMPOUND').split(/\s+/)
67
+ end
68
+ @data['COMPOUND']
69
+ end
96
70
 
97
- # COMPOUND
98
- def compounds
99
- unless @data['COMPOUND']
100
- @data['COMPOUND'] = fetch('COMPOUND').split(/\s+/)
101
- end
102
- @data['COMPOUND']
103
- end
71
+ # REACTION
72
+ def reactions
73
+ unless @data['REACTION']
74
+ @data['REACTION'] = fetch('REACTION').split(/\s+/)
75
+ end
76
+ @data['REACTION']
77
+ end
104
78
 
105
- # REACTION
106
- def reactions
107
- unless @data['REACTION']
108
- @data['REACTION'] = fetch('REACTION').split(/\s+/)
109
- end
110
- @data['REACTION']
111
- end
79
+ # PATHWAY
80
+ def pathways
81
+ lines_fetch('PATHWAY')
82
+ end
112
83
 
113
- # PATHWAY
114
- def pathways
115
- lines_fetch('PATHWAY')
84
+ # ENZYME
85
+ def enzymes
86
+ unless @data['ENZYME']
87
+ field = fetch('ENZYME')
88
+ if /\(/.match(field) # old version
89
+ @data['ENZYME'] = field.scan(/\S+ \(\S+\)/)
90
+ else
91
+ @data['ENZYME'] = field.scan(/\S+/)
116
92
  end
93
+ end
94
+ @data['ENZYME']
95
+ end
117
96
 
118
- # ENZYME
119
- def enzymes
120
- unless @data['ENZYME']
121
- field = fetch('ENZYME')
122
- if /\(/.match(field) # old version
123
- @data['ENZYME'] = field.scan(/\S+ \(\S+\)/)
124
- else
125
- @data['ENZYME'] = field.scan(/\S+/)
126
- end
127
- end
128
- @data['ENZYME']
129
- end
97
+ # ORTHOLOG
98
+ def orthologs
99
+ unless @data['ORTHOLOG']
100
+ @data['ORTHOLOG'] = lines_fetch('ORTHOLOG')
101
+ end
102
+ @data['ORTHOLOG']
103
+ end
130
104
 
131
- # ORTHOLOG
132
- def orthologs
133
- unless @data['ORTHOLOG']
134
- ary = Array.new
135
- lines = lines_fetch('ORTHOLOG')
136
- lines.each do |line|
137
- if /^\S/.match(line)
138
- ary << line
139
- else
140
- ary.last << " #{line.strip}"
141
- end
142
- end
143
- @data['ORTHOLOG'] = ary
144
- end
145
- @data['ORTHOLOG']
146
- end
105
+ # COMMENT
106
+ def comment
107
+ field_fetch('COMMENT')
108
+ end
147
109
 
148
- # REFERENCE
149
- def references
150
- unless @data['REFERENCE']
151
- ary = Array.new
152
- lines = lines_fetch('REFERENCE')
153
- lines.each do |line|
154
- if /^\d+\s+\[PMID/.match(line)
155
- ary << line
156
- else
157
- ary.last << " #{line.strip}"
158
- end
159
- end
160
- @data['REFERENCE'] = ary
161
- end
162
- @data['REFERENCE']
163
- end
110
+ # REMARK
111
+ def remark
112
+ field_fetch('REMARK')
113
+ end
164
114
 
165
- # DBLINKS
166
- def dblinks
167
- unless @data['DBLINKS']
168
- ary = Array.new
169
- lines = lines_fetch('DBLINKS')
170
- lines.each do |line|
171
- if /^\S/.match(line)
172
- ary << line
173
- else
174
- ary.last << " #{line.strip}"
175
- end
176
- end
177
- @data['DBLINKS'] = ary
115
+ # REFERENCE
116
+ def references
117
+ unless @data['REFERENCE']
118
+ ary = Array.new
119
+ lines = lines_fetch('REFERENCE')
120
+ lines.each do |line|
121
+ if /^\d+\s+\[PMID/.match(line)
122
+ ary << line
123
+ else
124
+ ary.last << " #{line.strip}"
178
125
  end
179
- @data['DBLINKS']
180
- end
181
-
182
- # ATOM, BOND
183
- def kcf
184
- return "#{get('NODE')}#{get('EDGE')}"
185
126
  end
127
+ @data['REFERENCE'] = ary
128
+ end
129
+ @data['REFERENCE']
130
+ end
186
131
 
132
+ # DBLINKS
133
+ def dblinks
134
+ unless @data['DBLINKS']
135
+ @data['DBLINKS'] = lines_fetch('DBLINKS')
187
136
  end
137
+ @data['DBLINKS']
138
+ end
188
139
 
140
+ # ATOM, BOND
141
+ def kcf
142
+ return "#{get('NODE')}#{get('EDGE')}"
189
143
  end
190
144
 
191
- end
145
+ end # GLYCAN
146
+
147
+ end # KEGG
148
+ end # Bio
192
149
 
193
150
 
194
151
  if __FILE__ == $0
@@ -1,215 +1,283 @@
1
1
  #
2
- # bio/db/kegg/keggtab.rb - KEGG keggtab class
2
+ # = bio/db/kegg/keggtab.rb - KEGG keggtab class
3
3
  #
4
- # Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
- # Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2001 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # Copyright (C) 2003, 2006 Toshiaki Katayama <k@bioruby.org>
6
+ # License:: The Ruby License
6
7
  #
7
- # This library is free software; you can redistribute it and/or
8
- # modify it under the terms of the GNU Lesser General Public
9
- # License as published by the Free Software Foundation; either
10
- # version 2 of the License, or (at your option) any later version.
11
- #
12
- # This library is distributed in the hope that it will be useful,
13
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
- # Lesser General Public License for more details.
16
- #
17
- # You should have received a copy of the GNU Lesser General Public
18
- # License along with this library; if not, write to the Free Software
19
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
- #
21
- # $Id: keggtab.rb,v 1.7 2005/09/26 13:00:07 k Exp $
8
+ # $Id: keggtab.rb,v 1.10 2007/04/05 23:35:41 trevor Exp $
22
9
  #
23
10
 
24
11
  module Bio
25
- class KEGG
12
+ class KEGG
26
13
 
27
- class Keggtab
14
+ # == Description
15
+ #
16
+ # Parse 'keggtab' KEGG database definition file which also includes
17
+ # Taxonomic category of the KEGG organisms.
18
+ #
19
+ # == References
20
+ #
21
+ # The 'keggtab' file is included in
22
+ #
23
+ # * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.tar.gz
24
+ # * ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z
25
+ #
26
+ # == Format
27
+ #
28
+ # File format is something like
29
+ #
30
+ # # KEGGTAB
31
+ # #
32
+ # # name type directory abbreviation
33
+ # #
34
+ # enzyme enzyme $BIOROOT/db/ideas/ligand ec
35
+ # ec alias enzyme
36
+ # (snip)
37
+ # # Human
38
+ # h.sapiens genes $BIOROOT/db/kegg/genes hsa
39
+ # H.sapiens alias h.sapiens
40
+ # hsa alias h.sapiens
41
+ # (snip)
42
+ # #
43
+ # # Taxonomy
44
+ # #
45
+ # (snip)
46
+ # animals alias hsa+mmu+rno+dre+dme+cel
47
+ # eukaryotes alias animals+plants+protists+fungi
48
+ # genes alias eubacteria+archaea+eukaryotes
49
+ #
50
+ class Keggtab
51
+
52
+ # Path for keggtab file and optionally set bioroot top directory.
53
+ # The BIOROOT environment variable, when set, overrides the bioroot argument.
54
+ def initialize(file_path, bioroot = nil)
55
+ @bioroot = ENV['BIOROOT'] || bioroot
56
+ @db_names = Hash.new
57
+ @database = Hash.new
58
+ @taxonomy = Hash.new
59
+ File.open(file_path) do |f|
60
+ parse_keggtab(f.read)
61
+ end
62
+ end
28
63
 
29
- def initialize(file_path, bioroot = nil)
30
- @bioroot = ENV['BIOROOT'] || bioroot
31
- @db_names = Hash.new
32
- @database = Hash.new
33
- @taxonomy = Hash.new
34
- parse_keggtab(File.open(file_path).read)
35
- end
36
- attr_reader :bioroot, :db_names
64
+ # Returns a string of the BIOROOT path prefix.
65
+ attr_reader :bioroot
66
+ attr_reader :db_names
37
67
 
38
68
 
39
- # Bio::KEGG::Keggtab::DB
69
+ # Bio::KEGG::Keggtab::DB
40
70
 
41
- class DB
42
- def initialize(db_name, db_type, db_path, db_abbrev)
43
- @name = db_name
44
- @type = db_type
45
- @path = db_path
46
- @abbrev = db_abbrev
47
- @aliases = Array.new
48
- end
49
- attr_reader :name, :type, :path, :abbrev, :aliases
50
- alias korg abbrev
51
- alias keggorg abbrev
52
- end
71
+ class DB
72
+ # Create a container object for database definitions.
73
+ def initialize(db_name, db_type, db_path, db_abbrev)
74
+ @name = db_name
75
+ @type = db_type
76
+ @path = db_path
77
+ @abbrev = db_abbrev
78
+ @aliases = Array.new
79
+ end
80
+ # Database name. (e.g. 'enzyme', 'h.sapiens', 'e.coli', ...)
81
+ attr_reader :name
82
+ # Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
83
+ attr_reader :type
84
+ # Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
85
+ attr_reader :path
86
+ # Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
87
+ # korg and keggorg are aliases for the abbrev method.
88
+ attr_reader :abbrev
89
+ # Array containing all alias names for the database.
90
+ # (e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
91
+ attr_reader :aliases
92
+
93
+ alias korg abbrev
94
+ alias keggorg abbrev
95
+ end
53
96
 
54
97
 
55
- # DB section
98
+ # DB section
56
99
 
57
- def database(db_abbrev = nil)
58
- if db_abbrev
59
- @database[db_abbrev]
60
- else
61
- @database
62
- end
63
- end
100
+ # Returns a hash containing DB definition section of the keggtab file.
101
+ # If database name is given as an argument, returns a Keggtab::DB object.
102
+ def database(db_abbrev = nil)
103
+ if db_abbrev
104
+ @database[db_abbrev]
105
+ else
106
+ @database
107
+ end
108
+ end
64
109
 
65
- def aliases(db_abbrev)
66
- if @database[db_abbrev]
67
- @database[db_abbrev].aliases
68
- end
69
- end
110
+ # Returns an Array containing all alias names for the database.
111
+ # (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
112
+ def aliases(db_abbrev)
113
+ if @database[db_abbrev]
114
+ @database[db_abbrev].aliases
115
+ end
116
+ end
70
117
 
71
- def name(db_abbrev)
72
- if @database[db_abbrev]
73
- @database[db_abbrev].name
74
- end
75
- end
118
+ # Returns a canonical database name for the abbreviation.
119
+ # (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', ...)
120
+ def name(db_abbrev)
121
+ if @database[db_abbrev]
122
+ @database[db_abbrev].name
123
+ end
124
+ end
76
125
 
77
- def path(db_abbrev)
78
- if @database[db_abbrev]
79
- file = @database[db_abbrev].name
80
- if @bioroot
81
- "#{@database[db_abbrev].path.sub(/\$BIOROOT/,@bioroot)}/#{file}"
82
- else
83
- "#{@database[db_abbrev].path}/#{file}"
84
- end
85
- end
126
+ # Returns an absolute path for the flat file database.
127
+ # (e.g. '/bio/db/kegg/genes', ...)
128
+ def path(db_abbrev)
129
+ if @database[db_abbrev]
130
+ file = @database[db_abbrev].name
131
+ if @bioroot
132
+ "#{@database[db_abbrev].path.sub(/\$BIOROOT/,@bioroot)}/#{file}"
133
+ else
134
+ "#{@database[db_abbrev].path}/#{file}"
86
135
  end
136
+ end
137
+ end
87
138
 
88
139
 
89
- def alias_list(db_name)
90
- if @db_names[db_name]
91
- @db_names[db_name].aliases
92
- end
93
- end
94
-
95
- def db_path(db_name)
96
- if @bioroot
97
- "#{@db_names[db_name].path.sub(/\$BIOROOT/,@bioroot)}/#{db_name}"
98
- else
99
- "#{@db_names[db_name].path}/#{db_name}"
100
- end
101
- end
140
+ # deprecated
141
+ def alias_list(db_name)
142
+ if @db_names[db_name]
143
+ @db_names[db_name].aliases
144
+ end
145
+ end
102
146
 
103
- def db_by_abbrev(db_abbrev)
104
- @db_names.each do |k, db|
105
- return db if db.abbrev == db_abbrev
106
- end
107
- return nil
108
- end
147
+ # deprecated
148
+ def db_path(db_name)
149
+ if @bioroot
150
+ "#{@db_names[db_name].path.sub(/\$BIOROOT/,@bioroot)}/#{db_name}"
151
+ else
152
+ "#{@db_names[db_name].path}/#{db_name}"
153
+ end
154
+ end
109
155
 
110
- def name_by_abbrev(db_abbrev)
111
- db_by_abbrev(db_abbrev).name
112
- end
156
+ # deprecated
157
+ def db_by_abbrev(db_abbrev)
158
+ @db_names.each do |k, db|
159
+ return db if db.abbrev == db_abbrev
160
+ end
161
+ return nil
162
+ end
113
163
 
114
- def db_path_by_abbrev(db_abbrev)
115
- db_name = name_by_abbrev(db_abbrev)
116
- db_path(db_name)
117
- end
164
+ # deprecated
165
+ def name_by_abbrev(db_abbrev)
166
+ db_by_abbrev(db_abbrev).name
167
+ end
118
168
 
169
+ # deprecated
170
+ def db_path_by_abbrev(db_abbrev)
171
+ db_name = name_by_abbrev(db_abbrev)
172
+ db_path(db_name)
173
+ end
119
174
 
120
- # Taxonomy section
121
175
 
122
- def taxonomy(node = nil)
123
- if node
124
- @taxonomy[node]
125
- else
126
- @taxonomy
127
- end
128
- end
176
+ # Taxonomy section
129
177
 
130
- def taxa_list
131
- @taxonomy.keys.sort
132
- end
178
+ # Returns a hash containing Taxonomy section of the keggtab file.
179
+ # If an argument is given, returns a list of all child nodes belonging
180
+ # to the label node.
181
+ # (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
182
+ def taxonomy(node = nil)
183
+ if node
184
+ @taxonomy[node]
185
+ else
186
+ @taxonomy
187
+ end
188
+ end
133
189
 
134
- def child_nodes(node = 'genes')
135
- return @taxonomy[node]
136
- end
190
+ # List of all node labels from Taxonomy section.
191
+ # (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
192
+ def taxa_list
193
+ @taxonomy.keys.sort
194
+ end
137
195
 
138
- def taxo2korgs(node = 'genes')
139
- if node.length == 3
140
- return node
141
- else
142
- if @taxonomy[node]
143
- tmp = Array.new
144
- @taxonomy[node].each do |x|
145
- tmp.push(taxo2korgs(x))
146
- end
147
- return tmp
148
- else
149
- return nil
150
- end
151
- end
152
- end
153
- alias taxo2keggorgs taxo2korgs
154
- alias taxon2korgs taxo2korgs
155
- alias taxon2keggorgs taxo2korgs
196
+ def child_nodes(node = 'genes')
197
+ return @taxonomy[node]
198
+ end
156
199
 
157
- def korg2taxo(keggorg)
200
+ # Returns an array of organism names included in the specified taxon
201
+ # label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
202
+ # This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
203
+ def taxo2korgs(node = 'genes')
204
+ if node.length == 3
205
+ return node
206
+ else
207
+ if @taxonomy[node]
158
208
  tmp = Array.new
159
- traverse = Proc.new {|keggorg|
160
- @taxonomy.each do |k,v|
161
- if v.include?(keggorg)
162
- tmp.push(k)
163
- traverse.call(k)
164
- break
165
- end
166
- end
167
- }
168
- traverse.call(keggorg)
209
+ @taxonomy[node].each do |x|
210
+ tmp.push(taxo2korgs(x))
211
+ end
169
212
  return tmp
213
+ else
214
+ return nil
170
215
  end
171
- alias keggorg2taxo korg2taxo
172
- alias korg2taxonomy korg2taxo
173
- alias keggorg2taxonomy korg2taxo
174
-
175
-
176
- private
177
-
178
- def parse_keggtab(keggtab)
179
- in_taxonomy = nil
180
- keggtab.each do |line|
181
- case line
182
- when /^# Taxonomy/ # beginning of the taxonomy section
183
- in_taxonomy = true
184
- when /^#|^$/
185
- next
186
- when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/ # db
187
- db_name = $1
188
- db_type = $2
189
- db_path = $3
190
- db_abbrev = $4
191
- @db_names[db_name] =
192
- Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
193
- when /(^\w\S+)\s+alias\s+(\w.+\w)/ # alias
194
- db_alias = $1
195
- db_name = $2#.downcase
196
- if in_taxonomy
197
- @taxonomy.update(db_alias => db_name.split('+'))
198
- elsif @db_names[db_name]
199
- @db_names[db_name].aliases.push(db_alias)
200
- end
201
- end
216
+ end
217
+ end
218
+ alias taxo2keggorgs taxo2korgs
219
+ alias taxon2korgs taxo2korgs
220
+ alias taxon2keggorgs taxo2korgs
221
+
222
+ # Returns an array of taxonomy names the organism belongs to.
223
+ # (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
224
+ # This method is also available as keggorg2taxo, korg2taxonomy, and keggorg2taxonomy.
225
+ def korg2taxo(keggorg)
226
+ tmp = Array.new
227
+ traverse = Proc.new {|keggorg|
228
+ @taxonomy.each do |k,v|
229
+ if v.include?(keggorg)
230
+ tmp.push(k)
231
+ traverse.call(k)
232
+ break
202
233
  end
203
- # convert keys-by-names hash @db_names to keys-by-abbrev hash @database
204
- @db_names.each do |k,v|
205
- @database[v.abbrev] = v
234
+ end
235
+ }
236
+ traverse.call(keggorg)
237
+ return tmp
238
+ end
239
+ alias keggorg2taxo korg2taxo
240
+ alias korg2taxonomy korg2taxo
241
+ alias keggorg2taxonomy korg2taxo
242
+
243
+
244
+ private
245
+
246
+ def parse_keggtab(keggtab)
247
+ in_taxonomy = nil
248
+ keggtab.each do |line|
249
+ case line
250
+ when /^# Taxonomy/ # beginning of the taxonomy section
251
+ in_taxonomy = true
252
+ when /^#|^$/
253
+ next
254
+ when /(^\w\S+)\s+(\w+)\s+(\$\S+)\s+(\w+)/ # db
255
+ db_name = $1
256
+ db_type = $2
257
+ db_path = $3
258
+ db_abbrev = $4
259
+ @db_names[db_name] =
260
+ Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
261
+ when /(^\w\S+)\s+alias\s+(\w.+\w)/ # alias
262
+ db_alias = $1
263
+ db_name = $2#.downcase
264
+ if in_taxonomy
265
+ @taxonomy.update(db_alias => db_name.split('+'))
266
+ elsif @db_names[db_name]
267
+ @db_names[db_name].aliases.push(db_alias)
206
268
  end
207
269
  end
208
-
209
270
  end
210
-
271
+ # convert keys-by-names hash @db_names to keys-by-abbrev hash @database
272
+ @db_names.each do |k,v|
273
+ @database[v.abbrev] = v
274
+ end
211
275
  end
212
- end
276
+
277
+ end # Keggtab
278
+
279
+ end # KEGG
280
+ end # Bio
213
281
 
214
282
 
215
283
 
@@ -287,132 +355,3 @@ if __FILE__ == $0
287
355
  end
288
356
 
289
357
 
290
-
291
- =begin
292
-
293
- The keggtab file is included in
294
-
295
- * ((URL:ftp://ftp.genome.jp/pub/kegg/tarfiles/genes.weekly.last.tar.Z>))
296
-
297
- File format is something like
298
-
299
- # KEGGTAB
300
- #
301
- # name type directory abbreviation
302
- #
303
- enzyme enzyme $BIOROOT/db/ideas/ligand ec
304
- ec alias enzyme
305
- (snip)
306
- # Human
307
- h.sapiens genes $BIOROOT/db/kegg/genes hsa
308
- H.sapiens alias h.sapiens
309
- hsa alias h.sapiens
310
- (snip)
311
- #
312
- # Taxonomy
313
- #
314
- (snip)
315
- animals alias hsa+mmu+rno+dre+dme+cel
316
- eukaryotes alias animals+plants+protists+fungi
317
- genes alias eubacteria+archaea+eukaryotes
318
-
319
- = Bio::KEGG::Keggtab
320
-
321
- --- Bio::KEGG::Keggtab.new(file_path, bioroot = nil)
322
-
323
- Path for keggtab file and optionally set bioroot top directory.
324
- Environmental variable BIOROOT overrides bioroot.
325
-
326
- --- Bio::KEGG::Keggtab#database -> Hash
327
-
328
- Returns a hash containing DB definition section of the keggtab file.
329
-
330
- --- Bio::KEGG::Keggtab#database(db_abbrev) -> Keggtab::DB
331
-
332
- Returns a Keggtab::DB object.
333
-
334
- --- Bio::KEGG::Keggtab#taxonomy -> Hash
335
-
336
- Returns a hash containing Taxonomy section of the keggtab file.
337
-
338
- --- Bio::KEGG::Keggtab#taxonomy(node) -> Array
339
-
340
- Returns a List of all child nodes belongs to the label node.
341
- (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
342
-
343
- --- Bio::KEGG::Keggtab#bioroot -> String
344
-
345
- Returns a string of the BIOROOT path prefix.
346
-
347
- --- Bio::KEGG::Keggtab#name(db_abbrev) -> String
348
-
349
- Returns a canonical database name for the abbreviation.
350
- (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapies', ...)
351
-
352
- --- Bio::KEGG::Keggtab#aliases(db_abbrev) -> Array
353
-
354
- Returns an Array containing all alias names for the database.
355
- (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
356
-
357
- --- Bio::KEGG::Keggtab#path(db_abbrev) -> String
358
-
359
- Returns an absolute path for the flat file database.
360
- (e.g. '/bio/db/kegg/genes', ...)
361
-
362
- --- Bio::KEGG::Keggtab#taxa_list -> Array
363
-
364
- List of all node labels from Taxonomy section.
365
- (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
366
-
367
- --- Bio::KEGG::Keggtab#taxo2korgs(taxon) -> Array
368
-
369
- Returns an array of organism names included in the specified taxon
370
- label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"])
371
- This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
372
-
373
- --- Bio::KEGG::Keggtab#korg2taxo(keggorg) -> Array
374
-
375
- Returns an array of taxonomy names the organism belongs.
376
- (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes'])
377
- This method has aliases as keggorg2taxo, korg2taxonomy, keggorg2taxonomy.
378
-
379
- * following methods are deprecated
380
-
381
- --- Bio::KEGG::Keggtab#db_names[db_name] -> Keggtab::DB
382
- --- Bio::KEGG::Keggtab#db_by_abbrev(db_abbrev) -> Keggtab::DB
383
- --- Bio::KEGG::Keggtab#alias_list(db_name) -> Array
384
- --- Bio::KEGG::Keggtab#name_by_abbrev(db_abbrev) -> String
385
- --- Bio::KEGG::Keggtab#db_path(db_name) -> String
386
- --- Bio::KEGG::Keggtab#db_path_by_abbrev(keggorg) -> String
387
-
388
-
389
- == Bio::KEGG::Keggtab::DB
390
-
391
- --- Bio::KEGG::Keggtab::DB.new(db_name, db_type, db_path, db_abbrev)
392
-
393
- Create a container object for database definitions.
394
-
395
- --- Bio::KEGG::Keggtab::DB#name -> String
396
-
397
- Database name. (e.g. 'enzyme', 'h.sapies', 'e.coli', ...)
398
-
399
- --- Bio::KEGG::Keggtab::DB#type -> String
400
-
401
- Definition type. (e.g. 'enzyme', 'alias', 'genes', ...)
402
-
403
- --- Bio::KEGG::Keggtab::DB#path -> String
404
-
405
- Database flat file path. (e.g. '$BIOROOT/db/kegg/genes', ...)
406
-
407
- --- Bio::KEGG::Keggtab::DB#abbrev -> String
408
-
409
- Short name for the database. (e.g. 'ec', 'hsa', 'eco', ...)
410
- korg and keggorg are alias for abbrev method.
411
-
412
- --- Bio::KEGG::Keggtab::DB#aliases -> Array
413
-
414
- Array containing all alias names for the database.
415
- (e.g. ["H.sapiens", "hsa"], ["E.coli", "eco"], ...)
416
-
417
- =end
418
-