bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,86 +1,71 @@
1
1
  #
2
- # bio/db/kegg/reaction.rb - KEGG REACTION database class
2
+ # = bio/db/kegg/reaction.rb - KEGG REACTION database class
3
3
  #
4
- # Copyright (C) 2004 KATAYAMA Toshiaki <k@bioruby.org>
4
+ # Copyright:: Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
5
+ # License:: The Ruby License
5
6
  #
6
- # This library is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU Lesser General Public
8
- # License as published by the Free Software Foundation; either
9
- # version 2 of the License, or (at your option) any later version.
10
- #
11
- # This library is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # Lesser General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU Lesser General Public
17
- # License along with this library; if not, write to the Free Software
18
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
- #
20
- # $Id: reaction.rb,v 1.3 2005/09/08 01:22:11 k Exp $
7
+ # $Id: reaction.rb,v 1.6 2007/06/28 11:27:24 k Exp $
21
8
  #
22
9
 
23
10
  require 'bio/db'
24
11
 
25
12
  module Bio
13
+ class KEGG
14
+
15
+ class REACTION < KEGGDB
16
+
17
+ DELIMITER = RS = "\n///\n"
18
+ TAGSIZE = 12
19
+
20
+ def initialize(entry)
21
+ super(entry, TAGSIZE)
22
+ end
23
+
24
+ # ENTRY
25
+ def entry_id
26
+ field_fetch('ENTRY')[/\S+/]
27
+ end
28
+
29
+ # NAME
30
+ def name
31
+ field_fetch('NAME')
32
+ end
26
33
 
27
- class KEGG
28
-
29
- class REACTION < KEGGDB
30
-
31
- DELIMITER = RS = "\n///\n"
32
- TAGSIZE = 12
33
-
34
- def initialize(entry)
35
- super(entry, TAGSIZE)
36
- end
37
-
38
- # ENTRY
39
- def entry_id
40
- field_fetch('ENTRY')
41
- end
42
-
43
- # NAME
44
- def name
45
- field_fetch('NAME')
46
- end
47
-
48
- # DEFINITION
49
- def definition
50
- field_fetch('DEFINITION')
51
- end
52
-
53
- # EQUATION
54
- def equation
55
- field_fetch('EQUATION')
56
- end
57
-
58
- # RPAIR
59
- def rpairs
60
- unless @data['RPAIR']
61
- @data['RPAIR'] = fetch('RPAIR').split(/\s+/)
62
- end
63
- @data['RPAIR']
64
- end
65
-
66
- # PATHWAY
67
- def pathways
68
- lines_fetch('PATHWAY')
69
- end
70
-
71
- # ENZYME
72
- def enzymes
73
- unless @data['ENZYME']
74
- @data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
75
- end
76
- @data['ENZYME']
77
- end
34
+ # DEFINITION
35
+ def definition
36
+ field_fetch('DEFINITION')
37
+ end
78
38
 
39
+ # EQUATION
40
+ def equation
41
+ field_fetch('EQUATION')
42
+ end
43
+
44
+ # RPAIR
45
+ def rpairs
46
+ unless @data['RPAIR']
47
+ @data['RPAIR'] = fetch('RPAIR').split(/\s+/)
79
48
  end
49
+ @data['RPAIR']
50
+ end
80
51
 
52
+ # PATHWAY
53
+ def pathways
54
+ lines_fetch('PATHWAY')
81
55
  end
82
56
 
83
- end
57
+ # ENZYME
58
+ def enzymes
59
+ unless @data['ENZYME']
60
+ @data['ENZYME'] = fetch('ENZYME').scan(/\S+/)
61
+ end
62
+ @data['ENZYME']
63
+ end
64
+
65
+ end # REACTION
66
+
67
+ end # KEGG
68
+ end # Bio
84
69
 
85
70
 
86
71
  if __FILE__ == $0
@@ -0,0 +1,331 @@
1
+ #
2
+ # = bio/db/kegg/taxonomy.rb - KEGG taxonomy parser class
3
+ #
4
+ # Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id: taxonomy.rb,v 1.2 2007/07/09 10:29:16 k Exp $
8
+ #
9
+
10
+ module Bio
11
+ class KEGG
12
+
13
+ # == Description
14
+ #
15
+ # Parse the KEGG 'taxonomy' file which describes taxonomic classification
16
+ # of organisms.
17
+ #
18
+ # == References
19
+ #
20
+ # The KEGG 'taxonomy' file is available at
21
+ #
22
+ # * ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
23
+ #
24
+ class Taxonomy
25
+
26
+ def initialize(filename, orgs = [])
27
+ # Stores the taxonomic tree as a linked list (implemented in Hash), so
28
+ # every node needs to have a unique name (key) to work correctly
29
+ @tree = Hash.new
30
+
31
+ # Also stores the taxonomic tree as a list of arrays (full path)
32
+ @path = Array.new
33
+
34
+ # Also stores all leaf nodes (organism codes) of every intermediate node
35
+ @leaves = Hash.new
36
+
37
+ # tentative name for the root node (use accessor to change)
38
+ @root = 'Genes'
39
+
40
+ hier = Array.new
41
+ level = 0
42
+ label = nil
43
+
44
+ File.open(filename).each do |line|
45
+ next if line.strip.empty?
46
+
47
+ # line for taxonomic hierarchy (indent according to the number of # marks)
48
+ if line[/^#/]
49
+ level = line[/^#+/].length
50
+ label = line[/[A-z].*/]
51
+ hier[level] = sanitize(label)
52
+
53
+ # line for organism name (unify different strains of a species)
54
+ else
55
+ tax, org, name, desc = line.chomp.split("\t")
56
+ if orgs.nil? or orgs.empty? or orgs.include?(org)
57
+ species, strain, = name.split('_')
58
+ # (0) Grouping of the strains of the same species.
59
+ # If the name of species is the same as the previous line,
60
+ # add the species to the same species group.
61
+ # ex. Gamma/enterobacteria has a large number of organisms,
62
+ # so sub grouping of strains is needed for E.coli strains etc.
63
+ #
64
+ # However, if the species name is already used, need to avoid
65
+ # collision of species name as the current implementation stores
66
+ # the tree as a Hash, which may cause the infinite loop.
67
+ #
68
+ # (1) If species name == the intermediate node of other lineage
69
+ # Add '_sp' to the species name to avoid the conflict (1-1), and if
70
+ # 'species_sp' is already taken, use 'species_strain' instead (1-2).
71
+ # ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
72
+ # Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
73
+ # -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
74
+ # Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
75
+ #
76
+ # (2) If species name == the intermediate node of the same lineage
77
+ # Add '_sp' to the species name to avoid the conflict.
78
+ # ex. Bacteria/Cyanobacteria/Cyanobacteria_CYA/cya
79
+ # Bacteria/Cyanobacteria/Cyanobacteria_CYB/cya
80
+ # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_MC1/mgm
81
+ # -> Bacteria/Cyanobacteria/Cyanobacteria_sp/cya
82
+ # Bacteria/Cyanobacteria/Cyanobacteria_sp/cya
83
+ # Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_sp/mgm
84
+ sp_group = "#{species}_sp"
85
+ if @tree[species]
86
+ if hier[level+1] == species
87
+ # case (0)
88
+ else
89
+ # case (1-1)
90
+ species = sp_group
91
+ # case (1-2)
92
+ if @tree[sp_group] and hier[level+1] != species
93
+ species = name
94
+ end
95
+ end
96
+ else
97
+ if hier[level] == species
98
+ # case (2)
99
+ species = sp_group
100
+ end
101
+ end
102
+ # 'hier' is an array of the taxonomic tree + species and strain name.
103
+ # ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
104
+ # [S_cerevisiae, sce]
105
+ hier[level+1] = species # sanitize(species)
106
+ hier[level+2] = org
107
+ ary = hier[1, level+2]
108
+ warn ary.inspect if $DEBUG
109
+ add_to_tree(ary)
110
+ add_to_leaves(ary)
111
+ add_to_path(ary)
112
+ end
113
+ end
114
+ end
115
+ return tree
116
+ end
117
+
118
+ attr_reader :tree
119
+ attr_reader :path
120
+ attr_reader :leaves
121
+ attr_accessor :root
122
+
123
+ def organisms(group)
124
+ @leaves[group]
125
+ end
126
+
127
+ # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
128
+ # and every intermediate nodes stores their child nodes as a Hash.
129
+ def add_to_tree(ary)
130
+ parent = @root
131
+ ary.each do |node|
132
+ @tree[parent] ||= Hash.new
133
+ @tree[parent][node] = nil
134
+ parent = node
135
+ end
136
+ end
137
+
138
+ # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
139
+ # and stores leaf nodes to the every intermediate nodes as an Array.
140
+ def add_to_leaves(ary)
141
+ leaf = ary.last
142
+ ary.each do |node|
143
+ @leaves[node] ||= Array.new
144
+ @leaves[node] << leaf
145
+ end
146
+ end
147
+
148
+ # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
149
+ # and stores the path itself in an Array.
150
+ def add_to_path(ary)
151
+ @path << ary
152
+ end
153
+
154
+ # Compaction of intermediate nodes of the resulted taxonomic tree.
155
+ # - If child node has only one child node (grandchild), make the child of
156
+ # grandchild as a grandchild.
157
+ # ex.
158
+ # Plants / Monocotyledons / grass family / osa
159
+ # --> Plants / Monocotyledons / osa
160
+ #
161
+ def compact(node = root)
162
+ # if the node has children
163
+ if subnodes = @tree[node]
164
+ # obtain grandchildren for each child
165
+ subnodes.keys.each do |subnode|
166
+ if subsubnodes = @tree[subnode]
167
+ # if the number of grandchild node is 1
168
+ if subsubnodes.keys.size == 1
169
+ # obtain the name of the grandchild node
170
+ subsubnode = subsubnodes.keys.first
171
+ # obtain the child of the grandchild node
172
+ if subsubsubnodes = @tree[subsubnode]
173
+ # make the child of grandchild node as a child of child node
174
+ @tree[subnode] = subsubsubnodes
175
+ # delete grandchild node
176
+ @tree[subnode].delete(subsubnode)
177
+ warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
178
+ # retry until new grandchild also needed to be compacted.
179
+ retry
180
+ end
181
+ end
182
+ end
183
+ # repeat recursively
184
+ compact(subnode)
185
+ end
186
+ end
187
+ end
188
+
189
+ # Reduction of the leaf node of the resulted taxonomic tree.
190
+ # - If the parent node have only one leaf node, replace parent node
191
+ # with the leaf node.
192
+ # ex.
193
+ # Plants / Monocotyledons / osa
194
+ # --> Plants / osa
195
+ #
196
+ def reduce(node = root)
197
+ # if the node has children
198
+ if subnodes = @tree[node]
199
+ # obtain grandchildren for each child
200
+ subnodes.keys.each do |subnode|
201
+ if subsubnodes = @tree[subnode]
202
+ # if the number of grandchild node is 1
203
+ if subsubnodes.keys.size == 1
204
+ # obtain the name of the grandchild node
205
+ subsubnode = subsubnodes.keys.first
206
+ # if the grandchild node is a leaf node
207
+ unless @tree[subsubnode]
208
+ # make the grandchild node as a child node
209
+ @tree[node].update(subsubnodes)
210
+ # delete child node
211
+ @tree[node].delete(subnode)
212
+ warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
213
+ end
214
+ end
215
+ end
216
+ # repeat recursively
217
+ reduce(subnode)
218
+ end
219
+ end
220
+ end
221
+
222
+ # Traverse the taxonomic tree by the depth first search method
223
+ # under the given (root or intermediate) node.
224
+ def dfs(parent, &block)
225
+ if children = @tree[parent]
226
+ yield parent, children
227
+ children.keys.each do |child|
228
+ dfs(child, &block)
229
+ end
230
+ end
231
+ end
232
+
233
+ # Similar to the dfs method but also passes the current level of the nest
234
+ # to the iterator.
235
+ def dfs_with_level(parent, &block)
236
+ @level ||= 0
237
+ if children = @tree[parent]
238
+ yield parent, children, @level
239
+ @level += 1
240
+ children.keys.each do |child|
241
+ dfs_with_level(child, &block)
242
+ end
243
+ @level -= 1
244
+ end
245
+ end
246
+
247
+ # Convert the taxonomic tree structure to a simple ascii art.
248
+ def to_s
249
+ result = "#{@root}\n"
250
+ @tree[@root].keys.each do |node|
251
+ result += ascii_tree(node, " ")
252
+ end
253
+ return result
254
+ end
255
+
256
+ private
257
+
258
+ # Helper method for the to_s method.
259
+ def ascii_tree(node, indent)
260
+ result = "#{indent}+- #{node}\n"
261
+ indent += " "
262
+ @tree[node].keys.each do |child|
263
+ if @tree[child]
264
+ result += ascii_tree(child, indent)
265
+ else
266
+ result += "#{indent}+- #{child}\n"
267
+ end
268
+ end
269
+ return result
270
+ end
271
+
272
+ def sanitize(str)
273
+ str.gsub(/[^A-z0-9]/, '_')
274
+ end
275
+
276
+ end # Taxonomy
277
+
278
+ end # KEGG
279
+ end # Bio
280
+
281
+
282
+
283
+ if __FILE__ == $0
284
+
285
+ # Usage:
286
+ # % wget ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
287
+ # % ruby taxonomy.rb taxonomy | less -S
288
+
289
+ taxonomy = ARGV.shift
290
+ org_list = ARGV.shift || nil
291
+
292
+ if org_list
293
+ orgs = File.readlines(org_list).map{|x| x.strip}
294
+ else
295
+ orgs = nil
296
+ end
297
+
298
+ tree = Bio::KEGG::Taxonomy.new(taxonomy, orgs)
299
+
300
+ puts ">>> tree - original"
301
+ puts tree
302
+
303
+ puts ">>> tree - after compact"
304
+ tree.compact
305
+ puts tree
306
+
307
+ puts ">>> tree - after reduce"
308
+ tree.reduce
309
+ puts tree
310
+
311
+ puts ">>> path - sorted"
312
+ tree.path.sort.each do |path|
313
+ puts path.join("/")
314
+ end
315
+
316
+ puts ">>> group : orgs"
317
+ tree.dfs(tree.root) do |parent, children|
318
+ if orgs = tree.organisms(parent)
319
+ puts "#{parent.ljust(30)} (#{orgs.size})\t#{orgs.join(', ')}"
320
+ end
321
+ end
322
+
323
+ puts ">>> group : subgroups"
324
+ tree.dfs_with_level(tree.root) do |parent, children, level|
325
+ subgroups = children.keys.sort
326
+ indent = " " * level
327
+ label = "#{indent} #{level} #{parent}"
328
+ puts "#{label.ljust(35)}\t#{subgroups.join(', ')}"
329
+ end
330
+
331
+ end