bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -5,9 +5,9 @@
5
5
  # Hiroshi Suga <suga@biophys.kyoto-u.ac.jp>,
6
6
  # Copyright:: Copyright (C) 2005
7
7
  # Masashi Fujita <fujita@kuicr.kyoto-u.ac.jp>
8
- # Lisence:: LGPL
8
+ # License:: The Ruby License
9
9
  #
10
- # $Id: report.rb,v 1.10 2006/02/02 17:08:36 nakao Exp $
10
+ # $Id: report.rb,v 1.13 2007/04/05 23:35:40 trevor Exp $
11
11
  #
12
12
  # == Description
13
13
  #
@@ -38,24 +38,6 @@
38
38
  # * HMMER
39
39
  # http://hmmer.wustl.edu/
40
40
  #
41
- #--
42
- #
43
- # This library is free software; you can redistribute it and/or
44
- # modify it under the terms of the GNU Lesser General Public
45
- # License as published by the Free Software Foundation; either
46
- # version 2 of the License, or (at your option) any later version.
47
- #
48
- # This library is distributed in the hope that it will be useful,
49
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
50
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
51
- # Lesser General Public License for more details.
52
- #
53
- # You should have received a copy of the GNU Lesser General Public
54
- # License along with this library; if not, write to the Free Software
55
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56
- #
57
- #++
58
- #
59
41
 
60
42
  require 'bio/appl/hmmer'
61
43
 
@@ -0,0 +1,374 @@
1
+ #
2
+ # = bio/appl/iprscan/report.rb - a class for iprscan output.
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Mitsuteru C. Nakao <mn@kazusa.or.jp>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: report.rb,v 1.9 2007/07/18 11:11:57 nakao Exp $
9
+ #
10
+ # == Report classes for the iprscan program.
11
+ #
12
+
13
+
14
module Bio

  class Iprscan

    # = DESCRIPTION
    # Class for InterProScan report. It is used to parse results and reformat
    # results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.
    # Only the raw, txt and pseudo-txt parsers and the raw formatter are
    # implemented here.
    #
    # See ftp://ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
    #
    # == USAGE
    #  # Read a merged.txt and split each entry.
    #  Bio::Iprscan::Report.parse_txt(File.read("merged.txt")) do |report|
    #    report.query_id
    #    report.matches.size
    #    report.matches.each do |match|
    #      match.ipr_id #=> 'IPR...'
    #      match.ipr_description
    #      match.method_name
    #      match.accession
    #      match.description
    #      match.match_start
    #      match.match_end
    #      match.evalue
    #    end
    #    # report.output(:raw)
    #  end
    #
    #  Bio::Iprscan::Report.parse_raw(File.read("merged.raw")) do |report|
    #    report.class #=> Bio::Iprscan::Report
    #  end
    #
    class Report
      # Entry delimiter pattern.
      RS = DELIMITER = "\n//\n"

      # Query sequence name (entry_id).
      attr_accessor :query_id
      alias :entry_id :query_id

      # Query sequence length.
      attr_accessor :query_length

      # CRC64 checksum of query sequence.
      attr_accessor :crc64

      # Matched InterPro motifs, as an Array of Bio::Iprscan::Report::Match
      # objects in the order they were parsed.
      attr_accessor :matches

      # Splits a raw formatted stream into entries and yields one
      # Bio::Iprscan::Report per query sequence. Consecutive lines sharing
      # the same first column (query id) form one entry. Blank lines are
      # ignored.
      #
      # +io+ may be anything that responds to +each_line+ (IO, StringIO or
      # String).
      #
      # == USAGE
      #  Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
      #    report
      #  end
      #
      def self.parse_raw(io)
        current_id = nil
        lines = []
        io.each_line do |line|
          next if line.strip.empty?
          id = line.split("\t").first
          # A change of query id closes the entry collected so far.
          if !lines.empty? && id != current_id
            yield parse_raw_entry(lines.join)
            lines = []
          end
          current_id = id
          lines << line
        end
        yield parse_raw_entry(lines.join) unless lines.empty?
      end

      # Parser method for a raw formatted entry. Returns a
      # Bio::Iprscan::Report object.
      #
      # Each non-blank line is split on tabs into: query id, crc64, query
      # length, method, accession, description, match start, match end,
      # evalue, status, date and optionally InterPro id, InterPro
      # description and GO terms. GO terms are kept as an Array of Strings
      # (nil when the column is absent). The report level query_id,
      # query_length and crc64 are taken from the first match; they stay nil
      # when +str+ contains no match line.
      def self.parse_raw_entry(str)
        report = new
        str.split(/\n/).each do |line|
          fields = line.split("\t")
          next if fields.empty?
          go_terms = nil
          if fields[13]
            go_terms = fields[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten
          end
          report.matches << Match.new(:query_id        => fields[0],
                                      :crc64           => fields[1],
                                      :query_length    => fields[2].to_i,
                                      :method          => fields[3],
                                      :accession       => fields[4],
                                      :description     => fields[5],
                                      :match_start     => fields[6].to_i,
                                      :match_end       => fields[7].to_i,
                                      :evalue          => fields[8],
                                      :status          => fields[9],
                                      :date            => fields[10],
                                      :ipr_id          => fields[11],
                                      :ipr_description => fields[12],
                                      :go_terms        => go_terms)
        end
        first = report.matches.first
        if first
          report.query_id = first.query_id
          report.query_length = first.query_length
          report.crc64 = first.crc64
        end
        report
      end

      # Parser method for a xml formatted entry. Returns a
      # Bio::Iprscan::Report object.
      # def self.parse_xml(str)
      # end

      # Splits a txt formatted stream into entries (each one starting with a
      # "Sequence" header line) and yields one Bio::Iprscan::Report per entry.
      #
      # +io+ may be anything that responds to +each_line+ (IO, StringIO or
      # String).
      #
      # == Usage
      #
      #  Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
      #    report.class #=> Bio::Iprscan::Report
      #  end
      #
      def self.parse_txt(io)
        io.each_line("\n\nSequence") do |entry|
          # The record separator swallows the "Sequence" keyword of the next
          # entry: drop it from the tail of this chunk (only at the very end,
          # never from an inner line) and give it back to chunks that lost it.
          entry = entry.sub(/Sequence\z/, '')
          entry = 'Sequence' + entry unless entry =~ /^Sequence/
          yield parse_txt_entry(entry)
        end
      end

      # Parser method for a txt formatted entry. Returns a
      # Bio::Iprscan::Report object.
      #
      # The entry is a "Sequence ..." header paragraph followed by one
      # paragraph per InterPro entry. Columns inside a paragraph are
      # separated by runs of two or more spaces; the location column holds
      # "S[from-to] score" groups separated by single spaces. GO terms are
      # stored as an Array of [aspect, term, GO id] triples and apply only to
      # the paragraph they appear in.
      #
      # Raises ArgumentError when +str+ has no "Sequence " header line.
      def self.parse_txt_entry(str)
        unless str =~ /^Sequence /
          raise ArgumentError, "Invalid format: \n\n#{str}"
        end
        header, *blocks = str.split(/\n\n/)
        report = new
        report.query_id = header[/Sequence \"(.+)\" /, 1] || ''
        length = header[/length: (\d+) aa./, 1]
        report.query_length = length ? length.to_i : nil
        report.crc64 = header[/crc64 checksum: (\S+) /, 1]
        ipr_line = []
        blocks.each do |block|
          # GO terms belong to a single InterPro paragraph; start clean so a
          # paragraph without a GO line does not inherit the previous terms.
          go_annotation = []
          block.split(/\n/).each do |line|
            fields = line.split(/ {2,}/)
            case fields[0]
            when nil, 'method'
              # blank line or column header line: nothing to record
            when /(Molecular Function|Cellular Component|Biological Process):/
              go_annotation = fields[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
            when 'InterPro'
              ipr_line = fields
            else
              next unless fields[3] # not a match line (no location column)
              fields[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |status, from, to, score|
                report.matches << Match.new(:ipr_id          => ipr_line[1],
                                            :ipr_description => ipr_line[2],
                                            :method          => fields[0],
                                            :accession       => fields[1],
                                            :description     => fields[2],
                                            :evalue          => score,
                                            :status          => status,
                                            :match_start     => from.to_i,
                                            :match_end       => to.to_i,
                                            :go_terms        => go_annotation)
              end
            end
          end
        end
        report
      end

      # Splits a pseudo-txt formatted stream on the entry delimiter (RS) and
      # yields one Bio::Iprscan::Report per entry.
      #
      # +io+ may be anything that responds to +each_line+ (IO, StringIO or
      # String).
      #
      # == Usage
      #  Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
      #    report
      #  end
      def self.parse_ptxt(io)
        io.each_line(RS) do |entry|
          yield parse_ptxt_entry(entry)
        end
      end

      # Parser method for a pseudo-txt formatted entry. Returns a
      # Bio::Iprscan::Report object.
      #
      # Lines are tab separated: a two column line gives query id and length,
      # an "InterPro" line gives the current InterPro id and description, and
      # lines with at least five columns (method, accession, description,
      # evalue, "start-end") are matches. Other lines, including the "//"
      # terminator and blank lines, are ignored.
      #
      # == Usage
      #
      #  File.read("merged.txt").each_line(Bio::Iprscan::Report::RS) do |e|
      #    report = Bio::Iprscan::Report.parse_ptxt_entry(e)
      #  end
      #
      def self.parse_ptxt_entry(str)
        report = new
        ipr_line = []
        str.split(/\n/).each do |line|
          fields = line.split("\t")
          if fields.size == 2
            report.query_id = fields[0]
            report.query_length = fields[1].to_i
          elsif fields.first == '//'
            # entry terminator
          elsif fields.first == 'InterPro'
            ipr_line = fields
          elsif fields[4]
            startp, endp = fields[4].split("-")
            report.matches << Match.new(:ipr_id          => ipr_line[1],
                                        :ipr_description => ipr_line[2],
                                        :method          => fields[0],
                                        :accession       => fields[1],
                                        :description     => fields[2],
                                        :evalue          => fields[3],
                                        :match_start     => startp.to_i,
                                        :match_end       => endp.to_i)
          end
        end
        report
      end

      # Creates an empty report (no query information, no matches).
      def initialize
        @query_id = nil
        @query_length = nil
        @crc64 = nil
        @matches = []
      end

      # Output interpro matches in the format_type.
      # Only 'raw' / :raw is supported; anything else raises NameError.
      def output(format_type)
        case format_type
        when 'raw', :raw
          format_raw
        else
          raise NameError, "Invalid format_type."
        end
      end

      # def format_html
      # end

      # def format_xml
      # end

      # def format_ebixml
      # end

      # def format_txt
      # end

      # Returns the matches as raw formatted text: one tab separated line
      # per match, lines joined with "\n" (no trailing newline). Missing
      # values are written as empty columns.
      def format_raw
        @matches.map { |match|
          [query_id,
           crc64,
           query_length,
           match.method_name,
           match.accession,
           match.description,
           match.match_start,
           match.match_end,
           match.evalue,
           match.status,
           match.date,
           match.ipr_id,
           match.ipr_description,
           format_go_terms(match.go_terms)
          ].join("\t")
        }.join("\n")
      end

      # Renders GO terms for format_raw. Accepts nil, a String, an Array of
      # Strings (as produced by parse_raw_entry) or an Array of
      # [aspect, term, GO id] triples (as produced by parse_txt_entry).
      def format_go_terms(terms)
        terms = [terms] if terms.is_a?(String)
        (terms || []).map { |term|
          term.is_a?(String) ? term : "#{term[0]}: #{term[1]} (#{term[2]})"
        }.reject { |text| text.empty? }.join(', ')
      end
      private :format_go_terms

      # def format_gff3
      # end

      # Returns a Hash (key as an InterPro ID and value as an Array of Match).
      # The Hash is built on the first call and cached, so matches added
      # afterwards are not reflected.
      #
      #  report.to_hash.each do |ipr_id, matches|
      #    matches.each do |match|
      #      match.ipr_id == ipr_id #=> true
      #    end
      #  end
      #
      def to_hash
        @ipr_ids ||= @matches.inject({}) do |hash, match|
          (hash[match.ipr_id] ||= []) << match
          hash
        end
      end

      # == Description
      # Container class for InterProScan matches. Values are kept in a Hash
      # keyed by Symbol; besides the readers below, any other attribute can
      # be read or written by name (unknown attributes read as nil).
      #
      # == Usage
      #  match = Match.new(:query_id => ...)
      #
      #  match.ipr_id = 'IPR001234'
      #  match.ipr_id #=> 'IPR001234'
      #
      class Match
        # +hash+ maps attribute names (String or Symbol) to values.
        def initialize(hash)
          @data = {}
          hash.each { |key, value| @data[key.to_sym] = value }
        end

        # Date for computation.
        def date; @data[:date]; end
        # CRC64 checksum of query sequence.
        def crc64; @data[:crc64]; end
        # E-value of the match
        def evalue; @data[:evalue]; end
        # Status of the match (T for true / M for marginal).
        def status; @data[:status]; end
        # the corresponding InterPro entry (if any).
        def ipr_id; @data[:ipr_id]; end
        # the length of the sequence in AA (falls back to :query_length,
        # which is the key the raw parser stores).
        def length; @data[:length] || @data[:query_length]; end
        # the length of the query sequence in AA.
        def query_length; @data[:query_length]; end
        # the analysis method launched.
        # (Not named +method+ because that would shadow Object#method.)
        def method_name; @data[:method]; end
        # the Gene Ontology description for the InterPro entry, in "Aspect :term (ID)" format.
        def go_terms; @data[:go_terms]; end
        # Id of the input sequence.
        def query_id; @data[:query_id]; end
        # the end of the domain match.
        def match_end; @data[:match_end]; end
        # the database members entry for this match.
        def accession; @data[:accession]; end
        # the database members description for this match.
        def description; @data[:description]; end
        # the start of the domain match.
        def match_start; @data[:match_start]; end
        # the description of the InterPro entry.
        def ipr_description; @data[:ipr_description]; end
        # Misspelled name kept for backward compatibility.
        alias :ipr_odescription :ipr_description

        # Generic attribute access: "name = value" (or "name(value)" with a
        # non-nil value) stores the value, a bare "name" reads it. Setters
        # are recognized by the trailing "=", so nil and false can be
        # assigned as well.
        def method_missing(name, arg = nil)
          key = name.to_s
          if key =~ /=\z/
            @data[key.sub(/=\z/, '').to_sym] = arg
          elsif arg
            @data[key.to_sym] = arg
          else
            @data[key.to_sym]
          end
        end

        # Reports setters and stored attributes as supported so that
        # respond_to? agrees with method_missing for them.
        def respond_to_missing?(name, include_private = false)
          key = name.to_s
          return true if key =~ /=\z/
          @data.key?(key.to_sym) || super
        end

      end # class Match

    end # class Report

  end # class Iprscan

end # module Bio
@@ -2,25 +2,9 @@
2
2
  # = bio/appl/mafft.rb - MAFFT wrapper class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
- # License:: LGPL
5
+ # License:: The Ruby License
6
6
  #
7
- #--
8
- # This library is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU Lesser General Public
10
- # License as published by the Free Software Foundation; either
11
- # version 2 of the License, or (at your option) any later version.
12
- #
13
- # This library is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # Lesser General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU Lesser General Public
19
- # License along with this library; if not, write to the Free Software
20
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
- #++
22
- #
23
- # $Id: mafft.rb,v 1.9 2005/12/18 15:58:40 k Exp $
7
+ # $Id: mafft.rb,v 1.18 2007/07/16 12:27:29 ngoto Exp $
24
8
  #
25
9
  # Bio::MAFFT is a wrapper class to execute MAFFT.
26
10
  # MAFFT is a very fast multiple sequence alignment software.
@@ -39,14 +23,13 @@
39
23
  # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
40
24
  #
41
25
 
26
+ require 'tempfile'
27
+
28
+ require 'bio/command'
29
+
42
30
  require 'bio/db/fasta'
43
31
  require 'bio/io/flatfile'
44
32
 
45
- #--
46
- # We use Open3.popen3, because MAFFT on win32 requires Cygwin.
47
- #++
48
- require 'open3'
49
-
50
33
  module Bio
51
34
 
52
35
  # Bio::MAFFT is a wrapper class to execute MAFFT.
@@ -122,62 +105,116 @@ module Bio
122
105
  # Creates a new alignment factory.
123
106
  # +program+ is the name of the program.
124
107
  # +opt+ is options of the program.
125
- def initialize(program, option)
108
+ def initialize(program = 'mafft', opt = [])
126
109
  @program = program
127
- @option = option
110
+ @options = opt
128
111
  @command = nil
129
112
  @output = nil
130
113
  @report = nil
131
- @log = nil
114
+ @data_stdout = nil
115
+ @exit_status = nil
132
116
  end
133
117
 
134
- # program name
118
+ # program name (usually 'mafft' in UNIX)
135
119
  attr_accessor :program
136
120
 
137
121
  # options
138
- attr_accessor :option
122
+ attr_accessor :options
123
+
124
+ # option is deprecated. Instead, please use options.
125
+ def option
126
+ warn "Bio::MAFFT#option is deprecated. Please use options."
127
+ options
128
+ end
139
129
 
140
130
  # Shows last command-line string. Returns nil or an array of String.
141
131
  # Note that filenames described in the command-line may already
142
132
  # be removed because they are temporary files.
143
133
  attr_reader :command
144
134
 
135
+ #---
145
136
  # last message to STDERR when executing the program.
146
- attr_reader :log
137
+ #attr_reader :log
138
+ #+++
139
+
140
+ #log is deprecated (no replacement) and returns empty string.
141
+ def log
142
+ warn "Bio::MAFFT#log is deprecated (no replacement) and returns empty string."
143
+ ''
144
+ end
147
145
 
148
146
  # Shows latest raw alignment result.
149
- # Since a result of MAFFT is simply a multiple-fasta format,
150
- # it returns an array of Bio::FastaFormat instances
151
- # instead of raw string.
147
+ # Return a string. (Changed in bioruby-1.1.0).
148
+ # Compatibility note:
149
+ # If you want an array of Bio::FastaFormat instances,
150
+ # you should use report.data instead.
152
151
  attr_reader :output
153
152
 
154
153
  # Shows last alignment result (instance of Bio::MAFFT::Report class)
155
154
  # performed by the factory.
156
155
  attr_reader :report
157
156
 
157
+ # Last exit status
158
+ attr_reader :exit_status
159
+
160
+ # Last output to the stdout.
161
+ attr_accessor :data_stdout
162
+
163
+ # Clear the internal data and status, except program and options.
164
+ def reset
165
+ @command = nil
166
+ @output = nil
167
+ @report = nil
168
+ @exit_status = nil
169
+ @data_stdout = nil
170
+ end
171
+
158
172
  # Executes the program.
159
173
  # If +seqs+ is not nil, perform alignment for seqs.
160
174
  # If +seqs+ is nil, simply executes the program.
175
+ #
176
+ # Compatibility note: When seqs is nil,
177
+ # returns true if the program exits normally, and
178
+ # returns false if the program exits abnormally.
161
179
  def query(seqs)
162
180
  if seqs then
163
181
  query_align(seqs)
164
182
  else
165
- exec_local(@option)
183
+ exec_local(@options)
184
+ @exit_status.exitstatus == 0 ? true : false
166
185
  end
167
186
  end
168
187
 
188
+ # Note that this method will be renamed to query_alignment.
189
+ #
169
190
  # Performs alignment for seqs.
170
191
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
192
+ #
193
+ # Compatibility Note: arg is deprecated and ignored.
171
194
  def query_align(seqs, *arg)
195
+ if arg.size > 0 then
196
+ warn '2nd and other arguments of Bio::MAFFT#query_align is ignored'
197
+ end
172
198
  unless seqs.is_a?(Bio::Alignment)
173
- seqs = Bio::Alignment.new(seqs, *arg)
199
+ seqs = Bio::Alignment.new(seqs)
174
200
  end
175
- query_string(seqs.to_fasta(70))
201
+ query_string(seqs.output_fasta(:width => 70))
202
+ end
203
+
204
+ # Performs alignment for seqs.
205
+ # +seqs+ should be Bio::Alignment or Array of sequences or nil.
206
+ def query_alignment(seqs)
207
+ query_align(seqs)
176
208
  end
177
209
 
178
210
  # Performs alignment for +str+.
179
211
  # Str should be a string that can be recognized by the program.
212
+ #
213
+ # Compatibility Note: arg is deprecated and ignored.
180
214
  def query_string(str, *arg)
215
+ if arg.size > 0 then
216
+ warn '2nd and other arguments of Bio::MAFFT#query_string is ignored'
217
+ end
181
218
  begin
182
219
  tf_in = Tempfile.open('align')
183
220
  tf_in.print str
@@ -190,10 +227,15 @@ module Bio
190
227
  end
191
228
 
192
229
  # Performs alignment of sequences in the file named +fn+.
193
- def query_by_filename(fn, seqtype = nil)
194
- opt = @option + [ fn ]
230
+ #
231
+ # Compatibility Note: 2nd argument (seqtype) is deprecated and ignored.
232
+ def query_by_filename(fn, *arg)
233
+ if arg.size > 0 then
234
+ warn '2nd argument of Bio::MAFFT#query_filename is ignored'
235
+ end
236
+ opt = @options + [ fn ]
195
237
  exec_local(opt)
196
- @report = Report.new(@output, seqtype)
238
+ @report = Report.new(@output)
197
239
  @report
198
240
  end
199
241
 
@@ -202,19 +244,14 @@ module Bio
202
244
  def exec_local(opt)
203
245
  @command = [ @program, *opt ]
204
246
  #STDERR.print "DEBUG: ", @command.join(" "), "\n"
205
- @output = nil
206
- @log = nil
207
- Open3.popen3(*@command) do |din, dout, derr|
208
- din.close
209
- derr.sync = true
210
- t = Thread.start do
211
- @log = derr.read
212
- end
213
- ff = Bio::FlatFile.new(Bio::FastaFormat, dout)
214
- @output = ff.to_a
215
- t.join
247
+ @data_stdout = nil
248
+ @exit_status = nil
249
+ Bio::Command.call_command(@command) do |io|
250
+ io.close_write
251
+ @data_stdout = io.read
216
252
  end
217
- @log
253
+ @output = @data_stdout
254
+ @exit_status = $?
218
255
  end
219
256
 
220
257
  end #class MAFFT