bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -1,26 +1,10 @@
1
1
  #
2
2
  # = bio/appl/mafft/report.rb - MAFFT report class
3
3
  #
4
- # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
- # License:: LGPL
4
+ # Copyright:: Copyright (C) 2003, 2007 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
6
  #
7
- #--
8
- # This library is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU Lesser General Public
10
- # License as published by the Free Software Foundation; either
11
- # version 2 of the License, or (at your option) any later version.
12
- #
13
- # This library is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # Lesser General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU Lesser General Public
19
- # License along with this library; if not, write to the Free Software
20
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
- #++
22
- #
23
- # $Id: report.rb,v 1.8 2005/12/18 15:58:40 k Exp $
7
+ # $Id: report.rb,v 1.13 2007/07/16 12:21:39 ngoto Exp $
24
8
  #
25
9
  # MAFFT result parser class.
26
10
  # MAFFT is a very fast multiple sequence alignment software.
@@ -29,6 +13,10 @@
29
13
  # the significance of this class is to keep standard form and
30
14
  # interface between Bio::ClustalW::Report.
31
15
  #
16
+ # Bio::Alignment::MultiFastaFormat is a generic data class for
17
+ # fasta-formatted multiple sequence alignment data.
18
+ # Bio::MAFFT::Report inherits Bio::Alignment::MultiFastaFormat.
19
+ #
32
20
  # == References
33
21
  #
34
22
  # * K. Katoh, K. Misawa, K. Kuma and T. Miyata.
@@ -38,11 +26,97 @@
38
26
  # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/
39
27
  #
40
28
 
29
+ require 'stringio'
41
30
  require 'bio/db/fasta'
42
31
  require 'bio/io/flatfile'
32
+ require 'bio/alignment'
43
33
  require 'bio/appl/mafft'
44
34
 
45
35
  module Bio
36
+ module Alignment
37
+ # Data class for fasta-formatted multiple sequence alignment data,
38
+ # which is simply multiple entries of fasta formatted sequences.
39
+ class MultiFastaFormat
40
+
41
+ # delimiter for flatfile
42
+ DELIMITER = RS = nil
43
+
44
+ # Creates a new data object.
45
+ # +str+ should be a (multi-)fasta formatted string.
46
+ def initialize(str)
47
+ ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
48
+ @data = ff.to_a
49
+ @alignment = nil
50
+ @seq_method = nil
51
+ end
52
+
53
+ # Gets a multiple alignment.
54
+ # Returns a Bio::Alignment object.
55
+ # +method+ should be one of :naseq, :aaseq, :seq, or nil (default).
56
+ # nil means to automatically determine nucleotide or amino acid.
57
+ #
58
+ # This method returns previously parsed object
59
+ # if the same method is given (or guessed method is the same).
60
+ def alignment(method = nil)
61
+ m = determine_seq_method(@data, method)
62
+ if !@alignment or m != @seq_method then
63
+ @seq_method = m
64
+ @alignment = do_parse(@data, @seq_method)
65
+ end
66
+ @alignment
67
+ end
68
+
69
+ # Gets an array of the fasta formatted sequence objects.
70
+ # Returns an array of Bio::FastaFormat objects.
71
+ def entries
72
+ @data
73
+ end
74
+
75
+ private
76
+ # determines seqtype.
77
+ # if nil is given, try to guess DNA or protein.
78
+ def determine_seq_method(data, m = nil)
79
+ case m
80
+ when :aaseq
81
+ :aaseq
82
+ when :naseq
83
+ :naseq
84
+ when :seq
85
+ :seq
86
+ when nil
87
+ # auto-detection
88
+ score = 0
89
+ data[0, 3].each do |e|
90
+ k = e.to_seq.guess
91
+ if k == Bio::Sequence::NA then
92
+ score += 1
93
+ elsif k == Bio::Sequence::AA then
94
+ score -= 1
95
+ end
96
+ end
97
+ if score > 0 then
98
+ :naseq
99
+ elsif score < 0 then
100
+ :aaseq
101
+ else
102
+ :seq
103
+ end
104
+ else
105
+ raise 'one of :naseq, :aaseq, :seq, or nil should be given'
106
+ end
107
+ end
108
+
109
+ # Parses a result.
110
+ def do_parse(ary, seqmethod)
111
+ a = Bio::Alignment.new
112
+ a.add_sequences(ary) do |x|
113
+ [ x.__send__(seqmethod), x.definition ]
114
+ end
115
+ a
116
+ end
117
+ end #class MultiFastaFormat
118
+ end #module Alignment
119
+
46
120
  class MAFFT
47
121
 
48
122
  # MAFFT result parser class.
@@ -51,26 +125,41 @@ module Bio
51
125
  # Since a result of MAFFT is simply a multiple-fasta format,
52
126
  # the significance of this class is to keep standard form and
53
127
  # interface between Bio::ClustalW::Report.
54
- class Report
128
+ class Report < Bio::Alignment::MultiFastaFormat
55
129
 
56
130
  # Creates a new Report object.
57
- # +ary+ should be an Array of Bio::FastaFormat.
58
- # +seqclass+ should on of following:
131
+ # +str+ should be multi-fasta formatted text as a string.
132
+ #
133
+ # Compatibility Note: the old usage (to get array of Bio::FastaFormat
134
+ # objects) is deprecated.
135
+ #
136
+ # Compatibility Note 2: the argument +seqclass+ is deprecated.
137
+ #
138
+ # +seqclass+ should be one of the following:
59
139
  # Class: Bio::Sequence::AA, Bio::Sequence::NA, ...
60
140
  # String: 'PROTEIN', 'DNA', ...
61
- def initialize(ary, seqclass = nil)
62
- @data = ary
63
- @align = nil
64
- case seqclass
65
- when /PROTEIN/i
66
- @seqclass = Bio::Sequence::AA
67
- when /[DR]NA/i
68
- @seqclass = Bio::Sequence::NA
141
+ #
142
+ def initialize(str, seqclass = nil)
143
+ if str.is_a?(Array) then
144
+ warn "Array of Bio::FastaFormat objects will be no longer accepted."
145
+ @data = str
69
146
  else
70
- if seqclass.is_a?(Module) then
71
- @seqclass = seqclass
147
+ super(str)
148
+ end
149
+
150
+ if seqclass then
151
+ warn "the 2nd argument (seqclass) will be no deprecated."
152
+ case seqclass
153
+ when /PROTEIN/i
154
+ @seqclass = Bio::Sequence::AA
155
+ when /[DR]NA/i
156
+ @seqclass = Bio::Sequence::NA
72
157
  else
73
- @seqclass = Bio::Sequence
158
+ if seqclass.is_a?(Module) then
159
+ @seqclass = seqclass
160
+ else
161
+ @seqclass = nil
162
+ end
74
163
  end
75
164
  end
76
165
  end
@@ -79,24 +168,39 @@ module Bio
79
168
  attr_reader :data
80
169
 
81
170
  # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
171
+ #
172
+ # Compatibility note: This method will be removed in the future.
82
173
  attr_reader :seqclass
83
174
 
84
175
  # Gets an multiple alignment.
85
- # Returns an instance of Bio::Alignment class.
176
+ # Returns a Bio::Alignment object.
177
+ def alignment(method = nil)
178
+ super
179
+ end
180
+
181
+ # This method will be deprecated. Instead, please use alignment.
182
+ #
183
+ # Gets an multiple alignment.
184
+ # Returns a Bio::Alignment object.
86
185
  def align
87
- do_parse() unless @align
88
- @align
186
+ warn "Bio::MAFFT::Report#align is deprecated. Please use \'alignment\'."
187
+ alignment
89
188
  end
90
- alias alignment align
91
189
 
190
+ # This will be deprecated. Instead, please use alignment.output_fasta.
191
+ #
92
192
  # Gets an fasta-format string of the sequences.
93
193
  # Returns a string.
94
194
  # Same as align.to_fasta.
95
- # Please refer to Bio::Alignment#to_fasta for arguments.
195
+ # Please refer to Bio::Alignment#output_fasta for arguments.
96
196
  def to_fasta(*arg)
97
- align.to_fasta(*arg)
197
+ warn "Bio::MAFFT::report#to_fasta is deprecated. Please use \'alignment.output_fasta\'"
198
+ alignment.output_fasta(*arg)
98
199
  end
99
200
 
201
+ # Compatibility note: Behavior of the method will be changed
202
+ # in the future.
203
+ #
100
204
  # Gets an array of the sequences.
101
205
  # Returns an array of Bio::FastaFormat instances.
102
206
  def to_a
@@ -105,12 +209,15 @@ module Bio
105
209
 
106
210
  private
107
211
  # Parsing a result.
108
- def do_parse
109
- return nil if @align
110
- @align = Bio::Alignment.new(@data) do |x|
111
- [ @seqclass.new(x.seq), x.definition ]
212
+ def do_parse(ary, seqmethod)
213
+ if @seqclass then
214
+ a = Bio::Alignment.new
215
+ a.add_sequences(ary) do |x|
216
+ [ @seqclass.new(x.seq), x.definition ]
217
+ end
218
+ else
219
+ super(ary, seqmethod)
112
220
  end
113
- nil
114
221
  end
115
222
 
116
223
  end #class Report
@@ -0,0 +1,52 @@
1
+ #
2
+ # = bio/appl/muscle.rb - MUSCLE application wrapper class
3
+ #
4
+ # Copyright:: Copyright (C) 2006-2007
5
+ # Jeffrey Blakeslee and John Conery University of Oregon <jeffb@uoregon.edu>
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: muscle.rb,v 1.1 2007/07/16 12:25:50 ngoto Exp $
10
+ #
11
+ #
12
+ # Bio::Muscle is a wrapper class to execute MUSCLE.
13
+ #
14
+ # == References
15
+ #
16
+ # * http://www.drive5.com/muscle/
17
+ # * Edgar R.C.
18
+ # MUSCLE: multiple sequence alignment with high accuracy and
19
+ # high throughput. Nucleic Acids Res. 32: 1792-1797, 2004.
20
+ # * Edgar, R.C.
21
+ # MUSCLE: a multiple sequence alignment method with reduced time
22
+ # and space complexity. BMC Bioinformatics 5: 113, 2004.
23
+ #
24
+
25
+ module Bio
26
+
27
+ # Bio::Muscle is a wrapper class to execute MUSCLE.
28
+ #
29
+ # Please refer to the documentation in bio/appl/muscle.rb for references.
30
+ class Muscle < Bio::Alignment::FactoryTemplate::StdinInFileOut
31
+
32
+ # default program name
33
+ DEFAULT_PROGRAM = 'muscle'.freeze
34
+
35
+ # default report parser
36
+ DEFAULT_PARSER = Bio::Alignment::MultiFastaFormat
37
+
38
+ private
39
+ # generates options specifying input filename.
40
+ # returns an array of string
41
+ def _option_input_file(fn)
42
+ [ '-in', fn ]
43
+ end
44
+
45
+ # generates options specifying output filename.
46
+ # returns an array of string
47
+ def _option_output_file(fn)
48
+ [ '-out', fn ]
49
+ end
50
+ end #class Muscle
51
+
52
+ end #module Bio
@@ -0,0 +1,129 @@
1
+ #
2
+ # = bio/appl/phylip/alignment.rb - phylip multiple alignment format parser
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # GOTO Naohisa <ng@bioruby.org>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: alignment.rb,v 1.2 2007/04/05 23:35:40 trevor Exp $
10
+ #
11
+ # = About Bio::Phylip::PhylipFormat
12
+ #
13
+ # Please refer to the documentation of the Bio::Phylip::PhylipFormat class.
14
+ #
15
+
16
+ module Bio
17
+ module Phylip
18
+
19
+ # This is phylip multiple alignment format parser.
20
+ # The two formats, interleaved and non-interleaved, are
21
+ # automatically determined.
22
+ #
23
+ class PhylipFormat
24
+
25
+ # create a new object from a string
26
+ def initialize(str)
27
+ @data = str.strip.split(/(?:\r\n|\r|\n)/)
28
+ @first_line = @data.shift
29
+ @number_of_sequences, @alignment_length =
30
+ @first_line.to_s.strip.split(/\s+/).collect { |x| x.to_i }
31
+ end
32
+
33
+ # number of sequences
34
+ attr_reader :number_of_sequences
35
+
36
+ # alignment length
37
+ attr_reader :alignment_length
38
+
39
+ # If the alignment format is "interleaved", returns true.
40
+ # If not, returns false.
41
+ # The format may be determined incorrectly if the alignment is very short.
42
+ def interleaved?
43
+ unless defined? @interleaved_flag then
44
+ if /\A +/ =~ @data[1].to_s then
45
+ @interleaved_flag = false
46
+ else
47
+ @interleaved_flag = true
48
+ end
49
+ end
50
+ @interleaved_flag
51
+ end
52
+
53
+ # Gets the alignment. Returns a Bio::Alignment object.
54
+ def alignment
55
+ unless defined? @alignment then
56
+ do_parse
57
+ a = Bio::Alignment.new
58
+ (0...@number_of_sequences).each do |i|
59
+ a.add_seq(@sequences[i], @sequence_names[i])
60
+ end
61
+ @alignment = a
62
+ end
63
+ @alignment
64
+ end
65
+
66
+ private
67
+
68
+ def do_parse
69
+ if interleaved? then
70
+ do_parse_interleaved
71
+ else
72
+ do_parse_noninterleaved
73
+ end
74
+ end
75
+
76
+ def do_parse_interleaved
77
+ first_block = @data[0, @number_of_sequences]
78
+ @data[0, @number_of_sequences] = ''
79
+ @sequence_names = Array.new(@number_of_sequences) { '' }
80
+ @sequences = Array.new(@number_of_sequences) do
81
+ ' ' * @alignment_length
82
+ end
83
+ first_block.each_with_index do |x, i|
84
+ n, s = x.split(/ +/, 2)
85
+ @sequence_names[i] = n
86
+ @sequences[i].replace(s.gsub(/\s+/, ''))
87
+ end
88
+ i = 0
89
+ @data.each do |x|
90
+ if x.strip.length <= 0 then
91
+ i = 0
92
+ else
93
+ @sequences[i] << x.gsub(/\s+/, '')
94
+ i = (i + 1) % @number_of_sequences
95
+ end
96
+ end
97
+ @data.clear
98
+ true
99
+ end
100
+
101
+ def do_parse_noninterleaved
102
+ @sequence_names = Array.new(@number_of_sequences) { '' }
103
+ @sequences = Array.new(@number_of_sequences) do
104
+ ' ' * @alignment_length
105
+ end
106
+ curseq = nil
107
+ i = 0
108
+ @data.each do |x|
109
+ next if x.strip.length <= 0
110
+ if !curseq or
111
+ curseq.length > @alignment_length or /^\s/ !~ x then
112
+ p i
113
+ n, s = x.strip.split(/ +/, 2)
114
+ @sequence_names[i] = n
115
+ curseq = @sequences[i]
116
+ curseq.replace(s.gsub(/\s+/, ''))
117
+ i += 1
118
+ else
119
+ curseq << x.gsub(/\s+/, '')
120
+ end
121
+ end
122
+ @data.clear
123
+ true
124
+ end
125
+
126
+ end #class PhylipFormat
127
+ end #module Phylip
128
+ end #module Bio
129
+
@@ -0,0 +1,96 @@
1
+ #
2
+ # = bio/appl/phylip/distance_matrix.rb - phylip distance matrix parser
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # GOTO Naohisa <ng@bioruby.org>
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # $Id: distance_matrix.rb,v 1.3 2007/04/05 23:35:40 trevor Exp $
10
+ #
11
+ # = About Bio::Phylip::DistanceMatrix
12
+ #
13
+ # Please refer to the documentation of the Bio::Phylip::DistanceMatrix class.
14
+ #
15
+
16
+ require 'matrix'
17
+
18
+ module Bio
19
+ module Phylip
20
+
21
+ # This is a parser class for phylip distance matrix data
22
+ # created by dnadist, protdist, or restdist commands.
23
+ #
24
+ class DistanceMatrix
25
+
26
+ # creates a new distance matrix object
27
+ def initialize(str)
28
+ data = str.strip.split(/(?:\r\n|\r|\n)/)
29
+ @otus = data.shift.to_s.strip.to_i
30
+ prev = nil
31
+ data.collect! do |x|
32
+ if /\A +/ =~ x and prev then
33
+ prev.concat x.strip.split(/\s+/)
34
+ nil
35
+ else
36
+ prev = x.strip.split(/\s+/)
37
+ prev
38
+ end
39
+ end
40
+ data.compact!
41
+ if data.size != @otus then
42
+ raise "inconsistent data (OTUs=#{@otus} but #{data.size} rows)"
43
+ end
44
+ @otu_names = data.collect { |x| x.shift }
45
+ mat = data.collect do |x|
46
+ if x.size != @otus then
47
+ raise "inconsistent data (OTUs=#{@otus} but #{x.size} columns)"
48
+ end
49
+ x.collect { |y| y.to_f }
50
+ end
51
+ @matrix = Matrix.rows(mat, false)
52
+ @original_matrix = Matrix.rows(data, false)
53
+ end
54
+
55
+ # distance matrix (returns Ruby's Matrix object)
56
+ attr_reader :matrix
57
+
58
+ # matrix contains values as original strings.
59
+ # Use it when you doubt precision of floating-point numbers.
60
+ attr_reader :original_matrix
61
+
62
+ # number of OTUs
63
+ attr_reader :otus
64
+
65
+ # names of OTUs
66
+ attr_reader :otu_names
67
+
68
+ # Generates a new phylip distance matrix formatted text as a string.
69
+ def self.generate(matrix, otu_names = nil, options = {})
70
+ if matrix.row_size != matrix.column_size then
71
+ raise "must be a square matrix"
72
+ end
73
+ otus = matrix.row_size
74
+ names = (0...otus).collect do |i|
75
+ name = ((otu_names and otu_names[i]) or "OTU#{i.to_s}")
76
+ name
77
+ end
78
+ data = (0...otus).collect do |i|
79
+ x = (0...otus).collect { |j| sprintf("%9.6f", matrix[i, j]) }
80
+ x.unshift(sprintf("%-10s", names[i])[0, 10])
81
+
82
+ str = x[0, 7].join(' ') + "\n"
83
+ 7.step(otus + 1, 7) do |k|
84
+ str << ' ' + x[k, 7].join(' ') + "\n"
85
+ end
86
+ str
87
+ end
88
+ sprintf("%5d\n", otus) + data.join('')
89
+ end
90
+
91
+ end #class DistanceMatrix
92
+
93
+ end #module Phylip
94
+
95
+ end #module Bio
96
+