bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -2,25 +2,9 @@
2
2
  # = bio/appl/blat/report.rb - BLAT result parser
3
3
  #
4
4
  # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
- # License:: LGPL
5
+ # License:: The Ruby License
6
6
  #
7
- #--
8
- # This library is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU Lesser General Public
10
- # License as published by the Free Software Foundation; either
11
- # version 2 of the License, or (at your option) any later version.
12
- #
13
- # This library is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # Lesser General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU Lesser General Public
19
- # License along with this library; if not, write to the Free Software
20
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
- #++
22
- #
23
- # $Id: report.rb,v 1.6 2005/12/18 15:58:39 k Exp $
7
+ # $Id: report.rb,v 1.13 2007/04/05 23:35:39 trevor Exp $
24
8
  #
25
9
  # BLAT result parser (psl / pslx format).
26
10
  #
@@ -36,7 +20,7 @@
36
20
  # * Kent, W.J., BLAT--the BLAST-like alignment tool,
37
21
  # Genome Research, 12, 656--664, 2002.
38
22
  # http://www.genome.org/cgi/content/abstract/12/4/656
39
- #
23
+ # * http://genome.ucsc.edu/goldenPath/help/blatSpec.html
40
24
 
41
25
  require 'bio'
42
26
 
@@ -49,7 +33,8 @@ module Bio
49
33
  # In BLAT results, the start position of a sequence is numbered as 0.
50
34
  # On the other hand, in many other homology search programs,
51
35
  # the start position of a sequence is numbered as 1.
52
- # To keep compatibility, the BLAT parser adds 1 to every position number.
36
+ # To keep compatibility, the BLAT parser adds 1 to every position number
37
+ # except Bio::Blat::Report::Seqdesc and some Bio::Blat specific methods.
53
38
  #
54
39
  # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods
55
40
  # simply return first hit's query_*.
@@ -73,6 +58,11 @@ module Bio
73
58
  if flag then
74
59
  @hits << Hit.new(line)
75
60
  else
61
+ # for headerless data
62
+ if /^\d/ =~ line then
63
+ flag = true
64
+ redo
65
+ end
76
66
  line = line.chomp
77
67
  if /\A\-+\s*\z/ =~ line
78
68
  flag = true
@@ -95,7 +85,15 @@ module Bio
95
85
 
96
86
  # Parses headers.
97
87
  def parse_header(ary)
98
- ary.shift # first line is removed
88
+ while x = ary.shift
89
+ if /psLayout version (\S+)/ =~ x then
90
+ @psl_version = $1
91
+ break
92
+ elsif !(x.strip.empty?)
93
+ ary.unshift(x)
94
+ break
95
+ end
96
+ end
99
97
  a0 = ary.collect { |x| x.split(/\t/) }
100
98
  k = []
101
99
  a0.each do |x|
@@ -109,6 +107,9 @@ module Bio
109
107
  end
110
108
  private :parse_header
111
109
 
110
+ # version of the psl format (String or nil).
111
+ attr_reader :psl_version
112
+
112
113
  # Bio::Blat::Report::SeqDesc stores sequence information of
113
114
  # query or subject of the BLAT report.
114
115
  # It also includes some hit information.
@@ -154,12 +155,14 @@ module Bio
154
155
  # Creates a new SegmentPair object.
155
156
  # It is designed to be called internally from Bio::Blat::Report class.
156
157
  # Users shall not use it directly.
157
- def initialize(query_len, strand,
158
- blksize, qstart, tstart, qseq, tseq)
158
+ def initialize(query_len, target_len, strand,
159
+ blksize, qstart, tstart, qseq, tseq,
160
+ protein_flag)
159
161
  @blocksize = blksize
160
162
  @qseq = qseq
161
163
  @hseq = hseq
162
164
  @hit_strand = 'plus'
165
+ w = (protein_flag ? 3 : 1) # 3 means query=protein target=dna
163
166
  case strand
164
167
  when '-'
165
168
  # query is minus strand
@@ -170,15 +173,26 @@ module Bio
170
173
  # To keep compatibility, with other homology search programs,
171
174
  # we add 1 to each position number.
172
175
  @hit_from = tstart + 1
173
- @hit_to = tstart + blksize # - 1 + 1
174
- else #when '+'
176
+ @hit_to = tstart + blksize * w # - 1 + 1
177
+ when '+-'
178
+ # hit is minus strand
179
+ @query_strand = 'plus'
180
+ @hit_strand = 'minus'
181
+ # To keep compatibility, with other homology search programs,
182
+ # we add 1 to each position number.
183
+ @query_from = qstart + 1
184
+ @query_to = qstart + blksize # - 1 + 1
185
+ # convert positions
186
+ @hit_from = target_len - tstart
187
+ @hit_to = target_len - tstart - blksize * w + 1
188
+ else #when '+', '++'
175
189
  @query_strand = 'plus'
176
190
  # To keep compatibility with other homology search programs,
177
191
  # we add 1 to each position number.
178
192
  @query_from = qstart + 1
179
193
  @query_to = qstart + blksize # - 1 + 1
180
194
  @hit_from = tstart + 1
181
- @hit_to = tstart + blksize # - 1 + 1
195
+ @hit_to = tstart + blksize * w # - 1 + 1
182
196
  end
183
197
  end
184
198
  # Returns query start position.
@@ -277,9 +291,13 @@ module Bio
277
291
  def match; @data[0].to_i; end
278
292
  # Mismatch nucleotides.
279
293
  def mismatch; @data[1].to_i; end
280
- # rep. match (???)
294
+
295
+ # "rep. match".
296
+ # Number of bases that match but are part of repeats.
297
+ # Note that the current version of BLAT always sets this to 0.
281
298
  def rep_match; @data[2].to_i; end
282
- # N's (???)
299
+
300
+ # "N's". Number of 'N' bases.
283
301
  def n_s; @data[3].to_i; end
284
302
 
285
303
  # Returns strand information of the hit.
@@ -308,9 +326,11 @@ module Bio
308
326
  tst = target.starts
309
327
  qseqs = query.seqs
310
328
  tseqs = target.seqs
329
+ pflag = self.protein?
311
330
  @blocks = (0...block_count).collect do |i|
312
- SegmentPair.new(query.size, strand, bs[i],
313
- qst[i], tst[i], qseqs[i], tseqs[i])
331
+ SegmentPair.new(query.size, target.size, strand, bs[i],
332
+ qst[i], tst[i], qseqs[i], tseqs[i],
333
+ pflag)
314
334
  end
315
335
  end
316
336
  @blocks
@@ -343,6 +363,83 @@ module Bio
343
363
  def each(&x) #:yields: segmentpair
344
364
  exons.each(&x)
345
365
  end
366
+
367
+ #--
368
+ # methods described in the BLAT FAQ at the UCSC genome browser.
369
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4)
370
+ #++
371
+
372
+ # Calculates the pslCalcMilliBad value defined in the
373
+ # BLAT FAQ (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
374
+ #
375
+ # The algorithm is taken from the BLAT FAQ
376
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
377
+ def milli_bad
378
+ w = (self.protein? ? 3 : 1)
379
+ qalen = w * (self.query.end - self.query.start)
380
+ talen = self.target.end - self.target.start
381
+ alen = (if qalen < talen then qalen; else talen; end)
382
+ return 0 if alen <= 0
383
+ d = qalen - talen
384
+ d = 0 if d < 0
385
+ total = w * (self.match + self.rep_match + self.mismatch)
386
+ return 0 if total == 0
387
+ return (1000 * (self.mismatch * w + self.query.gap_count +
388
+ (3 * Math.log(1 + d)).round) / total)
389
+ end
390
+
391
+ # Calculates the percent identity compatible with the BLAT web server
392
+ # as described in the BLAT FAQ
393
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
394
+ #
395
+ # The algorithm is taken from the BLAT FAQ
396
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
397
+ def percent_identity
398
+ 100.0 - self.milli_bad * 0.1
399
+ end
400
+
401
+ # When the output data comes from the protein query, returns true.
402
+ # Otherwise (nucleotide query), returns false.
403
+ # It returns nil if this cannot be determined.
404
+ #
405
+ # The algorithm is taken from the BLAT FAQ
406
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
407
+ #
408
+ # Note: It seems that it returns true only when protein query
409
+ # with nucleotide database (blat options: -q=prot -t=dnax).
410
+ def protein?
411
+ return nil if self.block_sizes.empty?
412
+ case self.strand[1,1]
413
+ when '+'
414
+ if self.target.end == self.target.starts[-1] +
415
+ 3 * self.block_sizes[-1] then
416
+ true
417
+ else
418
+ false
419
+ end
420
+ when '-'
421
+ if self.target.start == self.target.size -
422
+ self.target.starts[-1] - 3 * self.block_sizes[-1] then
423
+ true
424
+ else
425
+ false
426
+ end
427
+ else
428
+ nil
429
+ end
430
+ end
431
+
432
+ # Calculates the score compatible with the BLAT web server
433
+ # as described in the BLAT FAQ
434
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
435
+ #
436
+ # The algorithm is taken from the BLAT FAQ
437
+ # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
438
+ def score
439
+ w = (self.protein? ? 3 : 1)
440
+ w * (self.match + (self.rep_match >> 1)) -
441
+ w * self.mismatch - self.query.gap_count - self.target.gap_count
442
+ end
346
443
  end #class Hit
347
444
 
348
445
  #--
@@ -2,28 +2,12 @@
2
2
  # = bio/appl/clustalw.rb - CLUSTAL W wrapper class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
5
- # License:: LGPL
5
+ # License:: The Ruby License
6
6
  #
7
- #--
8
- # This library is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU Lesser General Public
10
- # License as published by the Free Software Foundation; either
11
- # version 2 of the License, or (at your option) any later version.
12
- #
13
- # This library is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # Lesser General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU Lesser General Public
19
- # License along with this library; if not, write to the Free Software
20
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
- #++
22
- #
23
- # $Id: clustalw.rb,v 1.10 2005/12/18 15:58:40 k Exp $
7
+ # $Id: clustalw.rb,v 1.19 2007/07/16 12:27:29 ngoto Exp $
24
8
  #
25
9
  # Bio::ClustalW is a CLUSTAL W execution wrapper class.
26
- # Its object is also called an alignment factory.
10
+ # It can also be called as an alignment factory.
27
11
  # CLUSTAL W is a very popular software for multiple sequence alignment.
28
12
  #
29
13
  # == References
@@ -39,8 +23,8 @@
39
23
 
40
24
 
41
25
  require 'tempfile'
42
- require 'open3'
43
26
 
27
+ require 'bio/command'
44
28
  require 'bio/sequence'
45
29
  require 'bio/alignment'
46
30
 
@@ -54,20 +38,28 @@ module Bio
54
38
  autoload :Report, 'bio/appl/clustalw/report'
55
39
 
56
40
  # Creates a new CLUSTAL W execution wrapper object (alignment factory).
57
- def initialize(program = 'clustalw', option = [])
41
+ def initialize(program = 'clustalw', opt = [])
58
42
  @program = program
59
- @option = option
43
+ @options = opt
60
44
  @command = nil
61
45
  @output = nil
62
46
  @report = nil
63
- @log = nil
47
+ @data_stdout = nil
48
+ @exit_status = nil
49
+ @output_dnd = nil
64
50
  end
65
51
 
66
52
  # name of the program (usually 'clustalw' in UNIX)
67
53
  attr_accessor :program
68
54
 
69
55
  # options
70
- attr_accessor :option
56
+ attr_accessor :options
57
+
58
+ # option is deprecated. Instead, please use options.
59
+ def option
60
+ warn "Bio::ClustalW#option is deprecated. Please use options."
61
+ options
62
+ end
71
63
 
72
64
  # Returns last command-line strings executed by this factory.
73
65
  # Note that filenames described in the command-line may already
@@ -75,62 +67,99 @@ module Bio
75
67
  # Returns an array.
76
68
  attr_reader :command
77
69
 
70
+ # This method will be deprecated.
71
+ #
78
72
  # Returns last messages of CLUSTAL W execution.
79
- attr_reader :log
73
+ def log
74
+ #warn 'Bio::ClustalW#log will be deprecated.'
75
+ @data_stdout
76
+ end
80
77
 
81
- # Returns last raw alignment result (String).
78
+ # Returns last raw alignment result (String or nil).
82
79
  attr_reader :output
83
80
 
84
81
  # Returns last alignment result.
85
82
  # Returns a Bio::ClustalW::Report object.
86
83
  attr_reader :report
87
84
 
85
+ # Last exit status
86
+ attr_reader :exit_status
87
+
88
+ # Last output to the stdout.
89
+ attr_accessor :data_stdout
90
+
91
+ # Clear the internal data and status, except program and options.
92
+ def reset
93
+ @command = nil
94
+ @output = nil
95
+ @report = nil
96
+ @exit_status = nil
97
+ @data_stdout = nil
98
+ @output_dnd = nil
99
+ end
100
+
88
101
  # Executes the program(clustalw).
89
102
  # If +seqs+ is not nil, perform alignment for seqs.
90
103
  # If +seqs+ is nil, simply executes CLUSTAL W.
104
+ #
105
+ # Compatibility note: When seqs is nil,
106
+ # returns true if the program exits normally, and
107
+ # returns false if the program exits abnormally.
91
108
  def query(seqs)
92
109
  if seqs then
93
110
  query_align(seqs)
94
111
  else
95
- exec_local(@option)
112
+ exec_local(@options)
113
+ @exit_status.exitstatus == 0 ? true : false
96
114
  end
97
115
  end
98
116
 
117
+ # Note that this method will be renamed to query_alignment.
118
+ #
99
119
  # Performs alignment for +seqs+.
100
120
  # +seqs+ should be Bio::Alignment or Array of sequences or nil.
121
+ #
122
+ # Compatibility Note: Nucleic or amino is not determined by this method.
101
123
  def query_align(seqs)
102
- seqtype = nil
103
124
  unless seqs.is_a?(Bio::Alignment)
104
125
  seqs = Bio::Alignment.new(seqs)
105
126
  end
106
- seqs.each do |s|
107
- if s.is_a?(Bio::Sequence::AA) then
108
- seqtype = 'PROTEIN'
109
- elsif s.is_a?(Bio::Sequence::NA) then
110
- seqtype = 'DNA'
111
- end
112
- break if seqtype
113
- end
114
- query_string(seqs.to_fasta(70, :avoid_same_name => true), seqtype)
127
+ query_string(seqs.output_fasta(:width => 70,
128
+ :avoid_same_name => true))
129
+ end
130
+
131
+ # Performs alignment for +seqs+.
132
+ # +seqs+ should be Bio::Alignment or Array of sequences or nil.
133
+ def query_alignment(seqs)
134
+ query_align(seqs)
115
135
  end
116
136
 
117
137
  # Performs alignment for +str+.
118
138
  # +str+ should be a string that can be recognized by CLUSTAL W.
139
+ #
140
+ # Compatibility Note: 2nd argument is deprecated and ignored.
119
141
  def query_string(str, *arg)
142
+ if arg.size > 0 then
143
+ warn '2nd argument of Bio::ClustalW#query_string is ignored'
144
+ end
120
145
  begin
121
146
  tf_in = Tempfile.open('align')
122
147
  tf_in.print str
123
148
  ensure
124
149
  tf_in.close(false)
125
150
  end
126
- r = query_by_filename(tf_in.path, *arg)
151
+ r = query_by_filename(tf_in.path)
127
152
  tf_in.close(true)
128
153
  r
129
154
  end
130
155
 
131
156
  # Performs alignment of sequences in the file named +path+.
132
- def query_by_filename(path, seqtype = nil)
133
- require 'bio/appl/clustalw/report'
157
+ #
158
+ # Compatibility Note: 2nd argument (seqtype) is deprecated and ignored.
159
+ def query_by_filename(path, *arg)
160
+ if arg.size > 0 then
161
+ warn '2nd argument of Bio::ClustalW#query_by_filename is ignored'
162
+ end
134
163
 
135
164
  tf_out = Tempfile.open('clustalout')
136
165
  tf_out.close(false)
@@ -143,8 +172,8 @@ module Bio
143
172
  "-newtree=#{tf_dnd.path}",
144
173
  "-outorder=input"
145
174
  ]
146
- opt << "-type=#{seqtype}" if seqtype
147
- opt.concat(@option)
175
+ #opt << "-type=#{seqtype}" if seqtype
176
+ opt.concat(@options)
148
177
  exec_local(opt)
149
178
  tf_out.open
150
179
  @output = tf_out.read
@@ -152,37 +181,36 @@ module Bio
152
181
  tf_dnd.open
153
182
  @output_dnd = tf_dnd.read
154
183
  tf_dnd.close(true)
155
- @report = Report.new(@output, seqtype)
184
+ @report = Report.new(@output)
156
185
  @report
157
186
  end
158
187
 
159
188
  # Returns last alignment guide tree (file.dnd).
160
189
  attr_reader :output_dnd
161
190
 
191
+ #---
162
192
  # Returns last error messages (to stderr) of CLUSTAL W execution.
163
- attr_reader :errorlog
193
+ #attr_reader :errorlog
194
+ #+++
195
+ #errorlog is deprecated (no replacement) and returns empty string.
196
+ def errorlog
197
+ warn "errorlog is deprecated (no replacement) and returns empty string."
198
+ ''
199
+ end
164
200
 
165
201
  private
166
202
  # Executes the program in the local machine.
167
203
  def exec_local(opt)
168
204
  @command = [ @program, *opt ]
169
205
  #STDERR.print "DEBUG: ", @command.join(" "), "\n"
170
- @log = nil
171
-
172
- Open3.popen3(*@command) do |din, dout, derr|
173
- din.close
174
- t = Thread.start do
175
- @errorlog = derr.read
176
- end
177
- @log = dout.read
178
- t.join
206
+ @data_stdout = nil
207
+ @exit_status = nil
208
+
209
+ Bio::Command.call_command(@command) do |io|
210
+ io.close_write
211
+ @data_stdout = io.read
179
212
  end
180
- # @command_string = @command.join(" ")
181
- # IO.popen(@command, "r") do |io|
182
- # io.sync = true
183
- # @log = io.read
184
- # end
185
- @log
213
+ @exit_status = $?
186
214
  end
187
215
 
188
216
  end #class ClustalW