bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -2,10 +2,11 @@
2
2
  # = bio/sequence/common.rb - common methods for biological sequence
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: Ruby's
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Ryan Raaum <ryan@raaum.org>
7
+ # License:: The Ruby License
7
8
  #
8
- # $Id: common.rb,v 1.2 2006/02/06 14:16:17 k Exp $
9
+ # $Id: common.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
9
10
  #
10
11
 
11
12
  module Bio
@@ -14,38 +15,128 @@ module Bio
14
15
 
15
16
  class Sequence
16
17
 
17
- # This module provides common methods for biological sequence classes
18
- # which must inherit String.
18
+ # = DESCRIPTION
19
+ # Bio::Sequence::Common is a
20
+ # Mixin[http://www.rubycentral.com/book/tut_modules.html]
21
+ # implementing methods common to
22
+ # Bio::Sequence::AA and Bio::Sequence::NA. All of these methods
23
+ # are available to either Amino Acid or Nucleic Acid sequences, and
24
+ # by encapsulation are also available to Bio::Sequence objects.
25
+ #
26
+ # = USAGE
27
+ #
28
+ # # Create a sequence
29
+ # dna = Bio::Sequence.auto('atgcatgcatgc')
30
+ #
31
+ # # Splice out a subsequence using a Genbank-style location string
32
+ # puts dna.splice('complement(1..4)')
33
+ #
34
+ # # What is the base composition?
35
+ # puts dna.composition
36
+ #
37
+ # # Create a random sequence with the composition of a current sequence
38
+ # puts dna.randomize
19
39
  module Common
20
40
 
41
+ # Return sequence as
42
+ # String[http://corelib.rubyonrails.org/classes/String.html].
43
+ # The original sequence is unchanged.
44
+ #
45
+ # s = Bio::Sequence::NA.new('atgc')
46
+ # puts s.to_s #=> 'atgc'
47
+ # puts s.to_s.class #=> String
48
+ # puts s #=> 'atgc'
49
+ # puts s.class #=> Bio::Sequence::NA
50
+ # ---
51
+ # *Returns*:: String object
21
52
  def to_s
22
53
  String.new(self)
23
54
  end
24
55
  alias to_str to_s
25
56
 
26
- # Force self to re-initialize for clean up (remove white spaces,
27
- # case unification).
57
+ # Create a new sequence based on the current sequence.
58
+ # The original sequence is unchanged.
59
+ #
60
+ # s = Bio::Sequence::NA.new('atgc')
61
+ # s2 = s.seq
62
+ # puts s2 #=> 'atgc'
63
+ # ---
64
+ # *Returns*:: new Bio::Sequence::NA/AA object
28
65
  def seq
29
66
  self.class.new(self)
30
67
  end
31
68
 
32
- # Similar to the 'seq' method, but changes the self object destructively.
69
+ # Normalize the current sequence, removing all whitespace and
70
+ # transforming all positions to uppercase if the sequence is AA or
71
+ # transforming all positions to lowercase if the sequence is NA.
72
+ # The original sequence is modified.
73
+ #
74
+ # s = Bio::Sequence::NA.new('atgc')
75
+ # s.normalize!
76
+ # ---
77
+ # *Returns*:: current Bio::Sequence::NA/AA object (modified)
33
78
  def normalize!
34
79
  initialize(self)
35
80
  self
36
81
  end
37
82
  alias seq! normalize!
38
83
 
84
+ # Add new data to the end of the current sequence.
85
+ # The original sequence is modified.
86
+ #
87
+ # s = Bio::Sequence::NA.new('atgc')
88
+ # s << 'atgc'
89
+ # puts s #=> "atgcatgc"
90
+ # s << s
91
+ # puts s #=> "atgcatgcatgcatgc"
92
+ # ---
93
+ # *Returns*:: current Bio::Sequence::NA/AA object (modified)
39
94
  def <<(*arg)
40
95
  super(self.class.new(*arg))
41
96
  end
42
97
  alias concat <<
43
98
 
99
+ # Create a new sequence by adding to an existing sequence.
100
+ # The existing sequence is not modified.
101
+ #
102
+ # s = Bio::Sequence::NA.new('atgc')
103
+ # s2 = s + 'atgc'
104
+ # puts s2 #=> "atgcatgc"
105
+ # puts s #=> "atgc"
106
+ #
107
+ # The new sequence is of the same class as the existing sequence if
108
+ # the new data was added to an existing sequence,
109
+ #
110
+ # puts s2.class == s.class #=> true
111
+ #
112
+ # but if an existing sequence is added to a String, the result is a String
113
+ #
114
+ # s3 = 'atgc' + s
115
+ # puts s3.class #=> String
116
+ # ---
117
+ # *Returns*:: new Bio::Sequence::NA/AA *or* String object
44
118
  def +(*arg)
45
119
  self.class.new(super(*arg))
46
120
  end
47
121
 
48
- # Returns the subsequence of the self string.
122
+ # Returns a new sequence containing the subsequence identified by the
123
+ # start and end numbers given as parameters. *Important:* Biological
124
+ # sequence numbering conventions (one-based) rather than ruby's
125
+ # (zero-based) numbering conventions are used.
126
+ #
127
+ # s = Bio::Sequence::NA.new('atggaatga')
128
+ # puts s.subseq(1,3) #=> "atg"
129
+ #
130
+ # Start defaults to 1 and end defaults to the entire existing string, so
131
+ # subseq called without any parameters simply returns a new sequence
132
+ # identical to the existing sequence.
133
+ #
134
+ # puts s.subseq #=> "atggaatga"
135
+ # ---
136
+ # *Arguments*:
137
+ # * (optional) _s_(start): Integer (default 1)
138
+ # * (optional) _e_(end): Integer (default current sequence length)
139
+ # *Returns*:: new Bio::Sequence::NA/AA object
49
140
  def subseq(s = 1, e = self.length)
50
141
  raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
51
142
  s -= 1
@@ -53,29 +144,35 @@ module Common
53
144
  self[s..e]
54
145
  end
55
146
 
56
- # This method iterates on sub string with specified length 'window_size'.
57
- # By specifing 'step_size', codon sized shifting or spliting genome
58
- # sequence with ovelapping each end can easily be yielded.
147
+ # This method steps through a sequence in steps of 'step_size' by
148
+ # subsequences of 'window_size'. Typically used with a block.
149
+ # Any remaining sequence at the terminal end will be returned.
59
150
  #
60
- # The remainder sequence at the terminal end will be returned.
151
+ # Prints average GC% on each 100bp
61
152
  #
62
- # Example:
63
- # # prints average GC% on each 100bp
64
- # seq.window_search(100) do |subseq|
153
+ # s.window_search(100) do |subseq|
65
154
  # puts subseq.gc
66
155
  # end
67
- # # prints every translated peptide (length 5aa) in the same frame
68
- # seq.window_search(15, 3) do |subseq|
156
+ #
157
+ # Prints every translated peptide (length 5aa) in the same frame
158
+ #
159
+ # s.window_search(15, 3) do |subseq|
69
160
  # puts subseq.translate
70
161
  # end
71
- # # split genome sequence by 10000bp with 1000bp overlap in fasta format
162
+ #
163
+ # Split genome sequence by 10000bp with 1000bp overlap in fasta format
164
+ #
72
165
  # i = 1
73
- # remainder = seq.window_search(10000, 9000) do |subseq|
166
+ # remainder = s.window_search(10000, 9000) do |subseq|
74
167
  # puts subseq.to_fasta("segment #{i}", 60)
75
168
  # i += 1
76
169
  # end
77
170
  # puts remainder.to_fasta("segment #{i}", 60)
78
- #
171
+ # ---
172
+ # *Arguments*:
173
+ # * (required) _window_size_: Fixnum
174
+ # * (optional) _step_size_: Fixnum (default 1)
175
+ # *Returns*:: new Bio::Sequence::NA/AA object
79
176
  def window_search(window_size, step_size = 1)
80
177
  i = 0
81
178
  0.step(self.length - window_size, step_size) do |i|
@@ -84,9 +181,16 @@ module Common
84
181
  return self[i + window_size .. -1]
85
182
  end
86
183
 
87
- # This method receive a hash of residues/bases to the particular values,
88
- # and sum up the value along with the self sequence. Especially useful
89
- # to use with the window_search method and amino acid indices etc.
184
+ # Returns a float total value for the sequence given a hash of
185
+ # base or residue values,
186
+ #
187
+ # values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
188
+ # s = Bio::Sequence::NA.new('atgc')
189
+ # puts s.total(values) #=> 1.0
190
+ # ---
191
+ # *Arguments*:
192
+ # * (required) _hash_: Hash object
193
+ # *Returns*:: Float object
90
194
  def total(hash)
91
195
  hash.default = 0.0 unless hash.default
92
196
  sum = 0.0
@@ -99,6 +203,11 @@ module Common
99
203
  end
100
204
 
101
205
  # Returns a hash of the occurrence counts for each residue or base.
206
+ #
207
+ # s = Bio::Sequence::NA.new('atgc')
208
+ # puts s.composition #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
209
+ # ---
210
+ # *Returns*:: Hash object
102
211
  def composition
103
212
  count = Hash.new(0)
104
213
  self.scan(/./) do |x|
@@ -107,13 +216,30 @@ module Common
107
216
  return count
108
217
  end
109
218
 
110
- # Returns a randomized sequence keeping its composition by default.
111
- # The argument is required when generating a random sequence from the empty
112
- # sequence (used by the class methods NA.randomize, AA.randomize).
113
- # If the block is given, yields for each random residue/base.
219
+ # Returns a randomized sequence. The default is to retain the same
220
+ # base/residue composition as the original. If a hash of base/residue
221
+ # counts is given, the new sequence will be based on that hash
222
+ # composition. If a block is given, each new randomly selected
223
+ # position will be passed into the block. In all cases, the
224
+ # original sequence is not modified.
225
+ #
226
+ # s = Bio::Sequence::NA.new('atgc')
227
+ # puts s.randomize #=> "tcag" (for example)
228
+ #
229
+ # new_composition = {'a' => 2, 't' => 2}
230
+ # puts s.randomize(new_composition) #=> "ttaa" (for example)
231
+ #
232
+ # count = 0
233
+ # s.randomize { |x| count += 1 }
234
+ # puts count #=> 4
235
+ # ---
236
+ # *Arguments*:
237
+ # * (optional) _hash_: Hash object
238
+ # *Returns*:: new Bio::Sequence::NA/AA object
114
239
  def randomize(hash = nil)
115
240
  length = self.length
116
241
  if hash
242
+ length = 0
117
243
  count = hash.clone
118
244
  count.each_value {|x| length += x}
119
245
  else
@@ -138,15 +264,43 @@ module Common
138
264
  return self.class.new(seq)
139
265
  end
140
266
 
141
- # Generate a new random sequence with the given frequency of bases
142
- # or residues. The sequence length is determined by the sum of each
143
- # base/residue occurences.
267
+ # Generate a new random sequence with the given frequency of bases or residues.
268
+ # The sequence length is determined by their cumulative sum.
269
+ # (See also Bio::Sequence::Common#randomize which creates a new
270
+ # randomized sequence object using the base composition of an existing
271
+ # sequence instance).
272
+ #
273
+ # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
274
+ # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
275
+ #
276
+ # You may also feed the output of randomize into a block
277
+ #
278
+ # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
279
+ # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
280
+ # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
281
+ # ---
282
+ # *Arguments*:
283
+ # * (optional) _hash_: Hash object
284
+ # *Returns*:: Bio::Sequence::NA/AA object
144
285
  def self.randomize(*arg, &block)
145
286
  self.new('').randomize(*arg, &block)
146
287
  end
147
288
 
148
- # Receive a GenBank style position string and convert it to the Locations
149
- # objects to splice the sequence itself. See also: bio/location.rb
289
+ # Return a new sequence extracted from the original using a GenBank style
290
+ # position string. See also documentation for the Bio::Location class.
291
+ #
292
+ # s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
293
+ # puts s.splice('1..3') #=> "atg"
294
+ # puts s.splice('join(1..3,8..10)') #=> "atgcat"
295
+ # puts s.splice('complement(1..3)') #=> "cat"
296
+ # puts s.splice('complement(join(1..3,8..10))') #=> "atgcat"
297
+ #
298
+ # Note that 'complement'ed Genbank position strings will have no
299
+ # effect on Bio::Sequence::AA objects.
300
+ # ---
301
+ # *Arguments*:
302
+ # * (required) _position_: String *or* Bio::Locations object
303
+ # *Returns*:: Bio::Sequence::NA/AA object
150
304
  def splice(position)
151
305
  unless position.is_a?(Locations) then
152
306
  position = Locations.new(position)
@@ -2,10 +2,11 @@
2
2
  # = bio/sequence/compat.rb - methods for backward compatibility
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: Ruby's
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Ryan Raaum <ryan@raaum.org>
7
+ # License:: The Ruby License
7
8
  #
8
- # $Id: compat.rb,v 1.2 2006/02/06 14:18:03 k Exp $
9
+ # $Id: compat.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
9
10
  #
10
11
 
11
12
 
@@ -17,6 +18,17 @@ class Sequence
17
18
  autoload :NA, 'bio/sequence/na'
18
19
  autoload :AA, 'bio/sequence/aa'
19
20
 
21
+ # Return sequence as
22
+ # String[http://corelib.rubyonrails.org/classes/String.html].
23
+ # The original sequence is unchanged.
24
+ #
25
+ # s = Bio::Sequence.new('atgc')
26
+ # puts s.to_s #=> 'atgc'
27
+ # puts s.to_s.class #=> String
28
+ # puts s #=> 'atgc'
29
+ # puts s.class #=> Bio::Sequence
30
+ # ---
31
+ # *Returns*:: String object
20
32
  def to_s
21
33
  String.new(@seq)
22
34
  end
@@ -25,9 +37,16 @@ class Sequence
25
37
 
26
38
  module Common
27
39
 
40
+ # *DEPRECATED* Do not use! Use Bio::Sequence#output instead.
41
+ #
28
42
  # Output the FASTA format string of the sequence. The 1st argument is
29
43
  # used as the comment string. If the 2nd option is given, the output
30
44
  # sequence will be folded.
45
+ # ---
46
+ # *Arguments*:
47
+ # * (optional) _header_: String object
48
+ # * (optional) _width_: Fixnum object (default nil)
49
+ # *Returns*:: String
31
50
  def to_fasta(header = '', width = nil)
32
51
  warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
33
52
  ">#{header}\n" +
@@ -43,11 +62,29 @@ end # Common
43
62
 
44
63
  class NA
45
64
 
65
+ # Generate a new random sequence with the given frequency of bases.
66
+ # The sequence length is determined by their cumulative sum.
67
+ # (See also Bio::Sequence::Common#randomize which creates a new
68
+ # randomized sequence object using the base composition of an existing
69
+ # sequence instance).
70
+ #
71
+ # counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
72
+ # puts Bio::Sequence::NA.randomize(counts) #=> "ggcttgttac" (for example)
73
+ #
74
+ # You may also feed the output of randomize into a block
75
+ #
76
+ # actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
77
+ # Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
78
+ # actual_counts #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
79
+ # ---
80
+ # *Arguments*:
81
+ # * (optional) _hash_: Hash object
82
+ # *Returns*:: Bio::Sequence::NA object
46
83
  def self.randomize(*arg, &block)
47
84
  self.new('').randomize(*arg, &block)
48
85
  end
49
86
 
50
- def pikachu
87
+ def pikachu #:nodoc:
51
88
  self.dna.tr("atgc", "pika") # joke, of course :-)
52
89
  end
53
90
 
@@ -56,6 +93,24 @@ end # NA
56
93
 
57
94
  class AA
58
95
 
96
+ # Generate a new random sequence with the given frequency of amino acids.
97
+ # The sequence length is determined by their cumulative sum.
98
+ # (See also Bio::Sequence::Common#randomize which creates a new
99
+ # randomized sequence object using the residue composition of an existing
100
+ # sequence instance).
101
+ #
102
+ # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
103
+ # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
104
+ #
105
+ # You may also feed the output of randomize into a block
106
+ #
107
+ # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
108
+ # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
109
+ # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
110
+ # ---
111
+ # *Arguments*:
112
+ # * (optional) _hash_: Hash object
113
+ # *Returns*:: Bio::Sequence::AA object
59
114
  def self.randomize(*arg, &block)
60
115
  self.new('').randomize(*arg, &block)
61
116
  end
@@ -3,14 +3,15 @@
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
- # Naohisa Goto <ng@bioruby.org>
7
- # License:: Ruby's
6
+ # Naohisa Goto <ng@bioruby.org>,
7
+ # Ryan Raaum <ryan@raaum.org>
8
+ # License:: The Ruby License
8
9
  #
9
10
  # = TODO
10
11
  #
11
12
  # porting from N. Goto's feature-output.rb on BioRuby list.
12
13
  #
13
- # $Id: format.rb,v 1.2 2006/02/06 14:20:35 k Exp $
14
+ # $Id: format.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
14
15
  #
15
16
 
16
17
 
@@ -20,11 +21,37 @@ module Bio
20
21
 
21
22
  class Sequence
22
23
 
24
+ # = DESCRIPTION
25
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
26
+ # of methods used by Bio::Sequence#output to output sequences in
27
+ # common bioinformatic formats. These are not called in isolation.
28
+ #
29
+ # = USAGE
30
+ # # Given a Bio::Sequence object,
31
+ # puts s.output(:fasta)
32
+ # puts s.output(:genbank)
33
+ # puts s.output(:embl)
23
34
  module Format
24
35
 
25
- # Output the FASTA format string of the sequence. The 1st argument is
26
- # used in the comment line. If the 2nd argument (integer) is given,
27
- # the output sequence will be folded.
36
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
37
+ # case, it would be difficult to successfully call this method outside
38
+ # its expected context).
39
+ #
40
+ # Output the FASTA format string of the sequence.
41
+ #
42
+ # UNFORTUNATELY, the current implementation of Bio::Sequence is incapable of
43
+ # using either the header or width arguments. So something needs to be
44
+ # changed...
45
+ #
46
+ # Currently, this method is used in Bio::Sequence#output like so,
47
+ #
48
+ # s = Bio::Sequence.new('atgc')
49
+ # puts s.output(:fasta) #=> "> \natgc\n"
50
+ # ---
51
+ # *Arguments*:
52
+ # * (optional) _header_: String (default nil)
53
+ # * (optional) _width_: Fixnum (default nil)
54
+ # *Returns*:: String object
28
55
  def format_fasta(header = nil, width = nil)
29
56
  header ||= "#{@entry_id} #{@definition}"
30
57
 
@@ -36,10 +63,22 @@ module Format
36
63
  end
37
64
  end
38
65
 
39
- def format_gff
66
+ # Not yet implemented :)
67
+ # Remove the nodoc command after implementation!
68
+ # ---
69
+ # *Returns*:: String object
70
+ def format_gff #:nodoc:
40
71
  raise NotImplementedError
41
72
  end
42
73
 
74
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
75
+ # case, it would be difficult to successfully call this method outside
76
+ # its expected context).
77
+ #
78
+ # Output the Genbank format string of the sequence.
79
+ # Used in Bio::Sequence#output.
80
+ # ---
81
+ # *Returns*:: String object
43
82
  def format_genbank
44
83
  prefix = ' ' * 5
45
84
  indent = prefix + ' ' * 16
@@ -48,6 +87,14 @@ module Format
48
87
  format_features(prefix, indent, fwidth)
49
88
  end
50
89
 
90
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
91
+ # case, it would be difficult to successfully call this method outside
92
+ # its expected context).
93
+ #
94
+ # Output the EMBL format string of the sequence.
95
+ # Used in Bio::Sequence#output.
96
+ # ---
97
+ # *Returns*:: String object
51
98
  def format_embl
52
99
  prefix = 'FT '
53
100
  indent = prefix + ' ' * 16