bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -3,9 +3,9 @@
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
- # License:: Ruby's
6
+ # License:: The Ruby License
7
7
  #
8
- # $Id: generic.rb,v 1.3 2006/02/06 14:26:04 k Exp $
8
+ # $Id: generic.rb,v 1.5 2007/04/05 23:35:41 trevor Exp $
9
9
  #
10
10
 
11
11
  require 'bio/sequence/common'
@@ -13,7 +13,7 @@ require 'bio/sequence/common'
13
13
  module Bio
14
14
  class Sequence
15
15
 
16
- class Generic < String
16
+ class Generic < String #:nodoc:
17
17
 
18
18
  include Bio::Sequence::Common
19
19
 
@@ -2,10 +2,11 @@
2
2
  # = bio/sequence/na.rb - nucleic acid sequence class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: Ruby's
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Ryan Raaum <ryan@raaum.org>
7
+ # License:: The Ruby License
7
8
  #
8
- # $Id: na.rb,v 1.2 2006/02/06 14:13:52 k Exp $
9
+ # $Id: na.rb,v 1.7 2007/04/23 16:43:51 trevor Exp $
9
10
  #
10
11
 
11
12
  require 'bio/sequence/common'
@@ -18,20 +19,69 @@ module Bio
18
19
  class Sequence
19
20
 
20
21
 
21
- # Nucleic Acid sequence
22
+ # = DESCRIPTION
23
+ # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
24
+ #
25
+ # = USAGE
26
+ # # Create a Nucleic Acid sequence.
27
+ # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
28
+ # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
29
+ #
30
+ # # What are the names of all the bases?
31
+ # puts dna.names
32
+ # puts rna.names
33
+ #
34
+ # # What is the GC percentage?
35
+ # puts dna.gc_percent
36
+ # puts rna.gc_percent
37
+ #
38
+ # # What is the molecular weight?
39
+ # puts dna.molecular_weight
40
+ # puts rna.molecular_weight
41
+ #
42
+ # # What is the reverse complement?
43
+ # puts dna.reverse_complement
44
+ # puts dna.complement
45
+ #
46
+ # # Is this sequence DNA or RNA?
47
+ # puts dna.rna?
48
+ #
49
+ # # Translate my sequence (see method docs for many options)
50
+ # puts dna.translate
51
+ # puts rna.translate
22
52
  class NA < String
23
53
 
24
54
  include Bio::Sequence::Common
25
55
 
26
- # Generate a nucleic acid sequence object from a string.
56
+ # Generate a nucleic acid sequence object from a string.
57
+ #
58
+ # s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
59
+ #
60
+ # or maybe (if you have a nucleic acid sequence in a file)
61
+ #
62
+ # s = Bio::Sequence::NA.new(File.open('dna.txt').read)
63
+ #
64
+ # Nucleic Acid sequences are *always* all lowercase in bioruby
65
+ #
66
+ # s = Bio::Sequence::NA.new("AAGcTtGG")
67
+ # puts s #=> "aagcttgg"
68
+ #
69
+ # Whitespace is stripped from the sequence
70
+ #
71
+ # s = Bio::Sequence::NA.new("atg\nggg\ttt\r gc")
72
+ # puts s #=> "atggggttgc"
73
+ # ---
74
+ # *Arguments*:
75
+ # * (required) _str_: String
76
+ # *Returns*:: Bio::Sequence::NA object
27
77
  def initialize(str)
28
78
  super
29
79
  self.downcase!
30
80
  self.tr!(" \t\n\r",'')
31
81
  end
32
82
 
33
- # This method depends on Locations class, see bio/location.rb
34
- def splicing(position)
83
+ # Alias of Bio::Sequence::Common splice method, documented there.
84
+ def splicing(position) #:nodoc:
35
85
  mRNA = super
36
86
  if mRNA.rna?
37
87
  mRNA.tr!('t', 'u')
@@ -41,14 +91,28 @@ class NA < String
41
91
  mRNA
42
92
  end
43
93
 
44
- # Returns complement sequence without reversing ("atgc" -> "tacg")
94
+ # Returns a new complementary sequence object (without reversing).
95
+ # The original sequence object is not modified.
96
+ #
97
+ # s = Bio::Sequence::NA.new('atgc')
98
+ # puts s.forward_complement #=> 'tacg'
99
+ # puts s #=> 'atgc'
100
+ # ---
101
+ # *Returns*:: new Bio::Sequence::NA object
45
102
  def forward_complement
46
103
  s = self.class.new(self)
47
104
  s.forward_complement!
48
105
  s
49
106
  end
50
107
 
51
- # Convert to complement sequence without reversing ("atgc" -> "tacg")
108
+ # Converts the current sequence into its complement (without reversing).
109
+ # The original sequence object is modified.
110
+ #
111
+ # s = Bio::Sequence::NA.new('atgc')
112
+ # puts s.forward_complement! #=> 'tacg'
113
+ # puts s #=> 'tacg'
114
+ # ---
115
+ # *Returns*:: current Bio::Sequence::NA object (modified)
52
116
  def forward_complement!
53
117
  if self.rna?
54
118
  self.tr!('augcrymkdhvbswn', 'uacgyrkmhdbvswn')
@@ -58,31 +122,115 @@ class NA < String
58
122
  self
59
123
  end
60
124
 
61
- # Returns reverse complement sequence ("atgc" -> "gcat")
125
+ # Returns a new sequence object with the reverse complement
126
+ # sequence to the original. The original sequence is not modified.
127
+ #
128
+ # s = Bio::Sequence::NA.new('atgc')
129
+ # puts s.reverse_complement #=> 'gcat'
130
+ # puts s #=> 'atgc'
131
+ # ---
132
+ # *Returns*:: new Bio::Sequence::NA object
62
133
  def reverse_complement
63
134
  s = self.class.new(self)
64
135
  s.reverse_complement!
65
136
  s
66
137
  end
67
138
 
68
- # Convert to reverse complement sequence ("atgc" -> "gcat")
139
+ # Converts the original sequence into its reverse complement.
140
+ # The original sequence is modified.
141
+ #
142
+ # s = Bio::Sequence::NA.new('atgc')
143
+ # puts s.reverse_complement! #=> 'gcat'
144
+ # puts s #=> 'gcat'
145
+ # ---
146
+ # *Returns*:: current Bio::Sequence::NA object (modified)
69
147
  def reverse_complement!
70
148
  self.reverse!
71
149
  self.forward_complement!
72
150
  end
73
151
 
74
- # Aliases for short
152
+ # Alias for Bio::Sequence::NA#reverse_complement
75
153
  alias complement reverse_complement
154
+
155
+ # Alias for Bio::Sequence::NA#reverse_complement!
76
156
  alias complement! reverse_complement!
77
157
 
78
158
 
79
- # Translate into the amino acid sequence from the given frame and the
80
- # selected codon table. The table also can be a Bio::CodonTable object.
81
- # The 'unknown' character is used for invalid/unknown codon (can be
82
- # used for 'nnn' and/or gap translation in practice).
159
+ # Translate into an amino acid sequence.
160
+ #
161
+ # s = Bio::Sequence::NA.new('atggcgtga')
162
+ # puts s.translate #=> "MA*"
83
163
  #
84
- # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
85
- # (4, 5 or 6 is also accepted) for the reverse strand.
164
+ # By default, translate starts in reading frame position 1, but you
165
+ # can start in either 2 or 3 as well,
166
+ #
167
+ # puts s.translate(2) #=> "WR"
168
+ # puts s.translate(3) #=> "GV"
169
+ #
170
+ # You may also translate the reverse complement in one step by using frame
171
+ # values of -1, -2, and -3 (or 4, 5, and 6)
172
+ #
173
+ # puts s.translate(-1) #=> "SRH"
174
+ # puts s.translate(4) #=> "SRH"
175
+ # puts s.reverse_complement.translate(1) #=> "SRH"
176
+ #
177
+ # The default codon table in the translate function is the Standard
178
+ # Eukaryotic codon table. The translate function takes either a
179
+ # number or a Bio::CodonTable object for its table argument.
180
+ # The available tables are
181
+ # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
182
+ #
183
+ # 1. "Standard (Eukaryote)"
184
+ # 2. "Vertebrate Mitochondrial"
185
+ # 3. "Yeast Mitochondrial"
186
+ # 4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
187
+ # 5. "Invertebrate Mitochondrial"
188
+ # 6. "Ciliate Macronuclear and Dasycladacean"
189
+ # 9. "Echinoderm Mitochondrial"
190
+ # 10. "Euplotid Nuclear"
191
+ # 11. "Bacteria"
192
+ # 12. "Alternative Yeast Nuclear"
193
+ # 13. "Ascidian Mitochondrial"
194
+ # 14. "Flatworm Mitochondrial"
195
+ # 15. "Blepharisma Macronuclear"
196
+ # 16. "Chlorophycean Mitochondrial"
197
+ # 21. "Trematode Mitochondrial"
198
+ # 22. "Scenedesmus obliquus mitochondrial"
199
+ # 23. "Thraustochytrium Mitochondrial"
200
+ #
201
+ # If you are using anything other than the default table, you must specify
202
+ # frame in the translate method call,
203
+ #
204
+ # puts s.translate #=> "MA*" (using defaults)
205
+ # puts s.translate(1,1) #=> "MA*" (same as above, but explicit)
206
+ # puts s.translate(1,2) #=> "MAW" (different codon table)
207
+ #
208
+ # and using a Bio::CodonTable instance in the translate method call,
209
+ #
210
+ # mt_table = Bio::CodonTable[2]
211
+ # puts s.translate(1, mt_table) #=> "MAW"
212
+ #
213
+ # By default, any invalid or unknown codons (as could happen if the
214
+ # sequence contains ambiguities) will be represented by 'X' in the
215
+ # translated sequence.
216
+ # You may change this to any character of your choice.
217
+ #
218
+ # s = Bio::Sequence::NA.new('atgcNNtga')
219
+ # puts s.translate #=> "MX*"
220
+ # puts s.translate(1,1,'9') #=> "M9*"
221
+ #
222
+ # The translate method considers gaps to be unknown characters and treats
223
+ # them as such (i.e. does not collapse sequences prior to translation), so
224
+ #
225
+ # s = Bio::Sequence::NA.new('atgc--tga')
226
+ # puts s.translate #=> "MX*"
227
+ # ---
228
+ # *Arguments*:
229
+ # * (optional) _frame_: one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
230
+ # * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
231
+ # (default 1)
232
+ # * (optional) _unknown_: Character (default 'X')
233
+ # *Returns*:: Bio::Sequence::AA object
86
234
  def translate(frame = 1, table = 1, unknown = 'X')
87
235
  if table.is_a?(Bio::CodonTable)
88
236
  ct = table
@@ -108,7 +256,22 @@ class NA < String
108
256
  return Bio::Sequence::AA.new(aaseq)
109
257
  end
110
258
 
111
- # Returns counts of the each codon in the sequence by Hash.
259
+ # Returns counts of each codon in the sequence in a hash.
260
+ #
261
+ # s = Bio::Sequence::NA.new('atggcgtga')
262
+ # puts s.codon_usage #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
263
+ #
264
+ # This method does not validate codons! Any three letter group is a 'codon'. So,
265
+ #
266
+ # s = Bio::Sequence::NA.new('atggNNtga')
267
+ # puts s.codon_usage #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
268
+ #
269
+ # s = Bio::Sequence::NA.new('atgg--tga')
270
+ # puts s.codon_usage #=> {"tga"=>1, "g--"=>1, "atg"=>1}
271
+ #
272
+ # Also, there is no option to work in any frame other than the first.
273
+ # ---
274
+ # *Returns*:: Hash object
112
275
  def codon_usage
113
276
  hash = Hash.new(0)
114
277
  self.window_search(3, 3) do |codon|
@@ -117,21 +280,101 @@ class NA < String
117
280
  return hash
118
281
  end
119
282
 
120
- # Calculate the ratio of GC / ATGC bases in percent.
283
+ # Calculate the ratio of GC / ATGC bases as a percentage rounded to
284
+ # the nearest whole number. U is regarded as T.
285
+ #
286
+ # s = Bio::Sequence::NA.new('atggcgtga')
287
+ # puts s.gc_percent #=> 55
288
+ # ---
289
+ # *Returns*:: Fixnum
121
290
  def gc_percent
122
291
  count = self.composition
123
292
  at = count['a'] + count['t'] + count['u']
124
293
  gc = count['g'] + count['c']
294
+ return 0 if at + gc == 0
125
295
  gc = 100 * gc / (at + gc)
126
296
  return gc
127
297
  end
128
298
 
129
- # Show abnormal bases other than 'atgcu'.
299
+ # Calculate the ratio of GC / ATGC bases. U is regarded as T.
300
+ #
301
+ # s = Bio::Sequence::NA.new('atggcgtga')
302
+ # puts s.gc_content #=> 0.555555555555556
303
+ # ---
304
+ # *Returns*:: Float
305
+ def gc_content
306
+ count = self.composition
307
+ at = count['a'] + count['t'] + count['u']
308
+ gc = count['g'] + count['c']
309
+ return 0.0 if at + gc == 0
310
+ return gc.quo(at + gc)
311
+ end
312
+
313
+ # Calculate the ratio of AT / ATGC bases. U is regarded as T.
314
+ #
315
+ # s = Bio::Sequence::NA.new('atggcgtga')
316
+ # puts s.at_content #=> 0.444444444444444
317
+ # ---
318
+ # *Returns*:: Float
319
+ def at_content
320
+ count = self.composition
321
+ at = count['a'] + count['t'] + count['u']
322
+ gc = count['g'] + count['c']
323
+ return 0.0 if at + gc == 0
324
+ return at.quo(at + gc)
325
+ end
326
+
327
+ # Calculate the ratio of (G - C) / (G + C) bases.
328
+ #
329
+ # s = Bio::Sequence::NA.new('atggcgtga')
330
+ # puts s.gc_skew #=> 0.6
331
+ # ---
332
+ # *Returns*:: Float
333
+ def gc_skew
334
+ count = self.composition
335
+ g = count['g']
336
+ c = count['c']
337
+ return 0.0 if g + c == 0
338
+ return (g - c).quo(g + c)
339
+ end
340
+
341
+ # Calculate the ratio of (A - T) / (A + T) bases. U is regarded as T.
342
+ #
343
+ # s = Bio::Sequence::NA.new('atgttgttgttc')
344
+ # puts s.at_skew #=> -0.75
345
+ # ---
346
+ # *Returns*:: Float
347
+ def at_skew
348
+ count = self.composition
349
+ a = count['a']
350
+ t = count['t'] + count['u']
351
+ return 0.0 if a + t == 0
352
+ return (a - t).quo(a + t)
353
+ end
354
+
355
+ # Returns an alphabetically sorted array of any non-standard bases
356
+ # (other than 'atgcu').
357
+ #
358
+ # s = Bio::Sequence::NA.new('atgStgQccR')
359
+ # puts s.illegal_bases #=> ["q", "r", "s"]
360
+ # ---
361
+ # *Returns*:: Array object
130
362
  def illegal_bases
131
363
  self.scan(/[^atgcu]/).sort.uniq
132
364
  end
133
365
 
134
- # Estimate the weight of this biological string molecule.
366
+ # Estimate molecular weight (using the values from BioPerl's
367
+ # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
368
+ #
369
+ # s = Bio::Sequence::NA.new('atggcgtga')
370
+ # puts s.molecular_weight #=> 2841.00708
371
+ #
372
+ # RNA and DNA do not have the same molecular weights,
373
+ #
374
+ # s = Bio::Sequence::NA.new('auggcguga')
375
+ # puts s.molecular_weight #=> 2956.94708
376
+ # ---
377
+ # *Returns*:: Float object
135
378
  def molecular_weight
136
379
  if self.rna?
137
380
  Bio::NucleicAcid.weight(self, true)
@@ -140,7 +383,13 @@ class NA < String
140
383
  end
141
384
  end
142
385
 
143
- # Convert the universal code string into the regular expression.
386
+ # Create a ruby regular expression instance
387
+ # (Regexp)[http://corelib.rubyonrails.org/classes/Regexp.html]
388
+ #
389
+ # s = Bio::Sequence::NA.new('atggcgtga')
390
+ # puts s.to_re #=> /atggcgtga/
391
+ # ---
392
+ # *Returns*:: Regexp object
144
393
  def to_re
145
394
  if self.rna?
146
395
  Bio::NucleicAcid.to_re(self.dna, true)
@@ -149,7 +398,14 @@ class NA < String
149
398
  end
150
399
  end
151
400
 
152
- # Convert the self string into the list of the names of the each base.
401
+ # Generate the list of the names of each nucleotide along with the
402
+ # sequence (full name). Names used in bioruby are found in the
403
+ # Bio::NucleicAcid::NAMES hash.
404
+ #
405
+ # s = Bio::Sequence::NA.new('atg')
406
+ # puts s.names #=> ["Adenine", "Thymine", "Guanine"]
407
+ # ---
408
+ # *Returns*:: Array object
153
409
  def names
154
410
  array = []
155
411
  self.each_byte do |x|
@@ -158,20 +414,50 @@ class NA < String
158
414
  return array
159
415
  end
160
416
 
161
- # Output a DNA string by substituting 'u' to 't'.
417
+ # Returns a new sequence object with any 'u' bases changed to 't'.
418
+ # The original sequence is not modified.
419
+ #
420
+ # s = Bio::Sequence::NA.new('augc')
421
+ # puts s.dna #=> 'atgc'
422
+ # puts s #=> 'augc'
423
+ # ---
424
+ # *Returns*:: new Bio::Sequence::NA object
162
425
  def dna
163
426
  self.tr('u', 't')
164
427
  end
165
428
 
429
+ # Changes any 'u' bases in the original sequence to 't'.
430
+ # The original sequence is modified.
431
+ #
432
+ # s = Bio::Sequence::NA.new('augc')
433
+ # puts s.dna! #=> 'atgc'
434
+ # puts s #=> 'atgc'
435
+ # ---
436
+ # *Returns*:: current Bio::Sequence::NA object (modified), or nil if it contained no 'u'
166
437
  def dna!
167
438
  self.tr!('u', 't')
168
439
  end
169
440
 
170
- # Output a RNA string by substituting 't' to 'u'.
441
+ # Returns a new sequence object with any 't' bases changed to 'u'.
442
+ # The original sequence is not modified.
443
+ #
444
+ # s = Bio::Sequence::NA.new('atgc')
445
+ # puts s.dna #=> 'augc'
446
+ # puts s #=> 'atgc'
447
+ # ---
448
+ # *Returns*:: new Bio::Sequence::NA object
171
449
  def rna
172
450
  self.tr('t', 'u')
173
451
  end
174
452
 
453
+ # Changes any 't' bases in the original sequence to 'u'.
454
+ # The original sequence is modified.
455
+ #
456
+ # s = Bio::Sequence::NA.new('atgc')
457
+ # puts s.dna! #=> 'augc'
458
+ # puts s #=> 'augc'
459
+ # ---
460
+ # *Returns*:: current Bio::Sequence::NA object (modified), or nil if it contained no 't'
175
461
  def rna!
176
462
  self.tr!('t', 'u')
177
463
  end
@@ -181,6 +467,22 @@ class NA < String
181
467
  end
182
468
  protected :rna?
183
469
 
470
+ # Example:
471
+ #
472
+ # seq = Bio::Sequence::NA.new('gaattc')
473
+ # cuts = seq.cut_with_enzyme('EcoRI')
474
+ #
475
+ # _or_
476
+ #
477
+ # seq = Bio::Sequence::NA.new('gaattc')
478
+ # cuts = seq.cut_with_enzyme('g^aattc')
479
+ # ---
480
+ # See Bio::RestrictionEnzyme::Analysis.cut
481
+ def cut_with_enzyme(*args)
482
+ Bio::RestrictionEnzyme::Analysis.cut(self, *args)
483
+ end
484
+ alias cut_with_enzymes cut_with_enzyme
485
+
184
486
  end # NA
185
487
 
186
488
  end # Sequence