bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -4,16 +4,65 @@
4
4
  # Copyright:: Copyright (C) 2000-2006
5
5
  # Toshiaki Katayama <k@bioruby.org>,
6
6
  # Yoshinori K. Okuji <okuji@enbug.org>,
7
- # Naohisa Goto <ng@bioruby.org>
8
- # License:: Ruby's
7
+ # Naohisa Goto <ng@bioruby.org>,
8
+ # Ryan Raaum <ryan@raaum.org>,
9
+ # Jan Aerts <jan.aerts@bbsrc.ac.uk>
10
+ # License:: The Ruby License
9
11
  #
10
- # $Id: sequence.rb,v 0.56 2006/02/17 17:15:08 k Exp $
12
+ # $Id: sequence.rb,v 0.58 2007/04/05 23:35:39 trevor Exp $
11
13
  #
12
14
 
13
15
  require 'bio/sequence/compat'
14
16
 
15
17
  module Bio
16
18
 
19
+ # = DESCRIPTION
20
+ # Bio::Sequence objects represent annotated sequences in bioruby.
21
+ # A Bio::Sequence object is a wrapper around the actual sequence,
22
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
23
+ # For most users, this encapsulation will be completely transparent.
24
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
25
+ # objects using the same arguments and returning the same values (even though
26
+ # these methods are not documented specifically for Bio::Sequence).
27
+ #
28
+ # = USAGE
29
+ # # Create a nucleic or amino acid sequence
30
+ # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
31
+ # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
32
+ # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
33
+ #
34
+ # # Print it out
35
+ # puts dna.to_s
36
+ # puts aa.to_s
37
+ #
38
+ # # Get a subsequence, bioinformatics style (first nucleotide is '1')
39
+ # puts dna.subseq(2,6)
40
+ #
41
+ # # Get a subsequence, informatics style (first nucleotide is '0')
42
+ # puts dna[2,6]
43
+ #
44
+ # # Print in FASTA format
45
+ # puts dna.output(:fasta)
46
+ #
47
+ # # Print all codons
48
+ # dna.window_search(3,3) do |codon|
49
+ # puts codon
50
+ # end
51
+ #
52
+ # # Splice or otherwise mangle your sequence
53
+ # puts dna.splicing("complement(join(1..5,16..20))")
54
+ # puts rna.splicing("complement(join(1..5,16..20))")
55
+ #
56
+ # # Convert a sequence containing ambiguity codes into a
57
+ # # regular expression you can use for subsequent searching
58
+ # puts aa.to_re
59
+ #
60
+ # # These should speak for themselves
61
+ # puts dna.complement
62
+ # puts dna.composition
63
+ # puts dna.molecular_weight
64
+ # puts dna.translate
65
+ # puts dna.gc_percent
17
66
  class Sequence
18
67
 
19
68
  autoload :Common, 'bio/sequence/common'
@@ -22,17 +71,82 @@ class Sequence
22
71
  autoload :Generic, 'bio/sequence/generic'
23
72
  autoload :Format, 'bio/sequence/format'
24
73
 
74
+ # Create a new Bio::Sequence object
75
+ #
76
+ # s = Bio::Sequence.new('atgc')
77
+ # puts s #=> 'atgc'
78
+ #
79
+ # Note that this method does not initialize the contained sequence
80
+ # as any kind of bioruby object, only as a simple string
81
+ #
82
+ # puts s.seq.class #=> String
83
+ #
84
+ # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto
85
+ # for methods to transform the basic String of a just created
86
+ # Bio::Sequence object to a proper bioruby object
87
+ # ---
88
+ # *Arguments*:
89
+ # * (required) _str_: String or Bio::Sequence::NA/AA object
90
+ # *Returns*:: Bio::Sequence object
25
91
  def initialize(str)
26
92
  @seq = str
27
93
  end
28
94
 
29
- def method_missing(*arg)
30
- @seq.send(*arg)
95
+ # Pass any unknown method calls to the wrapped sequence object. See
96
+ # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
97
+ def method_missing(sym, *args, &block) #:nodoc:
98
+ @seq.send(sym, *args, &block)
31
99
  end
32
-
33
- attr_accessor :entry_id, :definition, :features, :references, :comments,
34
- :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
35
-
100
+
101
+ # The sequence identifier. For example, for a sequence
102
+ # of Genbank origin, this is the accession number.
103
+ attr_accessor :entry_id
104
+
105
+ # A String with a description of the sequence
106
+ attr_accessor :definition
107
+
108
+ # An Array of Bio::Feature objects
109
+ attr_accessor :features
110
+
111
+ # An Array of Bio::Reference objects
112
+ attr_accessor :references
113
+
114
+ # A comment String
115
+ attr_accessor :comments
116
+
117
+ # Date from sequence source. Often date of deposition.
118
+ attr_accessor :date
119
+
120
+ # An Array of Strings
121
+ attr_accessor :keywords
122
+
123
+ # An Array of Strings; links to other database entries.
124
+ attr_accessor :dblinks
125
+
126
+ # A taxonomy String
127
+ attr_accessor :taxonomy
128
+
129
+ # Bio::Sequence::NA/AA
130
+ attr_accessor :moltype
131
+
132
+ # The sequence object, usually Bio::Sequence::NA/AA,
133
+ # but could be a simple String
134
+ attr_accessor :seq
135
+
136
+ # Using Bio::Sequence::Format, return a String with the Bio::Sequence
137
+ # object formatted in the given style.
138
+ #
139
+ # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
140
+ #
141
+ # s = Bio::Sequence.new('atgc')
142
+ # puts s.output(:fasta) #=> "> \natgc\n"
143
+ #
144
+ # The style argument is given as a Ruby
145
+ # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
146
+ # ---
147
+ # *Arguments*:
148
+ # * (required) _style_: :fasta, :genbank, *or* :embl
149
+ # *Returns*:: String object
36
150
  def output(style)
37
151
  extend Bio::Sequence::Format
38
152
  case style
@@ -47,6 +161,16 @@ class Sequence
47
161
  end
48
162
  end
49
163
 
164
+ # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
165
+ # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
166
+ # of this guess. This method will change the current Bio::Sequence object.
167
+ #
168
+ # s = Bio::Sequence.new('atgc')
169
+ # puts s.seq.class #=> String
170
+ # s.auto
171
+ # puts s.seq.class #=> Bio::Sequence::NA
172
+ # ---
173
+ # *Returns*:: Bio::Sequence::NA/AA object
50
174
  def auto
51
175
  @moltype = guess
52
176
  if @moltype == NA
@@ -56,20 +180,69 @@ class Sequence
56
180
  end
57
181
  end
58
182
 
183
+ # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
184
+ # return a new Bio::Sequence object wrapping a sequence of the guessed type
185
+ # (either Bio::Sequence::AA or Bio::Sequence::NA)
186
+ #
187
+ # s = Bio::Sequence.auto('atgc')
188
+ # puts s.seq.class #=> Bio::Sequence::NA
189
+ # ---
190
+ # *Arguments*:
191
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
192
+ # *Returns*:: Bio::Sequence object
59
193
  def self.auto(str)
60
194
  seq = self.new(str)
61
195
  seq.auto
62
196
  return seq
63
197
  end
64
198
 
199
+ # Guess the class of the current sequence. Returns the class
200
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
201
+ # developers only, but if you know what you are doing, feel free.
202
+ #
203
+ # s = Bio::Sequence.new('atgc')
204
+ # puts s.guess #=> Bio::Sequence::NA
205
+ #
206
+ # There are three parameters: `threshold`, `length`, and `index`.
207
+ #
208
+ # The `threshold` value (defaults to 0.9) is the frequency of
209
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
210
+ # to produce a Bio::Sequence::NA "guess". In the default case, if less
211
+ # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
212
+ # then the guess is Bio::Sequence::AA.
213
+ #
214
+ # s = Bio::Sequence.new('atgcatgcqq')
215
+ # puts s.guess #=> Bio::Sequence::AA
216
+ # puts s.guess(0.8) #=> Bio::Sequence::AA
217
+ # puts s.guess(0.7) #=> Bio::Sequence::NA
218
+ #
219
+ # The `length` value is how much of the total sequence to use in the
220
+ # guess (default 10000). If your sequence is very long, you may
221
+ # want to use a smaller amount to reduce the computational burden.
222
+ #
223
+ # s = Bio::Sequence.new(A VERY LONG SEQUENCE)
224
+ # puts s.guess(0.9, 1000) # limit the guess to the first 1000 positions
225
+ #
226
+ # The `index` value is where to start the guess. Perhaps you know there
227
+ # are a lot of gaps at the start...
228
+ #
229
+ # s = Bio::Sequence.new('-----atgcc')
230
+ # puts s.guess #=> Bio::Sequence::AA
231
+ # puts s.guess(0.9,10000,5) #=> Bio::Sequence::NA
232
+ # ---
233
+ # *Arguments*:
234
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
235
+ # * (optional) _length_: Fixnum (default 10000)
236
+ # * (optional) _index_: Fixnum (default 0)
237
+ # *Returns*:: Bio::Sequence::NA/AA
65
238
  def guess(threshold = 0.9, length = 10000, index = 0)
66
239
  str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
67
240
  cmp = str.composition
68
241
 
69
- bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
70
- cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
242
+ bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
243
+ cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u']
71
244
 
72
- total = @seq.length - cmp['N'] - cmp['n']
245
+ total = str.length - cmp['N'] - cmp['n']
73
246
 
74
247
  if bases.to_f / total > threshold
75
248
  return NA
@@ -78,20 +251,89 @@ class Sequence
78
251
  end
79
252
  end
80
253
 
254
+ # Guess the class of a given sequence. Returns the class
255
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
256
+ # developers only, but if you know what you are doing, feel free.
257
+ #
258
+ # puts Bio::Sequence.guess('atgc') #=> Bio::Sequence::NA
259
+ #
260
+ # There are three optional parameters: `threshold`, `length`, and `index`.
261
+ #
262
+ # The `threshold` value (defaults to 0.9) is the frequency of
263
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
264
+ # to produce a Bio::Sequence::NA "guess". In the default case, if less
265
+ # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
266
+ # then the guess is Bio::Sequence::AA.
267
+ #
268
+ # puts Bio::Sequence.guess('atgcatgcqq') #=> Bio::Sequence::AA
269
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
270
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
271
+ #
272
+ # The `length` value is how much of the total sequence to use in the
273
+ # guess (default 10000). If your sequence is very long, you may
274
+ # want to use a smaller amount to reduce the computational burden.
275
+ #
276
+ # # limit the guess to the first 1000 positions
277
+ # puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)
278
+ #
279
+ # The `index` value is where to start the guess. Perhaps you know there
280
+ # are a lot of gaps at the start...
281
+ #
282
+ # puts Bio::Sequence.guess('-----atgcc') #=> Bio::Sequence::AA
283
+ # puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
284
+ # ---
285
+ # *Arguments*:
286
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
287
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
288
+ # * (optional) _length_: Fixnum (default 10000)
289
+ # * (optional) _index_: Fixnum (default 0)
290
+ # *Returns*:: Bio::Sequence::NA/AA
81
291
  def self.guess(str, *args)
82
292
  self.new(str).guess(*args)
83
293
  end
84
294
 
295
+ # Transform the sequence wrapped in the current Bio::Sequence object
296
+ # into a Bio::Sequence::NA object. This method will change the current
297
+ # object. This method does not validate your choice, so be careful!
298
+ #
299
+ # s = Bio::Sequence.new('RRLE')
300
+ # puts s.seq.class #=> String
301
+ # s.na
302
+ # puts s.seq.class #=> Bio::Sequence::NA !!!
303
+ #
304
+ # However, if you know your sequence type, this method may be
305
+ # constructively used after initialization,
306
+ #
307
+ # s = Bio::Sequence.new('atgc')
308
+ # s.na
309
+ # ---
310
+ # *Returns*:: Bio::Sequence::NA
85
311
  def na
86
312
  @seq = NA.new(@seq)
87
313
  @moltype = NA
88
314
  end
89
315
 
316
+ # Transform the sequence wrapped in the current Bio::Sequence object
317
+ # into a Bio::Sequence::AA object. This method will change the current
318
+ # object. This method does not validate your choice, so be careful!
319
+ #
320
+ # s = Bio::Sequence.new('atgc')
321
+ # puts s.seq.class #=> String
322
+ # s.aa
323
+ # puts s.seq.class #=> Bio::Sequence::AA !!!
324
+ #
325
+ # However, if you know your sequence type, this method may be
326
+ # constructively used after initialization,
327
+ #
328
+ # s = Bio::Sequence.new('RRLE')
329
+ # s.aa
330
+ # ---
331
+ # *Returns*:: Bio::Sequence::AA
90
332
  def aa
91
333
  @seq = AA.new(@seq)
92
334
  @moltype = AA
93
335
  end
94
-
336
+
95
337
  end # Sequence
96
338
 
97
339
 
@@ -2,10 +2,11 @@
2
2
  # = bio/sequence/aa.rb - amino acid sequence class
3
3
  #
4
4
  # Copyright:: Copyright (C) 2006
5
- # Toshiaki Katayama <k@bioruby.org>
6
- # License:: Ruby's
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Ryan Raaum <ryan@raaum.org>
7
+ # License:: The Ruby License
7
8
  #
8
- # $Id: aa.rb,v 1.2 2006/02/06 14:11:31 k Exp $
9
+ # $Id: aa.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
9
10
  #
10
11
 
11
12
  require 'bio/sequence/common'
@@ -16,13 +17,46 @@ module Bio
16
17
 
17
18
  class Sequence
18
19
 
19
-
20
- # Amino Acid sequence
20
+ # = DESCRIPTION
21
+ # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
22
+ #
23
+ # = USAGE
24
+ # # Create an Amino Acid sequence.
25
+ # aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
26
+ #
27
+ # # What are the three-letter codes for all the residues?
28
+ # puts aa.codes
29
+ #
30
+ # # What are the names of all the residues?
31
+ # puts aa.names
32
+ #
33
+ # # What is the molecular weight of this peptide?
34
+ # puts aa.molecular_weight
21
35
  class AA < String
22
36
 
23
37
  include Bio::Sequence::Common
24
38
 
25
- # Generate a amino acid sequence object from a string.
39
+ # Generate an amino acid sequence object from a string.
40
+ #
41
+ # s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
42
+ #
43
+ # or maybe (if you have an amino acid sequence in a file)
44
+ #
45
+ # s = Bio::Sequence::AA.new(File.open('aa.txt').read)
46
+ #
47
+ # Amino Acid sequences are *always* all uppercase in bioruby
48
+ #
49
+ # s = Bio::Sequence::AA.new("rrLeHtfV")
50
+ # puts s #=> "RRLEHTFVF"
51
+ #
52
+ # Whitespace is stripped from the sequence
53
+ #
54
+ # s = Bio::Sequence::AA.new("RRL\nELA\tRG\r RL")
55
+ # puts s #=> "RRLEHTFV"
56
+ # ---
57
+ # *Arguments*:
58
+ # * (required) _str_: String
59
+ # *Returns*:: Bio::Sequence::AA object
26
60
  def initialize(str)
27
61
  super
28
62
  self.upcase!
@@ -30,17 +64,36 @@ class AA < String
30
64
  end
31
65
 
32
66
 
33
- # Estimate the weight of this protein.
67
+ # Estimate molecular weight based on
68
+ # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
69
+ #
70
+ # s = Bio::Sequence::AA.new("RRLE")
71
+ # puts s.molecular_weight #=> 572.655
72
+ # ---
73
+ # *Returns*:: Float object
34
74
  def molecular_weight
35
75
  Bio::AminoAcid.weight(self)
36
76
  end
37
77
 
78
+ # Create a ruby regular expression instance
79
+ # Regexp[http://corelib.rubyonrails.org/classes/Regexp.html]
80
+ #
81
+ # s = Bio::Sequence::AA.new("RRLE")
82
+ # puts s.to_re #=> /RRLE/
83
+ # ---
84
+ # *Returns*:: Regexp object
38
85
  def to_re
39
86
  Bio::AminoAcid.to_re(self)
40
87
  end
41
88
 
42
- # Generate the list of the names of the each residue along with the
43
- # sequence (3 letters code).
89
+ # Generate the list of the names of each residue along with the
90
+ # sequence (3 letters code). Codes used in bioruby are found in the
91
+ # Bio::AminoAcid::NAMES hash.
92
+ #
93
+ # s = Bio::Sequence::AA.new("RRLE")
94
+ # puts s.codes #=> ["Arg", "Arg", "Leu", "Glu"]
95
+ # ---
96
+ # *Returns*:: Array object
44
97
  def codes
45
98
  array = []
46
99
  self.each_byte do |x|
@@ -49,7 +102,15 @@ class AA < String
49
102
  return array
50
103
  end
51
104
 
52
- # Similar to codes but returns long names.
105
+ # Generate the list of the names of each residue along with the
106
+ # sequence (full name). Names used in bioruby are found in the
107
+ # Bio::AminoAcid::NAMES hash.
108
+ #
109
+ # s = Bio::Sequence::AA.new("RRLE")
110
+ # puts s.names
111
+ # #=> ["arginine", "arginine", "leucine", "glutamic acid"]
112
+ # ---
113
+ # *Returns*:: Array object
53
114
  def names
54
115
  self.codes.map do |x|
55
116
  Bio::AminoAcid.names[x]