bio 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (340) hide show
  1. data/bin/bioruby +14 -122
  2. data/bin/br_biofetch.rb +2 -2
  3. data/bin/br_bioflat.rb +2 -2
  4. data/bin/br_biogetseq.rb +2 -2
  5. data/bin/br_pmfetch.rb +3 -3
  6. data/doc/Changes-0.7.rd +77 -0
  7. data/doc/KEGG_API.rd +523 -232
  8. data/doc/KEGG_API.rd.ja +529 -207
  9. data/doc/Tutorial.rd +48 -11
  10. data/lib/bio.rb +59 -6
  11. data/lib/bio/alignment.rb +713 -103
  12. data/lib/bio/appl/bl2seq/report.rb +2 -18
  13. data/lib/bio/appl/blast.rb +108 -91
  14. data/lib/bio/appl/blast/format0.rb +33 -18
  15. data/lib/bio/appl/blast/format8.rb +6 -20
  16. data/lib/bio/appl/blast/report.rb +293 -429
  17. data/lib/bio/appl/blast/rexml.rb +8 -22
  18. data/lib/bio/appl/blast/wublast.rb +21 -12
  19. data/lib/bio/appl/blast/xmlparser.rb +180 -183
  20. data/lib/bio/appl/blat/report.rb +127 -30
  21. data/lib/bio/appl/clustalw.rb +87 -59
  22. data/lib/bio/appl/clustalw/report.rb +20 -22
  23. data/lib/bio/appl/emboss.rb +113 -20
  24. data/lib/bio/appl/fasta.rb +173 -198
  25. data/lib/bio/appl/fasta/format10.rb +244 -347
  26. data/lib/bio/appl/gcg/msf.rb +212 -0
  27. data/lib/bio/appl/gcg/seq.rb +195 -0
  28. data/lib/bio/appl/genscan/report.rb +5 -23
  29. data/lib/bio/appl/hmmer.rb +8 -45
  30. data/lib/bio/appl/hmmer/report.rb +2 -20
  31. data/lib/bio/appl/iprscan/report.rb +374 -0
  32. data/lib/bio/appl/mafft.rb +87 -50
  33. data/lib/bio/appl/mafft/report.rb +151 -44
  34. data/lib/bio/appl/muscle.rb +52 -0
  35. data/lib/bio/appl/phylip/alignment.rb +129 -0
  36. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  37. data/lib/bio/appl/probcons.rb +41 -0
  38. data/lib/bio/appl/psort.rb +89 -96
  39. data/lib/bio/appl/psort/report.rb +6 -22
  40. data/lib/bio/appl/pts1.rb +263 -0
  41. data/lib/bio/appl/sim4.rb +26 -36
  42. data/lib/bio/appl/sim4/report.rb +2 -18
  43. data/lib/bio/appl/sosui/report.rb +5 -20
  44. data/lib/bio/appl/spidey/report.rb +2 -2
  45. data/lib/bio/appl/targetp/report.rb +4 -20
  46. data/lib/bio/appl/tcoffee.rb +55 -0
  47. data/lib/bio/appl/tmhmm/report.rb +4 -20
  48. data/lib/bio/command.rb +235 -64
  49. data/lib/bio/data/aa.rb +21 -26
  50. data/lib/bio/data/codontable.rb +2 -20
  51. data/lib/bio/data/na.rb +19 -4
  52. data/lib/bio/db.rb +27 -12
  53. data/lib/bio/db/aaindex.rb +2 -20
  54. data/lib/bio/db/embl/common.rb +4 -21
  55. data/lib/bio/db/embl/embl.rb +33 -85
  56. data/lib/bio/db/embl/sptr.rb +612 -302
  57. data/lib/bio/db/embl/swissprot.rb +10 -29
  58. data/lib/bio/db/embl/trembl.rb +10 -29
  59. data/lib/bio/db/embl/uniprot.rb +10 -29
  60. data/lib/bio/db/fantom.rb +15 -20
  61. data/lib/bio/db/fasta.rb +3 -3
  62. data/lib/bio/db/genbank/common.rb +37 -46
  63. data/lib/bio/db/genbank/ddbj.rb +6 -18
  64. data/lib/bio/db/genbank/genbank.rb +47 -186
  65. data/lib/bio/db/genbank/genpept.rb +4 -17
  66. data/lib/bio/db/genbank/refseq.rb +4 -17
  67. data/lib/bio/db/gff.rb +103 -35
  68. data/lib/bio/db/go.rb +4 -20
  69. data/lib/bio/db/kegg/brite.rb +26 -36
  70. data/lib/bio/db/kegg/compound.rb +81 -85
  71. data/lib/bio/db/kegg/drug.rb +98 -0
  72. data/lib/bio/db/kegg/enzyme.rb +133 -110
  73. data/lib/bio/db/kegg/expression.rb +2 -20
  74. data/lib/bio/db/kegg/genes.rb +208 -238
  75. data/lib/bio/db/kegg/genome.rb +164 -285
  76. data/lib/bio/db/kegg/glycan.rb +114 -157
  77. data/lib/bio/db/kegg/keggtab.rb +242 -303
  78. data/lib/bio/db/kegg/kgml.rb +117 -160
  79. data/lib/bio/db/kegg/orthology.rb +112 -0
  80. data/lib/bio/db/kegg/reaction.rb +54 -69
  81. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  82. data/lib/bio/db/lasergene.rb +209 -0
  83. data/lib/bio/db/litdb.rb +3 -27
  84. data/lib/bio/db/medline.rb +228 -249
  85. data/lib/bio/db/nbrf.rb +3 -3
  86. data/lib/bio/db/newick.rb +510 -0
  87. data/lib/bio/db/nexus.rb +1854 -0
  88. data/lib/bio/db/pdb.rb +5 -17
  89. data/lib/bio/db/pdb/atom.rb +2 -18
  90. data/lib/bio/db/pdb/chain.rb +2 -18
  91. data/lib/bio/db/pdb/chemicalcomponent.rb +2 -18
  92. data/lib/bio/db/pdb/model.rb +2 -18
  93. data/lib/bio/db/pdb/pdb.rb +73 -34
  94. data/lib/bio/db/pdb/residue.rb +4 -20
  95. data/lib/bio/db/pdb/utils.rb +2 -18
  96. data/lib/bio/db/prosite.rb +403 -422
  97. data/lib/bio/db/rebase.rb +84 -40
  98. data/lib/bio/db/soft.rb +404 -0
  99. data/lib/bio/db/transfac.rb +5 -17
  100. data/lib/bio/feature.rb +106 -52
  101. data/lib/bio/io/das.rb +32 -42
  102. data/lib/bio/io/dbget.rb +2 -20
  103. data/lib/bio/io/ddbjxml.rb +77 -138
  104. data/lib/bio/io/ebisoap.rb +158 -0
  105. data/lib/bio/io/ensembl.rb +229 -0
  106. data/lib/bio/io/fastacmd.rb +89 -82
  107. data/lib/bio/io/fetch.rb +163 -96
  108. data/lib/bio/io/flatfile.rb +170 -73
  109. data/lib/bio/io/flatfile/bdb.rb +3 -16
  110. data/lib/bio/io/flatfile/index.rb +2 -2
  111. data/lib/bio/io/flatfile/indexer.rb +3 -2
  112. data/lib/bio/io/higet.rb +12 -31
  113. data/lib/bio/io/keggapi.rb +210 -269
  114. data/lib/bio/io/ncbisoap.rb +155 -0
  115. data/lib/bio/io/pubmed.rb +169 -147
  116. data/lib/bio/io/registry.rb +4 -20
  117. data/lib/bio/io/soapwsdl.rb +43 -38
  118. data/lib/bio/io/sql.rb +242 -305
  119. data/lib/bio/location.rb +407 -285
  120. data/lib/bio/map.rb +410 -0
  121. data/lib/bio/pathway.rb +558 -695
  122. data/lib/bio/reference.rb +272 -75
  123. data/lib/bio/sequence.rb +255 -13
  124. data/lib/bio/sequence/aa.rb +71 -10
  125. data/lib/bio/sequence/common.rb +187 -33
  126. data/lib/bio/sequence/compat.rb +59 -4
  127. data/lib/bio/sequence/format.rb +54 -7
  128. data/lib/bio/sequence/generic.rb +3 -3
  129. data/lib/bio/sequence/na.rb +328 -26
  130. data/lib/bio/shell.rb +11 -4
  131. data/lib/bio/shell/core.rb +221 -160
  132. data/lib/bio/shell/demo.rb +18 -15
  133. data/lib/bio/shell/interface.rb +14 -12
  134. data/lib/bio/shell/irb.rb +95 -0
  135. data/lib/bio/shell/object.rb +45 -26
  136. data/lib/bio/shell/plugin/blast.rb +42 -0
  137. data/lib/bio/shell/plugin/codon.rb +22 -14
  138. data/lib/bio/shell/plugin/das.rb +58 -0
  139. data/lib/bio/shell/plugin/emboss.rb +2 -2
  140. data/lib/bio/shell/plugin/entry.rb +22 -11
  141. data/lib/bio/shell/plugin/flatfile.rb +2 -2
  142. data/lib/bio/shell/plugin/keggapi.rb +13 -6
  143. data/lib/bio/shell/plugin/midi.rb +4 -4
  144. data/lib/bio/shell/plugin/obda.rb +2 -2
  145. data/lib/bio/shell/plugin/psort.rb +56 -0
  146. data/lib/bio/shell/plugin/seq.rb +35 -8
  147. data/lib/bio/shell/plugin/soap.rb +87 -0
  148. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +29 -0
  149. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_classes.rhtml +4 -0
  150. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_log.rhtml +27 -0
  151. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_methods.rhtml +11 -0
  152. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_modules.rhtml +4 -0
  153. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/_variables.rhtml +7 -0
  154. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-bg.gif +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
  156. data/lib/bio/shell/rails/{public/images/icon.png → vendor/plugins/generators/bioruby/templates/bioruby-gem.png} +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-link.gif +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +369 -0
  159. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +47 -0
  160. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_controller.rb +144 -0
  161. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby_helper.rb +47 -0
  162. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/commands.rhtml +8 -0
  163. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/history.rhtml +10 -0
  164. data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +22 -0
  165. data/lib/bio/shell/script.rb +25 -0
  166. data/lib/bio/shell/setup.rb +109 -0
  167. data/lib/bio/shell/web.rb +70 -58
  168. data/lib/bio/tree.rb +850 -0
  169. data/lib/bio/util/color_scheme.rb +84 -107
  170. data/lib/bio/util/color_scheme/buried.rb +5 -24
  171. data/lib/bio/util/color_scheme/helix.rb +5 -24
  172. data/lib/bio/util/color_scheme/hydropathy.rb +5 -24
  173. data/lib/bio/util/color_scheme/nucleotide.rb +5 -24
  174. data/lib/bio/util/color_scheme/strand.rb +5 -24
  175. data/lib/bio/util/color_scheme/taylor.rb +5 -24
  176. data/lib/bio/util/color_scheme/turn.rb +5 -24
  177. data/lib/bio/util/color_scheme/zappo.rb +5 -24
  178. data/lib/bio/util/contingency_table.rb +70 -43
  179. data/lib/bio/util/restriction_enzyme.rb +228 -0
  180. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  181. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  182. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  183. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  184. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  185. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  186. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  187. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  188. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  189. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  190. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  191. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  192. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  193. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  194. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  195. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  196. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  197. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  198. data/lib/bio/util/restriction_enzyme/single_strand.rb +199 -0
  199. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  200. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  201. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  202. data/lib/bio/util/sirna.rb +4 -22
  203. data/sample/color_scheme_na.rb +4 -12
  204. data/sample/enzymes.rb +78 -0
  205. data/sample/goslim.rb +5 -13
  206. data/sample/psortplot_html.rb +4 -12
  207. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  208. data/test/data/embl/AB090716.embl.rel89 +63 -0
  209. data/test/data/fasta/example1.txt +75 -0
  210. data/test/data/fasta/example2.txt +21 -0
  211. data/test/data/iprscan/merged.raw +32 -0
  212. data/test/data/iprscan/merged.txt +74 -0
  213. data/test/data/soft/GDS100_partial.soft +92 -0
  214. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  215. data/test/functional/bio/io/test_ensembl.rb +103 -0
  216. data/test/functional/bio/io/test_soapwsdl.rb +5 -17
  217. data/test/unit/bio/appl/bl2seq/test_report.rb +2 -2
  218. data/test/unit/bio/appl/blast/test_report.rb +3 -16
  219. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -16
  220. data/test/unit/bio/appl/genscan/test_report.rb +3 -16
  221. data/test/unit/bio/appl/hmmer/test_report.rb +3 -16
  222. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  223. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  224. data/test/unit/bio/appl/sosui/test_report.rb +3 -16
  225. data/test/unit/bio/appl/targetp/test_report.rb +3 -16
  226. data/test/unit/bio/appl/test_blast.rb +3 -16
  227. data/test/unit/bio/appl/test_fasta.rb +4 -16
  228. data/test/unit/bio/appl/test_pts1.rb +140 -0
  229. data/test/unit/bio/appl/tmhmm/test_report.rb +3 -16
  230. data/test/unit/bio/data/test_aa.rb +4 -17
  231. data/test/unit/bio/data/test_codontable.rb +3 -16
  232. data/test/unit/bio/data/test_na.rb +3 -3
  233. data/test/unit/bio/db/embl/test_common.rb +3 -16
  234. data/test/unit/bio/db/embl/test_embl.rb +3 -16
  235. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  236. data/test/unit/bio/db/embl/test_sptr.rb +1548 -41
  237. data/test/unit/bio/db/embl/test_uniprot.rb +3 -16
  238. data/test/unit/bio/db/kegg/test_genes.rb +3 -16
  239. data/test/unit/bio/db/pdb/test_pdb.rb +7 -24
  240. data/test/unit/bio/db/test_aaindex.rb +2 -2
  241. data/test/unit/bio/db/test_fasta.rb +3 -16
  242. data/test/unit/bio/db/test_gff.rb +3 -16
  243. data/test/unit/bio/db/test_lasergene.rb +95 -0
  244. data/test/unit/bio/db/test_newick.rb +56 -0
  245. data/test/unit/bio/db/test_nexus.rb +360 -0
  246. data/test/unit/bio/db/test_prosite.rb +5 -18
  247. data/test/unit/bio/db/test_rebase.rb +11 -25
  248. data/test/unit/bio/db/test_soft.rb +138 -0
  249. data/test/unit/bio/io/test_ddbjxml.rb +5 -17
  250. data/test/unit/bio/io/test_ensembl.rb +109 -0
  251. data/test/unit/bio/io/test_fastacmd.rb +3 -16
  252. data/test/unit/bio/io/test_flatfile.rb +237 -0
  253. data/test/unit/bio/io/test_soapwsdl.rb +4 -17
  254. data/test/unit/bio/sequence/test_aa.rb +3 -3
  255. data/test/unit/bio/sequence/test_common.rb +3 -16
  256. data/test/unit/bio/sequence/test_compat.rb +3 -16
  257. data/test/unit/bio/sequence/test_na.rb +29 -3
  258. data/test/unit/bio/shell/plugin/test_seq.rb +8 -8
  259. data/test/unit/bio/test_alignment.rb +16 -27
  260. data/test/unit/bio/test_command.rb +242 -25
  261. data/test/unit/bio/test_db.rb +3 -16
  262. data/test/unit/bio/test_feature.rb +4 -16
  263. data/test/unit/bio/test_location.rb +4 -16
  264. data/test/unit/bio/test_map.rb +230 -0
  265. data/test/unit/bio/test_pathway.rb +4 -16
  266. data/test/unit/bio/test_reference.rb +2 -2
  267. data/test/unit/bio/test_sequence.rb +7 -19
  268. data/test/unit/bio/test_shell.rb +3 -16
  269. data/test/unit/bio/test_tree.rb +593 -0
  270. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  271. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  272. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  273. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +100 -0
  274. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  275. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  276. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  277. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  278. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  279. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  280. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  281. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  282. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  283. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  284. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  285. data/test/unit/bio/util/test_color_scheme.rb +6 -18
  286. data/test/unit/bio/util/test_contingency_table.rb +6 -18
  287. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  288. data/test/unit/bio/util/test_sirna.rb +3 -16
  289. metadata +228 -169
  290. data/doc/BioRuby.rd.ja +0 -225
  291. data/doc/Design.rd.ja +0 -341
  292. data/doc/TODO.rd.ja +0 -138
  293. data/lib/bio/appl/fasta/format6.rb +0 -37
  294. data/lib/bio/db/kegg/cell.rb +0 -88
  295. data/lib/bio/db/kegg/ko.rb +0 -178
  296. data/lib/bio/shell/rails/Rakefile +0 -10
  297. data/lib/bio/shell/rails/app/controllers/application.rb +0 -4
  298. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +0 -94
  299. data/lib/bio/shell/rails/app/helpers/application_helper.rb +0 -3
  300. data/lib/bio/shell/rails/app/models/shell_connection.rb +0 -30
  301. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +0 -37
  302. data/lib/bio/shell/rails/app/views/shell/history.rhtml +0 -5
  303. data/lib/bio/shell/rails/app/views/shell/index.rhtml +0 -2
  304. data/lib/bio/shell/rails/app/views/shell/show.rhtml +0 -13
  305. data/lib/bio/shell/rails/config/boot.rb +0 -19
  306. data/lib/bio/shell/rails/config/database.yml +0 -85
  307. data/lib/bio/shell/rails/config/environment.rb +0 -53
  308. data/lib/bio/shell/rails/config/environments/development.rb +0 -19
  309. data/lib/bio/shell/rails/config/environments/production.rb +0 -19
  310. data/lib/bio/shell/rails/config/environments/test.rb +0 -19
  311. data/lib/bio/shell/rails/config/routes.rb +0 -19
  312. data/lib/bio/shell/rails/doc/README_FOR_APP +0 -2
  313. data/lib/bio/shell/rails/public/404.html +0 -8
  314. data/lib/bio/shell/rails/public/500.html +0 -8
  315. data/lib/bio/shell/rails/public/dispatch.cgi +0 -10
  316. data/lib/bio/shell/rails/public/dispatch.fcgi +0 -24
  317. data/lib/bio/shell/rails/public/dispatch.rb +0 -10
  318. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  319. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  320. data/lib/bio/shell/rails/public/index.html +0 -277
  321. data/lib/bio/shell/rails/public/javascripts/controls.js +0 -750
  322. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +0 -584
  323. data/lib/bio/shell/rails/public/javascripts/effects.js +0 -854
  324. data/lib/bio/shell/rails/public/javascripts/prototype.js +0 -1785
  325. data/lib/bio/shell/rails/public/robots.txt +0 -1
  326. data/lib/bio/shell/rails/public/stylesheets/main.css +0 -187
  327. data/lib/bio/shell/rails/script/about +0 -3
  328. data/lib/bio/shell/rails/script/breakpointer +0 -3
  329. data/lib/bio/shell/rails/script/console +0 -3
  330. data/lib/bio/shell/rails/script/destroy +0 -3
  331. data/lib/bio/shell/rails/script/generate +0 -3
  332. data/lib/bio/shell/rails/script/performance/benchmarker +0 -3
  333. data/lib/bio/shell/rails/script/performance/profiler +0 -3
  334. data/lib/bio/shell/rails/script/plugin +0 -3
  335. data/lib/bio/shell/rails/script/process/reaper +0 -3
  336. data/lib/bio/shell/rails/script/process/spawner +0 -3
  337. data/lib/bio/shell/rails/script/process/spinner +0 -3
  338. data/lib/bio/shell/rails/script/runner +0 -3
  339. data/lib/bio/shell/rails/script/server +0 -42
  340. data/lib/bio/shell/rails/test/test_helper.rb +0 -28
@@ -0,0 +1,107 @@
1
+ #
2
+ # bio/util/restriction_enzyme/cut_symbol.rb - Defines the symbol used to mark a cut in an enzyme sequence
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: cut_symbol.rb,v 1.6 2007/07/16 19:28:48 k Exp $
9
+ #
10
+
11
+ module Bio
12
+ class RestrictionEnzyme
13
+
14
+ # = Usage
15
+ #
16
+ # #require 'bio/util/restriction_enzyme/cut_symbol'
17
+ # require 'cut_symbol'
18
+ # include Bio::RestrictionEnzyme::CutSymbol
19
+ #
20
+ # cut_symbol # => "^"
21
+ # set_cut_symbol('|') # => "|"
22
+ # cut_symbol # => "|"
23
+ # escaped_cut_symbol # => "\\|"
24
+ # re_cut_symbol # => /\|/
25
+ # set_cut_symbol('^') # => "^"
26
+ # "abc^de" =~ re_cut_symbol # => 3
27
+ # "abc^de" =~ re_cut_symbol_adjacent # => nil
28
+ # "abc^^de" =~ re_cut_symbol_adjacent # => 3
29
+ # "a^bc^^de" =~ re_cut_symbol_adjacent # => 4
30
+ # "a^bc^de" =~ re_cut_symbol_adjacent # => nil
31
+ #
32
+ module CutSymbol
33
+
34
+ # Set the token to be used as the cut symbol in a restriction enzyme sequence
35
+ #
36
+ # Starts as +^+ character
37
+ #
38
+ # ---
39
+ # *Arguments*
40
+ # * +glyph+: The single character to be used as the cut symbol in an enzyme sequence
41
+ # *Returns*:: +glyph+
42
+ def set_cut_symbol(glyph)
43
+ CutSymbol__.cut_symbol = glyph
44
+ end
45
+
46
+ # Get the token that's used as the cut symbol in a restriction enzyme sequence
47
+ #
48
+ # ---
49
+ # *Arguments*
50
+ # * _none_
51
+ # *Returns*:: +glyph+
52
+ def cut_symbol; CutSymbol__.cut_symbol; end
53
+
54
+ # Get the token that's used as the cut symbol in a restriction enzyme sequence with
55
+ # a backslash preceding it.
56
+ #
57
+ # ---
58
+ # *Arguments*
59
+ # * _none_
60
+ # *Returns*:: +\glyph+
61
+ def escaped_cut_symbol; CutSymbol__.escaped_cut_symbol; end
62
+
63
+ # Used to check if multiple cut symbols are next to each other.
64
+ #
65
+ # ---
66
+ # *Arguments*
67
+ # * _none_
68
+ # *Returns*:: +Regexp+
69
+ def re_cut_symbol_adjacent
70
+ %r"#{escaped_cut_symbol}{2}"
71
+ end
72
+
73
+ # A Regexp of the cut_symbol.
74
+ #
75
+ # ---
76
+ # *Arguments*
77
+ # * _none_
78
+ # *Returns*:: +Regexp+
79
+ def re_cut_symbol
80
+ %r"#{escaped_cut_symbol}"
81
+ end
82
+
83
+ #########
84
+ #protected # NOTE this is a Module, can't hide CutSymbol__
85
+ #########
86
+
87
+ require 'singleton'
88
+
89
+ # Class to keep state
90
+ class CutSymbol__
91
+ include Singleton
92
+
93
+ @cut_symbol = '^'
94
+
95
+ def self.cut_symbol; @cut_symbol; end
96
+
97
+ def self.cut_symbol=(glyph);
98
+ raise ArgumentError if glyph.size != 1
99
+ @cut_symbol = glyph
100
+ end
101
+
102
+ def self.escaped_cut_symbol; "\\" + self.cut_symbol; end
103
+ end
104
+
105
+ end # CutSymbol
106
+ end # RestrictionEnzyme
107
+ end # Bio
@@ -0,0 +1,321 @@
1
+ #
2
+ # bio/util/restriction_enzyme/double_stranded.rb - DoubleStranded restriction enzyme sequence
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: double_stranded.rb,v 1.11 2007/07/16 19:28:48 k Exp $
9
+ #
10
+
11
+ require 'bio/util/restriction_enzyme'
12
+
13
+ module Bio
14
+ class RestrictionEnzyme
15
+
16
+ # A pair of SingleStrand and SingleStrandComplement objects with methods to
17
+ # add utility to their relation.
18
+ #
19
+ # = Notes
20
+ # * This is created by Bio::RestrictionEnzyme.new for convenience.
21
+ # * The two strands accessible are +primary+ and +complement+.
22
+ # * SingleStrand methods may be used on DoubleStranded and they will be passed to +primary+.
23
+ #
24
+ #
25
+ # FIXME needs better docs
26
+ class DoubleStranded
27
+
28
+ autoload :AlignedStrands, 'bio/util/restriction_enzyme/double_stranded/aligned_strands'
29
+ autoload :CutLocations, 'bio/util/restriction_enzyme/double_stranded/cut_locations'
30
+ autoload :CutLocationPair, 'bio/util/restriction_enzyme/double_stranded/cut_location_pair'
31
+ autoload :CutLocationsInEnzymeNotation, 'bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation'
32
+ autoload :CutLocationPairInEnzymeNotation, 'bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation'
33
+
34
+ include CutSymbol
35
+ extend CutSymbol
36
+ include StringFormatting
37
+ extend StringFormatting
38
+
39
+ # The primary strand
40
+ attr_reader :primary
41
+
42
+ # The complement strand
43
+ attr_reader :complement
44
+
45
+ # Cut locations in 0-based index format, DoubleStranded::CutLocations object
46
+ attr_reader :cut_locations
47
+
48
+ # Cut locations in enzyme index notation, DoubleStranded::CutLocationsInEnzymeNotation object
49
+ attr_reader :cut_locations_in_enzyme_notation
50
+
51
+ # [+erp+] One of three possible parameters: The name of an enzyme, a REBASE::EnzymeEntry object, or a nucleotide pattern with a cut mark.
52
+ # [+raw_cut_pairs+] The cut locations in enzyme index notation.
53
+ #
54
+ # Enzyme index notation:: 1.._n_, value before 1 is -1
55
+ #
56
+ # Examples of the allowable cut locations for +raw_cut_pairs+ follows. 'p' and
57
+ # 'c' refer to a cut location on the 'p'rimary and 'c'omplement strands.
58
+ #
59
+ # 1, [3,2], [20,22], 57
60
+ # p, [p,c], [p, c], p
61
+ #
62
+ # Which is the same as:
63
+ #
64
+ # 1, (3..2), (20..22), 57
65
+ # p, (p..c), (p..c), p
66
+ #
67
+ # Examples of partial cuts:
68
+ # 1, [nil,2], [20,nil], 57
69
+ # p, [p, c], [p, c], p
70
+ #
71
+ def initialize(erp, *raw_cut_pairs)
72
+ # 'erp' : 'E'nzyme / 'R'ebase / 'P'attern
73
+ k = erp.class
74
+
75
+ if k == Bio::REBASE::EnzymeEntry
76
+ # Passed a Bio::REBASE::EnzymeEntry object
77
+
78
+ unless raw_cut_pairs.empty?
79
+ err = "A Bio::REBASE::EnzymeEntry object was passed, however the cut locations contained values. Ambiguous or redundant.\n"
80
+ err += "inspect = #{raw_cut_pairs.inspect}"
81
+ raise ArgumentError, err
82
+ end
83
+ initialize_with_rebase( erp )
84
+
85
+ elsif erp.kind_of? String
86
+ # Passed something that could be an enzyme pattern or an enzyme name
87
+
88
+ # Decide if this String is an enzyme name or a pattern
89
+ if Bio::RestrictionEnzyme.enzyme_name?( erp )
90
+ # FIXME we added this to rebase...
91
+ # Check if it's a known name
92
+ known_enzyme = false
93
+ known_enzyme = true if Bio::RestrictionEnzyme.rebase[ erp ]
94
+
95
+ # Try harder to find the enzyme
96
+ unless known_enzyme
97
+ re = %r"^#{erp}$"i
98
+ Bio::RestrictionEnzyme.rebase.each { |name, v| (known_enzyme = true; erp = name; break) if name =~ re }
99
+ end
100
+
101
+ if known_enzyme
102
+ initialize_with_rebase( Bio::RestrictionEnzyme.rebase[erp] )
103
+ else
104
+ raise IndexError, "No entry found for enzyme named '#{erp}'"
105
+ end
106
+
107
+ else
108
+ # Not an enzyme name, so a pattern is assumed
109
+ if erp =~ re_cut_symbol
110
+ initialize_with_pattern_and_cut_symbols( erp )
111
+ else
112
+ initialize_with_pattern_and_cut_locations( erp, raw_cut_pairs )
113
+ end
114
+ end
115
+
116
+ elsif k == NilClass
117
+ err = "Passed a nil value. Perhaps you tried to pass a Bio::REBASE::EnzymeEntry that does not exist?\n"
118
+ err += "inspect = #{erp.inspect}"
119
+ raise ArgumentError, err
120
+ else
121
+ err = "I don't know what to do with class #{k} for erp.\n"
122
+ err += "inspect = #{erp.inspect}"
123
+ raise ArgumentError, err
124
+ end
125
+
126
+ end
127
+
128
+ # See AlignedStrands.align
129
+ def aligned_strands
130
+ AlignedStrands.align(@primary.pattern, @complement.pattern)
131
+ end
132
+
133
+ # See AlignedStrands.align_with_cuts
134
+ def aligned_strands_with_cuts
135
+ AlignedStrands.align_with_cuts(@primary.pattern, @complement.pattern, @primary.cut_locations, @complement.cut_locations)
136
+ end
137
+
138
+ # Returns +true+ if the cut pattern creates blunt fragments.
139
+ # (opposite of sticky)
140
+ def blunt?
141
+ as = aligned_strands_with_cuts
142
+ ary = [as.primary, as.complement]
143
+ ary.collect! { |seq| seq.split( cut_symbol ) }
144
+ # convert the cut sections to their lengths
145
+ ary.each { |i| i.collect! { |c| c.length } }
146
+ ary[0] == ary[1]
147
+ end
148
+
149
+ # Returns +true+ if the cut pattern creates sticky fragments.
150
+ # (opposite of blunt)
151
+ def sticky?
152
+ !blunt?
153
+ end
154
+
155
+ # Takes a numerical offset into the sequence and returns an EnzymeAction
156
+ # describing where this enzyme binds and cuts at that offset.
157
+ # The enzyme is the receiver itself; it is not passed as an argument.
158
+ #
159
+ # +offset+:: Numerical offset of where the enzyme action occurs on the sequence
160
+ def create_action_at( offset )
161
+ # x is the size of the fully aligned sequence with maximum padding needed
162
+ # to make a match on the primary and complement strand.
163
+ #
164
+ # For example -
165
+ # Note how EcoRII needs extra padding on the beginning and ending of the
166
+ # sequence 'ccagg' to make the match since the cut must occur between
167
+ # two nucleotides and can not occur on the very end of the sequence.
168
+ #
169
+ # EcoRII:
170
+ # :blunt: "0"
171
+ # :c2: "5"
172
+ # :c4: "0"
173
+ # :c1: "-1"
174
+ # :pattern: CCWGG
175
+ # :len: "5"
176
+ # :name: EcoRII
177
+ # :c3: "0"
178
+ # :ncuts: "2"
179
+ #
180
+ # -1 1 2 3 4 5
181
+ # 5' - n^c c w g g n - 3'
182
+ # 3' - n g g w c c^n - 5'
183
+ #
184
+ # (w == [at])
185
+
186
+ x = aligned_strands.primary.size
187
+
188
+ enzyme_action = EnzymeAction.new( offset,
189
+ offset + x-1,
190
+ offset,
191
+ offset + x-1)
192
+
193
+ @cut_locations.each do |cut_location_pair|
194
+ # cut_location_pair is a DoubleStranded::CutLocationPair
195
+ p, c = cut_location_pair.primary, cut_location_pair.complement
196
+ if c >= p
197
+ enzyme_action.add_cut_range(offset+p, nil, nil, offset+c)
198
+ else
199
+ enzyme_action.add_cut_range(nil, offset+p, offset+c, nil)
200
+ end
201
+ end
202
+
203
+ enzyme_action
204
+ end
205
+
206
+ # An EnzymeAction is a way of representing a potential effect that a
207
+ # RestrictionEnzyme may have on a nucleotide sequence, an 'action'.
208
+ #
209
+ # Multiple cuts in multiple locations on a sequence may occur in one
210
+ # 'action' if it is done by a single enzyme.
211
+ #
212
+ # An EnzymeAction is a series of locations that represents where the restriction
213
+ # enzyme will bind on the sequence, as well as what ranges are cut on the
214
+ # sequence itself. The complexity is due to the fact that our virtual
215
+ # restriction enzyme may create multiple segments from its cutting action,
216
+ # on which another restriction enzyme may operate upon.
217
+ #
218
+ # For example, the DNA sequence:
219
+ #
220
+ # 5' - G A A T A A A C G A - 3'
221
+ # 3' - C T T A T T T G C T - 5'
222
+ #
223
+ # When mixed with the restriction enzyme with the following cut pattern:
224
+ #
225
+ # 5' - A|A T A A A C|G - 3'
226
+ # +-+ +
227
+ # 3' - T T|A T T T G|C - 5'
228
+ #
229
+ # And also mixed with the restriction enzyme of the following cut pattern:
230
+ #
231
+ # 5' - A A|A C - 3'
232
+ # +-+
233
+ # 3' - T|T T G - 5'
234
+ #
235
+ # Would result in a DNA sequence with these cuts:
236
+ #
237
+ # 5' - G A|A T A A|A C|G A - 3'
238
+ # +-+ +-+ +
239
+ # 3' - C T T|A T|T T G|C T - 5'
240
+ #
241
+ # Or these separate "free-floating" sequences:
242
+ #
243
+ # 5' - G A - 3'
244
+ # 3' - C T T - 5'
245
+ #
246
+ # 5' - A T A A - 3'
247
+ # 3' - A T - 5'
248
+ #
249
+ # 5' - A C - 3'
250
+ # 3' - T T G - 5'
251
+ #
252
+ # 5' - G A - 3'
253
+ # 3' - C T - 5'
254
+ #
255
+ # This would be represented by two EnzymeActions - one for each
256
+ # RestrictionEnzyme.
257
+ #
258
+ # This is, however, subject to competition. If the second enzyme reaches
259
+ # the target first, then the first enzyme will not be able to find the
260
+ # appropriate bind site.
261
+ #
262
+ # FIXME complete these docs
263
+ #
264
+ # To initialize an EnzymeAction you must first instantiate it with the
265
+ # beginning and ending locations of where it will operate on a nucleotide
266
+ # sequence.
267
+ #
268
+ # Next the ranges of cuts are added with +add_cut_range+ (see +create_action_at+).
269
+ #
270
+ # An EnzymeAction
271
+ # defines a single enzyme action, in this case being a range that correlates
272
+ # to the DNA sequence that may contain its own internal cuts.
273
+ class EnzymeAction < Bio::RestrictionEnzyme::Range::SequenceRange
274
+ end
275
+
276
+ #########
277
+ protected
278
+ #########
279
+
280
+ def initialize_with_pattern_and_cut_symbols( s )
281
+ p_cl = SingleStrand::CutLocationsInEnzymeNotation.new( strip_padding(s) )
282
+ s = Bio::Sequence::NA.new( strip_cuts_and_padding(s) )
283
+
284
+ # * Reflect cuts that are in enzyme notation
285
+ # * 0 is not a valid enzyme index, decrement 0 and all negative
286
+ c_cl = p_cl.collect {|n| (n >= s.length or n < 1) ? ((s.length - n) - 1) : (s.length - n)}
287
+
288
+ create_cut_locations( p_cl.zip(c_cl) )
289
+ create_primary_and_complement( s, p_cl, c_cl )
290
+ end
291
+
292
+ def initialize_with_pattern_and_cut_locations( s, raw_cl )
293
+ create_cut_locations(raw_cl)
294
+ create_primary_and_complement( Bio::Sequence::NA.new(s), @cut_locations_in_enzyme_notation.primary, @cut_locations_in_enzyme_notation.complement )
295
+ end
296
+
297
+ def create_primary_and_complement(primary_seq, p_cuts, c_cuts)
298
+ @primary = SingleStrand.new( primary_seq, p_cuts )
299
+ @complement = SingleStrandComplement.new( primary_seq.forward_complement, c_cuts )
300
+ end
301
+
302
+ def create_cut_locations(raw_cl)
303
+ @cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( *raw_cl.collect {|cl| CutLocationPairInEnzymeNotation.new(cl)} )
304
+ @cut_locations = @cut_locations_in_enzyme_notation.to_array_index
305
+ end
306
+
307
+ def initialize_with_rebase( e )
308
+ p_cl = [e.primary_strand_cut1, e.primary_strand_cut2]
309
+ c_cl = [e.complementary_strand_cut1, e.complementary_strand_cut2]
310
+
311
+ # If there's no cut in REBASE it's represented as a 0.
312
+ # 0 is an invalid index, it just means no cut.
313
+ p_cl.delete(0)
314
+ c_cl.delete(0)
315
+ raise IndexError unless p_cl.size == c_cl.size
316
+ initialize_with_pattern_and_cut_locations( e.pattern, p_cl.zip(c_cl) )
317
+ end
318
+
319
+ end # DoubleStranded
320
+ end # RestrictionEnzyme
321
+ end # Bio
@@ -0,0 +1,130 @@
1
+ #
2
+ # bio/util/restriction_enzyme/double_stranded/aligned_strands.rb - Align two SingleStrand objects
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: aligned_strands.rb,v 1.6 2007/07/16 19:28:48 k Exp $
9
+ #
10
+
11
+ require 'bio/util/restriction_enzyme'
12
+
13
+ module Bio
14
+ class RestrictionEnzyme
15
+ class DoubleStranded
16
+
17
+ # Align two SingleStrand objects and return a Result
18
+ # object with +primary+ and +complement+ accessors.
19
+ #
20
+ class AlignedStrands
21
+ extend CutSymbol
22
+ extend StringFormatting
23
+
24
+ # The object returned for alignments
25
+ Result = Struct.new(:primary, :complement)
26
+
27
+ # Pad and align two String objects without cut symbols.
28
+ #
29
+ # This will look for the sub-sequence without left and right 'n' padding
30
+ # and re-apply 'n' padding to both strings on both sides equal to the
31
+ # maximum previous padding on that side.
32
+ #
33
+ # The sub-sequences stripped of left and right 'n' padding must be of equal
34
+ # length.
35
+ #
36
+ # Example:
37
+ # AlignedStrands.align('nngattacannnnn', 'nnnnnctaatgtnn') # =>
38
+ # <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
39
+ # primary="nnnnngattacannnnn",
40
+ # complement="nnnnnctaatgtnnnnn">
41
+ #
42
+ # ---
43
+ # *Arguments*
44
+ # * +a+: Primary strand
45
+ # * +b+: Complementary strand
46
+ # *Returns*:: +Result+ object with equal padding on both strings
47
+ def self.align(a, b)
48
+ a = a.to_s
49
+ b = b.to_s
50
+ validate_input( strip_padding(a), strip_padding(b) )
51
+ left = [left_padding(a), left_padding(b)].sort.last
52
+ right = [right_padding(a), right_padding(b)].sort.last
53
+
54
+ p = left + strip_padding(a) + right
55
+ c = left + strip_padding(b) + right
56
+ Result.new(p,c)
57
+ end
58
+
59
+ # Pad and align two String objects with cut symbols.
60
+ #
61
+ # Example:
62
+ # AlignedStrands.with_cuts('nngattacannnnn', 'nnnnnctaatgtnn', [0, 10, 12], [0, 2, 12]) # =>
63
+ # <struct Bio::RestrictionEnzyme::DoubleStranded::AlignedStrands::Result
64
+ # primary="n n n n^n g a t t a c a n n^n n^n",
65
+ # complement="n^n n^n n c t a a t g t n^n n n n">
66
+ #
67
+ # Notes:
68
+ # * To make room for the cut symbols each nucleotide is spaced out.
69
+ # * This is meant to be able to handle multiple cuts and completely
70
+ # unrelated cutsites on the two strands, therefore no biological
71
+ # algorithm assumptions (shortcuts) are made.
72
+ #
73
+ # The sequences stripped of left and right 'n' padding must be of equal
74
+ # length.
75
+ #
76
+ # ---
77
+ # *Arguments*
78
+ # * +a+: Primary sequence
79
+ # * +b+: Complementary sequence
80
+ # * +a_cuts+: Primary strand cut locations in 0-based index notation
81
+ # * +b_cuts+: Complementary strand cut locations in 0-based index notation
82
+ # *Returns*:: +Result+ object with equal padding on both strings and spacing between bases
83
+ def self.align_with_cuts(a,b,a_cuts,b_cuts)
84
+ a = a.to_s
85
+ b = b.to_s
86
+ validate_input( strip_padding(a), strip_padding(b) )
87
+
88
+ a_left, a_right = left_padding(a), right_padding(a)
89
+ b_left, b_right = left_padding(b), right_padding(b)
90
+
91
+ left_diff = a_left.length - b_left.length
92
+ right_diff = a_right.length - b_right.length
93
+
94
+ (right_diff > 0) ? (b_right += 'n' * right_diff) : (a_right += 'n' * right_diff.abs)
95
+
96
+ a_adjust = b_adjust = 0
97
+
98
+ if left_diff > 0
99
+ b_left += 'n' * left_diff
100
+ b_adjust = left_diff
101
+ else
102
+ a_left += 'n' * left_diff.abs
103
+ a_adjust = left_diff.abs
104
+ end
105
+
106
+ a = a_left + strip_padding(a) + a_right
107
+ b = b_left + strip_padding(b) + b_right
108
+
109
+ a_cuts.sort.reverse.each { |c| a.insert(c+1+a_adjust, cut_symbol) }
110
+ b_cuts.sort.reverse.each { |c| b.insert(c+1+b_adjust, cut_symbol) }
111
+
112
+ Result.new( add_spacing(a), add_spacing(b) )
113
+ end
114
+
115
+ #########
116
+ protected
117
+ #########
118
+
119
+ def self.validate_input(a,b)
120
+ unless a.size == b.size
121
+ err = "Result sequences are not the same size. Does not align sequences with differing lengths after strip_padding.\n"
122
+ err += "#{a.size}, #{a.inspect}\n"
123
+ err += "#{b.size}, #{b.inspect}"
124
+ raise ArgumentError, err
125
+ end
126
+ end
127
+ end # AlignedStrands
128
+ end # DoubleStranded
129
+ end # RestrictionEnzyme
130
+ end # Bio