wwood-bioruby 1.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (386) hide show
  1. data/README.rdoc +205 -0
  2. data/README_DEV.rdoc +285 -0
  3. data/VERSION.yml +4 -0
  4. data/bin/bioruby +44 -0
  5. data/bin/br_biofetch.rb +47 -0
  6. data/bin/br_bioflat.rb +293 -0
  7. data/bin/br_biogetseq.rb +45 -0
  8. data/bin/br_pmfetch.rb +421 -0
  9. data/lib/bio.rb +306 -0
  10. data/lib/bio/alignment.rb +2518 -0
  11. data/lib/bio/appl/bl2seq/report.rb +334 -0
  12. data/lib/bio/appl/blast.rb +505 -0
  13. data/lib/bio/appl/blast/ddbj.rb +142 -0
  14. data/lib/bio/appl/blast/format0.rb +1438 -0
  15. data/lib/bio/appl/blast/format8.rb +83 -0
  16. data/lib/bio/appl/blast/genomenet.rb +263 -0
  17. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  18. data/lib/bio/appl/blast/remote.rb +105 -0
  19. data/lib/bio/appl/blast/report.rb +767 -0
  20. data/lib/bio/appl/blast/rexml.rb +144 -0
  21. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  22. data/lib/bio/appl/blast/wublast.rb +635 -0
  23. data/lib/bio/appl/blast/xmlparser.rb +236 -0
  24. data/lib/bio/appl/blat/report.rb +530 -0
  25. data/lib/bio/appl/clustalw.rb +219 -0
  26. data/lib/bio/appl/clustalw/report.rb +152 -0
  27. data/lib/bio/appl/emboss.rb +203 -0
  28. data/lib/bio/appl/fasta.rb +235 -0
  29. data/lib/bio/appl/fasta/format10.rb +325 -0
  30. data/lib/bio/appl/gcg/msf.rb +212 -0
  31. data/lib/bio/appl/gcg/seq.rb +195 -0
  32. data/lib/bio/appl/genscan/report.rb +552 -0
  33. data/lib/bio/appl/hmmer.rb +126 -0
  34. data/lib/bio/appl/hmmer/report.rb +683 -0
  35. data/lib/bio/appl/iprscan/report.rb +374 -0
  36. data/lib/bio/appl/mafft.rb +259 -0
  37. data/lib/bio/appl/mafft/report.rb +226 -0
  38. data/lib/bio/appl/muscle.rb +52 -0
  39. data/lib/bio/appl/paml/baseml.rb +95 -0
  40. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  41. data/lib/bio/appl/paml/codeml.rb +242 -0
  42. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  43. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  44. data/lib/bio/appl/paml/common.rb +348 -0
  45. data/lib/bio/appl/paml/common_report.rb +38 -0
  46. data/lib/bio/appl/paml/yn00.rb +103 -0
  47. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  48. data/lib/bio/appl/phylip/alignment.rb +133 -0
  49. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  50. data/lib/bio/appl/probcons.rb +41 -0
  51. data/lib/bio/appl/psort.rb +548 -0
  52. data/lib/bio/appl/psort/report.rb +542 -0
  53. data/lib/bio/appl/pts1.rb +263 -0
  54. data/lib/bio/appl/sim4.rb +124 -0
  55. data/lib/bio/appl/sim4/report.rb +485 -0
  56. data/lib/bio/appl/sosui/report.rb +151 -0
  57. data/lib/bio/appl/spidey/report.rb +593 -0
  58. data/lib/bio/appl/targetp/report.rb +267 -0
  59. data/lib/bio/appl/tcoffee.rb +55 -0
  60. data/lib/bio/appl/tmhmm/report.rb +231 -0
  61. data/lib/bio/command.rb +593 -0
  62. data/lib/bio/compat/features.rb +157 -0
  63. data/lib/bio/compat/references.rb +128 -0
  64. data/lib/bio/data/aa.rb +353 -0
  65. data/lib/bio/data/codontable.rb +722 -0
  66. data/lib/bio/data/na.rb +223 -0
  67. data/lib/bio/db.rb +329 -0
  68. data/lib/bio/db/aaindex.rb +357 -0
  69. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  70. data/lib/bio/db/biosql/sequence.rb +508 -0
  71. data/lib/bio/db/embl/common.rb +352 -0
  72. data/lib/bio/db/embl/embl.rb +500 -0
  73. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  74. data/lib/bio/db/embl/format_embl.rb +190 -0
  75. data/lib/bio/db/embl/sptr.rb +1283 -0
  76. data/lib/bio/db/embl/swissprot.rb +42 -0
  77. data/lib/bio/db/embl/trembl.rb +41 -0
  78. data/lib/bio/db/embl/uniprot.rb +42 -0
  79. data/lib/bio/db/fantom.rb +597 -0
  80. data/lib/bio/db/fasta.rb +410 -0
  81. data/lib/bio/db/fasta/defline.rb +532 -0
  82. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  83. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  84. data/lib/bio/db/genbank/common.rb +307 -0
  85. data/lib/bio/db/genbank/ddbj.rb +22 -0
  86. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  87. data/lib/bio/db/genbank/genbank.rb +250 -0
  88. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  89. data/lib/bio/db/genbank/genpept.rb +60 -0
  90. data/lib/bio/db/genbank/refseq.rb +18 -0
  91. data/lib/bio/db/gff.rb +1846 -0
  92. data/lib/bio/db/go.rb +481 -0
  93. data/lib/bio/db/kegg/brite.rb +41 -0
  94. data/lib/bio/db/kegg/compound.rb +131 -0
  95. data/lib/bio/db/kegg/drug.rb +98 -0
  96. data/lib/bio/db/kegg/enzyme.rb +148 -0
  97. data/lib/bio/db/kegg/expression.rb +155 -0
  98. data/lib/bio/db/kegg/genes.rb +263 -0
  99. data/lib/bio/db/kegg/genome.rb +241 -0
  100. data/lib/bio/db/kegg/glycan.rb +166 -0
  101. data/lib/bio/db/kegg/keggtab.rb +357 -0
  102. data/lib/bio/db/kegg/kgml.rb +256 -0
  103. data/lib/bio/db/kegg/orthology.rb +136 -0
  104. data/lib/bio/db/kegg/reaction.rb +82 -0
  105. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  106. data/lib/bio/db/lasergene.rb +209 -0
  107. data/lib/bio/db/litdb.rb +107 -0
  108. data/lib/bio/db/medline.rb +326 -0
  109. data/lib/bio/db/nbrf.rb +191 -0
  110. data/lib/bio/db/newick.rb +658 -0
  111. data/lib/bio/db/nexus.rb +1854 -0
  112. data/lib/bio/db/pdb.rb +29 -0
  113. data/lib/bio/db/pdb/atom.rb +77 -0
  114. data/lib/bio/db/pdb/chain.rb +210 -0
  115. data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
  116. data/lib/bio/db/pdb/model.rb +148 -0
  117. data/lib/bio/db/pdb/pdb.rb +1911 -0
  118. data/lib/bio/db/pdb/residue.rb +176 -0
  119. data/lib/bio/db/pdb/utils.rb +399 -0
  120. data/lib/bio/db/prosite.rb +597 -0
  121. data/lib/bio/db/rebase.rb +456 -0
  122. data/lib/bio/db/soft.rb +404 -0
  123. data/lib/bio/db/transfac.rb +375 -0
  124. data/lib/bio/db/url.rb +42 -0
  125. data/lib/bio/feature.rb +139 -0
  126. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  127. data/lib/bio/io/biosql/bioentry.rb +29 -0
  128. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  129. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  130. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  131. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  132. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  133. data/lib/bio/io/biosql/biosequence.rb +11 -0
  134. data/lib/bio/io/biosql/comment.rb +7 -0
  135. data/lib/bio/io/biosql/config/database.yml +20 -0
  136. data/lib/bio/io/biosql/dbxref.rb +13 -0
  137. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  138. data/lib/bio/io/biosql/location.rb +32 -0
  139. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  140. data/lib/bio/io/biosql/ontology.rb +10 -0
  141. data/lib/bio/io/biosql/reference.rb +9 -0
  142. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  143. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  144. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  145. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  146. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  147. data/lib/bio/io/biosql/taxon.rb +12 -0
  148. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  149. data/lib/bio/io/biosql/term.rb +27 -0
  150. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  151. data/lib/bio/io/biosql/term_path.rb +12 -0
  152. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  153. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  154. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  155. data/lib/bio/io/das.rb +461 -0
  156. data/lib/bio/io/dbget.rb +194 -0
  157. data/lib/bio/io/ddbjxml.rb +638 -0
  158. data/lib/bio/io/ebisoap.rb +158 -0
  159. data/lib/bio/io/ensembl.rb +229 -0
  160. data/lib/bio/io/fastacmd.rb +163 -0
  161. data/lib/bio/io/fetch.rb +195 -0
  162. data/lib/bio/io/flatfile.rb +482 -0
  163. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  164. data/lib/bio/io/flatfile/bdb.rb +253 -0
  165. data/lib/bio/io/flatfile/buffer.rb +237 -0
  166. data/lib/bio/io/flatfile/index.rb +1381 -0
  167. data/lib/bio/io/flatfile/indexer.rb +805 -0
  168. data/lib/bio/io/flatfile/splitter.rb +297 -0
  169. data/lib/bio/io/higet.rb +73 -0
  170. data/lib/bio/io/hinv.rb +442 -0
  171. data/lib/bio/io/keggapi.rb +805 -0
  172. data/lib/bio/io/ncbirest.rb +733 -0
  173. data/lib/bio/io/ncbisoap.rb +155 -0
  174. data/lib/bio/io/pubmed.rb +307 -0
  175. data/lib/bio/io/registry.rb +292 -0
  176. data/lib/bio/io/soapwsdl.rb +119 -0
  177. data/lib/bio/io/sql.rb +186 -0
  178. data/lib/bio/location.rb +867 -0
  179. data/lib/bio/map.rb +410 -0
  180. data/lib/bio/pathway.rb +960 -0
  181. data/lib/bio/reference.rb +602 -0
  182. data/lib/bio/sequence.rb +456 -0
  183. data/lib/bio/sequence/aa.rb +152 -0
  184. data/lib/bio/sequence/adapter.rb +108 -0
  185. data/lib/bio/sequence/common.rb +310 -0
  186. data/lib/bio/sequence/compat.rb +123 -0
  187. data/lib/bio/sequence/dblink.rb +54 -0
  188. data/lib/bio/sequence/format.rb +358 -0
  189. data/lib/bio/sequence/format_raw.rb +23 -0
  190. data/lib/bio/sequence/generic.rb +24 -0
  191. data/lib/bio/sequence/na.rb +491 -0
  192. data/lib/bio/shell.rb +44 -0
  193. data/lib/bio/shell/core.rb +578 -0
  194. data/lib/bio/shell/demo.rb +146 -0
  195. data/lib/bio/shell/interface.rb +218 -0
  196. data/lib/bio/shell/irb.rb +95 -0
  197. data/lib/bio/shell/object.rb +71 -0
  198. data/lib/bio/shell/plugin/blast.rb +42 -0
  199. data/lib/bio/shell/plugin/codon.rb +218 -0
  200. data/lib/bio/shell/plugin/das.rb +58 -0
  201. data/lib/bio/shell/plugin/emboss.rb +23 -0
  202. data/lib/bio/shell/plugin/entry.rb +105 -0
  203. data/lib/bio/shell/plugin/flatfile.rb +101 -0
  204. data/lib/bio/shell/plugin/keggapi.rb +181 -0
  205. data/lib/bio/shell/plugin/midi.rb +430 -0
  206. data/lib/bio/shell/plugin/obda.rb +45 -0
  207. data/lib/bio/shell/plugin/psort.rb +56 -0
  208. data/lib/bio/shell/plugin/seq.rb +247 -0
  209. data/lib/bio/shell/plugin/soap.rb +87 -0
  210. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
  211. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
  212. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
  213. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
  214. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
  215. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
  216. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  217. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  218. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  219. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
  220. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
  221. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
  222. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
  223. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
  224. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
  225. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
  226. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  227. data/lib/bio/shell/script.rb +25 -0
  228. data/lib/bio/shell/setup.rb +109 -0
  229. data/lib/bio/shell/web.rb +102 -0
  230. data/lib/bio/tree.rb +852 -0
  231. data/lib/bio/util/color_scheme.rb +191 -0
  232. data/lib/bio/util/color_scheme/buried.rb +59 -0
  233. data/lib/bio/util/color_scheme/helix.rb +59 -0
  234. data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
  235. data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
  236. data/lib/bio/util/color_scheme/strand.rb +59 -0
  237. data/lib/bio/util/color_scheme/taylor.rb +50 -0
  238. data/lib/bio/util/color_scheme/turn.rb +59 -0
  239. data/lib/bio/util/color_scheme/zappo.rb +50 -0
  240. data/lib/bio/util/contingency_table.rb +370 -0
  241. data/lib/bio/util/restriction_enzyme.rb +228 -0
  242. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  243. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  244. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  245. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  246. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  247. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  248. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  249. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  250. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  251. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  252. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  253. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  254. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  255. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  256. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  257. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  258. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  259. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  260. data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
  261. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  262. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  263. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  264. data/lib/bio/util/sirna.rb +288 -0
  265. data/test/data/HMMER/hmmpfam.out +64 -0
  266. data/test/data/HMMER/hmmsearch.out +88 -0
  267. data/test/data/SOSUI/sample.report +11 -0
  268. data/test/data/TMHMM/sample.report +21 -0
  269. data/test/data/aaindex/DAYM780301 +30 -0
  270. data/test/data/aaindex/PRAM900102 +20 -0
  271. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  272. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  273. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  274. data/test/data/blast/b0002.faa +15 -0
  275. data/test/data/blast/b0002.faa.m0 +128 -0
  276. data/test/data/blast/b0002.faa.m7 +65 -0
  277. data/test/data/blast/b0002.faa.m8 +1 -0
  278. data/test/data/blast/blastp-multi.m7 +188 -0
  279. data/test/data/command/echoarg2.bat +1 -0
  280. data/test/data/embl/AB090716.embl +65 -0
  281. data/test/data/embl/AB090716.embl.rel89 +63 -0
  282. data/test/data/fasta/example1.txt +75 -0
  283. data/test/data/fasta/example2.txt +21 -0
  284. data/test/data/genscan/sample.report +63 -0
  285. data/test/data/iprscan/merged.raw +32 -0
  286. data/test/data/iprscan/merged.txt +74 -0
  287. data/test/data/paml/codeml/control_file.txt +30 -0
  288. data/test/data/paml/codeml/output.txt +78 -0
  289. data/test/data/paml/codeml/rates +217 -0
  290. data/test/data/prosite/prosite.dat +2233 -0
  291. data/test/data/refseq/nm_126355.entret +64 -0
  292. data/test/data/rpsblast/misc.rpsblast +193 -0
  293. data/test/data/soft/GDS100_partial.soft +92 -0
  294. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  295. data/test/data/uniprot/p53_human.uniprot +1456 -0
  296. data/test/functional/bio/appl/test_pts1.rb +115 -0
  297. data/test/functional/bio/io/test_ensembl.rb +229 -0
  298. data/test/functional/bio/io/test_soapwsdl.rb +52 -0
  299. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  300. data/test/functional/bio/test_command.rb +301 -0
  301. data/test/runner.rb +14 -0
  302. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  303. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  304. data/test/unit/bio/appl/blast/test_report.rb +1135 -0
  305. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  306. data/test/unit/bio/appl/genscan/test_report.rb +182 -0
  307. data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
  308. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  309. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  310. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  311. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  312. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  313. data/test/unit/bio/appl/sosui/test_report.rb +81 -0
  314. data/test/unit/bio/appl/targetp/test_report.rb +146 -0
  315. data/test/unit/bio/appl/test_blast.rb +277 -0
  316. data/test/unit/bio/appl/test_fasta.rb +130 -0
  317. data/test/unit/bio/appl/test_psort.rb +57 -0
  318. data/test/unit/bio/appl/test_pts1.rb +77 -0
  319. data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
  320. data/test/unit/bio/data/test_aa.rb +90 -0
  321. data/test/unit/bio/data/test_codontable.rb +107 -0
  322. data/test/unit/bio/data/test_na.rb +80 -0
  323. data/test/unit/bio/db/embl/test_common.rb +117 -0
  324. data/test/unit/bio/db/embl/test_embl.rb +214 -0
  325. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  326. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  327. data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
  328. data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
  329. data/test/unit/bio/db/kegg/test_genes.rb +45 -0
  330. data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
  331. data/test/unit/bio/db/test_aaindex.rb +197 -0
  332. data/test/unit/bio/db/test_fasta.rb +250 -0
  333. data/test/unit/bio/db/test_gff.rb +1190 -0
  334. data/test/unit/bio/db/test_lasergene.rb +95 -0
  335. data/test/unit/bio/db/test_medline.rb +127 -0
  336. data/test/unit/bio/db/test_newick.rb +293 -0
  337. data/test/unit/bio/db/test_nexus.rb +364 -0
  338. data/test/unit/bio/db/test_prosite.rb +1437 -0
  339. data/test/unit/bio/db/test_rebase.rb +101 -0
  340. data/test/unit/bio/db/test_soft.rb +138 -0
  341. data/test/unit/bio/db/test_url.rb +36 -0
  342. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  343. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  344. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  345. data/test/unit/bio/io/test_ddbjxml.rb +80 -0
  346. data/test/unit/bio/io/test_ensembl.rb +109 -0
  347. data/test/unit/bio/io/test_fastacmd.rb +42 -0
  348. data/test/unit/bio/io/test_flatfile.rb +505 -0
  349. data/test/unit/bio/io/test_soapwsdl.rb +32 -0
  350. data/test/unit/bio/sequence/test_aa.rb +115 -0
  351. data/test/unit/bio/sequence/test_common.rb +373 -0
  352. data/test/unit/bio/sequence/test_compat.rb +69 -0
  353. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  354. data/test/unit/bio/sequence/test_na.rb +330 -0
  355. data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
  356. data/test/unit/bio/test_alignment.rb +1025 -0
  357. data/test/unit/bio/test_command.rb +349 -0
  358. data/test/unit/bio/test_db.rb +96 -0
  359. data/test/unit/bio/test_feature.rb +144 -0
  360. data/test/unit/bio/test_location.rb +599 -0
  361. data/test/unit/bio/test_map.rb +230 -0
  362. data/test/unit/bio/test_pathway.rb +499 -0
  363. data/test/unit/bio/test_reference.rb +252 -0
  364. data/test/unit/bio/test_sequence.rb +329 -0
  365. data/test/unit/bio/test_shell.rb +18 -0
  366. data/test/unit/bio/test_tree.rb +593 -0
  367. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  368. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  369. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  370. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
  371. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  372. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  373. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  374. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  375. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  376. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  377. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  378. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  379. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  380. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  381. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  382. data/test/unit/bio/util/test_color_scheme.rb +33 -0
  383. data/test/unit/bio/util/test_contingency_table.rb +94 -0
  384. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  385. data/test/unit/bio/util/test_sirna.rb +245 -0
  386. metadata +543 -0
@@ -0,0 +1,23 @@
1
+ #
2
+ # bio/util/restriction_enzyme/single_strand_complement.rb - Single strand restriction enzyme sequence in complement orientation
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: single_strand_complement.rb,v 1.5 2007/07/16 19:28:48 k Exp $
9
+ #
10
+
11
+ require 'bio/util/restriction_enzyme'
12
+
13
+ module Bio
14
+ class RestrictionEnzyme
15
+
16
+ # A single strand of restriction enzyme sequence pattern with a 3' to 5' orientation.
17
+ #
18
+ class SingleStrandComplement < SingleStrand
19
# Orientation of this strand as a two-element Array: 3' first, then 5'.
def orientation
  [3, 5]
end
21
+ end # SingleStrandComplement
22
+ end # RestrictionEnzyme
23
+ end # Bio
@@ -0,0 +1,111 @@
1
+ #
2
+ # bio/util/restriction_enzyme/string_formatting.rb - Useful functions for string manipulation
3
+ #
4
+ # Author:: Trevor Wennblom <mailto:trevor@corevx.com>
5
+ # Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: string_formatting.rb,v 1.6 2007/07/16 19:28:48 k Exp $
9
+ #
10
+
11
+ require 'bio/util/restriction_enzyme'
12
+
13
+ module Bio
14
+ class RestrictionEnzyme
15
+
16
+ module StringFormatting
17
+ include CutSymbol
18
+ extend CutSymbol
19
+
20
# Return the sequence with one space between neighbouring bases so strands
# can be aligned column by column. A cut symbol stays glued to the bases
# on both sides of it: no whitespace is added before or after it.
#
# Example:
#   pattern = 'n^ng^arraxt^n'
#   add_spacing( pattern )      # => "n^n g^a r r a x t^n"
#
# ---
# *Arguments*
# * +seq+: sequence with cut symbols
# * +cs+: (_optional_) Cut symbol used inside +seq+. It can be passed in,
#   rather than always coming from CutSymbol, because this is a utility
#   function also used to form vertical and horizontal cuts such as:
#
#     a|t g c
#      +---+
#     t a c|g
# *Returns*:: +String+ sequence with single character distance between bases
def add_spacing( seq, cs = cut_symbol )
  spaced = ''.dup
  # true while the previously emitted character was a base (not a cut symbol)
  after_base = false
  seq.each_byte do |byte|
    char = byte.chr
    if char == cs
      spaced << char
      after_base = false
    else
      spaced << ' ' if after_base
      spaced << char
      after_base = true
    end
  end
  spaced
end
55
+
56
# Remove extraneous nucleic acid wildcards ('n' padding) from the
# left and right sides.
#
# Only runs of lowercase 'n' at the very beginning and the very end of
# the string are removed; an 'n' inside the sequence is left alone.
# An empty string, or a string made up solely of 'n', yields '' (the
# previous implementation raised NoMethodError or returned nil for these
# inputs because it used the regexp captures of a failed match).
#
# ---
# *Arguments*
# * +s+: sequence with extraneous 'n' padding
# *Returns*:: +String+ sequence without 'n' padding on the sides. A new
#             String is returned; +s+ itself is not modified.
def strip_padding( s )
  # \A and \z pin the match to the real ends of the string, so nothing in
  # the middle can be removed and a failed match simply leaves the text as is.
  s.sub(/\An+/, '').sub(/n+\z/, '')
end
74
+
75
# Remove the cut symbols from +s+ and then strip the 'n' padding from the
# left and right sides of what is left.
#
# The cut symbols are deleted with String#tr, using the value returned by
# +cut_symbol+ as the character set; the result is handed to strip_padding.
#
# ---
# *Arguments*
# * +s+: sequence with extraneous 'n' padding and cut symbols
# *Returns*:: +String+ sequence without 'n' padding on the sides or cut symbols
def strip_cuts_and_padding( s )
  without_cuts = s.tr(cut_symbol, '')
  strip_padding( without_cuts )
end
85
+
86
# Return the 'n' padding on the left side of the strand
#
# The padding is whatever the pattern <tt>^n+</tt> matches first; when
# the pattern does not match, an empty String is returned instead of nil.
#
# ---
# *Arguments*
# * +s+: sequence with extraneous 'n' padding on the left side of the strand
# *Returns*:: +String+ the 'n' padding from the left side
def left_padding( s )
  leading = %r{^n+}.match( s )
  leading ? leading[0] : ''   # Don't pass nil values
end
97
+
98
# Return the 'n' padding on the right side of the strand
#
# The padding is whatever the pattern <tt>n+$</tt> matches first; when
# the pattern does not match, an empty String is returned instead of nil.
#
# ---
# *Arguments*
# * +s+: sequence with extraneous 'n' padding on the right side of the strand
# *Returns*:: +String+ the 'n' padding from the right side
def right_padding( s )
  trailing = %r{n+$}.match( s )
  trailing ? trailing[0] : ''   # Don't pass nil values
end
109
+ end # StringFormatting
110
+ end # RestrictionEnzyme
111
+ end # Bio
@@ -0,0 +1,288 @@
1
+ #
2
+ # = bio/util/sirna.rb - Class for designing small inhibitory RNAs
3
+ #
4
+ # Copyright:: Copyright (C) 2004, 2005
5
+ # Itoshi NIKAIDO <dritoshi@gmail.com>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id: sirna.rb,v 1.11 2007/04/05 23:35:41 trevor Exp $
9
+ #
10
+ # == Bio::SiRNA - Designing siRNA.
11
+ #
12
+ # This class implements the selection rules described by Kumiko Ui-Tei
13
+ # et al. (2004) and Reynolds et al. (2004).
14
+ #
15
+ # == Example
16
+ #
17
+ # seq = Bio::Sequence::NA.new(ARGF.read)
18
+ #
19
+ # sirna = Bio::SiRNA.new(seq)
20
+ # pairs = sirna.design
21
+ #
22
+ # pairs.each do |pair|
23
+ # puts pair.report
24
+ # shrna = Bio::SiRNA::ShRNA.new(pair)
25
+ # shrna.design
26
+ # puts shrna.report
27
+ #
28
+ # puts shrna.top_strand.dna
29
+ # puts shrna.bottom_strand.dna
30
+ # end
31
+ #
32
+ # == References
33
+ #
34
+ # * Kumiko Ui-Tei et al. Guidelines for the selection of highly effective
35
+ # siRNA sequences for mammalian and chick RNA interference.
36
+ # Nucl. Acids. Res. 2004 32: 936-948.
37
+ #
38
+ # * Angela Reynolds et al. Rational siRNA design for RNA interference.
39
+ # Nature Biotech. 2004 22: 326-330.
40
+ #
41
+
42
+ require 'bio/sequence'
43
+
44
+ module Bio
45
+
46
+ # = Bio::SiRNA
47
+ # Designing siRNA.
48
+ #
49
+ # This class implements the selection rules described by Kumiko Ui-Tei
50
+ # et al. (2004) and Reynolds et al. (2004).
51
+ class SiRNA
52
+
53
+ # Length of the antisense strand, in nucleotides.
54
+ attr_accessor :antisense_size
55
+
56
+ # Maximum %GC allowed for the antisense strand.
57
+ attr_accessor :max_gc_percent
58
+
59
+ # Minimum %GC allowed for the antisense strand.
60
+ attr_accessor :min_gc_percent
61
+
62
# Create a designer for the target sequence +seq+. The candidate pairs
# themselves are produced later by #design.
#
# ---
# *Arguments*
# * +seq+: the target sequence (a Bio::Sequence::NA object). It is
#   converted to RNA in place with <tt>rna!</tt>, so the caller's object
#   is modified.
# * +antisense_size+: (_optional_) value stored in #antisense_size
# * +max_gc_percent+: (_optional_) value stored in #max_gc_percent
# * +min_gc_percent+: (_optional_) value stored in #min_gc_percent
def initialize(seq, antisense_size = 21, max_gc_percent = 60.0, min_gc_percent = 40.0)
  # Keep the receiver, not the return value of the bang method.
  # NOTE(review): rna! is defined outside this file; if it follows the
  # String#tr! convention it returns nil when nothing had to be replaced
  # (input already RNA), which would have left @seq nil -- confirm.
  seq.rna!
  @seq = seq
  @pairs = Array.new
  @antisense_size = antisense_size
  @max_gc_percent = max_gc_percent
  @min_gc_percent = min_gc_percent
end
71
+
72
# Ui-Tei's rule.
#
# +target+ passes when all of the following hold (case-insensitively):
# * its third character is G or C
# * the character three positions before the end is A or U
# * it contains no run of nine consecutive G/C characters
# * the stretch <tt>target.subseq(size - size/3 - 1, size - 2)</tt>
#   contains at least five A/U characters
#
# The length is taken from +target+ itself, so the predicate can be
# called on its own; it used to read @target_size, which is only set by
# #design, and failed on nil otherwise. Inside #design both values are
# the same.
#
# ---
# *Arguments*
# * +target+: candidate window; must answer +size+ and +subseq+
#   (presumably a Bio::Sequence::NA -- subseq is defined outside this file)
# *Returns*:: +true+ or +false+
def uitei?(target)
  return false unless /^.{2}[GC]/i =~ target
  return false unless /[AU].{2}$/i =~ target
  return false if     /[GC]{9}/i =~ target

  size      = target.size
  one_third = size / 3
  start_pos = size - one_third - 1
  tail      = target.subseq(start_pos, size - 2)
  return false if tail.scan(/[AU]/i).size < 5

  true
end
86
+
87
# Reynolds' rule.
#
# +target+ passes when it contains no run of nine consecutive G/C
# characters and matches, case-insensitively, the positional pattern
# <tt>^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$</tt>.
#
# ---
# *Arguments*
# * +target+: candidate window
# *Returns*:: +true+ or +false+
def reynolds?(target)
  has_gc_stretch = !(/[GC]{9}/i =~ target).nil?
  return false if has_gc_stretch

  fits_positions = /^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$/i =~ target
  fits_positions ? true : false
end
93
+
94
# Shortcut for <tt>design('uitei')</tt>; returns whatever #design returns.
def uitei
  rule_name = 'uitei'
  design(rule_name)
end
98
+
99
# Shortcut for <tt>design('reynolds')</tt>; returns whatever #design returns.
def reynolds
  rule_name = 'reynolds'
  design(rule_name)
end
103
+
104
# Slide a window of <tt>antisense_size + 2</tt> over the sequence and
# collect a Bio::SiRNA::Pair for every window whose antisense %GC lies
# within [min_gc_percent, max_gc_percent] and which passes the chosen rule.
#
# Every call starts from an empty result list. Results used to pile up in
# one shared Array, so a second call returned the earlier candidates as
# well and silently changed the Array handed out by the first call.
#
# ---
# *Arguments*
# * +rule+: (_optional_) 'uitei' (default) or 'reynolds'
# *Returns*:: +Array+ of Bio::SiRNA::Pair. For each pair, +start+ is the
#             window number counted from 1 and +stop+ is +start+ plus the
#             window size.
# *Raises*:: +NotImplementedError+ for any other +rule+. The rule is now
#            checked before scanning, so an unknown rule is reported even
#            when no window passes the GC filter.
def design(rule = 'uitei')
  unless ['uitei', 'reynolds'].include?(rule)
    raise NotImplementedError
  end

  @target_size = @antisense_size + 2
  @pairs = Array.new   # fresh list: never mutate a previously returned result

  target_start = 0
  @seq.window_search(@target_size) do |target|
    antisense = target.subseq(1, @target_size - 2).complement.rna
    sense     = target.subseq(3, @target_size)

    target_start += 1
    target_stop   = target_start + @target_size

    # Filter on GC content first; the rule is only consulted for windows
    # that survive it.
    antisense_gc_percent = antisense.gc_percent
    next if antisense_gc_percent > @max_gc_percent
    next if antisense_gc_percent < @min_gc_percent

    accepted = (rule == 'uitei') ? uitei?(target) : reynolds?(target)
    next unless accepted

    @pairs.push(
      Bio::SiRNA::Pair.new(target, sense, antisense, target_start,
                           target_stop, rule, antisense_gc_percent)
    )
  end
  @pairs
end
134
+
135
# = Bio::SiRNA::Pair
# Plain record holding the seven values handed to the constructor, each
# readable and writable through an accessor of the same name.
class Pair

  attr_accessor :target, :sense, :antisense, :start, :stop, :rule, :gc_percent

  # Store the arguments unchanged in the attributes of the same name.
  def initialize(target, sense, antisense, start, stop, rule, gc_percent)
    @target, @sense, @antisense = target, sense, antisense
    @start, @stop               = start, stop
    @rule, @gc_percent          = rule, gc_percent
  end

  # human readable report
  #
  # One "label value" line per attribute. The target and sense strands are
  # upcased; the antisense strand is reversed and then upcased.
  def report
    [
      "### siRNA\n",
      'Start: '     + @start.to_s      + "\n",
      'Stop: '      + @stop.to_s       + "\n",
      'Rule: '      + @rule.to_s       + "\n",
      'GC %: '      + @gc_percent.to_s + "\n",
      'Target: '    + @target.upcase   + "\n",
      'Sense: '     + ' ' + @sense.upcase + "\n",
      'Antisense: ' + @antisense.reverse.upcase + "\n"
    ].join
  end

  # computer parsable report
  #def to_s
  #  [ @antisense, @start, @stop ].join("\t")
  #end

end # class Pair
180
+
181
+
182
# = Bio::SiRNA::ShRNA
# Designing shRNA.
class ShRNA

  # Top and bottom strands (Bio::Sequence::NA). Both are nil until
  # #design or #block_it has been called.
  attr_accessor :top_strand, :bottom_strand

  # Input is a Bio::SiRNA::Pair object (the target sequence).
  def initialize(pair)
    @pair = pair
  end

  # only the 'BLOCK-iT' rule is implemented for now; any other +method+
  # raises NotImplementedError.
  #
  # NOTE(review): #block_it is called without an argument, so the loop
  # actually used is block_it's own default ('piGENE'), not the BLOCK-iT
  # loop. Behaviour is kept as is -- confirm whether this is intended.
  def design(method = 'BLOCK-iT')
    raise NotImplementedError unless 'BLOCK-iT' === method
    block_it
  end


  # Build both strands from the sense strand of the pair and store them in
  # #top_strand and #bottom_strand.
  # method selects the loop sequence and can be one of 'piGENE' (default)
  # and 'BLOCK-iT'; any other value raises NotImplementedError.
  # When the sense strand does not start with G, a 'G' is inserted after
  # the top overhang and a 'C' is appended to the bottom strand.
  def block_it(method = 'piGENE')
    top_overhang    = Bio::Sequence::NA.new('CACC') # top_strand_shrna_overhang
    bottom_overhang = Bio::Sequence::NA.new('AAAA') # bottom_strand_shrna_overhang
    fwd = @pair.sense
    rev = @pair.sense.complement

    loop_fwd =
      if 'BLOCK-iT' === method
        # From BLOCK-iT's manual
        Bio::Sequence::NA.new('CGAA')
      elsif 'piGENE' === method
        # From piGENE document
        Bio::Sequence::NA.new('GTGTGCTGTCC')
      else
        raise NotImplementedError
      end
    loop_rev = loop_fwd.complement

    if (/^G/i =~ fwd).nil?
      @top_strand    = top_overhang + 'G' + fwd + loop_fwd + rev
      @bottom_strand = bottom_overhang + fwd + loop_rev + rev + 'C'
    else
      @top_strand    = top_overhang + fwd + loop_fwd + rev
      @bottom_strand = bottom_overhang + fwd + loop_rev + rev
    end
  end

  # human readable report
  #
  # Shows the length and the upcased text of each strand; the bottom
  # strand is printed reversed.
  def report
    [
      "### shRNA\n",
      "Top strand shRNA (#{@top_strand.length} nt):\n",
      " 5'-#{@top_strand.upcase}-3'\n",
      "Bottom strand shRNA (#{@bottom_strand.length} nt):\n",
      " 3'-#{@bottom_strand.reverse.upcase}-5'\n"
    ].join
  end

end # class ShRNA
248
+
249
+ end # class SiRNA
250
+
251
+ end # module Bio
252
+
253
+
254
# Command-line demo, run only when this file is executed directly: reads a
# sequence from ARGF, designs siRNA pairs with the default rule and prints,
# for each pair, its report, the shRNA report and both shRNA strands as DNA.
if __FILE__ == $0

  input    = Bio::Sequence::NA.new(ARGF.read)
  designer = Bio::SiRNA.new(input)

  # or .design('uitei') or .uitei or .reynolds
  designer.design.each do |candidate|
    puts candidate.report

    hairpin = Bio::SiRNA::ShRNA.new(candidate)
    hairpin.design              # or .design('BLOCK-iT') or .block_it
    puts hairpin.report

    puts "# as DNA"
    puts hairpin.top_strand.dna
    puts hairpin.bottom_strand.dna
  end

end
274
+
275
+ =begin
276
+
277
+ = ChangeLog
278
+
279
+ 2005/03/21 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
280
+ Bio::SiRNA#ShRNA_designer method was changed design method.
281
+
282
+ 2004/06/25
283
+ Bio::ShRNA class was added.
284
+
285
+ 2004/06/17 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
286
+ We can use shRNA loop sequence from piGene document.
287
+
288
+ =end
@@ -0,0 +1,64 @@
1
+ hmmpfam - search one or more sequences against HMM database
2
+ HMMER 2.3.2 (Oct 2003)
3
+ Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
4
+ Freely distributed under the GNU General Public License (GPL)
5
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ HMM file: /Users/nakao/Sites/iprscan/data/Pfam
7
+ Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc
8
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9
+
10
+ Query sequence: 104K_THEPA
11
+ Accession: [none]
12
+ Description: [none]
13
+
14
+ Scores for sequence family classification (score includes all domains):
15
+ Model Description Score E-value N
16
+ -------- ----------- ----- ------- ---
17
+ PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4
18
+
19
+ Parsed for domains:
20
+ Model Domain seq-f seq-t hmm-f hmm-t score E-value
21
+ -------- ------- ----- ----- ----- ----- ----- -------
22
+ PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16
23
+ PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16
24
+ PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16
25
+ PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16
26
+
27
+ Alignments of top-scoring domains:
28
+ PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16
29
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
30
+ t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W
31
+ 104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81
32
+
33
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
34
+ e++ + +l++ ++++++++++++++++ +++
35
+ 104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111
36
+
37
+ PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16
38
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
39
+ +L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++
40
+ 104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195
41
+
42
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
43
+ ++ ++++vt++++++ +++L+l+++ +
44
+ 104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224
45
+
46
+ PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16
47
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
48
+ +Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++
49
+ 104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311
50
+
51
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
52
+ es+ + + ++i +++y+++n ++++l++n+++
53
+ 104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343
54
+
55
+ PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16
56
+ *->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv
57
+ +LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+
58
+ 104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423
59
+
60
+ WeseddpefglivtlsfyldsnkfLvlllintak<-*
61
+ W++e+ ++ l++++++++d++ +Lv+l+i++
62
+ 104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456
63
+
64
+ //