wwood-bioruby 1.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (386) hide show
  1. data/README.rdoc +205 -0
  2. data/README_DEV.rdoc +285 -0
  3. data/VERSION.yml +4 -0
  4. data/bin/bioruby +44 -0
  5. data/bin/br_biofetch.rb +47 -0
  6. data/bin/br_bioflat.rb +293 -0
  7. data/bin/br_biogetseq.rb +45 -0
  8. data/bin/br_pmfetch.rb +421 -0
  9. data/lib/bio.rb +306 -0
  10. data/lib/bio/alignment.rb +2518 -0
  11. data/lib/bio/appl/bl2seq/report.rb +334 -0
  12. data/lib/bio/appl/blast.rb +505 -0
  13. data/lib/bio/appl/blast/ddbj.rb +142 -0
  14. data/lib/bio/appl/blast/format0.rb +1438 -0
  15. data/lib/bio/appl/blast/format8.rb +83 -0
  16. data/lib/bio/appl/blast/genomenet.rb +263 -0
  17. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  18. data/lib/bio/appl/blast/remote.rb +105 -0
  19. data/lib/bio/appl/blast/report.rb +767 -0
  20. data/lib/bio/appl/blast/rexml.rb +144 -0
  21. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  22. data/lib/bio/appl/blast/wublast.rb +635 -0
  23. data/lib/bio/appl/blast/xmlparser.rb +236 -0
  24. data/lib/bio/appl/blat/report.rb +530 -0
  25. data/lib/bio/appl/clustalw.rb +219 -0
  26. data/lib/bio/appl/clustalw/report.rb +152 -0
  27. data/lib/bio/appl/emboss.rb +203 -0
  28. data/lib/bio/appl/fasta.rb +235 -0
  29. data/lib/bio/appl/fasta/format10.rb +325 -0
  30. data/lib/bio/appl/gcg/msf.rb +212 -0
  31. data/lib/bio/appl/gcg/seq.rb +195 -0
  32. data/lib/bio/appl/genscan/report.rb +552 -0
  33. data/lib/bio/appl/hmmer.rb +126 -0
  34. data/lib/bio/appl/hmmer/report.rb +683 -0
  35. data/lib/bio/appl/iprscan/report.rb +374 -0
  36. data/lib/bio/appl/mafft.rb +259 -0
  37. data/lib/bio/appl/mafft/report.rb +226 -0
  38. data/lib/bio/appl/muscle.rb +52 -0
  39. data/lib/bio/appl/paml/baseml.rb +95 -0
  40. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  41. data/lib/bio/appl/paml/codeml.rb +242 -0
  42. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  43. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  44. data/lib/bio/appl/paml/common.rb +348 -0
  45. data/lib/bio/appl/paml/common_report.rb +38 -0
  46. data/lib/bio/appl/paml/yn00.rb +103 -0
  47. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  48. data/lib/bio/appl/phylip/alignment.rb +133 -0
  49. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  50. data/lib/bio/appl/probcons.rb +41 -0
  51. data/lib/bio/appl/psort.rb +548 -0
  52. data/lib/bio/appl/psort/report.rb +542 -0
  53. data/lib/bio/appl/pts1.rb +263 -0
  54. data/lib/bio/appl/sim4.rb +124 -0
  55. data/lib/bio/appl/sim4/report.rb +485 -0
  56. data/lib/bio/appl/sosui/report.rb +151 -0
  57. data/lib/bio/appl/spidey/report.rb +593 -0
  58. data/lib/bio/appl/targetp/report.rb +267 -0
  59. data/lib/bio/appl/tcoffee.rb +55 -0
  60. data/lib/bio/appl/tmhmm/report.rb +231 -0
  61. data/lib/bio/command.rb +593 -0
  62. data/lib/bio/compat/features.rb +157 -0
  63. data/lib/bio/compat/references.rb +128 -0
  64. data/lib/bio/data/aa.rb +353 -0
  65. data/lib/bio/data/codontable.rb +722 -0
  66. data/lib/bio/data/na.rb +223 -0
  67. data/lib/bio/db.rb +329 -0
  68. data/lib/bio/db/aaindex.rb +357 -0
  69. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  70. data/lib/bio/db/biosql/sequence.rb +508 -0
  71. data/lib/bio/db/embl/common.rb +352 -0
  72. data/lib/bio/db/embl/embl.rb +500 -0
  73. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  74. data/lib/bio/db/embl/format_embl.rb +190 -0
  75. data/lib/bio/db/embl/sptr.rb +1283 -0
  76. data/lib/bio/db/embl/swissprot.rb +42 -0
  77. data/lib/bio/db/embl/trembl.rb +41 -0
  78. data/lib/bio/db/embl/uniprot.rb +42 -0
  79. data/lib/bio/db/fantom.rb +597 -0
  80. data/lib/bio/db/fasta.rb +410 -0
  81. data/lib/bio/db/fasta/defline.rb +532 -0
  82. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  83. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  84. data/lib/bio/db/genbank/common.rb +307 -0
  85. data/lib/bio/db/genbank/ddbj.rb +22 -0
  86. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  87. data/lib/bio/db/genbank/genbank.rb +250 -0
  88. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  89. data/lib/bio/db/genbank/genpept.rb +60 -0
  90. data/lib/bio/db/genbank/refseq.rb +18 -0
  91. data/lib/bio/db/gff.rb +1846 -0
  92. data/lib/bio/db/go.rb +481 -0
  93. data/lib/bio/db/kegg/brite.rb +41 -0
  94. data/lib/bio/db/kegg/compound.rb +131 -0
  95. data/lib/bio/db/kegg/drug.rb +98 -0
  96. data/lib/bio/db/kegg/enzyme.rb +148 -0
  97. data/lib/bio/db/kegg/expression.rb +155 -0
  98. data/lib/bio/db/kegg/genes.rb +263 -0
  99. data/lib/bio/db/kegg/genome.rb +241 -0
  100. data/lib/bio/db/kegg/glycan.rb +166 -0
  101. data/lib/bio/db/kegg/keggtab.rb +357 -0
  102. data/lib/bio/db/kegg/kgml.rb +256 -0
  103. data/lib/bio/db/kegg/orthology.rb +136 -0
  104. data/lib/bio/db/kegg/reaction.rb +82 -0
  105. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  106. data/lib/bio/db/lasergene.rb +209 -0
  107. data/lib/bio/db/litdb.rb +107 -0
  108. data/lib/bio/db/medline.rb +326 -0
  109. data/lib/bio/db/nbrf.rb +191 -0
  110. data/lib/bio/db/newick.rb +658 -0
  111. data/lib/bio/db/nexus.rb +1854 -0
  112. data/lib/bio/db/pdb.rb +29 -0
  113. data/lib/bio/db/pdb/atom.rb +77 -0
  114. data/lib/bio/db/pdb/chain.rb +210 -0
  115. data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
  116. data/lib/bio/db/pdb/model.rb +148 -0
  117. data/lib/bio/db/pdb/pdb.rb +1911 -0
  118. data/lib/bio/db/pdb/residue.rb +176 -0
  119. data/lib/bio/db/pdb/utils.rb +399 -0
  120. data/lib/bio/db/prosite.rb +597 -0
  121. data/lib/bio/db/rebase.rb +456 -0
  122. data/lib/bio/db/soft.rb +404 -0
  123. data/lib/bio/db/transfac.rb +375 -0
  124. data/lib/bio/db/url.rb +42 -0
  125. data/lib/bio/feature.rb +139 -0
  126. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  127. data/lib/bio/io/biosql/bioentry.rb +29 -0
  128. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  129. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  130. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  131. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  132. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  133. data/lib/bio/io/biosql/biosequence.rb +11 -0
  134. data/lib/bio/io/biosql/comment.rb +7 -0
  135. data/lib/bio/io/biosql/config/database.yml +20 -0
  136. data/lib/bio/io/biosql/dbxref.rb +13 -0
  137. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  138. data/lib/bio/io/biosql/location.rb +32 -0
  139. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  140. data/lib/bio/io/biosql/ontology.rb +10 -0
  141. data/lib/bio/io/biosql/reference.rb +9 -0
  142. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  143. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  144. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  145. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  146. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  147. data/lib/bio/io/biosql/taxon.rb +12 -0
  148. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  149. data/lib/bio/io/biosql/term.rb +27 -0
  150. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  151. data/lib/bio/io/biosql/term_path.rb +12 -0
  152. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  153. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  154. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  155. data/lib/bio/io/das.rb +461 -0
  156. data/lib/bio/io/dbget.rb +194 -0
  157. data/lib/bio/io/ddbjxml.rb +638 -0
  158. data/lib/bio/io/ebisoap.rb +158 -0
  159. data/lib/bio/io/ensembl.rb +229 -0
  160. data/lib/bio/io/fastacmd.rb +163 -0
  161. data/lib/bio/io/fetch.rb +195 -0
  162. data/lib/bio/io/flatfile.rb +482 -0
  163. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  164. data/lib/bio/io/flatfile/bdb.rb +253 -0
  165. data/lib/bio/io/flatfile/buffer.rb +237 -0
  166. data/lib/bio/io/flatfile/index.rb +1381 -0
  167. data/lib/bio/io/flatfile/indexer.rb +805 -0
  168. data/lib/bio/io/flatfile/splitter.rb +297 -0
  169. data/lib/bio/io/higet.rb +73 -0
  170. data/lib/bio/io/hinv.rb +442 -0
  171. data/lib/bio/io/keggapi.rb +805 -0
  172. data/lib/bio/io/ncbirest.rb +733 -0
  173. data/lib/bio/io/ncbisoap.rb +155 -0
  174. data/lib/bio/io/pubmed.rb +307 -0
  175. data/lib/bio/io/registry.rb +292 -0
  176. data/lib/bio/io/soapwsdl.rb +119 -0
  177. data/lib/bio/io/sql.rb +186 -0
  178. data/lib/bio/location.rb +867 -0
  179. data/lib/bio/map.rb +410 -0
  180. data/lib/bio/pathway.rb +960 -0
  181. data/lib/bio/reference.rb +602 -0
  182. data/lib/bio/sequence.rb +456 -0
  183. data/lib/bio/sequence/aa.rb +152 -0
  184. data/lib/bio/sequence/adapter.rb +108 -0
  185. data/lib/bio/sequence/common.rb +310 -0
  186. data/lib/bio/sequence/compat.rb +123 -0
  187. data/lib/bio/sequence/dblink.rb +54 -0
  188. data/lib/bio/sequence/format.rb +358 -0
  189. data/lib/bio/sequence/format_raw.rb +23 -0
  190. data/lib/bio/sequence/generic.rb +24 -0
  191. data/lib/bio/sequence/na.rb +491 -0
  192. data/lib/bio/shell.rb +44 -0
  193. data/lib/bio/shell/core.rb +578 -0
  194. data/lib/bio/shell/demo.rb +146 -0
  195. data/lib/bio/shell/interface.rb +218 -0
  196. data/lib/bio/shell/irb.rb +95 -0
  197. data/lib/bio/shell/object.rb +71 -0
  198. data/lib/bio/shell/plugin/blast.rb +42 -0
  199. data/lib/bio/shell/plugin/codon.rb +218 -0
  200. data/lib/bio/shell/plugin/das.rb +58 -0
  201. data/lib/bio/shell/plugin/emboss.rb +23 -0
  202. data/lib/bio/shell/plugin/entry.rb +105 -0
  203. data/lib/bio/shell/plugin/flatfile.rb +101 -0
  204. data/lib/bio/shell/plugin/keggapi.rb +181 -0
  205. data/lib/bio/shell/plugin/midi.rb +430 -0
  206. data/lib/bio/shell/plugin/obda.rb +45 -0
  207. data/lib/bio/shell/plugin/psort.rb +56 -0
  208. data/lib/bio/shell/plugin/seq.rb +247 -0
  209. data/lib/bio/shell/plugin/soap.rb +87 -0
  210. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
  211. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
  212. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
  213. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
  214. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
  215. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
  216. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  217. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  218. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  219. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
  220. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
  221. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
  222. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
  223. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
  224. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
  225. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
  226. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  227. data/lib/bio/shell/script.rb +25 -0
  228. data/lib/bio/shell/setup.rb +109 -0
  229. data/lib/bio/shell/web.rb +102 -0
  230. data/lib/bio/tree.rb +852 -0
  231. data/lib/bio/util/color_scheme.rb +191 -0
  232. data/lib/bio/util/color_scheme/buried.rb +59 -0
  233. data/lib/bio/util/color_scheme/helix.rb +59 -0
  234. data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
  235. data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
  236. data/lib/bio/util/color_scheme/strand.rb +59 -0
  237. data/lib/bio/util/color_scheme/taylor.rb +50 -0
  238. data/lib/bio/util/color_scheme/turn.rb +59 -0
  239. data/lib/bio/util/color_scheme/zappo.rb +50 -0
  240. data/lib/bio/util/contingency_table.rb +370 -0
  241. data/lib/bio/util/restriction_enzyme.rb +228 -0
  242. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  243. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  244. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  245. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  246. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  247. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  248. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  249. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  250. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  251. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  252. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  253. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  254. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  255. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  256. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  257. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  258. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  259. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  260. data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
  261. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  262. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  263. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  264. data/lib/bio/util/sirna.rb +288 -0
  265. data/test/data/HMMER/hmmpfam.out +64 -0
  266. data/test/data/HMMER/hmmsearch.out +88 -0
  267. data/test/data/SOSUI/sample.report +11 -0
  268. data/test/data/TMHMM/sample.report +21 -0
  269. data/test/data/aaindex/DAYM780301 +30 -0
  270. data/test/data/aaindex/PRAM900102 +20 -0
  271. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  272. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  273. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  274. data/test/data/blast/b0002.faa +15 -0
  275. data/test/data/blast/b0002.faa.m0 +128 -0
  276. data/test/data/blast/b0002.faa.m7 +65 -0
  277. data/test/data/blast/b0002.faa.m8 +1 -0
  278. data/test/data/blast/blastp-multi.m7 +188 -0
  279. data/test/data/command/echoarg2.bat +1 -0
  280. data/test/data/embl/AB090716.embl +65 -0
  281. data/test/data/embl/AB090716.embl.rel89 +63 -0
  282. data/test/data/fasta/example1.txt +75 -0
  283. data/test/data/fasta/example2.txt +21 -0
  284. data/test/data/genscan/sample.report +63 -0
  285. data/test/data/iprscan/merged.raw +32 -0
  286. data/test/data/iprscan/merged.txt +74 -0
  287. data/test/data/paml/codeml/control_file.txt +30 -0
  288. data/test/data/paml/codeml/output.txt +78 -0
  289. data/test/data/paml/codeml/rates +217 -0
  290. data/test/data/prosite/prosite.dat +2233 -0
  291. data/test/data/refseq/nm_126355.entret +64 -0
  292. data/test/data/rpsblast/misc.rpsblast +193 -0
  293. data/test/data/soft/GDS100_partial.soft +92 -0
  294. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  295. data/test/data/uniprot/p53_human.uniprot +1456 -0
  296. data/test/functional/bio/appl/test_pts1.rb +115 -0
  297. data/test/functional/bio/io/test_ensembl.rb +229 -0
  298. data/test/functional/bio/io/test_soapwsdl.rb +52 -0
  299. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  300. data/test/functional/bio/test_command.rb +301 -0
  301. data/test/runner.rb +14 -0
  302. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  303. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  304. data/test/unit/bio/appl/blast/test_report.rb +1135 -0
  305. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  306. data/test/unit/bio/appl/genscan/test_report.rb +182 -0
  307. data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
  308. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  309. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  310. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  311. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  312. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  313. data/test/unit/bio/appl/sosui/test_report.rb +81 -0
  314. data/test/unit/bio/appl/targetp/test_report.rb +146 -0
  315. data/test/unit/bio/appl/test_blast.rb +277 -0
  316. data/test/unit/bio/appl/test_fasta.rb +130 -0
  317. data/test/unit/bio/appl/test_psort.rb +57 -0
  318. data/test/unit/bio/appl/test_pts1.rb +77 -0
  319. data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
  320. data/test/unit/bio/data/test_aa.rb +90 -0
  321. data/test/unit/bio/data/test_codontable.rb +107 -0
  322. data/test/unit/bio/data/test_na.rb +80 -0
  323. data/test/unit/bio/db/embl/test_common.rb +117 -0
  324. data/test/unit/bio/db/embl/test_embl.rb +214 -0
  325. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  326. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  327. data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
  328. data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
  329. data/test/unit/bio/db/kegg/test_genes.rb +45 -0
  330. data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
  331. data/test/unit/bio/db/test_aaindex.rb +197 -0
  332. data/test/unit/bio/db/test_fasta.rb +250 -0
  333. data/test/unit/bio/db/test_gff.rb +1190 -0
  334. data/test/unit/bio/db/test_lasergene.rb +95 -0
  335. data/test/unit/bio/db/test_medline.rb +127 -0
  336. data/test/unit/bio/db/test_newick.rb +293 -0
  337. data/test/unit/bio/db/test_nexus.rb +364 -0
  338. data/test/unit/bio/db/test_prosite.rb +1437 -0
  339. data/test/unit/bio/db/test_rebase.rb +101 -0
  340. data/test/unit/bio/db/test_soft.rb +138 -0
  341. data/test/unit/bio/db/test_url.rb +36 -0
  342. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  343. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  344. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  345. data/test/unit/bio/io/test_ddbjxml.rb +80 -0
  346. data/test/unit/bio/io/test_ensembl.rb +109 -0
  347. data/test/unit/bio/io/test_fastacmd.rb +42 -0
  348. data/test/unit/bio/io/test_flatfile.rb +505 -0
  349. data/test/unit/bio/io/test_soapwsdl.rb +32 -0
  350. data/test/unit/bio/sequence/test_aa.rb +115 -0
  351. data/test/unit/bio/sequence/test_common.rb +373 -0
  352. data/test/unit/bio/sequence/test_compat.rb +69 -0
  353. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  354. data/test/unit/bio/sequence/test_na.rb +330 -0
  355. data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
  356. data/test/unit/bio/test_alignment.rb +1025 -0
  357. data/test/unit/bio/test_command.rb +349 -0
  358. data/test/unit/bio/test_db.rb +96 -0
  359. data/test/unit/bio/test_feature.rb +144 -0
  360. data/test/unit/bio/test_location.rb +599 -0
  361. data/test/unit/bio/test_map.rb +230 -0
  362. data/test/unit/bio/test_pathway.rb +499 -0
  363. data/test/unit/bio/test_reference.rb +252 -0
  364. data/test/unit/bio/test_sequence.rb +329 -0
  365. data/test/unit/bio/test_shell.rb +18 -0
  366. data/test/unit/bio/test_tree.rb +593 -0
  367. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  368. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  369. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  370. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
  371. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  372. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  373. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  374. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  375. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  376. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  377. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  378. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  379. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  380. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  381. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  382. data/test/unit/bio/util/test_color_scheme.rb +33 -0
  383. data/test/unit/bio/util/test_contingency_table.rb +94 -0
  384. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  385. data/test/unit/bio/util/test_sirna.rb +245 -0
  386. metadata +543 -0
@@ -0,0 +1,236 @@
1
+ #
2
+ # = bio/appl/blast/xmlparser.rb - BLAST XML output (-m 7) parser by XMLParser
3
+ #
4
+ # Copyright:: Copyright (C) 2001
5
+ # Mitsuteru C. Nakao <n@bioruby.org>
6
+ # Copyright:: Copyright (C) 2003
7
+ # Toshiaki Katayama <k@bioruby.org>
8
+ # License:: The Ruby License
9
+ #
10
+ # $Id:$
11
+ #
12
+ # == Description
13
+ #
14
+ # A parser for blast XML report (format 7) based on the XMLParser.
15
+ # This file is automatically loaded by bio/appl/blast/report.rb if
16
+ # the XMLParser is installed.
17
+ #
18
+ # BioRuby provides two implementations of the parser for the blast XML format report
19
+ # (format 7) based on the XMLParser and the REXML.
20
+ #
21
+
22
+ begin
23
+ require 'xmlparser'
24
+ rescue LoadError
25
+ end
26
+
27
+ module Bio
28
+ class Blast
29
+ class Report
30
+
31
+ private
32
+
33
+ def xmlparser_parse(xml)
34
+ parser = XMLParser.new
35
+ def parser.default; end
36
+
37
+ begin
38
+ tag_stack = Array.new
39
+ hash = Hash.new
40
+
41
+ parser.parse(xml) do |type, name, data|
42
+ case type
43
+ when XMLParser::START_ELEM
44
+ tag_stack.push(name)
45
+ hash.update(data)
46
+ case name
47
+ when 'Iteration'
48
+ iteration = Iteration.new
49
+ @iterations.push(iteration)
50
+ when 'Hit'
51
+ hit = Hit.new
52
+ hit.query_id = @query_id
53
+ hit.query_def = @query_def
54
+ hit.query_len = @query_len
55
+ @iterations.last.hits.push(hit)
56
+ when 'Hsp'
57
+ hsp = Hsp.new
58
+ @iterations.last.hits.last.hsps.push(hsp)
59
+ end
60
+ when XMLParser::END_ELEM
61
+ case name
62
+ when /^BlastOutput/
63
+ xmlparser_parse_program(name,hash)
64
+ hash = Hash.new
65
+ when /^Parameters$/
66
+ xmlparser_parse_parameters(hash)
67
+ hash = Hash.new
68
+ when /^Iteration/
69
+ xmlparser_parse_iteration(name, hash)
70
+ hash = Hash.new
71
+ when /^Hit/
72
+ xmlparser_parse_hit(name, hash)
73
+ hash = Hash.new
74
+ when /^Hsp$/
75
+ xmlparser_parse_hsp(hash)
76
+ hash = Hash.new
77
+ when /^Statistics$/
78
+ xmlparser_parse_statistics(hash)
79
+ hash = Hash.new
80
+ end
81
+ tag_stack.pop
82
+ when XMLParser::CDATA
83
+ if hash[tag_stack.last].nil?
84
+ hash[tag_stack.last] = data unless data.strip.empty?
85
+ else
86
+ hash[tag_stack.last].concat(data) if data
87
+ end
88
+ when XMLParser::PI
89
+ end
90
+ end
91
+ rescue XMLParserError
92
+ line = parser.line
93
+ column = parser.column
94
+ print "Parse error at #{line}(#{column}) : #{$!}\n"
95
+ end
96
+ end
97
+
98
+
99
+ def xmlparser_parse_program(tag, hash)
100
+ case tag
101
+ when 'BlastOutput_program'
102
+ @program = hash[tag]
103
+ when 'BlastOutput_version'
104
+ @version = hash[tag]
105
+ when 'BlastOutput_reference'
106
+ @reference = hash[tag]
107
+ when 'BlastOutput_db'
108
+ @db = hash[tag].strip
109
+ when 'BlastOutput_query-ID'
110
+ @query_id = hash[tag]
111
+ when 'BlastOutput_query-def'
112
+ @query_def = hash[tag]
113
+ when 'BlastOutput_query-len'
114
+ @query_len = hash[tag].to_i
115
+ end
116
+ end
117
+
118
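+ # Stores val in @parameters under key with its "Parameters_" prefix removed,
+ # converting 'expect'/'include' to Float and 'gap-*'/'sc-*' values to Integer.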
119
+ def xml_set_parameter(key, val)
120
+ #labels = {
121
+ # 'matrix' => 'Parameters_matrix',
122
+ # 'expect' => 'Parameters_expect',
123
+ # 'include' => 'Parameters_include',
124
+ # 'sc-match' => 'Parameters_sc-match',
125
+ # 'sc-mismatch' => 'Parameters_sc-mismatch',
126
+ # 'gap-open' => 'Parameters_gap-open',
127
+ # 'gap-extend' => 'Parameters_gap-extend',
128
+ # 'filter' => 'Parameters_filter',
129
+ # 'pattern' => 'Parameters_pattern',
130
+ # 'entrez-query' => 'Parameters_entrez-query',
131
+ #}
132
+ k = key.sub(/\AParameters\_/, '')
133
+ @parameters[k] =
134
+ case k
135
+ when 'expect', 'include'
136
+ val.to_f
137
+ when /\Agap\-/, /\Asc\-/
138
+ val.to_i
139
+ else
140
+ val
141
+ end
142
+ end
143
+
144
+ def xmlparser_parse_parameters(hash)
145
+ hash.each do |k, v|
146
+ xml_set_parameter(k, v)
147
+ end
148
+ end
149
+
150
+ def xmlparser_parse_iteration(tag, hash)
151
+ case tag
152
+ when 'Iteration_iter-num'
153
+ @iterations.last.num = hash[tag].to_i
154
+ when 'Iteration_message'
155
+ @iterations.last.message = hash[tag].to_s
156
+
157
+ # for new BLAST XML format
158
+ when 'Iteration_query-ID'
159
+ @iterations.last.query_id = hash[tag].to_s
160
+ when 'Iteration_query-def'
161
+ @iterations.last.query_def = hash[tag].to_s
162
+ when 'Iteration_query-len'
163
+ @iterations.last.query_len = hash[tag].to_i
164
+ end
165
+ end
166
+
167
+ def xmlparser_parse_hit(tag, hash)
168
+ hit = @iterations.last.hits.last
169
+ case tag
170
+ when 'Hit_num'
171
+ hit.num = hash[tag].to_i
172
+ when 'Hit_id'
173
+ hit.hit_id = hash[tag].clone
174
+ when 'Hit_def'
175
+ hit.definition = hash[tag].clone
176
+ when 'Hit_accession'
177
+ hit.accession = hash[tag].clone
178
+ when 'Hit_len'
179
+ hit.len = hash[tag].clone.to_i
180
+ end
181
+ end
182
+
183
+ def xmlparser_parse_hsp(hash)
184
+ hsp = @iterations.last.hits.last.hsps.last
185
+ hsp.num = hash['Hsp_num'].to_i
186
+ hsp.bit_score = hash['Hsp_bit-score'].to_f
187
+ hsp.score = hash['Hsp_score'].to_i
188
+ hsp.evalue = hash['Hsp_evalue'].to_f
189
+ hsp.query_from = hash['Hsp_query-from'].to_i
190
+ hsp.query_to = hash['Hsp_query-to'].to_i
191
+ hsp.hit_from = hash['Hsp_hit-from'].to_i
192
+ hsp.hit_to = hash['Hsp_hit-to'].to_i
193
+ hsp.pattern_from = hash['Hsp_pattern-from'].to_i
194
+ hsp.pattern_to = hash['Hsp_pattern-to'].to_i
195
+ hsp.query_frame = hash['Hsp_query-frame'].to_i
196
+ hsp.hit_frame = hash['Hsp_hit-frame'].to_i
197
+ hsp.identity = hash['Hsp_identity'].to_i
198
+ hsp.positive = hash['Hsp_positive'].to_i
199
+ hsp.gaps = hash['Hsp_gaps'].to_i
200
+ hsp.align_len = hash['Hsp_align-len'].to_i
201
+ hsp.density = hash['Hsp_density'].to_i
202
+ hsp.qseq = hash['Hsp_qseq']
203
+ hsp.hseq = hash['Hsp_hseq']
204
+ hsp.midline = hash['Hsp_midline']
205
+ end
206
+
207
+ def xmlparser_parse_statistics(hash)
208
+ labels = {
209
+ 'db-num' => 'Statistics_db-num',
210
+ 'db-len' => 'Statistics_db-len',
211
+ 'hsp-len' => 'Statistics_hsp-len',
212
+ 'eff-space' => 'Statistics_eff-space',
213
+ 'kappa' => 'Statistics_kappa',
214
+ 'lambda' => 'Statistics_lambda',
215
+ 'entropy' => 'Statistics_entropy'
216
+ }
217
+ labels.each do |k,v|
218
+ case k
219
+ when 'db-num', 'db-len', 'hsp-len'
220
+ @iterations.last.statistics[k] = hash[v].to_i
221
+ else
222
+ @iterations.last.statistics[k] = hash[v].to_f
223
+ end
224
+ end
225
+ end
226
+
227
+ end # class Report
228
+ end # class Blast
229
+ end # module Bio
230
+
231
+
232
+ =begin
233
+
234
+ This file is automatically loaded by bio/appl/blast/report.rb
235
+
236
+ =end
@@ -0,0 +1,530 @@
1
+ #
2
+ # = bio/appl/blat/report.rb - BLAT result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004, 2006, 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # BLAT result parser (psl / pslx format).
10
+ #
11
+ # == Important Notes
12
+ #
13
+ # In BLAT results, the start position of a sequence is numbered as 0.
14
+ # On the other hand, in many other homology search programs,
15
+ # the start position of a sequence is numbered as 1.
16
+ # To keep compatibility, the BLAT parser adds 1 to every position number.
17
+ #
18
+ # == References
19
+ #
20
+ # * Kent, W.J., BLAT--the BLAST-like alignment tool,
21
+ # Genome Research, 12, 656--664, 2002.
22
+ # http://www.genome.org/cgi/content/abstract/12/4/656
23
+ # * http://genome.ucsc.edu/goldenPath/help/blatSpec.html
24
+
25
+ require 'bio'
26
+
27
+ module Bio
28
+ class Blat
29
+
30
+ # Bio::Blat::Report is a BLAT report parser class.
31
+ # Its object may contain some Bio::Blat::Report::Hits objects.
32
+ #
33
+ # In BLAT results, the start position of a sequence is numbered as 0.
34
+ # On the other hand, in many other homology search programs,
35
+ # the start position of a sequence is numbered as 1.
36
+ # To keep compatibility, the BLAT parser adds 1 to every position number
37
+ # except Bio::Blat::Report::SeqDesc and some Bio::Blat specific methods.
38
+ #
39
+ # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods
40
+ # simply return first hit's query_*.
41
+ # If multiple query sequences are given, these values
42
+ # will be incorrect.
43
+ #
44
+ class Report #< DB
45
+ # Delimiter of each entry. Bio::FlatFile uses it.
46
+ # In Bio::Blat::Report, it is nil (1 entry 1 file).
47
+ DELIMITER = RS = nil # 1 file 1 entry
48
+
49
+ # Splitter for Bio::FlatFile
50
+ FLATFILE_SPLITTER = Bio::FlatFile::Splitter::LineOriented
51
+
52
+ # Creates a new Bio::Blat::Report object from BLAT result text (String).
53
+ # You can use Bio::FlatFile to read a file.
54
+ # Currently, results created with options -out=psl (default) or
55
+ # -out=pslx are supported.
56
# Creates a new Bio::Blat::Report object from BLAT result text (String).
#
# Lines are read in order. Everything before the data section is kept
# as header text; the data section begins either after a line made only
# of '-' characters or at the first line that starts with a digit
# (headerless data). Every line of the data section becomes a Hit.
# When header lines were found they are parsed into @columns.
def initialize(text = '')
  in_data = false
  header_lines = []
  @hits = []
  text.each_line do |line|
    # for headerless data: a leading digit means this is already a hit row
    in_data = true if !in_data && /^\d/ =~ line
    if in_data
      @hits << Hit.new(line)
      next
    end
    stripped = line.chomp
    if /\A\-+\s*\z/ =~ stripped
      # separator line: the data section starts on the next line
      in_data = true
    else
      header_lines << stripped
    end
  end
  @columns = parse_header(header_lines) unless header_lines.empty?
end
79
+
80
+ # Adds a header line if the header data is not yet given and
81
+ # the given line is suitable for header.
82
+ # Returns self if adding header line is succeeded.
83
+ # Otherwise, returns false (the line is not added).
84
# Offers one line as a candidate header line.
#
# * Returns false immediately once @columns has been set.
# * A line starting with a digit ends the header: the buffered header
#   lines (if any) are parsed into @columns and false is returned,
#   i.e. the line itself is not consumed.
# * A line made only of '-' characters also ends the header, but is
#   consumed: @columns is set and self is returned.
# * Any other line is appended to the header buffer; the buffer
#   (an Array, hence a true value) is returned.
def add_header_line(line)
  return false if defined? @columns
  text = line.chomp
  data_row = /^\d/ =~ text
  separator = data_row ? nil : (/\A\-+\s*\z/ =~ text)
  unless data_row || separator
    @header_lines ||= []
    return @header_lines.push(text)
  end
  @columns = defined?(@header_lines) ? parse_header(@header_lines) : []
  data_row ? false : self
end
99
+
100
+ # Adds a line to the entry if the given line is regarded as
101
+ # a part of the current entry.
102
+ # If the current entry (self) is empty, or the line has the same
103
+ # query name, the line is added and returns self.
104
+ # Otherwise, returns false (the line is not added).
105
# Offers one data line to the current entry.
#
# A blank line is accepted (returns self) only while the entry has no
# hits yet; otherwise false is returned. A non-blank line is turned into
# a Hit and appended when the entry is still empty or when its query
# name equals the query name of the first stored hit; self is returned.
# In every other case the line is rejected and false is returned.
def add_line(line)
  if /\A\s*\z/ =~ line
    return false unless @hits.empty?
    return self
  end
  candidate = Hit.new(line.chomp)
  same_entry = @hits.empty? || @hits.first.query.name == candidate.query.name
  return false unless same_entry
  @hits.push candidate
  self
end
117
+
118
+ # hits of the result.
119
+ # Returns an Array of Bio::Blat::Report::Hit objects.
120
+ attr_reader :hits
121
+
122
+ # Returns descriptions of columns.
123
+ # Returns an Array.
124
+ # This would be a Bio::Blat specific method.
125
+ attr_reader :columns
126
+
127
+ # Parses headers.
128
# Parses header lines into an Array of column descriptions.
#
# ary:: Array of header lines (String). It is modified in place:
#       leading blank lines and the "psLayout version" line are
#       removed from it.
#
# When a "psLayout version X" line is found, X is stored in
# @psl_version. The remaining lines are split on tabs and the cells of
# each column are joined top to bottom: a cell ending with '-' is
# joined to the next cell directly (the hyphen is dropped), any other
# cell is followed by a space. Each joined label is finally stripped.
def parse_header(ary)
  while (line = ary.shift)
    if (found = /psLayout version (\S+)/.match(line))
      @psl_version = found[1]
      break
    end
    next if line.strip.empty?
    # first real header line is not a version line: put it back
    ary.unshift(line)
    break
  end
  labels = []
  ary.each do |row|
    row.split(/\t/).each_with_index do |cell, idx|
      piece = cell.strip
      hyphenated = piece.sub!(/\-\z/, '')
      labels[idx] = labels[idx].to_s + (hyphenated ? piece : piece + ' ')
    end
  end
  labels.each(&:strip!)
  labels
end
149
+ private :parse_header
150
+
151
+ # version of the psl format (String or nil).
152
+ attr_reader :psl_version
153
+
154
+ # Bio::Blat::Report::SeqDesc stores sequence information of
155
+ # query or subject of the BLAT report.
156
+ # It also includes some hit information.
157
class SeqDesc
  # gap count (Integer)
  attr_reader :gap_count
  # gap bases (Integer)
  attr_reader :gap_bases
  # name of the sequence, stored as given
  attr_reader :name
  # length of the sequence (Integer)
  attr_reader :size
  # start position of the first segment (Integer)
  attr_reader :start
  # end position of the final segment (Integer)
  attr_reader :end
  # start positions of segments.
  # Returns an array of numbers.
  attr_reader :starts
  # sequences of segments, stored exactly as passed to the constructor.
  attr_reader :seqs

  # Creates a new SeqDesc object.
  # It is designed to be called internally from Bio::Blat::Report class.
  # Users shall not use it directly.
  #
  # All numeric arguments are converted with to_i; +starts+ must respond
  # to collect/map and each of its elements is converted with to_i.
  def initialize(gap_count, gap_bases, name, size,
                 st, ed, starts, seqs)
    @gap_count, @gap_bases = gap_count.to_i, gap_bases.to_i
    @name = name
    @size, @start, @end = size.to_i, st.to_i, ed.to_i
    @starts = starts.map(&:to_i)
    @seqs = seqs
  end
end #class SeqDesc
192
+
193
+ # Sequence segment pair of BLAT result.
194
+ # Similar to Bio::Blast::Report::Hsp but lacks many methods.
195
class SegmentPair
  # Creates a new SegmentPair object.
  # It is designed to be called internally from Bio::Blat::Report class.
  # Users shall not use it directly.
  #
  # query_len::    length of the query sequence
  # target_len::   length of the target sequence
  # strand::       strand string of the hit ('-', '+-', or anything else,
  #                which is treated as plus/plus)
  # blksize::      size of this block
  # qstart::       0-based start of the block in the query
  # tstart::       0-based start of the block in the target
  # qseq, tseq::   block sequences of query and target (may be nil)
  # protein_flag:: when true, one query residue covers 3 target bases
  def initialize(query_len, target_len, strand,
                 blksize, qstart, tstart, qseq, tseq,
                 protein_flag)
    @blocksize  = blksize
    @qseq       = qseq
    # The target sequence arrives as +tseq+. (Assigning from +hseq+ here
    # would read this object's own, still unset, reader and store nil.)
    @hseq       = tseq
    @hit_strand = 'plus'
    w = (protein_flag ? 3 : 1) # 3 means query=protein target=dna
    case strand
    when '-'
      # query is minus strand
      @query_strand = 'minus'
      # convert positions
      @query_from = query_len - qstart
      @query_to   = query_len - qstart - blksize + 1
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @hit_from = tstart + 1
      @hit_to   = tstart + blksize * w # - 1 + 1
    when '+-'
      # hit is minus strand
      @query_strand = 'plus'
      @hit_strand   = 'minus'
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @query_from = qstart + 1
      @query_to   = qstart + blksize # - 1 + 1
      # convert positions
      @hit_from = target_len - tstart
      @hit_to   = target_len - tstart - blksize * w + 1
    else #when '+', '++'
      @query_strand = 'plus'
      # To keep compatibility with other homology search programs,
      # we add 1 to each position number.
      @query_from = qstart + 1
      @query_to   = qstart + blksize # - 1 + 1
      @hit_from   = tstart + 1
      @hit_to     = tstart + blksize * w # - 1 + 1
    end
  end

  # Returns query start position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :query_from

  # Returns query end position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :query_to

  # Returns query sequence.
  # If sequence data is not available, returns nil.
  attr_reader :qseq

  # Returns strand information of the query.
  # Returns 'plus' or 'minus'.
  attr_reader :query_strand

  # Returns target (subject, hit) start position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :hit_from

  # Returns target (subject, hit) end position.
  # CAUTION: In Blat's raw result(psl format), first position is 0.
  # To keep compatibility, the parser adds 1 to the position.
  attr_reader :hit_to

  # Returns the target (subject, hit) sequence.
  # If sequence data is not available, returns nil.
  attr_reader :hseq

  # Returns strand information of the target (subject, hit).
  # Returns 'plus' or 'minus'.
  attr_reader :hit_strand

  # Returns block size (length) of the segment pair.
  # This would be a Bio::Blat specific method.
  attr_reader :blocksize

  # Returns alignment length of the segment pair
  # (the size of the query sequence of this block).
  # Returns nil if no alignment data are available.
  def align_len
    @qseq ? @qseq.size : nil
  end
end #class SegmentPair
285
+
286
+ # Hit class for the BLAT result parser.
287
+ # Similar to Bio::Blast::Report::Hit but lacks many methods.
288
+ # Its object may contain some Bio::Blat::Report::SegmentPair objects.
289
class Hit
  # Creates a new Hit object from a piece of BLAT result text.
  # It is designed to be called internally from Bio::Blat::Report object.
  # Users shall not use it directly.
  #
  # The line is chomped and split on tab characters.
  def initialize(str)
    @data = str.chomp.split(/\t/)
  end

  # Raw data of the hit (Array of String, one element per column).
  # (Note that it doesn't add 1 to position numbers.)
  attr_reader :data

  # Splits comma-separated text after removing trailing commas.
  # nil is treated as an empty string.
  def split_comma(str)
    without_trailing_commas = str.to_s.sub(/\s*\,+\s*\z/, '')
    without_trailing_commas.split(/\s*\,\s*/)
  end
  private :split_comma

  # Returns sequence informations of the query.
  # Returns a Bio::Blat::Report::SeqDesc object (created on first call).
  # This would be Bio::Blat specific method.
  def query
    @query ||= SeqDesc.new(*@data.values_at(4, 5, 9, 10, 11, 12),
                           split_comma(@data[19]), split_comma(@data[21]))
  end

  # Returns sequence informations of the target(hit).
  # Returns a Bio::Blat::Report::SeqDesc object (created on first call).
  # This would be Bio::Blat specific method.
  def target
    @target ||= SeqDesc.new(*@data.values_at(6, 7, 13, 14, 15, 16),
                            split_comma(@data[20]), split_comma(@data[22]))
  end

  # Match nucleotides.
  def match
    @data[0].to_i
  end

  # Mismatch nucleotides.
  def mismatch
    @data[1].to_i
  end

  # "rep. match".
  # Number of bases that match but are part of repeats.
  # Note that current version of BLAT always set 0.
  def rep_match
    @data[2].to_i
  end

  # "N's". Number of 'N' bases.
  def n_s
    @data[3].to_i
  end

  # Returns strand information of the hit, as found in the raw data.
  # This would be a Bio::Blat specific method.
  def strand
    @data[8]
  end

  # Number of blocks(exons, segment pairs).
  def block_count
    @data[17].to_i
  end

  # Sizes of all blocks(exons, segment pairs).
  # Returns an array of numbers.
  def block_sizes
    @block_sizes ||= split_comma(@data[18]).map(&:to_i)
  end

  # Returns blocks(exons, segment pairs) of the hit.
  # Returns an array of Bio::Blat::Report::SegmentPair objects,
  # one per block, built on first call.
  def blocks
    @blocks ||= begin
      sizes        = block_sizes
      query_starts = query.starts
      hit_starts   = target.starts
      query_seqs   = query.seqs
      hit_seqs     = target.seqs
      is_protein   = protein?
      (0...block_count).map do |idx|
        SegmentPair.new(query.size, target.size, strand, sizes[idx],
                        query_starts[idx], hit_starts[idx],
                        query_seqs[idx], hit_seqs[idx],
                        is_protein)
      end
    end
  end
  alias exons blocks

  #--
  # Bio::BLAST::*::Report::Hit compatible methods
  #++
  alias hsps blocks

  # Returns the length of query sequence.
  def query_len
    query.size
  end

  # Returns the name of query sequence.
  def query_def
    query.name
  end
  alias query_id query_def

  # Returns the length of the target(subject) sequence.
  def target_len
    target.size
  end
  alias len target_len

  # Returns the name of the target(subject) sequence.
  def target_def
    target.name
  end
  alias target_id target_def
  alias definition target_def

  # Iterates over each block(exon, segment pair) of the hit.
  # Yields a Bio::Blat::Report::SegmentPair object.
  def each(&block) #:yields: segmentpair
    exons.each(&block)
  end

  #--
  # methods described in the BLAT FAQ at the UCSC genome browser.
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4)
  #++

  # Calculates the pslCalcMilliBad value defined in the
  # BLAT FAQ (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  #
  # Returns 0 when the shorter of the two aligned spans is not positive
  # or when match + rep_match + mismatch is 0. Otherwise the result is
  # computed with integer division.
  def milli_bad
    w = protein? ? 3 : 1
    query_span  = w * (query.end - query.start)
    target_span = target.end - target.start
    return 0 if [query_span, target_span].min <= 0
    # only a query span longer than the target span is penalized
    excess = [query_span - target_span, 0].max
    total = w * (match + rep_match + mismatch)
    return 0 if total == 0
    penalty = mismatch * w + query.gap_count +
              (3 * Math.log(1 + excess)).round
    1000 * penalty / total
  end

  # Calculates the percent identity compatible with the BLAT web server
  # as described in the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def percent_identity
    bad = milli_bad
    100.0 - bad * 0.1
  end

  # When the output data comes from the protein query, returns true.
  # Otherwise (nucleotide query), returns false.
  # It returns nil if this cannot be determined: when there are no
  # block sizes, or when the second character of the strand is
  # neither '+' nor '-'.
  #
  # The algorithm is taken from the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def protein?
    return nil if block_sizes.empty?
    case strand[1, 1]
    when '+'
      target.end == target.starts[-1] + 3 * block_sizes[-1]
    when '-'
      target.start == target.size - target.starts[-1] - 3 * block_sizes[-1]
    end
  end

  # Calculates the score compatible with the BLAT web server
  # as described in the BLAT FAQ
  # (http://genome.ucsc.edu/FAQ/FAQblat#blat4).
  def score
    w = protein? ? 3 : 1
    matched = w * (match + (rep_match >> 1))
    matched - w * mismatch - query.gap_count - target.gap_count
  end
end #class Hit
485
+
486
+ #--
487
+ #Bio::BLAST::*::Report compatible methods
488
+ #++
489
+
490
+ # Returns number of hits.
491
+ # Same as hits.size.
492
# Returns number of hits.
# Same as hits.size.
def num_hits
  @hits.size
end
493
+
494
+ # Iterates over each Bio::Blat::Report::Hit object.
495
+ # Same as hits.each.
496
# Iterates over each Bio::Blat::Report::Hit object.
# Same as hits.each: the given block is handed to it unchanged.
def each_hit(&block) #:yields: hit
  @hits.each(&block)
end
499
+ alias each each_hit
500
+
501
+ # Returns the name of query sequence.
502
+ # CAUTION: query_* methods simply return first hit's query_*.
503
+ # If multiple query sequences are given, these values
504
+ # will be incorrect.
505
# Returns the name of query sequence, or nil when there are no hits.
# CAUTION: query_* methods simply return first hit's query_*.
# If multiple query sequences are given, these values
# will be incorrect.
def query_def
  first_hit = @hits.first
  if first_hit
    first_hit.query_def
  else
    nil
  end
end
506
+
507
+ # Returns the length of query sequence.
508
+ # CAUTION: query_* methods simply return first hit's query_*.
509
+ # If multiple query sequences are given, these values
510
+ # will be incorrect.
511
# Returns the length of query sequence, or nil when there are no hits.
# CAUTION: query_* methods simply return first hit's query_*.
# If multiple query sequences are given, these values
# will be incorrect.
def query_len
  first_hit = @hits.first
  if first_hit
    first_hit.query_len
  else
    nil
  end
end
512
+ alias query_id query_def
513
+ end #class Report
514
+
515
+ end #class Blat
516
+ end #module Bio
517
+
518
+ =begin
519
+
520
+ = Bio::Blat::Report
521
+
522
+ BLAT result parser. (psl / pslx format)
523
+
524
+ = References
525
+
526
+ * ((<URL:http://www.genome.org/cgi/content/abstract/12/4/656>))
527
+ Kent, W.J., BLAT--the BLAST-like alignment tool,
528
+ Genome Research, 12, 656--664, 2002.
529
+
530
+ =end