wwood-bioruby 1.2.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (386) hide show
  1. data/README.rdoc +205 -0
  2. data/README_DEV.rdoc +285 -0
  3. data/VERSION.yml +4 -0
  4. data/bin/bioruby +44 -0
  5. data/bin/br_biofetch.rb +47 -0
  6. data/bin/br_bioflat.rb +293 -0
  7. data/bin/br_biogetseq.rb +45 -0
  8. data/bin/br_pmfetch.rb +421 -0
  9. data/lib/bio.rb +306 -0
  10. data/lib/bio/alignment.rb +2518 -0
  11. data/lib/bio/appl/bl2seq/report.rb +334 -0
  12. data/lib/bio/appl/blast.rb +505 -0
  13. data/lib/bio/appl/blast/ddbj.rb +142 -0
  14. data/lib/bio/appl/blast/format0.rb +1438 -0
  15. data/lib/bio/appl/blast/format8.rb +83 -0
  16. data/lib/bio/appl/blast/genomenet.rb +263 -0
  17. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  18. data/lib/bio/appl/blast/remote.rb +105 -0
  19. data/lib/bio/appl/blast/report.rb +767 -0
  20. data/lib/bio/appl/blast/rexml.rb +144 -0
  21. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  22. data/lib/bio/appl/blast/wublast.rb +635 -0
  23. data/lib/bio/appl/blast/xmlparser.rb +236 -0
  24. data/lib/bio/appl/blat/report.rb +530 -0
  25. data/lib/bio/appl/clustalw.rb +219 -0
  26. data/lib/bio/appl/clustalw/report.rb +152 -0
  27. data/lib/bio/appl/emboss.rb +203 -0
  28. data/lib/bio/appl/fasta.rb +235 -0
  29. data/lib/bio/appl/fasta/format10.rb +325 -0
  30. data/lib/bio/appl/gcg/msf.rb +212 -0
  31. data/lib/bio/appl/gcg/seq.rb +195 -0
  32. data/lib/bio/appl/genscan/report.rb +552 -0
  33. data/lib/bio/appl/hmmer.rb +126 -0
  34. data/lib/bio/appl/hmmer/report.rb +683 -0
  35. data/lib/bio/appl/iprscan/report.rb +374 -0
  36. data/lib/bio/appl/mafft.rb +259 -0
  37. data/lib/bio/appl/mafft/report.rb +226 -0
  38. data/lib/bio/appl/muscle.rb +52 -0
  39. data/lib/bio/appl/paml/baseml.rb +95 -0
  40. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  41. data/lib/bio/appl/paml/codeml.rb +242 -0
  42. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  43. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  44. data/lib/bio/appl/paml/common.rb +348 -0
  45. data/lib/bio/appl/paml/common_report.rb +38 -0
  46. data/lib/bio/appl/paml/yn00.rb +103 -0
  47. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  48. data/lib/bio/appl/phylip/alignment.rb +133 -0
  49. data/lib/bio/appl/phylip/distance_matrix.rb +96 -0
  50. data/lib/bio/appl/probcons.rb +41 -0
  51. data/lib/bio/appl/psort.rb +548 -0
  52. data/lib/bio/appl/psort/report.rb +542 -0
  53. data/lib/bio/appl/pts1.rb +263 -0
  54. data/lib/bio/appl/sim4.rb +124 -0
  55. data/lib/bio/appl/sim4/report.rb +485 -0
  56. data/lib/bio/appl/sosui/report.rb +151 -0
  57. data/lib/bio/appl/spidey/report.rb +593 -0
  58. data/lib/bio/appl/targetp/report.rb +267 -0
  59. data/lib/bio/appl/tcoffee.rb +55 -0
  60. data/lib/bio/appl/tmhmm/report.rb +231 -0
  61. data/lib/bio/command.rb +593 -0
  62. data/lib/bio/compat/features.rb +157 -0
  63. data/lib/bio/compat/references.rb +128 -0
  64. data/lib/bio/data/aa.rb +353 -0
  65. data/lib/bio/data/codontable.rb +722 -0
  66. data/lib/bio/data/na.rb +223 -0
  67. data/lib/bio/db.rb +329 -0
  68. data/lib/bio/db/aaindex.rb +357 -0
  69. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  70. data/lib/bio/db/biosql/sequence.rb +508 -0
  71. data/lib/bio/db/embl/common.rb +352 -0
  72. data/lib/bio/db/embl/embl.rb +500 -0
  73. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  74. data/lib/bio/db/embl/format_embl.rb +190 -0
  75. data/lib/bio/db/embl/sptr.rb +1283 -0
  76. data/lib/bio/db/embl/swissprot.rb +42 -0
  77. data/lib/bio/db/embl/trembl.rb +41 -0
  78. data/lib/bio/db/embl/uniprot.rb +42 -0
  79. data/lib/bio/db/fantom.rb +597 -0
  80. data/lib/bio/db/fasta.rb +410 -0
  81. data/lib/bio/db/fasta/defline.rb +532 -0
  82. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  83. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  84. data/lib/bio/db/genbank/common.rb +307 -0
  85. data/lib/bio/db/genbank/ddbj.rb +22 -0
  86. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  87. data/lib/bio/db/genbank/genbank.rb +250 -0
  88. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  89. data/lib/bio/db/genbank/genpept.rb +60 -0
  90. data/lib/bio/db/genbank/refseq.rb +18 -0
  91. data/lib/bio/db/gff.rb +1846 -0
  92. data/lib/bio/db/go.rb +481 -0
  93. data/lib/bio/db/kegg/brite.rb +41 -0
  94. data/lib/bio/db/kegg/compound.rb +131 -0
  95. data/lib/bio/db/kegg/drug.rb +98 -0
  96. data/lib/bio/db/kegg/enzyme.rb +148 -0
  97. data/lib/bio/db/kegg/expression.rb +155 -0
  98. data/lib/bio/db/kegg/genes.rb +263 -0
  99. data/lib/bio/db/kegg/genome.rb +241 -0
  100. data/lib/bio/db/kegg/glycan.rb +166 -0
  101. data/lib/bio/db/kegg/keggtab.rb +357 -0
  102. data/lib/bio/db/kegg/kgml.rb +256 -0
  103. data/lib/bio/db/kegg/orthology.rb +136 -0
  104. data/lib/bio/db/kegg/reaction.rb +82 -0
  105. data/lib/bio/db/kegg/taxonomy.rb +331 -0
  106. data/lib/bio/db/lasergene.rb +209 -0
  107. data/lib/bio/db/litdb.rb +107 -0
  108. data/lib/bio/db/medline.rb +326 -0
  109. data/lib/bio/db/nbrf.rb +191 -0
  110. data/lib/bio/db/newick.rb +658 -0
  111. data/lib/bio/db/nexus.rb +1854 -0
  112. data/lib/bio/db/pdb.rb +29 -0
  113. data/lib/bio/db/pdb/atom.rb +77 -0
  114. data/lib/bio/db/pdb/chain.rb +210 -0
  115. data/lib/bio/db/pdb/chemicalcomponent.rb +224 -0
  116. data/lib/bio/db/pdb/model.rb +148 -0
  117. data/lib/bio/db/pdb/pdb.rb +1911 -0
  118. data/lib/bio/db/pdb/residue.rb +176 -0
  119. data/lib/bio/db/pdb/utils.rb +399 -0
  120. data/lib/bio/db/prosite.rb +597 -0
  121. data/lib/bio/db/rebase.rb +456 -0
  122. data/lib/bio/db/soft.rb +404 -0
  123. data/lib/bio/db/transfac.rb +375 -0
  124. data/lib/bio/db/url.rb +42 -0
  125. data/lib/bio/feature.rb +139 -0
  126. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  127. data/lib/bio/io/biosql/bioentry.rb +29 -0
  128. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  129. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  130. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  131. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  132. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  133. data/lib/bio/io/biosql/biosequence.rb +11 -0
  134. data/lib/bio/io/biosql/comment.rb +7 -0
  135. data/lib/bio/io/biosql/config/database.yml +20 -0
  136. data/lib/bio/io/biosql/dbxref.rb +13 -0
  137. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  138. data/lib/bio/io/biosql/location.rb +32 -0
  139. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  140. data/lib/bio/io/biosql/ontology.rb +10 -0
  141. data/lib/bio/io/biosql/reference.rb +9 -0
  142. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  143. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  144. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  145. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  146. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  147. data/lib/bio/io/biosql/taxon.rb +12 -0
  148. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  149. data/lib/bio/io/biosql/term.rb +27 -0
  150. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  151. data/lib/bio/io/biosql/term_path.rb +12 -0
  152. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  153. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  154. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  155. data/lib/bio/io/das.rb +461 -0
  156. data/lib/bio/io/dbget.rb +194 -0
  157. data/lib/bio/io/ddbjxml.rb +638 -0
  158. data/lib/bio/io/ebisoap.rb +158 -0
  159. data/lib/bio/io/ensembl.rb +229 -0
  160. data/lib/bio/io/fastacmd.rb +163 -0
  161. data/lib/bio/io/fetch.rb +195 -0
  162. data/lib/bio/io/flatfile.rb +482 -0
  163. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  164. data/lib/bio/io/flatfile/bdb.rb +253 -0
  165. data/lib/bio/io/flatfile/buffer.rb +237 -0
  166. data/lib/bio/io/flatfile/index.rb +1381 -0
  167. data/lib/bio/io/flatfile/indexer.rb +805 -0
  168. data/lib/bio/io/flatfile/splitter.rb +297 -0
  169. data/lib/bio/io/higet.rb +73 -0
  170. data/lib/bio/io/hinv.rb +442 -0
  171. data/lib/bio/io/keggapi.rb +805 -0
  172. data/lib/bio/io/ncbirest.rb +733 -0
  173. data/lib/bio/io/ncbisoap.rb +155 -0
  174. data/lib/bio/io/pubmed.rb +307 -0
  175. data/lib/bio/io/registry.rb +292 -0
  176. data/lib/bio/io/soapwsdl.rb +119 -0
  177. data/lib/bio/io/sql.rb +186 -0
  178. data/lib/bio/location.rb +867 -0
  179. data/lib/bio/map.rb +410 -0
  180. data/lib/bio/pathway.rb +960 -0
  181. data/lib/bio/reference.rb +602 -0
  182. data/lib/bio/sequence.rb +456 -0
  183. data/lib/bio/sequence/aa.rb +152 -0
  184. data/lib/bio/sequence/adapter.rb +108 -0
  185. data/lib/bio/sequence/common.rb +310 -0
  186. data/lib/bio/sequence/compat.rb +123 -0
  187. data/lib/bio/sequence/dblink.rb +54 -0
  188. data/lib/bio/sequence/format.rb +358 -0
  189. data/lib/bio/sequence/format_raw.rb +23 -0
  190. data/lib/bio/sequence/generic.rb +24 -0
  191. data/lib/bio/sequence/na.rb +491 -0
  192. data/lib/bio/shell.rb +44 -0
  193. data/lib/bio/shell/core.rb +578 -0
  194. data/lib/bio/shell/demo.rb +146 -0
  195. data/lib/bio/shell/interface.rb +218 -0
  196. data/lib/bio/shell/irb.rb +95 -0
  197. data/lib/bio/shell/object.rb +71 -0
  198. data/lib/bio/shell/plugin/blast.rb +42 -0
  199. data/lib/bio/shell/plugin/codon.rb +218 -0
  200. data/lib/bio/shell/plugin/das.rb +58 -0
  201. data/lib/bio/shell/plugin/emboss.rb +23 -0
  202. data/lib/bio/shell/plugin/entry.rb +105 -0
  203. data/lib/bio/shell/plugin/flatfile.rb +101 -0
  204. data/lib/bio/shell/plugin/keggapi.rb +181 -0
  205. data/lib/bio/shell/plugin/midi.rb +430 -0
  206. data/lib/bio/shell/plugin/obda.rb +45 -0
  207. data/lib/bio/shell/plugin/psort.rb +56 -0
  208. data/lib/bio/shell/plugin/seq.rb +247 -0
  209. data/lib/bio/shell/plugin/soap.rb +87 -0
  210. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/bioruby_generator.rb +29 -0
  211. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_classes.rhtml +4 -0
  212. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_log.rhtml +27 -0
  213. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_methods.rhtml +11 -0
  214. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_modules.rhtml +4 -0
  215. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/_variables.rhtml +7 -0
  216. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-bg.gif +0 -0
  217. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-gem.png +0 -0
  218. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby-link.gif +0 -0
  219. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css +368 -0
  220. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.rhtml +47 -0
  221. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_controller.rb +144 -0
  222. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby_helper.rb +47 -0
  223. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/commands.rhtml +8 -0
  224. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/history.rhtml +10 -0
  225. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/index.rhtml +26 -0
  226. data/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/spinner.gif +0 -0
  227. data/lib/bio/shell/script.rb +25 -0
  228. data/lib/bio/shell/setup.rb +109 -0
  229. data/lib/bio/shell/web.rb +102 -0
  230. data/lib/bio/tree.rb +852 -0
  231. data/lib/bio/util/color_scheme.rb +191 -0
  232. data/lib/bio/util/color_scheme/buried.rb +59 -0
  233. data/lib/bio/util/color_scheme/helix.rb +59 -0
  234. data/lib/bio/util/color_scheme/hydropathy.rb +64 -0
  235. data/lib/bio/util/color_scheme/nucleotide.rb +31 -0
  236. data/lib/bio/util/color_scheme/strand.rb +59 -0
  237. data/lib/bio/util/color_scheme/taylor.rb +50 -0
  238. data/lib/bio/util/color_scheme/turn.rb +59 -0
  239. data/lib/bio/util/color_scheme/zappo.rb +50 -0
  240. data/lib/bio/util/contingency_table.rb +370 -0
  241. data/lib/bio/util/restriction_enzyme.rb +228 -0
  242. data/lib/bio/util/restriction_enzyme/analysis.rb +249 -0
  243. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +217 -0
  244. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +107 -0
  245. data/lib/bio/util/restriction_enzyme/double_stranded.rb +321 -0
  246. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +130 -0
  247. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +103 -0
  248. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +38 -0
  249. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +76 -0
  250. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +107 -0
  251. data/lib/bio/util/restriction_enzyme/enzymes.yaml +7061 -0
  252. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +24 -0
  253. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +47 -0
  254. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +67 -0
  255. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +257 -0
  256. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +242 -0
  257. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +51 -0
  258. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +41 -0
  259. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +77 -0
  260. data/lib/bio/util/restriction_enzyme/single_strand.rb +200 -0
  261. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +135 -0
  262. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +23 -0
  263. data/lib/bio/util/restriction_enzyme/string_formatting.rb +111 -0
  264. data/lib/bio/util/sirna.rb +288 -0
  265. data/test/data/HMMER/hmmpfam.out +64 -0
  266. data/test/data/HMMER/hmmsearch.out +88 -0
  267. data/test/data/SOSUI/sample.report +11 -0
  268. data/test/data/TMHMM/sample.report +21 -0
  269. data/test/data/aaindex/DAYM780301 +30 -0
  270. data/test/data/aaindex/PRAM900102 +20 -0
  271. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  272. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  273. data/test/data/blast/2.2.15.blastp.m7 +876 -0
  274. data/test/data/blast/b0002.faa +15 -0
  275. data/test/data/blast/b0002.faa.m0 +128 -0
  276. data/test/data/blast/b0002.faa.m7 +65 -0
  277. data/test/data/blast/b0002.faa.m8 +1 -0
  278. data/test/data/blast/blastp-multi.m7 +188 -0
  279. data/test/data/command/echoarg2.bat +1 -0
  280. data/test/data/embl/AB090716.embl +65 -0
  281. data/test/data/embl/AB090716.embl.rel89 +63 -0
  282. data/test/data/fasta/example1.txt +75 -0
  283. data/test/data/fasta/example2.txt +21 -0
  284. data/test/data/genscan/sample.report +63 -0
  285. data/test/data/iprscan/merged.raw +32 -0
  286. data/test/data/iprscan/merged.txt +74 -0
  287. data/test/data/paml/codeml/control_file.txt +30 -0
  288. data/test/data/paml/codeml/output.txt +78 -0
  289. data/test/data/paml/codeml/rates +217 -0
  290. data/test/data/prosite/prosite.dat +2233 -0
  291. data/test/data/refseq/nm_126355.entret +64 -0
  292. data/test/data/rpsblast/misc.rpsblast +193 -0
  293. data/test/data/soft/GDS100_partial.soft +92 -0
  294. data/test/data/soft/GSE3457_family_partial.soft +874 -0
  295. data/test/data/uniprot/p53_human.uniprot +1456 -0
  296. data/test/functional/bio/appl/test_pts1.rb +115 -0
  297. data/test/functional/bio/io/test_ensembl.rb +229 -0
  298. data/test/functional/bio/io/test_soapwsdl.rb +52 -0
  299. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  300. data/test/functional/bio/test_command.rb +301 -0
  301. data/test/runner.rb +14 -0
  302. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  303. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  304. data/test/unit/bio/appl/blast/test_report.rb +1135 -0
  305. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  306. data/test/unit/bio/appl/genscan/test_report.rb +182 -0
  307. data/test/unit/bio/appl/hmmer/test_report.rb +342 -0
  308. data/test/unit/bio/appl/iprscan/test_report.rb +338 -0
  309. data/test/unit/bio/appl/mafft/test_report.rb +63 -0
  310. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  311. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  312. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  313. data/test/unit/bio/appl/sosui/test_report.rb +81 -0
  314. data/test/unit/bio/appl/targetp/test_report.rb +146 -0
  315. data/test/unit/bio/appl/test_blast.rb +277 -0
  316. data/test/unit/bio/appl/test_fasta.rb +130 -0
  317. data/test/unit/bio/appl/test_psort.rb +57 -0
  318. data/test/unit/bio/appl/test_pts1.rb +77 -0
  319. data/test/unit/bio/appl/tmhmm/test_report.rb +126 -0
  320. data/test/unit/bio/data/test_aa.rb +90 -0
  321. data/test/unit/bio/data/test_codontable.rb +107 -0
  322. data/test/unit/bio/data/test_na.rb +80 -0
  323. data/test/unit/bio/db/embl/test_common.rb +117 -0
  324. data/test/unit/bio/db/embl/test_embl.rb +214 -0
  325. data/test/unit/bio/db/embl/test_embl_rel89.rb +219 -0
  326. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  327. data/test/unit/bio/db/embl/test_sptr.rb +1812 -0
  328. data/test/unit/bio/db/embl/test_uniprot.rb +31 -0
  329. data/test/unit/bio/db/kegg/test_genes.rb +45 -0
  330. data/test/unit/bio/db/pdb/test_pdb.rb +152 -0
  331. data/test/unit/bio/db/test_aaindex.rb +197 -0
  332. data/test/unit/bio/db/test_fasta.rb +250 -0
  333. data/test/unit/bio/db/test_gff.rb +1190 -0
  334. data/test/unit/bio/db/test_lasergene.rb +95 -0
  335. data/test/unit/bio/db/test_medline.rb +127 -0
  336. data/test/unit/bio/db/test_newick.rb +293 -0
  337. data/test/unit/bio/db/test_nexus.rb +364 -0
  338. data/test/unit/bio/db/test_prosite.rb +1437 -0
  339. data/test/unit/bio/db/test_rebase.rb +101 -0
  340. data/test/unit/bio/db/test_soft.rb +138 -0
  341. data/test/unit/bio/db/test_url.rb +36 -0
  342. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  343. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  344. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  345. data/test/unit/bio/io/test_ddbjxml.rb +80 -0
  346. data/test/unit/bio/io/test_ensembl.rb +109 -0
  347. data/test/unit/bio/io/test_fastacmd.rb +42 -0
  348. data/test/unit/bio/io/test_flatfile.rb +505 -0
  349. data/test/unit/bio/io/test_soapwsdl.rb +32 -0
  350. data/test/unit/bio/sequence/test_aa.rb +115 -0
  351. data/test/unit/bio/sequence/test_common.rb +373 -0
  352. data/test/unit/bio/sequence/test_compat.rb +69 -0
  353. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  354. data/test/unit/bio/sequence/test_na.rb +330 -0
  355. data/test/unit/bio/shell/plugin/test_seq.rb +185 -0
  356. data/test/unit/bio/test_alignment.rb +1025 -0
  357. data/test/unit/bio/test_command.rb +349 -0
  358. data/test/unit/bio/test_db.rb +96 -0
  359. data/test/unit/bio/test_feature.rb +144 -0
  360. data/test/unit/bio/test_location.rb +599 -0
  361. data/test/unit/bio/test_map.rb +230 -0
  362. data/test/unit/bio/test_pathway.rb +499 -0
  363. data/test/unit/bio/test_reference.rb +252 -0
  364. data/test/unit/bio/test_sequence.rb +329 -0
  365. data/test/unit/bio/test_shell.rb +18 -0
  366. data/test/unit/bio/test_tree.rb +593 -0
  367. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +299 -0
  368. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +103 -0
  369. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +240 -0
  370. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +101 -0
  371. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +75 -0
  372. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +73 -0
  373. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +53 -0
  374. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +104 -0
  375. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +83 -0
  376. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +246 -0
  377. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +44 -0
  378. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +115 -0
  379. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +147 -0
  380. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +147 -0
  381. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +60 -0
  382. data/test/unit/bio/util/test_color_scheme.rb +33 -0
  383. data/test/unit/bio/util/test_contingency_table.rb +94 -0
  384. data/test/unit/bio/util/test_restriction_enzyme.rb +42 -0
  385. data/test/unit/bio/util/test_sirna.rb +245 -0
  386. metadata +543 -0
@@ -0,0 +1,142 @@
1
+ #
2
+ # = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+
10
+ require 'bio/appl/blast/remote'
11
+ require 'bio/io/ddbjxml'
12
+
13
+ module Bio::Blast::Remote
14
+
15
+ # Remote BLAST factory using DDBJ Web API for Biology
16
+ # (http://xml.nig.ac.jp/).
17
+ #
18
+ module DDBJ
19
+
20
# Builds a BLAST factory bound to DDBJ.
#
# The arguments are handed to Bio::Blast.new unchanged, followed by the
# string 'ddbj' as the fourth argument. Returns whatever Bio::Blast.new
# returns (a Bio::Blast object).
#
# Note for future improvement: a later version might return a
# Bio::Blast::Remote::DDBJ or some other object instead.
#
def self.new(program, db, options = [])
  server = 'ddbj'
  Bio::Blast.new(program, db, options, server)
end
29
+
30
+ # Information about DDBJ BLAST.
31
+ module Information
32
+
33
+ include Bio::Blast::Remote::Information
34
+
35
# (private) parse database information
#
# Asks Bio::DDBJ::XML::Blast#getSupportDatabaseList for the list of
# databases and stores the parsed result in @databases (program name =>
# Array of database names) and @database_descriptions (program name =>
# Hash of database name => description).
#
# Each line of the list is split once on " - ":
# * "name - description" registers a database under the current program.
# * a line without the separator is kept as a prefix ("<line>: ") for the
#   descriptions that follow, and the current program is set to 'blastn'.
# * an empty line clears the prefix and switches to 'blastp'.
#
# 'blastx' shares the 'blastp' tables; 'tblastn' and 'tblastx' share the
# 'blastn' tables.
#
# Returns true after parsing, or nil when parsing has already been done.
def _parse_databases
  return nil if defined?(@parse_databases) && @parse_databases

  drv = Bio::DDBJ::XML::Blast.new
  str = drv.getSupportDatabaseList

  databases = {}
  dbdescs = {}
  key = 'blastn'
  prefix = ''
  databases[key] ||= []
  dbdescs[key] ||= {}
  str.each_line do |line|
    a = line.strip.split(/\s*\-\s*/, 2)
    case a.size
    when 1
      # heading line: becomes the prefix of the following descriptions
      prefix = a[0].to_s.strip
      prefix += ': ' unless prefix.empty?
      key = 'blastn'
      next
    when 0
      # empty line: following entries are registered under 'blastp'
      prefix = ''
      key = 'blastp'
      databases[key] ||= []
      dbdescs[key] ||= {}
      next
    end
    name = a[0].to_s.strip.freeze
    desc = (prefix + a[1].to_s.strip).freeze
    databases[key].push name
    dbdescs[key][name] = desc
  end

  # Make sure the protein tables exist even when the list had no protein
  # section. The description table must be a Hash like all the others
  # (it is looked up by database name), not an Array.
  databases['blastp'] ||= []
  dbdescs['blastp'] ||= {}

  databases['blastn'].freeze
  databases['blastp'].freeze

  databases['blastx'] = databases['blastp']
  dbdescs['blastx'] = dbdescs['blastp']
  databases['tblastn'] = databases['blastn']
  dbdescs['tblastn'] = dbdescs['blastn']
  databases['tblastx'] = databases['blastn']
  dbdescs['tblastx'] = dbdescs['blastn']

  @databases = databases
  @database_descriptions = dbdescs
  @parse_databases = true
  true
end
88
+ private :_parse_databases
89
+
90
+ end #module Information
91
+
92
+ extend Information
93
+
94
# executes BLAST and returns result as a string
#
# Builds the option list with make_command_line_options, takes the
# program ('-p') and database ('-d') out of it, and submits the search
# with searchParamAsync. The request ID is stored in @output while the
# job is running.
#
# The result is then polled with getAsyncResult: the first poll happens
# after 2 seconds, and each further poll after 5 seconds, for as long as
# the service answers "Your job has not completed yet". The final answer
# is stored in @output and returned.
#
# Raises RuntimeError when the service reports that it is very busy.
def exec_ddbj(query)
  options = make_command_line_options
  opt = Bio::Blast::NCBIOptions.new(options)

  # SOAP objects are cached
  @ddbj_remote_blast ||= Bio::DDBJ::XML::Blast.new
  #@ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager.new
  # always use REST version to prevent warning messages
  @ddbj_request_manager ||= Bio::DDBJ::XML::RequestManager::REST.new

  program = opt.delete('-p')
  db = opt.delete('-d')
  optstr = Bio::Command.make_command_line_unix(opt.options)

  # using searchParamAsync
  qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
  @output = qid

  sleeptime = 2
  result = nil
  loop do
    if $VERBOSE then
      $stderr.puts "DDBJ BLAST: ID: #{qid} -- waiting #{sleeptime} sec."
    end
    sleep(sleeptime)

    result = @ddbj_request_manager.getAsyncResult(qid)
    case result.to_s
    when /The search and analysis service by WWW is very busy now/
      raise result.to_s.strip + '(Alternatively, wrong options may be given.)'
    when /Your job has not completed yet/
      # still running: poll again with the longer interval
      sleeptime = 5
    else
      break
    end
  end

  @output = result
  return @output
end
135
+
136
+ end #module DDBJ
137
+
138
+ # for lazy load DDBJ module
139
+ Ddbj = DDBJ
140
+
141
+ end #module Bio::Blast::Remote
142
+
@@ -0,0 +1,1438 @@
1
+ #
2
+ # = bio/appl/blast/format0.rb - BLAST default output (-m 0) parser
3
+ #
4
+ # Copyright:: Copyright (C) 2003-2006 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: The Ruby License
6
+ #
7
+ # $Id:$
8
+ #
9
+ # == Description
10
+ #
11
+ # NCBI BLAST default (-m 0 option) output parser.
12
+ #
13
+ # == References
14
+ #
15
+ # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
16
+ # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
17
+ # "Gapped BLAST and PSI-BLAST: a new generation of protein database search
18
+ # programs", Nucleic Acids Res. 25:3389-3402.
19
+ # * http://www.ncbi.nlm.nih.gov/blast/
20
+ #
21
+
22
+ begin
23
+ require 'strscan'
24
+ rescue LoadError
25
+ end
26
+ require 'singleton'
27
+
28
+ #--
29
+ #require 'bio/db'
30
+ #++
31
+ require 'bio/io/flatfile'
32
+
33
+ module Bio
34
+ class Blast
35
+ module Default #:nodoc:
36
+
37
+ # Bio::Blast::Default::Report parses NCBI BLAST default output
38
+ # and stores information in the data.
39
+ # It may store some Bio::Blast::Default::Report::Iteration objects.
40
+ class Report #< DB
41
# Delimiter of each entry. Bio::FlatFile uses it.
# RS and DELIMITER refer to the same String object.
RS = "\nBLAST"
DELIMITER = RS

# (Integer) excess read size included in DELIMITER
# (the 5 characters of "BLAST").
DELIMITER_OVERRUN = 5
46
+
47
# Opens a file through Bio::FlatFile.open, passing this class as the
# first argument, then +filename+ and any +mode+ arguments.
# Returns whatever Bio::FlatFile.open returns.
def self.open(filename, *mode)
  flatfile_args = [self, filename, *mode]
  Bio::FlatFile.open(*flatfile_args)
end
51
+
52
# Creates a new Report object from BLAST result text.
#
# Leading whitespace is dropped. Everything from the first line that
# starts with "BLAST" or "TBLAST" (after the beginning of the text) is cut
# off and kept in @entry_overrun (nil when there is no such line). The
# remaining text is stored in @entry, split on blank lines, and handed to
# the format0_split_* helpers; @iterations is set from
# format0_split_search.
def initialize(str)
  text = str.sub(/\A\s+/, '')
  # remove trailing entries for sure
  cut = text.sub!(/\n(T?BLAST.*)/m, "\n")
  @entry_overrun = cut ? Regexp.last_match(1) : nil
  @entry = text

  chunks = text.split(/(?:^[ \t]*\n)+/)
  format0_split_headers(chunks)
  @iterations = format0_split_search(chunks)
  format0_split_stat_params(chunks)
end
64
+ # piece of next entry. Bio::FlatFile uses it.
65
+ attr_reader :entry_overrun
66
+
67
+ # (PSI-BLAST)
68
+ # Returns iterations.
69
+ # It returns an array of Bio::Blast::Default::Report::Iteration class.
70
+ # Note that normal blastall result usually contains one iteration.
71
+ attr_reader :iterations
72
+
73
# Returns the whole entry (the text stored in @entry) as a string.
def to_s
  @entry
end
75
+
76
#:stopdoc:
# Replace the constructor with one that always raises when a usable
# StringScanner is not available: either strscan could not be loaded at
# all, or it is StringScanner_R (in old version of strscan).
if defined?(StringScanner)
  if StringScanner.name == 'StringScanner_R'
    def initialize(*arg)
      raise 'cannot use StringScanner_R'
    end #def
  end
else
  def initialize(*arg)
    raise 'couldn\'t load strscan.so'
  end #def
end
#:startdoc:
88
+
89
# Defines reader methods that delegate to the @f0dbstat object.
# For every given name, a method of that name is generated which returns
# <tt>@f0dbstat.name</tt>.
def self.delegate_to_f0dbstat(*names)
  names.each { |attr| module_eval("def #{attr}; @f0dbstat.#{attr}; end") }
end
95
+ private_class_method :delegate_to_f0dbstat
96
+
97
# The <tt>attr_reader ... if false</tt> lines below are never executed;
# they only carry the documentation comments. The real readers are
# generated by the single delegate_to_f0dbstat call at the end.

# number of sequences in database
attr_reader :db_num if false #dummy

# number of letters in database
attr_reader :db_len if false #dummy

# posted date of the database
attr_reader :posted_date if false #dummy

# effective length of the database
attr_reader :eff_space if false #dummy

# name of the matrix
attr_reader :matrix if false #dummy

# match score of the matrix
attr_reader :sc_match if false #dummy

# mismatch score of the matrix
attr_reader :sc_mismatch if false #dummy

# gap open penalty
attr_reader :gap_open if false #dummy

# gap extend penalty
attr_reader :gap_extend if false #dummy

# e-value threshold specified when BLAST was executed
attr_reader :expect if false #dummy

# number of hits. Note that this may differ from <tt>hits.size</tt>.
attr_reader :num_hits if false #dummy

delegate_to_f0dbstat :db_num, :db_len, :posted_date, :eff_space,
                     :matrix, :sc_match, :sc_mismatch,
                     :gap_open, :gap_extend, :expect, :num_hits
140
+
141
# Kappa of the last iteration (<tt>iterations.last.kappa</tt>).
def kappa
  @iterations.last.kappa
end

# Lambda of the last iteration (<tt>iterations.last.lambda</tt>).
def lambda
  @iterations.last.lambda
end

# Entropy of the last iteration (<tt>iterations.last.entropy</tt>).
def entropy
  @iterations.last.entropy
end
147
+
148
# Gapped kappa of the last iteration
# (<tt>iterations.last.gapped_kappa</tt>).
def gapped_kappa
  @iterations.last.gapped_kappa
end

# Gapped lambda of the last iteration
# (<tt>iterations.last.gapped_lambda</tt>).
def gapped_lambda
  @iterations.last.gapped_lambda
end

# Gapped entropy of the last iteration
# (<tt>iterations.last.gapped_entropy</tt>).
def gapped_entropy
  @iterations.last.gapped_entropy
end
154
+
155
# Returns program name (@program, after the header has been parsed).
def program
  format0_parse_header
  @program
end

# Returns version of the program (@version, after the header has been
# parsed).
def version
  format0_parse_header
  @version
end

# Returns version number string of the program.
def version_number
  format0_parse_header
  @version_number
end

# Returns released date of the program.
def version_date
  format0_parse_header
  @version_date
end
163
+
164
# Returns length of the query (@query_len, after the query lines have
# been parsed).
def query_len
  format0_parse_query
  @query_len
end

# Returns definition of the query (@query_def, after the query lines
# have been parsed).
def query_def
  format0_parse_query
  @query_def
end
169
+
170
# (PHI-BLAST)
# Pattern of the FIRST iteration (<tt>iterations.first.pattern</tt>);
# note that the other summary readers use the last iteration.
def pattern
  @iterations.first.pattern
end
174
+
175
# (PHI-BLAST)
# Pattern positions of the FIRST iteration
# (<tt>iterations.first.pattern_positions</tt>).
def pattern_positions
  first_iteration = @iterations.first
  first_iteration.pattern_positions
end
181
+
182
# (PSI-BLAST)
# Yields every element of <tt>iterations</tt> in order
# (same as <tt>iterations.each</tt>).
# Each yielded value is a Bio::Blast::Default::Report::Iteration object.
def each_iteration
  @iterations.each { |iteration| yield iteration }
end
191
+
192
# Yields every hit of the last iteration by calling +each+ on
# <tt>iterations.last</tt>.
# Yields a Bio::Blast::Default::Report::Hit object.
# This is very useful in most cases, e.g. for blastall results.
# Also available as +each+.
def each_hit
  last_iteration = @iterations.last
  last_iteration.each { |hit| yield hit }
end
alias each each_hit
202
+
203
# Hits of the last iteration (<tt>iterations.last.hits</tt>).
# Returns an array of Bio::Blast::Default::Report::Hit object.
# This is very useful in most cases, e.g. for blastall results.
def hits
  last_iteration = @iterations.last
  last_iteration.hits
end
210
+
211
+ # (PSI-BLAST)
212
+ # Same as <tt>iterations.last.message</tt>.
213
+ def message
214
+ @iterations.last.message
215
+ end
216
+
217
+ # (PSI-BLAST)
218
+ # Same as <tt>iterations.last.converged?</tt>.
219
+ # Returns true if the last iteration is converged,
220
+ # otherwise, returns false.
221
+ def converged?
222
+ @iterations.last.converged?
223
+ end
224
+
225
+ # Returns the bibliography reference of the BLAST software.
226
+ # Note that this method shows only the first reference.
227
+ # When you want to get additional references,
228
+ # you can use <tt>references</tt> method.
229
+ def reference
230
+ references[0]
231
+ end
232
+
233
+ # Returns the bibliography references of the BLAST software.
234
+ # Returns an array of strings.
235
def references
  # Built once from the raw reference chunks: each chunk is stringified,
  # runs of whitespace are collapsed to one space, and the ends are trimmed.
  return @references if defined?(@references)
  @references = @f0references.collect { |chunk| chunk.to_s.gsub(/\s+/, ' ').strip }
  @references
end
243
+
244
+ # Returns the name (filename or title) of the database.
245
def db
  # Memoized. The text after "Database:" is split into lines; when there
  # is more than one line the last one is dropped, and the remaining lines
  # are right-trimmed and joined with single spaces.
  return @db if defined?(@db)
  if /Database *\: *(.*)/m =~ @f0database
    name_lines = $1.split(/^/)
    name_lines.pop if name_lines.size > 1
    trimmed = name_lines.collect { |ln| ln.sub(/\s+\z/, '') }
    @db = trimmed.join(' ')
  end
  @db
end
255
+
256
+ private
257
+ # Parses the query lines (begins with "Query = ").
258
def format0_parse_query
  # Reads @f0query and sets:
  #   @query_def - the definition lines after "Query=", joined by one space
  #   @query_len - the number from the "(N letters)" trailer, if present
  # Does nothing when @query_def is already set or when no "Query=" marker
  # is found.
  #
  # The previous loop condition tested +sc.rest+ (a String, always truthy)
  # instead of +sc.rest?+, so a chunk without the "(N letters)" trailer
  # looped forever. The loop now stops at end of input, and also when an
  # iteration consumes nothing (only unparsable trailing text remains).
  return if defined?(@query_def)
  sc = StringScanner.new(@f0query)
  sc.skip(/\s*/)
  return unless sc.skip_until(/Query\= */)
  q = []
  r = nil
  until sc.eos?
    start = sc.pos
    line = sc.scan(/.*/)
    sc.skip(/\s*^ ?/)
    # no progress: nothing more can be parsed from this chunk
    break if sc.pos == start
    q << line
    break if (r = sc.skip(/ *\( *([\,\d]+) *letters *\)\s*\z/))
  end
  @query_len = sc[1].delete(',').to_i if r
  @query_def = q.join(' ')
end
273
+
274
+ # Parses the first line of the BLAST result.
275
def format0_parse_header
  # Runs once. Picks the program name, version number and bracketed date
  # out of the header chunk and stores them in @program, @version,
  # @version_number and @version_date.
  return if defined?(@program)
  m = /([\-\w]+) +([\w\-\.\d]+) *\[ *([\-\.\w]+) *\] *(\[.+\])?/.match(@f0header.to_s)
  return unless m
  @program = m[1]
  @version = "#{m[1]} #{m[2]} [#{m[3]}]"
  @version_number = m[2]
  @version_date = m[3]
end
285
+
286
+ # Splits headers into the first line, reference, query line and
287
+ # database line.
288
def format0_split_headers(data)
  # Consumes the leading chunks of +data+ (the array is modified in place):
  # first chunk -> @f0header, every chunk before the one starting with
  # "Query=" -> @f0references, then @f0query and @f0database.
  @f0header     = data.shift
  @f0references = []
  loop do
    head = data[0]
    break unless head && /\AQuery\=/ !~ head
    @f0references.push(data.shift)
  end
  @f0query    = data.shift
  @f0database = data.shift
  # In special case, a void line is inserted after database name, which
  # leaves the "N sequences; M total letters" line in a chunk of its own;
  # glue it back onto the database chunk.
  size_line = /\A +[\d\,]+ +sequences\; +[\d\,]+ total +letters\s*\z/
  return unless size_line =~ data[0]
  @f0database.concat("\n")
  @f0database.concat(data.shift)
end
302
+
303
+ # Splits the statistical parameters.
304
def format0_split_stat_params(data)
  # Consumes the trailing statistics chunks of +data+ (modified in place):
  #   * leading "Database:" chunks go into a new F0dbstat (@f0dbstat);
  #   * each following chunk containing a line that starts with "Lambda"
  #     is appended to the last iteration's @f0stat, and that iteration is
  #     given the shared F0dbstat object;
  #   * whatever remains becomes the F0dbstat's f0params.
  dbs = []
  while (r = data.first) && /^ *Database\:/ =~ r
    dbs << data.shift
  end
  @f0dbstat = self.class::F0dbstat.new(dbs)
  i = -1
  while (r = data[0]) && /^Lambda/ =~ r
    #i -= 1 unless /^Gapped/ =~ r
    # Always consume the chunk. It used to be shifted only when an
    # iteration existed, which looped forever on a report without any.
    x = data.shift
    if (itr = @iterations[i])
      itr.instance_eval { @f0stat << x }
      dbstat = @f0dbstat
      # inside instance_eval, @f0dbstat is the iteration's own variable
      itr.instance_eval { @f0dbstat = dbstat }
    end
  end
  @f0dbstat.f0params = data
end
320
+
321
+ # Splits the search results.
322
def format0_split_search(data)
  # Builds one Iteration per leading "Searching" chunk. Iteration.new is
  # handed the shared +data+ array and is expected to consume its own
  # chunks from it.
  rounds = []
  rounds << Iteration.new(data) while (head = data[0]) && /^Searching/ =~ head
  rounds
end
329
+
330
+ # Stores format0 database statistics.
331
+ # Internal use only. Users must not use the class.
332
+ class F0dbstat #:nodoc:
333
+ # Creates new F0dbstat class.
334
+ # Internal use only.
335
+ def initialize(ary)
336
+ @f0dbstat = ary
337
+ @hash = {}
338
+ end
339
+ attr_reader :f0dbstat
340
+ attr_accessor :f0params
341
+
342
+ # Parses colon-separated lines (in +ary+) and stores to +hash+.
343
+ def parse_colon_separated_params(hash, ary)
344
+ ary.each do |str|
345
+ sc = StringScanner.new(str)
346
+ sc.skip(/\s*/)
347
+ while sc.rest?
348
+ if sc.match?(/Number of sequences better than +([e\+\-\.\d]+) *\: *(.+)/) then
349
+ ev = sc[1]
350
+ ev = '1' + ev if ev[0] == ?e
351
+ @expect = ev.to_f
352
+ @num_hits = sc[2].tr(',', '').to_i
353
+ end
354
+ if sc.skip(/([\-\,\.\'\(\)\#\w ]+)\: *(.*)/) then
355
+ hash[sc[1]] = sc[2]
356
+ else
357
+ #p sc.peek(20)
358
+ raise ScanError
359
+ end
360
+ sc.skip(/\s*/)
361
+ end #while
362
+ end #each
363
+ end #def
364
+ private :parse_colon_separated_params
365
+
366
+ # Parses parameters.
367
+ def parse_params
368
+ unless defined?(@parse_params)
369
+ parse_colon_separated_params(@hash, @f0params)
370
+ #p @hash
371
+ if val = @hash['Matrix'] then
372
+ if /blastn *matrix *\: *([e\+\-\.\d]+) +([e\+\-\.\d]+)/ =~ val then
373
+ @matrix = 'blastn'
374
+ @sc_match = $1.to_i
375
+ @sc_mismatch = $2.to_i
376
+ else
377
+ @matrix = val
378
+ end
379
+ end
380
+ if val = @hash['Gap Penalties'] then
381
+ if /Existence\: *([e\+\-\.\d]+)/ =~ val then
382
+ @gap_open = $1.to_i
383
+ end
384
+ if /Extension\: *([e\+\-\.\d]+)/ =~ val then
385
+ @gap_extend = $1.to_i
386
+ end
387
+ end
388
+ #@db_num = @hash['Number of Sequences'] unless defined?(@db_num)
389
+ #@db_len = @hash['length of database'] unless defined?(@db_len)
390
+ if val = @hash['effective search space'] then
391
+ @eff_space = val.tr(',', '').to_i
392
+ end
393
+ @parse_params = true
394
+ end #unless
395
+ end
396
+ private :parse_params
397
+
398
+ # Returns name of the matrix.
399
+ def matrix; parse_params; @matrix; end
400
+ # Returns the match score of the matrix.
401
+ def sc_match; parse_params; @sc_match; end
402
+ # Returns the mismatch score of the matrix.
403
+ def sc_mismatch; parse_params; @sc_mismatch; end
404
+
405
+ # Returns gap open penalty value.
406
+ def gap_open; parse_params; @gap_open; end
407
+ # Returns gap extend penalty value.
408
+ def gap_extend; parse_params; @gap_extend; end
409
+
410
+ # Returns effective search space.
411
+ def eff_space; parse_params; @eff_space; end
412
+
413
+ # Returns e-value threshold specified when BLAST was executed.
414
+ def expect; parse_params; @expect; end
415
+
416
+ # Returns number of hits.
417
+ def num_hits; parse_params; @num_hits; end
418
+
419
+ # Parses database statistics lines.
420
# Fills @posted_date, @db_len, @db_num and @database from the first
# database statistics chunk (@f0dbstat[0]).
# Values that are already defined are never overwritten, so calling this
# method again (each accessor calls it while its own variable is still
# undefined) does not change earlier results.
+ def parse_dbstat
420
# a: the chunk split into lines (each keeps its line terminator)
+ a = @f0dbstat[0].to_s.split(/^/)
421
# d: lines popped from the tail that matched none of the three fields
+ d = []
422
# i: how many of the three trailer fields are still to be found
+ i = 3
423
# Work backwards from the last line until all three fields are found or
# the lines run out.
+ while i > 0 and line = a.pop
424
+ case line
425
+ when /^\s+Posted date\:\s*(.*)$/
426
+ unless defined?(@posted_date)
427
+ @posted_date = $1.strip
428
# lines gathered in d so far came after this field; discard them
+ i -= 1; d.clear
429
+ end
430
+ when /^\s+Number of letters in database\:\s*(.*)$/
431
+ unless defined?(@db_len)
432
# thousands separators are removed before conversion
+ @db_len = $1.tr(',', '').to_i
433
+ i -= 1; d.clear
434
+ end
435
+ when /^\s+Number of sequences in database\:\s*(.*)$/
436
+ unless defined?(@db_num)
437
+ @db_num = $1.tr(',', '').to_i
438
+ i -= 1; d.clear
439
+ end
440
+ else
441
# unrecognised line: remember it, preserving the original order
+ d.unshift(line)
442
+ end
443
+ end #while
444
# Put back the unrecognised lines seen since the last recognised field.
+ a.concat(d)
445
# Drop lines from the front until the "Database:" line; the text after
# the colon plus all following lines, stripped and joined with single
# spaces, becomes @database. If no such line exists @database stays unset.
+ while line = a.shift
446
+ if /^\s+Database\:\s*(.*)$/ =~ line
447
+ a.unshift($1)
448
+ a.each { |x| x.strip! }
449
+ @database = a.join(' ')
450
+ break #while
451
+ end
452
+ end
453
+ end #def
455
+ private :parse_dbstat
456
+
457
+ # Returns name (title or filename) of the database.
458
+ def database
459
+ unless defined?(@database); parse_dbstat; end; @database
460
+ end
461
+
462
+ # Returns posted date of the database.
463
+ def posted_date
464
+ unless defined?(@posted_date); parse_dbstat; end; @posted_date
465
+ end
466
+
467
+ # Returns number of letters in database.
468
+ def db_len
469
+ unless defined?(@db_len); parse_dbstat; end; @db_len
470
+ end
471
+
472
+ # Returns number of sequences in database.
473
+ def db_num
474
+ unless defined?(@db_num); parse_dbstat; end; @db_num
475
+ end
476
+ end #class F0dbstat
477
+
478
+ # Provides a singleton object of which any methods always return nil.
479
+ # Internal use only. Users must not use the class.
480
class AlwaysNil #:nodoc:
  include Singleton

  # Any message this object does not otherwise understand is answered
  # with nil, whatever arguments come with it.
  def method_missing(*_ignored)
    nil
  end
end #class AlwaysNil
486
+
487
+ # Bio::Blast::Default::Report::Iteration stores information about
488
+ # an iteration.
489
+ # It may contain some Bio::Blast::Default::Report::Hit objects.
490
+ # Note that a PSI-BLAST (blastpgp command) result usually contain
491
+ # multiple iterations in it, and a normal BLAST (blastall command)
492
+ # result usually contain one iteration in it.
493
+ class Iteration
494
+ # Creates a new Iteration object.
495
+ # It is designed to be called only internally from
496
+ # the Bio::Blast::Default::Report class.
497
+ # Users shall not use the method directly.
498
def initialize(data)
  # +data+ is the shared array of report chunks. The chunks belonging to
  # this iteration are shifted off it; the remainder is left for the
  # caller. Sets @num, @f0message, @f0hitlist, @hits and, when the
  # corresponding text is seen, @pattern_in_database (PHI-BLAST) and
  # @message / @flag_converged (PSI-BLAST).
  @f0stat = []
  @f0dbstat = AlwaysNil.instance
  @f0hitlist = []
  @hits = []
  @num = 1
  r = data.shift
  @f0message = [ r ]
  # A trailing "Results from round N" line is cut out of the first chunk
  # and kept as a message entry of its own.
  r.gsub!(/^Results from round (\d+).*\z/) { |x|
    @num = $1.to_i
    @f0message << x
    ''
  }
  r = data.shift
  while /^Number of occurrences of pattern in the database is +(\d+)/ =~ r
    # PHI-BLAST
    @pattern_in_database = $1.to_i
    @f0message << r
    r = data.shift
  end
  if /^Results from round (\d+)/ =~ r then
    @num = $1.to_i
    @f0message << r
    r = data.shift
  end
  if r and !(/\*{5} No hits found \*{5}/ =~ r) then
    @f0hitlist << r
    # Gather hit-list chunks until the next chunk is an alignment (">").
    # At least one chunk is taken before the check, as before. The loop
    # now also stops when +data+ is exhausted; previously it kept
    # shifting nil from the empty array forever.
    until data.empty?
      @f0hitlist << data.shift
      nxt = data[0]
      break if nxt and /^\>/ =~ nxt
    end
    r = data[0]
    if r and /^CONVERGED\!/ =~ r then
      r.sub!(/(.*\n)*^CONVERGED\!.*\n/) { |x| @f0hitlist << x; '' }
    end
    if defined?(@pattern_in_database) and r = data.first then
      #PHI-BLAST
      while /^\>/ =~ r
        @hits << Hit.new(data)
        r = data.first
        break unless r
        while /^Significant alignments for pattern/ =~ r
          data.shift
          r = data.first
        end
      end
    else
      #not PHI-BLAST
      while r = data[0] and /^\>/ =~ r
        @hits << Hit.new(data)
      end
    end
  end
  if /^CONVERGED\!\s*$/ =~ @f0hitlist[-1].to_s then
    @message = 'CONVERGED!'
    @flag_converged = true
  end
end
554
+
555
+ # (PSI-BLAST) Iteration round number.
556
+ attr_reader :num
557
+ # (PSI-BLAST) Messages of the iteration.
558
+ attr_reader :message
559
+ # (PHI-BLAST) Number of occurrences of pattern in the database.
560
+ attr_reader :pattern_in_database
561
+
562
+ # Returns the hits of the iteration.
563
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
564
+ def hits
565
+ parse_hitlist
566
+ @hits
567
+ end
568
+
569
+ # Iterates over each hit of the iteration.
570
+ # Yields a Bio::Blast::Default::Report::Hit object.
571
+ def each
572
+ hits.each do |x|
573
+ yield x
574
+ end
575
+ end
576
+
577
+ # (PSI-BLAST) Returns true if the iteration is converged.
578
+ # Otherwise, returns false.
579
+ def converged?
580
+ @flag_converged
581
+ end
582
+
583
+ # (PHI-BLAST) Returns pattern string.
584
+ # Returns nil if it is not a PHI-BLAST result.
585
def pattern
  #PHI-BLAST
  # Lazily scans the iteration's messages for "pattern <string>" and, after
  # it, "at position N of query sequence". The first pattern string found
  # is kept in @pattern and every position found is appended to
  # @pattern_positions. Nothing is scanned unless @pattern_in_database
  # was set while the iteration was being built.
  if !defined?(@pattern) and defined?(@pattern_in_database) then
    @pattern = nil
    @pattern_positions = []
    @f0message.each do |r|
      sc = StringScanner.new(r)
      if sc.skip_until(/^ *pattern +([^\s]+)/) then
        @pattern = sc[1] unless @pattern
        # Record a position only when the "at position" text is really
        # there; an unmatched scan used to add a bogus 0 (nil.to_i).
        if sc.skip_until(/(?:^ *| +)at position +(\d+) +of +query +sequence/) then
          @pattern_positions << sc[1].to_i
        end
      end
    end
  end
  @pattern
end
601
+
602
+ # (PHI-BLAST) Returns pattern positions.
603
+ # Returns nil if it is not a PHI-BLAST result.
604
+ def pattern_positions
605
+ #PHI-BLAST
606
+ pattern
607
+ @pattern_positions
608
+ end
609
+
610
+ # (PSI-BLAST)
611
+ # Returns hits which have been found again in the iteration.
612
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
613
+ def hits_found_again
614
+ parse_hitlist
615
+ @hits_found_again
616
+ end
617
+
618
+ # (PSI-BLAST)
619
+ # Returns hits which have been newly found in the iteration.
620
+ # It returns an array of Bio::Blast::Default::Report::Hit objects.
621
+ def hits_newly_found
622
+ parse_hitlist
623
+ @hits_newly_found
624
+ end
625
+
626
+ # (PHI-BLAST) Returns hits for pattern. ????
627
+ def hits_for_pattern
628
+ parse_hitlist
629
+ @hits_for_pattern
630
+ end
631
+
632
+ # Parses list of hits.
633
+ def parse_hitlist
634
+ unless defined?(@parse_hitlist)
635
+ @hits_found_again = []
636
+ @hits_newly_found = []
637
+ @hits_unknown_state = []
638
+ i = 0
639
+ a = @hits_newly_found
640
+ flag = true
641
+ @f0hitlist.each do |x|
642
+ sc = StringScanner.new(x)
643
+ if flag then
644
+ if sc.skip_until(/^Sequences used in model and found again\:\s*$/)
645
+ a = @hits_found_again
646
+ end
647
+ flag = nil
648
+ next
649
+ end
650
+ next if sc.skip(/^CONVERGED\!$/)
651
+ if sc.skip(/^Sequences not found previously or not previously below threshold\:\s*$/) then
652
+ a = @hits_newly_found
653
+ next
654
+ elsif sc.skip(/^Sequences.+\:\s*$/) then
655
+ #possibly a bug or unknown format?
656
+ a = @hits_unknown_state
657
+ next
658
+ elsif sc.skip(/^Significant (matches|alignments) for pattern/) then
659
+ # PHI-BLAST
660
+ # do nothing when 'alignments'
661
+ if sc[1] == 'matches' then
662
+ unless defined?(@hits_for_pattern)
663
+ @hits_for_pattern = []
664
+ end
665
+ a = []
666
+ @hits_for_pattern << a
667
+ end
668
+ next
669
+ end
670
+ b = x.split(/^/)
671
+ b.collect! { |y| y.empty? ? nil : y }
672
+ b.compact!
673
+ if i + b.size > @hits.size then
674
+ ((@hits.size - i)...(b.size)).each do |j|
675
+ y = b[j]; y.strip!
676
+ y.reverse!
677
+ z = y.split(/\s+/, 3)
678
+ z.each { |y| y.reverse! }
679
+ h = Hit.new([ z.pop.to_s.sub(/\.+\z/, '') ])
680
+ bs = z.pop.to_s
681
+ bs = '1' + bs if bs[0] == ?e
682
+ bs = (bs.empty? ? nil : bs.to_f)
683
+ ev = z.pop.to_s
684
+ ev = '1' + ev if ev[0] == ?e
685
+ ev = (ev.empty? ? (1.0/0.0) : ev.to_f)
686
+ h.instance_eval { @bit_score = bs; @evalue = ev }
687
+ @hits << h
688
+ end
689
+ end
690
+ a.concat(@hits[i, b.size])
691
+ i += b.size
692
+ end #each
693
+ @hits_found_again.each do |x|
694
+ x.instance_eval { @again = true }
695
+ end
696
+ @parse_hitlist = true
697
+ end #unless
698
+ end
699
+ private :parse_hitlist
700
+
701
+ # Parses statistics for the iteration.
702
def parse_stat
  # Runs once. Each stored statistics chunk is a row of column labels
  # followed by a row of numbers, optionally preceded by the word
  # "Gapped". Labels and numbers are paired by position; the values for
  # "Lambda", "K" and "H" are stored as floats in the gapped_* or the
  # plain variables depending on that leading word.
  return if defined?(@parse_stat)
  @f0stat.each do |chunk|
    scanner = StringScanner.new(chunk)
    scanner.skip(/\s*/)
    is_gapped = scanner.skip(/Gapped\s*/) ? true : nil
    labels = []
    while (word = scanner.scan(/\w+/))
      labels << word
      scanner.skip(/ */)
    end
    scanner.skip(/\s*/)
    values = {}
    while (num = scanner.scan(/[e\+\-\.\d]+/))
      values[labels.shift] = num
      scanner.skip(/ */)
    end
    lam, kap, ent = %w[Lambda K H].collect { |key| (v = values[key]) ? v.to_f : nil }
    if is_gapped
      @gapped_lambda = lam
      @gapped_kappa = kap
      @gapped_entropy = ent
    else
      @lambda = lam
      @kappa = kap
      @entropy = ent
    end
  end
  @parse_stat = true
end
737
+ private :parse_stat
738
+
739
+ # Defines attributes which call +parse_stat+ before accessing.
740
+ def self.method_after_parse_stat(*names)
741
+ names.each do |x|
742
+ module_eval("def #{x}; parse_stat; @#{x}; end")
743
+ end
744
+ end
745
+ private_class_method :method_after_parse_stat
746
+
747
+ # lambda of the database
748
+ attr_reader :lambda if false #dummy
749
+ method_after_parse_stat :lambda
750
+ # kappa of the database
751
+ attr_reader :kappa if false #dummy
752
+ method_after_parse_stat :kappa
753
+ # entropy of the database
754
+ attr_reader :entropy if false #dummy
755
+ method_after_parse_stat :entropy
756
+
757
+ # gapped lambda of the database
758
+ attr_reader :gapped_lambda if false #dummy
759
+ method_after_parse_stat :gapped_lambda
760
+ # gapped kappa of the database
761
+ attr_reader :gapped_kappa if false #dummy
762
+ method_after_parse_stat :gapped_kappa
763
+ # gapped entropy of the database
764
+ attr_reader :gapped_entropy if false #dummy
765
+ method_after_parse_stat :gapped_entropy
766
+
767
+ # Defines attributes which delegate to @f0dbstat objects.
768
+ def self.delegate_to_f0dbstat(*names)
769
+ names.each do |x|
770
+ module_eval("def #{x}; @f0dbstat.#{x}; end")
771
+ end
772
+ end
773
+ private_class_method :delegate_to_f0dbstat
774
+
775
+ # name (title or filename) of the database
776
+ attr_reader :database if false #dummy
777
+ delegate_to_f0dbstat :database
778
+ # posted date of the database
779
+ attr_reader :posted_date if false #dummy
780
+ delegate_to_f0dbstat :posted_date
781
+
782
+ # number of sequences in database
783
+ attr_reader :db_num if false #dummy
784
+ delegate_to_f0dbstat :db_num
785
+ # number of letters in database
786
+ attr_reader :db_len if false #dummy
787
+ delegate_to_f0dbstat :db_len
788
+ # effective search space
789
+ attr_reader :eff_space if false #dummy
790
+ delegate_to_f0dbstat :eff_space
791
+
792
+ # e-value threshold specified when BLAST was executed
793
+ attr_reader :expect if false #dummy
794
+ delegate_to_f0dbstat :expect
795
+
796
+ end #class Iteration
797
+
798
+ # Bio::Blast::Default::Report::Hit contains information about a hit.
799
+ # It may contain some Bio::Blast::Default::Report::HSP objects.
800
+ class Hit
801
+ # Creates a new Hit object.
802
+ # It is designed to be called only internally from the
803
+ # Bio::Blast::Default::Report::Iteration class.
804
+ # Users should not call the method directly.
805
+ def initialize(data)
806
+ @f0hitname = data.shift
807
+ @hsps = []
808
+ while r = data[0] and /\A\s+Score/ =~ r
809
+ @hsps << HSP.new(data)
810
+ end
811
+ @again = false
812
+ end
813
+
814
+ # Hsp(high-scoring segment pair)s of the hit.
815
+ # Returns an array of Bio::Blast::Default::Report::HSP objects.
816
+ attr_reader :hsps
817
+
818
+ # Iterates over each hsp(high-scoring segment pair) of the hit.
819
+ # Yields a Bio::Blast::Default::Report::HSP object.
820
+ def each
821
+ @hsps.each { |x| yield x }
822
+ end
823
+
824
+ # (PSI-BLAST)
825
+ # Returns true if the hit is found again in the iteration.
826
+ # Otherwise, returns false or nil.
827
+ def found_again?
828
+ @again
829
+ end
830
+
831
+ # Returns first hsp's score.
832
+ def score
833
+ (h = @hsps.first) ? h.score : nil
834
+ end
835
+
836
+ # Returns first hsp's bit score.
837
+ # (shown in hit list of BLAST result)
838
+ def bit_score
839
+ unless defined?(@bit_score)
840
+ if h = @hsps.first then
841
+ @bit_score = h.bit_score
842
+ end
843
+ end
844
+ @bit_score
845
+ end
846
+
847
+ # Returns first hsp's e-value.
848
+ # (shown in hit list of BLAST result)
849
+ def evalue
850
+ unless defined?(@evalue)
851
+ if h = @hsps.first then
852
+ @evalue = h.evalue
853
+ end
854
+ end
855
+ @evalue
856
+ end
857
+
858
+ # Parses name of the hit.
859
def parse_hitname
  # Runs once. Splits the raw hit header (@f0hitname) into
  #   @definition - the header lines after the leading ">", joined by
  #                 single spaces
  #   @len        - the number from the trailing "Length = N" line, or nil
  #                 when that line is absent
  unless defined?(@parse_hitname)
    sc = StringScanner.new(@f0hitname)
    sc.skip(/\s*/)
    sc.skip(/\>/)
    d = []
    begin
      d << sc.scan(/.*/)
      sc.skip(/\s*/)
    end until !sc.rest? or r = sc.skip(/ *Length *\= *([\,\d]+)\s*\z/)
    # The pattern accepts thousands separators, so they must be removed
    # before conversion ("1,234".to_i is 1).
    @len = (r ? sc[1].delete(',').to_i : nil)
    @definition = d.join(" ")
    @parse_hitname = true
  end
end
874
+ private :parse_hitname
875
+
876
+ # Returns length of the hit.
877
+ def len; parse_hitname; @len; end
878
+
879
+ # Returns definition of the hit.
880
+ def definition; parse_hitname; @definition; end
881
+
882
+ def target_id; definition[/^\s*(\S+)/, 1]; end
883
+
884
+ #--
885
+ # Aliases to keep compatibility with Bio::Fasta::Report::Hit.
886
+ alias target_def definition
887
+ alias target_len len
888
+ #++
889
+
890
+ # Sends given method to the first hsp or returns nil if
891
+ # there are no hsps.
892
+ def hsp_first(m)
893
+ (h = hsps.first) ? h.send(m) : nil
894
+ end
895
+ private :hsp_first
896
+
897
+ #--
898
+ # Shortcut methods for the best Hsp
899
+ # (Compatibility method with FASTA)
900
+ #++
901
+
902
+ # Same as hsps.first.identity.
903
+ # Returns nil if there are no hsp in the hit.
904
+ # (Compatibility method with FASTA)
905
+ def identity; hsp_first :identity; end
906
+
907
+ # Same as hsps.first.align_len.
908
+ # Returns nil if there are no hsp in the hit.
909
+ # (Compatibility method with FASTA)
910
+ def overlap; hsp_first :align_len; end
911
+
912
+ # Same as hsps.first.qseq.
913
+ # Returns nil if there are no hsp in the hit.
914
+ # (Compatibility method with FASTA)
915
+ def query_seq; hsp_first :qseq; end
916
+
917
+ # Same as hsps.first.hseq.
918
+ # Returns nil if there are no hsp in the hit.
919
+ # (Compatibility method with FASTA)
920
+ def target_seq; hsp_first :hseq; end
921
+
922
+ # Same as hsps.first.midline.
923
+ # Returns nil if there are no hsp in the hit.
924
+ # (Compatibility method with FASTA)
925
+ def midline; hsp_first :midline; end
926
+
927
+ # Same as hsps.first.query_from.
928
+ # Returns nil if there are no hsp in the hit.
929
+ # (Compatibility method with FASTA)
930
+ def query_start; hsp_first :query_from; end
931
+
932
+ # Same as hsps.first.query_to.
933
+ # Returns nil if there are no hsp in the hit.
934
+ # (Compatibility method with FASTA)
935
+ def query_end; hsp_first :query_to; end
936
+
937
+ # Same as hsps.first.hit_from.
938
+ # Returns nil if there are no hsp in the hit.
939
+ # (Compatibility method with FASTA)
940
+ def target_start; hsp_first :hit_from; end
941
+
942
+ # Same as hsps.first.hit_to.
943
+ # Returns nil if there are no hsp in the hit.
944
+ # (Compatibility method with FASTA)
945
+ def target_end; hsp_first :hit_to; end
946
+
947
+ # Returns an array which contains
948
+ # [ query_start, query_end, target_start, target_end ].
949
+ # (Compatibility method with FASTA)
950
+ def lap_at
951
+ [ query_start, query_end, target_start, target_end ]
952
+ end
953
+ end #class Hit
954
+
955
+ # Bio::Blast::Default::Report::HSP holds information about the hsp
956
+ # (high-scoring segment pair).
957
+ class HSP
958
+ # Creates new HSP object.
959
+ # It is designed to be called only internally from the
960
+ # Bio::Blast::Default::Report::Hit class.
961
+ # Users should not call the method directly.
962
+ def initialize(data)
963
+ @f0score = data.shift
964
+ @f0alignment = []
965
+ while r = data[0] and /^(Query|Sbjct)\:/ =~ r
966
+ @f0alignment << data.shift
967
+ end
968
+ end
969
+
970
+ # Parses scores, identities, positives, gaps, and so on.
971
+ def parse_score
972
+ unless defined?(@parse_score)
973
+ sc = StringScanner.new(@f0score)
974
+ while sc.rest?
975
+ sc.skip(/\s*/)
976
+ if sc.skip(/Expect(?:\(\d+\))? *\= *([e\+\-\.\d]+)/) then
977
+ ev = sc[1].to_s
978
+ ev = '1' + ev if ev[0] == ?e
979
+ @evalue = ev.to_f
980
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) *bits *\( *([e\+\-\.\d]+) *\)/) then
981
+ bs = sc[1]
982
+ bs = '1' + bs if bs[0] == ?e
983
+ @bit_score = bs.to_f
984
+ @score = sc[2].to_i
985
+ elsif sc.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/) then
986
+ alen = sc[3].to_i
987
+ @align_len = alen unless defined?(@align_len)
988
+ raise ScanError if alen != @align_len
989
+ case sc[1]
990
+ when 'Identities'
991
+ @identity = sc[2].to_i
992
+ @percent_identity = sc[4].to_i
993
+ when 'Positives'
994
+ @positive = sc[2].to_i
995
+ @percent_positive = sc[4].to_i
996
+ when 'Gaps'
997
+ @gaps = sc[2].to_i
998
+ @percent_gaps = sc[4].to_i
999
+ else
1000
+ raise ScanError
1001
+ end
1002
+ elsif sc.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/) then
1003
+ @query_strand = sc[1]
1004
+ @hit_strand = sc[2]
1005
+ if sc[1] == sc[2] then
1006
+ @query_frame = 1
1007
+ @hit_frame = 1
1008
+ elsif sc[1] == 'Plus' then # Plus/Minus
1009
+ # complement sequence against xml(-m 7)
1010
+ # In xml(-m 8), -1=>Plus, 1=>Minus ???
1011
+ #@query_frame = -1
1012
+ #@hit_frame = 1
1013
+ @query_frame = 1
1014
+ @hit_frame = -1
1015
+ else # Minus/Plus
1016
+ @query_frame = -1
1017
+ @hit_frame = 1
1018
+ end
1019
+ elsif sc.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/) then
1020
+ @query_frame = sc[1].to_i
1021
+ if sc[2] then
1022
+ @hit_frame = sc[3].to_i
1023
+ end
1024
+ elsif sc.skip(/Score *\= *([e\+\-\.\d]+) +\(([e\+\-\.\d]+) *bits *\)/) then
1025
+ #WU-BLAST
1026
+ @score = sc[1].to_i
1027
+ bs = sc[2]
1028
+ bs = '1' + bs if bs[0] == ?e
1029
+ @bit_score = bs.to_f
1030
+ elsif sc.skip(/P *\= * ([e\+\-\.\d]+)/) then
1031
+ #WU-BLAST
1032
+ @p_sum_n = nil
1033
+ pv = sc[1]
1034
+ pv = '1' + pv if pv[0] == ?e
1035
+ @pvalue = pv.to_f
1036
+ elsif sc.skip(/Sum +P *\( *(\d+) *\) *\= *([e\+\-\.\d]+)/) then
1037
+ #WU-BLAST
1038
+ @p_sum_n = sc[1].to_i
1039
+ pv = sc[2]
1040
+ pv = '1' + pv if pv[0] == ?e
1041
+ @pvalue = pv.to_f
1042
+ elsif sc.skip(/Method\:\s*(.+)/) then
1043
+ # signature of composition-based statistics method
1044
+ # for example, "Method: Composition-based stats."
1045
+ @stat_method = sc[1]
1046
+ else
1047
+ raise ScanError
1048
+ end
1049
+ sc.skip(/\s*\,?\s*/)
1050
+ end
1051
+ @parse_score = true
1052
+ end
1053
+ end
1054
+ private :parse_score
1055
+
1056
+ # Defines attributes which call parse_score before accessing.
1057
+ def self.method_after_parse_score(*names)
1058
+ names.each do |x|
1059
+ module_eval("def #{x}; parse_score; @#{x}; end")
1060
+ end
1061
+ end
1062
+ private_class_method :method_after_parse_score
1063
+
1064
+ # bit score
1065
+ attr_reader :bit_score if false #dummy
1066
+ method_after_parse_score :bit_score
1067
+ # score
1068
+ attr_reader :score if false #dummy
1069
+ method_after_parse_score :score
1070
+
1071
+ # e-value
1072
+ attr_reader :evalue if false #dummy
1073
+ method_after_parse_score :evalue
1074
+
1075
+ # frame of the query
1076
+ attr_reader :query_frame if false #dummy
1077
+ method_after_parse_score :query_frame
1078
+ # frame of the hit
1079
+ attr_reader :hit_frame if false #dummy
1080
+ method_after_parse_score :hit_frame
1081
+
1082
+ # Identity (number of identical nucleotides or amino acids)
1083
+ attr_reader :identity if false #dummy
1084
+ method_after_parse_score :identity
1085
+ # percent of identical nucleotides or amino acids
1086
+ attr_reader :percent_identity if false #dummy
1087
+ method_after_parse_score :percent_identity
1088
+
1089
+ # Positives (number of positive hit amino acids or nucleotides)
1090
+ attr_reader :positive if false #dummy
1091
+ method_after_parse_score :positive
1092
+ # percent of positive hit amino acids or nucleotides
1093
+ attr_reader :percent_positive if false #dummy
1094
+ method_after_parse_score :percent_positive
1095
+
1096
+ # Gaps (number of gaps)
1097
+ attr_reader :gaps if false #dummy
1098
+ method_after_parse_score :gaps
1099
+ # percent of gaps
1100
+ attr_reader :percent_gaps if false #dummy
1101
+ method_after_parse_score :percent_gaps
1102
+
1103
+ # aligned length
1104
+ attr_reader :align_len if false #dummy
1105
+ method_after_parse_score :align_len
1106
+
1107
+ # strand of the query ("Plus" or "Minus" or nil)
1108
+ attr_reader :query_strand if false #dummy
1109
+ method_after_parse_score :query_strand
1110
+
1111
+ # strand of the hit ("Plus" or "Minus" or nil)
1112
+ attr_reader :hit_strand if false #dummy
1113
+ method_after_parse_score :hit_strand
1114
+
1115
+ # statistical method for calculating evalue and/or score
1116
+ # (nil or a string)
1117
+ # (note that composition-based statistics for blastp or tblastn
1118
+ # were enabled by default after NCBI BLAST 2.2.17)
1119
+ attr_reader :stat_method if false #dummy
1120
+ method_after_parse_score :stat_method
1121
+
1122
+ # Parses alignments.
1123
+ def parse_alignment
1124
+ unless defined?(@parse_alignment)
1125
+ qpos1 = nil
1126
+ qpos2 = nil
1127
+ spos1 = nil
1128
+ spos2 = nil
1129
+ qseq = []
1130
+ sseq = []
1131
+ mseq = []
1132
+ pos_st = nil
1133
+ len_seq = 0
1134
+ nextline = :q
1135
+ @f0alignment.each do |x|
1136
+ sc = StringScanner.new(x)
1137
+ while sc.rest?
1138
+ #p pos_st, len_seq
1139
+ #p nextline.to_s
1140
+ if r = sc.skip(/(Query|Sbjct)\: *(\d+) */) then
1141
+ pos_st = r
1142
+ qs = sc[1]
1143
+ pos1 = sc[2]
1144
+ len_seq = sc.skip(/[^ ]*/)
1145
+ seq = sc[0]
1146
+ sc.skip(/ *(\d+) *\n/)
1147
+ pos2 = sc[1]
1148
+ if qs == 'Query' then
1149
+ raise ScanError unless nextline == :q
1150
+ qpos1 = pos1.to_i unless qpos1
1151
+ qpos2 = pos2.to_i
1152
+ qseq << seq
1153
+ nextline = :m
1154
+ elsif qs == 'Sbjct' then
1155
+ if nextline == :m then
1156
+ mseq << (' ' * len_seq)
1157
+ end
1158
+ spos1 = pos1.to_i unless spos1
1159
+ spos2 = pos2.to_i
1160
+ sseq << seq
1161
+ nextline = :q
1162
+ else
1163
+ raise ScanError
1164
+ end
1165
+ elsif r = sc.scan(/ {6}.+/) then
1166
+ raise ScanError unless nextline == :m
1167
+ mseq << r[pos_st, len_seq]
1168
+ sc.skip(/\n/)
1169
+ nextline = :s
1170
+ elsif r = sc.skip(/pattern +\d+.+/) then
1171
+ # PHI-BLAST
1172
+ # do nothing
1173
+ sc.skip(/\n/)
1174
+ else
1175
+ raise ScanError
1176
+ end
1177
+ end #while
1178
+ end #each
1179
+ #p qseq, sseq, mseq
1180
+ @qseq = qseq.join('')
1181
+ @hseq = sseq.join('')
1182
+ @midline = mseq.join('')
1183
+ @query_from = qpos1
1184
+ @query_to = qpos2
1185
+ @hit_from = spos1
1186
+ @hit_to = spos2
1187
+ @parse_alignment = true
1188
+ end #unless
1189
+ end #def
1190
+ private :parse_alignment
1191
+
1192
+ # Defines attributes which call parse_alignment before accessing.
1193
+ def self.method_after_parse_alignment(*names)
1194
+ names.each do |x|
1195
+ module_eval("def #{x}; parse_alignment; @#{x}; end")
1196
+ end
1197
+ end
1198
+ private_class_method :method_after_parse_alignment
1199
+
1200
+ # query sequence (with gaps) of the alignment of the hsp
1201
+ attr_reader :qseq if false #dummy
1202
+ method_after_parse_alignment :qseq
1203
+ # hit sequence (with gaps) of the alignment of the hsp
1204
+ attr_reader :hseq if false #dummy
1205
+ method_after_parse_alignment :hseq
1206
+
1207
+ # middle line of the alignment of the hsp
1208
+ attr_reader :midline if false #dummy
1209
+ method_after_parse_alignment :midline
1210
+
1211
+ # start position of the query (the first position is 1)
1212
+ attr_reader :query_from if false #dummy
1213
+ method_after_parse_alignment :query_from
1214
+
1215
+ # end position of the query (including its position)
1216
# Never-executed attr_reader, written the same way as the sibling
# declarations in this class (all carry "if false #dummy"). Without the
# guard a real reader was defined and then immediately redefined by the
# method_after_parse_alignment call on the next line.
attr_reader :query_to if false #dummy
method_after_parse_alignment :query_to
1218
+
1219
+ # start position of the hit (the first position is 1)
1220
+ attr_reader :hit_from if false #dummy
1221
+ method_after_parse_alignment :hit_from
1222
+
1223
+ # end position of the hit (including its position)
1224
+ attr_reader :hit_to if false #dummy
1225
+ method_after_parse_alignment :hit_to
1226
+
1227
+ end #class HSP
1228
+
1229
+ end #class Report
1230
+
1231
+ # NCBI BLAST default (-m 0 option) output parser for TBLAST.
1232
+ # All methods are equal to Bio::Blast::Default::Report.
1233
+ # Only DELIMITER (and RS) is different.
1234
+ class Report_TBlast < Report
1235
+ # Delimiter of each entry for TBLAST. Bio::FlatFile uses it.
1236
+ DELIMITER = RS = "\nTBLAST"
1237
+
1238
+ # (Integer) excess read size included in DELIMITER.
1239
+ DELIMITER_OVERRUN = 6 # "TBLAST"
1240
+ end #class Report_TBlast
1241
+
1242
+ end #module Default
1243
+ end #class Blast
1244
+ end #module Bio
1245
+
1246
+ ######################################################################
1247
+
1248
# Demo / manual check, run only when this file is executed directly.
# Opens ARGF through Bio::FlatFile with Bio::Blast::Default::Report as the
# entry class, then, for every report yielded, prints each accessor's value
# with Kernel#p, preceded by a label showing the expression used.
# Each label is printed before its accessor is evaluated, so if an accessor
# raises, the last label on screen identifies it.
# Commented-out lines are accessors that are left disabled in this demo.
if __FILE__ == $0

  Bio::FlatFile.open(Bio::Blast::Default::Report, ARGF) do |ff|
    ff.each do |rep|

      print "# === Bio::Blast::Default::Report\n"
      puts
      print " rep.program #=> "; p rep.program
      print " rep.version #=> "; p rep.version
      print " rep.reference #=> "; p rep.reference
      print " rep.db #=> "; p rep.db
      #print " rep.query_id #=> "; p rep.query_id
      print " rep.query_def #=> "; p rep.query_def
      print " rep.query_len #=> "; p rep.query_len
      #puts
      print " rep.version_number #=> "; p rep.version_number
      print " rep.version_date #=> "; p rep.version_date
      puts

      print "# === Parameters\n"
      #puts
      #print " rep.parameters #=> "; p rep.parameters
      puts
      print " rep.matrix #=> "; p rep.matrix
      print " rep.expect #=> "; p rep.expect
      #print " rep.inclusion #=> "; p rep.inclusion
      print " rep.sc_match #=> "; p rep.sc_match
      print " rep.sc_mismatch #=> "; p rep.sc_mismatch
      print " rep.gap_open #=> "; p rep.gap_open
      print " rep.gap_extend #=> "; p rep.gap_extend
      #print " rep.filter #=> "; p rep.filter
      print " rep.pattern #=> "; p rep.pattern
      #print " rep.entrez_query #=> "; p rep.entrez_query
      #puts
      print " rep.pattern_positions #=> "; p rep.pattern_positions
      puts

      print "# === Statistics (last iteration's)\n"
      #puts
      #print " rep.statistics #=> "; p rep.statistics
      puts
      print " rep.db_num #=> "; p rep.db_num
      print " rep.db_len #=> "; p rep.db_len
      #print " rep.hsp_len #=> "; p rep.hsp_len
      print " rep.eff_space #=> "; p rep.eff_space
      print " rep.kappa #=> "; p rep.kappa
      print " rep.lambda #=> "; p rep.lambda
      print " rep.entropy #=> "; p rep.entropy
      puts
      print " rep.num_hits #=> "; p rep.num_hits
      print " rep.gapped_kappa #=> "; p rep.gapped_kappa
      print " rep.gapped_lambda #=> "; p rep.gapped_lambda
      print " rep.gapped_entropy #=> "; p rep.gapped_entropy
      print " rep.posted_date #=> "; p rep.posted_date
      puts

      print "# === Message (last iteration's)\n"
      puts
      print " rep.message #=> "; p rep.message
      #puts
      print " rep.converged? #=> "; p rep.converged?
      puts

      print "# === Iterations\n"
      puts
      # Label fixed: it previously read "rep.itrerations", which does not
      # match the method actually called on the loop below.
      print " rep.iterations.each do |itr|\n"
      puts

      rep.iterations.each do |itr|

        print "# --- Bio::Blast::Default::Report::Iteration\n"
        puts

        print " itr.num #=> "; p itr.num
        #print " itr.statistics #=> "; p itr.statistics
        print " itr.message #=> "; p itr.message
        print " itr.hits.size #=> "; p itr.hits.size
        #puts
        print " itr.hits_newly_found.size #=> "; p itr.hits_newly_found.size
        print " itr.hits_found_again.size #=> "; p itr.hits_found_again.size
        # hits_for_pattern may be nil/false; only list it when present.
        if itr.hits_for_pattern
          itr.hits_for_pattern.each_with_index do |hp, hpi|
            print " itr.hits_for_pattern[#{hpi}].size #=> "; p hp.size
          end
        end
        print " itr.converged? #=> "; p itr.converged?
        puts

        print " itr.hits.each do |hit|\n"
        puts

        itr.hits.each_with_index do |hit, i|

          print "# --- Bio::Blast::Default::Report::Hit"
          print " ([#{i}])\n"
          puts

          #print " hit.num #=> "; p hit.num
          #print " hit.hit_id #=> "; p hit.hit_id
          print " hit.len #=> "; p hit.len
          print " hit.definition #=> "; p hit.definition
          #print " hit.accession #=> "; p hit.accession
          #puts
          print " hit.found_again? #=> "; p hit.found_again?

          print " --- compatible/shortcut ---\n"
          #print " hit.query_id #=> "; p hit.query_id
          #print " hit.query_def #=> "; p hit.query_def
          #print " hit.query_len #=> "; p hit.query_len
          #print " hit.target_id #=> "; p hit.target_id
          print " hit.target_def #=> "; p hit.target_def
          print " hit.target_len #=> "; p hit.target_len

          print " --- first HSP's values (shortcut) ---\n"
          print " hit.evalue #=> "; p hit.evalue
          print " hit.bit_score #=> "; p hit.bit_score
          print " hit.identity #=> "; p hit.identity
          #print " hit.overlap #=> "; p hit.overlap

          print " hit.query_seq #=> "; p hit.query_seq
          print " hit.midline #=> "; p hit.midline
          print " hit.target_seq #=> "; p hit.target_seq

          print " hit.query_start #=> "; p hit.query_start
          print " hit.query_end #=> "; p hit.query_end
          print " hit.target_start #=> "; p hit.target_start
          print " hit.target_end #=> "; p hit.target_end
          print " hit.lap_at #=> "; p hit.lap_at
          # Closing markers for the two sections opened above
          # (spelling fixed: "vaules" -> "values", matching the opener).
          print " --- first HSP's values (shortcut) ---\n"
          print " --- compatible/shortcut ---\n"

          puts
          print " hit.hsps.size #=> "; p hit.hsps.size
          if hit.hsps.size == 0
            puts " (HSP not found: please see blastall's -b and -v options)"
            puts
          else

            puts
            print " hit.hsps.each do |hsp|\n"
            puts

            hit.hsps.each_with_index do |hsp, j|

              print "# --- Bio::Blast::Default::Report::Hsp"
              print " ([#{j}])\n"
              puts
              #print " hsp.num #=> "; p hsp.num
              print " hsp.bit_score #=> "; p hsp.bit_score
              print " hsp.score #=> "; p hsp.score
              print " hsp.evalue #=> "; p hsp.evalue
              print " hsp.identity #=> "; p hsp.identity
              print " hsp.gaps #=> "; p hsp.gaps
              print " hsp.positive #=> "; p hsp.positive
              print " hsp.align_len #=> "; p hsp.align_len
              #print " hsp.density #=> "; p hsp.density

              print " hsp.query_frame #=> "; p hsp.query_frame
              print " hsp.query_from #=> "; p hsp.query_from
              print " hsp.query_to #=> "; p hsp.query_to

              print " hsp.hit_frame #=> "; p hsp.hit_frame
              print " hsp.hit_from #=> "; p hsp.hit_from
              print " hsp.hit_to #=> "; p hsp.hit_to

              #print " hsp.pattern_from#=> "; p hsp.pattern_from
              #print " hsp.pattern_to #=> "; p hsp.pattern_to

              print " hsp.qseq #=> "; p hsp.qseq
              print " hsp.midline #=> "; p hsp.midline
              print " hsp.hseq #=> "; p hsp.hseq
              puts
              print " hsp.percent_identity #=> "; p hsp.percent_identity
              #print " hsp.mismatch_count #=> "; p hsp.mismatch_count
              #
              print " hsp.query_strand #=> "; p hsp.query_strand
              print " hsp.hit_strand #=> "; p hsp.hit_strand
              print " hsp.percent_positive #=> "; p hsp.percent_positive
              print " hsp.percent_gaps #=> "; p hsp.percent_gaps
              puts

            end #each
          end #if hit.hsps.size == 0
        end
      end
    end #ff.each
  end #FlatFile.open

end #if __FILE__ == $0
1437
+
1438
+ ######################################################################