bio 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (303)
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: hmmer.rb,v 1.9 2007/04/05 23:35:39 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
11
  require 'bio/command'
@@ -110,17 +110,3 @@ end # class HMMER
110
110
 
111
111
  end # module Bio
112
112
 
113
-
114
-
115
- if __FILE__ == $0
116
-
117
- require 'pp'
118
-
119
- program = ARGV.shift # hmmsearch, hmmpfam
120
- hmmfile = ARGV.shift
121
- seqfile = ARGV.shift
122
-
123
- factory = Bio::HMMER.new(program, hmmfile, seqfile)
124
- pp factory.query
125
-
126
- end
@@ -566,7 +566,6 @@ end # class HMMER
566
566
  end # module Bio
567
567
 
568
568
 
569
- if __FILE__ == $0
570
569
 
571
570
  =begin
572
571
 
@@ -582,102 +581,3 @@ if __FILE__ == $0
582
581
 
583
582
  =end
584
583
 
585
- begin
586
- require 'pp'
587
- alias p pp
588
- rescue LoadError
589
- end
590
-
591
- rep = Bio::HMMER::Report.new(ARGF.read)
592
- p rep
593
-
594
- indent = 18
595
-
596
- puts "### hmmer result"
597
- print "name : ".rjust(indent)
598
- p rep.program['name']
599
- print "version : ".rjust(indent)
600
- p rep.program['version']
601
- print "copyright : ".rjust(indent)
602
- p rep.program['copyright']
603
- print "license : ".rjust(indent)
604
- p rep.program['license']
605
-
606
- print "HMM file : ".rjust(indent)
607
- p rep.parameter['HMM file']
608
- print "Sequence file : ".rjust(indent)
609
- p rep.parameter['Sequence file']
610
-
611
- print "Query sequence : ".rjust(indent)
612
- p rep.query_info['Query sequence']
613
- print "Accession : ".rjust(indent)
614
- p rep.query_info['Accession']
615
- print "Description : ".rjust(indent)
616
- p rep.query_info['Description']
617
-
618
- rep.each do |hit|
619
- puts "## each hit"
620
- print "accession : ".rjust(indent)
621
- p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]
622
- print "description : ".rjust(indent)
623
- p [ hit.description, hit.definition ]
624
- print "target_def : ".rjust(indent)
625
- p hit.target_def
626
- print "score : ".rjust(indent)
627
- p [ hit.score, hit.bit_score ]
628
- print "evalue : ".rjust(indent)
629
- p hit.evalue
630
- print "num : ".rjust(indent)
631
- p hit.num
632
-
633
- hit.each do |hsp|
634
- puts "## each hsp"
635
- print "accession : ".rjust(indent)
636
- p [ hsp.accession, hsp.target_id ]
637
- print "domain : ".rjust(indent)
638
- p hsp.domain
639
- print "seq_f : ".rjust(indent)
640
- p hsp.seq_f
641
- print "seq_t : ".rjust(indent)
642
- p hsp.seq_t
643
- print "seq_ft : ".rjust(indent)
644
- p hsp.seq_ft
645
- print "hmm_f : ".rjust(indent)
646
- p hsp.hmm_f
647
- print "hmm_t : ".rjust(indent)
648
- p hsp.hmm_t
649
- print "hmm_ft : ".rjust(indent)
650
- p hsp.hmm_ft
651
- print "score : ".rjust(indent)
652
- p [ hsp.score, hsp.bit_score ]
653
- print "evalue : ".rjust(indent)
654
- p hsp.evalue
655
- print "midline : ".rjust(indent)
656
- p hsp.midline
657
- print "hmmseq : ".rjust(indent)
658
- p hsp.hmmseq
659
- print "flatseq : ".rjust(indent)
660
- p hsp.flatseq
661
- print "query_frame : ".rjust(indent)
662
- p hsp.query_frame
663
- print "target_frame : ".rjust(indent)
664
- p hsp.target_frame
665
-
666
- print "query_seq : ".rjust(indent)
667
- p hsp.query_seq # hmmseq, flatseq
668
- print "target_seq : ".rjust(indent)
669
- p hsp.target_seq # flatseq, hmmseq
670
- print "target_from : ".rjust(indent)
671
- p hsp.target_from # seq_f, hmm_f
672
- print "target_to : ".rjust(indent)
673
- p hsp.target_to # seq_t, hmm_t
674
- print "query_from : ".rjust(indent)
675
- p hsp.query_from # hmm_f, seq_f
676
- print "query_to : ".rjust(indent)
677
- p hsp.query_to # hmm_t, seq_t
678
- end
679
- end
680
-
681
- end
682
-
683
-
@@ -0,0 +1,156 @@
1
+ #
2
+ # = bio/appl/meme/mast.rb - Wrapper for running MAST program
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a wrapper for running the MAST tool for searching sequence databases using motifs
11
+ #
12
+ # == References
13
+ #
14
+ # * http://meme.sdsc.edu/meme/intro.html
15
+ #
16
+ require "bio/command"
17
+
18
+ module Bio
19
+ module Meme
20
+
21
+ # == Description
22
+ #
23
+ # Bio::Meme::Mast is a wrapper for searching a database using sequence motifs. The code
24
+ # will read options from a Hash and run the program. Parsing of the output is provided by
25
+ # Bio::Meme::Mast::Report. Before running, options[:mfile] and options[:d] must be set
26
+ # in the constructor or Mast.config(options = {})
27
+ #
28
+ # == Usage
29
+ #
30
+ # mast = Mast.new('/path/to/mast')
31
+ # or with options
32
+ # mast = Mast.new('/path/to/mast', {:mfile => 'meme.out', :d => '/shared/db/nr'})
33
+ #
34
+ # report = Mast::Report.new(mast.run)
35
+ # report.each do |motif|
36
+ # puts motif.length
37
+ # end
38
+ #
39
+ #
40
+ class Mast
41
+
42
+ include Bio::Command
43
+
44
+ autoload :Report, 'bio/appl/meme/mast/report'
45
+
46
+ # A Hash of options for Mast
47
+ attr_accessor :options
48
+
49
+ DEFAULT_OPTIONS = {
50
+ # required
51
+ :mfile => nil,
52
+ :d => nil,
53
+ # optional
54
+ :stdin => nil, # may not work as expected
55
+ :count => nil,
56
+ :alphabet => nil,
57
+ :stdout => true,
58
+ :text => false,
59
+ :sep => false,
60
+ :norc => false,
61
+ :dna => false,
62
+ :comp => false,
63
+ :rank => nil,
64
+ :smax => nil,
65
+ :ev => nil,
66
+ :mt => nil,
67
+ :w => false,
68
+ :bfile => nil,
69
+ :seqp => false,
70
+ :mf => nil,
71
+ :df => nil,
72
+ :minseqs => nil,
73
+ :mev => nil,
74
+ :m => nil,
75
+ :diag => nil,
76
+ :best => false,
77
+ :remcorr => false,
78
+ :brief => false,
79
+ :b => false,
80
+ :nostatus => true,
81
+ :hit_list => true,
82
+ }
83
+
84
+ # The command line String to be executed
85
+ attr_reader :cmd
86
+
87
+ # Create a mast instance
88
+ #
89
+ # m = Mast.new('/usr/local/bin/mast')
90
+ # ---
91
+ # *Arguments*:
92
+ # * (required) _mast_location_: String
93
+ # *Raises*:: ArgumentError if mast program is not found
94
+ # *Returns*:: a Bio::Meme::Mast object
95
+
96
+ def initialize(mast_location, options = {})
97
+ unless File.exists?(mast_location)
98
+ raise ArgumentError.new("mast: command not found : #{mast_location}")
99
+ end
100
+ @binary = mast_location
101
+ options.empty? ? config(DEFAULT_OPTIONS) : config(options)
102
+ end
103
+
104
+ # Builds the command line string
105
+ # any options passed in will be merged with DEFAULT_OPTIONS
106
+ # Mast usage: mast <mfile> <opts> <flags>
107
+ #
108
+ # mast.config({:mfile => "meme.out", :d => "/path/to/fasta/db"})
109
+ # ---
110
+ # *Arguments*:
111
+ # * (required) _options_: Hash (see DEFAULT_OPTIONS)
112
+ # *Returns*:: the command line string
113
+
114
+ def config(options)
115
+ @options = DEFAULT_OPTIONS.merge(options)
116
+ mfile, opts, flags = "", "", ""
117
+ @options.each_pair do |opt, val|
118
+ if val.nil? or val == false
119
+ next
120
+ elsif opt == :mfile
121
+ mfile = val
122
+ elsif val == true
123
+ flags << " -#{opt}"
124
+ else
125
+ opts << " -#{opt} #{val}"
126
+ end
127
+ end
128
+ @cmd = "#{@binary} #{mfile + opts + flags}"
129
+ end
130
+
131
+ # Checks if input/database files exist and options are valid
132
+ # *Raises*:: ArgumentError if the motifs file does not exist
133
+ # *Raises*:: ArgumentError if the database file does not exist
134
+ # *Raises*:: ArgumentError if there is an invalid option
135
+
136
+ def check_options
137
+ @options.each_key do |k|
138
+ raise ArgumentError.new("Invalid option: #{k}") unless DEFAULT_OPTIONS.has_key?(k)
139
+ end
140
+ raise ArgumentError.new("Motif file not found: #{@options[:mfile]}") if @options[:mfile].nil? or !File.exists?(@options[:mfile])
141
+ raise ArgumentError.new("Database not found: #{@options[:d]}") if @options[:d].nil? or !File.exists?(@options[:d])
142
+ end
143
+
144
+ # Run the mast program
145
+ # ---
146
+ # *Returns*:: Bio::Meme::Mast::Report object
147
+
148
+ def run
149
+ check_options
150
+ call_command(@cmd) {|io| @output = io.read }
151
+ Report.new(@output)
152
+ end
153
+
154
+ end # End class Mast
155
+ end # End module Meme
156
+ end # End module Bio
@@ -0,0 +1,91 @@
1
+ #
2
+ # = bio/appl/meme/mast/report.rb - Mast output parser class
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>
5
+
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains a class to parse Mast output
12
+ #
13
+ # == Examples
14
+ #
15
+ # == References
16
+ #
17
+ # * http://meme.sdsc.edu/meme/intro.html
18
+
19
+ require "bio/appl/meme/mast"
20
+ require "bio/appl/meme/motif"
21
+
22
+ module Bio
23
+ module Meme
24
+ class Mast
25
+
26
+ # == Description
27
+ #
28
+ # A class to parse the output from Mast
29
+ #
30
+ # WARNING: Currently only the -hit_list (machine readable) format is supported;
31
+ # HTML (default) output is not supported
32
+ #
33
+ # == Examples
34
+ #
35
+
36
+ class Report
37
+
38
+ attr_reader :motifs
39
+
40
+ def initialize(mast_hitlist)
41
+ @motifs = parse_hit_list(mast_hitlist)
42
+ end
43
+
44
+ # Iterates each motif (Bio::Meme::Motif)
45
+ def each
46
+ @motifs.each do |motif|
47
+ yield motif
48
+ end
49
+ end
50
+ alias :each_motif :each
51
+
52
+
53
+ private
54
+
55
+ # Each line corresponds to one motif occurrence in one sequence.
56
+ # The format of the hit lines is
57
+ # [<sequence_name> <strand><motif> <start> <end> <p-value>]+
58
+ # where
59
+ # <sequence_name> is the name of the sequence containing the hit
60
+ # <strand> is the strand (+ or - for DNA, blank for protein),
61
+ # <motif> is the motif number,
62
+ # <start> is the starting position of the hit,
63
+ # <end> is the ending position of the hit, and
64
+ # <p-value> is the position p-value of the hit.
65
+ def parse_hit_list(data)
66
+ motifs = []
67
+ data.each_line do |line|
68
+
69
+ line.chomp!
70
+
71
+ # skip comments
72
+ next if line =~ /^#/
73
+
74
+ fields = line.split(/\s/)
75
+
76
+ if fields.size == 5
77
+ motifs << Motif.new(fields[0], strand = nil, fields[1], fields[2], fields[3], fields[4])
78
+ elsif fields.size == 6
79
+ motifs << Motif.new(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
80
+ else
81
+ raise RuntimeError.new("Could not parse mast output")
82
+ end
83
+
84
+ end
85
+ motifs
86
+ end
87
+
88
+ end # Report
89
+ end # Mast
90
+ end # Meme
91
+ end # Bio
@@ -0,0 +1,48 @@
1
+ #
2
+ # = bio/appl/meme/motif.rb - Class to represent a sequence motif
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a minimal class to represent meme motifs
11
+ #
12
+ # == References
13
+ #
14
+ # * http://meme.sdsc.edu/meme/intro.html
15
+ #
16
+ module Bio
17
+ module Meme
18
+
19
+ # == Description
20
+ #
21
+ # This class minimally represents a sequence motif according to the MEME program
22
+ #
23
+ # TODO: integrate with Bio::Sequence class
24
+ # TODO: parse PSSM data
25
+ #
26
+ class Motif
27
+ attr_accessor :sequence_name, :strand, :motif, :start_pos, :end_pos, :pvalue
28
+
29
+ # Creates a new Bio::Meme::Motif object
30
+ # arguments are, in order: sequence_name, strand, motif, start_pos, end_pos, pvalue
31
+ def initialize(sequence_name, strand, motif, start_pos, end_pos, pvalue)
32
+ @sequence_name = sequence_name.to_s
33
+ @strand = strand.to_s
34
+ @motif = motif.to_i
35
+ @start_pos = start_pos.to_i
36
+ @end_pos = end_pos.to_i
37
+ @pvalue = pvalue.to_f
38
+ end
39
+
40
+ # Computes the motif length
41
+ def length
42
+ @end_pos - @start_pos
43
+ end
44
+
45
+ end
46
+
47
+ end
48
+ end
@@ -435,114 +435,3 @@ require 'uri'
435
435
 
436
436
  end # module Bio
437
437
 
438
-
439
-
440
-
441
-
442
- if __FILE__ == $0
443
-
444
- begin
445
- require 'psort/report.rb'
446
- rescue LoadError
447
- end
448
-
449
-
450
- seq = ">hoge mit
451
- MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
452
- ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
453
- DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
454
- FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
455
- KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
456
- NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
457
- SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
458
- DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
459
- DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
460
- KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
461
- APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
462
- KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
463
- "
464
- Seq1 = ">hgoe
465
- LTFVENDKII NI
466
- "
467
-
468
- puts "\n Bio::PSORT::PSORT"
469
-
470
- puts "\n ==> p serv = Bio::PSORT::PSORT.imsut"
471
- p serv = Bio::PSORT::PSORT1.imsut
472
-
473
- puts "\n ==> p serv.class "
474
- p serv.class
475
-
476
- puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
477
- p serv.title = 'Query_title_splited_by_white space'
478
-
479
- puts "\n ==> p serv.exec(seq, false) "
480
- p serv.exec(seq, false)
481
-
482
- puts "\n ==> p serv.exec(seq) "
483
- p serv.exec(seq)
484
-
485
- puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
486
- p report = serv.exec(Bio::FastaFormat.new(seq))
487
-
488
- puts "\n ==> p report.class"
489
- p report.class
490
-
491
-
492
- puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
493
- p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
494
-
495
- puts "\n ==> p report_raw.class"
496
- p report_raw.class
497
-
498
-
499
- puts "\n ==> p report.methods"
500
- p report.methods
501
-
502
- methods = ['entry_id', 'origin', 'title', 'sequence','result_info',
503
- 'reasoning', 'final_result', 'raw']
504
- methods.each do |method|
505
- puts "\n ==> p report.#{method}"
506
- p eval("report.#{method}")
507
- end
508
-
509
-
510
-
511
- puts "\n Bio::PSORT::PSORT2"
512
-
513
- puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
514
- p serv = Bio::PSORT::PSORT2.imsut
515
-
516
- puts "\n ==> p serv.class "
517
- p serv.class
518
-
519
- puts "\n ==> p seq "
520
- p seq
521
-
522
- puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
523
- p serv.title = 'Query_title_splited_by_white space'
524
-
525
- puts "\n ==> p serv.exec(seq) # parsed report"
526
- p serv.exec(seq)
527
-
528
- puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
529
- p report = serv.exec(Bio::FastaFormat.new(seq))
530
-
531
-
532
-
533
- puts "\n ==> p serv.exec(seq, false) # report in plain text"
534
- p serv.exec(seq, false)
535
-
536
- puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
537
- p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
538
-
539
-
540
- puts "\n ==> p report.methods"
541
- p report.methods
542
-
543
- methods = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
544
- methods.each do |method|
545
- puts "\n ==> p report.#{method}"
546
- p eval("report.#{method}")
547
- end
548
- end