bio 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. data/ChangeLog +2105 -3728
  2. data/KNOWN_ISSUES.rdoc +35 -3
  3. data/README.rdoc +8 -2
  4. data/RELEASE_NOTES.rdoc +166 -0
  5. data/bin/bioruby +4 -1
  6. data/bioruby.gemspec +146 -1
  7. data/bioruby.gemspec.erb +3 -1
  8. data/doc/ChangeLog-before-1.3.1 +3961 -0
  9. data/doc/Tutorial.rd +154 -22
  10. data/doc/Tutorial.rd.html +125 -68
  11. data/lib/bio.rb +21 -6
  12. data/lib/bio/appl/bl2seq/report.rb +11 -202
  13. data/lib/bio/appl/blast/format0.rb +0 -193
  14. data/lib/bio/appl/blast/report.rb +2 -147
  15. data/lib/bio/appl/blast/wublast.rb +0 -208
  16. data/lib/bio/appl/fasta.rb +4 -19
  17. data/lib/bio/appl/fasta/format10.rb +0 -14
  18. data/lib/bio/appl/genscan/report.rb +0 -176
  19. data/lib/bio/appl/hmmer.rb +1 -15
  20. data/lib/bio/appl/hmmer/report.rb +0 -100
  21. data/lib/bio/appl/meme/mast.rb +156 -0
  22. data/lib/bio/appl/meme/mast/report.rb +91 -0
  23. data/lib/bio/appl/meme/motif.rb +48 -0
  24. data/lib/bio/appl/psort.rb +0 -111
  25. data/lib/bio/appl/psort/report.rb +1 -45
  26. data/lib/bio/appl/pts1.rb +2 -4
  27. data/lib/bio/appl/sosui/report.rb +5 -54
  28. data/lib/bio/appl/targetp/report.rb +1 -104
  29. data/lib/bio/appl/tmhmm/report.rb +0 -36
  30. data/lib/bio/command.rb +94 -10
  31. data/lib/bio/data/aa.rb +1 -77
  32. data/lib/bio/data/codontable.rb +1 -95
  33. data/lib/bio/data/na.rb +1 -26
  34. data/lib/bio/db/aaindex.rb +1 -38
  35. data/lib/bio/db/fasta.rb +1 -134
  36. data/lib/bio/db/fasta/format_qual.rb +204 -0
  37. data/lib/bio/db/fasta/qual.rb +102 -0
  38. data/lib/bio/db/fastq.rb +645 -0
  39. data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
  40. data/lib/bio/db/fastq/format_fastq.rb +175 -0
  41. data/lib/bio/db/genbank/genbank.rb +1 -86
  42. data/lib/bio/db/gff.rb +0 -17
  43. data/lib/bio/db/go.rb +4 -72
  44. data/lib/bio/db/kegg/common.rb +112 -0
  45. data/lib/bio/db/kegg/compound.rb +29 -20
  46. data/lib/bio/db/kegg/drug.rb +74 -34
  47. data/lib/bio/db/kegg/enzyme.rb +26 -5
  48. data/lib/bio/db/kegg/genes.rb +128 -15
  49. data/lib/bio/db/kegg/genome.rb +3 -41
  50. data/lib/bio/db/kegg/glycan.rb +19 -24
  51. data/lib/bio/db/kegg/orthology.rb +16 -56
  52. data/lib/bio/db/kegg/reaction.rb +81 -28
  53. data/lib/bio/db/kegg/taxonomy.rb +1 -52
  54. data/lib/bio/db/litdb.rb +1 -16
  55. data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
  56. data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
  57. data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
  58. data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
  59. data/lib/bio/db/prosite.rb +2 -95
  60. data/lib/bio/db/rebase.rb +5 -6
  61. data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
  62. data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
  63. data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
  64. data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
  65. data/lib/bio/io/das.rb +0 -44
  66. data/lib/bio/io/ddbjxml.rb +1 -181
  67. data/lib/bio/io/flatfile.rb +1 -7
  68. data/lib/bio/io/flatfile/autodetection.rb +6 -0
  69. data/lib/bio/io/keggapi.rb +0 -442
  70. data/lib/bio/io/ncbirest.rb +130 -132
  71. data/lib/bio/io/ncbisoap.rb +2 -1
  72. data/lib/bio/io/pubmed.rb +0 -88
  73. data/lib/bio/location.rb +0 -73
  74. data/lib/bio/pathway.rb +0 -171
  75. data/lib/bio/sequence.rb +18 -1
  76. data/lib/bio/sequence/adapter.rb +3 -0
  77. data/lib/bio/sequence/format.rb +16 -0
  78. data/lib/bio/sequence/quality_score.rb +205 -0
  79. data/lib/bio/tree.rb +70 -5
  80. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
  81. data/lib/bio/util/sirna.rb +1 -23
  82. data/lib/bio/version.rb +1 -1
  83. data/sample/demo_aaindex.rb +67 -0
  84. data/sample/demo_aminoacid.rb +101 -0
  85. data/sample/demo_bl2seq_report.rb +220 -0
  86. data/sample/demo_blast_report.rb +285 -0
  87. data/sample/demo_codontable.rb +119 -0
  88. data/sample/demo_das.rb +105 -0
  89. data/sample/demo_ddbjxml.rb +212 -0
  90. data/sample/demo_fasta_remote.rb +51 -0
  91. data/sample/demo_fastaformat.rb +105 -0
  92. data/sample/demo_genbank.rb +132 -0
  93. data/sample/demo_genscan_report.rb +202 -0
  94. data/sample/demo_gff1.rb +49 -0
  95. data/sample/demo_go.rb +98 -0
  96. data/sample/demo_hmmer_report.rb +149 -0
  97. data/sample/demo_kegg_compound.rb +57 -0
  98. data/sample/demo_kegg_drug.rb +65 -0
  99. data/sample/demo_kegg_genome.rb +74 -0
  100. data/sample/demo_kegg_glycan.rb +72 -0
  101. data/sample/demo_kegg_orthology.rb +62 -0
  102. data/sample/demo_kegg_reaction.rb +66 -0
  103. data/sample/demo_kegg_taxonomy.rb +92 -0
  104. data/sample/demo_keggapi.rb +502 -0
  105. data/sample/demo_litdb.rb +42 -0
  106. data/sample/demo_locations.rb +99 -0
  107. data/sample/demo_ncbi_rest.rb +130 -0
  108. data/sample/demo_nucleicacid.rb +49 -0
  109. data/sample/demo_pathway.rb +196 -0
  110. data/sample/demo_prosite.rb +120 -0
  111. data/sample/demo_psort.rb +138 -0
  112. data/sample/demo_psort_report.rb +70 -0
  113. data/sample/demo_pubmed.rb +118 -0
  114. data/sample/demo_sirna.rb +63 -0
  115. data/sample/demo_sosui_report.rb +89 -0
  116. data/sample/demo_targetp_report.rb +135 -0
  117. data/sample/demo_tmhmm_report.rb +68 -0
  118. data/sample/pmfetch.rb +13 -4
  119. data/sample/pmsearch.rb +15 -4
  120. data/sample/test_phyloxml_big.rb +205 -0
  121. data/test/bioruby_test_helper.rb +61 -0
  122. data/test/data/KEGG/1.1.1.1.enzyme +935 -0
  123. data/test/data/KEGG/C00025.compound +102 -0
  124. data/test/data/KEGG/D00063.drug +104 -0
  125. data/test/data/KEGG/G00024.glycan +47 -0
  126. data/test/data/KEGG/G01366.glycan +18 -0
  127. data/test/data/KEGG/K02338.orthology +902 -0
  128. data/test/data/KEGG/R00006.reaction +14 -0
  129. data/test/data/fastq/README.txt +109 -0
  130. data/test/data/fastq/error_diff_ids.fastq +20 -0
  131. data/test/data/fastq/error_double_qual.fastq +22 -0
  132. data/test/data/fastq/error_double_seq.fastq +22 -0
  133. data/test/data/fastq/error_long_qual.fastq +20 -0
  134. data/test/data/fastq/error_no_qual.fastq +20 -0
  135. data/test/data/fastq/error_qual_del.fastq +20 -0
  136. data/test/data/fastq/error_qual_escape.fastq +20 -0
  137. data/test/data/fastq/error_qual_null.fastq +0 -0
  138. data/test/data/fastq/error_qual_space.fastq +21 -0
  139. data/test/data/fastq/error_qual_tab.fastq +21 -0
  140. data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
  141. data/test/data/fastq/error_qual_vtab.fastq +20 -0
  142. data/test/data/fastq/error_short_qual.fastq +20 -0
  143. data/test/data/fastq/error_spaces.fastq +20 -0
  144. data/test/data/fastq/error_tabs.fastq +21 -0
  145. data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
  146. data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
  147. data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
  148. data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
  149. data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
  150. data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
  151. data/test/data/fastq/error_trunc_in_title.fastq +17 -0
  152. data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
  153. data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
  154. data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
  155. data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
  156. data/test/data/fastq/longreads_as_illumina.fastq +40 -0
  157. data/test/data/fastq/longreads_as_sanger.fastq +40 -0
  158. data/test/data/fastq/longreads_as_solexa.fastq +40 -0
  159. data/test/data/fastq/longreads_original_sanger.fastq +120 -0
  160. data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
  161. data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
  162. data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
  163. data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
  164. data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
  165. data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
  166. data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
  167. data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
  168. data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
  169. data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
  170. data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
  171. data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
  172. data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
  173. data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
  174. data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
  175. data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
  176. data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
  177. data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
  178. data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
  179. data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
  180. data/test/data/meme/db +0 -0
  181. data/test/data/meme/mast +0 -0
  182. data/test/data/meme/mast.out +13 -0
  183. data/test/data/meme/meme.out +3 -0
  184. data/test/data/phyloxml/apaf.xml +666 -0
  185. data/test/data/phyloxml/bcl_2.xml +2097 -0
  186. data/test/data/phyloxml/made_up.xml +144 -0
  187. data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
  188. data/test/data/phyloxml/phyloxml_examples.xml +415 -0
  189. data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
  190. data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
  191. data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
  192. data/test/functional/bio/appl/test_pts1.rb +7 -5
  193. data/test/functional/bio/io/test_ensembl.rb +4 -3
  194. data/test/functional/bio/io/test_pubmed.rb +9 -3
  195. data/test/functional/bio/io/test_soapwsdl.rb +5 -4
  196. data/test/functional/bio/io/test_togows.rb +5 -4
  197. data/test/functional/bio/sequence/test_output_embl.rb +6 -4
  198. data/test/functional/bio/test_command.rb +54 -5
  199. data/test/runner.rb +5 -3
  200. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
  201. data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
  202. data/test/unit/bio/appl/blast/test_report.rb +5 -4
  203. data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
  204. data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
  205. data/test/unit/bio/appl/genscan/test_report.rb +8 -9
  206. data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
  207. data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
  208. data/test/unit/bio/appl/mafft/test_report.rb +6 -5
  209. data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
  210. data/test/unit/bio/appl/meme/test_mast.rb +103 -0
  211. data/test/unit/bio/appl/meme/test_motif.rb +38 -0
  212. data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
  213. data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
  214. data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
  215. data/test/unit/bio/appl/sim4/test_report.rb +5 -4
  216. data/test/unit/bio/appl/sosui/test_report.rb +6 -5
  217. data/test/unit/bio/appl/targetp/test_report.rb +5 -3
  218. data/test/unit/bio/appl/test_blast.rb +5 -4
  219. data/test/unit/bio/appl/test_fasta.rb +4 -2
  220. data/test/unit/bio/appl/test_pts1.rb +4 -2
  221. data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
  222. data/test/unit/bio/data/test_aa.rb +5 -3
  223. data/test/unit/bio/data/test_codontable.rb +5 -4
  224. data/test/unit/bio/data/test_na.rb +5 -3
  225. data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
  226. data/test/unit/bio/db/embl/test_common.rb +4 -2
  227. data/test/unit/bio/db/embl/test_embl.rb +6 -6
  228. data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
  229. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
  230. data/test/unit/bio/db/embl/test_sptr.rb +6 -8
  231. data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
  232. data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
  233. data/test/unit/bio/db/kegg/test_compound.rb +146 -0
  234. data/test/unit/bio/db/kegg/test_drug.rb +194 -0
  235. data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
  236. data/test/unit/bio/db/kegg/test_genes.rb +32 -4
  237. data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
  238. data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
  239. data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
  240. data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
  241. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
  242. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
  243. data/test/unit/bio/db/test_aaindex.rb +6 -6
  244. data/test/unit/bio/db/test_fasta.rb +5 -46
  245. data/test/unit/bio/db/test_fastq.rb +829 -0
  246. data/test/unit/bio/db/test_gff.rb +4 -2
  247. data/test/unit/bio/db/test_lasergene.rb +7 -5
  248. data/test/unit/bio/db/test_medline.rb +4 -2
  249. data/test/unit/bio/db/test_newick.rb +6 -6
  250. data/test/unit/bio/db/test_nexus.rb +4 -2
  251. data/test/unit/bio/db/test_phyloxml.rb +769 -0
  252. data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
  253. data/test/unit/bio/db/test_prosite.rb +6 -5
  254. data/test/unit/bio/db/test_qual.rb +63 -0
  255. data/test/unit/bio/db/test_rebase.rb +5 -3
  256. data/test/unit/bio/db/test_soft.rb +7 -6
  257. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
  258. data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
  259. data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
  260. data/test/unit/bio/io/test_ddbjxml.rb +4 -3
  261. data/test/unit/bio/io/test_ensembl.rb +5 -3
  262. data/test/unit/bio/io/test_fastacmd.rb +4 -3
  263. data/test/unit/bio/io/test_flatfile.rb +6 -5
  264. data/test/unit/bio/io/test_soapwsdl.rb +4 -3
  265. data/test/unit/bio/io/test_togows.rb +4 -2
  266. data/test/unit/bio/sequence/test_aa.rb +5 -3
  267. data/test/unit/bio/sequence/test_common.rb +4 -2
  268. data/test/unit/bio/sequence/test_compat.rb +4 -2
  269. data/test/unit/bio/sequence/test_dblink.rb +5 -3
  270. data/test/unit/bio/sequence/test_na.rb +4 -2
  271. data/test/unit/bio/sequence/test_quality_score.rb +330 -0
  272. data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
  273. data/test/unit/bio/test_alignment.rb +5 -3
  274. data/test/unit/bio/test_command.rb +4 -3
  275. data/test/unit/bio/test_db.rb +5 -3
  276. data/test/unit/bio/test_feature.rb +4 -2
  277. data/test/unit/bio/test_location.rb +4 -2
  278. data/test/unit/bio/test_map.rb +5 -3
  279. data/test/unit/bio/test_pathway.rb +4 -2
  280. data/test/unit/bio/test_reference.rb +4 -2
  281. data/test/unit/bio/test_sequence.rb +5 -3
  282. data/test/unit/bio/test_shell.rb +5 -3
  283. data/test/unit/bio/test_tree.rb +6 -6
  284. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
  285. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
  286. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
  287. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
  288. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
  289. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
  290. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
  291. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
  292. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
  293. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
  294. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
  295. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
  296. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
  297. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
  298. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
  299. data/test/unit/bio/util/test_color_scheme.rb +5 -3
  300. data/test/unit/bio/util/test_contingency_table.rb +5 -3
  301. data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
  302. data/test/unit/bio/util/test_sirna.rb +6 -4
  303. metadata +147 -2
@@ -5,7 +5,7 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: hmmer.rb,v 1.9 2007/04/05 23:35:39 trevor Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
11
  require 'bio/command'
@@ -110,17 +110,3 @@ end # class HMMER
110
110
 
111
111
  end # module Bio
112
112
 
113
-
114
-
115
- if __FILE__ == $0
116
-
117
- require 'pp'
118
-
119
- program = ARGV.shift # hmmsearch, hmmpfam
120
- hmmfile = ARGV.shift
121
- seqfile = ARGV.shift
122
-
123
- factory = Bio::HMMER.new(program, hmmfile, seqfile)
124
- pp factory.query
125
-
126
- end
@@ -566,7 +566,6 @@ end # class HMMER
566
566
  end # module Bio
567
567
 
568
568
 
569
- if __FILE__ == $0
570
569
 
571
570
  =begin
572
571
 
@@ -582,102 +581,3 @@ if __FILE__ == $0
582
581
 
583
582
  =end
584
583
 
585
- begin
586
- require 'pp'
587
- alias p pp
588
- rescue LoadError
589
- end
590
-
591
- rep = Bio::HMMER::Report.new(ARGF.read)
592
- p rep
593
-
594
- indent = 18
595
-
596
- puts "### hmmer result"
597
- print "name : ".rjust(indent)
598
- p rep.program['name']
599
- print "version : ".rjust(indent)
600
- p rep.program['version']
601
- print "copyright : ".rjust(indent)
602
- p rep.program['copyright']
603
- print "license : ".rjust(indent)
604
- p rep.program['license']
605
-
606
- print "HMM file : ".rjust(indent)
607
- p rep.parameter['HMM file']
608
- print "Sequence file : ".rjust(indent)
609
- p rep.parameter['Sequence file']
610
-
611
- print "Query sequence : ".rjust(indent)
612
- p rep.query_info['Query sequence']
613
- print "Accession : ".rjust(indent)
614
- p rep.query_info['Accession']
615
- print "Description : ".rjust(indent)
616
- p rep.query_info['Description']
617
-
618
- rep.each do |hit|
619
- puts "## each hit"
620
- print "accession : ".rjust(indent)
621
- p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]
622
- print "description : ".rjust(indent)
623
- p [ hit.description, hit.definition ]
624
- print "target_def : ".rjust(indent)
625
- p hit.target_def
626
- print "score : ".rjust(indent)
627
- p [ hit.score, hit.bit_score ]
628
- print "evalue : ".rjust(indent)
629
- p hit.evalue
630
- print "num : ".rjust(indent)
631
- p hit.num
632
-
633
- hit.each do |hsp|
634
- puts "## each hsp"
635
- print "accession : ".rjust(indent)
636
- p [ hsp.accession, hsp.target_id ]
637
- print "domain : ".rjust(indent)
638
- p hsp.domain
639
- print "seq_f : ".rjust(indent)
640
- p hsp.seq_f
641
- print "seq_t : ".rjust(indent)
642
- p hsp.seq_t
643
- print "seq_ft : ".rjust(indent)
644
- p hsp.seq_ft
645
- print "hmm_f : ".rjust(indent)
646
- p hsp.hmm_f
647
- print "hmm_t : ".rjust(indent)
648
- p hsp.hmm_t
649
- print "hmm_ft : ".rjust(indent)
650
- p hsp.hmm_ft
651
- print "score : ".rjust(indent)
652
- p [ hsp.score, hsp.bit_score ]
653
- print "evalue : ".rjust(indent)
654
- p hsp.evalue
655
- print "midline : ".rjust(indent)
656
- p hsp.midline
657
- print "hmmseq : ".rjust(indent)
658
- p hsp.hmmseq
659
- print "flatseq : ".rjust(indent)
660
- p hsp.flatseq
661
- print "query_frame : ".rjust(indent)
662
- p hsp.query_frame
663
- print "target_frame : ".rjust(indent)
664
- p hsp.target_frame
665
-
666
- print "query_seq : ".rjust(indent)
667
- p hsp.query_seq # hmmseq, flatseq
668
- print "target_seq : ".rjust(indent)
669
- p hsp.target_seq # flatseq, hmmseq
670
- print "target_from : ".rjust(indent)
671
- p hsp.target_from # seq_f, hmm_f
672
- print "target_to : ".rjust(indent)
673
- p hsp.target_to # seq_t, hmm_t
674
- print "query_from : ".rjust(indent)
675
- p hsp.query_from # hmm_f, seq_f
676
- print "query_to : ".rjust(indent)
677
- p hsp.query_to # hmm_t, seq_t
678
- end
679
- end
680
-
681
- end
682
-
683
-
@@ -0,0 +1,156 @@
1
+ #
2
+ # = bio/appl/meme/mast.rb - Wrapper for running MAST program
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a wrapper for running the MAST tool for searching sequence databases using motifs
11
+ #
12
+ # == References
13
+ #
14
+ # * http://meme.sdsc.edu/meme/intro.html
15
+ #
16
+ require "bio/command"
17
+
18
+ module Bio
19
+ module Meme
20
+
21
+ # == Description
22
+ #
23
+ # Bio::Meme::Mast is a wrapper for searching a database using sequence motifs. The code
24
+ # will read options from a Hash and run the program. Parsing of the output is provided by
25
+ # Bio::Meme::Mast::Report. Before running, options[:mfile] and options[:d] must be set
26
+ # in the constructor or afterwards by calling Mast#config(options)
27
+ #
28
+ # == Usage
29
+ #
30
+ # mast = Mast.new('/path/to/mast')
31
+ # or with options
32
+ # mast = Mast.new('/path/to/mast', {:mfile => 'meme.out', :d => '/shared/db/nr'})
33
+ #
34
+ # report = mast.run # run already returns a Bio::Meme::Mast::Report
35
+ # report.each do |motif|
36
+ # puts motif.length
37
+ # end
38
+ #
39
+ #
40
+ class Mast
41
+
42
+ include Bio::Command
43
+
44
+ autoload :Report, 'bio/appl/meme/mast/report'
45
+
46
+ # A Hash of options for Mast
47
+ attr_accessor :options
48
+
49
+ DEFAULT_OPTIONS = {
50
+ # required
51
+ :mfile => nil,
52
+ :d => nil,
53
+ # optional
54
+ :stdin => nil, # may not work as expected
55
+ :count => nil,
56
+ :alphabet => nil,
57
+ :stdout => true,
58
+ :text => false,
59
+ :sep => false,
60
+ :norc => false,
61
+ :dna => false,
62
+ :comp => false,
63
+ :rank => nil,
64
+ :smax => nil,
65
+ :ev => nil,
66
+ :mt => nil,
67
+ :w => false,
68
+ :bfile => nil,
69
+ :seqp => false,
70
+ :mf => nil,
71
+ :df => nil,
72
+ :minseqs => nil,
73
+ :mev => nil,
74
+ :m => nil,
75
+ :diag => nil,
76
+ :best => false,
77
+ :remcorr => false,
78
+ :brief => false,
79
+ :b => false,
80
+ :nostatus => true,
81
+ :hit_list => true,
82
+ }
83
+
84
+ # The command line String to be executed
85
+ attr_reader :cmd
86
+
87
+ # Create a mast instance
88
+ #
89
+ # m = Mast.new('/usr/local/bin/mast')
90
+ # ---
91
+ # *Arguments*:
92
+ # * (required) _mast_location_: String
93
+ # *Raises*:: ArgumentError if mast program is not found
94
+ # *Returns*:: a Bio::Meme::Mast object
95
+
96
+ def initialize(mast_location, options = {})
97
+ unless File.exists?(mast_location)
98
+ raise ArgumentError.new("mast: command not found : #{mast_location}")
99
+ end
100
+ @binary = mast_location
101
+ options.empty? ? config(DEFAULT_OPTIONS) : config(options)
102
+ end
103
+
104
+ # Builds the command line string
105
+ # any options passed in will be merged with DEFAULT_OPTIONS
106
+ # Mast usage: mast <mfile> <opts> <flags>
107
+ #
108
+ # mast.config({:mfile => "meme.out", :d => "/path/to/fasta/db"})
109
+ # ---
110
+ # *Arguments*:
111
+ # * (required) _options_: Hash (see DEFAULT_OPTIONS)
112
+ # *Returns*:: the command line string
113
+
114
+ def config(options)
115
+ @options = DEFAULT_OPTIONS.merge(options)
116
+ mfile, opts, flags = "", "", ""
117
+ @options.each_pair do |opt, val|
118
+ if val.nil? or val == false
119
+ next
120
+ elsif opt == :mfile
121
+ mfile = val
122
+ elsif val == true
123
+ flags << " -#{opt}"
124
+ else
125
+ opts << " -#{opt} #{val}"
126
+ end
127
+ end
128
+ @cmd = "#{@binary} #{mfile + opts + flags}"
129
+ end
130
+
131
+ # Checks if input/database files exist and options are valid
132
+ # *Raises*:: ArgumentError if the motifs file does not exist
133
+ # *Raises*:: ArgumentError if the database file does not exist
134
+ # *Raises*:: ArgumentError if there is an invalid option
135
+
136
+ def check_options
137
+ @options.each_key do |k|
138
+ raise ArgumentError.new("Invalid option: #{k}") unless DEFAULT_OPTIONS.has_key?(k)
139
+ end
140
+ raise ArgumentError.new("Motif file not found: #{@options[:mfile]}") if @options[:mfile].nil? or !File.exists?(@options[:mfile])
141
+ raise ArgumentError.new("Database not found: #{@options[:d]}") if @options[:d].nil? or !File.exists?(@options[:d])
142
+ end
143
+
144
+ # Run the mast program
145
+ # ---
146
+ # *Returns*:: Bio::Meme::Mast::Report object
147
+
148
+ def run
149
+ check_options
150
+ call_command(@cmd) {|io| @output = io.read }
151
+ Report.new(@output)
152
+ end
153
+
154
+ end # End class Mast
155
+ end # End module Meme
156
+ end # End module Bio
@@ -0,0 +1,91 @@
1
+ #
2
+ # = bio/appl/meme/mast/report.rb - Mast output parser class
3
+ #
4
+ # Copyright:: Copyright (C) 2008, Adam Kraut <adamnkraut@gmail.com>,
5
+
6
+ #
7
+ # License:: The Ruby License
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains a class to parse Mast output
12
+ #
13
+ # == Examples
14
+ #
15
+ # == References
16
+ #
17
+ # * http://meme.sdsc.edu/meme/intro.html
18
+
19
+ require "bio/appl/meme/mast"
20
+ require "bio/appl/meme/motif"
21
+
22
+ module Bio
23
+ module Meme
24
+ class Mast
25
+
26
+ # == Description
27
+ #
28
+ # A class to parse the output from Mast
29
+ #
30
+ # WARNING: Currently only the -hit_list (machine readable) format is supported;
31
+ # HTML (default) output is not supported
32
+ #
33
+ # == Examples
34
+ #
35
+
36
+ class Report
37
+
38
+ attr_reader :motifs
39
+
40
+ def initialize(mast_hitlist)
41
+ @motifs = parse_hit_list(mast_hitlist)
42
+ end
43
+
44
+ # Iterates each motif (Bio::Meme::Motif)
45
+ def each
46
+ @motifs.each do |motif|
47
+ yield motif
48
+ end
49
+ end
50
+ alias :each_motif :each
51
+
52
+
53
+ private
54
+
55
+ # Each line corresponds to one motif occurrence in one sequence.
56
+ # The format of the hit lines is
57
+ # [<sequence_name> <strand><motif> <start> <end> <p-value>]+
58
+ # where
59
+ # <sequence_name> is the name of the sequence containing the hit
60
+ # <strand> is the strand (+ or - for DNA, blank for protein),
61
+ # <motif> is the motif number,
62
+ # <start> is the starting position of the hit,
63
+ # <end> is the ending position of the hit, and
64
+ # <p-value> is the position p-value of the hit.
65
+ def parse_hit_list(data)
66
+ motifs = []
67
+ data.each_line do |line|
68
+
69
+ line.chomp!
70
+
71
+ # skip comments
72
+ next if line =~ /^#/
73
+
74
+ fields = line.split(/\s/)
75
+
76
+ if fields.size == 5
77
+ motifs << Motif.new(fields[0], strand = nil, fields[1], fields[2], fields[3], fields[4])
78
+ elsif fields.size == 6
79
+ motifs << Motif.new(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
80
+ else
81
+ raise RuntimeError.new("Could not parse mast output")
82
+ end
83
+
84
+ end
85
+ motifs
86
+ end
87
+
88
+ end # Report
89
+ end # Mast
90
+ end # Meme
91
+ end # Bio
@@ -0,0 +1,48 @@
1
+ #
2
+ # = bio/appl/meme/motif.rb - Class to represent a sequence motif
3
+ #
4
+ # Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
5
+ #
6
+ # License:: The Ruby License
7
+ #
8
+ # == Description
9
+ #
10
+ # This file contains a minimal class to represent meme motifs
11
+ #
12
+ # == References
13
+ #
14
+ # * http://meme.sdsc.edu/meme/intro.html
15
+ #
16
+ module Bio
17
+ module Meme
18
+
19
+ # == Description
20
+ #
21
+ # This class minimally represents a sequence motif according to the MEME program
22
+ #
23
+ # TODO: integrate with Bio::Sequence class
24
+ # TODO: parse PSSM data
25
+ #
26
+ class Motif
27
+ attr_accessor :sequence_name, :strand, :motif, :start_pos, :end_pos, :pvalue
28
+
29
+ # Creates a new Bio::Meme::Motif object
30
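+ # arguments are: sequence_name and strand (stored as Strings), motif, start_pos and end_pos (stored as Integers), and pvalue (stored as a Float)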
31
+ def initialize(sequence_name, strand, motif, start_pos, end_pos, pvalue)
32
+ @sequence_name = sequence_name.to_s
33
+ @strand = strand.to_s
34
+ @motif = motif.to_i
35
+ @start_pos = start_pos.to_i
36
+ @end_pos = end_pos.to_i
37
+ @pvalue = pvalue.to_f
38
+ end
39
+
40
+ # Computes the motif length
41
+ def length
42
+ @end_pos - @start_pos
43
+ end
44
+
45
+ end
46
+
47
+ end
48
+ end
@@ -435,114 +435,3 @@ require 'uri'
435
435
 
436
436
  end # module Bio
437
437
 
438
-
439
-
440
-
441
-
442
- if __FILE__ == $0
443
-
444
- begin
445
- require 'psort/report.rb'
446
- rescue LoadError
447
- end
448
-
449
-
450
- seq = ">hoge mit
451
- MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
452
- ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
453
- DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
454
- FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
455
- KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
456
- NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
457
- SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
458
- DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
459
- DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
460
- KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
461
- APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
462
- KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
463
- "
464
- Seq1 = ">hgoe
465
- LTFVENDKII NI
466
- "
467
-
468
- puts "\n Bio::PSORT::PSORT"
469
-
470
- puts "\n ==> p serv = Bio::PSORT::PSORT.imsut"
471
- p serv = Bio::PSORT::PSORT1.imsut
472
-
473
- puts "\n ==> p serv.class "
474
- p serv.class
475
-
476
- puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
477
- p serv.title = 'Query_title_splited_by_white space'
478
-
479
- puts "\n ==> p serv.exec(seq, false) "
480
- p serv.exec(seq, false)
481
-
482
- puts "\n ==> p serv.exec(seq) "
483
- p serv.exec(seq)
484
-
485
- puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
486
- p report = serv.exec(Bio::FastaFormat.new(seq))
487
-
488
- puts "\n ==> p report.class"
489
- p report.class
490
-
491
-
492
- puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
493
- p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
494
-
495
- puts "\n ==> p report_raw.class"
496
- p report_raw.class
497
-
498
-
499
- puts "\n ==> p report.methods"
500
- p report.methods
501
-
502
- methods = ['entry_id', 'origin', 'title', 'sequence','result_info',
503
- 'reasoning', 'final_result', 'raw']
504
- methods.each do |method|
505
- puts "\n ==> p report.#{method}"
506
- p eval("report.#{method}")
507
- end
508
-
509
-
510
-
511
- puts "\n Bio::PSORT::PSORT2"
512
-
513
- puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
514
- p serv = Bio::PSORT::PSORT2.imsut
515
-
516
- puts "\n ==> p serv.class "
517
- p serv.class
518
-
519
- puts "\n ==> p seq "
520
- p seq
521
-
522
- puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
523
- p serv.title = 'Query_title_splited_by_white space'
524
-
525
- puts "\n ==> p serv.exec(seq) # parsed report"
526
- p serv.exec(seq)
527
-
528
- puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
529
- p report = serv.exec(Bio::FastaFormat.new(seq))
530
-
531
-
532
-
533
- puts "\n ==> p serv.exec(seq, false) # report in plain text"
534
- p serv.exec(seq, false)
535
-
536
- puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
537
- p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
538
-
539
-
540
- puts "\n ==> p report.methods"
541
- p report.methods
542
-
543
- methods = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
544
- methods.each do |method|
545
- puts "\n ==> p report.#{method}"
546
- p eval("report.#{method}")
547
- end
548
- end