bio 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/appl/hmmer.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id
|
8
|
+
# $Id:$
|
9
9
|
#
|
10
10
|
|
11
11
|
require 'bio/command'
|
@@ -110,17 +110,3 @@ end # class HMMER
|
|
110
110
|
|
111
111
|
end # module Bio
|
112
112
|
|
113
|
-
|
114
|
-
|
115
|
-
if __FILE__ == $0
|
116
|
-
|
117
|
-
require 'pp'
|
118
|
-
|
119
|
-
program = ARGV.shift # hmmsearch, hmmpfam
|
120
|
-
hmmfile = ARGV.shift
|
121
|
-
seqfile = ARGV.shift
|
122
|
-
|
123
|
-
factory = Bio::HMMER.new(program, hmmfile, seqfile)
|
124
|
-
pp factory.query
|
125
|
-
|
126
|
-
end
|
@@ -566,7 +566,6 @@ end # class HMMER
|
|
566
566
|
end # module Bio
|
567
567
|
|
568
568
|
|
569
|
-
if __FILE__ == $0
|
570
569
|
|
571
570
|
=begin
|
572
571
|
|
@@ -582,102 +581,3 @@ if __FILE__ == $0
|
|
582
581
|
|
583
582
|
=end
|
584
583
|
|
585
|
-
begin
|
586
|
-
require 'pp'
|
587
|
-
alias p pp
|
588
|
-
rescue LoadError
|
589
|
-
end
|
590
|
-
|
591
|
-
rep = Bio::HMMER::Report.new(ARGF.read)
|
592
|
-
p rep
|
593
|
-
|
594
|
-
indent = 18
|
595
|
-
|
596
|
-
puts "### hmmer result"
|
597
|
-
print "name : ".rjust(indent)
|
598
|
-
p rep.program['name']
|
599
|
-
print "version : ".rjust(indent)
|
600
|
-
p rep.program['version']
|
601
|
-
print "copyright : ".rjust(indent)
|
602
|
-
p rep.program['copyright']
|
603
|
-
print "license : ".rjust(indent)
|
604
|
-
p rep.program['license']
|
605
|
-
|
606
|
-
print "HMM file : ".rjust(indent)
|
607
|
-
p rep.parameter['HMM file']
|
608
|
-
print "Sequence file : ".rjust(indent)
|
609
|
-
p rep.parameter['Sequence file']
|
610
|
-
|
611
|
-
print "Query sequence : ".rjust(indent)
|
612
|
-
p rep.query_info['Query sequence']
|
613
|
-
print "Accession : ".rjust(indent)
|
614
|
-
p rep.query_info['Accession']
|
615
|
-
print "Description : ".rjust(indent)
|
616
|
-
p rep.query_info['Description']
|
617
|
-
|
618
|
-
rep.each do |hit|
|
619
|
-
puts "## each hit"
|
620
|
-
print "accession : ".rjust(indent)
|
621
|
-
p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]
|
622
|
-
print "description : ".rjust(indent)
|
623
|
-
p [ hit.description, hit.definition ]
|
624
|
-
print "target_def : ".rjust(indent)
|
625
|
-
p hit.target_def
|
626
|
-
print "score : ".rjust(indent)
|
627
|
-
p [ hit.score, hit.bit_score ]
|
628
|
-
print "evalue : ".rjust(indent)
|
629
|
-
p hit.evalue
|
630
|
-
print "num : ".rjust(indent)
|
631
|
-
p hit.num
|
632
|
-
|
633
|
-
hit.each do |hsp|
|
634
|
-
puts "## each hsp"
|
635
|
-
print "accession : ".rjust(indent)
|
636
|
-
p [ hsp.accession, hsp.target_id ]
|
637
|
-
print "domain : ".rjust(indent)
|
638
|
-
p hsp.domain
|
639
|
-
print "seq_f : ".rjust(indent)
|
640
|
-
p hsp.seq_f
|
641
|
-
print "seq_t : ".rjust(indent)
|
642
|
-
p hsp.seq_t
|
643
|
-
print "seq_ft : ".rjust(indent)
|
644
|
-
p hsp.seq_ft
|
645
|
-
print "hmm_f : ".rjust(indent)
|
646
|
-
p hsp.hmm_f
|
647
|
-
print "hmm_t : ".rjust(indent)
|
648
|
-
p hsp.hmm_t
|
649
|
-
print "hmm_ft : ".rjust(indent)
|
650
|
-
p hsp.hmm_ft
|
651
|
-
print "score : ".rjust(indent)
|
652
|
-
p [ hsp.score, hsp.bit_score ]
|
653
|
-
print "evalue : ".rjust(indent)
|
654
|
-
p hsp.evalue
|
655
|
-
print "midline : ".rjust(indent)
|
656
|
-
p hsp.midline
|
657
|
-
print "hmmseq : ".rjust(indent)
|
658
|
-
p hsp.hmmseq
|
659
|
-
print "flatseq : ".rjust(indent)
|
660
|
-
p hsp.flatseq
|
661
|
-
print "query_frame : ".rjust(indent)
|
662
|
-
p hsp.query_frame
|
663
|
-
print "target_frame : ".rjust(indent)
|
664
|
-
p hsp.target_frame
|
665
|
-
|
666
|
-
print "query_seq : ".rjust(indent)
|
667
|
-
p hsp.query_seq # hmmseq, flatseq
|
668
|
-
print "target_seq : ".rjust(indent)
|
669
|
-
p hsp.target_seq # flatseq, hmmseq
|
670
|
-
print "target_from : ".rjust(indent)
|
671
|
-
p hsp.target_from # seq_f, hmm_f
|
672
|
-
print "target_to : ".rjust(indent)
|
673
|
-
p hsp.target_to # seq_t, hmm_t
|
674
|
-
print "query_from : ".rjust(indent)
|
675
|
-
p hsp.query_from # hmm_f, seq_f
|
676
|
-
print "query_to : ".rjust(indent)
|
677
|
-
p hsp.query_to # hmm_t, seq_t
|
678
|
-
end
|
679
|
-
end
|
680
|
-
|
681
|
-
end
|
682
|
-
|
683
|
-
|
@@ -0,0 +1,156 @@
|
|
1
|
+
#
|
2
|
+
# = bio/appl/meme/mast.rb - Wrapper for running MAST program
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
|
5
|
+
#
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# == Description
|
9
|
+
#
|
10
|
+
# This file contains a wrapper for running the MAST tool for searching sequence databases using motifs
|
11
|
+
#
|
12
|
+
# == References
|
13
|
+
#
|
14
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
15
|
+
#
|
16
|
+
require "bio/command"
|
17
|
+
|
18
|
+
module Bio
|
19
|
+
module Meme
|
20
|
+
|
21
|
+
# == Description
|
22
|
+
#
|
23
|
+
# Bio::Meme::Mast is a wrapper for searching a database using sequence motifs. The code
|
24
|
+
# will read options from a Hash and run the program. Parsing of the output is provided by
|
25
|
+
# Bio::Meme::Mast::Report. Before running, options[:mfile] and options[:d] must be set
|
26
|
+
# in the constructor or Mast.config(options = {})
|
27
|
+
#
|
28
|
+
# == Usage
|
29
|
+
#
|
30
|
+
# mast = Mast.new('/path/to/mast')
|
31
|
+
# or with options
|
32
|
+
# mast = Mast.new('/path/to/mast', {:mfile => 'meme.out', :d => '/shared/db/nr'})
|
33
|
+
#
|
34
|
+
# report = Mast::Report.new(mast.run)
|
35
|
+
# report.each do |motif|
|
36
|
+
# puts motif.length
|
37
|
+
# end
|
38
|
+
#
|
39
|
+
#
|
40
|
+
class Mast
|
41
|
+
|
42
|
+
include Bio::Command
|
43
|
+
|
44
|
+
autoload :Report, 'bio/appl/meme/mast/report'
|
45
|
+
|
46
|
+
# A Hash of options for Mast
|
47
|
+
attr_accessor :options
|
48
|
+
|
49
|
+
DEFAULT_OPTIONS = {
|
50
|
+
# required
|
51
|
+
:mfile => nil,
|
52
|
+
:d => nil,
|
53
|
+
# optional
|
54
|
+
:stdin => nil, # may not work as expected
|
55
|
+
:count => nil,
|
56
|
+
:alphabet => nil,
|
57
|
+
:stdout => true,
|
58
|
+
:text => false,
|
59
|
+
:sep => false,
|
60
|
+
:norc => false,
|
61
|
+
:dna => false,
|
62
|
+
:comp => false,
|
63
|
+
:rank => nil,
|
64
|
+
:smax => nil,
|
65
|
+
:ev => nil,
|
66
|
+
:mt => nil,
|
67
|
+
:w => false,
|
68
|
+
:bfile => nil,
|
69
|
+
:seqp => false,
|
70
|
+
:mf => nil,
|
71
|
+
:df => nil,
|
72
|
+
:minseqs => nil,
|
73
|
+
:mev => nil,
|
74
|
+
:m => nil,
|
75
|
+
:diag => nil,
|
76
|
+
:best => false,
|
77
|
+
:remcorr => false,
|
78
|
+
:brief => false,
|
79
|
+
:b => false,
|
80
|
+
:nostatus => true,
|
81
|
+
:hit_list => true,
|
82
|
+
}
|
83
|
+
|
84
|
+
# The command line String to be executed
|
85
|
+
attr_reader :cmd
|
86
|
+
|
87
|
+
# Create a mast instance
|
88
|
+
#
|
89
|
+
# m = Mast.new('/usr/local/bin/mast')
|
90
|
+
# ---
|
91
|
+
# *Arguments*:
|
92
|
+
# * (required) _mast_location_: String
|
93
|
+
# *Raises*:: ArgumentError if mast program is not found
|
94
|
+
# *Returns*:: a Bio::Meme::Mast object
|
95
|
+
|
96
|
+
def initialize(mast_location, options = {})
|
97
|
+
unless File.exists?(mast_location)
|
98
|
+
raise ArgumentError.new("mast: command not found : #{mast_location}")
|
99
|
+
end
|
100
|
+
@binary = mast_location
|
101
|
+
options.empty? ? config(DEFAULT_OPTIONS) : config(options)
|
102
|
+
end
|
103
|
+
|
104
|
+
# Builds the command line string
|
105
|
+
# any options passed in will be merged with DEFAULT_OPTIONS
|
106
|
+
# Mast usage: mast <mfile> <opts> <flags>
|
107
|
+
#
|
108
|
+
# mast.config({:mfile => "meme.out", :d => "/path/to/fasta/db"})
|
109
|
+
# ---
|
110
|
+
# *Arguments*:
|
111
|
+
# * (required) _options_: Hash (see DEFAULT_OPTIONS)
|
112
|
+
# *Returns*:: the command line string
|
113
|
+
|
114
|
+
def config(options)
|
115
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
116
|
+
mfile, opts, flags = "", "", ""
|
117
|
+
@options.each_pair do |opt, val|
|
118
|
+
if val.nil? or val == false
|
119
|
+
next
|
120
|
+
elsif opt == :mfile
|
121
|
+
mfile = val
|
122
|
+
elsif val == true
|
123
|
+
flags << " -#{opt}"
|
124
|
+
else
|
125
|
+
opts << " -#{opt} #{val}"
|
126
|
+
end
|
127
|
+
end
|
128
|
+
@cmd = "#{@binary} #{mfile + opts + flags}"
|
129
|
+
end
|
130
|
+
|
131
|
+
# Checks if input/database files exist and options are valid
|
132
|
+
# *Raises*:: ArgumentError if the motifs file does not exist
|
133
|
+
# *Raises*:: ArgumentError if the database file does not exist
|
134
|
+
# *Raises*:: ArgumentError if there is an invalid option
|
135
|
+
|
136
|
+
def check_options
|
137
|
+
@options.each_key do |k|
|
138
|
+
raise ArgumentError.new("Invalid option: #{k}") unless DEFAULT_OPTIONS.has_key?(k)
|
139
|
+
end
|
140
|
+
raise ArgumentError.new("Motif file not found: #{@options[:mfile]}") if @options[:mfile].nil? or !File.exists?(@options[:mfile])
|
141
|
+
raise ArgumentError.new("Database not found: #{@options[:d]}") if @options[:d].nil? or !File.exists?(@options[:d])
|
142
|
+
end
|
143
|
+
|
144
|
+
# Run the mast program
|
145
|
+
# ---
|
146
|
+
# *Returns*:: Bio::Meme::Mast::Report object
|
147
|
+
|
148
|
+
def run
|
149
|
+
check_options
|
150
|
+
call_command(@cmd) {|io| @output = io.read }
|
151
|
+
Report.new(@output)
|
152
|
+
end
|
153
|
+
|
154
|
+
end # End class Mast
|
155
|
+
end # End module Meme
|
156
|
+
end # End module Bio
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#
|
2
|
+
# = bio/appl/meme/mast/report.rb - Mast output parser class
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008, Adam Kraut <adamnkraut@gmail.com>,
|
5
|
+
|
6
|
+
#
|
7
|
+
# License:: The Ruby License
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# This file contains a class to parse Mast output
|
12
|
+
#
|
13
|
+
# == Examples
|
14
|
+
#
|
15
|
+
# == References
|
16
|
+
#
|
17
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
18
|
+
|
19
|
+
require "bio/appl/meme/mast"
|
20
|
+
require "bio/appl/meme/motif"
|
21
|
+
|
22
|
+
module Bio
|
23
|
+
module Meme
|
24
|
+
class Mast
|
25
|
+
|
26
|
+
# == Description
|
27
|
+
#
|
28
|
+
# A class to parse the output from Mast
|
29
|
+
#
|
30
|
+
# WARNING: Currently support is only for -hit_list (machine readable) format
|
31
|
+
# HTML (default) output is not supported
|
32
|
+
#
|
33
|
+
# == Examples
|
34
|
+
#
|
35
|
+
|
36
|
+
class Report
|
37
|
+
|
38
|
+
attr_reader :motifs
|
39
|
+
|
40
|
+
def initialize(mast_hitlist)
|
41
|
+
@motifs = parse_hit_list(mast_hitlist)
|
42
|
+
end
|
43
|
+
|
44
|
+
# Iterates each motif (Bio::Meme::Motif)
|
45
|
+
def each
|
46
|
+
@motifs.each do |motif|
|
47
|
+
yield motif
|
48
|
+
end
|
49
|
+
end
|
50
|
+
alias :each_motif :each
|
51
|
+
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
# Each line corresponds to one motif occurrence in one sequence.
|
56
|
+
# The format of the hit lines is
|
57
|
+
# [<sequence_name> <strand><motif> <start> <end> <p-value>]+
|
58
|
+
# where
|
59
|
+
# <sequence_name> is the name of the sequence containing the hit
|
60
|
+
# <strand> is the strand (+ or - for DNA, blank for protein),
|
61
|
+
# <motif> is the motif number,
|
62
|
+
# <start> is the starting position of the hit,
|
63
|
+
# <end> is the ending position of the hit, and
|
64
|
+
# <p-value> is the position p-value of the hit.
|
65
|
+
def parse_hit_list(data)
|
66
|
+
motifs = []
|
67
|
+
data.each_line do |line|
|
68
|
+
|
69
|
+
line.chomp!
|
70
|
+
|
71
|
+
# skip comments
|
72
|
+
next if line =~ /^#/
|
73
|
+
|
74
|
+
fields = line.split(/\s/)
|
75
|
+
|
76
|
+
if fields.size == 5
|
77
|
+
motifs << Motif.new(fields[0], strand = nil, fields[1], fields[2], fields[3], fields[4])
|
78
|
+
elsif fields.size == 6
|
79
|
+
motifs << Motif.new(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
|
80
|
+
else
|
81
|
+
raise RuntimeError.new("Could not parse mast output")
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
85
|
+
motifs
|
86
|
+
end
|
87
|
+
|
88
|
+
end # Result
|
89
|
+
end # Mast
|
90
|
+
end # Meme
|
91
|
+
end # Bio
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#
|
2
|
+
# = bio/appl/meme/motif.rb - Class to represent a sequence motif
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
|
5
|
+
#
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# == Description
|
9
|
+
#
|
10
|
+
# This file contains a minimal class to represent meme motifs
|
11
|
+
#
|
12
|
+
# == References
|
13
|
+
#
|
14
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
15
|
+
#
|
16
|
+
module Bio
|
17
|
+
module Meme
|
18
|
+
|
19
|
+
# == Description
|
20
|
+
#
|
21
|
+
# This class minimally represents a sequence motif according to the MEME program
|
22
|
+
#
|
23
|
+
# TODO: integrate with Bio::Sequence class
|
24
|
+
# TODO: parse PSSM data
|
25
|
+
#
|
26
|
+
class Motif
|
27
|
+
attr_accessor :sequence_name, :strand, :motif, :start_pos, :end_pos, :pvalue
|
28
|
+
|
29
|
+
# Creates a new Bio::Meme::Motif object
|
30
|
+
# arguments are
|
31
|
+
def initialize(sequence_name, strand, motif, start_pos, end_pos, pvalue)
|
32
|
+
@sequence_name = sequence_name.to_s
|
33
|
+
@strand = strand.to_s
|
34
|
+
@motif = motif.to_i
|
35
|
+
@start_pos = start_pos.to_i
|
36
|
+
@end_pos = end_pos.to_i
|
37
|
+
@pvalue = pvalue.to_f
|
38
|
+
end
|
39
|
+
|
40
|
+
# Computes the motif length
|
41
|
+
def length
|
42
|
+
@end_pos - @start_pos
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
end
|
data/lib/bio/appl/psort.rb
CHANGED
@@ -435,114 +435,3 @@ require 'uri'
|
|
435
435
|
|
436
436
|
end # module Bio
|
437
437
|
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
if __FILE__ == $0
|
443
|
-
|
444
|
-
begin
|
445
|
-
require 'psort/report.rb'
|
446
|
-
rescue LoadError
|
447
|
-
end
|
448
|
-
|
449
|
-
|
450
|
-
seq = ">hoge mit
|
451
|
-
MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
|
452
|
-
ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
|
453
|
-
DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
|
454
|
-
FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
|
455
|
-
KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
|
456
|
-
NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
|
457
|
-
SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
|
458
|
-
DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
|
459
|
-
DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
|
460
|
-
KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
|
461
|
-
APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
|
462
|
-
KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
|
463
|
-
"
|
464
|
-
Seq1 = ">hgoe
|
465
|
-
LTFVENDKII NI
|
466
|
-
"
|
467
|
-
|
468
|
-
puts "\n Bio::PSORT::PSORT"
|
469
|
-
|
470
|
-
puts "\n ==> p serv = Bio::PSORT::PSORT.imsut"
|
471
|
-
p serv = Bio::PSORT::PSORT1.imsut
|
472
|
-
|
473
|
-
puts "\n ==> p serv.class "
|
474
|
-
p serv.class
|
475
|
-
|
476
|
-
puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
|
477
|
-
p serv.title = 'Query_title_splited_by_white space'
|
478
|
-
|
479
|
-
puts "\n ==> p serv.exec(seq, false) "
|
480
|
-
p serv.exec(seq, false)
|
481
|
-
|
482
|
-
puts "\n ==> p serv.exec(seq) "
|
483
|
-
p serv.exec(seq)
|
484
|
-
|
485
|
-
puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
|
486
|
-
p report = serv.exec(Bio::FastaFormat.new(seq))
|
487
|
-
|
488
|
-
puts "\n ==> p report.class"
|
489
|
-
p report.class
|
490
|
-
|
491
|
-
|
492
|
-
puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
|
493
|
-
p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
494
|
-
|
495
|
-
puts "\n ==> p report_raw.class"
|
496
|
-
p report_raw.class
|
497
|
-
|
498
|
-
|
499
|
-
puts "\n ==> p report.methods"
|
500
|
-
p report.methods
|
501
|
-
|
502
|
-
methods = ['entry_id', 'origin', 'title', 'sequence','result_info',
|
503
|
-
'reasoning', 'final_result', 'raw']
|
504
|
-
methods.each do |method|
|
505
|
-
puts "\n ==> p report.#{method}"
|
506
|
-
p eval("report.#{method}")
|
507
|
-
end
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
puts "\n Bio::PSORT::PSORT2"
|
512
|
-
|
513
|
-
puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
|
514
|
-
p serv = Bio::PSORT::PSORT2.imsut
|
515
|
-
|
516
|
-
puts "\n ==> p serv.class "
|
517
|
-
p serv.class
|
518
|
-
|
519
|
-
puts "\n ==> p seq "
|
520
|
-
p seq
|
521
|
-
|
522
|
-
puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
|
523
|
-
p serv.title = 'Query_title_splited_by_white space'
|
524
|
-
|
525
|
-
puts "\n ==> p serv.exec(seq) # parsed report"
|
526
|
-
p serv.exec(seq)
|
527
|
-
|
528
|
-
puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
|
529
|
-
p report = serv.exec(Bio::FastaFormat.new(seq))
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
puts "\n ==> p serv.exec(seq, false) # report in plain text"
|
534
|
-
p serv.exec(seq, false)
|
535
|
-
|
536
|
-
puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
|
537
|
-
p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
538
|
-
|
539
|
-
|
540
|
-
puts "\n ==> p report.methods"
|
541
|
-
p report.methods
|
542
|
-
|
543
|
-
methods = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
|
544
|
-
methods.each do |method|
|
545
|
-
puts "\n ==> p report.#{method}"
|
546
|
-
p eval("report.#{method}")
|
547
|
-
end
|
548
|
-
end
|