bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/appl/hmmer.rb
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
|
6
6
|
# License:: The Ruby License
|
|
7
7
|
#
|
|
8
|
-
# $Id
|
|
8
|
+
# $Id:$
|
|
9
9
|
#
|
|
10
10
|
|
|
11
11
|
require 'bio/command'
|
|
@@ -110,17 +110,3 @@ end # class HMMER
|
|
|
110
110
|
|
|
111
111
|
end # module Bio
|
|
112
112
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
if __FILE__ == $0
|
|
116
|
-
|
|
117
|
-
require 'pp'
|
|
118
|
-
|
|
119
|
-
program = ARGV.shift # hmmsearch, hmmpfam
|
|
120
|
-
hmmfile = ARGV.shift
|
|
121
|
-
seqfile = ARGV.shift
|
|
122
|
-
|
|
123
|
-
factory = Bio::HMMER.new(program, hmmfile, seqfile)
|
|
124
|
-
pp factory.query
|
|
125
|
-
|
|
126
|
-
end
|
|
@@ -566,7 +566,6 @@ end # class HMMER
|
|
|
566
566
|
end # module Bio
|
|
567
567
|
|
|
568
568
|
|
|
569
|
-
if __FILE__ == $0
|
|
570
569
|
|
|
571
570
|
=begin
|
|
572
571
|
|
|
@@ -582,102 +581,3 @@ if __FILE__ == $0
|
|
|
582
581
|
|
|
583
582
|
=end
|
|
584
583
|
|
|
585
|
-
begin
|
|
586
|
-
require 'pp'
|
|
587
|
-
alias p pp
|
|
588
|
-
rescue LoadError
|
|
589
|
-
end
|
|
590
|
-
|
|
591
|
-
rep = Bio::HMMER::Report.new(ARGF.read)
|
|
592
|
-
p rep
|
|
593
|
-
|
|
594
|
-
indent = 18
|
|
595
|
-
|
|
596
|
-
puts "### hmmer result"
|
|
597
|
-
print "name : ".rjust(indent)
|
|
598
|
-
p rep.program['name']
|
|
599
|
-
print "version : ".rjust(indent)
|
|
600
|
-
p rep.program['version']
|
|
601
|
-
print "copyright : ".rjust(indent)
|
|
602
|
-
p rep.program['copyright']
|
|
603
|
-
print "license : ".rjust(indent)
|
|
604
|
-
p rep.program['license']
|
|
605
|
-
|
|
606
|
-
print "HMM file : ".rjust(indent)
|
|
607
|
-
p rep.parameter['HMM file']
|
|
608
|
-
print "Sequence file : ".rjust(indent)
|
|
609
|
-
p rep.parameter['Sequence file']
|
|
610
|
-
|
|
611
|
-
print "Query sequence : ".rjust(indent)
|
|
612
|
-
p rep.query_info['Query sequence']
|
|
613
|
-
print "Accession : ".rjust(indent)
|
|
614
|
-
p rep.query_info['Accession']
|
|
615
|
-
print "Description : ".rjust(indent)
|
|
616
|
-
p rep.query_info['Description']
|
|
617
|
-
|
|
618
|
-
rep.each do |hit|
|
|
619
|
-
puts "## each hit"
|
|
620
|
-
print "accession : ".rjust(indent)
|
|
621
|
-
p [ hit.accession, hit.target_id, hit.hit_id, hit.entry_id ]
|
|
622
|
-
print "description : ".rjust(indent)
|
|
623
|
-
p [ hit.description, hit.definition ]
|
|
624
|
-
print "target_def : ".rjust(indent)
|
|
625
|
-
p hit.target_def
|
|
626
|
-
print "score : ".rjust(indent)
|
|
627
|
-
p [ hit.score, hit.bit_score ]
|
|
628
|
-
print "evalue : ".rjust(indent)
|
|
629
|
-
p hit.evalue
|
|
630
|
-
print "num : ".rjust(indent)
|
|
631
|
-
p hit.num
|
|
632
|
-
|
|
633
|
-
hit.each do |hsp|
|
|
634
|
-
puts "## each hsp"
|
|
635
|
-
print "accession : ".rjust(indent)
|
|
636
|
-
p [ hsp.accession, hsp.target_id ]
|
|
637
|
-
print "domain : ".rjust(indent)
|
|
638
|
-
p hsp.domain
|
|
639
|
-
print "seq_f : ".rjust(indent)
|
|
640
|
-
p hsp.seq_f
|
|
641
|
-
print "seq_t : ".rjust(indent)
|
|
642
|
-
p hsp.seq_t
|
|
643
|
-
print "seq_ft : ".rjust(indent)
|
|
644
|
-
p hsp.seq_ft
|
|
645
|
-
print "hmm_f : ".rjust(indent)
|
|
646
|
-
p hsp.hmm_f
|
|
647
|
-
print "hmm_t : ".rjust(indent)
|
|
648
|
-
p hsp.hmm_t
|
|
649
|
-
print "hmm_ft : ".rjust(indent)
|
|
650
|
-
p hsp.hmm_ft
|
|
651
|
-
print "score : ".rjust(indent)
|
|
652
|
-
p [ hsp.score, hsp.bit_score ]
|
|
653
|
-
print "evalue : ".rjust(indent)
|
|
654
|
-
p hsp.evalue
|
|
655
|
-
print "midline : ".rjust(indent)
|
|
656
|
-
p hsp.midline
|
|
657
|
-
print "hmmseq : ".rjust(indent)
|
|
658
|
-
p hsp.hmmseq
|
|
659
|
-
print "flatseq : ".rjust(indent)
|
|
660
|
-
p hsp.flatseq
|
|
661
|
-
print "query_frame : ".rjust(indent)
|
|
662
|
-
p hsp.query_frame
|
|
663
|
-
print "target_frame : ".rjust(indent)
|
|
664
|
-
p hsp.target_frame
|
|
665
|
-
|
|
666
|
-
print "query_seq : ".rjust(indent)
|
|
667
|
-
p hsp.query_seq # hmmseq, flatseq
|
|
668
|
-
print "target_seq : ".rjust(indent)
|
|
669
|
-
p hsp.target_seq # flatseq, hmmseq
|
|
670
|
-
print "target_from : ".rjust(indent)
|
|
671
|
-
p hsp.target_from # seq_f, hmm_f
|
|
672
|
-
print "target_to : ".rjust(indent)
|
|
673
|
-
p hsp.target_to # seq_t, hmm_t
|
|
674
|
-
print "query_from : ".rjust(indent)
|
|
675
|
-
p hsp.query_from # hmm_f, seq_f
|
|
676
|
-
print "query_to : ".rjust(indent)
|
|
677
|
-
p hsp.query_to # hmm_t, seq_t
|
|
678
|
-
end
|
|
679
|
-
end
|
|
680
|
-
|
|
681
|
-
end
|
|
682
|
-
|
|
683
|
-
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/meme/mast.rb - Wrapper for running MAST program
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# == Description
|
|
9
|
+
#
|
|
10
|
+
# This file contains a wrapper for running the MAST tool for searching sequence databases using motifs
|
|
11
|
+
#
|
|
12
|
+
# == References
|
|
13
|
+
#
|
|
14
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
|
15
|
+
#
|
|
16
|
+
require "bio/command"
|
|
17
|
+
|
|
18
|
+
module Bio
|
|
19
|
+
module Meme
|
|
20
|
+
|
|
21
|
+
# == Description
|
|
22
|
+
#
|
|
23
|
+
# Bio::Meme::Mast is a wrapper for searching a database using sequence motifs. The code
|
|
24
|
+
# will read options from a Hash and run the program. Parsing of the output is provided by
|
|
25
|
+
# Bio::Meme::Mast::Report. Before running, options[:mfile] and options[:d] must be set
|
|
26
|
+
# in the constructor or Mast.config(options = {})
|
|
27
|
+
#
|
|
28
|
+
# == Usage
|
|
29
|
+
#
|
|
30
|
+
# mast = Mast.new('/path/to/mast')
|
|
31
|
+
# or with options
|
|
32
|
+
# mast = Mast.new('/path/to/mast', {:mfile => 'meme.out', :d => '/shared/db/nr'})
|
|
33
|
+
#
|
|
34
|
+
# report = Mast::Report.new(mast.run)
|
|
35
|
+
# report.each do |motif|
|
|
36
|
+
# puts motif.length
|
|
37
|
+
# end
|
|
38
|
+
#
|
|
39
|
+
#
|
|
40
|
+
class Mast
|
|
41
|
+
|
|
42
|
+
include Bio::Command
|
|
43
|
+
|
|
44
|
+
autoload :Report, 'bio/appl/meme/mast/report'
|
|
45
|
+
|
|
46
|
+
# A Hash of options for Mast
|
|
47
|
+
attr_accessor :options
|
|
48
|
+
|
|
49
|
+
DEFAULT_OPTIONS = {
|
|
50
|
+
# required
|
|
51
|
+
:mfile => nil,
|
|
52
|
+
:d => nil,
|
|
53
|
+
# optional
|
|
54
|
+
:stdin => nil, # may not work as expected
|
|
55
|
+
:count => nil,
|
|
56
|
+
:alphabet => nil,
|
|
57
|
+
:stdout => true,
|
|
58
|
+
:text => false,
|
|
59
|
+
:sep => false,
|
|
60
|
+
:norc => false,
|
|
61
|
+
:dna => false,
|
|
62
|
+
:comp => false,
|
|
63
|
+
:rank => nil,
|
|
64
|
+
:smax => nil,
|
|
65
|
+
:ev => nil,
|
|
66
|
+
:mt => nil,
|
|
67
|
+
:w => false,
|
|
68
|
+
:bfile => nil,
|
|
69
|
+
:seqp => false,
|
|
70
|
+
:mf => nil,
|
|
71
|
+
:df => nil,
|
|
72
|
+
:minseqs => nil,
|
|
73
|
+
:mev => nil,
|
|
74
|
+
:m => nil,
|
|
75
|
+
:diag => nil,
|
|
76
|
+
:best => false,
|
|
77
|
+
:remcorr => false,
|
|
78
|
+
:brief => false,
|
|
79
|
+
:b => false,
|
|
80
|
+
:nostatus => true,
|
|
81
|
+
:hit_list => true,
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
# The command line String to be executed
|
|
85
|
+
attr_reader :cmd
|
|
86
|
+
|
|
87
|
+
# Create a mast instance
|
|
88
|
+
#
|
|
89
|
+
# m = Mast.new('/usr/local/bin/mast')
|
|
90
|
+
# ---
|
|
91
|
+
# *Arguments*:
|
|
92
|
+
# * (required) _mast_location_: String
|
|
93
|
+
# *Raises*:: ArgumentError if mast program is not found
|
|
94
|
+
# *Returns*:: a Bio::Meme::Mast object
|
|
95
|
+
|
|
96
|
+
def initialize(mast_location, options = {})
|
|
97
|
+
unless File.exists?(mast_location)
|
|
98
|
+
raise ArgumentError.new("mast: command not found : #{mast_location}")
|
|
99
|
+
end
|
|
100
|
+
@binary = mast_location
|
|
101
|
+
options.empty? ? config(DEFAULT_OPTIONS) : config(options)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Builds the command line string
|
|
105
|
+
# any options passed in will be merged with DEFAULT_OPTIONS
|
|
106
|
+
# Mast usage: mast <mfile> <opts> <flags>
|
|
107
|
+
#
|
|
108
|
+
# mast.config({:mfile => "meme.out", :d => "/path/to/fasta/db"})
|
|
109
|
+
# ---
|
|
110
|
+
# *Arguments*:
|
|
111
|
+
# * (required) _options_: Hash (see DEFAULT_OPTIONS)
|
|
112
|
+
# *Returns*:: the command line string
|
|
113
|
+
|
|
114
|
+
def config(options)
|
|
115
|
+
@options = DEFAULT_OPTIONS.merge(options)
|
|
116
|
+
mfile, opts, flags = "", "", ""
|
|
117
|
+
@options.each_pair do |opt, val|
|
|
118
|
+
if val.nil? or val == false
|
|
119
|
+
next
|
|
120
|
+
elsif opt == :mfile
|
|
121
|
+
mfile = val
|
|
122
|
+
elsif val == true
|
|
123
|
+
flags << " -#{opt}"
|
|
124
|
+
else
|
|
125
|
+
opts << " -#{opt} #{val}"
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
@cmd = "#{@binary} #{mfile + opts + flags}"
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# Checks if input/database files exist and options are valid
|
|
132
|
+
# *Raises*:: ArgumentError if the motifs file does not exist
|
|
133
|
+
# *Raises*:: ArgumentError if the database file does not exist
|
|
134
|
+
# *Raises*:: ArgumentError if there is an invalid option
|
|
135
|
+
|
|
136
|
+
def check_options
|
|
137
|
+
@options.each_key do |k|
|
|
138
|
+
raise ArgumentError.new("Invalid option: #{k}") unless DEFAULT_OPTIONS.has_key?(k)
|
|
139
|
+
end
|
|
140
|
+
raise ArgumentError.new("Motif file not found: #{@options[:mfile]}") if @options[:mfile].nil? or !File.exists?(@options[:mfile])
|
|
141
|
+
raise ArgumentError.new("Database not found: #{@options[:d]}") if @options[:d].nil? or !File.exists?(@options[:d])
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Run the mast program
|
|
145
|
+
# ---
|
|
146
|
+
# *Returns*:: Bio::Meme::Mast::Report object
|
|
147
|
+
|
|
148
|
+
def run
|
|
149
|
+
check_options
|
|
150
|
+
call_command(@cmd) {|io| @output = io.read }
|
|
151
|
+
Report.new(@output)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
end # End class Mast
|
|
155
|
+
end # End module Meme
|
|
156
|
+
end # End module Bio
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/meme/mast/report.rb - Mast output parser class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008, Adam Kraut <adamnkraut@gmail.com>,
|
|
5
|
+
|
|
6
|
+
#
|
|
7
|
+
# License:: The Ruby License
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# This file contains a class to parse Mast output
|
|
12
|
+
#
|
|
13
|
+
# == Examples
|
|
14
|
+
#
|
|
15
|
+
# == References
|
|
16
|
+
#
|
|
17
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
|
18
|
+
|
|
19
|
+
require "bio/appl/meme/mast"
|
|
20
|
+
require "bio/appl/meme/motif"
|
|
21
|
+
|
|
22
|
+
module Bio
|
|
23
|
+
module Meme
|
|
24
|
+
class Mast
|
|
25
|
+
|
|
26
|
+
# == Description
|
|
27
|
+
#
|
|
28
|
+
# A class to parse the output from Mast
|
|
29
|
+
#
|
|
30
|
+
# WARNING: Currently support is only for -hit_list (machine readable) format
|
|
31
|
+
# HTML (default) output is not supported
|
|
32
|
+
#
|
|
33
|
+
# == Examples
|
|
34
|
+
#
|
|
35
|
+
|
|
36
|
+
class Report
|
|
37
|
+
|
|
38
|
+
attr_reader :motifs
|
|
39
|
+
|
|
40
|
+
def initialize(mast_hitlist)
|
|
41
|
+
@motifs = parse_hit_list(mast_hitlist)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Iterates each motif (Bio::Meme::Motif)
|
|
45
|
+
def each
|
|
46
|
+
@motifs.each do |motif|
|
|
47
|
+
yield motif
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
alias :each_motif :each
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
# Each line corresponds to one motif occurrence in one sequence.
|
|
56
|
+
# The format of the hit lines is
|
|
57
|
+
# [<sequence_name> <strand><motif> <start> <end> <p-value>]+
|
|
58
|
+
# where
|
|
59
|
+
# <sequence_name> is the name of the sequence containing the hit
|
|
60
|
+
# <strand> is the strand (+ or - for DNA, blank for protein),
|
|
61
|
+
# <motif> is the motif number,
|
|
62
|
+
# <start> is the starting position of the hit,
|
|
63
|
+
# <end> is the ending position of the hit, and
|
|
64
|
+
# <p-value> is the position p-value of the hit.
|
|
65
|
+
def parse_hit_list(data)
|
|
66
|
+
motifs = []
|
|
67
|
+
data.each_line do |line|
|
|
68
|
+
|
|
69
|
+
line.chomp!
|
|
70
|
+
|
|
71
|
+
# skip comments
|
|
72
|
+
next if line =~ /^#/
|
|
73
|
+
|
|
74
|
+
fields = line.split(/\s/)
|
|
75
|
+
|
|
76
|
+
if fields.size == 5
|
|
77
|
+
motifs << Motif.new(fields[0], strand = nil, fields[1], fields[2], fields[3], fields[4])
|
|
78
|
+
elsif fields.size == 6
|
|
79
|
+
motifs << Motif.new(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5])
|
|
80
|
+
else
|
|
81
|
+
raise RuntimeError.new("Could not parse mast output")
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
end
|
|
85
|
+
motifs
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
end # Result
|
|
89
|
+
end # Mast
|
|
90
|
+
end # Meme
|
|
91
|
+
end # Bio
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/meme/motif.rb - Class to represent a sequence motif
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2008 Adam Kraut <adamnkraut@gmail.com>,
|
|
5
|
+
#
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# == Description
|
|
9
|
+
#
|
|
10
|
+
# This file contains a minimal class to represent meme motifs
|
|
11
|
+
#
|
|
12
|
+
# == References
|
|
13
|
+
#
|
|
14
|
+
# * http://meme.sdsc.edu/meme/intro.html
|
|
15
|
+
#
|
|
16
|
+
module Bio
|
|
17
|
+
module Meme
|
|
18
|
+
|
|
19
|
+
# == Description
|
|
20
|
+
#
|
|
21
|
+
# This class minimally represents a sequence motif according to the MEME program
|
|
22
|
+
#
|
|
23
|
+
# TODO: integrate with Bio::Sequence class
|
|
24
|
+
# TODO: parse PSSM data
|
|
25
|
+
#
|
|
26
|
+
class Motif
|
|
27
|
+
attr_accessor :sequence_name, :strand, :motif, :start_pos, :end_pos, :pvalue
|
|
28
|
+
|
|
29
|
+
# Creates a new Bio::Meme::Motif object
|
|
30
|
+
# arguments are
|
|
31
|
+
def initialize(sequence_name, strand, motif, start_pos, end_pos, pvalue)
|
|
32
|
+
@sequence_name = sequence_name.to_s
|
|
33
|
+
@strand = strand.to_s
|
|
34
|
+
@motif = motif.to_i
|
|
35
|
+
@start_pos = start_pos.to_i
|
|
36
|
+
@end_pos = end_pos.to_i
|
|
37
|
+
@pvalue = pvalue.to_f
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Computes the motif length
|
|
41
|
+
def length
|
|
42
|
+
@end_pos - @start_pos
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
end
|
|
48
|
+
end
|
data/lib/bio/appl/psort.rb
CHANGED
|
@@ -435,114 +435,3 @@ require 'uri'
|
|
|
435
435
|
|
|
436
436
|
end # module Bio
|
|
437
437
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
if __FILE__ == $0
|
|
443
|
-
|
|
444
|
-
begin
|
|
445
|
-
require 'psort/report.rb'
|
|
446
|
-
rescue LoadError
|
|
447
|
-
end
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
seq = ">hoge mit
|
|
451
|
-
MALEPIDYTT RDEDDLDENE LLMKISNAAG SSRVNDNNDD LTFVENDKII
|
|
452
|
-
ARYSIQTSSK QQGKASTPPV EEAEEAAPQL PSRSSAAPPP PPRRATPEKK
|
|
453
|
-
DVKDLKSKFE GLAASEKEEE EMENKFAPPP KKSEPTIISP KPFSKPQEPV
|
|
454
|
-
FKGYHVQVTA HSREIDAEYL KIVRGSDPDT TWLIISPNAK KEYEPESTGS
|
|
455
|
-
KKSFTPSKSP APVSKKEPVK TPSPAPAAKI PKENPWATAE YDYDAAEDNE
|
|
456
|
-
NIEFVDDDWW LGELEKDGSK GLFPSNYVSL LPSRNVASGA PVQKEEPEQE
|
|
457
|
-
SFHDFLQLFD ETKVQYGLAR RKAKQNSGNA ETKAEAPKPE VPEDEPEGEP
|
|
458
|
-
DDWNEPELKE RDFDQAPLKP NQSSYKPIGK IDLQKVIAEE KAKEDPRLVQ
|
|
459
|
-
DYKKIGNPLP GMHIEADNEE EPEENDDDWD DDEDEAAQPP ANFAAVANNL
|
|
460
|
-
KPTAAGSKID DDKVIKGFRN EKSPAQLWAE VSPPGSDVEK IIIIGWCPDS
|
|
461
|
-
APLKTRASFA PSSDIANLKN ESKLKRDSEF NSFLGTTKPP SMTESSLKND
|
|
462
|
-
KAEEAEQPKT EIAPSLPSRN SIPAPKQEEA PEQAPEEEIE GN
|
|
463
|
-
"
|
|
464
|
-
Seq1 = ">hgoe
|
|
465
|
-
LTFVENDKII NI
|
|
466
|
-
"
|
|
467
|
-
|
|
468
|
-
puts "\n Bio::PSORT::PSORT"
|
|
469
|
-
|
|
470
|
-
puts "\n ==> p serv = Bio::PSORT::PSORT.imsut"
|
|
471
|
-
p serv = Bio::PSORT::PSORT1.imsut
|
|
472
|
-
|
|
473
|
-
puts "\n ==> p serv.class "
|
|
474
|
-
p serv.class
|
|
475
|
-
|
|
476
|
-
puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
|
|
477
|
-
p serv.title = 'Query_title_splited_by_white space'
|
|
478
|
-
|
|
479
|
-
puts "\n ==> p serv.exec(seq, false) "
|
|
480
|
-
p serv.exec(seq, false)
|
|
481
|
-
|
|
482
|
-
puts "\n ==> p serv.exec(seq) "
|
|
483
|
-
p serv.exec(seq)
|
|
484
|
-
|
|
485
|
-
puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) "
|
|
486
|
-
p report = serv.exec(Bio::FastaFormat.new(seq))
|
|
487
|
-
|
|
488
|
-
puts "\n ==> p report.class"
|
|
489
|
-
p report.class
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) "
|
|
493
|
-
p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
|
494
|
-
|
|
495
|
-
puts "\n ==> p report_raw.class"
|
|
496
|
-
p report_raw.class
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
puts "\n ==> p report.methods"
|
|
500
|
-
p report.methods
|
|
501
|
-
|
|
502
|
-
methods = ['entry_id', 'origin', 'title', 'sequence','result_info',
|
|
503
|
-
'reasoning', 'final_result', 'raw']
|
|
504
|
-
methods.each do |method|
|
|
505
|
-
puts "\n ==> p report.#{method}"
|
|
506
|
-
p eval("report.#{method}")
|
|
507
|
-
end
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
puts "\n Bio::PSORT::PSORT2"
|
|
512
|
-
|
|
513
|
-
puts "\n ==> p serv = Bio::PSORT::PSORT2.imsut"
|
|
514
|
-
p serv = Bio::PSORT::PSORT2.imsut
|
|
515
|
-
|
|
516
|
-
puts "\n ==> p serv.class "
|
|
517
|
-
p serv.class
|
|
518
|
-
|
|
519
|
-
puts "\n ==> p seq "
|
|
520
|
-
p seq
|
|
521
|
-
|
|
522
|
-
puts "\n ==> p serv.title = 'Query_title_splited_by_white space'"
|
|
523
|
-
p serv.title = 'Query_title_splited_by_white space'
|
|
524
|
-
|
|
525
|
-
puts "\n ==> p serv.exec(seq) # parsed report"
|
|
526
|
-
p serv.exec(seq)
|
|
527
|
-
|
|
528
|
-
puts "\n ==> p report = serv.exec(Bio::FastaFormat.new(seq)) # parsed report"
|
|
529
|
-
p report = serv.exec(Bio::FastaFormat.new(seq))
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
puts "\n ==> p serv.exec(seq, false) # report in plain text"
|
|
534
|
-
p serv.exec(seq, false)
|
|
535
|
-
|
|
536
|
-
puts "\n ==> p report_raw = serv.exec(Bio::FastaFormat.new(seq), false) # report in plain text"
|
|
537
|
-
p report_raw = serv.exec(Bio::FastaFormat.new(seq), false)
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
puts "\n ==> p report.methods"
|
|
541
|
-
p report.methods
|
|
542
|
-
|
|
543
|
-
methods = ['entry_id', 'scl', 'definition', 'seq', 'features', 'prob', 'pred', 'k', 'raw']
|
|
544
|
-
methods.each do |method|
|
|
545
|
-
puts "\n ==> p report.#{method}"
|
|
546
|
-
p eval("report.#{method}")
|
|
547
|
-
end
|
|
548
|
-
end
|