bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
data/lib/bio/pathway.rb
CHANGED
|
@@ -787,174 +787,3 @@ end # Relation
|
|
|
787
787
|
|
|
788
788
|
end # Bio
|
|
789
789
|
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
if __FILE__ == $0
|
|
793
|
-
|
|
794
|
-
puts "--- Test === method true/false"
|
|
795
|
-
r1 = Bio::Relation.new('a', 'b', 1)
|
|
796
|
-
r2 = Bio::Relation.new('b', 'a', 1)
|
|
797
|
-
r3 = Bio::Relation.new('b', 'a', 2)
|
|
798
|
-
r4 = Bio::Relation.new('a', 'b', 1)
|
|
799
|
-
p r1 === r2
|
|
800
|
-
p r1 === r3
|
|
801
|
-
p r1 === r4
|
|
802
|
-
p [ r1, r2, r3, r4 ].uniq
|
|
803
|
-
p r1.eql?(r2)
|
|
804
|
-
p r3.eql?(r2)
|
|
805
|
-
|
|
806
|
-
# Sample Graph :
|
|
807
|
-
# +----------------+
|
|
808
|
-
# | |
|
|
809
|
-
# v |
|
|
810
|
-
# +---------(q)-->(t)------->(y)<----(r)
|
|
811
|
-
# | | | ^ |
|
|
812
|
-
# v | v | |
|
|
813
|
-
# +--(s)<--+ | (x)<---+ (u)<-----+
|
|
814
|
-
# | | | | |
|
|
815
|
-
# v | | v |
|
|
816
|
-
# (v)----->(w)<---+ (z)----+
|
|
817
|
-
|
|
818
|
-
data = [
|
|
819
|
-
[ 'q', 's', 1, ],
|
|
820
|
-
[ 'q', 't', 1, ],
|
|
821
|
-
[ 'q', 'w', 1, ],
|
|
822
|
-
[ 'r', 'u', 1, ],
|
|
823
|
-
[ 'r', 'y', 1, ],
|
|
824
|
-
[ 's', 'v', 1, ],
|
|
825
|
-
[ 't', 'x', 1, ],
|
|
826
|
-
[ 't', 'y', 1, ],
|
|
827
|
-
[ 'u', 'y', 1, ],
|
|
828
|
-
[ 'v', 'w', 1, ],
|
|
829
|
-
[ 'w', 's', 1, ],
|
|
830
|
-
[ 'x', 'z', 1, ],
|
|
831
|
-
[ 'y', 'q', 1, ],
|
|
832
|
-
[ 'z', 'x', 1, ],
|
|
833
|
-
]
|
|
834
|
-
|
|
835
|
-
ary = []
|
|
836
|
-
|
|
837
|
-
puts "--- List of relations"
|
|
838
|
-
data.each do |x|
|
|
839
|
-
ary << Bio::Relation.new(*x)
|
|
840
|
-
end
|
|
841
|
-
p ary
|
|
842
|
-
|
|
843
|
-
puts "--- Generate graph from list of relations"
|
|
844
|
-
graph = Bio::Pathway.new(ary)
|
|
845
|
-
p graph
|
|
846
|
-
|
|
847
|
-
puts "--- Test to_matrix method"
|
|
848
|
-
p graph.to_matrix
|
|
849
|
-
|
|
850
|
-
puts "--- Test dump_matrix method"
|
|
851
|
-
puts graph.dump_matrix(0)
|
|
852
|
-
|
|
853
|
-
puts "--- Test dump_list method"
|
|
854
|
-
puts graph.dump_list
|
|
855
|
-
|
|
856
|
-
puts "--- Labeling some nodes"
|
|
857
|
-
hash = { 'q' => "L1", 's' => "L2", 'v' => "L3", 'w' => "L4" }
|
|
858
|
-
graph.label = hash
|
|
859
|
-
p graph
|
|
860
|
-
|
|
861
|
-
puts "--- Extract subgraph by label"
|
|
862
|
-
p graph.subgraph
|
|
863
|
-
|
|
864
|
-
puts "--- Extract subgraph by list"
|
|
865
|
-
p graph.subgraph(['q', 't', 'x', 'y', 'z'])
|
|
866
|
-
|
|
867
|
-
puts "--- Test cliquishness of the node 'q'"
|
|
868
|
-
p graph.cliquishness('q')
|
|
869
|
-
|
|
870
|
-
puts "--- Test cliquishness of the node 'q' (undirected)"
|
|
871
|
-
u_graph = Bio::Pathway.new(ary, 'undirected')
|
|
872
|
-
p u_graph.cliquishness('q')
|
|
873
|
-
|
|
874
|
-
puts "--- Test small_world histgram"
|
|
875
|
-
p graph.small_world
|
|
876
|
-
|
|
877
|
-
puts "--- Test breadth_first_search method"
|
|
878
|
-
distance, predecessor = graph.breadth_first_search('q')
|
|
879
|
-
p distance
|
|
880
|
-
p predecessor
|
|
881
|
-
|
|
882
|
-
puts "--- Test bfs_shortest_path method"
|
|
883
|
-
step, path = graph.bfs_shortest_path('y', 'w')
|
|
884
|
-
p step
|
|
885
|
-
p path
|
|
886
|
-
|
|
887
|
-
puts "--- Test depth_first_search method"
|
|
888
|
-
timestamp, tree, back, cross, forward = graph.depth_first_search
|
|
889
|
-
p timestamp
|
|
890
|
-
print "tree edges : "; p tree
|
|
891
|
-
print "back edges : "; p back
|
|
892
|
-
print "cross edges : "; p cross
|
|
893
|
-
print "forward edges : "; p forward
|
|
894
|
-
|
|
895
|
-
puts "--- Test dfs_topological_sort method"
|
|
896
|
-
#
|
|
897
|
-
# Professor Bumstead topologically sorts his clothing when getting dressed.
|
|
898
|
-
#
|
|
899
|
-
# "undershorts" "socks"
|
|
900
|
-
# | | |
|
|
901
|
-
# v | v "watch"
|
|
902
|
-
# "pants" --+-------> "shoes"
|
|
903
|
-
# |
|
|
904
|
-
# v
|
|
905
|
-
# "belt" <----- "shirt" ----> "tie" ----> "jacket"
|
|
906
|
-
# | ^
|
|
907
|
-
# `---------------------------------------'
|
|
908
|
-
#
|
|
909
|
-
dag = Bio::Pathway.new([
|
|
910
|
-
Bio::Relation.new("undeershorts", "pants", true),
|
|
911
|
-
Bio::Relation.new("undeershorts", "shoes", true),
|
|
912
|
-
Bio::Relation.new("socks", "shoes", true),
|
|
913
|
-
Bio::Relation.new("watch", "watch", true),
|
|
914
|
-
Bio::Relation.new("pants", "belt", true),
|
|
915
|
-
Bio::Relation.new("pants", "shoes", true),
|
|
916
|
-
Bio::Relation.new("shirt", "belt", true),
|
|
917
|
-
Bio::Relation.new("shirt", "tie", true),
|
|
918
|
-
Bio::Relation.new("tie", "jacket", true),
|
|
919
|
-
Bio::Relation.new("belt", "jacket", true),
|
|
920
|
-
])
|
|
921
|
-
p dag.dfs_topological_sort
|
|
922
|
-
|
|
923
|
-
puts "--- Test dijkstra method"
|
|
924
|
-
distance, predecessor = graph.dijkstra('q')
|
|
925
|
-
p distance
|
|
926
|
-
p predecessor
|
|
927
|
-
|
|
928
|
-
puts "--- Test dijkstra method by weighted graph"
|
|
929
|
-
#
|
|
930
|
-
# 'a' --> 'b'
|
|
931
|
-
# | 1 | 3
|
|
932
|
-
# |5 v
|
|
933
|
-
# `----> 'c'
|
|
934
|
-
#
|
|
935
|
-
r1 = Bio::Relation.new('a', 'b', 1)
|
|
936
|
-
r2 = Bio::Relation.new('a', 'c', 5)
|
|
937
|
-
r3 = Bio::Relation.new('b', 'c', 3)
|
|
938
|
-
w_graph = Bio::Pathway.new([r1, r2, r3])
|
|
939
|
-
p w_graph
|
|
940
|
-
p w_graph.dijkstra('a')
|
|
941
|
-
|
|
942
|
-
puts "--- Test bellman_ford method by negative weighted graph"
|
|
943
|
-
#
|
|
944
|
-
# ,-- 'a' --> 'b'
|
|
945
|
-
# | | 1 | 3
|
|
946
|
-
# | |5 v
|
|
947
|
-
# | `----> 'c'
|
|
948
|
-
# | ^
|
|
949
|
-
# |2 | -5
|
|
950
|
-
# `--> 'd' ----'
|
|
951
|
-
#
|
|
952
|
-
r4 = Bio::Relation.new('a', 'd', 2)
|
|
953
|
-
r5 = Bio::Relation.new('d', 'c', -5)
|
|
954
|
-
w_graph.append(r4)
|
|
955
|
-
w_graph.append(r5)
|
|
956
|
-
p w_graph.bellman_ford('a')
|
|
957
|
-
p graph.bellman_ford('q')
|
|
958
|
-
|
|
959
|
-
end
|
|
960
|
-
|
data/lib/bio/sequence.rb
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
|
10
10
|
# License:: The Ruby License
|
|
11
11
|
#
|
|
12
|
-
# $Id
|
|
12
|
+
# $Id:$
|
|
13
13
|
#
|
|
14
14
|
|
|
15
15
|
require 'bio/sequence/compat'
|
|
@@ -71,6 +71,7 @@ class Sequence
|
|
|
71
71
|
autoload :Generic, 'bio/sequence/generic'
|
|
72
72
|
autoload :Format, 'bio/sequence/format'
|
|
73
73
|
autoload :Adapter, 'bio/sequence/adapter'
|
|
74
|
+
autoload :QualityScore, 'bio/sequence/quality_score'
|
|
74
75
|
|
|
75
76
|
include Format
|
|
76
77
|
|
|
@@ -150,6 +151,22 @@ class Sequence
|
|
|
150
151
|
# but could be a simple String
|
|
151
152
|
attr_accessor :seq
|
|
152
153
|
|
|
154
|
+
# Quality scores of the bases/residues in the sequence.
|
|
155
|
+
# (Array containing Integer, or nil)
|
|
156
|
+
attr_accessor :quality_scores
|
|
157
|
+
|
|
158
|
+
# The meaning (calculation method) of the quality scores stored in
|
|
159
|
+
# the <tt>quality_scores</tt> attribute.
|
|
160
|
+
# Maybe one of :phred, :solexa, or nil.
|
|
161
|
+
#
|
|
162
|
+
# Note that if it is nil, and <tt>error_probabilities</tt> is empty,
|
|
163
|
+
# some methods implicitly assumes that it is :phred (PHRED score).
|
|
164
|
+
attr_accessor :quality_score_type
|
|
165
|
+
|
|
166
|
+
# Error probabilities of the bases/residues in the sequence.
|
|
167
|
+
# (Array containing Float, or nil)
|
|
168
|
+
attr_accessor :error_probabilities
|
|
169
|
+
|
|
153
170
|
#---
|
|
154
171
|
# Attributes below have been added during BioHackathon2008
|
|
155
172
|
#+++
|
data/lib/bio/sequence/adapter.rb
CHANGED
|
@@ -23,6 +23,9 @@ module Bio::Sequence::Adapter
|
|
|
23
23
|
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
|
24
24
|
autoload :FastaFormat, 'bio/db/fasta/fasta_to_biosequence'
|
|
25
25
|
autoload :BioSQL, 'bio/db/biosql/biosql_to_biosequence'
|
|
26
|
+
autoload :SangerChromatogram,
|
|
27
|
+
'bio/db/sanger_chromatogram/chromatogram_to_biosequence'
|
|
28
|
+
autoload :Fastq, 'bio/db/fastq/fastq_to_biosequence'
|
|
26
29
|
|
|
27
30
|
private
|
|
28
31
|
|
data/lib/bio/sequence/format.rb
CHANGED
|
@@ -47,6 +47,22 @@ module Format
|
|
|
47
47
|
# (resemble to EMBOSS "ncbi" format)
|
|
48
48
|
autoload :Fasta_ncbi, 'bio/db/fasta/format_fasta'
|
|
49
49
|
|
|
50
|
+
# FASTQ "fastq-sanger" format generator
|
|
51
|
+
autoload :Fastq, 'bio/db/fastq/format_fastq'
|
|
52
|
+
# FASTQ "fastq-sanger" format generator
|
|
53
|
+
autoload :Fastq_sanger, 'bio/db/fastq/format_fastq'
|
|
54
|
+
# FASTQ "fastq-solexa" format generator
|
|
55
|
+
autoload :Fastq_solexa, 'bio/db/fastq/format_fastq'
|
|
56
|
+
# FASTQ "fastq-illumina" format generator
|
|
57
|
+
autoload :Fastq_illumina, 'bio/db/fastq/format_fastq'
|
|
58
|
+
|
|
59
|
+
# FastaNumericFormat format generator
|
|
60
|
+
autoload :Fasta_numeric, 'bio/db/fasta/format_qual'
|
|
61
|
+
# Qual format generator.
|
|
62
|
+
# Its format is the same as Fasta_numeric, but it would perform
|
|
63
|
+
# to convert quality score or generates scores from error probability.
|
|
64
|
+
autoload :Qual, 'bio/db/fasta/format_qual'
|
|
65
|
+
|
|
50
66
|
end #module Formatter
|
|
51
67
|
|
|
52
68
|
# Repository of nucleotide sequence formatter classes
|
|
data/lib/bio/sequence/quality_score.rb
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/sequence/quality_score.rb - Sequence quality score manipulation modules
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009
|
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# == Description
|
|
9
|
+
#
|
|
10
|
+
# Sequence quality score manipulation modules, mainly used by Bio::Fastq
|
|
11
|
+
# and related classes.
|
|
12
|
+
#
|
|
13
|
+
# == References
|
|
14
|
+
#
|
|
15
|
+
# * FASTQ format specification
|
|
16
|
+
# http://maq.sourceforge.net/fastq.shtml
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
module Bio
|
|
20
|
+
|
|
21
|
+
class Sequence
|
|
22
|
+
|
|
23
|
+
# Bio::Sequence::QualityScore is a name space for quality score modules.
|
|
24
|
+
# BioRuby internal use only (mainly from Bio::Fastq).
|
|
25
|
+
module QualityScore
|
|
26
|
+
|
|
27
|
+
# Converter methods between PHRED and Solexa quality scores.
|
|
28
|
+
module Converter
|
|
29
|
+
|
|
30
|
+
# Converts PHRED scores to Solexa scores.
|
|
31
|
+
#
|
|
32
|
+
# The values may be truncated or incorrect if overflows/underflows
|
|
33
|
+
# occurred during the calculation.
|
|
34
|
+
# ---
|
|
35
|
+
# *Arguments*:
|
|
36
|
+
# * (required) _scores_: (Array containing Integer) quality scores
|
|
37
|
+
# *Returns*:: (Array containing Integer) quality scores
|
|
38
|
+
def convert_scores_from_phred_to_solexa(scores)
|
|
39
|
+
sc = scores.collect do |q|
|
|
40
|
+
t = 10 ** (q / 10.0) - 1
|
|
41
|
+
t = Float::MIN if t < Float::MIN
|
|
42
|
+
r = 10 * Math.log10(t)
|
|
43
|
+
r.finite? ? r.round : r
|
|
44
|
+
end
|
|
45
|
+
sc
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Converts Solexa scores to PHRED scores.
|
|
49
|
+
#
|
|
50
|
+
# The values may be truncated if overflows/underflows occurred
|
|
51
|
+
# during the calculation.
|
|
52
|
+
# ---
|
|
53
|
+
# *Arguments*:
|
|
54
|
+
# * (required) _scores_: (Array containing Integer) quality scores
|
|
55
|
+
# *Returns*:: (Array containing Integer) quality scores
|
|
56
|
+
def convert_scores_from_solexa_to_phred(scores)
|
|
57
|
+
sc = scores.collect do |q|
|
|
58
|
+
r = 10 * Math.log10(10 ** (q / 10.0) + 1)
|
|
59
|
+
r.finite? ? r.round : r
|
|
60
|
+
end
|
|
61
|
+
sc
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Does nothing and simply returns the given argument.
|
|
65
|
+
#
|
|
66
|
+
# ---
|
|
67
|
+
# *Arguments*:
|
|
68
|
+
# * (required) _scores_: (Array containing Integer) quality scores
|
|
69
|
+
# *Returns*:: (Array containing Integer) quality scores
|
|
70
|
+
def convert_nothing(scores)
|
|
71
|
+
scores
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
end #module Converter
|
|
75
|
+
|
|
76
|
+
# Bio::Sequence::QualityScore::Phred is a module having quality calculation
|
|
77
|
+
# methods for the PHRED quality score.
|
|
78
|
+
#
|
|
79
|
+
# BioRuby internal use only (mainly from Bio::Fastq).
|
|
80
|
+
module Phred
|
|
81
|
+
|
|
82
|
+
include Converter
|
|
83
|
+
|
|
84
|
+
# Type of quality scores.
|
|
85
|
+
# ---
|
|
86
|
+
# *Returns*:: (Symbol) the type of quality score.
|
|
87
|
+
def quality_score_type
|
|
88
|
+
:phred
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# PHRED score to probability conversion.
|
|
92
|
+
# ---
|
|
93
|
+
# *Arguments*:
|
|
94
|
+
# * (required) _scores_: (Array containing Integer) scores
|
|
95
|
+
# *Returns*:: (Array containing Float) probabilities (0<=p<=1)
|
|
96
|
+
def phred_q2p(scores)
|
|
97
|
+
scores.collect do |q|
|
|
98
|
+
r = 10 ** (- q / 10.0)
|
|
99
|
+
if r > 1.0 then
|
|
100
|
+
r = 1.0
|
|
101
|
+
#elsif r < 0.0 then
|
|
102
|
+
# r = 0.0
|
|
103
|
+
end
|
|
104
|
+
r
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
alias q2p phred_q2p
|
|
108
|
+
module_function :q2p
|
|
109
|
+
public :q2p
|
|
110
|
+
|
|
111
|
+
# Probability to PHRED score conversion.
|
|
112
|
+
#
|
|
113
|
+
# The values may be truncated or incorrect if overflows/underflows
|
|
114
|
+
# occurred during the calculation.
|
|
115
|
+
# ---
|
|
116
|
+
# *Arguments*:
|
|
117
|
+
# * (required) _probabilities_: (Array containing Float) probabilities
|
|
118
|
+
# *Returns*:: (Array containing Float) scores
|
|
119
|
+
def phred_p2q(probabilities)
|
|
120
|
+
probabilities.collect do |p|
|
|
121
|
+
p = Float::MIN if p < Float::MIN
|
|
122
|
+
q = -10 * Math.log10(p)
|
|
123
|
+
q.finite? ? q.round : q
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
alias p2q phred_p2q
|
|
127
|
+
module_function :p2q
|
|
128
|
+
public :p2q
|
|
129
|
+
|
|
130
|
+
alias convert_scores_from_phred convert_nothing
|
|
131
|
+
alias convert_scores_to_phred convert_nothing
|
|
132
|
+
alias convert_scores_from_solexa convert_scores_from_solexa_to_phred
|
|
133
|
+
alias convert_scores_to_solexa convert_scores_from_phred_to_solexa
|
|
134
|
+
module_function :convert_scores_to_solexa
|
|
135
|
+
public :convert_scores_to_solexa
|
|
136
|
+
|
|
137
|
+
end #module Phred
|
|
138
|
+
|
|
139
|
+
# Bio::Sequence::QualityScore::Solexa is a module having quality
|
|
140
|
+
# calculation methods for the Solexa quality score.
|
|
141
|
+
#
|
|
142
|
+
# BioRuby internal use only (mainly from Bio::Fastq).
|
|
143
|
+
module Solexa
|
|
144
|
+
|
|
145
|
+
include Converter
|
|
146
|
+
|
|
147
|
+
# Type of quality scores.
|
|
148
|
+
# ---
|
|
149
|
+
# *Returns*:: (Symbol) the type of quality score.
|
|
150
|
+
def quality_score_type
|
|
151
|
+
:solexa
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Solexa score to probability conversion.
|
|
155
|
+
# ---
|
|
156
|
+
# *Arguments*:
|
|
157
|
+
# * (required) _scores_: (Array containing Integer) scores
|
|
158
|
+
# *Returns*:: (Array containing Float) probabilities
|
|
159
|
+
def solexa_q2p(scores)
|
|
160
|
+
scores.collect do |q|
|
|
161
|
+
t = 10 ** (- q / 10.0)
|
|
162
|
+
t /= (1.0 + t)
|
|
163
|
+
if t > 1.0 then
|
|
164
|
+
t = 1.0
|
|
165
|
+
#elsif t < 0.0 then
|
|
166
|
+
# t = 0.0
|
|
167
|
+
end
|
|
168
|
+
t
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
alias q2p solexa_q2p
|
|
172
|
+
module_function :q2p
|
|
173
|
+
public :q2p
|
|
174
|
+
|
|
175
|
+
# Probability to Solexa score conversion.
|
|
176
|
+
# ---
|
|
177
|
+
# *Arguments*:
|
|
178
|
+
# * (required) _probabilities_: (Array containing Float) probabilities
|
|
179
|
+
# *Returns*:: (Array containing Float) scores
|
|
180
|
+
def solexa_p2q(probabilities)
|
|
181
|
+
probabilities.collect do |p|
|
|
182
|
+
t = p / (1.0 - p)
|
|
183
|
+
t = Float::MIN if t < Float::MIN
|
|
184
|
+
q = -10 * Math.log10(t)
|
|
185
|
+
q.finite? ? q.round : q
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
alias p2q solexa_p2q
|
|
189
|
+
module_function :p2q
|
|
190
|
+
public :p2q
|
|
191
|
+
|
|
192
|
+
alias convert_scores_from_solexa convert_nothing
|
|
193
|
+
alias convert_scores_to_solexa convert_nothing
|
|
194
|
+
alias convert_scores_from_phred convert_scores_from_phred_to_solexa
|
|
195
|
+
alias convert_scores_to_phred convert_scores_from_solexa_to_phred
|
|
196
|
+
module_function :convert_scores_to_phred
|
|
197
|
+
public :convert_scores_to_phred
|
|
198
|
+
|
|
199
|
+
end #module Solexa
|
|
200
|
+
|
|
201
|
+
end #module QualityScore
|
|
202
|
+
|
|
203
|
+
end #class Sequence
|
|
204
|
+
|
|
205
|
+
end #module Bio
|