bio 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/phyloxml_writer.rb - PhyloXML writer
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009
|
|
5
|
+
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
|
6
|
+
# License:: The Ruby License
|
|
7
|
+
#
|
|
8
|
+
# $Id:$
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
# This file contains the writer for PhyloXML.
|
|
13
|
+
#
|
|
14
|
+
# == Requirements
|
|
15
|
+
#
|
|
16
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
|
17
|
+
# http://libxml.rubyforge.org or
|
|
18
|
+
#
|
|
19
|
+
# gem install -r libxml-ruby
|
|
20
|
+
#
|
|
21
|
+
# == References
|
|
22
|
+
#
|
|
23
|
+
# * http://www.phyloxml.org
|
|
24
|
+
#
|
|
25
|
+
# * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
|
|
26
|
+
|
|
27
|
+
require 'libxml'
|
|
28
|
+
require 'bio/db/phyloxml/phyloxml_elements'
|
|
29
|
+
|
|
30
|
+
module Bio
|
|
31
|
+
|
|
32
|
+
module PhyloXML
|
|
33
|
+
|
|
34
|
+
# == Description
|
|
35
|
+
#
|
|
36
|
+
# Bio::PhyloXML::Writer is for writing phyloXML (version 1.10) format files.
|
|
37
|
+
#
|
|
38
|
+
# == Requirements
|
|
39
|
+
#
|
|
40
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
|
41
|
+
# http://libxml.rubyforge.org or
|
|
42
|
+
#
|
|
43
|
+
# gem install -r libxml-ruby
|
|
44
|
+
#
|
|
45
|
+
# == Usage
|
|
46
|
+
#
|
|
47
|
+
# require 'bio'
|
|
48
|
+
#
|
|
49
|
+
# # Create new phyloxml parser
|
|
50
|
+
# phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
|
51
|
+
#
|
|
52
|
+
# # Read in some trees from file
|
|
53
|
+
# tree1 = phyloxml.next_tree
|
|
54
|
+
# tree2 = phyloxml.next_tree
|
|
55
|
+
#
|
|
56
|
+
# # Create new phyloxml writer
|
|
57
|
+
# writer = Bio::PhyloXML::Writer.new('tree.xml')
|
|
58
|
+
#
|
|
59
|
+
# # Write tree to the file tree.xml
|
|
60
|
+
# writer.write(tree1)
|
|
61
|
+
#
|
|
62
|
+
# # Add another tree to the file
|
|
63
|
+
# writer.write(tree2)
|
|
64
|
+
#
|
|
65
|
+
# == References
|
|
66
|
+
#
|
|
67
|
+
# http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
|
|
68
|
+
|
|
69
|
+
class Writer
|
|
70
|
+
|
|
71
|
+
include LibXML
|
|
72
|
+
|
|
73
|
+
SCHEMA_LOCATION = 'http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd'
|
|
74
|
+
|
|
75
|
+
attr_accessor :write_branch_length_as_subelement
|
|
76
|
+
|
|
77
|
+
#
|
|
78
|
+
# Create new Writer object. As parameters provide filename of xml file
|
|
79
|
+
# you wish to create. Optional parameter is whether to indent or not.
|
|
80
|
+
# Default is true. By default branch_length is written as subelement of
|
|
81
|
+
# clade element.
|
|
82
|
+
#
|
|
83
|
+
def initialize(filename, indent=true)
|
|
84
|
+
@write_branch_length_as_subelement = true #default value
|
|
85
|
+
@filename = filename
|
|
86
|
+
@indent = indent
|
|
87
|
+
|
|
88
|
+
@doc = XML::Document.new()
|
|
89
|
+
@doc.root = XML::Node.new('phyloxml')
|
|
90
|
+
@root = @doc.root
|
|
91
|
+
@root['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
|
|
92
|
+
@root['xsi:schemaLocation'] = SCHEMA_LOCATION
|
|
93
|
+
@root['xmlns'] = 'http://www.phyloxml.org'
|
|
94
|
+
|
|
95
|
+
#@todo save encoding to be UTF-8. (However it is the default one).
|
|
96
|
+
#it gives error NameError: uninitialized constant LibXML::XML::Encoding
|
|
97
|
+
#@doc.encoding = XML::Encoding::UTF_8
|
|
98
|
+
|
|
99
|
+
@doc.save(@filename, :indent => true)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
#
|
|
103
|
+
# Write a tree to a file in phyloxml format.
|
|
104
|
+
#
|
|
105
|
+
# require 'bio'
|
|
106
|
+
# writer = Bio::PhyloXML::Writer.new('tree.xml')
|
|
107
|
+
# writer.write(tree)
|
|
108
|
+
#
|
|
109
|
+
def write(tree)
|
|
110
|
+
@root << phylogeny = XML::Node.new('phylogeny')
|
|
111
|
+
|
|
112
|
+
PhyloXML::Writer.generate_xml(phylogeny, tree, [
|
|
113
|
+
[:attr, 'rooted'],
|
|
114
|
+
[:simple, 'name', tree.name],
|
|
115
|
+
[:complex, 'id', tree.phylogeny_id],
|
|
116
|
+
[:simple, 'description', tree.description],
|
|
117
|
+
[:simple, 'date', tree.date],
|
|
118
|
+
[:objarr, 'confidence', 'confidences']])
|
|
119
|
+
|
|
120
|
+
root_clade = tree.root.to_xml(nil, @write_branch_length_as_subelement)
|
|
121
|
+
|
|
122
|
+
phylogeny << root_clade
|
|
123
|
+
|
|
124
|
+
tree.children(tree.root).each do |node|
|
|
125
|
+
root_clade << node_to_xml(tree, node, tree.root)
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
Bio::PhyloXML::Writer::generate_xml(phylogeny, tree, [
|
|
129
|
+
[:objarr, 'clade_relation', 'clade_relations'],
|
|
130
|
+
[:objarr, 'sequence_relation', 'sequence_relations'],
|
|
131
|
+
[:objarr, 'property', 'properties']] )
|
|
132
|
+
|
|
133
|
+
@doc.save(@filename, :indent => @indent)
|
|
134
|
+
end #writer#write
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
#
|
|
138
|
+
# The PhyloXML schema allows data in other XML formats to be saved after all
|
|
139
|
+
# phylogeny elements. This method writes that additional data.
|
|
140
|
+
#
|
|
141
|
+
# parser = PhyloXML::Parser.open('phyloxml_examples.xml')
|
|
142
|
+
# writer = PhyloXML::Writer.new('new.xml')
|
|
143
|
+
#
|
|
144
|
+
# parser.each do |tree|
|
|
145
|
+
# writer.write(tree)
|
|
146
|
+
# end
|
|
147
|
+
#
|
|
148
|
+
# # When all the trees are read in by the parser, what's left is saved at
|
|
149
|
+
# # PhyloXML::Parser#other
|
|
150
|
+
# writer.write_other(parser.other)
|
|
151
|
+
#
|
|
152
|
+
|
|
153
|
+
def write_other(other_arr)
|
|
154
|
+
other_arr.each do |other_obj|
|
|
155
|
+
@root << other_obj.to_xml
|
|
156
|
+
end
|
|
157
|
+
@doc.save(@filename, :indent => @indent)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
#class method
|
|
161
|
+
|
|
162
|
+
#
|
|
163
|
+
# Used by to_xml methods of PhyloXML element classes. Generally not to be
|
|
164
|
+
# invoked directly.
|
|
165
|
+
#
|
|
166
|
+
def self.generate_xml(root, elem, subelement_array)
|
|
167
|
+
#example usage: generate_xml(node, self, [[ :complex,'accession', ], [:simple, 'name', @name], [:simple, 'location', @location]])
|
|
168
|
+
subelement_array.each do |subelem|
|
|
169
|
+
if subelem[0] == :simple
|
|
170
|
+
root << XML::Node.new(subelem[1], subelem[2].to_s) if subelem[2] != nil and not subelem[2].to_s.empty?
|
|
171
|
+
|
|
172
|
+
elsif subelem[0] == :complex
|
|
173
|
+
root << subelem[2].send("to_xml") if subelem[2] != nil
|
|
174
|
+
|
|
175
|
+
elsif subelem[0] == :pattern
|
|
176
|
+
#seq, self, [[:pattern, 'symbol', @symbol, "\S{1,10}"]
|
|
177
|
+
if subelem[2] != nil
|
|
178
|
+
if subelem[2] =~ subelem[3]
|
|
179
|
+
root << XML::Node.new(subelem[1], subelem[2])
|
|
180
|
+
else
|
|
181
|
+
raise "#{subelem[2]} is not a valid value of #{subelem[1]}. It should follow pattern #{subelem[3]}"
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
elsif subelem[0] == :objarr
|
|
186
|
+
#[:objarr, 'annotation', 'annotations']])
|
|
187
|
+
obj_arr = elem.send(subelem[2])
|
|
188
|
+
obj_arr.each do |arr_elem|
|
|
189
|
+
root << arr_elem.to_xml
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
elsif subelem[0] == :simplearr
|
|
193
|
+
# [:simplearr, 'common_name', @common_names]
|
|
194
|
+
subelem[2].each do |elem_val|
|
|
195
|
+
root << XML::Node.new(subelem[1], elem_val)
|
|
196
|
+
end
|
|
197
|
+
elsif subelem[0] == :attr
|
|
198
|
+
#[:attr, 'rooted']
|
|
199
|
+
obj = elem.send(subelem[1])
|
|
200
|
+
if obj != nil
|
|
201
|
+
root[subelem[1]] = obj.to_s
|
|
202
|
+
end
|
|
203
|
+
else
|
|
204
|
+
raise "Not supported type of element by method generate_xml."
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
return root
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
private
|
|
211
|
+
|
|
212
|
+
def node_to_xml(tree, node, parent)
|
|
213
|
+
edge = tree.get_edge(parent, node)
|
|
214
|
+
branch_length = edge.distance
|
|
215
|
+
|
|
216
|
+
clade = node.to_xml(branch_length, @write_branch_length_as_subelement)
|
|
217
|
+
|
|
218
|
+
tree.children(node).each do |new_node|
|
|
219
|
+
clade << node_to_xml(tree, new_node, node)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
return clade
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
end
|
|
228
|
+
end
|
data/lib/bio/db/prosite.rb
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
# = bio/db/prosite.rb - PROSITE database class
|
|
3
3
|
#
|
|
4
4
|
# Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
|
|
5
|
-
#
|
|
5
|
+
# License:: The Ruby License
|
|
6
6
|
#
|
|
7
|
-
# $Id
|
|
7
|
+
# $Id:$
|
|
8
8
|
#
|
|
9
9
|
|
|
10
10
|
require 'bio/db'
|
|
@@ -502,96 +502,3 @@ end # PROSITE
|
|
|
502
502
|
|
|
503
503
|
end # Bio
|
|
504
504
|
|
|
505
|
-
|
|
506
|
-
if __FILE__ == $0
|
|
507
|
-
|
|
508
|
-
begin
|
|
509
|
-
require 'pp'
|
|
510
|
-
alias p pp
|
|
511
|
-
rescue LoadError
|
|
512
|
-
end
|
|
513
|
-
|
|
514
|
-
ps = Bio::PROSITE.new(ARGF.read)
|
|
515
|
-
|
|
516
|
-
list = %w(
|
|
517
|
-
name
|
|
518
|
-
division
|
|
519
|
-
ac
|
|
520
|
-
entry_id
|
|
521
|
-
dt
|
|
522
|
-
date
|
|
523
|
-
de
|
|
524
|
-
definition
|
|
525
|
-
pa
|
|
526
|
-
pattern
|
|
527
|
-
ma
|
|
528
|
-
profile
|
|
529
|
-
ru
|
|
530
|
-
rule
|
|
531
|
-
nr
|
|
532
|
-
statistics
|
|
533
|
-
release
|
|
534
|
-
swissprot_release_number
|
|
535
|
-
swissprot_release_sequences
|
|
536
|
-
total
|
|
537
|
-
total_hits
|
|
538
|
-
total_sequences
|
|
539
|
-
positive
|
|
540
|
-
positive_hits
|
|
541
|
-
positive_sequences
|
|
542
|
-
unknown
|
|
543
|
-
unknown_hits
|
|
544
|
-
unknown_sequences
|
|
545
|
-
false_pos
|
|
546
|
-
false_positive_hits
|
|
547
|
-
false_positive_sequences
|
|
548
|
-
false_neg
|
|
549
|
-
false_negative_hits
|
|
550
|
-
partial
|
|
551
|
-
cc
|
|
552
|
-
comment
|
|
553
|
-
max_repeat
|
|
554
|
-
site
|
|
555
|
-
skip_flag
|
|
556
|
-
dr
|
|
557
|
-
sp_xref
|
|
558
|
-
pdb_xref
|
|
559
|
-
pdoc_xref
|
|
560
|
-
)
|
|
561
|
-
|
|
562
|
-
list.each do |method|
|
|
563
|
-
puts ">>> #{method}"
|
|
564
|
-
p ps.send(method)
|
|
565
|
-
end
|
|
566
|
-
|
|
567
|
-
puts ">>> taxon_range"
|
|
568
|
-
p ps.taxon_range
|
|
569
|
-
puts ">>> taxon_range(expand)"
|
|
570
|
-
p ps.taxon_range(true)
|
|
571
|
-
|
|
572
|
-
puts ">>> list_truepositive"
|
|
573
|
-
p ps.list_truepositive
|
|
574
|
-
puts ">>> list_truepositive(by_name)"
|
|
575
|
-
p ps.list_truepositive(true)
|
|
576
|
-
|
|
577
|
-
puts ">>> list_falsenegative"
|
|
578
|
-
p ps.list_falsenegative
|
|
579
|
-
puts ">>> list_falsenegative(by_name)"
|
|
580
|
-
p ps.list_falsenegative(true)
|
|
581
|
-
|
|
582
|
-
puts ">>> list_falsepositive"
|
|
583
|
-
p ps.list_falsepositive
|
|
584
|
-
puts ">>> list_falsepositive(by_name)"
|
|
585
|
-
p ps.list_falsepositive(true)
|
|
586
|
-
|
|
587
|
-
puts ">>> list_potentialhit"
|
|
588
|
-
p ps.list_potentialhit
|
|
589
|
-
puts ">>> list_potentialhit(by_name)"
|
|
590
|
-
p ps.list_potentialhit(true)
|
|
591
|
-
|
|
592
|
-
puts ">>> list_unknown"
|
|
593
|
-
p ps.list_unknown
|
|
594
|
-
puts ">>> list_unknown(by_name)"
|
|
595
|
-
p ps.list_unknown(true)
|
|
596
|
-
|
|
597
|
-
end
|
data/lib/bio/db/rebase.rb
CHANGED
|
@@ -40,7 +40,7 @@ module Bio
|
|
|
40
40
|
# To easily get started with the data you can simply type this command
|
|
41
41
|
# at your shell prompt:
|
|
42
42
|
#
|
|
43
|
-
# % wget ftp://ftp.neb.com/pub/rebase/
|
|
43
|
+
# % wget "ftp://ftp.neb.com/pub/rebase/emboss_*"
|
|
44
44
|
#
|
|
45
45
|
#
|
|
46
46
|
# = Usage
|
|
@@ -195,7 +195,7 @@ class REBASE
|
|
|
195
195
|
# * _none_
|
|
196
196
|
# *Returns*:: +Array+ sorted enzyme names
|
|
197
197
|
def enzymes
|
|
198
|
-
@
|
|
198
|
+
@enzyme_names
|
|
199
199
|
end
|
|
200
200
|
|
|
201
201
|
# Check if supplied name is the name of an available enzyme
|
|
@@ -205,10 +205,7 @@ class REBASE
|
|
|
205
205
|
# * +name+: Enzyme name
|
|
206
206
|
# *Returns*:: +true/false+
|
|
207
207
|
def enzyme_name?(name)
|
|
208
|
-
|
|
209
|
-
return true if e.downcase == name.downcase
|
|
210
|
-
end
|
|
211
|
-
return false
|
|
208
|
+
@enzyme_names_downcased.include?(name.downcase)
|
|
212
209
|
end
|
|
213
210
|
|
|
214
211
|
# Save the current data
|
|
@@ -290,6 +287,8 @@ class REBASE
|
|
|
290
287
|
d.references = []
|
|
291
288
|
end
|
|
292
289
|
|
|
290
|
+
@enzyme_names = @data.keys.sort
|
|
291
|
+
@enzyme_names_downcased = @enzyme_names.map{|a| a.downcase}
|
|
293
292
|
setup_enzyme_and_reference_association
|
|
294
293
|
end
|
|
295
294
|
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/sanger_chromatogram/abif.rb - Abif class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
|
5
|
+
# License:: The Ruby License
|
|
6
|
+
#
|
|
7
|
+
|
|
8
|
+
require 'bio/db/sanger_chromatogram/chromatogram'
|
|
9
|
+
|
|
10
|
+
module Bio
|
|
11
|
+
# == Description
|
|
12
|
+
#
|
|
13
|
+
# This class inherits from the SangerChromatogram superclass. It captures the information contained
|
|
14
|
+
# within an ABIF format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
|
|
15
|
+
# for usage.
|
|
16
|
+
class Abif < SangerChromatogram
|
|
17
|
+
DATA_TYPES = { 1 => 'byte', 2 => 'char', 3 => 'word', 4 => 'short', 5 => 'long',
|
|
18
|
+
7 => 'float', 8 => 'double', 10 => 'date', 11 => 'time', 18 => 'pString',
|
|
19
|
+
19 => 'cString', 12 => 'thumb', 13 => 'bool', 6 => 'rational', 9 => 'BCD',
|
|
20
|
+
14 => 'point', 15 => 'rect', 16 => 'vPoint', 17 => 'vRect', 20 => 'tag',
|
|
21
|
+
128 => 'deltaComp', 256 => 'LZWComp', 384 => 'deltaLZW', 1024 => 'user'} # User defined data types have tags numbers >= 1024
|
|
22
|
+
|
|
23
|
+
PACK_TYPES = { 'byte' => 'C', 'char' => 'c', 'word' => 'n', 'short' => 'n', 'long' => 'N',
|
|
24
|
+
'date' => 'nCC', 'time' => 'CCCC', 'pString' => 'CA*', 'cString' => 'Z*',
|
|
25
|
+
'float' => 'g', 'double' => 'G',
|
|
26
|
+
'bool' => 'C', 'thumb' => 'NNCC', 'rational' => 'NN', 'point' => 'nn',
|
|
27
|
+
'rect' => 'nnnn', 'vPoint' => 'NN', 'vRect' => 'NNNN', 'tag' => 'NN'} # Specifies how to pack each data type
|
|
28
|
+
|
|
29
|
+
#sequence attributes
|
|
30
|
+
|
|
31
|
+
# The sample title as entered when sequencing the sample (String)
|
|
32
|
+
attr_accessor :sample_title
|
|
33
|
+
# The chemistry used when sequencing, e.g. Dye terminators => 'term.' (String)
|
|
34
|
+
attr_accessor :chemistry
|
|
35
|
+
|
|
36
|
+
# see SangerChromatogram class for how to create an Abif object and its usage
|
|
37
|
+
def initialize(string)
|
|
38
|
+
header = string.slice(0,128)
|
|
39
|
+
# read in header info
|
|
40
|
+
@chromatogram_type, @version, @directory_tag_name, @directory_tag_number, @directory_element_type, @directory_element_size, @directory_number_of_elements, @directory_data_size, @directory_data_offset, @directory_data_handle= header.unpack("a4 n a4 N n n N N N N")
|
|
41
|
+
@version = @version/100.to_f
|
|
42
|
+
get_directory_entries(string)
|
|
43
|
+
# get sequence
|
|
44
|
+
@sequence = @directory_entries["PBAS"][1].data.map{|char| char.chr.downcase}.join("")
|
|
45
|
+
#get peak indices
|
|
46
|
+
@peak_indices = @directory_entries["PLOC"][1].data
|
|
47
|
+
#get qualities
|
|
48
|
+
@qualities = @directory_entries["PCON"][1].data
|
|
49
|
+
# get sample title
|
|
50
|
+
@sample_title = @directory_entries["SMPL"][1].data
|
|
51
|
+
@directory_entries["PDMF"].size > 2 ? @dye_mobility = @directory_entries["PDMF"][2].data : @dye_mobility = @directory_entries["PDMF"][1].data
|
|
52
|
+
#get trace data
|
|
53
|
+
@chemistry = @directory_entries["phCH"][1].data
|
|
54
|
+
base_order = @directory_entries["FWO_"][1].data.map{|char| char.chr.downcase}
|
|
55
|
+
(9..12).each do |data_index|
|
|
56
|
+
self.instance_variable_set("@#{base_order[data_index-9]}trace", @directory_entries["DATA"][data_index].data)
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Returns the data for the name.
|
|
62
|
+
# If not found, returns nil.
|
|
63
|
+
# ---
|
|
64
|
+
# *Arguments*:
|
|
65
|
+
# * (required) _name_: (String) name of the data
|
|
66
|
+
# * (optional) <em>tag_number</em>: (Integer) tag number (default 1)
|
|
67
|
+
# *Returns*:: any data type or nil
|
|
68
|
+
def data(name, tag_number = 1)
|
|
69
|
+
d = @directory_entries[name]
|
|
70
|
+
d ? d[tag_number].data : nil
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
private
|
|
74
|
+
def get_directory_entries(string)
|
|
75
|
+
@directory_entries = Hash.new
|
|
76
|
+
offset = @directory_data_offset
|
|
77
|
+
@directory_number_of_elements.times do
|
|
78
|
+
entry = DirectoryEntry.new
|
|
79
|
+
entry_fields = string.slice(offset, @directory_element_size)
|
|
80
|
+
entry.name, entry.tag_number, entry.element_type, entry.element_size, entry.number_of_elements, entry.data_size, entry.data_offset = entry_fields.unpack("a4 N n n N N N")
|
|
81
|
+
# populate the entry with the data it refers to
|
|
82
|
+
if entry.data_size > 4
|
|
83
|
+
get_entry_data(entry, string)
|
|
84
|
+
else
|
|
85
|
+
get_entry_data(entry, entry_fields)
|
|
86
|
+
end
|
|
87
|
+
if @directory_entries.has_key?(entry.name)
|
|
88
|
+
@directory_entries[entry.name][entry.tag_number] = entry
|
|
89
|
+
else
|
|
90
|
+
@directory_entries[entry.name] = Array.new
|
|
91
|
+
@directory_entries[entry.name][entry.tag_number] = entry
|
|
92
|
+
end
|
|
93
|
+
offset += @directory_element_size
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
def get_entry_data(entry, string)
|
|
97
|
+
if entry.data_size > 4
|
|
98
|
+
raw_data = string.slice(entry.data_offset, entry.data_size)
|
|
99
|
+
else
|
|
100
|
+
raw_data = string.slice(20,4)
|
|
101
|
+
end
|
|
102
|
+
if entry.element_type > 1023
|
|
103
|
+
# user defined data: not processed as yet by this bioruby module
|
|
104
|
+
entry.data = raw_data
|
|
105
|
+
else
|
|
106
|
+
pack_type = PACK_TYPES[DATA_TYPES[entry.element_type]]
|
|
107
|
+
pack_type.match(/\*/) ? unpack_string = pack_type : unpack_string = "#{pack_type}#{entry.number_of_elements}"
|
|
108
|
+
entry.data = raw_data.unpack(unpack_string)
|
|
109
|
+
if pack_type == "CA*" # pascal string where the first byte is a charcter count and should therefore be removed
|
|
110
|
+
entry.data.shift
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
class DirectoryEntry
|
|
116
|
+
attr_accessor :name, :tag_number, :element_type, :element_size, :number_of_elements, :data_size, :data_offset
|
|
117
|
+
attr_accessor :data
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|