bio 1.3.1 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +2105 -3728
- data/KNOWN_ISSUES.rdoc +35 -3
- data/README.rdoc +8 -2
- data/RELEASE_NOTES.rdoc +166 -0
- data/bin/bioruby +4 -1
- data/bioruby.gemspec +146 -1
- data/bioruby.gemspec.erb +3 -1
- data/doc/ChangeLog-before-1.3.1 +3961 -0
- data/doc/Tutorial.rd +154 -22
- data/doc/Tutorial.rd.html +125 -68
- data/lib/bio.rb +21 -6
- data/lib/bio/appl/bl2seq/report.rb +11 -202
- data/lib/bio/appl/blast/format0.rb +0 -193
- data/lib/bio/appl/blast/report.rb +2 -147
- data/lib/bio/appl/blast/wublast.rb +0 -208
- data/lib/bio/appl/fasta.rb +4 -19
- data/lib/bio/appl/fasta/format10.rb +0 -14
- data/lib/bio/appl/genscan/report.rb +0 -176
- data/lib/bio/appl/hmmer.rb +1 -15
- data/lib/bio/appl/hmmer/report.rb +0 -100
- data/lib/bio/appl/meme/mast.rb +156 -0
- data/lib/bio/appl/meme/mast/report.rb +91 -0
- data/lib/bio/appl/meme/motif.rb +48 -0
- data/lib/bio/appl/psort.rb +0 -111
- data/lib/bio/appl/psort/report.rb +1 -45
- data/lib/bio/appl/pts1.rb +2 -4
- data/lib/bio/appl/sosui/report.rb +5 -54
- data/lib/bio/appl/targetp/report.rb +1 -104
- data/lib/bio/appl/tmhmm/report.rb +0 -36
- data/lib/bio/command.rb +94 -10
- data/lib/bio/data/aa.rb +1 -77
- data/lib/bio/data/codontable.rb +1 -95
- data/lib/bio/data/na.rb +1 -26
- data/lib/bio/db/aaindex.rb +1 -38
- data/lib/bio/db/fasta.rb +1 -134
- data/lib/bio/db/fasta/format_qual.rb +204 -0
- data/lib/bio/db/fasta/qual.rb +102 -0
- data/lib/bio/db/fastq.rb +645 -0
- data/lib/bio/db/fastq/fastq_to_biosequence.rb +40 -0
- data/lib/bio/db/fastq/format_fastq.rb +175 -0
- data/lib/bio/db/genbank/genbank.rb +1 -86
- data/lib/bio/db/gff.rb +0 -17
- data/lib/bio/db/go.rb +4 -72
- data/lib/bio/db/kegg/common.rb +112 -0
- data/lib/bio/db/kegg/compound.rb +29 -20
- data/lib/bio/db/kegg/drug.rb +74 -34
- data/lib/bio/db/kegg/enzyme.rb +26 -5
- data/lib/bio/db/kegg/genes.rb +128 -15
- data/lib/bio/db/kegg/genome.rb +3 -41
- data/lib/bio/db/kegg/glycan.rb +19 -24
- data/lib/bio/db/kegg/orthology.rb +16 -56
- data/lib/bio/db/kegg/reaction.rb +81 -28
- data/lib/bio/db/kegg/taxonomy.rb +1 -52
- data/lib/bio/db/litdb.rb +1 -16
- data/lib/bio/db/phyloxml/phyloxml.xsd +582 -0
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +1174 -0
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +954 -0
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +228 -0
- data/lib/bio/db/prosite.rb +2 -95
- data/lib/bio/db/rebase.rb +5 -6
- data/lib/bio/db/sanger_chromatogram/abif.rb +120 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +133 -0
- data/lib/bio/db/sanger_chromatogram/chromatogram_to_biosequence.rb +32 -0
- data/lib/bio/db/sanger_chromatogram/scf.rb +210 -0
- data/lib/bio/io/das.rb +0 -44
- data/lib/bio/io/ddbjxml.rb +1 -181
- data/lib/bio/io/flatfile.rb +1 -7
- data/lib/bio/io/flatfile/autodetection.rb +6 -0
- data/lib/bio/io/keggapi.rb +0 -442
- data/lib/bio/io/ncbirest.rb +130 -132
- data/lib/bio/io/ncbisoap.rb +2 -1
- data/lib/bio/io/pubmed.rb +0 -88
- data/lib/bio/location.rb +0 -73
- data/lib/bio/pathway.rb +0 -171
- data/lib/bio/sequence.rb +18 -1
- data/lib/bio/sequence/adapter.rb +3 -0
- data/lib/bio/sequence/format.rb +16 -0
- data/lib/bio/sequence/quality_score.rb +205 -0
- data/lib/bio/tree.rb +70 -5
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -2
- data/lib/bio/util/sirna.rb +1 -23
- data/lib/bio/version.rb +1 -1
- data/sample/demo_aaindex.rb +67 -0
- data/sample/demo_aminoacid.rb +101 -0
- data/sample/demo_bl2seq_report.rb +220 -0
- data/sample/demo_blast_report.rb +285 -0
- data/sample/demo_codontable.rb +119 -0
- data/sample/demo_das.rb +105 -0
- data/sample/demo_ddbjxml.rb +212 -0
- data/sample/demo_fasta_remote.rb +51 -0
- data/sample/demo_fastaformat.rb +105 -0
- data/sample/demo_genbank.rb +132 -0
- data/sample/demo_genscan_report.rb +202 -0
- data/sample/demo_gff1.rb +49 -0
- data/sample/demo_go.rb +98 -0
- data/sample/demo_hmmer_report.rb +149 -0
- data/sample/demo_kegg_compound.rb +57 -0
- data/sample/demo_kegg_drug.rb +65 -0
- data/sample/demo_kegg_genome.rb +74 -0
- data/sample/demo_kegg_glycan.rb +72 -0
- data/sample/demo_kegg_orthology.rb +62 -0
- data/sample/demo_kegg_reaction.rb +66 -0
- data/sample/demo_kegg_taxonomy.rb +92 -0
- data/sample/demo_keggapi.rb +502 -0
- data/sample/demo_litdb.rb +42 -0
- data/sample/demo_locations.rb +99 -0
- data/sample/demo_ncbi_rest.rb +130 -0
- data/sample/demo_nucleicacid.rb +49 -0
- data/sample/demo_pathway.rb +196 -0
- data/sample/demo_prosite.rb +120 -0
- data/sample/demo_psort.rb +138 -0
- data/sample/demo_psort_report.rb +70 -0
- data/sample/demo_pubmed.rb +118 -0
- data/sample/demo_sirna.rb +63 -0
- data/sample/demo_sosui_report.rb +89 -0
- data/sample/demo_targetp_report.rb +135 -0
- data/sample/demo_tmhmm_report.rb +68 -0
- data/sample/pmfetch.rb +13 -4
- data/sample/pmsearch.rb +15 -4
- data/sample/test_phyloxml_big.rb +205 -0
- data/test/bioruby_test_helper.rb +61 -0
- data/test/data/KEGG/1.1.1.1.enzyme +935 -0
- data/test/data/KEGG/C00025.compound +102 -0
- data/test/data/KEGG/D00063.drug +104 -0
- data/test/data/KEGG/G00024.glycan +47 -0
- data/test/data/KEGG/G01366.glycan +18 -0
- data/test/data/KEGG/K02338.orthology +902 -0
- data/test/data/KEGG/R00006.reaction +14 -0
- data/test/data/fastq/README.txt +109 -0
- data/test/data/fastq/error_diff_ids.fastq +20 -0
- data/test/data/fastq/error_double_qual.fastq +22 -0
- data/test/data/fastq/error_double_seq.fastq +22 -0
- data/test/data/fastq/error_long_qual.fastq +20 -0
- data/test/data/fastq/error_no_qual.fastq +20 -0
- data/test/data/fastq/error_qual_del.fastq +20 -0
- data/test/data/fastq/error_qual_escape.fastq +20 -0
- data/test/data/fastq/error_qual_null.fastq +0 -0
- data/test/data/fastq/error_qual_space.fastq +21 -0
- data/test/data/fastq/error_qual_tab.fastq +21 -0
- data/test/data/fastq/error_qual_unit_sep.fastq +20 -0
- data/test/data/fastq/error_qual_vtab.fastq +20 -0
- data/test/data/fastq/error_short_qual.fastq +20 -0
- data/test/data/fastq/error_spaces.fastq +20 -0
- data/test/data/fastq/error_tabs.fastq +21 -0
- data/test/data/fastq/error_trunc_at_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_at_qual.fastq +19 -0
- data/test/data/fastq/error_trunc_at_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_plus.fastq +19 -0
- data/test/data/fastq/error_trunc_in_qual.fastq +20 -0
- data/test/data/fastq/error_trunc_in_seq.fastq +18 -0
- data/test/data/fastq/error_trunc_in_title.fastq +17 -0
- data/test/data/fastq/illumina_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/illumina_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/illumina_full_range_original_illumina.fastq +8 -0
- data/test/data/fastq/longreads_as_illumina.fastq +40 -0
- data/test/data/fastq/longreads_as_sanger.fastq +40 -0
- data/test/data/fastq/longreads_as_solexa.fastq +40 -0
- data/test/data/fastq/longreads_original_sanger.fastq +120 -0
- data/test/data/fastq/misc_dna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_dna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_dna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_dna_original_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_illumina.fastq +16 -0
- data/test/data/fastq/misc_rna_as_sanger.fastq +16 -0
- data/test/data/fastq/misc_rna_as_solexa.fastq +16 -0
- data/test/data/fastq/misc_rna_original_sanger.fastq +16 -0
- data/test/data/fastq/sanger_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/sanger_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/sanger_full_range_original_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_illumina.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_sanger.fastq +8 -0
- data/test/data/fastq/solexa_full_range_as_solexa.fastq +8 -0
- data/test/data/fastq/solexa_full_range_original_solexa.fastq +8 -0
- data/test/data/fastq/wrapping_as_illumina.fastq +12 -0
- data/test/data/fastq/wrapping_as_sanger.fastq +12 -0
- data/test/data/fastq/wrapping_as_solexa.fastq +12 -0
- data/test/data/fastq/wrapping_original_sanger.fastq +24 -0
- data/test/data/meme/db +0 -0
- data/test/data/meme/mast +0 -0
- data/test/data/meme/mast.out +13 -0
- data/test/data/meme/meme.out +3 -0
- data/test/data/phyloxml/apaf.xml +666 -0
- data/test/data/phyloxml/bcl_2.xml +2097 -0
- data/test/data/phyloxml/made_up.xml +144 -0
- data/test/data/phyloxml/ncbi_taxonomy_mollusca_short.xml +65 -0
- data/test/data/phyloxml/phyloxml_examples.xml +415 -0
- data/test/data/sanger_chromatogram/test_chromatogram_abif.ab1 +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v2.scf +0 -0
- data/test/data/sanger_chromatogram/test_chromatogram_scf_v3.scf +0 -0
- data/test/functional/bio/appl/test_pts1.rb +7 -5
- data/test/functional/bio/io/test_ensembl.rb +4 -3
- data/test/functional/bio/io/test_pubmed.rb +9 -3
- data/test/functional/bio/io/test_soapwsdl.rb +5 -4
- data/test/functional/bio/io/test_togows.rb +5 -4
- data/test/functional/bio/sequence/test_output_embl.rb +6 -4
- data/test/functional/bio/test_command.rb +54 -5
- data/test/runner.rb +5 -3
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +4 -2
- data/test/unit/bio/appl/blast/test_report.rb +5 -4
- data/test/unit/bio/appl/blast/test_rpsblast.rb +5 -4
- data/test/unit/bio/appl/gcg/test_msf.rb +5 -5
- data/test/unit/bio/appl/genscan/test_report.rb +8 -9
- data/test/unit/bio/appl/hmmer/test_report.rb +5 -4
- data/test/unit/bio/appl/iprscan/test_report.rb +6 -5
- data/test/unit/bio/appl/mafft/test_report.rb +6 -5
- data/test/unit/bio/appl/meme/mast/test_report.rb +46 -0
- data/test/unit/bio/appl/meme/test_mast.rb +103 -0
- data/test/unit/bio/appl/meme/test_motif.rb +38 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +5 -4
- data/test/unit/bio/appl/paml/codeml/test_report.rb +5 -4
- data/test/unit/bio/appl/paml/test_codeml.rb +5 -4
- data/test/unit/bio/appl/sim4/test_report.rb +5 -4
- data/test/unit/bio/appl/sosui/test_report.rb +6 -5
- data/test/unit/bio/appl/targetp/test_report.rb +5 -3
- data/test/unit/bio/appl/test_blast.rb +5 -4
- data/test/unit/bio/appl/test_fasta.rb +4 -2
- data/test/unit/bio/appl/test_pts1.rb +4 -2
- data/test/unit/bio/appl/tmhmm/test_report.rb +6 -5
- data/test/unit/bio/data/test_aa.rb +5 -3
- data/test/unit/bio/data/test_codontable.rb +5 -4
- data/test/unit/bio/data/test_na.rb +5 -3
- data/test/unit/bio/db/biosql/tc_biosql.rb +5 -1
- data/test/unit/bio/db/embl/test_common.rb +4 -2
- data/test/unit/bio/db/embl/test_embl.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_rel89.rb +6 -6
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +7 -8
- data/test/unit/bio/db/embl/test_sptr.rb +6 -8
- data/test/unit/bio/db/embl/test_uniprot.rb +6 -5
- data/test/unit/bio/db/fasta/test_format_qual.rb +346 -0
- data/test/unit/bio/db/kegg/test_compound.rb +146 -0
- data/test/unit/bio/db/kegg/test_drug.rb +194 -0
- data/test/unit/bio/db/kegg/test_enzyme.rb +241 -0
- data/test/unit/bio/db/kegg/test_genes.rb +32 -4
- data/test/unit/bio/db/kegg/test_glycan.rb +260 -0
- data/test/unit/bio/db/kegg/test_orthology.rb +50 -0
- data/test/unit/bio/db/kegg/test_reaction.rb +96 -0
- data/test/unit/bio/db/pdb/test_pdb.rb +4 -2
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +76 -0
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +98 -0
- data/test/unit/bio/db/test_aaindex.rb +6 -6
- data/test/unit/bio/db/test_fasta.rb +5 -46
- data/test/unit/bio/db/test_fastq.rb +829 -0
- data/test/unit/bio/db/test_gff.rb +4 -2
- data/test/unit/bio/db/test_lasergene.rb +7 -5
- data/test/unit/bio/db/test_medline.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +6 -6
- data/test/unit/bio/db/test_nexus.rb +4 -2
- data/test/unit/bio/db/test_phyloxml.rb +769 -0
- data/test/unit/bio/db/test_phyloxml_writer.rb +328 -0
- data/test/unit/bio/db/test_prosite.rb +6 -5
- data/test/unit/bio/db/test_qual.rb +63 -0
- data/test/unit/bio/db/test_rebase.rb +5 -3
- data/test/unit/bio/db/test_soft.rb +7 -6
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -7
- data/test/unit/bio/io/flatfile/test_buffer.rb +6 -5
- data/test/unit/bio/io/flatfile/test_splitter.rb +4 -4
- data/test/unit/bio/io/test_ddbjxml.rb +4 -3
- data/test/unit/bio/io/test_ensembl.rb +5 -3
- data/test/unit/bio/io/test_fastacmd.rb +4 -3
- data/test/unit/bio/io/test_flatfile.rb +6 -5
- data/test/unit/bio/io/test_soapwsdl.rb +4 -3
- data/test/unit/bio/io/test_togows.rb +4 -2
- data/test/unit/bio/sequence/test_aa.rb +5 -3
- data/test/unit/bio/sequence/test_common.rb +4 -2
- data/test/unit/bio/sequence/test_compat.rb +4 -2
- data/test/unit/bio/sequence/test_dblink.rb +5 -3
- data/test/unit/bio/sequence/test_na.rb +4 -2
- data/test/unit/bio/sequence/test_quality_score.rb +330 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +5 -3
- data/test/unit/bio/test_alignment.rb +5 -3
- data/test/unit/bio/test_command.rb +4 -3
- data/test/unit/bio/test_db.rb +5 -3
- data/test/unit/bio/test_feature.rb +4 -2
- data/test/unit/bio/test_location.rb +4 -2
- data/test/unit/bio/test_map.rb +5 -3
- data/test/unit/bio/test_pathway.rb +4 -2
- data/test/unit/bio/test_reference.rb +4 -2
- data/test/unit/bio/test_sequence.rb +5 -3
- data/test/unit/bio/test_shell.rb +5 -3
- data/test/unit/bio/test_tree.rb +6 -6
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +4 -2
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +17 -13
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +4 -2
- data/test/unit/bio/util/test_color_scheme.rb +5 -3
- data/test/unit/bio/util/test_contingency_table.rb +5 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +4 -2
- data/test/unit/bio/util/test_sirna.rb +6 -4
- metadata +147 -2
@@ -0,0 +1,228 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/phyloxml_writer.rb - PhyloXML writer
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009
|
5
|
+
# Diana Jaunzeikare <latvianlinuxgirl@gmail.com>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
# == Description
|
11
|
+
#
|
12
|
+
# This file contains the writer for PhyloXML.
|
13
|
+
#
|
14
|
+
# == Requirements
|
15
|
+
#
|
16
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
17
|
+
# http://libxml.rubyforge.org or
|
18
|
+
#
|
19
|
+
# gem install -r libxml-ruby
|
20
|
+
#
|
21
|
+
# == References
|
22
|
+
#
|
23
|
+
# * http://www.phyloxml.org
|
24
|
+
#
|
25
|
+
# * https://www.nescent.org/wg_phyloinformatics/PhyloSoC:PhyloXML_support_in_BioRuby
|
26
|
+
|
27
|
+
require 'libxml'
|
28
|
+
require 'bio/db/phyloxml/phyloxml_elements'
|
29
|
+
|
30
|
+
module Bio
|
31
|
+
|
32
|
+
module PhyloXML
|
33
|
+
|
34
|
+
# == Description
|
35
|
+
#
|
36
|
+
# Bio::PhyloXML::Writer is for writing phyloXML (version 1.10) format files.
|
37
|
+
#
|
38
|
+
# == Requirements
|
39
|
+
#
|
40
|
+
# Libxml2 XML parser is required. Install libxml-ruby bindings from
|
41
|
+
# http://libxml.rubyforge.org or
|
42
|
+
#
|
43
|
+
# gem install -r libxml-ruby
|
44
|
+
#
|
45
|
+
# == Usage
|
46
|
+
#
|
47
|
+
# require 'bio'
|
48
|
+
#
|
49
|
+
# # Create new phyloxml parser
|
50
|
+
# phyloxml = Bio::PhyloXML::Parser.open('example.xml')
|
51
|
+
#
|
52
|
+
# # Read in some trees from file
|
53
|
+
# tree1 = phyloxml.next_tree
|
54
|
+
# tree2 = phyloxml.next_tree
|
55
|
+
#
|
56
|
+
# # Create new phyloxml writer
|
57
|
+
# writer = Bio::PhyloXML::Writer.new('tree.xml')
|
58
|
+
#
|
59
|
+
# # Write tree to the file tree.xml
|
60
|
+
# writer.write(tree1)
|
61
|
+
#
|
62
|
+
# # Add another tree to the file
|
63
|
+
# writer.write(tree2)
|
64
|
+
#
|
65
|
+
# == References
|
66
|
+
#
|
67
|
+
# http://www.phyloxml.org/documentation/version_100/phyloxml.xsd.html
|
68
|
+
|
69
|
+
class Writer
|
70
|
+
|
71
|
+
include LibXML
|
72
|
+
|
73
|
+
SCHEMA_LOCATION = 'http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd'
|
74
|
+
|
75
|
+
attr_accessor :write_branch_length_as_subelement
|
76
|
+
|
77
|
+
#
|
78
|
+
# Create new Writer object. As parameters provide filename of xml file
|
79
|
+
# you wish to create. The optional parameter is whether to indent or not.
|
80
|
+
# Default is true. By default branch_length is written as subelement of
|
81
|
+
# clade element.
|
82
|
+
#
|
83
|
+
def initialize(filename, indent=true)
|
84
|
+
@write_branch_length_as_subelement = true #default value
|
85
|
+
@filename = filename
|
86
|
+
@indent = indent
|
87
|
+
|
88
|
+
@doc = XML::Document.new()
|
89
|
+
@doc.root = XML::Node.new('phyloxml')
|
90
|
+
@root = @doc.root
|
91
|
+
@root['xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
|
92
|
+
@root['xsi:schemaLocation'] = SCHEMA_LOCATION
|
93
|
+
@root['xmlns'] = 'http://www.phyloxml.org'
|
94
|
+
|
95
|
+
#@todo save encoding to be UTF-8. (However it is the default one).
|
96
|
+
#it gives error NameError: uninitialized constant LibXML::XML::Encoding
|
97
|
+
#@doc.encoding = XML::Encoding::UTF_8
|
98
|
+
|
99
|
+
@doc.save(@filename, :indent => true)
|
100
|
+
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# Write a tree to a file in phyloxml format.
|
104
|
+
#
|
105
|
+
# require 'bio'
|
106
|
+
# writer = Bio::PhyloXML::Writer.new('tree.xml')
|
107
|
+
# writer.write(tree)
|
108
|
+
#
|
109
|
+
def write(tree)
|
110
|
+
@root << phylogeny = XML::Node.new('phylogeny')
|
111
|
+
|
112
|
+
PhyloXML::Writer.generate_xml(phylogeny, tree, [
|
113
|
+
[:attr, 'rooted'],
|
114
|
+
[:simple, 'name', tree.name],
|
115
|
+
[:complex, 'id', tree.phylogeny_id],
|
116
|
+
[:simple, 'description', tree.description],
|
117
|
+
[:simple, 'date', tree.date],
|
118
|
+
[:objarr, 'confidence', 'confidences']])
|
119
|
+
|
120
|
+
root_clade = tree.root.to_xml(nil, @write_branch_length_as_subelement)
|
121
|
+
|
122
|
+
phylogeny << root_clade
|
123
|
+
|
124
|
+
tree.children(tree.root).each do |node|
|
125
|
+
root_clade << node_to_xml(tree, node, tree.root)
|
126
|
+
end
|
127
|
+
|
128
|
+
Bio::PhyloXML::Writer::generate_xml(phylogeny, tree, [
|
129
|
+
[:objarr, 'clade_relation', 'clade_relations'],
|
130
|
+
[:objarr, 'sequence_relation', 'sequence_relations'],
|
131
|
+
[:objarr, 'property', 'properties']] )
|
132
|
+
|
133
|
+
@doc.save(@filename, :indent => @indent)
|
134
|
+
end #writer#write
|
135
|
+
|
136
|
+
|
137
|
+
#
|
138
|
+
# The PhyloXML schema allows data in a different XML format to be saved after all
|
139
|
+
# phylogeny elements. This method writes that additional data.
|
140
|
+
#
|
141
|
+
# parser = PhyloXML::Parser.open('phyloxml_examples.xml')
|
142
|
+
# writer = PhyloXML::Writer.new('new.xml')
|
143
|
+
#
|
144
|
+
# parser.each do |tree|
|
145
|
+
# writer.write(tree)
|
146
|
+
# end
|
147
|
+
#
|
148
|
+
# # When all the trees are read in by the parser, what's left is saved in
|
149
|
+
# # PhyloXML::Parser#other
|
150
|
+
# writer.write_other(parser.other)
|
151
|
+
#
|
152
|
+
|
153
|
+
def write_other(other_arr)
|
154
|
+
other_arr.each do |other_obj|
|
155
|
+
@root << other_obj.to_xml
|
156
|
+
end
|
157
|
+
@doc.save(@filename, :indent => @indent)
|
158
|
+
end
|
159
|
+
|
160
|
+
#class method
|
161
|
+
|
162
|
+
#
|
163
|
+
# Used by to_xml methods of PhyloXML element classes. Generally not to be
|
164
|
+
# invoked directly.
|
165
|
+
#
|
166
|
+
def self.generate_xml(root, elem, subelement_array)
|
167
|
+
#example usage: generate_xml(node, self, [[ :complex,'accession', ], [:simple, 'name', @name], [:simple, 'location', @location]])
|
168
|
+
subelement_array.each do |subelem|
|
169
|
+
if subelem[0] == :simple
|
170
|
+
root << XML::Node.new(subelem[1], subelem[2].to_s) if subelem[2] != nil and not subelem[2].to_s.empty?
|
171
|
+
|
172
|
+
elsif subelem[0] == :complex
|
173
|
+
root << subelem[2].send("to_xml") if subelem[2] != nil
|
174
|
+
|
175
|
+
elsif subelem[0] == :pattern
|
176
|
+
#seq, self, [[:pattern, 'symbol', @symbol, "\S{1,10}"]
|
177
|
+
if subelem[2] != nil
|
178
|
+
if subelem[2] =~ subelem[3]
|
179
|
+
root << XML::Node.new(subelem[1], subelem[2])
|
180
|
+
else
|
181
|
+
raise "#{subelem[2]} is not a valid value of #{subelem[1]}. It should follow pattern #{subelem[3]}"
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
elsif subelem[0] == :objarr
|
186
|
+
#[:objarr, 'annotation', 'annotations']])
|
187
|
+
obj_arr = elem.send(subelem[2])
|
188
|
+
obj_arr.each do |arr_elem|
|
189
|
+
root << arr_elem.to_xml
|
190
|
+
end
|
191
|
+
|
192
|
+
elsif subelem[0] == :simplearr
|
193
|
+
# [:simplearr, 'common_name', @common_names]
|
194
|
+
subelem[2].each do |elem_val|
|
195
|
+
root << XML::Node.new(subelem[1], elem_val)
|
196
|
+
end
|
197
|
+
elsif subelem[0] == :attr
|
198
|
+
#[:attr, 'rooted']
|
199
|
+
obj = elem.send(subelem[1])
|
200
|
+
if obj != nil
|
201
|
+
root[subelem[1]] = obj.to_s
|
202
|
+
end
|
203
|
+
else
|
204
|
+
raise "Not supported type of element by method generate_xml."
|
205
|
+
end
|
206
|
+
end
|
207
|
+
return root
|
208
|
+
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
def node_to_xml(tree, node, parent)
|
213
|
+
edge = tree.get_edge(parent, node)
|
214
|
+
branch_length = edge.distance
|
215
|
+
|
216
|
+
clade = node.to_xml(branch_length, @write_branch_length_as_subelement)
|
217
|
+
|
218
|
+
tree.children(node).each do |new_node|
|
219
|
+
clade << node_to_xml(tree, new_node, node)
|
220
|
+
end
|
221
|
+
|
222
|
+
return clade
|
223
|
+
end
|
224
|
+
|
225
|
+
end
|
226
|
+
|
227
|
+
end
|
228
|
+
end
|
data/lib/bio/db/prosite.rb
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# = bio/db/prosite.rb - PROSITE database class
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2001 Toshiaki Katayama <k@bioruby.org>
|
5
|
-
#
|
5
|
+
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id
|
7
|
+
# $Id:$
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/db'
|
@@ -502,96 +502,3 @@ end # PROSITE
|
|
502
502
|
|
503
503
|
end # Bio
|
504
504
|
|
505
|
-
|
506
|
-
if __FILE__ == $0
|
507
|
-
|
508
|
-
begin
|
509
|
-
require 'pp'
|
510
|
-
alias p pp
|
511
|
-
rescue LoadError
|
512
|
-
end
|
513
|
-
|
514
|
-
ps = Bio::PROSITE.new(ARGF.read)
|
515
|
-
|
516
|
-
list = %w(
|
517
|
-
name
|
518
|
-
division
|
519
|
-
ac
|
520
|
-
entry_id
|
521
|
-
dt
|
522
|
-
date
|
523
|
-
de
|
524
|
-
definition
|
525
|
-
pa
|
526
|
-
pattern
|
527
|
-
ma
|
528
|
-
profile
|
529
|
-
ru
|
530
|
-
rule
|
531
|
-
nr
|
532
|
-
statistics
|
533
|
-
release
|
534
|
-
swissprot_release_number
|
535
|
-
swissprot_release_sequences
|
536
|
-
total
|
537
|
-
total_hits
|
538
|
-
total_sequences
|
539
|
-
positive
|
540
|
-
positive_hits
|
541
|
-
positive_sequences
|
542
|
-
unknown
|
543
|
-
unknown_hits
|
544
|
-
unknown_sequences
|
545
|
-
false_pos
|
546
|
-
false_positive_hits
|
547
|
-
false_positive_sequences
|
548
|
-
false_neg
|
549
|
-
false_negative_hits
|
550
|
-
partial
|
551
|
-
cc
|
552
|
-
comment
|
553
|
-
max_repeat
|
554
|
-
site
|
555
|
-
skip_flag
|
556
|
-
dr
|
557
|
-
sp_xref
|
558
|
-
pdb_xref
|
559
|
-
pdoc_xref
|
560
|
-
)
|
561
|
-
|
562
|
-
list.each do |method|
|
563
|
-
puts ">>> #{method}"
|
564
|
-
p ps.send(method)
|
565
|
-
end
|
566
|
-
|
567
|
-
puts ">>> taxon_range"
|
568
|
-
p ps.taxon_range
|
569
|
-
puts ">>> taxon_range(expand)"
|
570
|
-
p ps.taxon_range(true)
|
571
|
-
|
572
|
-
puts ">>> list_truepositive"
|
573
|
-
p ps.list_truepositive
|
574
|
-
puts ">>> list_truepositive(by_name)"
|
575
|
-
p ps.list_truepositive(true)
|
576
|
-
|
577
|
-
puts ">>> list_falsenegative"
|
578
|
-
p ps.list_falsenegative
|
579
|
-
puts ">>> list_falsenegative(by_name)"
|
580
|
-
p ps.list_falsenegative(true)
|
581
|
-
|
582
|
-
puts ">>> list_falsepositive"
|
583
|
-
p ps.list_falsepositive
|
584
|
-
puts ">>> list_falsepositive(by_name)"
|
585
|
-
p ps.list_falsepositive(true)
|
586
|
-
|
587
|
-
puts ">>> list_potentialhit"
|
588
|
-
p ps.list_potentialhit
|
589
|
-
puts ">>> list_potentialhit(by_name)"
|
590
|
-
p ps.list_potentialhit(true)
|
591
|
-
|
592
|
-
puts ">>> list_unknown"
|
593
|
-
p ps.list_unknown
|
594
|
-
puts ">>> list_unknown(by_name)"
|
595
|
-
p ps.list_unknown(true)
|
596
|
-
|
597
|
-
end
|
data/lib/bio/db/rebase.rb
CHANGED
@@ -40,7 +40,7 @@ module Bio
|
|
40
40
|
# To easily get started with the data you can simply type this command
|
41
41
|
# at your shell prompt:
|
42
42
|
#
|
43
|
-
# % wget ftp://ftp.neb.com/pub/rebase/
|
43
|
+
# % wget "ftp://ftp.neb.com/pub/rebase/emboss_*"
|
44
44
|
#
|
45
45
|
#
|
46
46
|
# = Usage
|
@@ -195,7 +195,7 @@ class REBASE
|
|
195
195
|
# * _none_
|
196
196
|
# *Returns*:: +Array+ sorted enzyme names
|
197
197
|
def enzymes
|
198
|
-
@
|
198
|
+
@enzyme_names
|
199
199
|
end
|
200
200
|
|
201
201
|
# Check if supplied name is the name of an available enzyme
|
@@ -205,10 +205,7 @@ class REBASE
|
|
205
205
|
# * +name+: Enzyme name
|
206
206
|
# *Returns*:: +true/false+
|
207
207
|
def enzyme_name?(name)
|
208
|
-
|
209
|
-
return true if e.downcase == name.downcase
|
210
|
-
end
|
211
|
-
return false
|
208
|
+
@enzyme_names_downcased.include?(name.downcase)
|
212
209
|
end
|
213
210
|
|
214
211
|
# Save the current data
|
@@ -290,6 +287,8 @@ class REBASE
|
|
290
287
|
d.references = []
|
291
288
|
end
|
292
289
|
|
290
|
+
@enzyme_names = @data.keys.sort
|
291
|
+
@enzyme_names_downcased = @enzyme_names.map{|a| a.downcase}
|
293
292
|
setup_enzyme_and_reference_association
|
294
293
|
end
|
295
294
|
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#
|
2
|
+
# = bio/db/sanger_chromatogram/abif.rb - Abif class
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2009 Anthony Underwood <anthony.underwood@hpa.org.uk>, <email2ants@gmail.com>
|
5
|
+
# License:: The Ruby License
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'bio/db/sanger_chromatogram/chromatogram'
|
9
|
+
|
10
|
+
module Bio
|
11
|
+
# == Description
|
12
|
+
#
|
13
|
+
# This class inherits from the SangerChromatogram superclass. It captures the information contained
|
14
|
+
# within an ABIF format chromatogram file generated by DNA sequencing. See the SangerChromatogram class
|
15
|
+
# for usage.
|
16
|
+
class Abif < SangerChromatogram
|
17
|
+
DATA_TYPES = { 1 => 'byte', 2 => 'char', 3 => 'word', 4 => 'short', 5 => 'long',
|
18
|
+
7 => 'float', 8 => 'double', 10 => 'date', 11 => 'time', 18 => 'pString',
|
19
|
+
19 => 'cString', 12 => 'thumb', 13 => 'bool', 6 => 'rational', 9 => 'BCD',
|
20
|
+
14 => 'point', 15 => 'rect', 16 => 'vPoint', 17 => 'vRect', 20 => 'tag',
|
21
|
+
128 => 'deltaComp', 256 => 'LZWComp', 384 => 'deltaLZW', 1024 => 'user'} # User defined data types have tags numbers >= 1024
|
22
|
+
|
23
|
+
PACK_TYPES = { 'byte' => 'C', 'char' => 'c', 'word' => 'n', 'short' => 'n', 'long' => 'N',
|
24
|
+
'date' => 'nCC', 'time' => 'CCCC', 'pString' => 'CA*', 'cString' => 'Z*',
|
25
|
+
'float' => 'g', 'double' => 'G',
|
26
|
+
'bool' => 'C', 'thumb' => 'NNCC', 'rational' => 'NN', 'point' => 'nn',
|
27
|
+
'rect' => 'nnnn', 'vPoint' => 'NN', 'vRect' => 'NNNN', 'tag' => 'NN'} # Specifies how to pack each data type
|
28
|
+
|
29
|
+
#sequence attributes
|
30
|
+
|
31
|
+
# The sample title as entered when sequencing the sample (String)
|
32
|
+
attr_accessor :sample_title
|
33
|
+
# The chemistry used when sequencing, e.g. Dye terminators => 'term.' (String)
|
34
|
+
attr_accessor :chemistry
|
35
|
+
|
36
|
+
# see SangerChromatogram class for how to create an Abif object and its usage
|
37
|
+
def initialize(string)
|
38
|
+
header = string.slice(0,128)
|
39
|
+
# read in header info
|
40
|
+
@chromatogram_type, @version, @directory_tag_name, @directory_tag_number, @directory_element_type, @directory_element_size, @directory_number_of_elements, @directory_data_size, @directory_data_offset, @directory_data_handle= header.unpack("a4 n a4 N n n N N N N")
|
41
|
+
@version = @version/100.to_f
|
42
|
+
get_directory_entries(string)
|
43
|
+
# get sequence
|
44
|
+
@sequence = @directory_entries["PBAS"][1].data.map{|char| char.chr.downcase}.join("")
|
45
|
+
#get peak indices
|
46
|
+
@peak_indices = @directory_entries["PLOC"][1].data
|
47
|
+
#get qualities
|
48
|
+
@qualities = @directory_entries["PCON"][1].data
|
49
|
+
# get sample title
|
50
|
+
@sample_title = @directory_entries["SMPL"][1].data
|
51
|
+
@directory_entries["PDMF"].size > 2 ? @dye_mobility = @directory_entries["PDMF"][2].data : @dye_mobility = @directory_entries["PDMF"][1].data
|
52
|
+
#get trace data
|
53
|
+
@chemistry = @directory_entries["phCH"][1].data
|
54
|
+
base_order = @directory_entries["FWO_"][1].data.map{|char| char.chr.downcase}
|
55
|
+
(9..12).each do |data_index|
|
56
|
+
self.instance_variable_set("@#{base_order[data_index-9]}trace", @directory_entries["DATA"][data_index].data)
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
# Returns the data for the name.
|
62
|
+
# If not found, returns nil.
|
63
|
+
# ---
|
64
|
+
# *Arguments*:
|
65
|
+
# * (required) _name_: (String) name of the data
|
66
|
+
# * (optional) <em>tag_number</em>: (Integer) tag number (default 1)
|
67
|
+
# *Returns*:: any data type or nil
|
68
|
+
def data(name, tag_number = 1)
|
69
|
+
d = @directory_entries[name]
|
70
|
+
d ? d[tag_number].data : nil
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
def get_directory_entries(string)
|
75
|
+
@directory_entries = Hash.new
|
76
|
+
offset = @directory_data_offset
|
77
|
+
@directory_number_of_elements.times do
|
78
|
+
entry = DirectoryEntry.new
|
79
|
+
entry_fields = string.slice(offset, @directory_element_size)
|
80
|
+
entry.name, entry.tag_number, entry.element_type, entry.element_size, entry.number_of_elements, entry.data_size, entry.data_offset = entry_fields.unpack("a4 N n n N N N")
|
81
|
+
# populate the entry with the data it refers to
|
82
|
+
if entry.data_size > 4
|
83
|
+
get_entry_data(entry, string)
|
84
|
+
else
|
85
|
+
get_entry_data(entry, entry_fields)
|
86
|
+
end
|
87
|
+
if @directory_entries.has_key?(entry.name)
|
88
|
+
@directory_entries[entry.name][entry.tag_number] = entry
|
89
|
+
else
|
90
|
+
@directory_entries[entry.name] = Array.new
|
91
|
+
@directory_entries[entry.name][entry.tag_number] = entry
|
92
|
+
end
|
93
|
+
offset += @directory_element_size
|
94
|
+
end
|
95
|
+
end
|
96
|
+
def get_entry_data(entry, string)
|
97
|
+
if entry.data_size > 4
|
98
|
+
raw_data = string.slice(entry.data_offset, entry.data_size)
|
99
|
+
else
|
100
|
+
raw_data = string.slice(20,4)
|
101
|
+
end
|
102
|
+
if entry.element_type > 1023
|
103
|
+
# user defined data: not processed as yet by this bioruby module
|
104
|
+
entry.data = raw_data
|
105
|
+
else
|
106
|
+
pack_type = PACK_TYPES[DATA_TYPES[entry.element_type]]
|
107
|
+
pack_type.match(/\*/) ? unpack_string = pack_type : unpack_string = "#{pack_type}#{entry.number_of_elements}"
|
108
|
+
entry.data = raw_data.unpack(unpack_string)
|
109
|
+
if pack_type == "CA*" # pascal string where the first byte is a charcter count and should therefore be removed
|
110
|
+
entry.data.shift
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
class DirectoryEntry
|
116
|
+
attr_accessor :name, :tag_number, :element_type, :element_size, :number_of_elements, :data_size, :data_offset
|
117
|
+
attr_accessor :data
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|