bio 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +4 -3
- data/lib/bio.rb +3 -3
- data/lib/bio/appl/blast/format0.rb +3 -2
- data/lib/bio/appl/blast/format8.rb +5 -3
- data/lib/bio/db/kegg/compound.rb +6 -1
- data/lib/bio/db/kegg/enzyme.rb +3 -3
- data/lib/bio/db/kegg/genes.rb +2 -2
- data/lib/bio/db/kegg/glycan.rb +5 -5
- data/lib/bio/db/kegg/orthology.rb +27 -3
- data/lib/bio/db/newick.rb +203 -55
- data/lib/bio/io/flatfile.rb +2 -2
- data/lib/bio/io/flatfile/indexer.rb +2 -2
- data/lib/bio/io/keggapi.rb +2 -1
- data/lib/bio/io/pubmed.rb +223 -81
- data/lib/bio/sequence/common.rb +6 -3
- data/lib/bio/shell/interface.rb +2 -2
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/bioruby_generator.rb +5 -5
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.css +7 -8
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby.rhtml +1 -1
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/index.rhtml +21 -17
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/spinner.gif +0 -0
- data/test/functional/bio/io/test_ensembl.rb +87 -4
- data/test/unit/bio/db/test_newick.rb +238 -1
- data/test/unit/bio/sequence/test_aa.rb +3 -2
- data/test/unit/bio/sequence/test_common.rb +11 -2
- data/test/unit/bio/sequence/test_na.rb +63 -1
- metadata +4 -4
- data/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates/bioruby-console.png +0 -0
    
        data/bin/bioruby
    CHANGED
    
    | @@ -2,17 +2,18 @@ | |
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # = BioRuby shell - command line interface for the BioRuby library
         | 
| 4 4 | 
             
            #
         | 
| 5 | 
            -
            # Copyright::   Copyright (C) 2005, 2006
         | 
| 5 | 
            +
            # Copyright::   Copyright (C) 2005, 2006, 2007
         | 
| 6 6 | 
             
            #               Toshiaki Katayama <k@bioruby.org>
         | 
| 7 7 | 
             
            # License::     The Ruby License
         | 
| 8 8 | 
             
            #
         | 
| 9 | 
            -
            # $Id: bioruby,v 1. | 
| 9 | 
            +
            # $Id: bioruby,v 1.21 2007/07/26 10:46:46 k Exp $
         | 
| 10 10 | 
             
            #
         | 
| 11 11 |  | 
| 12 12 | 
             
            begin
         | 
| 13 13 | 
             
              require 'rubygems'
         | 
| 14 | 
            -
               | 
| 14 | 
            +
              gem 'bio', '>= 1.1.0'
         | 
| 15 15 | 
             
            rescue LoadError
         | 
| 16 | 
            +
              require 'bio'
         | 
| 16 17 | 
             
            end
         | 
| 17 18 | 
             
            require 'bio/shell'
         | 
| 18 19 |  | 
    
        data/lib/bio.rb
    CHANGED
    
    | @@ -1,16 +1,16 @@ | |
| 1 1 | 
             
            #
         | 
| 2 2 | 
             
            # = bio.rb - Loading all BioRuby modules
         | 
| 3 3 | 
             
            #
         | 
| 4 | 
            -
            # Copyright::	Copyright (C) 2001- | 
| 4 | 
            +
            # Copyright::	Copyright (C) 2001-2007
         | 
| 5 5 | 
             
            #		Toshiaki Katayama <k@bioruby.org>
         | 
| 6 6 | 
             
            # License::	The Ruby License
         | 
| 7 7 | 
             
            #
         | 
| 8 | 
            -
            # $Id: bio.rb,v 1. | 
| 8 | 
            +
            # $Id: bio.rb,v 1.87 2007/12/14 16:04:54 k Exp $
         | 
| 9 9 | 
             
            #
         | 
| 10 10 |  | 
| 11 11 | 
             
            module Bio
         | 
| 12 12 |  | 
| 13 | 
            -
              BIORUBY_VERSION = [1,  | 
| 13 | 
            +
              BIORUBY_VERSION = [1, 2, 0].extend(Comparable)
         | 
| 14 14 |  | 
| 15 15 | 
             
              ### Basic data types
         | 
| 16 16 |  | 
| @@ -4,7 +4,7 @@ | |
| 4 4 | 
             
            # Copyright::  Copyright (C) 2003-2006 GOTO Naohisa <ng@bioruby.org>
         | 
| 5 5 | 
             
            # License::    The Ruby License
         | 
| 6 6 | 
             
            #
         | 
| 7 | 
            -
            # $Id: format0.rb,v 1. | 
| 7 | 
            +
            # $Id: format0.rb,v 1.24 2007/12/14 16:12:17 k Exp $
         | 
| 8 8 | 
             
            #
         | 
| 9 9 | 
             
            # == Description
         | 
| 10 10 | 
             
            #
         | 
| @@ -860,9 +860,10 @@ module Bio | |
| 860 860 | 
             
                      # Returns definition of the hit.
         | 
| 861 861 | 
             
                      def definition; parse_hitname; @definition; end
         | 
| 862 862 |  | 
| 863 | 
            +
                      def target_id; definition[/^\s*(\S+)/, 1]; end
         | 
| 864 | 
            +
             | 
| 863 865 | 
             
                      #--
         | 
| 864 866 | 
             
                      # Aliases to keep compatibility with Bio::Fasta::Report::Hit.
         | 
| 865 | 
            -
                      #alias target_id accession
         | 
| 866 867 | 
             
                      alias target_def definition
         | 
| 867 868 | 
             
                      alias target_len len
         | 
| 868 869 | 
             
                      #++
         | 
| @@ -1,10 +1,10 @@ | |
| 1 1 | 
             
            #
         | 
| 2 2 | 
             
            # = bio/appl/blast/format8.rb - BLAST tab-delimited output (-m 8) parser
         | 
| 3 3 | 
             
            # 
         | 
| 4 | 
            -
            # Copyright::  Copyright (C) 2002, 2003 Toshiaki Katayama <k@bioruby.org>
         | 
| 4 | 
            +
            # Copyright::  Copyright (C) 2002, 2003, 2007 Toshiaki Katayama <k@bioruby.org>
         | 
| 5 5 | 
             
            # License::    The Ruby License
         | 
| 6 6 | 
             
            #
         | 
| 7 | 
            -
            # $Id: format8.rb,v 1. | 
| 7 | 
            +
            # $Id: format8.rb,v 1.8 2007/12/14 16:15:20 k Exp $
         | 
| 8 8 | 
             
            #
         | 
| 9 9 | 
             
            # == Note
         | 
| 10 10 | 
             
            #
         | 
| @@ -22,6 +22,7 @@ module Bio | |
| 22 22 | 
             
                    @iterations.push(iteration)
         | 
| 23 23 | 
             
                    @query_id = @query_def = data[/\S+/]
         | 
| 24 24 |  | 
| 25 | 
            +
                    query_prev = ''
         | 
| 25 26 | 
             
                    target_prev = ''
         | 
| 26 27 | 
             
                    hit_num = 1
         | 
| 27 28 | 
             
                    hsp_num = 1
         | 
| @@ -29,7 +30,7 @@ module Bio | |
| 29 30 | 
             
                    data.each do |line|
         | 
| 30 31 | 
             
                      ary = line.chomp.split("\t")
         | 
| 31 32 | 
             
                      query_id, target_id, hsp = tab_parse_hsp(ary)
         | 
| 32 | 
            -
                      if target_prev != target_id
         | 
| 33 | 
            +
                      if query_prev != query_id or target_prev != target_id
         | 
| 33 34 | 
             
                        hit = Hit.new
         | 
| 34 35 | 
             
                        hit.num = hit_num
         | 
| 35 36 | 
             
                        hit_num += 1
         | 
| @@ -41,6 +42,7 @@ module Bio | |
| 41 42 | 
             
                      hsp.num = hsp_num
         | 
| 42 43 | 
             
                      hsp_num += 1
         | 
| 43 44 | 
             
                      hit.hsps.push(hsp)
         | 
| 45 | 
            +
                      query_prev = query_id
         | 
| 44 46 | 
             
                      target_prev = target_id
         | 
| 45 47 | 
             
                    end
         | 
| 46 48 | 
             
                  end
         | 
    
        data/lib/bio/db/kegg/compound.rb
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 4 | 
             
            # Copyright::  Copyright (C) 2001, 2002, 2004, 2007 Toshiaki Katayama <k@bioruby.org>
         | 
| 5 5 | 
             
            # License::    The Ruby License
         | 
| 6 6 | 
             
            #
         | 
| 7 | 
            -
            # $Id: compound.rb,v 0. | 
| 7 | 
            +
            # $Id: compound.rb,v 0.17 2007/11/27 07:09:43 k Exp $
         | 
| 8 8 | 
             
            #
         | 
| 9 9 |  | 
| 10 10 | 
             
            require 'bio/db'
         | 
| @@ -45,6 +45,11 @@ class COMPOUND < KEGGDB | |
| 45 45 | 
             
                field_fetch('MASS').to_f
         | 
| 46 46 | 
             
              end
         | 
| 47 47 |  | 
| 48 | 
            +
              # REMARK
         | 
| 49 | 
            +
              def remark
         | 
| 50 | 
            +
                field_fetch('REMARK')
         | 
| 51 | 
            +
              end
         | 
| 52 | 
            +
             | 
| 48 53 | 
             
              # GLYCAN
         | 
| 49 54 | 
             
              def glycans
         | 
| 50 55 | 
             
                unless @data['GLYCAN']
         | 
    
        data/lib/bio/db/kegg/enzyme.rb
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 4 | 
             
            # Copyright::  Copyright (C) 2001, 2002, 2007 Toshiaki Katayama <k@bioruby.org>
         | 
| 5 5 | 
             
            # License::    The Ruby License
         | 
| 6 6 | 
             
            #
         | 
| 7 | 
            -
            # $Id: enzyme.rb,v 0. | 
| 7 | 
            +
            # $Id: enzyme.rb,v 0.12 2007/12/14 16:20:38 k Exp $
         | 
| 8 8 | 
             
            #
         | 
| 9 9 |  | 
| 10 10 | 
             
            require 'bio/db'
         | 
| @@ -106,9 +106,9 @@ class ENZYME < KEGGDB | |
| 106 106 | 
             
                lines_fetch('PATHWAY')
         | 
| 107 107 | 
             
              end
         | 
| 108 108 |  | 
| 109 | 
            -
              #  | 
| 109 | 
            +
              # ORTHOLOGY
         | 
| 110 110 | 
             
              def orthologs
         | 
| 111 | 
            -
                lines_fetch(' | 
| 111 | 
            +
                lines_fetch('ORTHOLOGY')
         | 
| 112 112 | 
             
              end
         | 
| 113 113 |  | 
| 114 114 | 
             
              # GENES
         | 
    
        data/lib/bio/db/kegg/genes.rb
    CHANGED
    
    | @@ -5,7 +5,7 @@ | |
| 5 5 | 
             
            #               Toshiaki Katayama <k@bioruby.org>
         | 
| 6 6 | 
             
            # License::     The Ruby License
         | 
| 7 7 | 
             
            #
         | 
| 8 | 
            -
            # $Id: genes.rb,v 0. | 
| 8 | 
            +
            # $Id: genes.rb,v 0.26 2007/12/14 16:20:38 k Exp $
         | 
| 9 9 | 
             
            #
         | 
| 10 10 | 
             
            #
         | 
| 11 11 | 
             
            # == KEGG GENES parser
         | 
| @@ -137,7 +137,7 @@ class GENES < KEGGDB | |
| 137 137 | 
             
              end
         | 
| 138 138 |  | 
| 139 139 | 
             
              def orthologs
         | 
| 140 | 
            -
                lines_fetch(' | 
| 140 | 
            +
                lines_fetch('ORTHOLOGY')
         | 
| 141 141 | 
             
              end
         | 
| 142 142 |  | 
| 143 143 | 
             
              def pathway
         | 
    
        data/lib/bio/db/kegg/glycan.rb
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 4 | 
             
            # Copyright::  Copyright (C) 2004 Toshiaki Katayama <k@bioruby.org>
         | 
| 5 5 | 
             
            # License::    The Ruby License
         | 
| 6 6 | 
             
            #
         | 
| 7 | 
            -
            # $Id: glycan.rb,v 1. | 
| 7 | 
            +
            # $Id: glycan.rb,v 1.7 2007/12/14 16:20:38 k Exp $
         | 
| 8 8 | 
             
            #
         | 
| 9 9 |  | 
| 10 10 | 
             
            require 'bio/db'
         | 
| @@ -94,12 +94,12 @@ class GLYCAN < KEGGDB | |
| 94 94 | 
             
                @data['ENZYME']
         | 
| 95 95 | 
             
              end
         | 
| 96 96 |  | 
| 97 | 
            -
              #  | 
| 97 | 
            +
              # ORTHOLOGY
         | 
| 98 98 | 
             
              def orthologs
         | 
| 99 | 
            -
                unless @data[' | 
| 100 | 
            -
                  @data[' | 
| 99 | 
            +
                unless @data['ORTHOLOGY']
         | 
| 100 | 
            +
                  @data['ORTHOLOGY'] = lines_fetch('ORTHOLOGY')
         | 
| 101 101 | 
             
                end
         | 
| 102 | 
            -
                @data[' | 
| 102 | 
            +
                @data['ORTHOLOGY']
         | 
| 103 103 | 
             
              end
         | 
| 104 104 |  | 
| 105 105 | 
             
              # COMMENT
         | 
| @@ -5,7 +5,7 @@ | |
| 5 5 | 
             
            # Copyright::  Copyright (C) 2003 Masumi Itoh <m@bioruby.org>
         | 
| 6 6 | 
             
            # License::    The Ruby License
         | 
| 7 7 | 
             
            #
         | 
| 8 | 
            -
            # $Id: orthology.rb,v 1. | 
| 8 | 
            +
            # $Id: orthology.rb,v 1.10 2007/12/14 16:19:54 k Exp $
         | 
| 9 9 | 
             
            #
         | 
| 10 10 |  | 
| 11 11 | 
             
            require 'bio/db'
         | 
| @@ -67,7 +67,7 @@ class ORTHOLOGY < KEGGDB | |
| 67 67 | 
             
                keggclass.scan(/\[PATH:(.*?)\]/).flatten
         | 
| 68 68 | 
             
              end
         | 
| 69 69 |  | 
| 70 | 
            -
              # Returns  | 
| 70 | 
            +
              # Returns an Array of a database name and entry IDs in DBLINKS field.
         | 
| 71 71 | 
             
              def dblinks
         | 
| 72 72 | 
             
                unless @data['DBLINKS']
         | 
| 73 73 | 
             
                  @data['DBLINKS'] = lines_fetch('DBLINKS')
         | 
| @@ -75,13 +75,37 @@ class ORTHOLOGY < KEGGDB | |
| 75 75 | 
             
                @data['DBLINKS']
         | 
| 76 76 | 
             
              end
         | 
| 77 77 |  | 
| 78 | 
            -
              # Returns a Hash of  | 
| 78 | 
            +
              # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
         | 
| 79 | 
            +
              def dblinks_as_hash
         | 
| 80 | 
            +
                hash = {}
         | 
| 81 | 
            +
                dblinks.each do |line|
         | 
| 82 | 
            +
                  name, *list = line.split(/\s+/)
         | 
| 83 | 
            +
                  db = name.downcase.sub(/:/, '')
         | 
| 84 | 
            +
                  hash[db] = list
         | 
| 85 | 
            +
                end
         | 
| 86 | 
            +
                return hash
         | 
| 87 | 
            +
              end
         | 
| 88 | 
            +
             | 
| 89 | 
            +
              # Returns an Array of the organism ID and entry IDs in GENES field.
         | 
| 79 90 | 
             
              def genes
         | 
| 80 91 | 
             
                unless @data['GENES']
         | 
| 81 92 | 
             
                  @data['GENES'] = lines_fetch('GENES')
         | 
| 82 93 | 
             
                end
         | 
| 83 94 | 
             
                @data['GENES']
         | 
| 84 95 | 
             
              end
         | 
| 96 | 
            +
             | 
| 97 | 
            +
              # Returns a Hash of the organism ID and an Array of entry IDs in GENES field.
         | 
| 98 | 
            +
              def genes_as_hash
         | 
| 99 | 
            +
                hash = {}
         | 
| 100 | 
            +
                genes.each do |line|
         | 
| 101 | 
            +
                  name, *list = line.split(/\s+/)
         | 
| 102 | 
            +
                  org = name.downcase.sub(/:/, '')
         | 
| 103 | 
            +
                  genes = list.map {|x| x.sub(/\(.*\)/, '')}
         | 
| 104 | 
            +
                  #names = list.map {|x| x.scan(/.*\((.*)\)/)}
         | 
| 105 | 
            +
                  hash[org] = genes
         | 
| 106 | 
            +
                end
         | 
| 107 | 
            +
                return hash
         | 
| 108 | 
            +
              end
         | 
| 85 109 |  | 
| 86 110 | 
             
            end # ORTHOLOGY
         | 
| 87 111 |  | 
    
        data/lib/bio/db/newick.rb
    CHANGED
    
    | @@ -6,9 +6,19 @@ | |
| 6 6 | 
             
            #               Daniel Amelang <dan@amelang.net>
         | 
| 7 7 | 
             
            # License::     The Ruby License
         | 
| 8 8 | 
             
            #
         | 
| 9 | 
            -
            # $Id: newick.rb,v 1. | 
| 9 | 
            +
            # $Id: newick.rb,v 1.8 2007/12/12 16:06:22 ngoto Exp $
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # == Description
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
            # This file contains a parser and a formatter for the Newick and NHX formats.
         | 
| 14 | 
            +
            #
         | 
| 15 | 
            +
            # == References
         | 
| 16 | 
            +
            #
         | 
| 17 | 
            +
            # * http://evolution.genetics.washington.edu/phylip/newick_doc.html
         | 
| 18 | 
            +
            # * http://www.phylosoft.org/forester/NHX.html
         | 
| 10 19 | 
             
            #
         | 
| 11 20 |  | 
| 21 | 
            +
            require 'strscan'
         | 
| 12 22 | 
             
            require 'bio/tree'
         | 
| 13 23 |  | 
| 14 24 | 
             
            module Bio
         | 
| @@ -18,6 +28,7 @@ module Bio | |
| 18 28 | 
             
                # newick output
         | 
| 19 29 | 
             
                #+++
         | 
| 20 30 |  | 
| 31 | 
            +
                # default options
         | 
| 21 32 | 
             
                DEFAULT_OPTIONS =
         | 
| 22 33 | 
             
                  { :indent => '  ' }
         | 
| 23 34 |  | 
| @@ -32,10 +43,26 @@ module Bio | |
| 32 43 | 
             
                end
         | 
| 33 44 | 
             
                private :__get_option
         | 
| 34 45 |  | 
| 46 | 
            +
             | 
| 47 | 
            +
                # formats Newick label (unquoted_label or quoted_label)
         | 
| 48 | 
            +
                def __to_newick_format_label(str, options)
         | 
| 49 | 
            +
                  if __get_option(:parser, options) == :naive then
         | 
| 50 | 
            +
                    return str.to_s
         | 
| 51 | 
            +
                  end
         | 
| 52 | 
            +
                  str = str.to_s
         | 
| 53 | 
            +
                  if /([\(\)\,\:\[\]\_\'\x00-\x1f\x7f])/ =~ str then
         | 
| 54 | 
            +
                    # quoted_label
         | 
| 55 | 
            +
                    return "\'" + str.gsub(/\'/, "\'\'") + "\'"
         | 
| 56 | 
            +
                  end
         | 
| 57 | 
            +
                  # unquoted_label
         | 
| 58 | 
            +
                  return str.gsub(/ /, '_')
         | 
| 59 | 
            +
                end
         | 
| 60 | 
            +
                private :__to_newick_format_label
         | 
| 61 | 
            +
             | 
| 35 62 | 
             
                # formats leaf
         | 
| 36 63 | 
             
                def __to_newick_format_leaf(node, edge, options)
         | 
| 37 64 |  | 
| 38 | 
            -
                  label = get_node_name(node) | 
| 65 | 
            +
                  label = __to_newick_format_label(get_node_name(node), options)
         | 
| 39 66 |  | 
| 40 67 | 
             
                  dist = get_edge_distance_string(edge)
         | 
| 41 68 |  | 
| @@ -62,7 +89,7 @@ module Bio | |
| 62 89 | 
             
                # formats leaf for NHX
         | 
| 63 90 | 
             
                def __to_newick_format_leaf_NHX(node, edge, options)
         | 
| 64 91 |  | 
| 65 | 
            -
                  label = get_node_name(node) | 
| 92 | 
            +
                  label = __to_newick_format_label(get_node_name(node), options)
         | 
| 66 93 |  | 
| 67 94 | 
             
                  dist = get_edge_distance_string(edge)
         | 
| 68 95 |  | 
| @@ -165,11 +192,14 @@ module Bio | |
| 165 192 | 
             
                # Returns a newick formatted string.
         | 
| 166 193 | 
             
                # If block is given, the order of the node is sorted
         | 
| 167 194 | 
             
                # (in the same manner as Enumerable#sort).
         | 
| 168 | 
            -
                # | 
| 169 | 
            -
                # | 
| 170 | 
            -
                # | 
| 171 | 
            -
                # | 
| 172 | 
            -
                # | 
| 195 | 
            +
                #
         | 
| 196 | 
            +
                # Available options:
         | 
| 197 | 
            +
                # <tt>:indent</tt>::
         | 
| 198 | 
            +
                #     indent string; set false to disable (default: '  ')
         | 
| 199 | 
            +
                # <tt>:bootstrap_style</tt>::
         | 
| 200 | 
            +
                #     <tt>:disabled</tt> disables bootstrap representations.
         | 
| 201 | 
            +
                #     <tt>:traditional</tt> for traditional style.
         | 
| 202 | 
            +
                #     <tt>:molphy</tt> for Molphy style (default).
         | 
| 173 203 | 
             
                def output_newick(options = {}, &block) #:yields: node1, node2
         | 
| 174 204 | 
             
                  root = @root
         | 
| 175 205 | 
             
                  root ||= self.nodes.first
         | 
| @@ -185,8 +215,11 @@ module Bio | |
| 185 215 | 
             
                # Returns a NHX (New Hampshire eXtended) formatted string.
         | 
| 186 216 | 
             
                # If block is given, the order of the node is sorted
         | 
| 187 217 | 
             
                # (in the same manner as Enumerable#sort).
         | 
| 188 | 
            -
                # | 
| 189 | 
            -
                # | 
| 218 | 
            +
                #
         | 
| 219 | 
            +
                # Available options:
         | 
| 220 | 
            +
                # <tt>:indent</tt>::
         | 
| 221 | 
            +
                #     indent string; set false to disable (default: '  ')
         | 
| 222 | 
            +
                #
         | 
| 190 223 | 
             
                def output_nhx(options = {}, &block) #:yields: node1, node2
         | 
| 191 224 | 
             
                  root = @root
         | 
| 192 225 | 
             
                  root ||= self.nodes.first
         | 
| @@ -257,13 +290,28 @@ module Bio | |
| 257 290 | 
             
                # Creates a new Newick object.
         | 
| 258 291 | 
             
                # _options_ for parsing can be set.
         | 
| 259 292 | 
             
                #
         | 
| 260 | 
            -
                #  | 
| 261 | 
            -
                #  | 
| 262 | 
            -
                #  | 
| 293 | 
            +
                # Available options:
         | 
| 294 | 
            +
                # <tt>:bootstrap_style</tt>::
         | 
| 295 | 
            +
                #     <tt>:traditional</tt> for traditional bootstrap style,
         | 
| 296 | 
            +
                #     <tt>:molphy</tt> for molphy style,
         | 
| 297 | 
            +
                #     <tt>:disabled</tt> to ignore bootstrap strings.
         | 
| 298 | 
            +
                #     For details of default actions, please read the notes below.
         | 
| 299 | 
            +
                # <tt>:parser</tt>::
         | 
| 300 | 
            +
                #     <tt>:naive</tt> for using naive parser, compatible with
         | 
| 301 | 
            +
                #     BioRuby 1.1.0, which ignores quoted strings and
         | 
| 302 | 
            +
                #     does not convert underscores to spaces.
         | 
| 303 | 
            +
                #
         | 
| 304 | 
            +
                # Notes for bootstrap style:
         | 
| 305 | 
            +
                # Molphy-style bootstrap values may always be parsed, even if
         | 
| 306 | 
            +
                # the <tt>options[:bootstrap_style]</tt> is set to
         | 
| 307 | 
            +
                # <tt>:traditional</tt> or <tt>:disabled</tt>.
         | 
| 308 | 
            +
                #
         | 
| 309 | 
            +
                # Note for default or traditional bootstrap style:
         | 
| 310 | 
            +
                # By default, if all of the internal node's names are numeric
         | 
| 263 311 | 
             
                # and there are no NHX and no molphy-style bootstrap values,
         | 
| 264 312 | 
             
                # the names of internal nodes are regarded as bootstrap values.
         | 
| 265 | 
            -
                # options[:bootstrap_style] = :disabled or  | 
| 266 | 
            -
                # (or at least one NHX tag exists).
         | 
| 313 | 
            +
                # <tt>options[:bootstrap_style] = :disabled</tt> or <tt>:molphy</tt>
         | 
| 314 | 
            +
                # to disable the feature (or at least one NHX tag exists).
         | 
| 267 315 | 
             
                def initialize(str, options = nil)
         | 
| 268 316 | 
             
                  str = str.sub(/\;(.*)/m, ';')
         | 
| 269 317 | 
             
                  @original_string = str
         | 
| @@ -308,57 +356,66 @@ module Bio | |
| 308 356 | 
             
                end
         | 
| 309 357 |  | 
| 310 358 | 
             
                # Parses newick formatted leaf (or internal node) name.
         | 
| 311 | 
            -
                def __parse_newick_leaf( | 
| 312 | 
            -
                   | 
| 313 | 
            -
                   | 
| 314 | 
            -
                    node.name =  | 
| 315 | 
            -
                     | 
| 316 | 
            -
             | 
| 317 | 
            -
             | 
| 318 | 
            -
                   | 
| 319 | 
            -
                     | 
| 320 | 
            -
                     | 
| 321 | 
            -
             | 
| 322 | 
            -
             | 
| 323 | 
            -
                     | 
| 324 | 
            -
                    edge.distance_string = $2 if $2 and !($2.strip.empty?)
         | 
| 325 | 
            -
                  else
         | 
| 326 | 
            -
                    node.name = str
         | 
| 359 | 
            +
                def __parse_newick_leaf(leaf_tokens, node, edge, options)
         | 
| 360 | 
            +
                  t = leaf_tokens.shift
         | 
| 361 | 
            +
                  if !t.kind_of?(Symbol) then
         | 
| 362 | 
            +
                    node.name = t
         | 
| 363 | 
            +
                    t = leaf_tokens.shift
         | 
| 364 | 
            +
                  end
         | 
| 365 | 
            +
             | 
| 366 | 
            +
                  if t == :':' then
         | 
| 367 | 
            +
                    t = leaf_tokens.shift
         | 
| 368 | 
            +
                    if !t.kind_of?(Symbol) then
         | 
| 369 | 
            +
                      edge.distance_string = t if t and !(t.strip.empty?)
         | 
| 370 | 
            +
                      t = leaf_tokens.shift
         | 
| 371 | 
            +
                    end
         | 
| 327 372 | 
             
                  end
         | 
| 328 373 |  | 
| 329 | 
            -
                   | 
| 330 | 
            -
             | 
| 374 | 
            +
                  if t == :'[' then
         | 
| 375 | 
            +
                    btokens = leaf_tokens
         | 
| 331 376 | 
             
                    case __get_option(:original_format, options)
         | 
| 332 377 | 
             
                    when :nhx
         | 
| 333 378 | 
             
                      # regarded as NHX string which might be broken
         | 
| 334 | 
            -
                      __parse_nhx( | 
| 379 | 
            +
                      __parse_nhx(btokens, node, edge)
         | 
| 335 380 | 
             
                    when :traditional
         | 
| 336 381 | 
             
                      # simply ignored
         | 
| 337 382 | 
             
                    else
         | 
| 338 | 
            -
                      case  | 
| 383 | 
            +
                      case btokens[0].to_s.strip
         | 
| 384 | 
            +
                      when ''
         | 
| 385 | 
            +
                        # not automatically determined
         | 
| 339 386 | 
             
                      when /\A\&\&NHX/
         | 
| 340 387 | 
             
                        # NHX string
         | 
| 341 388 | 
             
                        # force to set NHX mode
         | 
| 342 389 | 
             
                        @options[:original_format] = :nhx
         | 
| 343 | 
            -
                        __parse_nhx( | 
| 390 | 
            +
                        __parse_nhx(btokens, node, edge)
         | 
| 344 391 | 
             
                      else
         | 
| 345 392 | 
             
                        # Molphy-style bootstrap values
         | 
| 346 393 | 
             
                        # let molphy mode if nothing determined
         | 
| 347 394 | 
             
                        @options[:original_format] ||= :molphy
         | 
| 395 | 
            +
                        bstr = ''
         | 
| 396 | 
            +
                        while t = btokens.shift and t != :']'
         | 
| 397 | 
            +
                          bstr.concat t.to_s
         | 
| 398 | 
            +
                        end
         | 
| 348 399 | 
             
                        node.bootstrap_string = bstr
         | 
| 349 | 
            -
                      end #case  | 
| 400 | 
            +
                      end #case btokens[0]
         | 
| 350 401 | 
             
                    end
         | 
| 351 402 | 
             
                  end
         | 
| 352 403 |  | 
| 404 | 
            +
                  if !btokens and !leaf_tokens.empty? then
         | 
| 405 | 
            +
                    # syntax error?
         | 
| 406 | 
            +
                  end
         | 
| 407 | 
            +
                  node.name ||= '' # compatibility for older BioRuby
         | 
| 408 | 
            +
             | 
| 353 409 | 
             
                  # returns true
         | 
| 354 410 | 
             
                  true
         | 
| 355 411 | 
             
                end
         | 
| 356 412 |  | 
| 357 413 | 
             
                # Parses NHX (New Hampshire eXtended) string
         | 
| 358 | 
            -
                def __parse_nhx( | 
| 359 | 
            -
                   | 
| 360 | 
            -
                   | 
| 361 | 
            -
             | 
| 414 | 
            +
                def __parse_nhx(btokens, node, edge)
         | 
| 415 | 
            +
                  btokens.shift if btokens[0] == '&&NHX'
         | 
| 416 | 
            +
                  btokens.each do |str|
         | 
| 417 | 
            +
                    break if str == :']'
         | 
| 418 | 
            +
                    next if str.kind_of?(Symbol)
         | 
| 362 419 | 
             
                    tag, val = str.split(/\=/, 2)
         | 
| 363 420 | 
             
                    case tag
         | 
| 364 421 | 
             
                    when 'B'
         | 
| @@ -391,6 +448,97 @@ module Bio | |
| 391 448 | 
             
                  true
         | 
| 392 449 | 
             
                end
         | 
| 393 450 |  | 
| 451 | 
            +
                # splits string to tokens
         | 
| 452 | 
            +
                def __parse_newick_tokenize(str, options)
         | 
| 453 | 
            +
                  str = str.chop if str[-1..-1] == ';'
         | 
| 454 | 
            +
                  # http://evolution.genetics.washington.edu/phylip/newick_doc.html
         | 
| 455 | 
            +
                  # quoted_label ==> ' string_of_printing_characters '
         | 
| 456 | 
            +
                  # single quote in quoted_label is '' (two single quotes)
         | 
| 457 | 
            +
                  #
         | 
| 458 | 
            +
             | 
| 459 | 
            +
                  if __get_option(:parser, options) == :naive then
         | 
| 460 | 
            +
                    ary = str.split(/([\(\)\,\:\[\]])/)
         | 
| 461 | 
            +
                    ary.collect! { |x| x.strip!; x.empty? ? nil : x }
         | 
| 462 | 
            +
                    ary.compact!
         | 
| 463 | 
            +
                    ary.collect! do |x|
         | 
| 464 | 
            +
                      if /\A([\(\)\,\:\[\]])\z/ =~ x then
         | 
| 465 | 
            +
                        x.intern
         | 
| 466 | 
            +
                      else
         | 
| 467 | 
            +
                        x
         | 
| 468 | 
            +
                      end
         | 
| 469 | 
            +
                    end
         | 
| 470 | 
            +
                    return ary
         | 
| 471 | 
            +
                  end
         | 
| 472 | 
            +
             | 
| 473 | 
            +
                  tokens = []
         | 
| 474 | 
            +
                  ss = StringScanner.new(str)
         | 
| 475 | 
            +
             | 
| 476 | 
            +
                  while !(ss.eos?)
         | 
| 477 | 
            +
                    if ss.scan(/\s+/) then
         | 
| 478 | 
            +
                      # do nothing
         | 
| 479 | 
            +
             | 
| 480 | 
            +
                    elsif ss.scan(/[\(\)\,\:\[\]]/) then
         | 
| 481 | 
            +
                      # '(' or ')' or ',' or ':' or '[' or ']'
         | 
| 482 | 
            +
                      t = ss.matched
         | 
| 483 | 
            +
                      tokens.push t.intern
         | 
| 484 | 
            +
             | 
| 485 | 
            +
                    elsif ss.scan(/\'/) then
         | 
| 486 | 
            +
                      # quoted_label
         | 
| 487 | 
            +
                      t = ''
         | 
| 488 | 
            +
                      while true
         | 
| 489 | 
            +
                        if ss.scan(/([^\']*)\'/) then
         | 
| 490 | 
            +
                          t.concat ss[1]
         | 
| 491 | 
            +
                          if  ss.scan(/\'/) then
         | 
| 492 | 
            +
                            # single quote in quoted_label
         | 
| 493 | 
            +
                            t.concat ss.matched
         | 
| 494 | 
            +
                          else
         | 
| 495 | 
            +
                            break
         | 
| 496 | 
            +
                          end
         | 
| 497 | 
            +
                        else
         | 
| 498 | 
            +
                          # incomplete quoted_label?
         | 
| 499 | 
            +
                          break
         | 
| 500 | 
            +
                        end
         | 
| 501 | 
            +
                      end #while true
         | 
| 502 | 
            +
                      unless ss.match?(/\s*[\(\)\,\:\[\]]/) or ss.match?(/\s*\z/) then
         | 
| 503 | 
            +
                        # label continues? (illegal, but try to rescue)
         | 
| 504 | 
            +
                        if ss.scan(/[^\(\)\,\:\[\]]+/) then
         | 
| 505 | 
            +
                          t.concat ss.matched.lstrip
         | 
| 506 | 
            +
                        end
         | 
| 507 | 
            +
                      end
         | 
| 508 | 
            +
                      tokens.push t
         | 
| 509 | 
            +
             | 
| 510 | 
            +
                    elsif ss.scan(/[^\(\)\,\:\[\]]+/) then
         | 
| 511 | 
            +
                      # unquoted_label
         | 
| 512 | 
            +
                      t = ss.matched.strip
         | 
| 513 | 
            +
                      t.gsub!(/[\r\n]/, '')
         | 
| 514 | 
            +
                      # unquoted underscore should be converted to blank
         | 
| 515 | 
            +
                      t.gsub!(/\_/, ' ')
         | 
| 516 | 
            +
                      tokens.push t unless t.empty?
         | 
| 517 | 
            +
             | 
| 518 | 
            +
                    else
         | 
| 519 | 
            +
                      # unquoted_label in end of string
         | 
| 520 | 
            +
                      t = ss.rest.strip
         | 
| 521 | 
            +
                      t.gsub!(/[\r\n]/, '')
         | 
| 522 | 
            +
                      # unquoted underscore should be converted to blank
         | 
| 523 | 
            +
                      t.gsub!(/\_/, ' ')
         | 
| 524 | 
            +
                      tokens.push t unless t.empty?
         | 
| 525 | 
            +
                      ss.terminate
         | 
| 526 | 
            +
             | 
| 527 | 
            +
                    end
         | 
| 528 | 
            +
                  end #while !(ss.eos?)
         | 
| 529 | 
            +
             | 
| 530 | 
            +
                  tokens
         | 
| 531 | 
            +
                end
         | 
| 532 | 
            +
             | 
| 533 | 
            +
                # get tokens for a leaf
         | 
| 534 | 
            +
                def __parse_newick_get_tokens_for_leaf(ary)
         | 
| 535 | 
            +
                  r = []
         | 
| 536 | 
            +
                  while t = ary[0] and t != :',' and t != :')' and t != :'('
         | 
| 537 | 
            +
                    r.push ary.shift
         | 
| 538 | 
            +
                  end
         | 
| 539 | 
            +
                  r
         | 
| 540 | 
            +
                end
         | 
| 541 | 
            +
             | 
| 394 542 | 
             
                # Parses newick formatted string.
         | 
| 395 543 | 
             
                def __parse_newick(str, options = {})
         | 
| 396 544 | 
             
                  # initializing
         | 
| @@ -401,40 +549,37 @@ module Bio | |
| 401 549 | 
             
                  internal_nodes = []
         | 
| 402 550 | 
             
                  node_stack = []
         | 
| 403 551 | 
             
                  # preparation of tokens
         | 
| 404 | 
            -
                   | 
| 405 | 
            -
                  ary = str.split(/([\(\)\,])/)
         | 
| 406 | 
            -
                  ary.collect! { |x| x.strip!; x.empty? ? nil : x }
         | 
| 407 | 
            -
                  ary.compact!
         | 
| 552 | 
            +
                  ary = __parse_newick_tokenize(str, options)
         | 
| 408 553 | 
             
                  previous_token = nil
         | 
| 409 554 | 
             
                  # main loop
         | 
| 410 555 | 
             
                  while token = ary.shift
         | 
| 411 556 | 
             
                    #p token
         | 
| 412 557 | 
             
                    case token
         | 
| 413 | 
            -
                    when ','
         | 
| 414 | 
            -
                      if previous_token == ',' or previous_token == '(' then
         | 
| 558 | 
            +
                    when :','
         | 
| 559 | 
            +
                      if previous_token == :',' or previous_token == :'(' then
         | 
| 415 560 | 
             
                        # there is a leaf whose name is empty.
         | 
| 416 561 | 
             
                        ary.unshift(token)
         | 
| 417 562 | 
             
                        ary.unshift('')
         | 
| 418 563 | 
             
                        token = nil
         | 
| 419 564 | 
             
                      end
         | 
| 420 | 
            -
                    when '('
         | 
| 565 | 
            +
                    when :'('
         | 
| 421 566 | 
             
                      node = Node.new
         | 
| 422 567 | 
             
                      nodes << node
         | 
| 423 568 | 
             
                      internal_nodes << node
         | 
| 424 569 | 
             
                      node_stack.push(cur_node)
         | 
| 425 570 | 
             
                      cur_node = node
         | 
| 426 | 
            -
                    when ')'
         | 
| 427 | 
            -
                      if previous_token == ',' or previous_token == '(' then
         | 
| 571 | 
            +
                    when :')'
         | 
| 572 | 
            +
                      if previous_token == :',' or previous_token == :'(' then
         | 
| 428 573 | 
             
                        # there is a leaf whose name is empty.
         | 
| 429 574 | 
             
                        ary.unshift(token)
         | 
| 430 575 | 
             
                        ary.unshift('')
         | 
| 431 576 | 
             
                        token = nil
         | 
| 432 577 | 
             
                      else
         | 
| 433 578 | 
             
                        edge = Edge.new
         | 
| 434 | 
            -
                         | 
| 435 | 
            -
                         | 
| 436 | 
            -
             | 
| 437 | 
            -
                           | 
| 579 | 
            +
                        leaf_tokens = __parse_newick_get_tokens_for_leaf(ary)
         | 
| 580 | 
            +
                        token = nil
         | 
| 581 | 
            +
                        if leaf_tokens.size > 0 then
         | 
| 582 | 
            +
                          __parse_newick_leaf(leaf_tokens, cur_node, edge, options)
         | 
| 438 583 | 
             
                        end
         | 
| 439 584 | 
             
                        parent = node_stack.pop
         | 
| 440 585 | 
             
                        raise ParseError, 'unmatched parentheses' unless parent
         | 
| @@ -444,7 +589,10 @@ module Bio | |
| 444 589 | 
             
                    else
         | 
| 445 590 | 
             
                      leaf = Node.new
         | 
| 446 591 | 
             
                      edge = Edge.new
         | 
| 447 | 
            -
                       | 
| 592 | 
            +
                      ary.unshift(token)
         | 
| 593 | 
            +
                      leaf_tokens = __parse_newick_get_tokens_for_leaf(ary)
         | 
| 594 | 
            +
                      token = nil
         | 
| 595 | 
            +
                      __parse_newick_leaf(leaf_tokens, leaf, edge, options)
         | 
| 448 596 | 
             
                      nodes << leaf
         | 
| 449 597 | 
             
                      edges << Bio::Relation.new(cur_node, leaf, edge)
         | 
| 450 598 | 
             
                    end #case
         |