bio 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. data/.travis.yml +66 -0
  2. data/ChangeLog +989 -4524
  3. data/KNOWN_ISSUES.rdoc +67 -2
  4. data/README.rdoc +89 -23
  5. data/README_DEV.rdoc +93 -2
  6. data/RELEASE_NOTES.rdoc +167 -95
  7. data/Rakefile +199 -7
  8. data/bioruby.gemspec +27 -12
  9. data/bioruby.gemspec.erb +6 -3
  10. data/doc/ChangeLog-before-1.4.2 +5013 -0
  11. data/doc/RELEASE_NOTES-1.4.2.rdoc +132 -0
  12. data/doc/Tutorial.rd +21 -3
  13. data/doc/Tutorial.rd.html +20 -12
  14. data/etc/bioinformatics/seqdatabase.ini +13 -196
  15. data/gemfiles/Gemfile.travis-jruby1.8 +7 -0
  16. data/gemfiles/Gemfile.travis-jruby1.9 +10 -0
  17. data/gemfiles/Gemfile.travis-ruby1.8 +7 -0
  18. data/gemfiles/Gemfile.travis-ruby1.9 +10 -0
  19. data/gemfiles/modify-Gemfile.rb +28 -0
  20. data/gemfiles/prepare-gemspec.rb +25 -0
  21. data/lib/bio/alignment.rb +1 -1
  22. data/lib/bio/appl/bl2seq/report.rb +3 -3
  23. data/lib/bio/appl/blast/ddbj.rb +0 -3
  24. data/lib/bio/appl/blast/format0.rb +4 -22
  25. data/lib/bio/appl/blast/genomenet.rb +33 -16
  26. data/lib/bio/appl/blast/ncbioptions.rb +8 -3
  27. data/lib/bio/appl/blast/remote.rb +6 -5
  28. data/lib/bio/appl/blast/report.rb +10 -6
  29. data/lib/bio/appl/blast/rpsblast.rb +3 -2
  30. data/lib/bio/appl/blast/wublast.rb +3 -3
  31. data/lib/bio/command.rb +118 -36
  32. data/lib/bio/data/na.rb +1 -1
  33. data/lib/bio/db/embl/embl.rb +74 -0
  34. data/lib/bio/db/embl/format_embl.rb +0 -4
  35. data/lib/bio/db/fasta.rb +57 -45
  36. data/lib/bio/db/fasta/defline.rb +1 -1
  37. data/lib/bio/db/fasta/format_fasta.rb +0 -4
  38. data/lib/bio/db/fasta/format_qual.rb +0 -5
  39. data/lib/bio/db/fastq/format_fastq.rb +0 -1
  40. data/lib/bio/db/genbank/format_genbank.rb +0 -4
  41. data/lib/bio/db/gff.rb +41 -12
  42. data/lib/bio/db/kegg/genes.rb +3 -3
  43. data/lib/bio/db/kegg/kgml.rb +465 -64
  44. data/lib/bio/db/newick.rb +0 -244
  45. data/lib/bio/db/pdb.rb +1 -4
  46. data/lib/bio/db/pdb/atom.rb +3 -2
  47. data/lib/bio/db/pdb/chain.rb +2 -3
  48. data/lib/bio/db/pdb/chemicalcomponent.rb +3 -2
  49. data/lib/bio/db/pdb/model.rb +2 -2
  50. data/lib/bio/db/pdb/pdb.rb +2 -1
  51. data/lib/bio/db/pdb/residue.rb +2 -2
  52. data/lib/bio/db/pdb/utils.rb +7 -4
  53. data/lib/bio/db/phyloxml/phyloxml_parser.rb +52 -5
  54. data/lib/bio/feature.rb +2 -3
  55. data/lib/bio/io/flatfile/autodetection.rb +1 -1
  56. data/lib/bio/io/flatfile/buffer.rb +84 -0
  57. data/lib/bio/sequence.rb +6 -4
  58. data/lib/bio/sequence/aa.rb +3 -5
  59. data/lib/bio/sequence/adapter.rb +6 -6
  60. data/lib/bio/sequence/common.rb +3 -3
  61. data/lib/bio/sequence/compat.rb +2 -7
  62. data/lib/bio/sequence/dblink.rb +6 -5
  63. data/lib/bio/sequence/format.rb +0 -6
  64. data/lib/bio/sequence/format_raw.rb +0 -4
  65. data/lib/bio/sequence/generic.rb +3 -4
  66. data/lib/bio/sequence/na.rb +4 -6
  67. data/lib/bio/sequence/quality_score.rb +2 -0
  68. data/lib/bio/sequence/sequence_masker.rb +3 -0
  69. data/lib/bio/shell/core.rb +1 -0
  70. data/lib/bio/tree.rb +1 -2
  71. data/lib/bio/tree/output.rb +264 -0
  72. data/lib/bio/util/restriction_enzyme.rb +1 -3
  73. data/lib/bio/util/restriction_enzyme/analysis.rb +8 -5
  74. data/lib/bio/util/restriction_enzyme/analysis_basic.rb +4 -3
  75. data/lib/bio/util/restriction_enzyme/cut_symbol.rb +3 -2
  76. data/lib/bio/util/restriction_enzyme/dense_int_array.rb +3 -0
  77. data/lib/bio/util/restriction_enzyme/double_stranded.rb +3 -4
  78. data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +3 -4
  79. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +3 -4
  80. data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +3 -4
  81. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +3 -4
  82. data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +3 -4
  83. data/lib/bio/util/restriction_enzyme/range/cut_range.rb +3 -4
  84. data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +3 -4
  85. data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +3 -4
  86. data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +3 -4
  87. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +3 -4
  88. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -4
  89. data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +3 -4
  90. data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +3 -4
  91. data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -3
  92. data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +3 -4
  93. data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +3 -4
  94. data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +3 -0
  95. data/lib/bio/util/restriction_enzyme/string_formatting.rb +3 -4
  96. data/lib/bio/version.rb +11 -2
  97. data/sample/seqdatabase.ini +210 -0
  98. data/test/bioruby_test_helper.rb +37 -12
  99. data/test/data/KEGG/test.kgml +37 -0
  100. data/test/data/command/echoarg2.bat +0 -0
  101. data/test/data/command/echoarg2.sh +4 -0
  102. data/test/functional/bio/test_command.rb +58 -28
  103. data/test/{functional → network}/bio/appl/blast/test_remote.rb +0 -0
  104. data/test/{functional → network}/bio/appl/test_blast.rb +0 -0
  105. data/test/{functional → network}/bio/appl/test_pts1.rb +0 -0
  106. data/test/{functional → network}/bio/io/test_ddbjrest.rb +0 -0
  107. data/test/{functional → network}/bio/io/test_ensembl.rb +0 -0
  108. data/test/{functional → network}/bio/io/test_pubmed.rb +0 -0
  109. data/test/{functional → network}/bio/io/test_soapwsdl.rb +0 -0
  110. data/test/{functional → network}/bio/io/test_togows.rb +0 -0
  111. data/test/network/bio/test_command.rb +35 -0
  112. data/test/runner.rb +16 -6
  113. data/test/unit/bio/appl/blast/test_report.rb +119 -0
  114. data/test/unit/bio/appl/blast/test_rpsblast.rb +1 -0
  115. data/test/unit/bio/data/test_na.rb +1 -1
  116. data/test/unit/bio/db/embl/test_embl.rb +2 -7
  117. data/test/unit/bio/db/embl/test_embl_rel89.rb +2 -7
  118. data/test/unit/bio/db/fasta/test_defline.rb +1 -1
  119. data/test/unit/bio/db/genbank/test_genpept.rb +1 -1
  120. data/test/unit/bio/db/kegg/test_drug.rb +1 -1
  121. data/test/unit/bio/db/kegg/test_genome.rb +1 -1
  122. data/test/unit/bio/db/kegg/test_glycan.rb +1 -1
  123. data/test/unit/bio/db/kegg/test_kgml.rb +1022 -0
  124. data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +2 -1
  125. data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +4 -2
  126. data/test/unit/bio/db/test_newick.rb +2 -0
  127. data/test/unit/bio/db/test_phyloxml.rb +54 -2
  128. data/test/unit/bio/db/test_phyloxml_writer.rb +15 -9
  129. data/test/unit/bio/db/test_soft.rb +1 -1
  130. data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -0
  131. data/test/unit/bio/io/flatfile/test_buffer.rb +141 -0
  132. data/test/unit/bio/sequence/test_common.rb +36 -4
  133. data/test/unit/bio/sequence/test_na.rb +1 -1
  134. data/test/unit/bio/test_command.rb +9 -4
  135. data/test/unit/bio/test_sequence.rb +2 -2
  136. data/test/unit/bio/test_tree.rb +11 -11
  137. data/test/unit/bio/util/test_restriction_enzyme.rb +1 -1
  138. metadata +1428 -655
  139. data/rdoc.zsh +0 -8
@@ -5,12 +5,11 @@
5
5
  # 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: feature.rb,v 1.13.2.1 2008/03/04 10:12:22 ngoto Exp $
9
-
10
- require 'bio/location'
11
8
 
12
9
  module Bio
13
10
 
11
+ autoload :Locations, 'bio/location' unless const_defined?(:Locations)
12
+
14
13
  # = DESCRIPTION
15
14
  # Container for the sequence annotation.
16
15
  #
@@ -482,7 +482,7 @@ module Bio
482
482
  /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
483
483
 
484
484
  fastq = RuleRegexp[ 'Bio::Fastq',
485
- /^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))+\+.*(?:\r|\r?\n).+(?:\r|\r?\n)/ ],
485
+ /^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))+/ ],
486
486
 
487
487
  fastaformat = RuleProc.new('Bio::FastaFormat',
488
488
  'Bio::NBRF',
@@ -43,17 +43,101 @@ module Bio
43
43
  # *arg is passed to File.open.
44
44
  #
45
45
  # Like File.open, a block can be accepted.
46
+ #
47
+ # Unlike File.open, the default is binary mode, unless text mode
48
+ # is explicitly specified in mode.
def self.open_file(filename, *arg)
  # Text mode is used only when it is requested explicitly, either with
  # the :textmode option or with a "t" flag in the mode string.
  # In every other case the file is switched to binary mode.
  params = _parse_file_open_arg(*arg)
  if params[:textmode] or /t/ =~ params[:fmode_string].to_s then
    textmode = true
  else
    textmode = false
  end

  if block_given? then
    # File.open closes the file when the block finishes.
    File.open(filename, *arg) do |fobj|
      fobj.binmode unless textmode
      yield self.new(fobj, filename)
    end
  else
    # Without a block the returned wrapper owns the file.  If wrapping
    # fails, nobody would ever get a handle to close, so the file is
    # closed here before the exception propagates.
    fobj = File.open(filename, *arg)
    succeeded = false
    begin
      fobj.binmode unless textmode
      wrapper = self.new(fobj, filename)
      succeeded = true
      wrapper
    ensure
      fobj.close unless succeeded or fobj.closed?
    end
  end
end
56
67
 
# Parses file open mode parameter.
# mode must be an Integer or a String.
#
# Returns a Hash:
# * <tt>{ :fmode_integer => n }</tt> when mode converts with to_int.
# * <tt>{ :fmode_string => "r", ... }</tt> when mode converts with to_str.
#   A string of the form "fmode:ext_enc:int_enc" additionally sets
#   :external_encoding and :internal_encoding when those parts exist.
#
# Returns nil when mode is neither integer-like nor string-like.
def self._parse_file_open_mode(mode)
  # respond_to? is used instead of rescuing NoMethodError, so that a
  # NoMethodError raised inside a to_int/to_str implementation is not
  # silently swallowed.
  modeint = mode.to_int if mode.respond_to?(:to_int)
  return { :fmode_integer => modeint } if modeint

  modestr = mode.to_str if mode.respond_to?(:to_str)
  return nil unless modestr

  fmode, ext_enc, int_enc = modestr.split(/\:/)
  ret = { :fmode_string => fmode }
  ret[:external_encoding] = ext_enc if ext_enc
  ret[:internal_encoding] = int_enc if int_enc
  ret
end
95
+ private_class_method :_parse_file_open_mode
96
+
# Parses file open arguments
#
# Accepts the arguments that follow the file name in File.open:
# an optional mode (Integer or String), an optional permission
# (Integer, only looked for when a mode was recognized), and an
# optional option Hash.
#
# Returns a Hash that may contain :fmode_integer or :fmode_string,
# :perm, :external_encoding, :internal_encoding, :textmode, :binmode
# and :autoclose.  Keys whose value is nil or false are omitted.
def self._parse_file_open_arg(*arg)
  fmode_hash = nil
  perm = nil

  elem = arg.shift
  if elem then
    fmode_hash = _parse_file_open_mode(elem)
    if fmode_hash then
      # The first argument was a mode, so the next one may be a permission.
      elem = arg.shift
      perm = elem.to_int if elem.respond_to?(:to_int)
      elem = arg.shift if perm
    end
  end

  # Whatever remains in elem is the option hash, if it is a Hash at all.
  if elem.kind_of?(Hash) then
    opt = elem.dup
  else
    opt = {}
  end

  # A :mode option replaces a positional mode.
  if elem = opt[:mode] then
    fmode_hash = _parse_file_open_mode(elem)
  end
  fmode_hash ||= {}
  fmode_hash[:perm] = perm if perm

  # :encoding is "external" or "external:internal".  The value is kept
  # in its own variable; assigning the result of empty? (a boolean) and
  # then splitting it would raise NoMethodError for every non-empty value.
  enc = opt[:encoding].to_s
  unless enc.empty? then
    ext_enc, int_enc = enc.split(/\:/)
    fmode_hash[:external_encoding] = ext_enc if ext_enc
    fmode_hash[:internal_encoding] = int_enc if int_enc
  end

  # Options given individually are applied last.
  [ :external_encoding, :internal_encoding,
    :textmode, :binmode, :autoclose, :perm ].each do |key|
    val = opt[key]
    fmode_hash[key] = val if val
  end
  fmode_hash
end
139
+ private_class_method :_parse_file_open_arg
140
+
57
141
  # Creates a new input stream wrapper from URI specified as _uri_.
58
142
  # by using OpenURI.open_uri or URI#open.
59
143
  # _uri_ must be a String or URI object.
@@ -9,10 +9,6 @@
9
9
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
10
10
  # License:: The Ruby License
11
11
  #
12
- # $Id:$
13
- #
14
-
15
- require 'bio/sequence/compat'
16
12
 
17
13
  module Bio
18
14
 
@@ -74,6 +70,12 @@ class Sequence
74
70
  autoload :QualityScore, 'bio/sequence/quality_score'
75
71
  autoload :SequenceMasker, 'bio/sequence/sequence_masker'
76
72
 
73
+ #--
74
+ # require "bio/sequence/compat.rb" here to avoid circular require and
75
+ # possible superclass mismatch of AA class
76
+ #++
77
+ require 'bio/sequence/compat'
78
+
77
79
  include Format
78
80
  include SequenceMasker
79
81
 
@@ -6,14 +6,12 @@
6
6
  # Ryan Raaum <ryan@raaum.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: aa.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
10
- #
11
-
12
- require 'bio/sequence/common'
13
9
 
14
10
  module Bio
15
11
 
16
- autoload :AminoAcid, 'bio/data/aa'
12
+ autoload :AminoAcid, 'bio/data/aa' unless const_defined?(:AminoAcid)
13
+
14
+ require 'bio/sequence' unless const_defined?(:Sequence)
17
15
 
18
16
  class Sequence
19
17
 
@@ -5,10 +5,10 @@
5
5
  # Naohisa Goto <ng@bioruby.org>,
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
- #
10
8
 
11
- require 'bio/sequence'
9
+ module Bio
10
+
11
+ require 'bio/sequence' unless const_defined?(:Sequence)
12
12
 
13
13
  # Internal use only. Normal users should not use this module.
14
14
  #
@@ -17,7 +17,7 @@ require 'bio/sequence'
17
17
  #
18
18
  # This module is used by using "extend", not "include".
19
19
  #
20
- module Bio::Sequence::Adapter
20
+ module Sequence::Adapter
21
21
 
22
22
  autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
23
23
  autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
@@ -107,6 +107,6 @@ __END_OF_DEF__
107
107
  true
108
108
  end
109
109
 
110
- end #module Bio::Sequence::Adapter
111
-
110
+ end #module Sequence::Adapter
112
111
 
112
+ end #module Bio
@@ -6,12 +6,12 @@
6
6
  # Ryan Raaum <ryan@raaum.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id:$
10
- #
11
9
 
12
10
  module Bio
13
11
 
14
- autoload :Locations, 'bio/location'
12
+ autoload :Locations, 'bio/location' unless const_defined?(:Locations)
13
+
14
+ require 'bio/sequence' unless const_defined?(:Sequence)
15
15
 
16
16
  class Sequence
17
17
 
@@ -6,17 +6,12 @@
6
6
  # Ryan Raaum <ryan@raaum.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id:$
10
- #
11
-
12
9
 
13
10
  module Bio
14
11
 
15
- class Sequence
12
+ require 'bio/sequence' unless const_defined?(:Sequence)
16
13
 
17
- autoload :Common, 'bio/sequence/common'
18
- autoload :NA, 'bio/sequence/na'
19
- autoload :AA, 'bio/sequence/aa'
14
+ class Sequence
20
15
 
21
16
  # Return sequence as
22
17
  # String[http://corelib.rubyonrails.org/classes/String.html].
@@ -5,15 +5,15 @@
5
5
  # Naohisa Goto <ng@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: dblink.rb,v 1.1.2.1 2008/06/17 15:44:22 ngoto Exp $
9
- #
10
8
 
11
- require 'bio/sequence'
9
+ module Bio
10
+
11
+ require 'bio/sequence' unless const_defined?(:Sequence)
12
12
 
13
13
  # Bio::Sequence::DBLink stores IDs with the database name.
14
14
  # Its main purpose is to store database cross-reference information
15
15
  # for a sequence entry.
16
- class Bio::Sequence::DBLink
16
+ class Sequence::DBLink
17
17
 
18
18
  # creates a new DBLink object
19
19
  def initialize(database, primary_id, *secondary_ids)
@@ -49,6 +49,7 @@ class Bio::Sequence::DBLink
49
49
  self.new(*(str.split(/\s*\;\s*/)))
50
50
  end
51
51
 
52
- end #class Bio::Sequence::DBLink
52
+ end #class Sequence::DBLink
53
53
 
54
+ end #module Bio
54
55
 
@@ -8,12 +8,6 @@
8
8
  # Jan Aerts <jan.aerts@bbsrc.ac.uk>
9
9
  # License:: The Ruby License
10
10
  #
11
- # = TODO
12
- #
13
- # porting from N. Goto's feature-output.rb on BioRuby list.
14
- #
15
- # $Id: format.rb,v 1.4.2.8 2008/06/17 15:50:05 ngoto Exp $
16
- #
17
11
 
18
12
  require 'erb'
19
13
 
@@ -4,10 +4,6 @@
4
4
  # Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
5
5
  # License:: The Ruby License
6
6
  #
7
- # $Id: format_raw.rb,v 1.1.2.1 2008/03/04 11:28:46 ngoto Exp $
8
- #
9
-
10
- require 'bio/sequence/format'
11
7
 
12
8
  module Bio::Sequence::Format::Formatter
13
9
 
@@ -5,12 +5,11 @@
5
5
  # Toshiaki Katayama <k@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: generic.rb,v 1.5 2007/04/05 23:35:41 trevor Exp $
9
- #
10
-
11
- require 'bio/sequence/common'
12
8
 
13
9
  module Bio
10
+
11
+ require 'bio/sequence' unless const_defined?(:Sequence)
12
+
14
13
  class Sequence
15
14
 
16
15
  class Generic < String #:nodoc:
@@ -6,15 +6,13 @@
6
6
  # Ryan Raaum <ryan@raaum.org>
7
7
  # License:: The Ruby License
8
8
  #
9
- # $Id: na.rb,v 1.7 2007/04/23 16:43:51 trevor Exp $
10
- #
11
-
12
- require 'bio/sequence/common'
13
9
 
14
10
  module Bio
15
11
 
16
- autoload :NucleicAcid, 'bio/data/na'
17
- autoload :CodonTable, 'bio/data/codontable'
12
+ autoload :NucleicAcid, 'bio/data/na' unless const_defined?(:NucleicAcid)
13
+ autoload :CodonTable, 'bio/data/codontable' unless const_defined?(:CodonTable)
14
+
15
+ require 'bio/sequence' unless const_defined?(:Sequence)
18
16
 
19
17
  class Sequence
20
18
 
@@ -18,6 +18,8 @@
18
18
 
19
19
  module Bio
20
20
 
21
+ require 'bio/sequence' unless const_defined?(:Sequence)
22
+
21
23
  class Sequence
22
24
 
23
25
  # Bio::Sequence::QualityScore is a name space for quality score modules.
@@ -14,6 +14,9 @@
14
14
  #
15
15
 
16
16
  module Bio
17
+
18
+ require 'bio/sequence' unless const_defined?(:Sequence)
19
+
17
20
  class Sequence
18
21
 
19
22
  # Bio::Sequence::SequenceMasker is a mix-in module to provide helpful
@@ -343,6 +343,7 @@ module Bio::Shell::Ghost
343
343
  File.open(file, "w") do |f|
344
344
  bind = Bio::Shell.cache[:binding]
345
345
  list = eval("local_variables", bind)
346
+ list.collect! { |x| x.to_s }
346
347
  list -= ["_"]
347
348
  hash = {}
348
349
  list.each do |elem|
@@ -5,7 +5,6 @@
5
5
  # Naohisa Goto <ng@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id:$
9
8
  #
10
9
 
11
10
  require 'matrix'
@@ -913,5 +912,5 @@ end #module Bio
913
912
  #---
914
913
  # temporary added
915
914
  #+++
916
- require 'bio/db/newick'
915
+ require 'bio/tree/output'
917
916
 
@@ -0,0 +1,264 @@
1
+ #
2
+ # = bio/tree/output.rb - Phylogenetic tree formatter
3
+ #
4
+ # Copyright:: Copyright (C) 2004-2006
5
+ # Naohisa Goto <ng@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ #
9
+ # == Description
10
+ #
11
+ # This file contains formatter of Newick, NHX and Phylip distance matrix.
12
+ #
13
+ # == References
14
+ #
15
+ # * http://evolution.genetics.washington.edu/phylip/newick_doc.html
16
+ # * http://www.phylosoft.org/forester/NHX.html
17
+ #
18
+
module Bio
  class Tree

    #---
    # newick output
    #+++

    # default options
    DEFAULT_OPTIONS =
      { :indent => ' ' }

    # Looks up an output option.
    # The value in _options_ is used first, then the value in @options
    # (when that instance variable is set), then DEFAULT_OPTIONS.
    # A value counts as "given" whenever it is not nil, so false can be
    # used to switch an option off.
    def __get_option(key, options)
      if (r = options[key]) != nil then
        r
      elsif @options && (r = @options[key]) != nil then
        r
      else
        DEFAULT_OPTIONS[key]
      end
    end
    private :__get_option


    # formats Newick label (unquoted_label or quoted_label)
    #
    # With the option <tt>:parser => :naive</tt> the label is returned
    # without any change.  Otherwise a label containing a character that
    # is not allowed in an unquoted label is wrapped in single quotes
    # (embedded single quotes are doubled), and in any other label
    # spaces are replaced with underscores.
    def __to_newick_format_label(str, options)
      str = str.to_s
      return str if __get_option(:parser, options) == :naive
      # The semicolon ends a tree in Newick, so it must be quoted as well
      # as parentheses, comma, colon, brackets, underscore, single quote
      # and control characters.
      if /([\(\)\,\:\;\[\]\_\'\x00-\x1f\x7f])/ =~ str then
        # quoted_label
        return "\'" + str.gsub(/\'/, "\'\'") + "\'"
      end
      # unquoted_label
      str.gsub(/ /, '_')
    end
    private :__to_newick_format_label

    # formats leaf
    #
    # Returns the label of _node_ followed by the branch length taken
    # from _edge_ and the bootstrap value of _node_, laid out according
    # to the <tt>:bootstrap_style</tt> option.  The branch length is
    # omitted when <tt>:branch_length_style</tt> is <tt>:disabled</tt>.
    def __to_newick_format_leaf(node, edge, options)
      label = __to_newick_format_label(get_node_name(node), options)
      dist = get_edge_distance_string(edge)
      bs = get_node_bootstrap_string(node)

      if __get_option(:branch_length_style, options) == :disabled
        dist = nil
      end
      dist_part = (dist ? ":#{dist}" : '')

      case __get_option(:bootstrap_style, options)
      when :disabled
        label + dist_part
      when :traditional
        label + (bs ? bs : '') + dist_part
      else
        # :molphy, and also the default
        label + dist_part + (bs ? "[#{bs}]" : '')
      end
    end
    private :__to_newick_format_leaf

    # formats leaf for NHX
    #
    # Returns the label and branch length like the plain Newick
    # formatter, followed by a <tt>[&&NHX:key=value:...]</tt> section
    # when at least one NHX value is available.  Keys are written in
    # alphabetical order.
    def __to_newick_format_leaf_NHX(node, edge, options)
      label = __to_newick_format_label(get_node_name(node), options)
      dist = get_edge_distance_string(edge)
      bs = get_node_bootstrap_string(node)

      if __get_option(:branch_length_style, options) == :disabled
        dist = nil
      end

      nhx = {}

      # bootstrap
      nhx[:B] = bs if bs and !(bs.empty?)
      # The attributes below are read only when the corresponding
      # instance variable is defined in the node (or edge) and the
      # reader returns a true value.
      # EC number
      nhx[:E] = node.ec_number if node.instance_eval {
        defined?(@ec_number) && self.ec_number
      }
      # scientific name
      nhx[:S] = node.scientific_name if node.instance_eval {
        defined?(@scientific_name) && self.scientific_name
      }
      # taxonomy id
      nhx[:T] = node.taxonomy_id if node.instance_eval {
        defined?(@taxonomy_id) && self.taxonomy_id
      }

      # :D (gene duplication or speciation)
      if node.instance_eval { defined?(@events) && !(self.events.empty?) } then
        if node.events.include?(:gene_duplication)
          nhx[:D] = 'Y'
        elsif node.events.include?(:speciation)
          nhx[:D] = 'N'
        end
      end

      # log likelihood
      nhx[:L] = edge.log_likelihood if edge.instance_eval {
        defined?(@log_likelihood) && self.log_likelihood }
      # width
      nhx[:W] = edge.width if edge.instance_eval {
        defined?(@width) && self.width }

      # merges other parameters (edge parameters win over node parameters)
      flag = node.instance_eval { defined? @nhx_parameters }
      nhx.merge!(node.nhx_parameters) if flag
      flag = edge.instance_eval { defined? @nhx_parameters }
      nhx.merge!(edge.nhx_parameters) if flag

      nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key|
        "#{key.to_s}=#{nhx[key].to_s}"
      end.join(':')
      nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty?

      label + (dist ? ":#{dist}" : '') + nhx_string
    end
    private :__to_newick_format_leaf_NHX

    # Formats the subtree below _source_ as a parenthesized list.
    #
    # _parents_ :: nodes already visited on the way from the root; edges
    #              leading back to them are skipped.
    # _source_ :: node whose children are written.
    # _depth_ :: nesting level, used only for indentation.
    # _format_leaf_ :: name of the method that formats a single node.
    # _options_ :: output options (see __get_option).
    #
    # When a block is given it is used to order the children at every
    # level of the tree.  Otherwise children are ordered by order_number
    # when both nodes have one, and by name if not.
    def __to_newick(parents, source, depth, format_leaf,
                    options, &block)
      result = []
      if indent_string = __get_option(:indent, options) then
        indent0 = indent_string * depth
        indent  = indent_string * (depth + 1)
        newline = "\n"
      else
        indent0 = indent = newline = ''
      end
      edges = self.out_edges(source)
      if block_given? then
        edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) }
      else
        edges.sort! do |edge1, edge2|
          o1 = edge1[1].order_number
          o2 = edge2[1].order_number
          if o1 and o2 then
            o1 <=> o2
          else
            edge1[1].name.to_s <=> edge2[1].name.to_s
          end
        end
      end
      edges.each do |src, tgt, edge|
        # do not walk back towards the root
        next if parents.include?(tgt)
        if self.out_degree(tgt) == 1 then
          # a node whose only edge leads to its parent is a leaf
          result << indent + __send__(format_leaf, tgt, edge, options)
        else
          # The block must be forwarded; without it the given order
          # would be applied to the children of the root only.
          result <<
            __to_newick([ src ].concat(parents), tgt, depth + 1,
                        format_leaf, options, &block) +
            __send__(format_leaf, tgt, edge, options)
        end
      end
      indent0 + "(" + newline + result.join(',' + newline) +
        (result.size > 0 ? newline : '') + indent0 + ')'
    end
    private :__to_newick

    # Returns a newick formatted string.
    # If block is given, the order of the node is sorted
    # (in the same manner as Enumerable#sort).
    #
    # Returns '();' when the tree has no node.
    #
    # Available options:
    # <tt>:indent</tt>::
    #     indent string; set false to disable (default: ' ')
    # <tt>:bootstrap_style</tt>::
    #     <tt>:disabled</tt> disables bootstrap representations.
    #     <tt>:traditional</tt> for traditional style.
    #     <tt>:molphy</tt> for Molphy style (default).
    # <tt>:branch_length_style</tt>::
    #     <tt>:disabled</tt> omits branch lengths.
    # <tt>:parser</tt>::
    #     <tt>:naive</tt> writes labels without quoting or escaping.
    def output_newick(options = {}, &block) #:yields: node1, node2
      root = @root
      root ||= self.nodes.first
      return '();' unless root
      __to_newick([], root, 0, :__to_newick_format_leaf, options, &block) +
        __to_newick_format_leaf(root, Edge.new, options) +
        ";\n"
    end

    alias newick output_newick


    # Returns a NHX (New Hampshire eXtended) formatted string.
    # If block is given, the order of the node is sorted
    # (in the same manner as Enumerable#sort).
    #
    # Returns '();' when the tree has no node.
    #
    # Available options:
    # <tt>:indent</tt>::
    #     indent string; set false to disable (default: ' ')
    #
    def output_nhx(options = {}, &block) #:yields: node1, node2
      root = @root
      root ||= self.nodes.first
      return '();' unless root
      __to_newick([], root, 0,
                  :__to_newick_format_leaf_NHX, options, &block) +
        __to_newick_format_leaf_NHX(root, Edge.new, options) +
        ";\n"
    end

    # Returns formatted text (or something) of the tree
    # Currently supported format is: :newick, :nhx,
    # :phylip_distance_matrix
    #
    # The remaining arguments and the block are passed to the method
    # that handles the format.  Raises RuntimeError for any other format.
    def output(format, *arg, &block)
      case format
      when :newick
        output_newick(*arg, &block)
      when :nhx
        output_nhx(*arg, &block)
      when :phylip_distance_matrix
        output_phylip_distance_matrix(*arg, &block)
      else
        raise 'Unknown format'
      end
    end

    #---
    # Is this file a suitable place for this method?
    #+++

    # Generates phylip-style distance matrix as a string.
    # if nodes is not given, all leaves in the tree are used.
    # If the names of some of the given (or default) nodes
    # are not defined or are empty, the names are automatically generated.
    def output_phylip_distance_matrix(nodes = nil, options = {})
      nodes = self.leaves unless nodes
      names = nodes.collect do |x|
        y = get_node_name(x)
        # to_s makes an undefined (nil) name count as an empty one
        y = sprintf("%x", x.__id__.abs) if y.to_s.empty?
        y
      end
      m = self.distance_matrix(nodes)
      Bio::Phylip::DistanceMatrix.generate(m, names, options)
    end

  end #class Tree

end #module Bio