RubyGems - semin-ulla - Versions diffs - 0.9.6 → 0.9.7 - Mend

semin-ulla 0.9.6 → 0.9.7

Files changed (15) hide show

data/History.txt +11 -0
data/Manifest.txt +0 -1
data/README.rdoc +9 -8
data/lib/narray_extensions.rb +9 -6
data/lib/nmatrix_extensions.rb +10 -8
data/lib/ulla.rb +1 -1
data/lib/ulla/cli.rb +372 -274
data/lib/ulla/environment.rb +8 -4
data/lib/ulla/environment_class_hash.rb +6 -2
data/lib/ulla/environment_feature.rb +5 -5
data/lib/ulla/environment_feature_array.rb +8 -0
data/script/console +3 -3
data/ulla.gemspec +9 -10
metadata +7 -10
data/.autotest +0 -5

data/History.txt CHANGED Viewed

@@ -1,3 +1,14 @@
+== 0.9.7 30/05/2009
+* Added --environment option to consider not only substituted amino acids' environments but also substituting amino acids' environments
+* Changed the default pseudocount from (1 / # of environment classes) to 0; a pseudocount now needs to be explicitly provided with the --add option
+* Changed the fixed column size (7) of raw count matrices to vary depending on the number of digits of a maximum amino acid count
+* Fixed a bug in reading an environment feature definition file (empty line)
+== 0.9.6 18/03/2009
+* Warns if Rmagick is not properly installed or missing.
 == 0.9.5 18/03/2009
 * Fixed a bug in the order of requiring libraries

data/Manifest.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-.autotest
 History.txt
 Manifest.txt
 PostInstall.txt

data/README.rdoc CHANGED Viewed

@@ -18,7 +18,7 @@ http://www-cryst.bioc.cam.ac.uk/ulla
 == Requirements
-* ruby 1.8.7 or above (http://www.ruby-lang.org)
+* ruby 1.8.7 or above (1.9.0 or above recommended, http://www.ruby-lang.org)
 * rubygems 1.2.0 or above (http://rubyforge.org/projects/rubygems)
 Following RubyGems will be automatically installed if you have rubygems installed on your machine
@@ -49,7 +49,10 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
     --classdef (-c) FILE: a file for the definition of environments (default: 'classdef.dat')
     --outfile (-o) FILE: output filename (default 'allmat.dat')
     --weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
-    --noweight: calculate substitution count with no weights
+    --noweight: calculate substitution counts with no weights
+    --environment (-e) INTEGER:
+        0 for considering only substituted amino acids' environments (default)
+        1 for considering both substituted and substituting amino acids' environments
     --smooth (-s) INTEGER:
         0 for partial smoothing (default)
         1 for full smoothing
@@ -60,14 +63,14 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
         1 for both structure and sequence
         2 for using only C for both (must be set when you have no 'disulphide' or 'disulfide' annotation in templates)
     --output INTEGER:
-        0 for raw count (no smoothing performed)
+        0 for raw counts (no smoothing performed)
         1 for probabilities
-        2 for log odds ratios (default)
+        2 for log-odds (default)
     --noroundoff: do not round off log odds ratio
-    --scale INTEGER: log odds ratio matrices in 1/n bit units (default 3)
+    --scale INTEGER: log-odds matrices in 1/n bit units (default 3)
     --sigma DOUBLE: change the sigma value for smoothing (default 5.0)
     --autosigma: automatically adjust the sigma value for smoothing
-    --add DOUBLE: add this value to raw count when deriving log odds ratios without smoothing (default 1/#classes)
+    --add DOUBLE: add this value to raw counts when deriving log-odds without smoothing (default 0)
     --pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
     --pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
     --heatmap INTEGER:
@@ -234,8 +237,6 @@ It's pretty much the same as Kenji's subst (http://www-cryst.bioc.cam.ac.uk/~ken
 == TODO
-* Substitution table generation considering both the source and the target environmental classes
 == Repository
 You can download a pre-built RubyGems package from

data/lib/narray_extensions.rb CHANGED Viewed

@@ -4,16 +4,19 @@ require 'facets'
 module NArrayExtensions
-  def pretty_string(opts={})
-    { :col_header   => nil,
-      :row_header   => nil }.merge!(opts)
+  def pretty_string(options={})
+    opts = {:col_header => nil,
+            :row_header => nil,
+            :col_size   => 7}.merge(options)
-    ("%-3s" % "#") + opts[:col_header].inject("") { |s, a| s + ("%7s" % a) } + "\n" +
+    ("%-3s" % "#") + opts[:col_header].inject("") { |s, a|
+      s + ("%#{opts[:col_size]}s" % a)
+    } + "\n" +
       self.to_a.inject("%-3s" % opts[:row_header]) { |s, v|
       if v.is_a? Float
-        s + ("%7.2f" % v)
+        s + ("%#{opts[:col_size]}.2f" % v)
       else
-        s + ("%7d" % v)
+        s + ("%#{opts[:col_size]}d" % v)
       end
     }
   end

data/lib/nmatrix_extensions.rb CHANGED Viewed

@@ -13,17 +13,18 @@ end
 module NMatrixExtensions
   def pretty_string(options={})
-    opts = {:col_header   => nil,
-            :row_header   => nil }.merge(options)
+    opts = {:col_header => nil,
+            :row_header => nil,
+            :col_size   => 7}.merge(options)
     ("%-3s" % "#") + opts[:col_header].inject("") { |s, a|
-      s + ("%7s" % a)
+      s + ("%#{opts[:col_size]}s" % a)
     } + "\n" + self.to_a.map_with_index { |a, i|
       ("%-3s" % opts[:row_header][i]) + a.inject("") { |s, v|
         if v.is_a? Float
-          s + ("%7.2f" % v)
+          s + ("%#{opts[:col_size]}.2f" % v)
         else
-          s + ("%7d" % v)
+          s + ("%#{opts[:col_size]}d" % v)
         end
       }
     }.join("\n")
@@ -67,6 +68,7 @@ module NMatrixExtensions
             :title?                 => true,
             :title                  => '',
             :title_font_size        => 35,
+            :title_font_scale       => 1.0,
             :print_value            => false,
             :key_font_size          => 15,
             :value_font_size        => 8,
@@ -75,15 +77,15 @@ module NMatrixExtensions
     RVG::dpi = opts[:dpi]
     rvg = RVG.new(opts[:rvg_width], opts[:rvg_height]) do |canvas|
-      title_x = (opts[:canvas_width] - opts[:title].length * opts[:title_font_size] * 0.6) / 2.0
-      title_y = opts[:header_height] - opts[:title_font_size] * 0.7
+      title_x = (opts[:canvas_width] - opts[:title].length * opts[:title_font_size] * opts[:title_font_scale] / 2.0) / 2.0
+      title_y = opts[:header_height] - opts[:title_font_size] * opts[:title_font_scale]
       canvas.viewbox(0, 0, opts[:canvas_width], opts[:canvas_height])
       canvas.background_fill = opts[:background]
       canvas.desc = opts[:title]
       if opts[:title?]
-        canvas.text(title_x, title_y, opts[:title]).styles(:font_size => opts[:title_font_size])
+        canvas.text(title_x, title_y, opts[:title]).styles(:font_size => opts[:title_font_size] * opts[:title_font_scale])
       end
       # border for whole matrix

data/lib/ulla.rb CHANGED Viewed

@@ -2,5 +2,5 @@ $:.unshift(File.dirname(__FILE__)) unless
   $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
 module Ulla
-  VERSION = '0.9.6'
+  VERSION = '0.9.7'
 end

data/lib/ulla/cli.rb CHANGED Viewed

@@ -39,6 +39,9 @@ Options:
     --outfile (-o) FILE: output filename (default 'allmat.dat')
     --weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting (default: 60)
     --noweight: calculate substitution counts with no weights
+    --environment (-e) INTEGER:
+        0 for considering only substituted amino acids' environments (default)
+        1 for considering both substituted and substituting amino acids' environments
     --smooth (-s) INTEGER:
         0 for partial smoothing (default)
         1 for full smoothing
@@ -56,7 +59,7 @@ Options:
     --scale INTEGER: log-odds matrices in 1/n bit units (default 3)
     --sigma DOUBLE: change the sigma value for smoothing (default 5.0)
     --autosigma: automatically adjust the sigma value for smoothing
-    --add DOUBLE: add this value to raw counts when deriving log-odds without smoothing (default 1/#classes)
+    --add DOUBLE: add this value to raw counts when deriving log-odds without smoothing (default 0)
     --pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
     --pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
     --heatmap INTEGER:
@@ -88,22 +91,23 @@ Options:
       # :call-seq:
       #   Ulla::CLI::calculate_pid(seq1, seq2) -> Float
       #
-      def calculate_pid(seq1, seq2)
-        aas1  = seq1.split('')
-        aas2  = seq2.split('')
+      def calculate_pid(seq1, seq2, unit)
+        aas1  = seq1.scan(/\w{#{unit}}/)
+        aas2  = seq2.scan(/\w{#{unit}}/)
         cols  = aas1.zip(aas2)
+        gap   = ($gap || '-') * unit
         align = 0 # no. of aligned columns
         ident = 0 # no. of identical columns
         intgp = 0 # no. of internal gaps
         cols.each do |col|
-          if (col[0] != '-') && (col[1] != '-')
+          if (col[0] != gap) && (col[1] != gap)
             align += 1
             if col[0] == col[1]
               ident += 1
             end
-          elsif (((col[0] == '-') && (col[1] != '-')) ||
-                 ((col[0] != '-') && (col[1] == '-')))
+          elsif (((col[0] == gap) && (col[1] != gap)) ||
+                 ((col[0] != gap) && (col[1] == gap)))
             intgp += 1
           end
         end
@@ -148,8 +152,11 @@ Options:
         # default set of 21 amino acids including J (Cysteine, the free thiol form)
         $amino_acids    = 'ACDEFGHIKLMNPQRSTVWYJ'.split('')
+        $gap            = '-'
         $tem_list       = nil
         $tem_file       = nil
+        $environment    = 0
+        $col_size       = nil
         $classdef       = 'classdef.dat'
         $outfile        = 'allmat.dat'
         $outfh          = nil # file handle for outfile
@@ -176,7 +183,7 @@ Options:
         $heatmapcol     = nil
         $heatmapformat  = 'png'
         $heatmapstem    = 'heatmaps'
-        $heatmapvalues   = false
+        $heatmapvalues  = false
         $rvg_width      = 550
         $rvg_height     = 650
         $canvas_width   = 550
@@ -189,7 +196,6 @@ Options:
         $aa_mutb      = {}
         $aa_rel_mutb  = {}
         $aa_tot_freq  = {}
-        $aa_env_cnt   = Hash.new(0)
         $smooth_prob  = {}
         $tot_cnt_mat  = nil
         $tot_prob_mat = nil
@@ -209,30 +215,32 @@ Options:
         #
         opts = GetoptLong.new(
-          [ '--help',     '-h', GetoptLong::NO_ARGUMENT ],
-          [ '--tem-list', '-l', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--tem-file', '-f', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--classdef', '-c', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--smooth',   '-s', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--nosmooth',       GetoptLong::NO_ARGUMENT ],
-          [ '--p1smooth',       GetoptLong::NO_ARGUMENT ],
-          [ '--weight',   '-w', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--noweight',       GetoptLong::NO_ARGUMENT ],
-          [ '--noroundoff',     GetoptLong::NO_ARGUMENT ],
-          [ '--sigma',          GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--autosigma',      GetoptLong::NO_ARGUMENT ],
-          [ '--heatmap',        GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--heatmap-stem',   GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--heatmap-format', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--heatmap-columns',GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--heatmap-values', GetoptLong::NO_ARGUMENT ],
-          [ '--output',         GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--targetenv','-t', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--cys',      '-y', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--penv',           GetoptLong::NO_ARGUMENT ],
-          [ '--outfile',  '-o', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--verbose',  '-v', GetoptLong::REQUIRED_ARGUMENT ],
-          [ '--version',        GetoptLong::NO_ARGUMENT ]
+          [ '--help',         '-h', GetoptLong::NO_ARGUMENT ],
+          [ '--tem-list',     '-l', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--tem-file',     '-f', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--classdef',     '-c', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--environment',  '-e', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--smooth',       '-s', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--nosmooth',           GetoptLong::NO_ARGUMENT ],
+          [ '--p1smooth',           GetoptLong::NO_ARGUMENT ],
+          [ '--weight',       '-w', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--noweight',           GetoptLong::NO_ARGUMENT ],
+          [ '--noroundoff',         GetoptLong::NO_ARGUMENT ],
+          [ '--sigma',              GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--autosigma',          GetoptLong::NO_ARGUMENT ],
+          [ '--add',                GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--heatmap',            GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--heatmap-stem',       GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--heatmap-format',     GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--heatmap-columns',    GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--heatmap-values',     GetoptLong::NO_ARGUMENT ],
+          [ '--output',             GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--targetenv',    '-t', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--cys',          '-y', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--penv',               GetoptLong::NO_ARGUMENT ],
+          [ '--outfile',      '-o', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--verbose',      '-v', GetoptLong::REQUIRED_ARGUMENT ],
+          [ '--version',            GetoptLong::NO_ARGUMENT ]
         )
         begin
@@ -247,6 +255,8 @@ Options:
               $tem_file     = arg
             when '--classdef'
               $classdef     = arg
+            when '--environment'
+              $environment  = arg.to_i
             when '--output'
               $output       = arg.to_i
             when '--outfile'
@@ -335,7 +345,7 @@ Options:
           exit 1
         end
-        # warn if any input file is missing
+        # warn if any mandatory input file is missing
         if $tem_list && !File.exist?($tem_list)
           warn "Cannot find template list file, #{$tem_list}"
           exit 1
@@ -372,12 +382,12 @@ Options:
         # Reading Environment Class Definition File
         #
-        # check --cys option and modify amino_acids set if necessary
+        # if --cys option 2, then we don't care about 'J' (for both Cysteine and Cystine)
         if $cys == 2
-          $amino_acids = 'ACDEFGHIKLMNPQRSTVWY'.split('')
+          $amino_acids = 'ACDEFGHIKLMNPQRSTVWY'.delete('J')
         end
-        # create an EnvironmentFeatureList object for storing all environment
+        # create an EnvironmentFeatureArray object for storing all environment
         # features
         $env_features = EnvironmentFeatureArray.new
@@ -398,9 +408,9 @@ Options:
         IO.foreach($classdef) do |line|
           line.chomp!
-          if line.start_with?('#')
+          if line.start_with?('#') || line.blank?
             next
-          elsif (env_ftr = line.chomp.split(/;/)).length == 5
+          elsif (env_ftr = line.split(/;/)).length == 5
             $logger.info "An environment feature, #{line} detected."
             if env_ftr[-1] == 'T'
               # skip silenced environment feature
@@ -418,23 +428,39 @@ Options:
                                                     env_ftr[4])
             env_index += 1
           else
-            $logger.error "\"#{line}\" doesn't seem to be a proper format for" +
-                          "a environment class definition."
+            $logger.error "\"#{line}\" doesn't seem to be a proper format for " +
+                          "an environment class definition."
             exit 1
           end
         end
+        # set the size of amino acid column unit, extended gap
+        # and extended amino acid labels
+        $col_size         = $environment == 1 ? $env_features.size : 1
+        $ext_gap          = $gap * $col_size
+        $ext_amino_acids  = []
         # a hash for storing all environment classes
         $env_classes = EnvironmentClassHash.new
         # generate all possible combinations of environment labels, and store
         # every environment class into the hash prepared above with the label
         # as a key
-        $env_features.label_combinations.each_with_index { |e, i|
-          $env_classes[e.flatten.join] = Environment.new(i,
-                                                         e.flatten.join,
-                                                         $amino_acids)
-        }
+        $env_features.label_combinations.each_with_index do |ef1, i|
+          key1 = ef1.flatten.join
+          $ext_amino_acids << key1
+          if $environment == 0
+            $env_classes[key1] = Environment.new(i, key1, $amino_acids)
+          else
+            # when considering both substituted and substituting amino acids' environments,
+            # add target (substituting) aa's environment label
+            $env_features.label_combinations_without_aa_type.each_with_index do |ef2, j|
+              key2 = key1 + "-" + ef2.flatten.join
+              $env_classes[key2] = Environment.new(i + j, key2, $amino_acids)
+            end
+          end
+        end
         #
         # Part 3 END
@@ -512,9 +538,7 @@ Options:
                   if env_labels[key].empty?
                     env_labels[key] = labels
                   else
-                    env_labels[key].each_with_index { |e, i|
-                      env_labels[key][i] = e + labels[i]
-                    }
+                    env_labels[key].each_with_index { |e, i| env_labels[key][i] = e + labels[i] }
                   end
                 end
               end
@@ -523,97 +547,92 @@ Options:
           if $noweight
             ali.each_pair do |id1, seq1|
+              if $environment == 1
+                seq1 = seq1.split('').map_with_index { |aa, pos| aa == $gap ? $ext_gap : env_labels[id1][pos] }.join
+              end
               ali.each_pair do |id2, seq2|
                 if id1 != id2
-                  pid  = calculate_pid(seq1, seq2)
-                  s1 = seq1.split('')
-                  s2 = seq2.split('')
+                  if $environment == 1
+                    seq2 = seq2.split('').map_with_index { |aa, pos| aa == $gap ? $ext_gap : env_labels[id2][pos] }.join
+                  end
+                  pid = calculate_pid(seq1, seq2, $col_size)
+                  s1  = seq1.scan(/\S{#{$col_size}}/)
+                  s2  = seq2.scan(/\S{#{$col_size}}/)
                   # check PID_MIN
                   if $pidmin && (pid < $pidmin)
-                    $logger.info  "Skip alignment between #{id1} and #{id2} " +
-                                  "having PID, #{pid}% less than PID_MIN, #{$pidmin}."
+                    $logger.info  "Skip alignment between #{id1} and #{id2} having PID, #{pid}% less than PID_MIN, #{$pidmin}."
                     next
                   end
                   # check PID_MAX
                   if $pidmax && (pid > $pidmax)
-                    $logger.info  "Skip alignment between #{id1} and #{id2} " +
-                                  "having PID, #{pid}% greater than PID_MAX, #{$pidmax}."
+                    $logger.info  "Skip alignment between #{id1} and #{id2} having PID, #{pid}% greater than PID_MAX, #{$pidmax}."
                     next
                   end
                   s1.each_with_index do |aa1, pos|
-                    aa1.upcase!
-                    aa2 = s2[pos].upcase
+                    aa2 = s2[pos]
                     if env_labels[id1][pos].include?('X')
-                      $logger.info "Substitutions from #{id1}-#{pos}-#{aa1} were masked."
+                      $logger.info "Substitutions from #{id1}-#{pos}-#{aa1[0].chr} were masked."
                       next
                     end
                     if env_labels[id2][pos].include?('X')
-                      $logger.info "Substitutions to #{id2}-#{pos}-#{aa2} were masked."
+                      $logger.info "Substitutions to #{id2}-#{pos}-#{aa2[0].chr} were masked."
                       next
                     end
-                    unless $amino_acids.include?(aa1)
-                      $logger.warn "#{id1}-#{pos}-#{aa1} is not a standard amino acid." unless aa1 == "-"
+                    unless $amino_acids.include?(aa1[0].chr)
+                      $logger.warn "#{id1}-#{pos}-#{aa1[0].chr} is not a standard amino acid." unless aa1 == $ext_gap
                       next
                     end
-                    unless $amino_acids.include?(aa2)
-                      $logger.warn "#{id1}-#{pos}-#{aa2} is not a standard amino acid." unless aa2 == "-"
+                    unless $amino_acids.include?(aa2[0].chr)
+                      $logger.warn "#{id1}-#{pos}-#{aa2[0].chr} is not a standard amino acid." unless aa2 == $ext_gap
                       next
                     end
-                    aa1 = (disulphide.has_key?(id1) && (disulphide[id1][pos] == 'F') && (aa1 == 'C') && ($cys != 2)) ? 'J' : aa1
-                    aa2 = (disulphide.has_key?(id2) && (disulphide[id2][pos] == 'F') && (aa2 == 'C') && ($cys != 2)) ? 'J' : aa2
+                    aa1       = (disulphide.has_key?(id1) && (disulphide[id1][pos] == 'F') && (aa1[0].chr == 'C') && ($cys != 2)) ? 'J' + aa1[1..-1] : aa1
+                    aa2       = (disulphide.has_key?(id2) && (disulphide[id2][pos] == 'F') && (aa2[0].chr == 'C') && ($cys != 2)) ? 'J' + aa2[1..-1] : aa2
+                    env_label = $environment == 1 ? aa1 + '-' + aa2[1..-1] : env_labels[id1][pos]
                     if $cst_features.empty?
-                      $env_classes[env_labels[id1][pos]].increase_residue_count(aa2)
+                      $env_classes[env_label].increase_residue_count(aa2[0].chr)
                     elsif (env_labels[id1][pos].split('').values_at(*$cst_features) == env_labels[id2][pos].split('').values_at(*$cst_features))
-                      $env_classes[env_labels[id1][pos]].increase_residue_count(aa2)
+                      $env_classes[env_label].increase_residue_count(aa2[0].chr)
                     else
-                      $logger.debug "Skipped #{id1}-#{pos}-#{aa1} and #{id2}-#{pos}-#{aa2}, they have different symbols for constrained environment features each other."
+                      $logger.debug "Skipped #{id1}-#{pos}-#{aa1[0].chr} and #{id2}-#{pos}-#{aa2[0].chr} having different symbols for constrained environment features each other."
                       next
                     end
-                    grp_label = env_labels[id1][pos][1..-1]
-                    if $aa_env_cnt.has_key? grp_label
-                      if $aa_env_cnt[grp_label].has_key? aa1
-                        $aa_env_cnt[grp_label][aa1] += 1
-                      else
-                        $aa_env_cnt[grp_label][aa1] = 1
-                      end
-                    else
-                      $aa_env_cnt[grp_label] = Hash.new(0)
-                      $aa_env_cnt[grp_label][aa1] = 1
-                    end
-                    if $aa_tot_cnt.has_key? aa1
-                      $aa_tot_cnt[aa1] += 1
-                    else
-                      $aa_tot_cnt[aa1] = 1
-                    end
+                    $aa_tot_cnt.has_key?(aa1) ? $aa_tot_cnt[aa1] += 1 : $aa_tot_cnt[aa1] = 1
+                    $aa_mut_cnt.has_key?(aa1) ? $aa_mut_cnt[aa1] += 1 : $aa_mut_cnt[aa1] = 1 if aa1 != aa2
-                    if aa1 != aa2
-                      if $aa_mut_cnt.has_key? aa1
-                        $aa_mut_cnt[aa1] += 1
-                      else
-                        $aa_mut_cnt[aa1] = 1
-                      end
-                    end
-                    $logger.debug "#{id1}-#{pos}-#{aa1} -> #{id2}-#{pos}-#{aa2} substitution count (1) was added to the environments class, #{env_labels[id1][pos]}."
+                    $logger.debug "#{id1}-#{pos}-#{aa1[0].chr} -> #{id2}-#{pos}-#{aa2[0].chr} substitution count (1) was added to the environments class, #{env_label}."
                   end
                 end
               end
             end
           else
             # BLOSUM-like weighting
-            clusters = []
-            ali.each_pair { |i, s| clusters << [i] }
+            clusters  = []
+            ext_ali   = Bio::Alignment::OriginalAlignment.new
+            ali.each_pair do |key, seq|
+              clusters << [key]
+              if $environment == 1
+                ext_seq = seq.split('').map_with_index { |aa, pos| aa == $gap ? $ext_gap : env_labels[key][pos] }.join
+                ext_ali.add_seq(ext_seq, key)
+              end
+            end
+            if $environment == 1
+              ali = ext_ali
+            end
             # a loop for single linkage clustering
             begin
@@ -624,7 +643,7 @@ Options:
                   found = false
                   clusters[i].each do |c1|
                     clusters[j].each do |c2|
-                      if calculate_pid(ali[c1], ali[c2]) >= $weight
+                      if calculate_pid(ali[c1], ali[c2], $col_size) >= $weight
                         indexes << j
                         found = true
                         break
@@ -655,102 +674,58 @@ Options:
             clusters.combination(2).each do |cluster1, cluster2|
               cluster1.each do |id1|
                 cluster2.each do |id2|
-                  seq1 = ali[id1].split('')
-                  seq2 = ali[id2].split('')
+                  seq1 = ali[id1].scan(/\S{#{$col_size}}/)
+                  seq2 = ali[id2].scan(/\S{#{$col_size}}/)
                   seq1.each_with_index do |aa1, pos|
-                    aa1.upcase!
-                    aa2 = seq2[pos].upcase rescue next # should fix this in a sane way!
+                    aa2 = seq2[pos]
                     if env_labels[id1][pos].include?('X')
-                      $logger.debug "All substitutions from #{id1}-#{pos}-#{aa1} are masked."
+                      $logger.debug "All substitutions from #{id1}-#{pos}-#{aa1[0].chr} are masked."
                       next
                     end
                     if env_labels[id2][pos].include?('X')
-                      $logger.debug "All substitutions to #{id2}-#{pos}-#{aa2} are masked."
+                      $logger.debug "All substitutions to #{id2}-#{pos}-#{aa2[0].chr} are masked."
                       next
                     end
-                    unless $amino_acids.include?(aa1)
-                      $logger.warn "#{id1}-#{pos}-#{aa1} is not standard amino acid." unless aa1 == "-"
+                    unless $amino_acids.include?(aa1[0].chr)
+                      $logger.warn "#{id1}-#{pos}-#{aa1[0].chr} is not standard amino acid." unless aa1 == $ext_gap
                       next
                     end
-                    unless $amino_acids.include?(aa2)
-                      $logger.warn "#{id2}-#{pos}-#{aa2} is not standard amino acid." unless aa2 == "-"
+                    unless $amino_acids.include?(aa2[0].chr)
+                      $logger.warn "#{id2}-#{pos}-#{aa2[0].chr} is not standard amino acid." unless aa2 == $ext_gap
                       next
                     end
-                    aa1   = (disulphide.has_key?(id1) && (disulphide[id1][pos] == 'F') && (aa1 == 'C') && ($cys != 2)) ? 'J' : aa1
-                    aa2   = (disulphide.has_key?(id2) && (disulphide[id2][pos] == 'F') && (aa2 == 'C') && ($cys != 2)) ? 'J' : aa2
-                    cnt1  = 1.0 / cluster1.size
-                    cnt2  = 1.0 / cluster2.size
-                    jnt_cnt = cnt1 * cnt2
+                    aa1         = (disulphide.has_key?(id1) && (disulphide[id1][pos] == 'F') && (aa1[0].chr == 'C') && ($cys != 2)) ? 'J' + aa1[1..-1] : aa1
+                    aa2         = (disulphide.has_key?(id2) && (disulphide[id2][pos] == 'F') && (aa2[0].chr == 'C') && ($cys != 2)) ? 'J' + aa2[1..-1] : aa2
+                    cnt1        = 1.0 / cluster1.size.to_f
+                    cnt2        = 1.0 / cluster2.size.to_f
+                    jnt_cnt     = cnt1 * cnt2
+                    env_label1  = $environment == 1 ? aa1 + '-' + aa2[1..-1] : env_labels[id1][pos]
+                    env_label2  = $environment == 1 ? aa2 + '-' + aa1[1..-1] : env_labels[id2][pos]
                     if $cst_features.empty?
-                      $env_classes[env_labels[id1][pos]].increase_residue_count(aa2, jnt_cnt)
-                      $env_classes[env_labels[id2][pos]].increase_residue_count(aa1, jnt_cnt)
+                      $env_classes[env_label1].increase_residue_count(aa2[0].chr, jnt_cnt)
+                      $env_classes[env_label2].increase_residue_count(aa1[0].chr, jnt_cnt)
                     elsif (env_labels[id1][pos].split('').values_at(*$cst_features) == env_labels[id2][pos].split('').values_at(*$cst_features))
-                      $env_classes[env_labels[id1][pos]].increase_residue_count(aa2, jnt_cnt)
-                      $env_classes[env_labels[id2][pos]].increase_residue_count(aa1, jnt_cnt)
+                      $env_classes[env_label1].increase_residue_count(aa2[0].chr, jnt_cnt)
+                      $env_classes[env_label2].increase_residue_count(aa1[1].chr, jnt_cnt)
                     else
-                      $logger.debug "#{id1}-#{pos}-#{aa1} and #{id2}-#{pos}-#{aa2} have different symbols for constrained environment features each other."
+                      $logger.debug "Skipped #{id1}-#{pos}-#{aa1[0].chr} and #{id2}-#{pos}-#{aa2[0].chr} having different symbols for constrained environment features each other."
                       next
                     end
-                    grp_label1 = env_labels[id1][pos][1..-1]
-                    grp_label2 = env_labels[id2][pos][1..-1]
+                    $aa_tot_cnt.has_key?(aa1) ? $aa_tot_cnt[aa1] += cnt1 : $aa_tot_cnt[aa1] = cnt1
+                    $aa_tot_cnt.has_key?(aa2) ? $aa_tot_cnt[aa2] += cnt2 : $aa_tot_cnt[aa2] = cnt2
+                    $aa_mut_cnt.has_key?(aa1) ? $aa_mut_cnt[aa1] += cnt1 : $aa_mut_cnt[aa1] = cnt1 if aa1 == aa2
+                    $aa_mut_cnt.has_key?(aa2) ? $aa_mut_cnt[aa2] += cnt2 : $aa_mut_cnt[aa2] = cnt2 if aa1 == aa2
-                    if $aa_env_cnt.has_key? grp_label1
-                      if $aa_env_cnt[grp_label1].has_key? aa1
-                        $aa_env_cnt[grp_label1][aa1] += cnt1
-                      else
-                        $aa_env_cnt[grp_label1][aa1] = cnt1
-                      end
-                    else
-                      $aa_env_cnt[grp_label1] = Hash.new(0.0)
-                      $aa_env_cnt[grp_label1][aa1] = cnt1
-                    end
-                    if $aa_env_cnt.has_key? grp_label2
-                      if $aa_env_cnt[grp_label2].has_key? aa2
-                        $aa_env_cnt[grp_label2][aa2] += cnt2
-                      else
-                        $aa_env_cnt[grp_label2][aa2] = cnt2
-                      end
-                    else
-                      $aa_env_cnt[grp_label2] = Hash.new(0.0)
-                      $aa_env_cnt[grp_label2][aa2] = cnt2
-                    end
-                    if $aa_tot_cnt.has_key? aa1
-                      $aa_tot_cnt[aa1] += cnt1
-                    else
-                      $aa_tot_cnt[aa1] = cnt1
-                    end
-                    if $aa_tot_cnt.has_key? aa2
-                      $aa_tot_cnt[aa2] += cnt2
-                    else
-                      $aa_tot_cnt[aa2] = cnt2
-                    end
-                    if aa1 != aa2
-                      if $aa_mut_cnt.has_key? aa1
-                        $aa_mut_cnt[aa1] += cnt1
-                      else
-                        $aa_mut_cnt[aa1] = cnt1
-                      end
-                      if $aa_mut_cnt.has_key? aa2
-                        $aa_mut_cnt[aa2] += cnt2
-                      else
-                        $aa_mut_cnt[aa2] = cnt2
-                      end
-                    end
-                    $logger.debug "#{id1}-#{pos}-#{aa1} -> #{id2}-#{pos}-#{aa2} substitution count (#{"%.2f" % jnt_cnt}) was added to the environments class, #{env_labels[id1][pos]}."
-                    $logger.debug "#{id2}-#{pos}-#{aa2} -> #{id1}-#{pos}-#{aa1} substitution count (#{"%.2f" % jnt_cnt}) was added to the environments class, #{env_labels[id2][pos]}."
+                    $logger.debug "#{id1}-#{pos}-#{aa1[0].chr} -> #{id2}-#{pos}-#{aa2[0].chr} substitution count (#{"%.2f" % jnt_cnt}) was added to the environments class, #{env_label1}."
+                    $logger.debug "#{id2}-#{pos}-#{aa2[0].chr} -> #{id1}-#{pos}-#{aa1[0].chr} substitution count (#{"%.2f" % jnt_cnt}) was added to the environments class, #{env_label2}."
                   end
                 end
               end
@@ -799,66 +774,108 @@ HEADER
           $outfh.puts "# Weighting scheme: clustering at PID #{$weight} level"
         end
+        if $environment == 0
+          $outfh.puts '#'
+          $outfh.puts '# Considered environments: substituted a.a.'
+        else
+          $outfh.puts '#'
+          $outfh.puts '# Considered environments: substituted a.a. and substituting a.a.'
+        end
         # calculate amino acid frequencies and mutabilities, and
         # print them as default statistics in the header part
-        ala_factor  = if $aa_tot_cnt['A'] == 0
-                        0.0
-                      elsif $aa_mut_cnt['A'] == 0
-                        0.0
-                      else
-                        100.0 * $aa_tot_cnt['A'] / $aa_mut_cnt['A'].to_f
-                      end
-        $tot_aa     = $aa_tot_cnt.values.sum
+        if $environment == 0
+          ala_factor  = if $aa_tot_cnt['A'] == 0
+                          0.0
+                        elsif $aa_mut_cnt['A'] == 0
+                          0.0
+                        else
+                          100.0 * $aa_tot_cnt['A'] / $aa_mut_cnt['A'].to_f
+                        end
+        end
+        $tot_aa = $aa_tot_cnt.values.sum
         $outfh.puts '#'
         $outfh.puts "# Total amino acid frequencies:\n"
-        $outfh.puts "# %-3s %9s %9s %5s %8s %8s" % %w[RES TOT_OBS MUT_OBS MUTB REL_MUTB REL_FREQ]
-        min_cnt = -1
-        min_sigma = nil
+        if $environment == 0
+          $outfh.puts "# %-3s %9s %9s %5s %8s %8s" % %w[RES TOT_OBS MUT_OBS MUTB REL_MUTB REL_FREQ]
+        else
+          $outfh.puts "# %-3s %-#{$env_features.size}s %9s %9s %8s" % %w[RES ENV TOT_OBS MUT_OBS REL_FREQ]
+        end
-        $amino_acids.each do |res|
-          if ($aa_tot_cnt[res] / $sigma) < $min_cnt_sigma_ratio
-            if min_cnt < 0
-              min_cnt = $aa_tot_cnt[res]
-              min_sigma = min_cnt / $min_cnt_sigma_ratio
-            elsif (min_cnt > 0) && (min_cnt > $aa_tot_cnt[res])
-              min_cnt = $aa_tot_cnt[res]
-              min_sigma = min_cnt / $min_cnt_sigma_ratio
+        min_cnt   = 0
+        min_sigma = nil
+        aas       = $environment == 0 ? $amino_acids : $ext_amino_acids
+        aas.each do |aa|
+          if ($aa_tot_cnt[aa] / $sigma) < $min_cnt_sigma_ratio
+            if $aa_tot_cnt[aa] > 0 and min_cnt > $aa_tot_cnt[aa]
+              min_cnt = $aa_tot_cnt[aa]
+            elsif min_cnt == 0
+              min_cnt = 1
             end
-            $logger.warn "The current sigma value, #{$sigma} seems to be too big for the total count (#{"%.2f" % $aa_tot_cnt[res]}) of amino acid, #{res}."
-          end
+            min_sigma = min_cnt / $min_cnt_sigma_ratio
-          $aa_mutb[res]     = ($aa_tot_cnt[res] == 0) ? 1.0 : ($aa_mut_cnt[res] / $aa_tot_cnt[res].to_f)
-          $aa_rel_mutb[res] = $aa_mutb[res] * ala_factor
-          $aa_tot_freq[res] = ($aa_tot_cnt[res] == 0) ? 0.0 : ($aa_tot_cnt[res] / $tot_aa.to_f)
-        end
+            if $environment == 0
+              $logger.warn  "The current sigma value, #{$sigma} seems to be too big for " +
+                            "the total count (#{"%.2f" % $aa_tot_cnt[aa]}) of amino acid, #{aa}."
+            else
+              $logger.warn  "The current sigma value, #{$sigma} seems to be too big for " +
+                            "the total count (#{"%.2f" % $aa_tot_cnt[aa]}) of amino acid, #{aa[0].chr} under the environment class #{aa[1..-1]}."
+            end
+          end
-        $amino_acids.each do |res|
-          if $noweight
-            $outfh.puts '# %-3s %9d %9d %5.2f %8d %8.4f' %
-              [res, $aa_tot_cnt[res], $aa_mut_cnt[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_tot_freq[res]]
-          else
-            $outfh.puts '# %-3s %9.2f %9.2f %5.2f %8d %8.4f' %
-              [res, $aa_tot_cnt[res], $aa_mut_cnt[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_tot_freq[res]]
+          if $environment == 0
+            $aa_mutb[aa]     = ($aa_tot_cnt[aa] == 0) ? 1.0 : ($aa_mut_cnt[aa] / $aa_tot_cnt[aa].to_f)
+            $aa_rel_mutb[aa] = $aa_mutb[aa] * ala_factor
           end
+          $aa_tot_freq[aa] = ($aa_tot_cnt[aa] == 0) ? 0.0 : ($aa_tot_cnt[aa] / $tot_aa.to_f)
         end
-        if min_cnt > -1
+        if min_cnt > 0
           $logger.warn "We recommend you to use a sigma value equal to or smaller than #{min_sigma}."
           if $autosigma
             $logger.warn "The sigma value has been changed from #{$sigma} to #{min_sigma}."
             $sigma = min_sigma
           end
         end
+        aas.each do |aa|
+          columns = $environment == 0 ?
+                    [aa, $aa_tot_cnt[aa], $aa_mut_cnt[aa], $aa_mutb[aa], $aa_rel_mutb[aa], $aa_tot_freq[aa]] :
+                    [aa[0].chr, aa[1..-1], $aa_tot_cnt[aa], $aa_mut_cnt[aa], $aa_tot_freq[aa]]
+          if $noweight
+            if $environment == 0
+              $outfh.puts '# %-3s %9d %9d %5.2f %8d %8.4f' % columns
+            else
+              $outfh.puts "# %-3s %-#{$env_features.size}s %9d %9d %8.4f" % columns
+            end
+          else
+            if $environment == 0
+              $outfh.puts '# %-3s %9.2f %9.2f %5.2f %8d %8.4f' % columns
+            else
+              $outfh.puts "# %-3s %-#{$env_features.size}s %9.2f %9.2f %8.4f" % columns
+            end
+          end
+        end
         $outfh.puts '#'
         $outfh.puts '# RES: Amino acid one letter code'
+        $outfh.puts '# ENV: Environment label of amino acid'
         $outfh.puts '# TOT_OBS: Total count of incidence'
         $outfh.puts '# MUT_OBS: Total count of mutation'
-        $outfh.puts '# MUTB: Mutability (MUT_OBS / TOT_OBS)'
-        $outfh.puts '# REL_MUTB: Relative mutability (ALA = 100)'
+        if $environment == 0
+          $outfh.puts '# MUTB: Mutability (MUT_OBS / TOT_OBS)'
+          $outfh.puts '# REL_MUTB: Relative mutability (ALA = 100)'
+        end
         $outfh.puts '# REL_FREQ: Relative frequency'
         $outfh.puts '#'
@@ -872,7 +889,7 @@ HEADER
         # Generating substitution frequency matrices
         #
-        # calculating probabilities for each environment
+        # calculating probabilities for each environment class
         $env_classes.values.each do |e|
           if e.freq_array.sum != 0
             e.prob_array = 100.0 * e.freq_array / e.freq_array.sum
@@ -880,12 +897,12 @@ HEADER
         end
         # count raw frequencies
-        $tot_cnt_mat    = NMatrix.send($noweight ? 'int' : 'float', $amino_acids.size, $amino_acids.size)
+        $tot_cnt_mat    = NMatrix.float($amino_acids.size, $amino_acids.size)
         group_matrices  = []
         # for each combination of environment features
         $env_classes.groups_sorted_by_residue_labels.each_with_index do |group, group_no|
-          grp_cnt_mat = NMatrix.send($noweight ? 'int' : 'float', $amino_acids.size, $amino_acids.size)
+          grp_cnt_mat = NMatrix.float($amino_acids.size, $amino_acids.size)
           $amino_acids.each_with_index do |aa, aj|
             freq_array = group[1].find { |e| e.label.start_with?(aa) }.freq_array
@@ -901,6 +918,8 @@ HEADER
         if $output == 0
           heatmaps      = HeatmapArray.new if $heatmap == 1 or $heatmap == 2
           grp_max_val   = group_matrices.map { |l, m, n| m }.map { |m| m.max }.max
+          aa_max_cnt    = $aa_tot_cnt.to_a.map { |k, v| v }.max
+          mat_col_size  = aa_max_cnt.floor.to_s.size + 4
           $heatmapcol ||= Math::sqrt(group_matrices.size).round
           group_matrices.each_with_index do |(grp_label, grp_cnt_mat), grp_no|
@@ -908,7 +927,8 @@ HEADER
             stem = "#{grp_no}. #{grp_label}"
             $outfh.puts ">#{grp_label} #{grp_no}"
             $outfh.puts grp_cnt_mat.pretty_string(:col_header => $amino_acids,
-                                                  :row_header => $amino_acids)
+                                                  :row_header => $amino_acids,
+                                                  :col_size   => mat_col_size > 7 ? mat_col_size : 7)
             # for a heat map
             if $heatmap == 0 or $heatmap == 2
@@ -956,7 +976,8 @@ HEADER
           # total
           $outfh.puts '>Total'
           $outfh.puts $tot_cnt_mat.pretty_string(:col_header => $amino_acids,
-                                                 :row_header => $amino_acids)
+                                                 :row_header => $amino_acids,
+                                                 :col_size   => mat_col_size > 7 ? mat_col_size : 7)
           if $heatmap == 0 or $heatmap == 2
             stem    = "#{group_matrices.size}. TOTAL"
@@ -999,23 +1020,28 @@ HEADER
         # when nosmoothing !!!
         if ($output > 0) && $nosmooth
-          # reinitialize $tot_cnt_mat for pseudocounts
           $tot_cnt_mat = NMatrix.float($amino_acids.size, $amino_acids.size)
-          # for each combination of environment features
-          pseudo_cnt = $add || (1.0 / $env_classes.group_size)
-          # add pseudo counts for each frequency vector
-          $env_classes.values.each { |e| e.freq_array += pseudo_cnt }
+          # if pseudo count provided, reinitialize $tot_cnt_mat by adding pseudocounts
+          if $add
+            $env_classes.values.each { |e| e.freq_array += $add }
+          end
           # re-calculate probability vector for each environment class
-          $env_classes.values.each { |e| e.prob_array = 100.0 * e.freq_array / e.freq_array.sum }
+          $env_classes.values.each do |e|
+            if e.freq_array.sum == 0
+              # if no observation, then probabilities are zeros, too
+              e.prob_array = e.freq_array
+            else
+              e.prob_array = 100.0 * e.freq_array / e.freq_array.sum.to_f
+            end
+          end
           group_matrices = []
           $env_classes.groups_sorted_by_residue_labels.each_with_index do |group, group_no|
-            grp_cnt_mat = NMatrix.float($amino_acids.size, $amino_acids.size)
-            grp_prob_mat = NMatrix.float($amino_acids.size, $amino_acids.size)
+            grp_cnt_mat   = NMatrix.float($amino_acids.size, $amino_acids.size)
+            grp_prob_mat  = NMatrix.float($amino_acids.size, $amino_acids.size)
             $amino_acids.each_with_index do |aa, aj|
               env_class = group[1].find { |e| e.label.start_with?(aa) }
@@ -1039,7 +1065,6 @@ HEADER
               $outfh.puts grp_prob_mat.pretty_string(:col_header => $amino_acids,
                                                      :row_header => $amino_acids)
               # for a heat map
               if $heatmap == 0 or $heatmap == 2
                 grp_prob_mat.heatmap(:col_header    => $amino_acids,
@@ -1134,12 +1159,24 @@ HEADER
           if ($smooth == :full) || $p1smooth
             # smoothing p1 probabilities for the partial smoothing procedure if --p1smooth on or, if it is full smoothing
-            0.upto($amino_acids.size - 1) { |i| p1[i] = 100.0 * (omega1 * a0[i] + omega2 * $aa_tot_freq[$amino_acids[i]]) }
+            0.upto($amino_acids.size - 1) do |i|
+              if $environment == 0
+                p1[i] = 100.0 * (omega1 * a0[i] + omega2 * $aa_tot_freq[$amino_acids[i]])
+              else
+                p1[i] = 100.0 * (omega1 * a0[i] + omega2 * $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[i]) }.map { |k, v| v }.sum)
+              end
+            end
             $smooth_prob[1] = p1
           elsif ($smooth == :partial)
             # no smoothing for p1 probabilities just as Kenji's subst
             # in this case, p1 probabilities were taken from the amino acid frequencies of your data set
-            0.upto($amino_acids.size - 1) { |i| p1[i] = 100.0 * $aa_tot_freq[$amino_acids[i]] }
+            0.upto($amino_acids.size - 1) do |i|
+              if $environment == 0
+                p1[i] = 100.0 * $aa_tot_freq[$amino_acids[i]]
+              else
+                p1[i] = 100.0 * $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[i]) }.map { |k, v| v }.sum
+              end
+            end
             $smooth_prob[1] = p1
           end
@@ -1148,6 +1185,10 @@ HEADER
           #
           env_labels = $env_features.map_with_index { |ef, ei| ef.labels.map { |l| "#{ei}#{l}" } }
+          if $environment == 1
+            env_labels += $env_features[1..-1].map_with_index { |ef, ei| ef.labels.map { |l| "#{ei + $env_features.size}#{l}" } }
+          end
           if $smooth == :partial
             $outfh.puts <<HEADER
 #
@@ -1189,9 +1230,9 @@ HEADER
 # sigma value used is:  #{$sigma}
 #
 HEADER
-            1.upto($env_features.size) do |ci|
+            1.upto(env_labels.size) do |ci|
               # for partial smoothing, only P1 ~ P3, and Pn are considered
-              if (ci > 2) && (ci < $env_features.size)
+              if (ci > 2) && (ci < env_labels.size)
                 $logger.debug "Skipped the level #{ci + 1} probabilities, due to partial smoothing."
                 next
               end
@@ -1200,6 +1241,10 @@ HEADER
                 c1[0].product(*c1[1..-1]).each do |labels|
                   pattern = '.' * $env_features.size
+                  if $environment == 1
+                    pattern += '.' * ($env_features.size - 1)
+                  end
                   labels.each do |label|
                     i = label[0].chr.to_i
                     l = label[1].chr
@@ -1211,12 +1256,22 @@ HEADER
                     next
                   end
+                  if $environment == 1
+                    pattern[$env_features.size, 0] = "-"
+                  end
                   # get environments matching the pattern created above
                   # and calculate amino acid frequencies and their probabilities for all the environments
-                  envs      = $env_classes.values.select { |env| env.label.match(pattern.to_re) }
+                  envs      = $env_classes.values.select { |env| env.label.match(/^#{pattern}/) }
                   freq_arr  = envs.inject(NArray.float($amino_acids.size)) { |sum, env| sum + env.freq_array }
                   prob_arr  = NArray.float($amino_acids.size)
-                  0.upto($amino_acids.size - 1) { |i| prob_arr[i] = ((freq_arr[i] == 0) ? 0 : (freq_arr[i] / freq_arr.sum.to_f)) }
+                  0.upto($amino_acids.size - 1) do |i|
+                    if freq_arr.sum == 0
+                      prob_arr[i] = 0
+                    else
+                      prob_arr[i] = freq_arr[i] / freq_arr.sum.to_f
+                    end
+                  end
 #                  # assess whether a residue type j is compatible with a particular combination of structural features
 #                  # corrections for non-zero colum vector phenomenon by switching the smoothing procedure off as below
@@ -1254,29 +1309,23 @@ HEADER
                   if ci == 1
                     priors << $smooth_prob[1]
                   elsif ci == 2
-                    labels.combination(1).select { |c2| c2[0].start_with?('0') }.each { |c3|
+                    labels.combination(1).select { |c2| c2[0].start_with?('0') }.each do |c3|
                       priors << $smooth_prob[2][c3.to_set]
-                    }
-                  elsif ci == $env_features.size
-                    labels.combination(2).select { |c2| c2[0].start_with?('0') || c2[1].start_with?('0') }.each { |c3|
+                    end
+                  elsif ci == env_labels.size
+                    labels.combination(2).select { |c2| c2[0].start_with?('0') || c2[1].start_with?('0') }.each do |c3|
                       priors << $smooth_prob[3][c3.to_set]
-                    }
+                    end
                   end
-                  # entropy based prior weighting step
-                  entropy_max     = Math::log($amino_acids.size)
-                  entropies       = priors.map { |prior| -1.0 * prior.to_a.inject(0.0) { |s, p|
-                    begin
-                      p == 0.0 ? s - 1 : s + p * Math::log(p)
-                    rescue
-                      #puts "P: #{p}"
-                    end
-                  } }
-                  mod_entropies   = entropies.map_with_index { |entropy, i| (entropy_max - entropies[i]) / entropy_max }
+                  # entropy based weighting prior step
+                  entropy_max     = NMath::log($amino_acids.size)
+                  entropies       = priors.map { |prior| -1.0 * prior.to_a.inject(0.0) { |s, p| p == 0 ? s : s + p * Math::log(p) } }
+                  mod_entropies   = entropies.map { |entropy| (entropy_max - entropy) / entropy_max }
                   weights         = mod_entropies.map { |mod_entropy| mod_entropy / mod_entropies.sum }
                   weighted_priors = priors.map_with_index { |prior, i| prior * weights[i] }.sum
-                  # smoothing step
+                  # actual smoothing step
                   smooth_prob_arr = NArray.float($amino_acids.size)
                   big_N           = freq_arr.sum.to_f
                   small_n         = $amino_acids.size.to_f
@@ -1285,8 +1334,8 @@ HEADER
                   0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (omega1 * weighted_priors[i] + omega2 * prob_arr[i]) }
                   # normalization step
-                  smooth_prob_arr_sum = smooth_prob_arr.sum
-                  0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (smooth_prob_arr[i] / smooth_prob_arr_sum) }
+                  total = smooth_prob_arr.sum
+                  0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (smooth_prob_arr[i] / total) }
                   # store smoothed probabilties in a hash using a set of envrionment labels as a key
                   if $smooth_prob.has_key?(ci + 1)
@@ -1331,36 +1380,47 @@ HEADER
 #
 HEADER
             # full smooting
-            1.upto($env_features.size) do |ci|
+            1.upto(env_labels.size) do |ci|
               env_labels.combination(ci) do |c1|
                 c1[0].product(*c1[1..-1]).each do |labels|
                   pattern = '.' * $env_features.size
+                  if $environment == 1
+                    pattern += '.' * ($env_features.size - 1)
+                  end
                   labels.each do |label|
                     j = label[0].chr.to_i
                     l = label[1].chr
                     pattern[j] = l
                   end
+                  if $environment == 1
+                    pattern[$env_features.size, 0] = "-"
+                  end
                   # get environmetns, frequencies, and probabilities
-                  envs      = $env_classes.values.select { |env| env.label.match(pattern.to_re) }
+                  envs      = $env_classes.values.select { |env| env.label.match(/^#{pattern}/) }
                   freq_arr  = envs.inject(NArray.float($amino_acids.size)) { |sum, env| sum + env.freq_array }
                   prob_arr  = NArray.float($amino_acids.size)
                   0.upto($amino_acids.size - 1) { |i| prob_arr[i] = freq_arr[i] == 0 ? 0 : freq_arr[i] / freq_arr.sum.to_f }
                   # collect priors
-                  priors  = []
+                  priors = []
                   if ci > 1
                     labels.combination(ci - 1).each { |c2| priors << $smooth_prob[ci][c2.to_set] }
                   else
                     priors << $smooth_prob[1]
                   end
-                  # entropy based weighting priors
-                  entropy_max = Math::log($amino_acids.size)
-                  entropies = priors.map do |prior|
-                    (entropy_max + prior.to_a.inject(0.0) { |s, p| s + p * Math::log(p) }) / entropy_max
-                  end
-                  weighted_priors = priors.map_with_index { |p, i| p * entropies[i] / entropies.sum }.sum
+                  # entropy based weighting priors step
+                  entropy_max     = NMath::log($amino_acids.size)
+                  entropies       = priors.map { |prior| -1.0 * prior.to_a.inject(0.0) { |s, p| p == 0 ? s : s + p * Math::log(p) } }
+                  mod_entropies   = entropies.map_with_index { |entropy, i| (entropy_max - entropies[i]) / entropy_max }
+                  weights         = mod_entropies.map { |mod_entropy| mod_entropy / mod_entropies.sum }
+                  weighted_priors = priors.map_with_index { |prior, i| prior * weights[i] }.sum
                   # smoothing step
                   smooth_prob_arr = NArray.float($amino_acids.size)
@@ -1371,8 +1431,8 @@ HEADER
                   0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (omega1 * weighted_priors[i] + omega2 * prob_arr[i]) }
                   # normalization step
-                  smooth_prob_arr_sum = smooth_prob_arr.sum
-                  0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (smooth_prob_arr[i] / smooth_prob_arr_sum) }
+                  total = smooth_prob_arr.sum
+                  0.upto($amino_acids.size - 1) { |i| smooth_prob_arr[i] = 100.0 * (smooth_prob_arr[i] / total) }
                   # store smoothed probabilties in a hash using a set of envrionment labels as a key
                   if $smooth_prob.has_key?(ci + 1)
@@ -1389,7 +1449,7 @@ HEADER
           # updating smoothed probability array for each envrionment
           $env_classes.values.each do |env|
-            env.smooth_prob_array = $smooth_prob[$env_features.size + 1][env.label_set]
+            env.smooth_prob_array = $smooth_prob[env_labels.size + 1][env.label_set]
           end
           # sorting environments and build 21X21 substitution matrices
@@ -1526,7 +1586,7 @@ HEADER
           end
           grp_logo_mats = []
-          factor        = $scale / Math::log(2)
+          factor        = $scale / NMath::log(2)
           $env_classes.groups_sorted_by_residue_labels.each_with_index do |group, group_no|
             # calculating substitution probability matrix for each envrionment
@@ -1536,6 +1596,11 @@ HEADER
                             NMatrix.float($amino_acids.size, $amino_acids.size + 1) :
                             NMatrix.float($amino_acids.size, $amino_acids.size)
+            if $environment == 1
+              # parse substituting aa's environment label
+              tgt_label = grp_label.split('-').last
+            end
             $amino_acids.each_with_index do |aa, aj|
               env             = grp_envs.detect { |e| e.label.start_with?(aa) }
               env.logo_array  = $cys == 0 ?
@@ -1543,19 +1608,29 @@ HEADER
                                 NArray.float($amino_acids.size)
               env.send($nosmooth ? 'prob_array' : 'smooth_prob_array').to_a.each_with_index do |prob, ai|
-                pai                   = 100.0 * $aa_tot_freq[$amino_acids[ai]]
+                if $environment == 0
+                  pai = 100.0 * $aa_tot_freq[$amino_acids[ai]]
+                else
+                  pai = 100.0 * $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[ai]) }.map { |k, v| v }.sum
+                end
                 odds                  = prob / pai
-                env.logo_array[ai]    = factor * Math::log(odds)
+                env.logo_array[ai]    = factor * NMath::log(odds)
                 grp_logo_mat[aj, ai]  = env.logo_array[ai]
               end
               # adding log odds ratio for 'U' (J or C) when --cyc is 0
               if $cys == 0
-                pai                                 = 100.0 * ($aa_tot_freq['C'] + $aa_tot_freq['J'])
-                prob                                = env.send($nosmooth ? 'prob_array' : 'smooth_prob_array')[$amino_acids.index('C')] +
-                                                      env.send($nosmooth ? 'prob_array' : 'smooth_prob_array')[$amino_acids.index('J')]
-                odds                                = prob / pai
-                env.logo_array[$amino_acids.size]   = factor * Math::log(odds)
+                if $environment == 0
+                  pai = 100.0 * ($aa_tot_freq['C'] + $aa_tot_freq['J'])
+                else
+                  pai = 100.0 * ($aa_tot_freq.select { |k, v| k.start_with?('C') }.map { |k, v| v }.sum +
+                                 $aa_tot_freq.select { |k, v| k.start_with?('J') }.map { |k, v| v }.sum)
+                end
+                prob  = env.send($nosmooth ? 'prob_array' : 'smooth_prob_array')[$amino_acids.index('C')] +
+                        env.send($nosmooth ? 'prob_array' : 'smooth_prob_array')[$amino_acids.index('J')]
+                odds  = prob / pai
+                env.logo_array[$amino_acids.size]   = factor * NMath::log(odds)
                 grp_logo_mat[aj, $amino_acids.size] = env.logo_array[$amino_acids.size]
               end
             end
@@ -1569,22 +1644,32 @@ HEADER
           $amino_acids.each_with_index do |aa1, aj|
             $amino_acids.each_with_index do |aa2, ai|
-              prob  = $tot_prob_mat[aj, ai]
-              pai   = 100.0 * $aa_tot_freq[$amino_acids[ai]]
-              odds  = prob / pai
-              $tot_logo_mat[aj, ai] = factor * Math::log(odds)
+              prob = $tot_prob_mat[aj, ai]
+              if $environment == 0
+                pai = 100.0 * $aa_tot_freq[$amino_acids[ai]]
+              else
+                pai = 100.0 * $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[ai]) }.map { |k, v| v }.sum
+              end
+              odds = prob / pai
+              $tot_logo_mat[aj, ai] = factor * NMath::log(odds)
             end
             # adding log odds ratio for 'U' (J or C) when --cyc is 0
             if $cys == 0
-              pai   = 100.0 * ($aa_tot_freq['C'] + $aa_tot_freq['J'])
+              if $environment == 0
+                pai = 100.0 * ($aa_tot_freq['C'] + $aa_tot_freq['J'])
+              else
+                pai = 100.0 * ($aa_tot_freq.select { |k, v| k.start_with?('C') }.map { |k, v| v }.sum +
+                               $aa_tot_freq.select { |k, v| k.start_with?('J') }.map { |k, v| v }.sum)
+              end
               prob  = $tot_prob_mat[aj, $amino_acids.index('C')] + $tot_prob_mat[aj, $amino_acids.index('J')]
               odds  = prob / pai
-              $tot_logo_mat[aj, $amino_acids.size] = factor * Math::log(odds)
+              $tot_logo_mat[aj, $amino_acids.size] = factor * NMath::log(odds)
             end
           end
           # calculating relative entropy for each amino acid pair H and
           # the expected score E in bit units
           tot_E = 0.0
@@ -1593,10 +1678,22 @@ HEADER
           0.upto($tot_logo_mat.shape[0] - 1) do |j|
             0.upto($tot_logo_mat.shape[0] - 1) do |i| # it's deliberately '0' not '1'
               if j != i
-                tot_E += $tot_logo_mat[j, i] * $aa_tot_freq[$amino_acids[j]] * $aa_tot_freq[$amino_acids[i]] / 2.0
+                if $environment == 0
+                  tot_E += $tot_logo_mat[j, i] * $aa_tot_freq[$amino_acids[j]] * $aa_tot_freq[$amino_acids[i]] / 2.0
+                else
+                  tot_E +=  $tot_logo_mat[j, i] *
+                            $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[j]) }.map { |k, v| v }.sum *
+                            $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[i]) }.map { |k, v| v }.sum / 2.0
+                end
                 tot_H += $tot_logo_mat[j, i] * $tot_prob_mat[j, i] / 2.0 / 10000.0
               else
-                tot_E += $tot_logo_mat[j, i] * $aa_tot_freq[$amino_acids[i]] * $aa_tot_freq[$amino_acids[i]]
+                if $environment == 0
+                  tot_E += $tot_logo_mat[j, i] * $aa_tot_freq[$amino_acids[i]] * $aa_tot_freq[$amino_acids[i]]
+                else
+                  tot_E +=  $tot_logo_mat[j, i] *
+                            $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[j]) }.map { |k, v| v }.sum *
+                            $aa_tot_freq.select { |k, v| k.start_with?($amino_acids[i]) }.map { |k, v| v }.sum
+                end
                 tot_H += $tot_logo_mat[j, i] * $tot_prob_mat[j, i] / 10000.0
               end
             end
@@ -1662,9 +1759,9 @@ HEADER
               heatmaps << grp_logo_mat.heatmap(:col_header          => $amino_acids,
                                                :row_header          => row_header,
                                                :rvg_width           => $rvg_width,
-                                               :rvg_height          => $rvg_height - 50,
+                                               :rvg_height          => $rvg_height,
                                                :canvas_width        => $canvas_width,
-                                               :canvas_height       => $canvas_height - 50,
+                                               :canvas_height       => $canvas_height,
                                                :gradient_beg_color  => '#0000FF',
                                                :gradient_mid_color  => '#FFFFFF',
                                                :gradient_end_color  => '#FF0000',
@@ -1674,6 +1771,7 @@ HEADER
                                                :print_value         => $heatmapvalues,
                                                :print_gradient      => false,
                                                :title               => stem,
+                                               :title_font_scale    => 1.0,
                                                :title_font_size     => title_font_size)
             end
           end