RubyGems - egor - Versions diffs - 0.0.1 → 0.0.2 - Mend

egor 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

data/History.txt CHANGED Viewed

@@ -1,3 +1,9 @@
+== 0.0.2 2008-11-13
+* 2 major enhancements:
+  * Constrained environment features are properly handled
+  * Sane log-odds ratio matrices are produced
 == 0.0.1 2008-11-07
 * 1 major enhancement:

data/README.rdoc CHANGED Viewed

@@ -9,8 +9,10 @@ egor: Esst GeneratOR, a program for calculating environment-specific substitutio
 == FEATURES/PROBLEMS:
 * No more segmentation fault
+* Fast enough not to leave your place
+* Slow enough to check your emails or have some chats with your colleagues next to you
 * Full smoothing supported
-* Infinite number of environment features can be handled (in theory)
+* In theory, infinite number of environment features can be handled
 == BASIC USAGE:

data/lib/egor/cli.rb CHANGED Viewed

@@ -41,19 +41,19 @@ Usage:
     egor [ options ] -f TEM-file -c CLASSDEF-file
 Options:
-    --tem-file (-f) STRING: a tem file
-    --tem-list (-l) STRING: a list for tem files
-    --classdef (-c) STRING: a file for the defintion of environments (default: 'classdef.dat')
-    --outfile (-o) STRING: output filename ("allmat.dat" if not specified)
+    --tem-file (-f) FILE: a tem file
+    --tem-list (-l) FILE: a list for tem files
+    --classdef (-c) FILE: a file for the defintion of environments (default: 'classdef.dat')
+    --outfile (-o) FILE: output filename ("allmat.dat" if not specified)
     --weight (-w) INTEGER: clustering level (PID) for the BLOSUM-like weighting
     --noweight: calculate substitution counts with no weights (default)
     --smooth (-s) INTEGER:
         0 for parial smoothing (default)
         1 for full smoothing
     --nosmooth: perform no smoothing operation
-    --cys (-y) INTEGER: (NOT implemented yet)
-        0 for using C and J only for structure
-        1 for both structure and sequence (default)
+    --cys (-y) INTEGER:
+        0 for using C and J only for structure (default)
+        1 for both structure and sequence
     --output INTEGER:
         0 for raw counts (no-smoothing performed)
         1 for probabilities
@@ -61,7 +61,7 @@ Options:
     --scale INTEGER: log-odds matrices in 1/n bit units (default 3)
     --sigma DOUBLE: change the sigma value for smoothing (default 5)
     --add DOUBLE: add this value to raw counts when deriving log-odds without smoothing (default 1/#classes)
-    --penv: use environment-dependent frequencies for log-odds calculation (default false) (NOT implemented yet)
+    --penv: use environment-dependent frequencies for log-odds calculation (default false) (NOT implemented yet!!!)
     --pidmin DOUBLE: count substitutions only for pairs with PID equal to or greater than this value (default none)
     --pidmax DOUBLE: count substitutions only for pairs with PID smaller than this value (default none)
     --verbose (-v) INTEGER
@@ -137,12 +137,6 @@ Options:
         $outfile      = "allmat.dat"
         $outfh        = nil # file hanfle for outfile
         $output       = 2
-        $aa_tot_obs   = {}
-        $aa_mut_obs   = {}
-        $aa_mutb      = {}
-        $aa_rel_mutb  = {}
-        $aa_rel_freq  = {}
-        $env_aa_obs   = {}
         $ali_size     = 0
         $tot_aa       = 0
         $sigma        = 5.0
@@ -154,10 +148,20 @@ Options:
         $pidmin       = nil
         $pidmax       = nil
         $scale        = 3
-        $add          = 0
+        $add          = nil
+        $cys          = 0
         $penv         = false
-        $heatmap      = false
+        $aa_tot_obs   = {}
+        $aa_mut_obs   = {}
+        $aa_mutb      = {}
+        $aa_rel_mutb  = {}
+        $aa_rel_freq  = {}
+        $env_aa_obs   = {}
         $smooth_prob  = {}
+        $tot_freq_mat = nil
+        $tot_prob_mat = nil
+        $tot_logo_mat = nil
         # Part 2.
         #
@@ -195,10 +199,8 @@ Options:
             $output       = arg.to_i
           when '--outfile'
             $outfile      = arg
-          when '--cyc'
-            $logger.error "!!! --cys option is not available yet"
-            exit 1
-            $cysteine     = (arg.to_i == 1 ? false : true)
+          when '--cys'
+            $cys          = (arg.to_i == 1 ? false : true)
           when '--weight'
             $weight       = arg.to_i
           when '--sigma'
@@ -210,15 +212,17 @@ Options:
           when '--noweight'
             $noweight     = true
           when '--smooth'
-            $smooth       = (arg.to_i == 1 ? :full : :parital)
+            $smooth       = (arg.to_i == 1 ? :full : :partial)
           when '--nosmooth'
             $nosmooth     = true
           when '--scale'
             $scale        = arg.to_f
           when '--add'
+            $logger.error "!!! --add option is not supported yet"
+            exit 1
             $add          = arg.to_f
           when '--penv'
-            $logger.error "!!! --penv option is not available yet"
+            $logger.error "!!! --penv option is not supported yet"
             exit 1
             $penv         = true
           when '--heatmap'
@@ -245,14 +249,19 @@ Options:
           exit 1
         end
         # Part 3.
         #
         # Reading Environment Class Definition File
         #
-        # a hash for storing all environment feature objects
+        # an array for storing all environment feature objects
         $env_features = []
+        # an array for storing indexes of constrained environment features
+        $cst_features = []
         # aa1 amino acid in a substitution itself is a environment feature
         $env_features << EnvironmentFeature.new("sequence",
                                                 $amino_acids,
@@ -262,24 +271,29 @@ Options:
         # read environment class definiton file and
         # store them into the hash prepared above
+        env_index = 1
         IO.foreach($classdef) do |line|
+          line.chomp!
           if line.start_with?("#")
             next
           elsif (env_ftr = line.chomp.split(/;/)).length == 5
-            $logger.info ">>> An environment feature, #{line.chomp} detected"
+            $logger.info ">>> An environment feature, #{line} detected"
             if env_ftr[-1] == "T"
               # skip silenced environment feature
-              $logger.warn "!!! The environment feature, #{line.chomp} silent"
+              $logger.warn "!!! The environment feature, #{line} silent"
               next
             end
             if env_ftr[-2] == "T"
-              $logger.warn "!!! The environment feature, #{line.chomp} constrained"
+              $cst_features << env_index
+              $logger.warn "!!! The environment feature, #{line} constrained"
             end
             $env_features << EnvironmentFeature.new(env_ftr[0],
                                                     env_ftr[1].split(""),
                                                     env_ftr[2].split(""),
                                                     env_ftr[3],
                                                     env_ftr[4])
+            env_index += 1
           else
             $logger.error "@@@ #{line} doesn't seem to be a proper format for class definition"
             exit 1
@@ -325,7 +339,7 @@ Options:
               end
             end
-            $ali_size   += ali.size
+            $ali_size   += 1
             env_labels  = {}
             disulphide  = {}
@@ -398,19 +412,27 @@ Options:
                       aa2 = s2[pos].upcase
                       if !$amino_acids.include?(aa1)
-                        $logger.warn "!!! #{id1}-#{pos}-#{aa1} is not standard amino acid" unless aa1 == "-"
+                        $logger.warn "!!! #{id1}-#{pos}-#{aa1} is not a standard amino acid" unless aa1 == "-"
                         next
                       end
                       if !$amino_acids.include?(aa2)
-                        $logger.warn "!!! #{id1}-#{pos}-#{aa2} is not standard amino acid" unless aa2 == "-"
+                        $logger.warn "!!! #{id1}-#{pos}-#{aa2} is not a standard amino acid" unless aa2 == "-"
                         next
                       end
                       aa1 = (((disulphide[id1][pos] == "F") && (aa1 == "C")) ? "J" : aa1)
                       aa2 = (((disulphide[id2][pos] == "F") && (aa2 == "C")) ? "J" : aa2)
-                      $envs[env_labels[id1][pos]].add_residue_count(aa2)
+                      if $cst_features.empty?
+                        $envs[env_labels[id1][pos]].increase_residue_count(aa2)
+                      elsif (env_labels[id1][pos].split("").values_at(*$cst_features) ==
+                             env_labels[id2][pos].split("").values_at(*$cst_features))
+                        $envs[env_labels[id1][pos]].increase_residue_count(aa2)
+                      else
+                        $logger.debug "*** #{id1}-#{pos}-#{aa1} and #{id2}-#{pos}-#{aa2} have different symbols for constrained environment features each other"
+                        next
+                      end
                       grp_label = env_labels[id1][pos][1..-1]
@@ -485,6 +507,7 @@ Options:
                   cluster2.each do |id2|
                     seq1 = ali[id1].split("")
                     seq2 = ali[id2].split("")
                     seq1.each_with_index do |aa1, pos|
                       if env_labels[id1][pos].include?("X")
                         $logger.debug "*** Substitutions from #{id1}-#{pos}-#{aa1} were masked"
@@ -511,8 +534,17 @@ Options:
                       obs1  = 1.0 / size1
                       obs2  = 1.0 / size2
-                      $envs[env_labels[id1][pos]].add_residue_count(aa2, 1.0 / (size1 * size2))
-                      $envs[env_labels[id2][pos]].add_residue_count(aa1, 1.0 / (size1 * size2))
+                      if $cst_features.empty?
+                        $envs[env_labels[id1][pos]].increase_residue_count(aa2, 1.0 / (size1 * size2))
+                        $envs[env_labels[id2][pos]].increase_residue_count(aa1, 1.0 / (size1 * size2))
+                      elsif (env_labels[id1][pos].split("").values_at(*$cst_features) ==
+                             env_labels[id2][pos].split("").values_at(*$cst_features))
+                        $envs[env_labels[id1][pos]].increase_residue_count(aa2, 1.0 / (size1 * size2))
+                        $envs[env_labels[id2][pos]].increase_residue_count(aa1, 1.0 / (size1 * size2))
+                      else
+                        $logger.debug "*** #{id1}-#{pos}-#{aa1} and #{id2}-#{pos}-#{aa2} have different symbols for constrained environment features each other"
+                        next
+                      end
                       grp_label1 = env_labels[id1][pos][1..-1]
                       grp_label2 = env_labels[id2][pos][1..-1]
@@ -605,7 +637,6 @@ HEADER
           else
             $outfh.puts "# Weighting scheme: clustering at PID #{$weight} level"
           end
-          $outfh.puts "#"
           # calculate amino acid frequencies and mutabilities, and
           # print them as default statistics in the header part
@@ -614,7 +645,7 @@ HEADER
           $outfh.puts "#"
           $outfh.puts "# Total amino acid frequencies:\n"
-          $outfh.puts "# %-3s %9s %9s %5s %8s %8s" % %w[RES MUT_OBS TOT_OBS MUTB REL_MUTB REL_FRQ]
+          $outfh.puts "# %-3s %9s %9s %5s %8s %8s" % %w[RES TOT_OBS MUT_OBS MUTB REL_MUTB REL_FRQ]
           $aa_tot_obs.each_pair do |res, freq|
             $aa_mutb[res]      = $aa_mut_obs[res] / freq.to_f
@@ -625,13 +656,18 @@ HEADER
           $amino_acids.each do |res|
             if $noweight
               $outfh.puts "# %-3s %9d %9d %5.2f %8d %8.4f" %
-                [res, $aa_mut_obs[res], $aa_tot_obs[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_rel_freq[res]]
+                [res, $aa_tot_obs[res], $aa_mut_obs[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_rel_freq[res]]
             else
               $outfh.puts "# %-3s %9.2f %9.2f %5.2f %8d %8.4f" %
-                [res, $aa_mut_obs[res], $aa_tot_obs[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_rel_freq[res]]
+                [res, $aa_tot_obs[res], $aa_mut_obs[res], $aa_mutb[res], $aa_rel_mutb[res], $aa_rel_freq[res]]
             end
           end
-          $outfh.puts "#"
+          # Part 5.
+          #
+          # Calculating substitution frequency tables
+          #
           # calculating probabilities for each environment
           $envs.values.each do |e|
@@ -641,7 +677,7 @@ HEADER
           end
           # count raw frequencies
-          $tot_freq_matrix = ($noweight ? NMatrix.int(21,21) : NMatrix.float(21,21))
+          $tot_freq_mat = ($noweight ? NMatrix.int(21,21) : NMatrix.float(21,21))
           # for each combination of environment features
           env_groups = $envs.values.group_by { |env| env.label[1..-1] }
@@ -652,28 +688,33 @@ HEADER
               $env_features[i + 1].labels.index(l)
             }
           }.each_with_index do |group, group_no|
-            grp_freq_matrix = ($noweight ? NMatrix.int(21,21) : NMatrix.float(21,21))
+            grp_freq_mat = ($noweight ? NMatrix.int(21,21) : NMatrix.float(21,21))
             $amino_acids.each_with_index do |aa, ai|
               freq_array = group[1].find { |e| e.label.start_with?(aa) }.freq_array
-              0.upto(20) { |j| grp_freq_matrix[ai, j] = freq_array[j] }
+              0.upto(20) { |j| grp_freq_mat[ai, j] = freq_array[j] }
             end
-            $tot_freq_matrix += grp_freq_matrix
+            $tot_freq_mat += grp_freq_mat
             if $output == 0
               $outfh.puts ">#{group[0]} #{group_no}"
-              $outfh.puts grp_freq_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+              $outfh.puts grp_freq_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
             end
           end
           if $output == 0
             $outfh.puts ">Total"
-            $outfh.puts $tot_freq_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+            $outfh.puts $tot_freq_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
             exit 0
           end
-          # for probability
+          # Part 6.
+          #
+          # Calculating substitution probability tables
+          #
           if $output == 1
             $outfh.puts <<HEADER
 #
@@ -687,7 +728,7 @@ HEADER
           if ($output > 0) && $nosmooth
             # Probability matrices
-            tot_prob_matrix = NMatrix.float(21, 21)
+            $tot_prob_mat = NMatrix.float(21, 21)
             # for each combination of environment features
             env_groups = $envs.values.group_by { |env| env.label[1..-1] }
@@ -697,24 +738,24 @@ HEADER
                 $env_features[i + 1].labels.index(l)
               }
             }.each_with_index do |group, group_no|
-              grp_prob_matrix = NMatrix.float(21,21)
+              grp_prob_mat = NMatrix.float(21,21)
               $amino_acids.each_with_index do |aa, ai|
                 prob_array = group[1].find { |e| e.label.start_with?(aa) }.prob_array
-                0.upto(20) { |j| grp_prob_matrix[ai, j] = prob_array[j] }
+                0.upto(20) { |j| grp_prob_mat[ai, j] = prob_array[j] }
               end
-              tot_prob_matrix += grp_prob_matrix
+              $tot_prob_mat += grp_prob_mat
               if ($output == 1)
                 $outfh.puts ">#{group[0]} #{group_no}"
-                $outfh.puts grp_prob_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+                $outfh.puts grp_prob_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
               end
             end
             if ($output == 1)
               $outfh.puts ">Total"
-              $outfh.puts tot_prob_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+              $outfh.puts $tot_prob_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
               $outfh.close
               exit 0
             end
@@ -749,6 +790,7 @@ HEADER
             if $smooth == :partial
               $outfh.puts <<HEADER
+#
 # Partial Smoothing:
 #
 # p1(ri) (i.e., amino acid composition) is estimated by summing over
@@ -771,7 +813,6 @@ HEADER
 # Weights (omegas) are calculated as in Topham et al. 1993)
 #
 # sigma value used is:  5.00
-#
 HEADER
               1.upto($env_features.size) do |ci|
                 # for partial smoothing, only P1 ~ P3, and Pn are considered
@@ -872,6 +913,7 @@ HEADER
               end
             else
               $outfh.puts <<HEADER
+#
 # Full Smoothing:
 #
 # p1(ri) is estimated as:
@@ -897,7 +939,6 @@ HEADER
 # Weights (omegas) are calculated as in Topham et al. 1993)
 #
 # sigma value used is:  5.00
-#
 HEADER
               # full smooting
               1.upto($env_features.size) do |ci|
@@ -959,7 +1000,7 @@ HEADER
             $envs.values.each { |e| e.smooth_prob_array = $smooth_prob[$env_features.size + 1][e.label_set] }
             # for a total substitution probability matrix
-            tot_smooth_prob_matrix = NMatrix.float(21,21)
+            $tot_prob_mat = NMatrix.float(21,21)
             # grouping environments by its environment labels but amino acid label
             env_groups = $envs.values.group_by { |env| env.label[1..-1] }
@@ -972,46 +1013,54 @@ HEADER
               }
             }.each_with_index do |group, group_no|
               # calculating 21X21 substitution probability matrix for each envrionment
-              grp_prob_matrix = NMatrix.float(21,21)
+              grp_prob_mat = NMatrix.float(21,21)
               $amino_acids.each_with_index do |aa, ai|
                 smooth_prob_array = group[1].find { |e| e.label.start_with?(aa) }.smooth_prob_array
-                0.upto(20) { |j| grp_prob_matrix[ai, j] = smooth_prob_array[j] }
+                0.upto(20) { |j| grp_prob_mat[ai, j] = smooth_prob_array[j] }
               end
-              tot_smooth_prob_matrix += grp_prob_matrix
+              $tot_prob_mat += grp_prob_mat
               if $output == 1
                 $outfh.puts ">#{group[0]} #{group_no}"
-                $outfh.puts grp_prob_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+                $outfh.puts grp_prob_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
               end
             end
-            tot_smooth_prob_matrix /= env_groups.size
+            $tot_prob_mat /= env_groups.size
             if $output == 1
               $outfh.puts ">Total"
-              $outfh.puts tot_smooth_prob_matrix.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+              $outfh.puts $tot_prob_mat.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
               $outfh.close
               exit 0
             end
+            # Part 7.
+            #
+            # Calculating log-add ratio scoring matrices
+            #
             if $output == 2
               $outfh.puts <<HEADER
 #
 # The probabilities were then divided by the background probabilities
+HEADER
+              if $penv
+                $outfh.puts <<HEADER
 # which were derived from the environment-independent amino acid frequencies.
 #                             ^^^^^^^^^^^^^^^^^^^^^^^
-#
-# Shown here are logarithms of these values multiplied by 3/log(2)
-# rounded to the nearest integer (log-odds scores in 1/3 bit units).
-#
-# For total (composite) matrix, Entropy = XXX bits, Expected score = XXX
-#
 HEADER
+              else
+                $outfh.puts <<HEADER
+# which were derived from the environment-dependent amino acid frequencies.
+#                             ^^^^^^^^^^^^^^^^^^^^^
+HEADER
+              end
-              # log-add ratio matrices from now on
-              tot_logo_mat  = NMatrix.float(21,21)
+              $tot_logo_mat = $cys ? NMatrix.float(21,22) : NMatrix.float(21,21)
+              grp_logo_mats = []
               factor        = $scale / Math::log(2)
               # grouping environments by its environment labels but amino acid label
@@ -1027,30 +1076,82 @@ HEADER
                 # calculating 21X21 substitution probability matrix for each envrionment
                 grp_label     = group[0]
                 grp_envs      = group[1]
-                grp_logo_mat  = NMatrix.float(21,21)
+                grp_logo_mat  = $cys ? NMatrix.float(21, 22) : NMatrix.float(21,21)
                 $amino_acids.each_with_index do |aa, ai|
                   env       = grp_envs.detect { |e| e.label.start_with?(aa) }
-                  logo_arr  = NArray.float(21)
+                  logo_arr  = $cys ? NArray.float(22) : NArray.float(21)
                   env.smooth_prob_array.to_a.each_with_index do |prob, j|
-                    paj = 100.0 * $aa_rel_freq[$amino_acids[j]]
-                    odds = prob == 0.0 ? 0.000001 / paj : prob / paj
+                    paj         = 100.0 * $aa_rel_freq[$amino_acids[j]]
+                    odds        = prob == 0.0 ? 0.000001 / paj : prob / paj
                     logo_arr[j] = factor * Math::log(odds)
                   end
                   0.upto(20) { |j| grp_logo_mat[ai, j] = logo_arr[j] }
+                  # adding log odds ratio for "U" (J or C) when --cyc is ON
+                  if $cys
+                    paj   = 100.0 * ($aa_rel_freq["C"] + $aa_rel_freq["J"])
+                    prob  = env.smooth_prob_array[$amino_acids.index("C")] + env.smooth_prob_array[$amino_acids.index("J")]
+                    odds  = prob == 0.0 ? 0.000001 / paj : prob / paj
+                    logo_arr[logo_arr.size - 1] = factor * Math::log(odds)
+                    grp_logo_mat[ai, logo_arr.size - 1] = logo_arr[logo_arr.size - 1]
+                  end
                 end
-                tot_logo_mat += grp_logo_mat
+                $tot_logo_mat += grp_logo_mat
+                grp_logo_mats << [grp_label, grp_logo_mat]
+              end
-                $outfh.puts ">#{grp_label} #{group_no}"
-                $outfh.puts grp_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+              $tot_logo_mat /= env_groups.size
+              # calculating relative entropy for each amino acid pair H and
+              # the expected score E in bit units
+              #
+              # I'm a bit suspicious about this part...
+              tot_E = 0.0
+              tot_H = 0.0
+              0.upto($tot_logo_mat.shape[0] - 1) do |i|
+                0.upto($tot_logo_mat.shape[0] - 1) do |j|
+                  if i != j
+                    tot_E += $tot_logo_mat[i, j] * $aa_rel_freq[$amino_acids[i]] * $aa_rel_freq[$amino_acids[j]] / 2.0
+                    tot_H += $tot_logo_mat[i, j] * $tot_prob_mat[i, j] / 2.0 / 10000.0
+                  else
+                    tot_E += $tot_logo_mat[i, j] * $aa_rel_freq[$amino_acids[i]] * $aa_rel_freq[$amino_acids[j]]
+                    tot_H += $tot_logo_mat[i, j] * $tot_prob_mat[i, j] / 10000.0
+                  end
+                end
               end
-              tot_logo_mat /= env_groups.size
+              $outfh.puts <<HEADER
+#
+# Shown here are logarithms of these values multiplied by #{$scale}/log(2)
+# rounded to the nearest integer (log-odds scores in 1/3 bit units).
+#
+# For total (composite) matrix, Entropy = #{"%5.4f" % tot_H} bits, Expected score = #{"%5.4f" % tot_E}
+#
+HEADER
+              grp_logo_mats.each_with_index do |arr, grp_no|
+                grp_label     = arr[0]
+                grp_logo_mat  = arr[1]
-              $outfh.puts ">Total"
-              $outfh.puts tot_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+                $outfh.puts ">#{grp_label} #{grp_no}"
+                if $cys
+                  $outfh.puts grp_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids + %w[U])
+                else
+                  $outfh.puts grp_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+                end
+              end
+              $outfh.puts ">Total #{grp_logo_mats.size}"
+              if $cys
+                $outfh.puts $tot_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids + %w[U])
+              else
+                $outfh.puts $tot_logo_mat.round.pretty_string(:col_header => $amino_acids, :row_header => $amino_acids)
+              end
               $outfh.close
               exit 0
             end

data/lib/egor.rb CHANGED Viewed

@@ -2,5 +2,5 @@ $:.unshift(File.dirname(__FILE__)) unless
   $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
 module Egor
-  VERSION = '0.0.1'
-end
+  VERSION = '0.0.2'
+end

data/lib/environment.rb CHANGED Viewed

@@ -23,7 +23,7 @@ class Environment
     @smooth_prob_array  = NArray.float(21)
   end
-  def add_residue_count(a, inc = 1.0)
+  def increase_residue_count(a, inc = 1.0)
     @freq_array[@@amino_acids.index(a.upcase)] += inc
   end

data/lib/environment_feature.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 class EnvironmentFeature < Struct.new(:name, :symbols, :labels, :constrained, :silent)
   def to_s
-    values.join(";")
+    [name, symbols.join, labels.join, constrained, silent].join(";")
   end
   def constrained?

data/website/index.html CHANGED Viewed

@@ -44,6 +44,8 @@
 <h2>Features</h2>
 <ul>
 	<li>No more segmentation fault</li>
+	<li>Fast enough not to leave your place</li>
+	<li>Slow enough to check your emails or have some chats with your colleagues next to you</li>
 	<li>Full smoothing supported</li>
 	<li>In theory, infinite number of environment features can be handled</li>
 </ul>
@@ -53,7 +55,7 @@
 or
 <pre>$ egor -l TEM-file -c classdef.dat</pre>
 <h2>Repository</h2>
-<p>You can download a pre-built RubyGem package from</p>
+<p>You can download a pre-built RubyGems package from</p>
 <ul>
 	<li>rubyforge: <a href="http://rubyforge.org/projects/egor">http://rubyforge.org/projects/egor</a></li>
 </ul>
@@ -67,12 +69,19 @@ or
 <h2>Contact</h2>
 <p>Comments are welcome, please send an email to me (seminlee at gmail dot com).</p>
     <p class="coda">
-      <a href="FIXME email">Semin Lee</a>, 10th November 2008<br>
+      Semin Lee, 12th November 2008<br>
       Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
     </p>
 </div>
 <!-- insert site tracking codes here, like Google Urchin -->
+<script type="text/javascript">
+  var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+  document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+</script>
+<script type="text/javascript">
+  var pageTracker = _gat._getTracker("UA-6291956-1");
+  pageTracker._trackPageview();
+</script>
 </body>
 </html>

data/website/index.txt CHANGED Viewed

@@ -14,6 +14,8 @@ h2. Installation
 h2. Features
 * No more segmentation fault
+* Fast enough not to leave your place
+* Slow enough to check your emails or have some chats with your colleagues next to you
 * Full smoothing supported
 * In theory, infinite number of environment features can be handled
@@ -29,7 +31,7 @@ It's pretty much the same as Kenji's subst, so in most cases, you just need swap
 h2. Repository
-You can download a pre-built RubyGem package from
+You can download a pre-built RubyGems package from
 * rubyforge: "http://rubyforge.org/projects/egor":http://rubyforge.org/projects/egor

data/website/template.html.erb CHANGED Viewed

@@ -39,12 +39,19 @@
     </div>
     <%= body %>
     <p class="coda">
-      <a href="FIXME email">Semin Lee</a>, <%= modified.pretty %><br>
+      Semin Lee, <%= modified.pretty %><br>
       Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
     </p>
 </div>
 <!-- insert site tracking codes here, like Google Urchin -->
+<script type="text/javascript">
+  var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+  document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+</script>
+<script type="text/javascript">
+  var pageTracker = _gat._getTracker("UA-6291956-1");
+  pageTracker._trackPageview();
+</script>
 </body>
 </html>

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: egor
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 platform: ruby
 authors:
 - Semin Lee
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2008-11-10 00:00:00 +00:00
+date: 2008-11-13 00:00:00 +00:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -60,7 +60,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.0.7
+        version: 1.1.0
     version:
 - !ruby/object:Gem::Dependency
   name: hoe