RubyGems - charlie - Versions diffs - 0.6.0 → 0.7.0 - Mend

charlie 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

data/History.txt +14 -0
data/Manifest.txt +13 -22
data/README.txt +3 -3
data/Rakefile +1 -1
data/TODO.txt +11 -21
data/data/BENCHMARK +25 -23
data/data/CROSSOVER +5 -1
data/data/GENOTYPE +6 -6
data/data/MUTATION +19 -7
data/data/SELECTION +2 -1
data/data/template.html +2 -1
data/examples/EXAMPLES_README.txt +70 -0
data/examples/bitstring.rb +72 -0
data/examples/{gladiatorial_sunburn.rb → coevolution.rb} +80 -22
data/examples/function_optimization.rb +113 -0
data/examples/output/{royalroad1_report.html → bitstring_royalroad.html} +822 -655
data/examples/output/function_optimization_sombrero.html +2289 -0
data/examples/output/function_optimization_twopeak.csv +210 -0
data/examples/output/function_optimization_twopeak.html +2477 -0
data/examples/output/string_weasel.html +513 -0
data/examples/output/tsp.html +633 -882
data/examples/{money.rb → permutation.rb} +20 -8
data/examples/string.rb +98 -0
data/examples/tree.rb +37 -12
data/examples/tsp.rb +34 -22
data/lib/charlie.rb +5 -1
data/lib/charlie/1.9fixes.rb +46 -0
data/lib/charlie/crossover.rb +31 -14
data/lib/charlie/etc/minireport.rb +5 -4
data/lib/charlie/etc/monkey.rb +11 -8
data/lib/charlie/gabenchmark.rb +230 -0
data/lib/charlie/genotype.rb +4 -0
data/lib/charlie/list/list_crossover.rb +25 -5
data/lib/charlie/mutate.rb +34 -7
data/lib/charlie/permutation/permutation.rb +34 -6
data/lib/charlie/population.rb +12 -122
data/lib/charlie/selection.rb +1 -0
data/lib/charlie/tree/tree.rb +179 -17
data/test/t_common.rb +1 -1
data/test/test_benchmark.rb +19 -5
data/test/test_cross.rb +23 -1
data/test/test_evolve.rb +14 -1
data/test/test_mutator.rb +28 -2
data/test/test_permutation.rb +23 -1
data/test/test_sel.rb +3 -1
data/test/test_tree.rb +63 -1
metadata +17 -25
data/examples/bit.rb +0 -10
data/examples/function_opt_2peak.rb +0 -24
data/examples/function_opt_sombero.rb +0 -38
data/examples/gladiatorial_simple.rb +0 -17
data/examples/gridwalk.rb +0 -29
data/examples/output/flattened_sombero.html +0 -6400
data/examples/output/flattened_sombero2_.html +0 -3576
data/examples/output/fopt1_dblopt.html +0 -2160
data/examples/output/hill10.html +0 -5816
data/examples/output/hill2.csv +0 -24
data/examples/output/hill2.html +0 -384
data/examples/output/royalroad2_report.html +0 -1076
data/examples/output/royalroadquick_report.html +0 -504
data/examples/output/weasel1_report.html +0 -1076
data/examples/output/weasel2_report.html +0 -240
data/examples/royalroad.rb +0 -26
data/examples/royalroad2.rb +0 -18
data/examples/simple_climb_hill2.rb +0 -47
data/examples/weasel.rb +0 -36

data/lib/charlie/etc/monkey.rb CHANGED

@@ -24,6 +24,10 @@ module Enumerable
     zip(a2).map(&b)
   end
+  def sum
+    r=0; each{|e| r+=e }; r
+  end
   alias_method :enum_slice, :each_slice unless RUBY_VERSION < '1.9' # ruby1.9 replaces enum_* with each_*
 end
@@ -33,12 +37,10 @@ class Array
     sort_by{ rand }
   end if RUBY_VERSION < '1.9'
-  def sum # TODO 1.9, use :+
-    inject(0){|a,b|a+b}
-  end
-  def inner_product(v)
-    zip_with(v){|a,b|a*b}.sum
+  def dot_product(v)
+    r=0.0
+    each_with_index{|e,i| r+=e*v[i] }
+    r
   end
   def rand_index
@@ -50,6 +52,7 @@ class Array
   end
   def stats # TODO 1.9, use minmax
+    return transpose.map(&:stats).transpose if at(0).is_a?(Array) # return stats of each component if elements are arrays
     [min,max,average,stddev]
   end
@@ -79,9 +82,9 @@ class String
     self[rand(size)]
   end
-  def chars # TODO 1.9
+  def chars
     split('')
-  end
+  end if RUBY_VERSION < '1.9'
   def each_char(&b)
     chars.each(&b)

data/lib/charlie/gabenchmark.rb ADDED

@@ -0,0 +1,230 @@
+require 'rbconfig' # for install name
+module GABenchmark
+  extend self
+  # This method generates reports comparing several selection/crossover/mutation methods. Check the examples directory for several examples. See the BENCHMARK documentation file for more information.
+  def benchmark(genotype_class, html_outfile='report.html', csv_outfile=nil, &b)
+    start = Time.now
+    dsl_obj = StrategiesDSL.new; dsl_obj.instance_eval(&b)
+    all_tests        = dsl_obj.get_tests
+    generations      = dsl_obj.generations
+    population_size  = dsl_obj.population_size
+    repeat_tests     = dsl_obj.repeat
+    track_stat  = dsl_obj.track_stat
+    n_tests = all_tests.size
+    tests_done = 0
+    puts "#{n_tests} Total tests:"
+    overall_best = [nil, -1.0 / 0.0]
+    data = all_tests.map{|selection_module,crossover_module,mutator_module|
+      tests_done += 1
+      print "\nRunning test #{tests_done}/#{n_tests} : #{selection_module} / #{crossover_module} / #{mutator_module}\t"
+      gclass = Class.new(genotype_class) { use selection_module,crossover_module,mutator_module }
+      start_test = Time.now
+      test_stats = (0...repeat_tests).map{
+        print '.'; $stdout.flush
+        best = Population.new(gclass,population_size).evolve_silent(generations).last
+        stat = track_stat.call(best)
+        overall_best = [best, stat] if overall_best[0].nil? || (overall_best[1] <=> stat) < 0 # use <=> to allow arrays
+        stat
+      }
+      [selection_module, crossover_module,mutator_module,
+       (Time.now-start_test) / repeat_tests,  test_stats]
+    }
+    html_output(html_outfile, data, genotype_class, Time.now-start, overall_best, dsl_obj)
+    csv_output(csv_outfile  , data)
+    puts '',table_details(data).to_s
+    return data
+  end
+  private
+  ST_HEADINGS = %w[min max avg stddev time]
+  def format_stat(s)
+    if s.is_a?(Array)
+      s.map{|x|format_stat(x)}.join("\r\n")
+    else
+      '%.5f' % s
+    end
+  end
+  def table_details(data)
+    tabledata = data.map{|s,c,m,t,a|
+      [s,c,m] + (a.stats << t).map{|f| format_stat(f)}
+    }.sort_by{|row| -row[-3].to_f } # sort by avg fitness, highest to lowest. For multi-component stats, to_f reads only the first component of the formatted string.
+    return tabledata.to_table(%w[selection crossover mutation] + ST_HEADINGS)
+  end
+  def table_group(datasets,g1_name) # array of title, data rows
+    tabledata = datasets.map{|title,dataset|
+      joined   = dataset.map(&:last).inject{|a,b|a+b}
+      avg_time = dataset.map{|r| r[-2] }.average
+      [title] + (joined.stats << avg_time).map{|f| format_stat(f)}
+    }.sort_by{|row| -row[-3].to_f } # sort by average fitness
+    return tabledata.to_table([g1_name]+ST_HEADINGS).to_html
+  end
+  def html_output(html_outfile, data, genotype_class, tot_time, overall_best, dsl_obj )
+    return unless html_outfile
+   # Generate HTML
+    html_tables = <<INFO
+      <h1>Information</h1>\n
+      <table>
+        <tr><th colspan=2>Version Info</th></tr>
+        <tr><td>Ruby Install Name</td><td>#{Config::CONFIG['ruby_install_name']}</td></tr>
+        <tr><td>Ruby Version</td><td>#{RUBY_VERSION}</td></tr>
+        <tr><td>Charlie Version</td><td>#{Charlie::VERSION}</td></tr>
+        <tr><th colspan=2>Benchmark Info</th></tr>
+        <tr><td>Genotype class</td><td>#{genotype_class}</td></tr>
+        <tr><td>Population size</td><td>#{dsl_obj.population_size}</td></tr>
+        <tr><td>Number of generations per run</td><td>#{dsl_obj.generations}</td></tr>
+        <tr><td>Number of tests </td><td>#{data.size}</td></tr>
+        <tr><td>Tests repeated </td><td>#{dsl_obj.repeat} times</td></tr>
+        <tr><td>Number of runs </td><td>#{data.size * dsl_obj.repeat}</td></tr>
+        <tr><td>Total number of generations </td><td>#{data.size * dsl_obj.repeat * dsl_obj.generations}</td></tr>
+        <tr><td>Total time</td><td>#{'%.2f' % tot_time} seconds</td></tr>
+        <tr><th colspan=2>Best Solution Info</th></tr>
+        <tr><td>Fitness</td><td>#{overall_best[1].inspect}</td></tr>
+        <tr><td>Solution</td><td><textarea rows=3 cols=40>#{overall_best[0].to_s}</textarea></td></tr>
+      </table>
+INFO
+    # - Combined stats
+    html_tables << "<h1>Stats for all</h1>"
+    html_tables <<  table_group([["All",data]],'')
+    # - stats grouped by selection, crossover, mutation methods
+    ["selection","crossover","mutation"].each_with_index{|title,i|
+      html_tables << "<h1>Stats for #{title}</h1>"
+      html_tables << table_group(data.group_by{|x|x[i]}, title)
+    }
+    # - detailed stats
+    html_tables << '<h1>Detailed Stats</h1>' <<  table_details(data).to_html
+      # write HTML stats
+    File.open(html_outfile,'w'){|f|
+      template = File.read(File.dirname(__FILE__)+"/../../data/template.html")
+      f << template.sub('{{CONTENT}}',html_tables)
+    }
+  end
+  def csv_output(csv_outfile,data)
+    return unless csv_outfile
+    File.open(csv_outfile,'w'){|f|
+      f << data.map{|r|r[0..2] << r[-1].inspect }.to_table.to_csv
+    }
+  end
+# Used in the GABenchmark#benchmark function.
+class StrategiesDSL
+  class << self
+    def attr_dsl(x)
+      x = x.to_s
+      attr_accessor x
+      alias_method 'get_'+x, x  # rename reader
+      define_method(x) {|*args| # reader with 0 args, writer with 1 or more args (several args are stored as an array)
+        return send('get_'+x) if args.empty?
+        args.size > 1 ? send(x+'=',args) : send(x+'=',*args)
+      }
+    end
+  end
+  # Number of generations run in each test.
+  attr_dsl :generations
+  # Population size used.
+  attr_dsl :population_size
+  # Number of times all tests are run. Default=10. Increase for more accuracy on the benchmark.
+  attr_dsl :repeat
+  # Pass several modules to this to test these selection methods.
+  attr_dsl :selection
+  # Pass several modules to this to test these crossover methods.
+  attr_dsl :crossover
+  # Pass several modules to this to test these mutation methods.
+  attr_dsl :mutator
+  alias :mutation  :mutator
+  alias :mutation= :mutator=
+  def initialize
+    @repeat          = 10
+    @population_size = 20
+    @generations     = 50
+    selection []
+    crossover []
+    mutator   []
+    track_stat{|best| best.fitness } # tracks maximum fitness by default
+  end
+  # Pass a block that returns one or more statistics to track. Block is passed the individual with the highest fitness after each run.
+  # * Can be used to track, for example, training error vs generalization error.
+  # * Default is fitness of the best solution.
+  # * When returning multiple values, <=> for arrays is used to determine the best individual in the info table (i.e. later elements are only used for tie-breaking), but min/max/avg/stddev stats are calculated independently for each component.
+  def track_stat(&b)
+    return @track_stat unless block_given?
+    @track_stat = b
+  end
+  alias :track_stats :track_stat
+  # Get all the tests. Basically a cartesian product of all selection, crossover and mutation methods.
+  def get_tests
+    t = []
+    defmod = Module.new{self.name='default'}
+    selection = [@selection].flatten ; selection = [defmod] if selection.empty?
+    crossover = [@crossover].flatten ; crossover = [defmod] if crossover.empty?
+    mutator   = [@mutator].flatten   ; mutator   = [defmod] if   mutator.empty?
+    selection.each{|s|
+     crossover.each{|c|
+      mutator.each{|m|
+       t << [s,c,m]
+      }
+     }
+    }
+    t
+  end
+end
+end # GABenchmark
+=begin
+class RoyalRoad <  BitStringGenotype(64)  # Royal Road problem
+  def fitness
+    1 + genes.enum_slice(8).find_all{|e| e.all?{|x|x==1} }.size # +1 to avoid all fitness 0 for roulette
+  end
+  cache_fitness
+end
+GABenchmark.benchmark(RoyalRoad,'test_bitstring_royalroad.html','o.csv'){
+  selection TruncationSelection(0.3),
+            Elitism(ScaledRouletteSelection)
+  crossover NullCrossover, UniformCrossover
+  mutator   ListMutator(:expected_n[5],:flip)
+  generations    100
+  repeat          2 #
+  population_size 17
+  track_stat{|b| [b.fitness,b.genes.count{|x|x==1}] }
+}
+=end

data/lib/charlie/genotype.rb CHANGED

@@ -51,6 +51,10 @@ class Genotype
       self
     end
   end
+  # Used by Genotype.cache_fitness. This accessor can be used to clear the cache.
+  # It could also be used by niche selection, etc. as a place to change the effective fitness without changing the actual selection algorithms.
+  attr_accessor :fitness_cache
   def dup
     self.class.from_genes(genes.dup)

data/lib/charlie/list/list_crossover.rb CHANGED

@@ -1,4 +1,4 @@
-# List crossovers: SinglePointCrossover, UniformCrossover
+# List crossovers: SinglePointCrossover, UniformCrossover, NPointCrossover
 # Simple single point crossover, returns two children.
@@ -10,15 +10,36 @@ module SinglePointCrossover
   end
 end
+# n point crossover, returns two children.
+def NPointCrossover(n=2)
+ Module.new{
+  self.name = "NPointCrossover(#{n})"
+  define_method(:cross){|parent1,parent2|
+    p1 = parent1.genes; p2 = parent2.genes
+    upper_bnd = p1.size + 1
+    cross_pts = (0...n).map{rand(upper_bnd)}.sort
+    c1 = []; c2=[]
+    ([0] + cross_pts << upper_bnd).each_cons(2){|cp1,cp2|
+      c1 += p1[cp1...cp2]
+      c2 += p2[cp1...cp2]
+      p1,p2 = p2,p1
+    }
+    [c1,c2].map{|x| from_genes(x) }
+  }
+ }
+end
 # Uniform crossover, returns two children.
 module UniformCrossover
   def cross(parent1,parent2)
     c1 = []; c2=[]
-    parent1.genes.zip(parent2.genes).each{|a,b|
+    g1 = parent1.genes; g2 = parent2.genes
+    g1.each_with_index{|e,i|
       if rand(2).zero?
-        c1 << a; c2 << b
+        c1 << e; c2 << g2[i]
       else
-        c2 << a; c1 << b
+        c2 << e; c1 << g2[i]
       end
     }
     [c1,c2].map{|x| from_genes(x) }
@@ -27,4 +48,3 @@ end

data/lib/charlie/mutate.rb CHANGED

@@ -8,18 +8,45 @@ end
 # Takes mutator m1 with probability p, and mutator m2 with probability 1-p
 def PMutate(p,m1,m2=NullMutator)
+  return m1 if m1==m2
+  m1_name, m2_name = [m1,m2].map{|c| '_mutate_' + c.to_s.gsub(/[^A-Za-z0-9]/,'') + '!' }
   Module.new{
-    @@p = p
-    include m2
-    alias :mutate2! :mutate!
-    include m1
-    def mutate!(*args)
-      rand < @@p ? super(*args) : mutate2!(*args)
-    end
+    include m1.dup # dup to avoid bugs on use PMutate(..,m1) .. use m1
+    alias_method m1_name, :mutate!
+    include m2.dup
+    alias_method m2_name, :mutate!
+    define_method(:mutate!) {
+      rand < p ? send(m1_name) : send(m2_name)
+    }
     self.name= "PMutate(#{p},#{m1},#{m2})"
   }
 end
+# Variant of PMutate for more than 2 mutators
+# * Pass a hash of Module=>probability pairs. If sum(probability) < 1, NullMutator will be used for the remaining probability.
+# * example: PMutateN(TranspositionMutator=>0.33,InversionMutator=>0.33) for NullMutator/TranspositionMutator/InversionMutator all with probability 1/3
+def PMutateN(hash)
+  tot_p = hash.inject(0){|s,(m,p)| s+p }
+  if (tot_p - 1.0).abs > 0.01 # close to 1?
+    raise ArgumentError, "PMutateN: sum of probabilities > 1.0" if tot_p > 1.0
+    hash[NullMutator] = (hash[NullMutator] || 0.0) + (1.0 - tot_p)
+  end
+  partial_sums = hash.sort_by{|m,p| -p } # max probability first
+  s = 0.0
+  partial_sums.map!{|m,p| ['_mutate_' + m.to_s.gsub(/[^A-Za-z0-9]/,'') + '!' , s+=p, m] }
+  Module.new{
+    partial_sums.each{|name,p,mod|
+      include mod.dup
+      alias_method name, :mutate!
+    }
+    define_method(:mutate!) {
+      r = rand
+      send partial_sums.find{|name,p,mod| p >= r }.first
+    }
+    self.name= "PMutateN(#{hash.inspect})"
+  }
+end

data/lib/charlie/permutation/permutation.rb CHANGED

@@ -16,12 +16,12 @@ def PermutationGenotype(n,elements=0...n)
   def to_s
     @genes.inspect
   end
-  use PermutationMutator.dup , PermutationCrossover.dup
+  use TranspositionMutator.dup , PCross(0.75,PermutationCrossover)
  }
 end
-# Transposition mutator for PermutationGenotype
-module PermutationMutator
+# Transposition mutator for PermutationGenotype. Interchanges two elements and leaves the remaining elements in their original positions.
+module TranspositionMutator
   # Transposes two elements
   def mutate!
     i1, i2 = @genes.rand_index,@genes.rand_index
@@ -30,7 +30,7 @@ module PermutationMutator
   end
 end
-# Inversion mutator for PermutationGenotype. May work on other array/string-based genotypes as well, but this is untested.
+# Inversion mutator for PermutationGenotype.
 # Takes two random indices, and reverses the elements in between (includes possible wrapping if index2 < index1)
 module InversionMutator
   # Inverts parts of the genes
@@ -48,8 +48,36 @@ module InversionMutator
 end
-# One point partial preservation crossover for PermutationGenotype
-# Child 1 is identical to parent 1 up to the cross point, and contains the remaining elements in the same order as parent 2.
+# Takes a random element of the permutation, and inserts it at a random position.
+# * Example: [1 2 3 4 5] to [1 4 2 3 5]
+module InsertionMutator
+  def mutate!
+    from, to = @genes.rand_index, @genes.rand_index
+    @genes = if to >= from
+      to += 1  # add end of array as possibility
+      (@genes[0...from] + @genes[from+1...to] << @genes[from])  + @genes[to..-1]
+    else
+      (@genes[0...to] << @genes[from]) + @genes[to...from] + @genes[from+1..-1]
+    end
+    self
+  end
+end
+#  Rotates the representation of the permutation (i.e. effectively does nothing if it represents a cycle)
+# * Example: [1 2 3 4] to [3 4 1 2]
+module RotationMutator
+  def mutate!
+   new_start = @genes.rand_index
+   @genes = @genes[new_start..-1] + @genes[0...new_start]
+   self
+  end
+end
+# * One point partial preservation crossover for PermutationGenotype
+# * Also known as partial recombination crossover (PRX).
+# * Child 1 is identical to parent 1 up to the cross point, and contains the remaining elements in the same order as parent 2.
 module PermutationCrossover
   def cross(parent1,parent2)
     p1, p2 = parent1.genes, parent2.genes