RubyGems - feldtruby - Versions diffs - 0.3.16 → 0.3.18 - Mend

feldtruby 0.3.16 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

checksums.yaml +7 -0
data/Gemfile.lock +9 -2
data/Rakefile +8 -0
data/feldtruby.gemspec +6 -0
data/lib/feldtruby/annotations.rb +10 -0
data/lib/feldtruby/array/basic_stats.rb +3 -1
data/lib/feldtruby/array/permutations_and_subsets.rb +17 -0
data/lib/feldtruby/float.rb +23 -0
data/lib/feldtruby/logger.rb +216 -30
data/lib/feldtruby/minitest_extensions.rb +0 -1
data/lib/feldtruby/mongodb.rb +16 -0
data/lib/feldtruby/mongodb_logger.rb +245 -0
data/lib/feldtruby/optimize/differential_evolution.rb +29 -5
data/lib/feldtruby/optimize/elite_archive.rb +91 -0
data/lib/feldtruby/optimize/max_steps_termination_criterion.rb +1 -1
data/lib/feldtruby/optimize/objective.rb +343 -222
data/lib/feldtruby/optimize/optimizer.rb +138 -60
data/lib/feldtruby/optimize/search_space.rb +10 -0
data/lib/feldtruby/optimize.rb +1 -26
data/lib/feldtruby/statistics.rb +74 -3
data/lib/feldtruby/time.rb +19 -0
data/lib/feldtruby/version.rb +1 -1
data/old/event_logger.rb +682 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/analyze_sampler_comparison_results.R +78 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/compare_samplers.rb +264 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_130405_175934.csv +561 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder.csv +11201 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder_all_radii_4_to_30.csv +44801 -0
data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_omnitest.csv +1401 -0
data/spikes/mongodb_logger.rb +47 -0
data/spikes/simple_de_run.rb +32 -0
data/test/helper.rb +17 -1
data/test/test_array_basic_stats.rb +5 -1
data/test/test_array_permutations_and_subsets.rb +23 -0
data/test/test_float.rb +15 -0
data/test/test_html_doc_getter.rb +1 -1
data/test/test_logger.rb +86 -48
data/test/test_mongodb_logger.rb +116 -0
data/test/test_object_annotations.rb +14 -0
data/test/test_optimize.rb +7 -6
data/test/test_optimize_differential_evolution.rb +21 -19
data/test/test_optimize_elite_archive.rb +85 -0
data/test/test_optimize_objective.rb +237 -74
data/test/test_optimize_populationbasedoptimizer.rb +72 -6
data/test/test_optimize_random_search.rb +0 -17
data/test/test_optimize_search_space.rb +15 -0
data/test/test_statistics.rb +30 -4
data/test/test_time.rb +22 -0
data/test/tmp_shorter.csv +200 -0
metadata +62 -21

data/lib/feldtruby/optimize/optimizer.rb CHANGED Viewed

@@ -1,42 +1,35 @@
 require 'feldtruby/optimize'
 require 'feldtruby/optimize/objective'
 require 'feldtruby/optimize/search_space'
-require 'feldtruby/optimize/stdout_logger'
 require 'feldtruby/optimize/max_steps_termination_criterion'
 require 'feldtruby/math/rand'
 require 'feldtruby/array'
+require 'feldtruby/logger'
-module FeldtRuby::Optimize
-	DefaultOptimizationOptions = {
-		:logger => FeldtRuby::Optimize::StdOutLogger,
-		:maxNumSteps => 10_000,
-		:terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
-		:verbose => false,
-		:populationSize => 100,
-	}
-	def self.override_default_options_with(options)
-		o = DefaultOptimizationOptions.clone.update(options)
-		o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
-		o
-	end
-end
+module FeldtRuby::Optimize
 # Find a vector of float values that optimizes a given
 # objective.
-class FeldtRuby::Optimize::Optimizer
-	attr_reader :objective, :search_space, :best, :best_quality_value, :best_sub_quality_values, :num_optimization_steps, :termination_criterion
+class Optimizer
+	include FeldtRuby::Logging
+	attr_reader :options, :objective, :search_space, :best, :best_quality_value
+	attr_reader :best_sub_quality_values, :num_optimization_steps, :termination_criterion
 	def initialize(objective, searchSpace = FeldtRuby::Optimize::DefaultSearchSpace, options = {})
 		@best = nil # To avoid warnings if not set
 		@objective, @search_space = objective, searchSpace
 		@options = FeldtRuby::Optimize.override_default_options_with(options)
+		# Must setup logger before setting options since verbosity of logger is
+		# an option!
+		setup_logger_and_distribute_to_instance_variables()
 		initialize_options(@options)
-		@objective.logger = @logger
 	end
 	def initialize_options(options)
-		@logger = options[:logger].new(self, options[:verbose])
+		self.logger.verbose = options[:verbose]
 		@termination_criterion = options[:terminationCriterion]
 	end
@@ -46,46 +39,56 @@ class FeldtRuby::Optimize::Optimizer
 		# Set up a random best since other methods require it
 		update_best([search_space.gen_candidate()])
 		begin
-			@logger.note_optimization_starts()
+			logger.log "Optimization with optimizer #{self.class.inspect} started"
 			while !termination_criterion.terminate?(self)
 				new_candidates = optimization_step()
 				@num_optimization_steps += 1
-				@logger.note_another_optimization_step(@num_optimization_steps)
 				update_best(new_candidates)
 			end
 		rescue Exception => e
-			@logger.note_termination("!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n"))
-		ensure
-			@logger.note_termination("!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!")
+			logger.log_data :exception, {
+				:exception_class => e.class.inspect,
+				:backtrace => e.backtrace.join("\n")
+			}, "!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n")
+		ensure
+			logger.log "!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!"
 		end
 		@objective.note_end_of_optimization(self)
-		@logger.note_end_of_optimization(self)
+		log_end_of_optimization
 		@best # return the best
 	end
+	def log_end_of_optimization
+		logger.log("End of optimization\n" +
+			"  Optimizer: #{self.class}\n" +
+			"  Best found: #{@best}\n" +
+			"  Quality of best: #{@objective.quality_of(@best)}\n" +
+			"  Time used = #{Time.human_readable_timestr(logger.elapsed_time)}, " +
+			  "Steps performed = #{@num_optimization_steps}, " +
+			  "#{Time.human_readable_timestr(time_per_step, true)}/step")
+	end
+	def time_per_step
+		logger.elapsed_time / @num_optimization_steps
+	end
 	# Run one optimization step. Default is to do nothing, i.e. this is just a superclass,
 	# but subclasses need to implement this.
 	def optimization_step()
 	end
-	# Rank all candidates, then update the best one if a new best found.
+	# Update the best if a new best was found.
 	def update_best(candidates)
-		if @best
-			ranked = objective.rank_candidates(candidates + [@best])
-		else
-			ranked = objective.rank_candidates(candidates)
-		end
-		new_best, new_quality_value, new_sub_qvalues = ranked.first
-		# Since some objectives are not deterministic the best
-		if new_best != @best
-			if @best
-				old_best, new_qv_old_best, sub_qv_old_best = ranked.select {|a| a.first == @best}.first
-			end
-			@logger.note_new_best(new_best, new_quality_value, new_sub_qvalues,
-				@best, new_qv_old_best, sub_qv_old_best)
-			@best = new_best
-			@best_quality_value = new_quality_value
-			@best_sub_quality_values = new_sub_qvalues
+		best_new, rest = objective.rank_candidates(candidates)
+		if @best.nil? || @objective.is_better_than?(best_new, @best)
+			qb = @best.nil? ? nil : @objective.quality_of(@best)
+			logger.log_data :new_best, {
+				:new_best => best_new,
+				:new_quality_value => @objective.quality_of(best_new),
+				:old_best => @best,
+				:old_quality_value => qb
+			}, "New best candidate found"
+			@best = best_new
 			true
 		else
 			false
@@ -93,13 +96,69 @@ class FeldtRuby::Optimize::Optimizer
 	end
 end
-class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optimizer
+# Sample the indices of a population. This default super class just randomly
+# samples without replacement.
+class PopulationSampler
+	def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
+		@optimizer = optimizer
+		@population_size = @optimizer.population_size
+		initialize_all_indices()
+	end
+	def initialize_all_indices
+		# We set up an array of the indices to all candidates of the population so we can later sample from it
+		# This should always contain all indices even if they might be out of order. This is because we
+		# only swap! elements in this array, never delete any.
+		@all_indices = (0...@population_size).to_a
+	end
+	def sample_population_indices_without_replacement(numSamples)
+		sample_indices_without_replacement numSamples, @all_indices
+	end
+	def sample_indices_without_replacement(numSamples, indices)
+		sampled_indices = []
+		size = indices.length
+		numSamples.times do |i|
+			index = i + rand_int(size - i)
+			sampled_index, skip = indices.swap!(i, index)
+			sampled_indices << sampled_index
+		end
+		sampled_indices
+	end
+end
+# This implements a "trivial geography" similar to Spector and Kline (2006)
+# by first sampling an individual randomly and then selecting additional
+# individuals for the same tournament within a certain deme of limited size
+# for the sub-sequent individuals in the population. The version we implement
+# here is from:
+#  I. Harvey, "The Microbial Genetic Algorithm", in Advances in Artificial Life
+#  Darwin Meets von Neumann, Springer, 2011.
+class RadiusLimitedPopulationSampler < PopulationSampler
+	def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
+		super
+		@radius = options[:samplerRadius]
+	end
+	def sample_population_indices_without_replacement(numSamples)
+		i = rand(@population_size)
+		indices = (i..(i+@radius)).to_a
+		if (i+@radius) >= @population_size
+			indices.map! {|i| i % @population_size}
+		end
+		sample_indices_without_replacement numSamples, indices
+	end
+end
+class PopulationBasedOptimizer < Optimizer
 	attr_reader :population
 	def initialize_options(options)
 		super
-		initialize_population(@options[:populationSize])
-		initialize_all_indices()
+		@population_size = @options[:populationSize]
+		initialize_population(@population_size)
+		@sampler = options[:samplerClass].new(self, options)
 	end
 	# Create a population of a given size by randomly sampling candidates from the search space.
@@ -107,26 +166,29 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
 		@population = Array.new(sizeOfPopulation).map {search_space.gen_candidate()}
 	end
-	def population_size
-		@population.length
+	# Re-initialize parts of the population.
+	def re_initialize_population(percentageOfPopulation = 0.50)
+		if percentageOfPopulation >= 1.00
+			initialize_population(@population_size)
+		else
+			num_to_replace = (percentageOfPopulation * @population_size).to_i
+			# We must use a PopulationSampler here instead of just calling sample_population_indices_without_replacement
+			# since we do not know which sampler is installed.
+			sampler = PopulationSampler.new(self, self.options)
+			indices = sampler.sample_population_indices_without_replacement(num_to_replace)
+			indices.each do |i|
+				@population[i] = search_space.gen_candidate()
+			end
+		end
 	end
-	def initialize_all_indices
-		# We set up an array of the indices to all candidates of the population so we can later sample from it
-		# This should always contain all indices even if they might be out of order. This is because we
-		# only swap! elements in this array, never delete any.
-		@all_indices = (0...population_size).to_a
+	def population_size
+		@population_size
 	end
 	# Sample indices from the population without replacement.
 	def sample_population_indices_without_replacement(numSamples)
-		sampled_indices = []
-		numSamples.times do |i|
-			index = i + rand_int(population_size - i)
-			sampled_index, skip = @all_indices.swap!(i, index)
-			sampled_indices << sampled_index
-		end
-		sampled_indices
+		@sampler.sample_population_indices_without_replacement(numSamples)
 	end
 	# Get candidates from population at given indices.
@@ -143,4 +205,20 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
 	def update_candidate_in_population(index, candidate)
 		@population[index] = candidate
 	end
+end
+# Default option values; override_default_options_with copies this hash and
+# lets caller-supplied options replace individual entries.
+# NOTE(review): :maxNumSteps is read by override_default_options_with but has
+# no default here, so nil is passed to the termination criterion unless the
+# caller supplies it - confirm the criterion class handles nil.
+DefaultOptimizationOptions = {
+	# Class instantiated (with the :maxNumSteps option) to decide when to stop.
+	:terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
+	# Assigned to the logger's verbose setting by Optimizer#initialize_options.
+	:verbose => true,
+	# Number of candidates created by PopulationBasedOptimizer.
+	:populationSize => 200,
+	# Class instantiated by PopulationBasedOptimizer to sample population indices.
+	:samplerClass => FeldtRuby::Optimize::RadiusLimitedPopulationSampler,
+	:samplerRadius => 10 # Max distance between individuals selected in same tournament
+}
+def self.override_default_options_with(options)
+	o = DefaultOptimizationOptions.clone.update(options)
+	o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
+	o
+end
 end

data/lib/feldtruby/optimize/search_space.rb CHANGED Viewed

@@ -57,6 +57,16 @@ class SearchSpace
 		self.new(min_values, max_values)
 	end
+	def self.new_from_min_max_per_variable(minMaxPairs)
+		min_values = []
+		max_values = []
+		minMaxPairs.each do |min, max|
+			min_values << min
+			max_values << max
+		end
+		self.new(min_values, max_values)
+	end
 	def num_variables
 		@min_values.length
 	end

data/lib/feldtruby/optimize.rb CHANGED Viewed

@@ -1,28 +1,3 @@
 require 'feldtruby'
-module FeldtRuby::Optimize; end
-require 'feldtruby/optimize/differential_evolution'
-module FeldtRuby::Optimize
-	# Optimize the _numVariables_ between the _min_ and _max_ values given _costFunction_.
-	# Default is to minimize.
-	def self.optimize(min, max, options = {:verbose => true},
-		objectiveFuncClass = FeldtRuby::Optimize::ObjectiveMinimizeBlock, &costFunction)
-		objective = objectiveFuncClass.new(&costFunction)
-		num_vars = costFunction.arity
-		search_space = SearchSpace.new_from_min_max(num_vars, min, max)
-		optimizer = DEOptimizer.new(objective, search_space, options)
-		optimizer.optimize()
-		optimizer.best.to_a
-	end
-	# Short hand wrapper for function minimization.
-	def self.minimize(min, max, options = {}, &costFunction)
-		optimize(min, max, options, &costFunction)
-	end
-	# Short hand wrapper for function maximization.
-	def self.maximize(min, max, options = {}, &costFunction)
-		optimize(min, max, options, FeldtRuby::Optimize::ObjectiveMaximizeBlock, &costFunction)
-	end
-end
+module FeldtRuby::Optimize; end

data/lib/feldtruby/statistics.rb CHANGED Viewed

@@ -162,6 +162,7 @@ module Statistics
   end
   def chi_squared_test(aryOrHashOfCounts)
+    puts "aryOrHashOfCounts = #{aryOrHashOfCounts}"
     counts = (Hash === aryOrHashOfCounts) ? aryOrHashOfCounts : aryOrHashOfCounts.counts
     vs = counts.values
     res = RC.call("chisq.test", vs)
@@ -256,7 +257,6 @@ module FeldtRuby::Statistics::Plotting
   def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
     include_library "MASS"
-    #include_library "ggplot2"
     script = <<-EOS
       data <- read.csv(#{csvFilePath.inspect})
@@ -284,6 +284,10 @@ module FeldtRuby::Statistics::Plotting
   end
   def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
+    include_library "grid"
+    include_library "lattice"
+    include_library "hexbin"
     plot_2dims(csvFilePath,
       "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
       xlabel, ylabel, title)
@@ -292,9 +296,54 @@ module FeldtRuby::Statistics::Plotting
   def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
     script = <<-EOS
-      # smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
+      smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
       f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
-      f <- f + stat_smooth()
+      f <- f + stat_smooth(method = smoothing_method)
+    EOS
+    plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
+  end
+  # Data can be specified in two ways, either directly in Ruby arrays,
+  # or as strings with the path to a csv file to be loaded. In the latter
+  # case a column name must be given.
+  def load_csv_files_as_data(dataMap, columnName = nil)
+    keys = dataMap.keys.sort
+    read_csvs = ""
+    data_frame = "data.frame(1:length(d_#{keys.first})"
+    keys.each_with_index do |key, i|
+      value = dataMap[key]
+      set_name = "d_#{key}"
+      read_csvs += "#{set_name} <- "
+      if Array === value
+        read_csvs += (ruby_object_to_R_string(value) + ";\n")
+        data_frame += ", #{key} = #{set_name}"
+      else
+        read_csvs += "read.csv(#{value.inspect});\n"
+        data_frame += ", #{key} = #{set_name}$#{columnName}"
+      end
+    end
+    data_frame += ")"
+    script = "#{read_csvs}data <- #{data_frame};"
+  end
+  def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
+    script = <<-EOS
+      f <- ggplot(data, aes(x=#{xlabel}, y=#{ylabel}))
+      f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
     EOS
     plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
@@ -374,6 +423,28 @@ module FeldtRuby::Statistics::Plotting
   end
+  # Plot the densities of the data found in the column named _columnName_
+  # in the csv files in _csvFiles_.
+  def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
+    load_csvs = load_csv_files_as_data csvFiles
+    script = <<-EOS
+      #{load_csvs}
+      #df <- data.frame(index = (1:#{cardinalities.first}), #{hash_to_R_params(dataMap)})
+      df.m <- melt(df, id = "index")
+      names(df.m)[2] <- _datasetsName_
+      f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
+      f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
+      #{ggplot2_setup_and_theme()}
+      f
+    EOS
+    puts script
+    subst_eval script, {:title => title, :datasetsName => datasetsName,
+      :xlabel => xlabel, :ylabel => ylabel}
+  end
 end
 class FeldtRuby::RCommunicator

data/lib/feldtruby/time.rb CHANGED Viewed

@@ -1,3 +1,22 @@
+class Time
+	# Number of milliseconds since Unix epoch.
+	def milli_seconds
+		(to_i * 1000) + (nsec / 1_000_000)
+	end
+	# Number of microseconds since Unix epoch.
+	def micro_seconds
+		(to_i * 1_000_000) + (nsec / 1_000)
+	end
+	# Number of nanoseconds since Unix epoch.
+	def nano_seconds
+		(to_i * 1_000_000_000) + nsec
+	end
+end
 def Time.timestamp(options = {:short => false})
 	if options[:short]
 		Time.now.strftime("%y%m%d %H:%M.%S")

data/lib/feldtruby/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module FeldtRuby
-  VERSION = "0.3.16"
+  VERSION = "0.3.18"
 end