feldtruby 0.3.16 → 0.3.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +7 -0
  2. data/Gemfile.lock +9 -2
  3. data/Rakefile +8 -0
  4. data/feldtruby.gemspec +6 -0
  5. data/lib/feldtruby/annotations.rb +10 -0
  6. data/lib/feldtruby/array/basic_stats.rb +3 -1
  7. data/lib/feldtruby/array/permutations_and_subsets.rb +17 -0
  8. data/lib/feldtruby/float.rb +23 -0
  9. data/lib/feldtruby/logger.rb +216 -30
  10. data/lib/feldtruby/minitest_extensions.rb +0 -1
  11. data/lib/feldtruby/mongodb.rb +16 -0
  12. data/lib/feldtruby/mongodb_logger.rb +245 -0
  13. data/lib/feldtruby/optimize/differential_evolution.rb +29 -5
  14. data/lib/feldtruby/optimize/elite_archive.rb +91 -0
  15. data/lib/feldtruby/optimize/max_steps_termination_criterion.rb +1 -1
  16. data/lib/feldtruby/optimize/objective.rb +343 -222
  17. data/lib/feldtruby/optimize/optimizer.rb +138 -60
  18. data/lib/feldtruby/optimize/search_space.rb +10 -0
  19. data/lib/feldtruby/optimize.rb +1 -26
  20. data/lib/feldtruby/statistics.rb +74 -3
  21. data/lib/feldtruby/time.rb +19 -0
  22. data/lib/feldtruby/version.rb +1 -1
  23. data/old/event_logger.rb +682 -0
  24. data/spikes/comparing_samplers_on_classic_optimization_functions/analyze_sampler_comparison_results.R +78 -0
  25. data/spikes/comparing_samplers_on_classic_optimization_functions/compare_samplers.rb +264 -0
  26. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_130405_175934.csv +561 -0
  27. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder.csv +11201 -0
  28. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder_all_radii_4_to_30.csv +44801 -0
  29. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_omnitest.csv +1401 -0
  30. data/spikes/mongodb_logger.rb +47 -0
  31. data/spikes/simple_de_run.rb +32 -0
  32. data/test/helper.rb +17 -1
  33. data/test/test_array_basic_stats.rb +5 -1
  34. data/test/test_array_permutations_and_subsets.rb +23 -0
  35. data/test/test_float.rb +15 -0
  36. data/test/test_html_doc_getter.rb +1 -1
  37. data/test/test_logger.rb +86 -48
  38. data/test/test_mongodb_logger.rb +116 -0
  39. data/test/test_object_annotations.rb +14 -0
  40. data/test/test_optimize.rb +7 -6
  41. data/test/test_optimize_differential_evolution.rb +21 -19
  42. data/test/test_optimize_elite_archive.rb +85 -0
  43. data/test/test_optimize_objective.rb +237 -74
  44. data/test/test_optimize_populationbasedoptimizer.rb +72 -6
  45. data/test/test_optimize_random_search.rb +0 -17
  46. data/test/test_optimize_search_space.rb +15 -0
  47. data/test/test_statistics.rb +30 -4
  48. data/test/test_time.rb +22 -0
  49. data/test/tmp_shorter.csv +200 -0
  50. metadata +62 -21
@@ -1,42 +1,35 @@
1
1
  require 'feldtruby/optimize'
2
2
  require 'feldtruby/optimize/objective'
3
3
  require 'feldtruby/optimize/search_space'
4
- require 'feldtruby/optimize/stdout_logger'
5
4
  require 'feldtruby/optimize/max_steps_termination_criterion'
6
5
  require 'feldtruby/math/rand'
7
6
  require 'feldtruby/array'
7
+ require 'feldtruby/logger'
8
8
 
9
- module FeldtRuby::Optimize
10
- DefaultOptimizationOptions = {
11
- :logger => FeldtRuby::Optimize::StdOutLogger,
12
- :maxNumSteps => 10_000,
13
- :terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
14
- :verbose => false,
15
- :populationSize => 100,
16
- }
17
-
18
- def self.override_default_options_with(options)
19
- o = DefaultOptimizationOptions.clone.update(options)
20
- o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
21
- o
22
- end
23
- end
9
+ module FeldtRuby::Optimize
24
10
 
25
11
  # Find an vector of float values that optimizes a given
26
12
  # objective.
27
- class FeldtRuby::Optimize::Optimizer
28
- attr_reader :objective, :search_space, :best, :best_quality_value, :best_sub_quality_values, :num_optimization_steps, :termination_criterion
13
+ class Optimizer
14
+ include FeldtRuby::Logging
15
+
16
+ attr_reader :options, :objective, :search_space, :best, :best_quality_value
17
+ attr_reader :best_sub_quality_values, :num_optimization_steps, :termination_criterion
29
18
 
30
19
  def initialize(objective, searchSpace = FeldtRuby::Optimize::DefaultSearchSpace, options = {})
31
20
  @best = nil # To avoid warnings if not set
32
21
  @objective, @search_space = objective, searchSpace
33
22
  @options = FeldtRuby::Optimize.override_default_options_with(options)
23
+
24
+ # Must setup logger before setting options since verbosity of logger is
25
+ # an option!
26
+ setup_logger_and_distribute_to_instance_variables()
27
+
34
28
  initialize_options(@options)
35
- @objective.logger = @logger
36
29
  end
37
30
 
38
31
  def initialize_options(options)
39
- @logger = options[:logger].new(self, options[:verbose])
32
+ self.logger.verbose = options[:verbose]
40
33
  @termination_criterion = options[:terminationCriterion]
41
34
  end
42
35
 
@@ -46,46 +39,56 @@ class FeldtRuby::Optimize::Optimizer
46
39
  # Set up a random best since other methods require it
47
40
  update_best([search_space.gen_candidate()])
48
41
  begin
49
- @logger.note_optimization_starts()
42
+ logger.log "Optimization with optimizer #{self.class.inspect} started"
50
43
  while !termination_criterion.terminate?(self)
51
44
  new_candidates = optimization_step()
52
45
  @num_optimization_steps += 1
53
- @logger.note_another_optimization_step(@num_optimization_steps)
54
46
  update_best(new_candidates)
55
47
  end
56
48
  rescue Exception => e
57
- @logger.note_termination("!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n"))
58
- ensure
59
- @logger.note_termination("!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!")
49
+ logger.log_data :exception, {
50
+ :exception_class => e.class.inspect,
51
+ :backtrace => e.backtrace.join("\n")
52
+ }, "!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n")
53
+ ensure
54
+ logger.log "!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!"
60
55
  end
61
56
  @objective.note_end_of_optimization(self)
62
- @logger.note_end_of_optimization(self)
57
+ log_end_of_optimization
63
58
  @best # return the best
64
59
  end
65
60
 
61
+ def log_end_of_optimization
62
+ logger.log("End of optimization\n" +
63
+ " Optimizer: #{self.class}\n" +
64
+ " Best found: #{@best}\n" +
65
+ " Quality of best: #{@objective.quality_of(@best)}\n" +
66
+ " Time used = #{Time.human_readable_timestr(logger.elapsed_time)}, " +
67
+ "Steps performed = #{@num_optimization_steps}, " +
68
+ "#{Time.human_readable_timestr(time_per_step, true)}/step")
69
+ end
70
+
71
+ def time_per_step
72
+ logger.elapsed_time / @num_optimization_steps
73
+ end
74
+
66
75
  # Run one optimization step. Default is to do nothing, i.e. this is just a superclass,
67
76
  # but subclasses need to implement this.
68
77
  def optimization_step()
69
78
  end
70
79
 
71
- # Rank all candidates, then update the best one if a new best found.
80
+ # Update the best if a new best was found.
72
81
  def update_best(candidates)
73
- if @best
74
- ranked = objective.rank_candidates(candidates + [@best])
75
- else
76
- ranked = objective.rank_candidates(candidates)
77
- end
78
- new_best, new_quality_value, new_sub_qvalues = ranked.first
79
- # Since some objectives are not deterministic the best
80
- if new_best != @best
81
- if @best
82
- old_best, new_qv_old_best, sub_qv_old_best = ranked.select {|a| a.first == @best}.first
83
- end
84
- @logger.note_new_best(new_best, new_quality_value, new_sub_qvalues,
85
- @best, new_qv_old_best, sub_qv_old_best)
86
- @best = new_best
87
- @best_quality_value = new_quality_value
88
- @best_sub_quality_values = new_sub_qvalues
82
+ best_new, rest = objective.rank_candidates(candidates)
83
+ if @best.nil? || @objective.is_better_than?(best_new, @best)
84
+ qb = @best.nil? ? nil : @objective.quality_of(@best)
85
+ logger.log_data :new_best, {
86
+ :new_best => best_new,
87
+ :new_quality_value => @objective.quality_of(best_new),
88
+ :old_best => @best,
89
+ :old_quality_value => qb
90
+ }, "New best candidate found"
91
+ @best = best_new
89
92
  true
90
93
  else
91
94
  false
@@ -93,13 +96,69 @@ class FeldtRuby::Optimize::Optimizer
93
96
  end
94
97
  end
95
98
 
96
- class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optimizer
99
+ # Sample the indices of a population. This default super class just randomly
100
+ # samples without replacement.
101
+ class PopulationSampler
102
+ def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
103
+ @optimizer = optimizer
104
+ @population_size = @optimizer.population_size
105
+ initialize_all_indices()
106
+ end
107
+
108
+ def initialize_all_indices
109
+ # We set up an array of the indices to all candidates of the population so we can later sample from it
110
+ # This should always contain all indices even if they might be out of order. This is because we
111
+ # only swap! elements in this array, never delete any.
112
+ @all_indices = (0...@population_size).to_a
113
+ end
114
+
115
+ def sample_population_indices_without_replacement(numSamples)
116
+ sample_indices_without_replacement numSamples, @all_indices
117
+ end
118
+
119
+ def sample_indices_without_replacement(numSamples, indices)
120
+ sampled_indices = []
121
+ size = indices.length
122
+ numSamples.times do |i|
123
+ index = i + rand_int(size - i)
124
+ sampled_index, skip = indices.swap!(i, index)
125
+ sampled_indices << sampled_index
126
+ end
127
+ sampled_indices
128
+ end
129
+ end
130
+
131
+ # This implements a "trivial geography" similar to Spector and Kline (2006)
132
+ # by first sampling an individual randomly and then selecting additional
133
+ # individuals for the same tournament within a certain deme of limited size
134
+ # for the sub-sequent individuals in the population. The version we implement
135
+ # here is from:
136
+ # I. Harvey, "The Microbial Genetic Algorithm", in Advances in Artificial Life
137
+ # Darwin Meets von Neumann, Springer, 2011.
138
+ class RadiusLimitedPopulationSampler < PopulationSampler
139
+ def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
140
+ super
141
+ @radius = options[:samplerRadius]
142
+ end
143
+
144
+ def sample_population_indices_without_replacement(numSamples)
145
+ i = rand(@population_size)
146
+ indices = (i..(i+@radius)).to_a
147
+ if (i+@radius) >= @population_size
148
+ indices.map! {|i| i % @population_size}
149
+ end
150
+ sample_indices_without_replacement numSamples, indices
151
+ end
152
+ end
153
+
154
+ class PopulationBasedOptimizer < Optimizer
97
155
  attr_reader :population
98
156
 
99
157
  def initialize_options(options)
100
158
  super
101
- initialize_population(@options[:populationSize])
102
- initialize_all_indices()
159
+ @population_size = @options[:populationSize]
160
+ initialize_population(@population_size)
161
+ @sampler = options[:samplerClass].new(self, options)
103
162
  end
104
163
 
105
164
  # Create a population of a given size by randomly sampling candidates from the search space.
@@ -107,26 +166,29 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
107
166
  @population = Array.new(sizeOfPopulation).map {search_space.gen_candidate()}
108
167
  end
109
168
 
110
- def population_size
111
- @population.length
169
+ # Re-initialize parts of the population.
170
+ def re_initialize_population(percentageOfPopulation = 0.50)
171
+ if percentageOfPopulation >= 1.00
172
+ initialize_population(@population_size)
173
+ else
174
+ num_to_replace = (percentageOfPopulation * @population_size).to_i
175
+ # We must use a PopulationSampler here instead of just calling sample_population_indices_without_replacement
176
+ # since we do not know which sampler is installed.
177
+ sampler = PopulationSampler.new(self, self.options)
178
+ indices = sampler.sample_population_indices_without_replacement(num_to_replace)
179
+ indices.each do |i|
180
+ @population[i] = search_space.gen_candidate()
181
+ end
182
+ end
112
183
  end
113
184
 
114
- def initialize_all_indices
115
- # We set up an array of the indices to all candidates of the population so we can later sample from it
116
- # This should always contain all indices even if they might be out of order. This is because we
117
- # only swap! elements in this array, never delete any.
118
- @all_indices = (0...population_size).to_a
185
+ def population_size
186
+ @population_size
119
187
  end
120
188
 
121
189
  # Sample indices from the population without replacement.
122
190
  def sample_population_indices_without_replacement(numSamples)
123
- sampled_indices = []
124
- numSamples.times do |i|
125
- index = i + rand_int(population_size - i)
126
- sampled_index, skip = @all_indices.swap!(i, index)
127
- sampled_indices << sampled_index
128
- end
129
- sampled_indices
191
+ @sampler.sample_population_indices_without_replacement(numSamples)
130
192
  end
131
193
 
132
194
  # Get candidates from population at given indices.
@@ -143,4 +205,20 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
143
205
  def update_candidate_in_population(index, candidate)
144
206
  @population[index] = candidate
145
207
  end
208
+ end
209
+
210
+ DefaultOptimizationOptions = {
211
+ :terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
212
+ :verbose => true,
213
+ :populationSize => 200,
214
+ :samplerClass => FeldtRuby::Optimize::RadiusLimitedPopulationSampler,
215
+ :samplerRadius => 10 # Max distance between individuals selected in same tournament
216
+ }
217
+
218
+ def self.override_default_options_with(options)
219
+ o = DefaultOptimizationOptions.clone.update(options)
220
+ o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
221
+ o
222
+ end
223
+
146
224
  end
@@ -57,6 +57,16 @@ class SearchSpace
57
57
  self.new(min_values, max_values)
58
58
  end
59
59
 
60
+ def self.new_from_min_max_per_variable(minMaxPairs)
61
+ min_values = []
62
+ max_values = []
63
+ minMaxPairs.each do |min, max|
64
+ min_values << min
65
+ max_values << max
66
+ end
67
+ self.new(min_values, max_values)
68
+ end
69
+
60
70
  def num_variables
61
71
  @min_values.length
62
72
  end
@@ -1,28 +1,3 @@
1
1
  require 'feldtruby'
2
2
 
3
- module FeldtRuby::Optimize; end
4
-
5
- require 'feldtruby/optimize/differential_evolution'
6
- module FeldtRuby::Optimize
7
- # Optimize the _numVariables_ between the _min_ and _max_ values given _costFunction_.
8
- # Default is to minimize.
9
- def self.optimize(min, max, options = {:verbose => true},
10
- objectiveFuncClass = FeldtRuby::Optimize::ObjectiveMinimizeBlock, &costFunction)
11
- objective = objectiveFuncClass.new(&costFunction)
12
- num_vars = costFunction.arity
13
- search_space = SearchSpace.new_from_min_max(num_vars, min, max)
14
- optimizer = DEOptimizer.new(objective, search_space, options)
15
- optimizer.optimize()
16
- optimizer.best.to_a
17
- end
18
-
19
- # Short hand wrapper for function minimization.
20
- def self.minimize(min, max, options = {}, &costFunction)
21
- optimize(min, max, options, &costFunction)
22
- end
23
-
24
- # Short hand wrapper for function maximization.
25
- def self.maximize(min, max, options = {}, &costFunction)
26
- optimize(min, max, options, FeldtRuby::Optimize::ObjectiveMaximizeBlock, &costFunction)
27
- end
28
- end
3
+ module FeldtRuby::Optimize; end
@@ -162,6 +162,7 @@ module Statistics
162
162
  end
163
163
 
164
164
  def chi_squared_test(aryOrHashOfCounts)
165
+ puts "aryOrHashOfCounts = #{aryOrHashOfCounts}"
165
166
  counts = (Hash === aryOrHashOfCounts) ? aryOrHashOfCounts : aryOrHashOfCounts.counts
166
167
  vs = counts.values
167
168
  res = RC.call("chisq.test", vs)
@@ -256,7 +257,6 @@ module FeldtRuby::Statistics::Plotting
256
257
 
257
258
  def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
258
259
  include_library "MASS"
259
- #include_library "ggplot2"
260
260
 
261
261
  script = <<-EOS
262
262
  data <- read.csv(#{csvFilePath.inspect})
@@ -284,6 +284,10 @@ module FeldtRuby::Statistics::Plotting
284
284
  end
285
285
 
286
286
  def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
287
+ include_library "grid"
288
+ include_library "lattice"
289
+ include_library "hexbin"
290
+
287
291
  plot_2dims(csvFilePath,
288
292
  "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
289
293
  xlabel, ylabel, title)
@@ -292,9 +296,54 @@ module FeldtRuby::Statistics::Plotting
292
296
  def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
293
297
 
294
298
  script = <<-EOS
295
- # smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
299
+ smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
296
300
  f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
297
- f <- f + stat_smooth()
301
+ f <- f + stat_smooth(method = smoothing_method)
302
+ EOS
303
+
304
+ plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
305
+
306
+ end
307
+
308
+ # Data can be specified in two ways, either directly in Ruby arrays,
309
+ # or as strings with the path to a csv file to be loaded. In the latter
310
+ # case a column name must be given.
311
+ def load_csv_files_as_data(dataMap, columnName = nil)
312
+
313
+ keys = dataMap.keys.sort
314
+
315
+ read_csvs = ""
316
+ data_frame = "data.frame(1:length(d_#{keys.first})"
317
+
318
+ keys.each_with_index do |key, i|
319
+
320
+ value = dataMap[key]
321
+
322
+ set_name = "d_#{key}"
323
+
324
+ read_csvs += "#{set_name} <- "
325
+
326
+ if Array === value
327
+ read_csvs += (ruby_object_to_R_string(value) + ";\n")
328
+ data_frame += ", #{key} = #{set_name}"
329
+ else
330
+ read_csvs += "read.csv(#{value.inspect});\n"
331
+ data_frame += ", #{key} = #{set_name}$#{columnName}"
332
+ end
333
+
334
+ end
335
+
336
+ data_frame += ")"
337
+
338
+ script = "#{read_csvs}data <- #{data_frame};"
339
+
340
+ end
341
+
342
+ def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
343
+
344
+ script = <<-EOS
345
+ f <- ggplot(data, aes(x=#{xlabel}, y=#{ylabel}))
346
+ f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
298
347
  EOS
299
348
 
300
349
  plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
@@ -374,6 +423,28 @@ module FeldtRuby::Statistics::Plotting
374
423
 
375
424
  end
376
425
 
426
+ # Plot the densities of the data found in the column named _columnName_
427
+ # in the csv files in _csvFiles_.
428
+ def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
429
+
430
+ load_csvs = load_csv_files_as_data csvFiles
431
+
432
+ script = <<-EOS
433
+ #{load_csvs}
434
+ #df <- data.frame(index = (1:#{cardinalities.first}), #{hash_to_R_params(dataMap)})
435
+ df.m <- melt(df, id = "index")
436
+ names(df.m)[2] <- _datasetsName_
437
+ f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
438
+ f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
439
+ #{ggplot2_setup_and_theme()}
440
+ f
441
+ EOS
442
+
443
+ puts script
444
+ subst_eval script, {:title => title, :datasetsName => datasetsName,
445
+ :xlabel => xlabel, :ylabel => ylabel}
446
+
447
+ end
377
448
  end
378
449
 
379
450
  class FeldtRuby::RCommunicator
@@ -1,3 +1,22 @@
1
+ class Time
2
+
3
+ # Number of milliseconds since Unix epoch.
4
+ def milli_seconds
5
+ (to_i * 1000) + (nsec / 1_000_000)
6
+ end
7
+
8
+ # Number of microseconds since Unix epoch.
9
+ def micro_seconds
10
+ (to_i * 1_000_000) + (nsec / 1_000)
11
+ end
12
+
13
+ # Number of nanoseconds since Unix epoch.
14
+ def nano_seconds
15
+ (to_i * 1_000_000_000) + nsec
16
+ end
17
+
18
+ end
19
+
1
20
  def Time.timestamp(options = {:short => false})
2
21
  if options[:short]
3
22
  Time.now.strftime("%y%m%d %H:%M.%S")
@@ -1,3 +1,3 @@
1
1
  module FeldtRuby
2
- VERSION = "0.3.16"
2
+ VERSION = "0.3.18"
3
3
  end