feldtruby 0.3.16 → 0.3.18

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile.lock +9 -2
  3. data/Rakefile +8 -0
  4. data/feldtruby.gemspec +6 -0
  5. data/lib/feldtruby/annotations.rb +10 -0
  6. data/lib/feldtruby/array/basic_stats.rb +3 -1
  7. data/lib/feldtruby/array/permutations_and_subsets.rb +17 -0
  8. data/lib/feldtruby/float.rb +23 -0
  9. data/lib/feldtruby/logger.rb +216 -30
  10. data/lib/feldtruby/minitest_extensions.rb +0 -1
  11. data/lib/feldtruby/mongodb.rb +16 -0
  12. data/lib/feldtruby/mongodb_logger.rb +245 -0
  13. data/lib/feldtruby/optimize/differential_evolution.rb +29 -5
  14. data/lib/feldtruby/optimize/elite_archive.rb +91 -0
  15. data/lib/feldtruby/optimize/max_steps_termination_criterion.rb +1 -1
  16. data/lib/feldtruby/optimize/objective.rb +343 -222
  17. data/lib/feldtruby/optimize/optimizer.rb +138 -60
  18. data/lib/feldtruby/optimize/search_space.rb +10 -0
  19. data/lib/feldtruby/optimize.rb +1 -26
  20. data/lib/feldtruby/statistics.rb +74 -3
  21. data/lib/feldtruby/time.rb +19 -0
  22. data/lib/feldtruby/version.rb +1 -1
  23. data/old/event_logger.rb +682 -0
  24. data/spikes/comparing_samplers_on_classic_optimization_functions/analyze_sampler_comparison_results.R +78 -0
  25. data/spikes/comparing_samplers_on_classic_optimization_functions/compare_samplers.rb +264 -0
  26. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_130405_175934.csv +561 -0
  27. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder.csv +11201 -0
  28. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_levi13_beale_easom_eggholder_all_radii_4_to_30.csv +44801 -0
  29. data/spikes/comparing_samplers_on_classic_optimization_functions/results_comparing_samplers_omnitest.csv +1401 -0
  30. data/spikes/mongodb_logger.rb +47 -0
  31. data/spikes/simple_de_run.rb +32 -0
  32. data/test/helper.rb +17 -1
  33. data/test/test_array_basic_stats.rb +5 -1
  34. data/test/test_array_permutations_and_subsets.rb +23 -0
  35. data/test/test_float.rb +15 -0
  36. data/test/test_html_doc_getter.rb +1 -1
  37. data/test/test_logger.rb +86 -48
  38. data/test/test_mongodb_logger.rb +116 -0
  39. data/test/test_object_annotations.rb +14 -0
  40. data/test/test_optimize.rb +7 -6
  41. data/test/test_optimize_differential_evolution.rb +21 -19
  42. data/test/test_optimize_elite_archive.rb +85 -0
  43. data/test/test_optimize_objective.rb +237 -74
  44. data/test/test_optimize_populationbasedoptimizer.rb +72 -6
  45. data/test/test_optimize_random_search.rb +0 -17
  46. data/test/test_optimize_search_space.rb +15 -0
  47. data/test/test_statistics.rb +30 -4
  48. data/test/test_time.rb +22 -0
  49. data/test/tmp_shorter.csv +200 -0
  50. metadata +62 -21
@@ -1,42 +1,35 @@
1
1
  require 'feldtruby/optimize'
2
2
  require 'feldtruby/optimize/objective'
3
3
  require 'feldtruby/optimize/search_space'
4
- require 'feldtruby/optimize/stdout_logger'
5
4
  require 'feldtruby/optimize/max_steps_termination_criterion'
6
5
  require 'feldtruby/math/rand'
7
6
  require 'feldtruby/array'
7
+ require 'feldtruby/logger'
8
8
 
9
- module FeldtRuby::Optimize
10
- DefaultOptimizationOptions = {
11
- :logger => FeldtRuby::Optimize::StdOutLogger,
12
- :maxNumSteps => 10_000,
13
- :terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
14
- :verbose => false,
15
- :populationSize => 100,
16
- }
17
-
18
- def self.override_default_options_with(options)
19
- o = DefaultOptimizationOptions.clone.update(options)
20
- o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
21
- o
22
- end
23
- end
9
+ module FeldtRuby::Optimize
24
10
 
25
11
  # Find a vector of float values that optimizes a given
26
12
  # objective.
27
- class FeldtRuby::Optimize::Optimizer
28
- attr_reader :objective, :search_space, :best, :best_quality_value, :best_sub_quality_values, :num_optimization_steps, :termination_criterion
13
+ class Optimizer
14
+ include FeldtRuby::Logging
15
+
16
+ attr_reader :options, :objective, :search_space, :best, :best_quality_value
17
+ attr_reader :best_sub_quality_values, :num_optimization_steps, :termination_criterion
29
18
 
30
19
  def initialize(objective, searchSpace = FeldtRuby::Optimize::DefaultSearchSpace, options = {})
31
20
  @best = nil # To avoid warnings if not set
32
21
  @objective, @search_space = objective, searchSpace
33
22
  @options = FeldtRuby::Optimize.override_default_options_with(options)
23
+
24
+ # Must set up the logger before setting options since verbosity of the logger is
25
+ # an option!
26
+ setup_logger_and_distribute_to_instance_variables()
27
+
34
28
  initialize_options(@options)
35
- @objective.logger = @logger
36
29
  end
37
30
 
38
31
  def initialize_options(options)
39
- @logger = options[:logger].new(self, options[:verbose])
32
+ self.logger.verbose = options[:verbose]
40
33
  @termination_criterion = options[:terminationCriterion]
41
34
  end
42
35
 
@@ -46,46 +39,56 @@ class FeldtRuby::Optimize::Optimizer
46
39
  # Set up a random best since other methods require it
47
40
  update_best([search_space.gen_candidate()])
48
41
  begin
49
- @logger.note_optimization_starts()
42
+ logger.log "Optimization with optimizer #{self.class.inspect} started"
50
43
  while !termination_criterion.terminate?(self)
51
44
  new_candidates = optimization_step()
52
45
  @num_optimization_steps += 1
53
- @logger.note_another_optimization_step(@num_optimization_steps)
54
46
  update_best(new_candidates)
55
47
  end
56
48
  rescue Exception => e
57
- @logger.note_termination("!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n"))
58
- ensure
59
- @logger.note_termination("!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!")
49
+ logger.log_data :exception, {
50
+ :exception_class => e.class.inspect,
51
+ :backtrace => e.backtrace.join("\n")
52
+ }, "!!! - Optimization FAILED with exception: #{e.message} - !!!" + e.backtrace.join("\n")
53
+ ensure
54
+ logger.log "!!! - Optimization FINISHED after #{@num_optimization_steps} steps - !!!"
60
55
  end
61
56
  @objective.note_end_of_optimization(self)
62
- @logger.note_end_of_optimization(self)
57
+ log_end_of_optimization
63
58
  @best # return the best
64
59
  end
65
60
 
61
+ def log_end_of_optimization
62
+ logger.log("End of optimization\n" +
63
+ " Optimizer: #{self.class}\n" +
64
+ " Best found: #{@best}\n" +
65
+ " Quality of best: #{@objective.quality_of(@best)}\n" +
66
+ " Time used = #{Time.human_readable_timestr(logger.elapsed_time)}, " +
67
+ "Steps performed = #{@num_optimization_steps}, " +
68
+ "#{Time.human_readable_timestr(time_per_step, true)}/step")
69
+ end
70
+
71
+ def time_per_step
72
+ logger.elapsed_time / @num_optimization_steps
73
+ end
74
+
66
75
  # Run one optimization step. Default is to do nothing, i.e. this is just a superclass,
67
76
  # but subclasses need to implement this.
68
77
  def optimization_step()
69
78
  end
70
79
 
71
- # Rank all candidates, then update the best one if a new best found.
80
+ # Update the best if a new best was found.
72
81
  def update_best(candidates)
73
- if @best
74
- ranked = objective.rank_candidates(candidates + [@best])
75
- else
76
- ranked = objective.rank_candidates(candidates)
77
- end
78
- new_best, new_quality_value, new_sub_qvalues = ranked.first
79
- # Since some objectives are not deterministic the best
80
- if new_best != @best
81
- if @best
82
- old_best, new_qv_old_best, sub_qv_old_best = ranked.select {|a| a.first == @best}.first
83
- end
84
- @logger.note_new_best(new_best, new_quality_value, new_sub_qvalues,
85
- @best, new_qv_old_best, sub_qv_old_best)
86
- @best = new_best
87
- @best_quality_value = new_quality_value
88
- @best_sub_quality_values = new_sub_qvalues
82
+ best_new, rest = objective.rank_candidates(candidates)
83
+ if @best.nil? || @objective.is_better_than?(best_new, @best)
84
+ qb = @best.nil? ? nil : @objective.quality_of(@best)
85
+ logger.log_data :new_best, {
86
+ :new_best => best_new,
87
+ :new_quality_value => @objective.quality_of(best_new),
88
+ :old_best => @best,
89
+ :old_quality_value => qb
90
+ }, "New best candidate found"
91
+ @best = best_new
89
92
  true
90
93
  else
91
94
  false
@@ -93,13 +96,69 @@ class FeldtRuby::Optimize::Optimizer
93
96
  end
94
97
  end
95
98
 
96
- class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optimizer
99
+ # Sample the indices of a population. This default super class just randomly
100
+ # samples without replacement.
101
+ class PopulationSampler
102
+ def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
103
+ @optimizer = optimizer
104
+ @population_size = @optimizer.population_size
105
+ initialize_all_indices()
106
+ end
107
+
108
+ def initialize_all_indices
109
+ # We set up an array of the indices to all candidates of the population so we can later sample from it
110
+ # This should always contain all indices even if they might be out of order. This is because we
111
+ # only swap! elements in this array, never delete any.
112
+ @all_indices = (0...@population_size).to_a
113
+ end
114
+
115
+ def sample_population_indices_without_replacement(numSamples)
116
+ sample_indices_without_replacement numSamples, @all_indices
117
+ end
118
+
119
+ def sample_indices_without_replacement(numSamples, indices)
120
+ sampled_indices = []
121
+ size = indices.length
122
+ numSamples.times do |i|
123
+ index = i + rand_int(size - i)
124
+ sampled_index, skip = indices.swap!(i, index)
125
+ sampled_indices << sampled_index
126
+ end
127
+ sampled_indices
128
+ end
129
+ end
130
+
131
+ # This implements a "trivial geography" similar to Spector and Klein (2006)
132
+ # by first sampling an individual randomly and then selecting additional
133
+ # individuals for the same tournament within a certain deme of limited size
134
+ # for the subsequent individuals in the population. The version we implement
135
+ # here is from:
136
+ # I. Harvey, "The Microbial Genetic Algorithm", in Advances in Artificial Life
137
+ # Darwin Meets von Neumann, Springer, 2011.
138
+ class RadiusLimitedPopulationSampler < PopulationSampler
139
+ def initialize(optimizer, options = FeldtRuby::Optimize::DefaultOptimizationOptions)
140
+ super
141
+ @radius = options[:samplerRadius]
142
+ end
143
+
144
+ def sample_population_indices_without_replacement(numSamples)
145
+ i = rand(@population_size)
146
+ indices = (i..(i+@radius)).to_a
147
+ if (i+@radius) >= @population_size
148
+ indices.map! {|i| i % @population_size}
149
+ end
150
+ sample_indices_without_replacement numSamples, indices
151
+ end
152
+ end
153
+
154
+ class PopulationBasedOptimizer < Optimizer
97
155
  attr_reader :population
98
156
 
99
157
  def initialize_options(options)
100
158
  super
101
- initialize_population(@options[:populationSize])
102
- initialize_all_indices()
159
+ @population_size = @options[:populationSize]
160
+ initialize_population(@population_size)
161
+ @sampler = options[:samplerClass].new(self, options)
103
162
  end
104
163
 
105
164
  # Create a population of a given size by randomly sampling candidates from the search space.
@@ -107,26 +166,29 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
107
166
  @population = Array.new(sizeOfPopulation).map {search_space.gen_candidate()}
108
167
  end
109
168
 
110
- def population_size
111
- @population.length
169
+ # Re-initialize parts of the population.
170
+ def re_initialize_population(percentageOfPopulation = 0.50)
171
+ if percentageOfPopulation >= 1.00
172
+ initialize_population(@population_size)
173
+ else
174
+ num_to_replace = (percentageOfPopulation * @population_size).to_i
175
+ # We must use a PopulationSampler here instead of just calling sample_population_indices_without_replacement
176
+ # since we do not know which sampler is installed.
177
+ sampler = PopulationSampler.new(self, self.options)
178
+ indices = sampler.sample_population_indices_without_replacement(num_to_replace)
179
+ indices.each do |i|
180
+ @population[i] = search_space.gen_candidate()
181
+ end
182
+ end
112
183
  end
113
184
 
114
- def initialize_all_indices
115
- # We set up an array of the indices to all candidates of the population so we can later sample from it
116
- # This should always contain all indices even if they might be out of order. This is because we
117
- # only swap! elements in this array, never delete any.
118
- @all_indices = (0...population_size).to_a
185
+ def population_size
186
+ @population_size
119
187
  end
120
188
 
121
189
  # Sample indices from the population without replacement.
122
190
  def sample_population_indices_without_replacement(numSamples)
123
- sampled_indices = []
124
- numSamples.times do |i|
125
- index = i + rand_int(population_size - i)
126
- sampled_index, skip = @all_indices.swap!(i, index)
127
- sampled_indices << sampled_index
128
- end
129
- sampled_indices
191
+ @sampler.sample_population_indices_without_replacement(numSamples)
130
192
  end
131
193
 
132
194
  # Get candidates from population at given indices.
@@ -143,4 +205,20 @@ class FeldtRuby::Optimize::PopulationBasedOptimizer < FeldtRuby::Optimize::Optim
143
205
  def update_candidate_in_population(index, candidate)
144
206
  @population[index] = candidate
145
207
  end
208
+ end
209
+
210
+ DefaultOptimizationOptions = {
211
+ :terminationCriterionClass => FeldtRuby::Optimize::MaxStepsTerminationCriterion,
212
+ :verbose => true,
213
+ :populationSize => 200,
214
+ :samplerClass => FeldtRuby::Optimize::RadiusLimitedPopulationSampler,
215
+ :samplerRadius => 10 # Max distance between individuals selected in same tournament
216
+ }
217
+
218
+ def self.override_default_options_with(options)
219
+ o = DefaultOptimizationOptions.clone.update(options)
220
+ o[:terminationCriterion] = o[:terminationCriterionClass].new(o[:maxNumSteps])
221
+ o
222
+ end
223
+
146
224
  end
@@ -57,6 +57,16 @@ class SearchSpace
57
57
  self.new(min_values, max_values)
58
58
  end
59
59
 
60
+ def self.new_from_min_max_per_variable(minMaxPairs)
61
+ min_values = []
62
+ max_values = []
63
+ minMaxPairs.each do |min, max|
64
+ min_values << min
65
+ max_values << max
66
+ end
67
+ self.new(min_values, max_values)
68
+ end
69
+
60
70
  def num_variables
61
71
  @min_values.length
62
72
  end
@@ -1,28 +1,3 @@
1
1
  require 'feldtruby'
2
2
 
3
- module FeldtRuby::Optimize; end
4
-
5
- require 'feldtruby/optimize/differential_evolution'
6
- module FeldtRuby::Optimize
7
- # Optimize the _numVariables_ between the _min_ and _max_ values given _costFunction_.
8
- # Default is to minimize.
9
- def self.optimize(min, max, options = {:verbose => true},
10
- objectiveFuncClass = FeldtRuby::Optimize::ObjectiveMinimizeBlock, &costFunction)
11
- objective = objectiveFuncClass.new(&costFunction)
12
- num_vars = costFunction.arity
13
- search_space = SearchSpace.new_from_min_max(num_vars, min, max)
14
- optimizer = DEOptimizer.new(objective, search_space, options)
15
- optimizer.optimize()
16
- optimizer.best.to_a
17
- end
18
-
19
- # Short hand wrapper for function minimization.
20
- def self.minimize(min, max, options = {}, &costFunction)
21
- optimize(min, max, options, &costFunction)
22
- end
23
-
24
- # Short hand wrapper for function maximization.
25
- def self.maximize(min, max, options = {}, &costFunction)
26
- optimize(min, max, options, FeldtRuby::Optimize::ObjectiveMaximizeBlock, &costFunction)
27
- end
28
- end
3
+ module FeldtRuby::Optimize; end
@@ -162,6 +162,7 @@ module Statistics
162
162
  end
163
163
 
164
164
  def chi_squared_test(aryOrHashOfCounts)
165
+ puts "aryOrHashOfCounts = #{aryOrHashOfCounts}"
165
166
  counts = (Hash === aryOrHashOfCounts) ? aryOrHashOfCounts : aryOrHashOfCounts.counts
166
167
  vs = counts.values
167
168
  res = RC.call("chisq.test", vs)
@@ -256,7 +257,6 @@ module FeldtRuby::Statistics::Plotting
256
257
 
257
258
  def filled_contour(csvFilePath, xlabel, ylabel, title = "filled.contour")
258
259
  include_library "MASS"
259
- #include_library "ggplot2"
260
260
 
261
261
  script = <<-EOS
262
262
  data <- read.csv(#{csvFilePath.inspect})
@@ -284,6 +284,10 @@ module FeldtRuby::Statistics::Plotting
284
284
  end
285
285
 
286
286
  def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 50)
287
+ include_library "grid"
288
+ include_library "lattice"
289
+ include_library "hexbin"
290
+
287
291
  plot_2dims(csvFilePath,
288
292
  "f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
289
293
  xlabel, ylabel, title)
@@ -292,9 +296,54 @@ module FeldtRuby::Statistics::Plotting
292
296
  def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
293
297
 
294
298
  script = <<-EOS
295
- # smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
299
+ smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
296
300
  f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
297
- f <- f + stat_smooth()
301
+ f <- f + stat_smooth(method = smoothing_method)
302
+ EOS
303
+
304
+ plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
305
+
306
+ end
307
+
308
+ # Data can be specified in two ways, either directly in Ruby arrays,
309
+ # or as strings with the path to a csv file to be loaded. In the latter
310
+ # case a column name must be given.
311
+ def load_csv_files_as_data(dataMap, columnName = nil)
312
+
313
+ keys = dataMap.keys.sort
314
+
315
+ read_csvs = ""
316
+ data_frame = "data.frame(1:length(d_#{keys.first})"
317
+
318
+ keys.each_with_index do |key, i|
319
+
320
+ value = dataMap[key]
321
+
322
+ set_name = "d_#{key}"
323
+
324
+ read_csvs += "#{set_name} <- "
325
+
326
+ if Array === value
327
+ read_csvs += (ruby_object_to_R_string(value) + ";\n")
328
+ data_frame += ", #{key} = #{set_name}"
329
+ else
330
+ read_csvs += "read.csv(#{value.inspect});\n"
331
+ data_frame += ", #{key} = #{set_name}$#{columnName}"
332
+ end
333
+
334
+ end
335
+
336
+ data_frame += ")"
337
+
338
+ script = "#{read_csvs}data <- #{data_frame};"
339
+
340
+ end
341
+
342
+ def density_tile2d(csvFilePath, xlabel, ylabel, title = "densitytile2d")
343
+
344
+ script = <<-EOS
345
+ f <- ggplot(data, aes(x=#{xlabel}, y=#{ylabel}))
346
+ f <- f + stat_density2d(geom="tile", aes(fill=..density..), contour=FALSE) + scale_fill_gradient(high="red", low="white")
298
347
  EOS
299
348
 
300
349
  plot_2dims(csvFilePath, script, xlabel.to_s, ylabel.to_s, title)
@@ -374,6 +423,28 @@ module FeldtRuby::Statistics::Plotting
374
423
 
375
424
  end
376
425
 
426
+ # Plot the densities of the data found in the column named _columnName_
427
+ # in the csv files in _csvFiles_.
428
+ def overlaid_densities_from_csv_files(columnName, csvFiles, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
429
+
430
+ load_csvs = load_csv_files_as_data csvFiles
431
+
432
+ script = <<-EOS
433
+ #{load_csvs}
434
+ #df <- data.frame(index = (1:#{cardinalities.first}), #{hash_to_R_params(dataMap)})
435
+ df.m <- melt(df, id = "index")
436
+ names(df.m)[2] <- _datasetsName_
437
+ f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
438
+ f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
439
+ #{ggplot2_setup_and_theme()}
440
+ f
441
+ EOS
442
+
443
+ puts script
444
+ subst_eval script, {:title => title, :datasetsName => datasetsName,
445
+ :xlabel => xlabel, :ylabel => ylabel}
446
+
447
+ end
377
448
  end
378
449
 
379
450
  class FeldtRuby::RCommunicator
@@ -1,3 +1,22 @@
1
+ class Time
2
+
3
+ # Number of milliseconds since Unix epoch.
4
+ def milli_seconds
5
+ (to_i * 1000) + (nsec / 1_000_000)
6
+ end
7
+
8
+ # Number of microseconds since Unix epoch.
9
+ def micro_seconds
10
+ (to_i * 1_000_000) + (nsec / 1_000)
11
+ end
12
+
13
+ # Number of nanoseconds since Unix epoch.
14
+ def nano_seconds
15
+ (to_i * 1_000_000_000) + nsec
16
+ end
17
+
18
+ end
19
+
1
20
  def Time.timestamp(options = {:short => false})
2
21
  if options[:short]
3
22
  Time.now.strftime("%y%m%d %H:%M.%S")
@@ -1,3 +1,3 @@
1
1
  module FeldtRuby
2
- VERSION = "0.3.16"
2
+ VERSION = "0.3.18"
3
3
  end