biopsy 0.1.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,437 @@
1
+ require 'rubystats'
2
+ require 'statsample'
3
+ require 'set'
4
+ require 'pp'
5
+ require 'matrix'
6
+
7
+ # TODO:
8
+ # - make distributions draw elements from the range, not just from distribution (DONE)
9
+ # - test on real SOAPdt data (in progress)
10
+ # - make code to run 100 times for a particular dataset, capture the trajectory, and plot the progress over time along with a histogram of the data distribution
11
+ # - plot SD and step-size over time
12
+ # - capture data about convergence (done for toy data, need to repeat for other data)
13
+
14
+ module Biopsy
15
+
16
+ # a Distribution represents the probability distribution from
17
+ # which the next value of a parameter is drawn. The set of all
18
+ # distributions acts as a probabilistic neighbourhood structure.
19
class Distribution

  # the current standard deviation of the distribution
  attr_reader :sd

  # Create a new Distribution.
  #
  # +mean+ is the index into +range+ around which draws are centred.
  # +range+ is the ordered collection of values the parameter can take.
  # +sd_increment_proportion+ is the fraction of the range size by which
  # the standard deviation changes on each loosen/tighten step.
  # +sd+ is the starting standard deviation; it is limited to lie between
  # 0.5 and 0.66 * the size of +range+.
  #
  # Raises a RuntimeError if any step of the setup fails.
  def initialize(mean, range, sd_increment_proportion, sd)
    @mean = mean
    @maxsd = range.size * 0.66
    @minsd = 0.5
    @sd = sd
    limit_sd
    @range = range
    @sd_increment_proportion = sd_increment_proportion
    generate_distribution
  rescue
    raise "generation of distribution with mean: #{@mean}, sd: #{@sd} failed."
  end

  # (re)build the underlying normal distribution from the
  # current mean and standard deviation
  def generate_distribution
    @dist = Rubystats::NormalDistribution.new(@mean, @sd)
  end

  # keep the standard deviation within [@minsd, @maxsd]. The upper
  # limit is applied first, so the minimum wins if the two conflict
  # (e.g. for a very small range).
  def limit_sd
    @sd = [[@sd, @maxsd].min, @minsd].max
  end

  # loosen the distribution by increasing the sd
  # and regenerating
  def loosen(factor=1)
    @sd += @sd_increment_proportion * factor * @range.size
    limit_sd
    generate_distribution
  end

  # tighten the distribution by reducing the sd
  # and regenerating
  def tighten(factor=1)
    @sd -= @sd_increment_proportion * factor * @range.size unless @sd <= 0.01
    limit_sd
    generate_distribution
  end

  # set standard deviation to the minimum possible value.
  # The distribution is regenerated so that subsequent draws
  # actually use the new standard deviation. Returns the new sd.
  def set_sd_min
    @sd = @minsd
    generate_distribution
    @sd
  end

  # draw a value of the parameter from the distribution.
  #
  # The number drawn is truncated to an integer index. An index that
  # falls outside the range is reflected back off the boundary it
  # crossed (-1 maps to 1, size maps to size - 1), repeatedly if
  # necessary, so the result is always an element of the range.
  # Returns nil only if the range is empty.
  def draw
    size = @range.size
    return nil if size.zero?
    index = @dist.rng.to_i
    # reflecting off the lower and then the upper boundary is a shift
    # by (2 * size - 1), so any number of reflections collapses to a
    # single modulo followed by at most one reflection
    index = index.abs % (2 * size - 1)
    index = (2 * size - 1) - index if index >= size
    @range[index]
  end

end # Distribution
82
+
83
+ # a Hood represents the neighbourhood of a specific location
84
+ # in the parameter space being explored. It is generated using
85
+ # the set of Distributions, which together define the neighbourhood
86
+ # structure.
87
class Hood

  # number of failed attempts after which the distributions are
  # loosened on every further attempt
  LOOSEN_AFTER = 100
  # upper bound on attempts to find a non-tabu neighbour
  MAX_ATTEMPTS = 1000

  # hash with keys :parameters and :score describing the best
  # location scored in this Hood so far
  attr_reader :best

  # Create and populate a Hood.
  #
  # +distributions+ maps each parameter to an object responding to
  # +draw+ and +loosen+, +max_size+ is the number of neighbours to
  # generate, and +tabu+ is the collection of already-visited
  # locations (new neighbours are appended to it with <<).
  def initialize(distributions, max_size, tabu)
    # tabu
    @tabu = tabu
    # neighbourhood
    @max_size = max_size
    @members = []
    @best = {
      :parameters => nil,
      :score => 0.0
    }
    # probabilities
    @distributions = distributions
    populate
  end

  # generate a single neighbour and add it to the Hood and to the
  # tabu list.
  #
  # Candidates are drawn until one is found that is not tabu. After
  # LOOSEN_AFTER failed attempts the distributions are loosened on
  # every attempt so that the search reaches further. If MAX_ATTEMPTS
  # candidates in a row are tabu (e.g. the whole parameter space has
  # been visited) the last candidate is accepted anyway: revisiting a
  # location is preferable to never returning.
  def generate_neighbour
    attempts = 0
    neighbour = nil
    loop do
      if attempts >= LOOSEN_AFTER
        # taking too long to generate a neighbour,
        # loosen the neighbourhood structure so we explore further
        @distributions.each { |_param, dist| dist.loosen }
      end
      # perform the probabilistic step move for each parameter
      neighbour = Hash[@distributions.map { |param, dist| [param, dist.draw] }]
      attempts += 1
      break unless is_tabu?(neighbour) && attempts < MAX_ATTEMPTS
    end
    @tabu << neighbour
    @members << neighbour
  end

  # record +current+ (a hash with :parameters and :score) as the best
  # location of this Hood if it beats the best seen so far. The first
  # scored location always becomes the best, so scores of zero or
  # below are handled. Locations without a score are ignored.
  def update_best? current
    score = current[:score]
    return if score.nil?
    @best = current.clone if @best[:parameters].nil? || score > @best[:score]
  end

  # true if location is tabu
  def is_tabu? location
    @tabu.member? location
  end

  # generate the population of neighbours
  def populate
    @max_size.times { generate_neighbour }
  end

  # return the next neighbour from this Hood
  # (nil once the Hood is exhausted)
  def next
    @members.pop
  end

  # returns true if no neighbours remain in the Hood
  def last?
    @members.empty?
  end

end # Hood
155
+
156
+ # A Tabu Search implementation with a domain-specific probabilistic
157
+ # learning heuristic for optimising over an unconstrained parameter
158
+ # space with costly objective evaluation.
159
class TabuSearch #< OptmisationAlgorithm

  attr_reader :current, :best, :hood_no
  attr_accessor :max_hood_size, :sd_increment_proportion, :starting_sd_divisor, :backtrack_cutoff
  attr_accessor :jump_cutoff

  # Snapshot of the per-search state. Each member name matches the
  # instance variable (without the @) that is saved to / restored from
  # the snapshot when cycling between searches.
  # NOTE(review): inside this class the constant shadows ::Thread.
  Thread = Struct.new(:best, :tabu, :distributions,
                      :standard_deviations, :recent_scores,
                      :iterations_since_best, :backtracks,
                      :current, :current_hood, :loaded)

  # +parameter_ranges+ maps each parameter to the array of values it
  # can take.
  # NOTE(review): +threads+ and +limit+ are accepted but not used by
  # the code below - the number of threads is fixed at 2 and the tabu
  # limit is nil. Confirm whether they were meant to be wired in.
  def initialize(parameter_ranges, threads=8, limit=nil)

    @ranges = parameter_ranges

    # solution tracking
    @best = nil

    # tabu list
    @tabu = Set.new
    @tabu_limit = nil
    @start_time = Time.now

    # neighbourhoods
    @max_hood_size = 5
    @starting_sd_divisor = 5
    @standard_deviations = {}
    @sd_increment_proportion = 0.05
    @hood_no = 1

    # adjustment tracking
    @recent_scores = []
    @jump_cutoff = 10

    # logging
    @log_data = false
    @logfiles = {}
    log_setup

    # backtracking
    @iterations_since_best = 0
    @backtrack_cutoff = 2
    @backtracks = 1.0

    # convergence
    @num_threads = 2
    @threads = []

  end # initialize

  # prepare the search. Note that setup_threads replaces @current and
  # @best with a random start point for each thread it creates.
  def setup start_point
    @current = {:parameters => start_point, :score => nil}
    @best = @current
    setup_threads
  end

  # given the score for a parameter set,
  # return the next parameter set to be scored
  def run_one_iteration(parameters, score)
    @current = {:parameters => parameters, :score => score}
    # update best score?
    update_best?
    # log any data
    log
    # cycle threads
    load_next_thread
    # get next parameter set to score
    next_candidate
    @current[:parameters]
  end # run_one_iteration

  # create @num_threads state snapshots, each initialised with its own
  # random start point, tabu list, distributions and first Hood
  def setup_threads
    @num_threads.times do
      @threads << Thread.new
    end
    @threads.each do |thread|
      @current = {
        :parameters => random_start_point,
        :score => nil
      }
      @best = @current
      @standard_deviations = {}
      @recent_scores = []
      @tabu = Set.new
      define_neighbourhood_structure
      @current_hood = Biopsy::Hood.new(@distributions, @max_hood_size, @tabu)
      # copy the freshly built state into the snapshot
      thread.members.each do |sym|
        ivar = sym_to_ivar_sym sym
        thread[sym] = instance_variable_get(ivar)
      end
      thread.loaded = false
    end
    @current_thread = @num_threads - 2
  end

  # save the live state into the current thread's snapshot (only if
  # that snapshot has been loaded before), then advance to the next
  # thread and restore its snapshot into the instance variables
  def load_next_thread
    thread = @threads[@current_thread]
    if thread.loaded
      thread.members.each do |sym|
        ivar = sym_to_ivar_sym sym
        thread[sym] = instance_variable_get(ivar)
      end
    else
      thread.loaded = true
    end
    @current_thread = (@current_thread + 1) % @num_threads
    thread = @threads[@current_thread]
    thread.members.each do |sym|
      ivar = sym_to_ivar_sym sym
      instance_variable_set(ivar, thread[sym])
    end
  end

  # update the best location of the current Hood and of the search
  # as a whole; count the iteration if it was not an improvement
  def update_best?
    @current_hood.update_best? @current
    if @best[:score].nil? || @current[:score] > @best[:score]
      @best = @current.clone
    else
      @iterations_since_best += 1
    end
  end

  # use probability distributions to define the
  # initial neighbourhood structure
  def define_neighbourhood_structure
    # probabilities
    @distributions = {}
    @current[:parameters].each_pair do |param, value|
      update_distribution(param, value)
    end
  end

  # update the neighbourhood structure by adjusting the probability
  # distributions according to total performance of each parameter
  def update_neighbourhood_structure
    update_recent_scores
    best = backtrack_or_continue
    unless @distributions.empty?
      # remember the current sds so the new distributions reuse them
      @standard_deviations = Hash[@distributions.map { |k, d| [k, d.sd] }]
    end
    best[:parameters].each_pair do |param, value|
      update_distribution(param, value)
    end
  end

  # set the distribution for parameter +:param+ to a new one centered
  # around the index of +value+
  def update_distribution(param, value)
    mean = @ranges[param].index(value)
    range = @ranges[param]
    sd = sd_for_param(param, range)
    @distributions[param] = Biopsy::Distribution.new(mean,
                                                     range,
                                                     @sd_increment_proportion,
                                                     sd)
  end

  # return the standard deviation to use for +:param+: the stored
  # value if there is one, otherwise the starting value (the size of
  # the range divided by @starting_sd_divisor)
  def sd_for_param(param, range)
    @standard_deviations.fetch(param) { range.size.to_f / @starting_sd_divisor }
  end

  # return the correct 'best' location to form a new neighbourhood around
  # deciding whether to continue progressing from the current location
  # or to backtrack to a previous good location to explore further
  def backtrack_or_continue
    best = nil
    if (@iterations_since_best / @backtracks) >= @backtrack_cutoff * @max_hood_size
      backtrack
      best = @best
    else
      best = @current_hood.best
      adjust_distributions_using_gradient
    end
    if best[:parameters].nil?
      # the hood has no scored member yet: fall back to the overall best
      best = @best
    end
    best
  end

  # count a backtrack and tighten every distribution
  def backtrack
    @backtracks += 1.0
    # debug('backtracked to best')
    @distributions.each_pair { |k, d| d.tighten }
  end

  # update the array of recent scores (most recent first),
  # keeping at most @jump_cutoff entries
  def update_recent_scores
    @recent_scores.unshift @best[:score]
    @recent_scores = @recent_scores.take @jump_cutoff
  end

  # use the gradient of recent best scores to update the distributions:
  # tighten while scores are rising, loosen while they are falling.
  # The magnitude of the slope is the adjustment factor in both cases -
  # passing a negative slope to loosen would reduce the sd instead of
  # increasing it.
  def adjust_distributions_using_gradient
    return if @recent_scores.length < 3
    vx = (1..@recent_scores.length).to_a.to_scale
    # scores are stored most recent first; regress in chronological order
    vy = @recent_scores.reverse.to_scale
    r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
    slope = r.b
    if slope > 0
      @distributions.each_pair { |k, d| d.tighten slope }
    elsif slope < 0
      @distributions.each_pair { |k, d| d.loosen slope.abs }
    end
  end

  # shift to the next neighbourhood
  def next_hood
    @hood_no += 1
    # debug("entering hood # #{@hood_no}")
    update_neighbourhood_structure
    @current_hood = Hood.new(@distributions, @max_hood_size, @tabu)
  end

  # get the next neighbour to explore from the current hood
  def next_candidate
    @current[:parameters] = @current_hood.next
    @current[:score] = nil
    # exhausted the neighbourhood?
    if @current_hood.last?
      # debug(@current_hood.best)
      next_hood
    end
  end

  # check termination conditions
  # and return true if met: every thread's recent scores have the
  # same mean. Always false before the threads have been set up or
  # while the first thread has fewer than @jump_cutoff recent scores.
  # NOTE(review): Array#mean is not core Ruby - presumably supplied by
  # an extension loaded elsewhere; confirm.
  def finished?
    return false if @threads.empty?
    return false if @threads.first.recent_scores.size < @jump_cutoff
    scores = @threads.map { |t| t.recent_scores }
    scores.map { |s| s.mean }.uniq.length == 1
  end

  # True if this algorithm chooses its own starting point
  def knows_starting_point?
    true
  end

  # open the CSV log files (only when logging is enabled)
  def log_setup
    if @log_data
      require 'csv'
      @logfiles[:standard_deviations] = CSV.open('standard_deviations.csv', 'w')
      @logfiles[:best] = CSV.open('best.csv', 'w')
      @logfiles[:score] = CSV.open('score.csv', 'w')
      @logfiles[:params] = CSV.open('params.csv', 'w')
    end
  end

  # append the current state to the log files (only when logging
  # is enabled)
  def log
    if @log_data
      @logfiles[:standard_deviations] << @distributions.map { |k, d| d.sd }
      @logfiles[:best] << [@best[:score]]
      @logfiles[:score] << [@current[:score]]
      @logfiles[:params] << @current[:parameters].map { |k, v| v }
    end
  end

  # close any open log files
  def log_teardown
    @logfiles.each_pair do |k, f|
      f.close
    end
  end

  # convert a symbol such as :best to the matching
  # instance variable symbol, :@best
  def sym_to_ivar_sym sym
    "@#{sym.to_s}".to_sym
  end

  # choose the location at which to start searching
  def select_starting_point
    random_start_point
  end

  # a location made by sampling one value at random from the range
  # of every parameter
  def random_start_point
    Hash[@ranges.map { |p, r| [p, r.sample] }]
  end

end # TabuSearch
436
+
437
+ end # Biopsy
@@ -0,0 +1,110 @@
1
+ # Optimisation Framework: Settings
2
+ #
3
+ # == Description
4
+ #
5
+ # The Settings singleton object maintains general settings (as opposed to
6
+ # those specific to the experiment, which are contained in the Experiment
7
+ # object).
8
+ #
9
+ # Key settings include the location(s) of config file(s), the Domain that
10
+ # is currently active, and the directories to search for objective functions.
11
+ #
12
+ # Methods are provided for loading, listing, accessing and saving the settings
13
+ #
14
+ module Biopsy
15
+
16
+ require 'singleton'
17
+ require 'yaml'
18
+ require 'pp'
19
+
20
# Raised when a settings file contains a key that is not a known
# setting, or when a required setting cannot be found.
class SettingsError < StandardError; end
22
+
23
class Settings
  include Singleton

  attr_accessor :base_dir
  attr_accessor :target_dir
  attr_accessor :domain_dir
  attr_accessor :domain
  attr_accessor :objectives_dir
  attr_accessor :objectives_subset
  attr_accessor :sweep_cutoff

  def initialize
    set_defaults
  end

  # (re)set every setting to its default value
  def set_defaults
    # defaults
    @config_file = '~/.biopsyrc'
    @base_dir = ['.']
    @target_dir = ['targets']
    @domain_dir = ['domains']
    @domain = 'test_domain'
    @objectives_dir = ['objectives']
    @objectives_subset = nil
    @sweep_cutoff = 100
  end

  # Loads settings from a YAML config file. If no file is
  # specified, the default location ('~/.biopsyrc') is used.
  # Settings loaded from the file are merged into any
  # previously loaded settings.
  #
  # A leading '~' in the path is expanded to the home directory.
  # Raises a RuntimeError if the file does not contain a YAML mapping
  # and a SettingsError if it contains a key that is not a known
  # setting; in either case no setting is changed.
  def load(config_file=@config_file)
    newsets = YAML::load_file(::File.expand_path(config_file))
    raise 'Config file was not valid YAML' unless newsets.is_a?(Hash)
    settings = newsets.deep_symbolize
    # validate every key before applying any, so that a bad file
    # cannot leave the settings half-updated
    settings.each_key do |key|
      unless instance_variables.include? "@#{key.to_s}".to_sym
        raise SettingsError.new "Key #{key.to_s} in settings file is not valid"
      end
    end
    settings.each_pair do |key, value|
      instance_variable_set("@#{key.to_s}".to_sym, value)
    end
  end

  # Saves the settings to a YAML config file. If no file is
  # specified, the default location ('~/.biopsyrc') is used.
  # A leading '~' in the path is expanded to the home directory.
  def save(config_file=@config_file)
    ::File.open(::File.expand_path(config_file), 'w') do |f|
      f.puts to_s
    end
  end

  # Returns a hash of the settings, keyed by setting name (a String)
  def all_settings
    settings = {}
    instance_variables.each do |var|
      # strip the leading '@' from the instance variable name
      key = var[1..-1]
      settings[key] = instance_variable_get(var)
    end
    settings
  end

  # Returns a YAML string representation of the settings
  def to_s
    all_settings.to_yaml
  end

  # Locate the first YAML config file whose name
  # excluding extension matches +:name+ (case insensitive)
  # in dirs listed by the +:dir_key+ setting.
  #
  # Directories are searched in the order listed and files within a
  # directory in sorted order; directories that do not exist are
  # skipped. Returns the absolute path of the file, or nil if there is
  # no match. Raises a SettingsError if there is no setting +:dir_key+.
  def locate_config(dir_key, name)
    dir_key = "@#{dir_key.to_s}".to_sym
    unless instance_variables.include? dir_key
      raise SettingsError.new "no setting found for compulsory key #{dir_key}"
    end
    wanted = name.to_s.downcase
    Array(instance_variable_get(dir_key)).each do |dir|
      root = ::File.expand_path(dir)
      next unless ::File.directory?(root)
      # list the directory rather than globbing for the exact name so
      # that the match really is case insensitive
      Dir.entries(root).sort.each do |entry|
        next unless ::File.extname(entry) == '.yml'
        next unless ::File.basename(entry, '.yml').downcase == wanted
        path = ::File.join(root, entry)
        return path if ::File.file?(path)
      end
    end

    nil
  end

end # Settings
109
+
110
+ end # Biopsy
@@ -0,0 +1,113 @@
1
+ module Biopsy
2
+
3
# Raised when a target definition cannot be found, parsed or validated.
# Inherits from StandardError (rather than Exception) so that it is
# caught by a plain +rescue+ like any other application error.
class TargetLoadError < StandardError
end
5
+
6
# A Target is described by a YAML definition file and a constructor
# (a Ruby file defining a class with a +run+ method) that is run for
# a given set of parameters.
class Target
  require 'yaml'
  require 'ostruct'
  require 'rbconfig'

  # array of input files expected by the target constructor
  attr_accessor :input_files
  # array of output files to keep for submission to objective
  # functions during optimisation
  attr_accessor :output_files
  # hash mapping parameters to the ranges of values they can take
  attr_reader :parameter_ranges
  # path to the constructor code
  attr_reader :constructor_path
  attr_reader :domain

  # create a new Target instance.
  # arguments:
  # +:domain+ the domain to which this target belongs (see Domain documentation)
  def initialize domain
    @domain = domain
  end

  # load target with +name+.
  #
  # Raises a TargetLoadError if the definition file cannot be found,
  # does not contain a YAML mapping, lacks a required field, or is
  # rejected by the domain.
  def load_by_name name
    path = locate_definition name
    raise TargetLoadError.new("Target definition file does not exist for #{name}") if path.nil?
    config = YAML::load_file(path)
    # an empty or scalar-only file parses to something that is not a Hash
    unless config.is_a?(Hash)
      raise TargetLoadError.new("Target definition file #{path} is not valid YAML")
    end
    missing = check_config config.deep_symbolize
    if missing
      msg = "Target definition file #{path} is missing required fields: #{missing}"
      raise TargetLoadError.new(msg)
    end
    errors = validate_config config
    unless errors.empty?
      raise TargetLoadError.new("Target definition file #{path} contains the following errors:\n - #{errors.join("\n - ")}")
    end
    store_config config
    check_constructor
    load_constructor
  end

  # given the name of a target, return the path
  # to the definition YAML file. All +:target_dir+s defined in Settings are
  # searched and the first matching YAML file is loaded.
  def locate_definition name
    Settings.instance.locate_config(:target_dir, name)
  end

  # verify that +:config+ contains values for all essential target settings
  # returning false if no keys are missing, or an array of the missing keys
  # if any cannot be found
  def check_config config
    required = %w(input_files output_files parameter_ranges constructor_path)
    missing = required.reject { |key| config.has_key? key.to_sym }
    missing.empty? ? false : missing
  end

  # validate the config against the domain definition. Return an array
  # whose length will be the number of errors found. Thus an array of
  # length 0 indicates that the config is valid according to the domain
  # specification.
  def validate_config config
    @domain.target_valid? config
  end

  # Store the values in +:config+ as instance variables
  # named after the keys
  def store_config config
    config.each_pair do |key, value|
      instance_variable_set('@' + key.to_s, value)
    end
  end

  # Validate the constructor. True if valid, false otherwise.
  # The constructor is looked for in the target directories first,
  # falling back to the constructor path as given.
  # Raises a RuntimeError if no constructor path has been set.
  def check_constructor
    raise "constructor path is not defined for this target" if @constructor_path.nil?
    valid_ruby?(constructor_file || @constructor_path)
  end

  # Load constructor: require the constructor file and instantiate the
  # class named after it (the camelized file name without extension).
  def load_constructor
    # if the file is in none of the target dirs, fall back to the plain
    # joined path so that require reports the failure with a LoadError
    path = constructor_file || ::File.join(Settings.instance.target_dir, @constructor_path)
    require path
    file_name = ::File.basename(@constructor_path, '.rb')
    constructor_name = file_name.camelize
    @constructor = Module.const_get(constructor_name).new
  end

  # Run the constructor for the parameter set +:params+
  def run params
    @constructor.run params
  end

  # true if file exists and is valid ruby
  def valid_ruby? file
    return false unless ::File.exist? file
    # run the syntax check with the current interpreter, passing the
    # arguments as a list so that no shell is involved, and use the
    # exit status as the verdict
    ok = system(RbConfig.ruby, '-c', file.to_s,
                :out => ::File::NULL, :err => ::File::NULL)
    ok ? true : false
  end

  # absolute path of the constructor file in the first target
  # directory that contains it, or nil if none does
  def constructor_file
    Array(Settings.instance.target_dir).each do |dir|
      candidate = ::File.expand_path(::File.join(dir, @constructor_path))
      return candidate if ::File.file?(candidate)
    end
    nil
  end

end # end of class Target
112
+
113
+ end # end of module Biopsy
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ module Biopsy
4
# Version of the library, available both as its individual
# components and as a single dotted string.
module VERSION
  MAJOR = 0
  MINOR = 1
  PATCH = 0
  BUILD = 'alpha'

  # dotted version string; components that are nil are left out
  STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |part| part.nil? }.map { |part| part.to_s }.join('.')
end
12
+ end # Biopsy
data/lib/biopsy.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "biopsy/version"
2
+ require "biopsy/base_extensions"
3
+ require "biopsy/settings"
4
+ require "biopsy/domain"
5
+ require "biopsy/experiment"
6
+ require "biopsy/target"
7
+ require "biopsy/objective_handler"
8
+ require "biopsy/objective_function"
9
+ require "biopsy/opt_algorithm"
10
+ require "biopsy/optimisers/genetic_algorithm"
11
+ require "biopsy/optimisers/tabu_search"
12
+ require "biopsy/optimisers/parameter_sweeper"
13
+ require "biopsy/objectives/fastest_optimum"