biopsy 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/biopsy/experiment.rb +34 -18
- data/lib/biopsy/optimisers/parameter_sweeper.rb +5 -4
- data/lib/biopsy/optimisers/tabu_search.rb +28 -26
- data/lib/biopsy/settings.rb +2 -0
- data/lib/biopsy/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 227832bb75ba1c0114e59d8058a55c2ead937105
|
4
|
+
data.tar.gz: a3f5e6b364d5265878ed4a8f4e7f086282c2a156
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06e5510f5546f565c080ff405f3406e27d8a69976ac3fe80a89814b85a9d24cbe98aa9fca01c0a38dd26d46af6a7889ebc36e1b250939577594ab4226f0180b9
|
7
|
+
data.tar.gz: 326da0583c62503acdd211ece069434ea63cbaafcce2e6fa5caabb4d3c3ff7e9125ada20dbf6c91e0f62c2f1f4f6bc32098b1300ee78b159a0ffc24807daba7d
|
data/lib/biopsy/experiment.rb
CHANGED
@@ -23,7 +23,7 @@ module Biopsy
|
|
23
23
|
|
24
24
|
# Returns a new Experiment
|
25
25
|
def initialize(target, options:{}, threads:4, start:nil, algorithm:nil,
|
26
|
-
timelimit:nil, verbosity: :quiet)
|
26
|
+
timelimit:nil, verbosity: :quiet, id:nil)
|
27
27
|
@threads = threads
|
28
28
|
@start = start
|
29
29
|
@algorithm = algorithm
|
@@ -40,6 +40,7 @@ module Biopsy
|
|
40
40
|
self.select_starting_point
|
41
41
|
@scores = {}
|
42
42
|
@iteration_count = 0
|
43
|
+
set_id id
|
43
44
|
end
|
44
45
|
|
45
46
|
# return the set of parameters to evaluate first
|
@@ -63,9 +64,9 @@ module Biopsy
|
|
63
64
|
max = Settings.instance.sweep_cutoff
|
64
65
|
n = @target.count_parameter_permutations
|
65
66
|
if n < max
|
66
|
-
@algorithm = ParameterSweeper.new(@target.parameters)
|
67
|
+
@algorithm = ParameterSweeper.new(@target.parameters, @id)
|
67
68
|
else
|
68
|
-
@algorithm = TabuSearch.new(@target.parameters)
|
69
|
+
@algorithm = TabuSearch.new(@target.parameters, @id)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
|
@@ -83,6 +84,7 @@ module Biopsy
|
|
83
84
|
in_progress = true
|
84
85
|
@algorithm.setup @start
|
85
86
|
@current_params = @start
|
87
|
+
max_scores = @target.count_parameter_permutations
|
86
88
|
while in_progress
|
87
89
|
run_iteration
|
88
90
|
# update the best result
|
@@ -100,7 +102,7 @@ module Biopsy
|
|
100
102
|
end
|
101
103
|
end
|
102
104
|
# have we finished?
|
103
|
-
in_progress = !@algorithm.finished?
|
105
|
+
in_progress = !@algorithm.finished? && @scores.size < max_scores
|
104
106
|
if in_progress && !(@timelimit.nil?)
|
105
107
|
in_progress = (Time.now - start_time < @timelimit)
|
106
108
|
end
|
@@ -117,25 +119,27 @@ module Biopsy
|
|
117
119
|
# encompassing the program, objective(s) and optimiser.
|
118
120
|
# Returns the output of the optimiser.
|
119
121
|
def run_iteration
|
120
|
-
|
121
|
-
|
122
|
+
param_key = @current_params.to_s
|
123
|
+
result = nil
|
124
|
+
# lookup the result if possible
|
125
|
+
if @scores.key? param_key
|
126
|
+
result = @scores[param_key]
|
127
|
+
else
|
128
|
+
# create temp dir
|
129
|
+
curdir = Dir.pwd
|
130
|
+
Dir.chdir(self.create_tempdir) unless Settings.instance.no_tempdirs
|
122
131
|
# run the target
|
123
132
|
raw_output = @target.run @current_params.merge(@options)
|
124
133
|
# evaluate with objectives
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
result = @scores[param_key]
|
129
|
-
else
|
130
|
-
result = @objective.run_for_output(raw_output, @threads, nil)
|
131
|
-
@iteration_count += 1
|
132
|
-
self.print_progress(@iteration_count, @current_params, result, @best)
|
133
|
-
end
|
134
|
+
result = @objective.run_for_output(raw_output, @threads, nil)
|
135
|
+
@iteration_count += 1
|
136
|
+
self.print_progress(@iteration_count, @current_params, result, @best)
|
134
137
|
@scores[@current_params.to_s] = result
|
135
|
-
|
136
|
-
|
138
|
+
self.cleanup
|
139
|
+
Dir.chdir(curdir) unless Settings.instance.no_tempdirs
|
137
140
|
end
|
138
|
-
|
141
|
+
# get next steps from optimiser
|
142
|
+
@current_params = @algorithm.run_one_iteration(@current_params, result)
|
139
143
|
end
|
140
144
|
|
141
145
|
def print_progress(iteration, params, score, best)
|
@@ -148,6 +152,7 @@ module Biopsy
|
|
148
152
|
end
|
149
153
|
|
150
154
|
def cleanup
|
155
|
+
return if Settings.instance.no_tempdirs
|
151
156
|
# TODO: make this work
|
152
157
|
# remove all but essential files
|
153
158
|
essential_files = ""
|
@@ -183,6 +188,17 @@ module Biopsy
|
|
183
188
|
token
|
184
189
|
end
|
185
190
|
|
191
|
+
# set experiment ID with either user provided value, or date-time
|
192
|
+
# as fallback
|
193
|
+
def set_id id
|
194
|
+
@id = id
|
195
|
+
if @id.nil?
|
196
|
+
t = Time.now
|
197
|
+
parts = %w[y m d H M S Z].map{ |p| t.strftime "%#{p}" }
|
198
|
+
@id = "experiment_#{parts.join('_')}"
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
186
202
|
end # end of class RunHandler
|
187
203
|
|
188
204
|
end # end of module Biopsy
|
data/lib/biopsy/optimisers/parameter_sweeper.rb
CHANGED
@@ -24,11 +24,12 @@ module Biopsy
|
|
24
24
|
class Combinator
|
25
25
|
|
26
26
|
include Enumerable
|
27
|
-
|
28
|
-
def initialize
|
27
|
+
|
28
|
+
def initialize(parameters, id)
|
29
29
|
@parameters = parameters
|
30
|
+
@id = id
|
30
31
|
end
|
31
|
-
|
32
|
+
|
32
33
|
def generate_combinations(index, opts, &block)
|
33
34
|
if index == @parameters.length
|
34
35
|
block.call opts.clone
|
@@ -80,7 +81,7 @@ module Biopsy
|
|
80
81
|
@current = { :parameters => parameters, :score => score }
|
81
82
|
self.update_best?
|
82
83
|
return @combinator.next
|
83
|
-
rescue
|
84
|
+
rescue
|
84
85
|
@is_finished = true
|
85
86
|
return nil
|
86
87
|
end
|
data/lib/biopsy/optimisers/tabu_search.rb
CHANGED
@@ -90,7 +90,7 @@ module Biopsy
|
|
90
90
|
|
91
91
|
def initialize(distributions, max_size, tabu)
|
92
92
|
# tabu
|
93
|
-
@tabu = tabu
|
93
|
+
@tabu = tabu
|
94
94
|
# neighbourhood
|
95
95
|
@max_size = max_size
|
96
96
|
@members = []
|
@@ -108,7 +108,7 @@ module Biopsy
|
|
108
108
|
n = 0
|
109
109
|
begin
|
110
110
|
if n >= 100
|
111
|
-
# taking too long to generate a neighbour,
|
111
|
+
# taking too long to generate a neighbour,
|
112
112
|
# loosen the neighbourhood structure so we explore further
|
113
113
|
# debug("loosening distributions")
|
114
114
|
@distributions.each do |param, dist|
|
@@ -162,15 +162,16 @@ module Biopsy
|
|
162
162
|
attr_accessor :max_hood_size, :sd_increment_proportion
|
163
163
|
attr_accessor :starting_sd_divisor, :backtrack_cutoff, :jump_cutoff
|
164
164
|
|
165
|
-
Thread = Struct.new(:best, :tabu, :distributions,
|
166
|
-
:standard_deviations, :recent_scores,
|
165
|
+
Thread = Struct.new(:best, :tabu, :distributions,
|
166
|
+
:standard_deviations, :recent_scores,
|
167
167
|
:iterations_since_best, :backtracks,
|
168
168
|
:current, :current_hood, :loaded,
|
169
169
|
:score_history, :best_history)
|
170
170
|
|
171
|
-
def initialize(parameter_ranges, threads=8, limit=nil)
|
171
|
+
def initialize(parameter_ranges, id, threads=8, limit=nil)
|
172
172
|
|
173
173
|
@ranges = parameter_ranges
|
174
|
+
@id = id
|
174
175
|
|
175
176
|
# solution tracking
|
176
177
|
@best = nil
|
@@ -194,7 +195,7 @@ module Biopsy
|
|
194
195
|
# logging
|
195
196
|
@score_history = []
|
196
197
|
@best_history = []
|
197
|
-
@log_data =
|
198
|
+
@log_data = true
|
198
199
|
@logfiles = {}
|
199
200
|
self.log_setup
|
200
201
|
|
@@ -323,7 +324,7 @@ module Biopsy
|
|
323
324
|
mean = @ranges[param].index(value)
|
324
325
|
range = @ranges[param]
|
325
326
|
sd = self.sd_for_param(param, range)
|
326
|
-
@distributions[param] = Biopsy::Distribution.new(mean,
|
327
|
+
@distributions[param] = Biopsy::Distribution.new(mean,
|
327
328
|
range,
|
328
329
|
@sd_increment_proportion,
|
329
330
|
sd)
|
@@ -348,14 +349,13 @@ module Biopsy
|
|
348
349
|
end
|
349
350
|
if best[:parameters].nil?
|
350
351
|
# this should never happen!
|
351
|
-
best = @best
|
352
|
+
best = @best
|
352
353
|
end
|
353
354
|
best
|
354
355
|
end
|
355
356
|
|
356
357
|
def backtrack
|
357
358
|
@backtracks += 1.0
|
358
|
-
# debug('backtracked to best')
|
359
359
|
@distributions.each_pair { |k, d| d.tighten }
|
360
360
|
end
|
361
361
|
|
@@ -368,8 +368,8 @@ module Biopsy
|
|
368
368
|
# use the gradient of recent best scores to update the distributions
|
369
369
|
def adjust_distributions_using_gradient
|
370
370
|
return if @recent_scores.length < 3
|
371
|
-
vx = (1..@recent_scores.length).to_a.
|
372
|
-
vy = @recent_scores.reverse.
|
371
|
+
vx = (1..@recent_scores.length).to_a.to_numeric
|
372
|
+
vy = @recent_scores.reverse.to_numeric
|
373
373
|
r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
|
374
374
|
slope = r.b
|
375
375
|
if slope > 0
|
@@ -398,15 +398,17 @@ module Biopsy
|
|
398
398
|
end
|
399
399
|
end
|
400
400
|
|
401
|
-
# check termination conditions
|
401
|
+
# check termination conditions
|
402
402
|
# and return true if met
|
403
403
|
def finished?
|
404
|
-
return false unless @threads.all?
|
404
|
+
return false unless @threads.all? do |t|
|
405
|
+
t.recent_scores.size == @jump_cutoff
|
406
|
+
end
|
405
407
|
probabilities = self.recent_scores_combination_test
|
406
408
|
n_significant = 0
|
407
|
-
probabilities.each do |mann_u, levene|
|
409
|
+
probabilities.each do |mann_u, levene|
|
408
410
|
if mann_u <= @adjusted_alpha && levene <= @convergence_alpha
|
409
|
-
n_significant += 1
|
411
|
+
n_significant += 1
|
410
412
|
end
|
411
413
|
end
|
412
414
|
finish = n_significant >= probabilities.size * 0.5
|
@@ -415,8 +417,8 @@ module Biopsy
|
|
415
417
|
# returns a matrix of correlation probabilities for recent
|
416
418
|
# scores between all threads
|
417
419
|
def recent_scores_combination_test
|
418
|
-
combinations =
|
419
|
-
@threads.map{ |t| t.recent_scores.
|
420
|
+
combinations =
|
421
|
+
@threads.map{ |t| t.recent_scores.to_numeric }.combination(2).to_a
|
420
422
|
combinations.map do |a, b|
|
421
423
|
[Statsample::Test.u_mannwhitney(a, b).probability_exact,
|
422
424
|
Statsample::Test::Levene.new([a,b]).probability]
|
@@ -431,10 +433,10 @@ module Biopsy
|
|
431
433
|
def log_setup
|
432
434
|
if @log_data
|
433
435
|
require 'csv'
|
434
|
-
@logfiles[:standard_deviations] = CSV.open(
|
435
|
-
@logfiles[:best] = CSV.open(
|
436
|
-
@logfiles[:score] = CSV.open(
|
437
|
-
@logfiles[:params] = CSV.open(
|
436
|
+
@logfiles[:standard_deviations] = CSV.open("#{@id}_standard_deviations.csv", 'w')
|
437
|
+
@logfiles[:best] = CSV.open("#{@id}_best.csv", 'w')
|
438
|
+
@logfiles[:score] = CSV.open("#{@id}_score.csv", 'w')
|
439
|
+
@logfiles[:params] = CSV.open("#{@id}_params.csv", 'w')
|
438
440
|
end
|
439
441
|
end
|
440
442
|
|
@@ -466,13 +468,14 @@ module Biopsy
|
|
466
468
|
end
|
467
469
|
|
468
470
|
def random_start_point
|
469
|
-
Hash[@ranges.map { |p, r| [p, r.sample] }]
|
471
|
+
Hash[@ranges.map { |p, r| [p, r.sample] }]
|
470
472
|
end
|
471
473
|
|
472
474
|
def write_data
|
473
475
|
require 'csv'
|
474
|
-
|
475
|
-
|
476
|
+
pathmod = Settings.instance.no_tempdirs ? '' : '../'
|
477
|
+
path = File.expand_path("#{pathmod}#{@id}_scores.csv")
|
478
|
+
CSV.open(path, "w") do |c|
|
476
479
|
c << %w(iteration thread score best)
|
477
480
|
@threads.each_with_index do |t, t_idx|
|
478
481
|
sh = t.score_history
|
@@ -482,8 +485,7 @@ module Biopsy
|
|
482
485
|
end
|
483
486
|
end
|
484
487
|
end
|
485
|
-
|
486
|
-
puts "wrote TabuSearch run data to #{path}"
|
488
|
+
# puts "wrote TabuSearch run data to #{path}"
|
487
489
|
end
|
488
490
|
|
489
491
|
end # TabuSearch
|
data/lib/biopsy/settings.rb
CHANGED
@@ -30,6 +30,7 @@ module Biopsy
|
|
30
30
|
attr_accessor :sweep_cutoff
|
31
31
|
attr_accessor :keep_intermediates
|
32
32
|
attr_accessor :gzip_intermediates
|
33
|
+
attr_accessor :no_tempdirs
|
33
34
|
|
34
35
|
def initialize
|
35
36
|
self.set_defaults
|
@@ -45,6 +46,7 @@ module Biopsy
|
|
45
46
|
@sweep_cutoff = 100
|
46
47
|
@keep_intermediates = false
|
47
48
|
@gzip_intermediates = false
|
49
|
+
@no_tempdirs = false
|
48
50
|
end
|
49
51
|
|
50
52
|
# Loads settings from a YAML config file. If no file is
|
data/lib/biopsy/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biopsy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Smith-Unna
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2015-
|
13
|
+
date: 2015-11-01 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake
|
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
206
206
|
version: '0'
|
207
207
|
requirements: []
|
208
208
|
rubyforge_project:
|
209
|
-
rubygems_version: 2.
|
209
|
+
rubygems_version: 2.4.6
|
210
210
|
signing_key:
|
211
211
|
specification_version: 4
|
212
212
|
summary: framework for optimising any computational pipeline or program
|