biopsy 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/biopsy/experiment.rb +34 -18
- data/lib/biopsy/optimisers/parameter_sweeper.rb +5 -4
- data/lib/biopsy/optimisers/tabu_search.rb +28 -26
- data/lib/biopsy/settings.rb +2 -0
- data/lib/biopsy/version.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 227832bb75ba1c0114e59d8058a55c2ead937105
|
4
|
+
data.tar.gz: a3f5e6b364d5265878ed4a8f4e7f086282c2a156
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 06e5510f5546f565c080ff405f3406e27d8a69976ac3fe80a89814b85a9d24cbe98aa9fca01c0a38dd26d46af6a7889ebc36e1b250939577594ab4226f0180b9
|
7
|
+
data.tar.gz: 326da0583c62503acdd211ece069434ea63cbaafcce2e6fa5caabb4d3c3ff7e9125ada20dbf6c91e0f62c2f1f4f6bc32098b1300ee78b159a0ffc24807daba7d
|
data/lib/biopsy/experiment.rb
CHANGED
@@ -23,7 +23,7 @@ module Biopsy
|
|
23
23
|
|
24
24
|
# Returns a new Experiment
|
25
25
|
def initialize(target, options:{}, threads:4, start:nil, algorithm:nil,
|
26
|
-
timelimit:nil, verbosity: :quiet)
|
26
|
+
timelimit:nil, verbosity: :quiet, id:nil)
|
27
27
|
@threads = threads
|
28
28
|
@start = start
|
29
29
|
@algorithm = algorithm
|
@@ -40,6 +40,7 @@ module Biopsy
|
|
40
40
|
self.select_starting_point
|
41
41
|
@scores = {}
|
42
42
|
@iteration_count = 0
|
43
|
+
set_id id
|
43
44
|
end
|
44
45
|
|
45
46
|
# return the set of parameters to evaluate first
|
@@ -63,9 +64,9 @@ module Biopsy
|
|
63
64
|
max = Settings.instance.sweep_cutoff
|
64
65
|
n = @target.count_parameter_permutations
|
65
66
|
if n < max
|
66
|
-
@algorithm = ParameterSweeper.new(@target.parameters)
|
67
|
+
@algorithm = ParameterSweeper.new(@target.parameters, @id)
|
67
68
|
else
|
68
|
-
@algorithm = TabuSearch.new(@target.parameters)
|
69
|
+
@algorithm = TabuSearch.new(@target.parameters, @id)
|
69
70
|
end
|
70
71
|
end
|
71
72
|
|
@@ -83,6 +84,7 @@ module Biopsy
|
|
83
84
|
in_progress = true
|
84
85
|
@algorithm.setup @start
|
85
86
|
@current_params = @start
|
87
|
+
max_scores = @target.count_parameter_permutations
|
86
88
|
while in_progress
|
87
89
|
run_iteration
|
88
90
|
# update the best result
|
@@ -100,7 +102,7 @@ module Biopsy
|
|
100
102
|
end
|
101
103
|
end
|
102
104
|
# have we finished?
|
103
|
-
in_progress = !@algorithm.finished?
|
105
|
+
in_progress = !@algorithm.finished? && @scores.size < max_scores
|
104
106
|
if in_progress && !(@timelimit.nil?)
|
105
107
|
in_progress = (Time.now - start_time < @timelimit)
|
106
108
|
end
|
@@ -117,25 +119,27 @@ module Biopsy
|
|
117
119
|
# encompassing the program, objective(s) and optimiser.
|
118
120
|
# Returns the output of the optimiser.
|
119
121
|
def run_iteration
|
120
|
-
|
121
|
-
|
122
|
+
param_key = @current_params.to_s
|
123
|
+
result = nil
|
124
|
+
# lookup the result if possible
|
125
|
+
if @scores.key? param_key
|
126
|
+
result = @scores[param_key]
|
127
|
+
else
|
128
|
+
# create temp dir
|
129
|
+
curdir = Dir.pwd
|
130
|
+
Dir.chdir(self.create_tempdir) unless Settings.instance.no_tempdirs
|
122
131
|
# run the target
|
123
132
|
raw_output = @target.run @current_params.merge(@options)
|
124
133
|
# evaluate with objectives
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
result = @scores[param_key]
|
129
|
-
else
|
130
|
-
result = @objective.run_for_output(raw_output, @threads, nil)
|
131
|
-
@iteration_count += 1
|
132
|
-
self.print_progress(@iteration_count, @current_params, result, @best)
|
133
|
-
end
|
134
|
+
result = @objective.run_for_output(raw_output, @threads, nil)
|
135
|
+
@iteration_count += 1
|
136
|
+
self.print_progress(@iteration_count, @current_params, result, @best)
|
134
137
|
@scores[@current_params.to_s] = result
|
135
|
-
|
136
|
-
|
138
|
+
self.cleanup
|
139
|
+
Dir.chdir(curdir) unless Settings.instance.no_tempdirs
|
137
140
|
end
|
138
|
-
|
141
|
+
# get next steps from optimiser
|
142
|
+
@current_params = @algorithm.run_one_iteration(@current_params, result)
|
139
143
|
end
|
140
144
|
|
141
145
|
def print_progress(iteration, params, score, best)
|
@@ -148,6 +152,7 @@ module Biopsy
|
|
148
152
|
end
|
149
153
|
|
150
154
|
def cleanup
|
155
|
+
return if Settings.instance.no_tempdirs
|
151
156
|
# TODO: make this work
|
152
157
|
# remove all but essential files
|
153
158
|
essential_files = ""
|
@@ -183,6 +188,17 @@ module Biopsy
|
|
183
188
|
token
|
184
189
|
end
|
185
190
|
|
191
|
+
# set experiment ID with either user provided value, or date-time
|
192
|
+
# as fallback
|
193
|
+
def set_id id
|
194
|
+
@id = id
|
195
|
+
if @id.nil?
|
196
|
+
t = Time.now
|
197
|
+
parts = %w[y m d H M S Z].map{ |p| t.strftime "%#{p}" }
|
198
|
+
@id = "experiment_#{parts.join('_')}"
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
186
202
|
end # end of class RunHandler
|
187
203
|
|
188
204
|
end # end of module Biopsy
|
data/lib/biopsy/optimisers/parameter_sweeper.rb
CHANGED
@@ -24,11 +24,12 @@ module Biopsy
|
|
24
24
|
class Combinator
|
25
25
|
|
26
26
|
include Enumerable
|
27
|
-
|
28
|
-
def initialize
|
27
|
+
|
28
|
+
def initialize(parameters, id)
|
29
29
|
@parameters = parameters
|
30
|
+
@id = id
|
30
31
|
end
|
31
|
-
|
32
|
+
|
32
33
|
def generate_combinations(index, opts, &block)
|
33
34
|
if index == @parameters.length
|
34
35
|
block.call opts.clone
|
@@ -80,7 +81,7 @@ module Biopsy
|
|
80
81
|
@current = { :parameters => parameters, :score => score }
|
81
82
|
self.update_best?
|
82
83
|
return @combinator.next
|
83
|
-
rescue
|
84
|
+
rescue
|
84
85
|
@is_finished = true
|
85
86
|
return nil
|
86
87
|
end
|
data/lib/biopsy/optimisers/tabu_search.rb
CHANGED
@@ -90,7 +90,7 @@ module Biopsy
|
|
90
90
|
|
91
91
|
def initialize(distributions, max_size, tabu)
|
92
92
|
# tabu
|
93
|
-
@tabu = tabu
|
93
|
+
@tabu = tabu
|
94
94
|
# neighbourhood
|
95
95
|
@max_size = max_size
|
96
96
|
@members = []
|
@@ -108,7 +108,7 @@ module Biopsy
|
|
108
108
|
n = 0
|
109
109
|
begin
|
110
110
|
if n >= 100
|
111
|
-
# taking too long to generate a neighbour,
|
111
|
+
# taking too long to generate a neighbour,
|
112
112
|
# loosen the neighbourhood structure so we explore further
|
113
113
|
# debug("loosening distributions")
|
114
114
|
@distributions.each do |param, dist|
|
@@ -162,15 +162,16 @@ module Biopsy
|
|
162
162
|
attr_accessor :max_hood_size, :sd_increment_proportion
|
163
163
|
attr_accessor :starting_sd_divisor, :backtrack_cutoff, :jump_cutoff
|
164
164
|
|
165
|
-
Thread = Struct.new(:best, :tabu, :distributions,
|
166
|
-
:standard_deviations, :recent_scores,
|
165
|
+
Thread = Struct.new(:best, :tabu, :distributions,
|
166
|
+
:standard_deviations, :recent_scores,
|
167
167
|
:iterations_since_best, :backtracks,
|
168
168
|
:current, :current_hood, :loaded,
|
169
169
|
:score_history, :best_history)
|
170
170
|
|
171
|
-
def initialize(parameter_ranges, threads=8, limit=nil)
|
171
|
+
def initialize(parameter_ranges, id, threads=8, limit=nil)
|
172
172
|
|
173
173
|
@ranges = parameter_ranges
|
174
|
+
@id = id
|
174
175
|
|
175
176
|
# solution tracking
|
176
177
|
@best = nil
|
@@ -194,7 +195,7 @@ module Biopsy
|
|
194
195
|
# logging
|
195
196
|
@score_history = []
|
196
197
|
@best_history = []
|
197
|
-
@log_data =
|
198
|
+
@log_data = true
|
198
199
|
@logfiles = {}
|
199
200
|
self.log_setup
|
200
201
|
|
@@ -323,7 +324,7 @@ module Biopsy
|
|
323
324
|
mean = @ranges[param].index(value)
|
324
325
|
range = @ranges[param]
|
325
326
|
sd = self.sd_for_param(param, range)
|
326
|
-
@distributions[param] = Biopsy::Distribution.new(mean,
|
327
|
+
@distributions[param] = Biopsy::Distribution.new(mean,
|
327
328
|
range,
|
328
329
|
@sd_increment_proportion,
|
329
330
|
sd)
|
@@ -348,14 +349,13 @@ module Biopsy
|
|
348
349
|
end
|
349
350
|
if best[:parameters].nil?
|
350
351
|
# this should never happen!
|
351
|
-
best = @best
|
352
|
+
best = @best
|
352
353
|
end
|
353
354
|
best
|
354
355
|
end
|
355
356
|
|
356
357
|
def backtrack
|
357
358
|
@backtracks += 1.0
|
358
|
-
# debug('backtracked to best')
|
359
359
|
@distributions.each_pair { |k, d| d.tighten }
|
360
360
|
end
|
361
361
|
|
@@ -368,8 +368,8 @@ module Biopsy
|
|
368
368
|
# use the gradient of recent best scores to update the distributions
|
369
369
|
def adjust_distributions_using_gradient
|
370
370
|
return if @recent_scores.length < 3
|
371
|
-
vx = (1..@recent_scores.length).to_a.
|
372
|
-
vy = @recent_scores.reverse.
|
371
|
+
vx = (1..@recent_scores.length).to_a.to_numeric
|
372
|
+
vy = @recent_scores.reverse.to_numeric
|
373
373
|
r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
|
374
374
|
slope = r.b
|
375
375
|
if slope > 0
|
@@ -398,15 +398,17 @@ module Biopsy
|
|
398
398
|
end
|
399
399
|
end
|
400
400
|
|
401
|
-
# check termination conditions
|
401
|
+
# check termination conditions
|
402
402
|
# and return true if met
|
403
403
|
def finished?
|
404
|
-
return false unless @threads.all?
|
404
|
+
return false unless @threads.all? do |t|
|
405
|
+
t.recent_scores.size == @jump_cutoff
|
406
|
+
end
|
405
407
|
probabilities = self.recent_scores_combination_test
|
406
408
|
n_significant = 0
|
407
|
-
probabilities.each do |mann_u, levene|
|
409
|
+
probabilities.each do |mann_u, levene|
|
408
410
|
if mann_u <= @adjusted_alpha && levene <= @convergence_alpha
|
409
|
-
n_significant += 1
|
411
|
+
n_significant += 1
|
410
412
|
end
|
411
413
|
end
|
412
414
|
finish = n_significant >= probabilities.size * 0.5
|
@@ -415,8 +417,8 @@ module Biopsy
|
|
415
417
|
# returns a matrix of correlation probabilities for recent
|
416
418
|
# scores between all threads
|
417
419
|
def recent_scores_combination_test
|
418
|
-
combinations =
|
419
|
-
@threads.map{ |t| t.recent_scores.
|
420
|
+
combinations =
|
421
|
+
@threads.map{ |t| t.recent_scores.to_numeric }.combination(2).to_a
|
420
422
|
combinations.map do |a, b|
|
421
423
|
[Statsample::Test.u_mannwhitney(a, b).probability_exact,
|
422
424
|
Statsample::Test::Levene.new([a,b]).probability]
|
@@ -431,10 +433,10 @@ module Biopsy
|
|
431
433
|
def log_setup
|
432
434
|
if @log_data
|
433
435
|
require 'csv'
|
434
|
-
@logfiles[:standard_deviations] = CSV.open(
|
435
|
-
@logfiles[:best] = CSV.open(
|
436
|
-
@logfiles[:score] = CSV.open(
|
437
|
-
@logfiles[:params] = CSV.open(
|
436
|
+
@logfiles[:standard_deviations] = CSV.open("#{@id}_standard_deviations.csv", 'w')
|
437
|
+
@logfiles[:best] = CSV.open("#{@id}_best.csv", 'w')
|
438
|
+
@logfiles[:score] = CSV.open("#{@id}_score.csv", 'w')
|
439
|
+
@logfiles[:params] = CSV.open("#{@id}_params.csv", 'w')
|
438
440
|
end
|
439
441
|
end
|
440
442
|
|
@@ -466,13 +468,14 @@ module Biopsy
|
|
466
468
|
end
|
467
469
|
|
468
470
|
def random_start_point
|
469
|
-
Hash[@ranges.map { |p, r| [p, r.sample] }]
|
471
|
+
Hash[@ranges.map { |p, r| [p, r.sample] }]
|
470
472
|
end
|
471
473
|
|
472
474
|
def write_data
|
473
475
|
require 'csv'
|
474
|
-
|
475
|
-
|
476
|
+
pathmod = Settings.instance.no_tempdirs ? '' : '../'
|
477
|
+
path = File.expand_path("#{pathmod}#{@id}_scores.csv")
|
478
|
+
CSV.open(path, "w") do |c|
|
476
479
|
c << %w(iteration thread score best)
|
477
480
|
@threads.each_with_index do |t, t_idx|
|
478
481
|
sh = t.score_history
|
@@ -482,8 +485,7 @@ module Biopsy
|
|
482
485
|
end
|
483
486
|
end
|
484
487
|
end
|
485
|
-
|
486
|
-
puts "wrote TabuSearch run data to #{path}"
|
488
|
+
# puts "wrote TabuSearch run data to #{path}"
|
487
489
|
end
|
488
490
|
|
489
491
|
end # TabuSearch
|
data/lib/biopsy/settings.rb
CHANGED
@@ -30,6 +30,7 @@ module Biopsy
|
|
30
30
|
attr_accessor :sweep_cutoff
|
31
31
|
attr_accessor :keep_intermediates
|
32
32
|
attr_accessor :gzip_intermediates
|
33
|
+
attr_accessor :no_tempdirs
|
33
34
|
|
34
35
|
def initialize
|
35
36
|
self.set_defaults
|
@@ -45,6 +46,7 @@ module Biopsy
|
|
45
46
|
@sweep_cutoff = 100
|
46
47
|
@keep_intermediates = false
|
47
48
|
@gzip_intermediates = false
|
49
|
+
@no_tempdirs = false
|
48
50
|
end
|
49
51
|
|
50
52
|
# Loads settings from a YAML config file. If no file is
|
data/lib/biopsy/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biopsy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Smith-Unna
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2015-
|
13
|
+
date: 2015-11-01 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake
|
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
206
206
|
version: '0'
|
207
207
|
requirements: []
|
208
208
|
rubyforge_project:
|
209
|
-
rubygems_version: 2.
|
209
|
+
rubygems_version: 2.4.6
|
210
210
|
signing_key:
|
211
211
|
specification_version: 4
|
212
212
|
summary: framework for optimising any computational pipeline or program
|