biopsy 0.2.1 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f45b9377c3de44caf8abf21b07576a6e89e0c03
4
- data.tar.gz: 94e4a81fe98380482adc7b290f8067174684850d
3
+ metadata.gz: 227832bb75ba1c0114e59d8058a55c2ead937105
4
+ data.tar.gz: a3f5e6b364d5265878ed4a8f4e7f086282c2a156
5
5
  SHA512:
6
- metadata.gz: 76d90535589d1f09f74bf424d7437a45855b1065b802bc3ba4d562cc30c1c8fd775583cd856e6de02fee919a9b34619875693a96b228ef686c7e658bb9b29ea5
7
- data.tar.gz: 0dbc5e5d21297759affc2199ffff2fe90d25c1958a7380d531811bdded9fbe8caf8eb9fc0aed8b62ecb5d52d6f0f2e2292b480ae8a8e84c79f0e457f44f45c3a
6
+ metadata.gz: 06e5510f5546f565c080ff405f3406e27d8a69976ac3fe80a89814b85a9d24cbe98aa9fca01c0a38dd26d46af6a7889ebc36e1b250939577594ab4226f0180b9
7
+ data.tar.gz: 326da0583c62503acdd211ece069434ea63cbaafcce2e6fa5caabb4d3c3ff7e9125ada20dbf6c91e0f62c2f1f4f6bc32098b1300ee78b159a0ffc24807daba7d
@@ -23,7 +23,7 @@ module Biopsy
23
23
 
24
24
  # Returns a new Experiment
25
25
  def initialize(target, options:{}, threads:4, start:nil, algorithm:nil,
26
- timelimit:nil, verbosity: :quiet)
26
+ timelimit:nil, verbosity: :quiet, id:nil)
27
27
  @threads = threads
28
28
  @start = start
29
29
  @algorithm = algorithm
@@ -40,6 +40,7 @@ module Biopsy
40
40
  self.select_starting_point
41
41
  @scores = {}
42
42
  @iteration_count = 0
43
+ set_id id
43
44
  end
44
45
 
45
46
  # return the set of parameters to evaluate first
@@ -63,9 +64,9 @@ module Biopsy
63
64
  max = Settings.instance.sweep_cutoff
64
65
  n = @target.count_parameter_permutations
65
66
  if n < max
66
- @algorithm = ParameterSweeper.new(@target.parameters)
67
+ @algorithm = ParameterSweeper.new(@target.parameters, @id)
67
68
  else
68
- @algorithm = TabuSearch.new(@target.parameters)
69
+ @algorithm = TabuSearch.new(@target.parameters, @id)
69
70
  end
70
71
  end
71
72
 
@@ -83,6 +84,7 @@ module Biopsy
83
84
  in_progress = true
84
85
  @algorithm.setup @start
85
86
  @current_params = @start
87
+ max_scores = @target.count_parameter_permutations
86
88
  while in_progress
87
89
  run_iteration
88
90
  # update the best result
@@ -100,7 +102,7 @@ module Biopsy
100
102
  end
101
103
  end
102
104
  # have we finished?
103
- in_progress = !@algorithm.finished?
105
+ in_progress = !@algorithm.finished? && @scores.size < max_scores
104
106
  if in_progress && !(@timelimit.nil?)
105
107
  in_progress = (Time.now - start_time < @timelimit)
106
108
  end
@@ -117,25 +119,27 @@ module Biopsy
117
119
  # encompassing the program, objective(s) and optimiser.
118
120
  # Returns the output of the optimiser.
119
121
  def run_iteration
120
- # create temp dir
121
- Dir.chdir(self.create_tempdir) do
122
+ param_key = @current_params.to_s
123
+ result = nil
124
+ # lookup the result if possible
125
+ if @scores.key? param_key
126
+ result = @scores[param_key]
127
+ else
128
+ # create temp dir
129
+ curdir = Dir.pwd
130
+ Dir.chdir(self.create_tempdir) unless Settings.instance.no_tempdirs
122
131
  # run the target
123
132
  raw_output = @target.run @current_params.merge(@options)
124
133
  # evaluate with objectives
125
- param_key = @current_params.to_s
126
- result = nil
127
- if @scores.key? param_key
128
- result = @scores[param_key]
129
- else
130
- result = @objective.run_for_output(raw_output, @threads, nil)
131
- @iteration_count += 1
132
- self.print_progress(@iteration_count, @current_params, result, @best)
133
- end
134
+ result = @objective.run_for_output(raw_output, @threads, nil)
135
+ @iteration_count += 1
136
+ self.print_progress(@iteration_count, @current_params, result, @best)
134
137
  @scores[@current_params.to_s] = result
135
- # get next steps from optimiser
136
- @current_params = @algorithm.run_one_iteration(@current_params, result)
138
+ self.cleanup
139
+ Dir.chdir(curdir) unless Settings.instance.no_tempdirs
137
140
  end
138
- self.cleanup
141
+ # get next steps from optimiser
142
+ @current_params = @algorithm.run_one_iteration(@current_params, result)
139
143
  end
140
144
 
141
145
  def print_progress(iteration, params, score, best)
@@ -148,6 +152,7 @@ module Biopsy
148
152
  end
149
153
 
150
154
  def cleanup
155
+ return if Settings.instance.no_tempdirs
151
156
  # TODO: make this work
152
157
  # remove all but essential files
153
158
  essential_files = ""
@@ -183,6 +188,17 @@ module Biopsy
183
188
  token
184
189
  end
185
190
 
191
+ # set experiment ID with either user provided value, or date-time
192
+ # as fallback
193
+ def set_id id
194
+ @id = id
195
+ if @id.nil?
196
+ t = Time.now
197
+ parts = %w[y m d H M S Z].map{ |p| t.strftime "%#{p}" }
198
+ @id = "experiment_#{parts.join('_')}"
199
+ end
200
+ end
201
+
186
202
  end # end of class RunHandler
187
203
 
188
204
  end # end of module Biopsy
@@ -24,11 +24,12 @@ module Biopsy
24
24
  class Combinator
25
25
 
26
26
  include Enumerable
27
-
28
- def initialize parameters
27
+
28
+ def initialize(parameters, id)
29
29
  @parameters = parameters
30
+ @id = id
30
31
  end
31
-
32
+
32
33
  def generate_combinations(index, opts, &block)
33
34
  if index == @parameters.length
34
35
  block.call opts.clone
@@ -80,7 +81,7 @@ module Biopsy
80
81
  @current = { :parameters => parameters, :score => score }
81
82
  self.update_best?
82
83
  return @combinator.next
83
- rescue
84
+ rescue
84
85
  @is_finished = true
85
86
  return nil
86
87
  end
@@ -90,7 +90,7 @@ module Biopsy
90
90
 
91
91
  def initialize(distributions, max_size, tabu)
92
92
  # tabu
93
- @tabu = tabu
93
+ @tabu = tabu
94
94
  # neighbourhood
95
95
  @max_size = max_size
96
96
  @members = []
@@ -108,7 +108,7 @@ module Biopsy
108
108
  n = 0
109
109
  begin
110
110
  if n >= 100
111
- # taking too long to generate a neighbour,
111
+ # taking too long to generate a neighbour,
112
112
  # loosen the neighbourhood structure so we explore further
113
113
  # debug("loosening distributions")
114
114
  @distributions.each do |param, dist|
@@ -162,15 +162,16 @@ module Biopsy
162
162
  attr_accessor :max_hood_size, :sd_increment_proportion
163
163
  attr_accessor :starting_sd_divisor, :backtrack_cutoff, :jump_cutoff
164
164
 
165
- Thread = Struct.new(:best, :tabu, :distributions,
166
- :standard_deviations, :recent_scores,
165
+ Thread = Struct.new(:best, :tabu, :distributions,
166
+ :standard_deviations, :recent_scores,
167
167
  :iterations_since_best, :backtracks,
168
168
  :current, :current_hood, :loaded,
169
169
  :score_history, :best_history)
170
170
 
171
- def initialize(parameter_ranges, threads=8, limit=nil)
171
+ def initialize(parameter_ranges, id, threads=8, limit=nil)
172
172
 
173
173
  @ranges = parameter_ranges
174
+ @id = id
174
175
 
175
176
  # solution tracking
176
177
  @best = nil
@@ -194,7 +195,7 @@ module Biopsy
194
195
  # logging
195
196
  @score_history = []
196
197
  @best_history = []
197
- @log_data = false
198
+ @log_data = true
198
199
  @logfiles = {}
199
200
  self.log_setup
200
201
 
@@ -323,7 +324,7 @@ module Biopsy
323
324
  mean = @ranges[param].index(value)
324
325
  range = @ranges[param]
325
326
  sd = self.sd_for_param(param, range)
326
- @distributions[param] = Biopsy::Distribution.new(mean,
327
+ @distributions[param] = Biopsy::Distribution.new(mean,
327
328
  range,
328
329
  @sd_increment_proportion,
329
330
  sd)
@@ -348,14 +349,13 @@ module Biopsy
348
349
  end
349
350
  if best[:parameters].nil?
350
351
  # this should never happen!
351
- best = @best
352
+ best = @best
352
353
  end
353
354
  best
354
355
  end
355
356
 
356
357
  def backtrack
357
358
  @backtracks += 1.0
358
- # debug('backtracked to best')
359
359
  @distributions.each_pair { |k, d| d.tighten }
360
360
  end
361
361
 
@@ -368,8 +368,8 @@ module Biopsy
368
368
  # use the gradient of recent best scores to update the distributions
369
369
  def adjust_distributions_using_gradient
370
370
  return if @recent_scores.length < 3
371
- vx = (1..@recent_scores.length).to_a.to_scale
372
- vy = @recent_scores.reverse.to_scale
371
+ vx = (1..@recent_scores.length).to_a.to_numeric
372
+ vy = @recent_scores.reverse.to_numeric
373
373
  r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
374
374
  slope = r.b
375
375
  if slope > 0
@@ -398,15 +398,17 @@ module Biopsy
398
398
  end
399
399
  end
400
400
 
401
- # check termination conditions
401
+ # check termination conditions
402
402
  # and return true if met
403
403
  def finished?
404
- return false unless @threads.all? { |t| t.recent_scores.size == @jump_cutoff }
404
+ return false unless @threads.all? do |t|
405
+ t.recent_scores.size == @jump_cutoff
406
+ end
405
407
  probabilities = self.recent_scores_combination_test
406
408
  n_significant = 0
407
- probabilities.each do |mann_u, levene|
409
+ probabilities.each do |mann_u, levene|
408
410
  if mann_u <= @adjusted_alpha && levene <= @convergence_alpha
409
- n_significant += 1
411
+ n_significant += 1
410
412
  end
411
413
  end
412
414
  finish = n_significant >= probabilities.size * 0.5
@@ -415,8 +417,8 @@ module Biopsy
415
417
  # returns a matrix of correlation probabilities for recent
416
418
  # scores between all threads
417
419
  def recent_scores_combination_test
418
- combinations =
419
- @threads.map{ |t| t.recent_scores.to_scale }.combination(2).to_a
420
+ combinations =
421
+ @threads.map{ |t| t.recent_scores.to_numeric }.combination(2).to_a
420
422
  combinations.map do |a, b|
421
423
  [Statsample::Test.u_mannwhitney(a, b).probability_exact,
422
424
  Statsample::Test::Levene.new([a,b]).probability]
@@ -431,10 +433,10 @@ module Biopsy
431
433
  def log_setup
432
434
  if @log_data
433
435
  require 'csv'
434
- @logfiles[:standard_deviations] = CSV.open('standard_deviations.csv', 'w')
435
- @logfiles[:best] = CSV.open('best.csv', 'w')
436
- @logfiles[:score] = CSV.open('score.csv', 'w')
437
- @logfiles[:params] = CSV.open('params.csv', 'w')
436
+ @logfiles[:standard_deviations] = CSV.open("#{@id}_standard_deviations.csv", 'w')
437
+ @logfiles[:best] = CSV.open("#{@id}_best.csv", 'w')
438
+ @logfiles[:score] = CSV.open("#{@id}_score.csv", 'w')
439
+ @logfiles[:params] = CSV.open("#{@id}_params.csv", 'w')
438
440
  end
439
441
  end
440
442
 
@@ -466,13 +468,14 @@ module Biopsy
466
468
  end
467
469
 
468
470
  def random_start_point
469
- Hash[@ranges.map { |p, r| [p, r.sample] }]
471
+ Hash[@ranges.map { |p, r| [p, r.sample] }]
470
472
  end
471
473
 
472
474
  def write_data
473
475
  require 'csv'
474
- now = Time.now.to_i
475
- CSV.open("../#{now}_scores.csv", "w") do |c|
476
+ pathmod = Settings.instance.no_tempdirs ? '' : '../'
477
+ path = File.expand_path("#{pathmod}#{@id}_scores.csv")
478
+ CSV.open(path, "w") do |c|
476
479
  c << %w(iteration thread score best)
477
480
  @threads.each_with_index do |t, t_idx|
478
481
  sh = t.score_history
@@ -482,8 +485,7 @@ module Biopsy
482
485
  end
483
486
  end
484
487
  end
485
- path = File.expand_path("../#{now}_scores.csv")
486
- puts "wrote TabuSearch run data to #{path}"
488
+ # puts "wrote TabuSearch run data to #{path}"
487
489
  end
488
490
 
489
491
  end # TabuSearch
@@ -30,6 +30,7 @@ module Biopsy
30
30
  attr_accessor :sweep_cutoff
31
31
  attr_accessor :keep_intermediates
32
32
  attr_accessor :gzip_intermediates
33
+ attr_accessor :no_tempdirs
33
34
 
34
35
  def initialize
35
36
  self.set_defaults
@@ -45,6 +46,7 @@ module Biopsy
45
46
  @sweep_cutoff = 100
46
47
  @keep_intermediates = false
47
48
  @gzip_intermediates = false
49
+ @no_tempdirs = false
48
50
  end
49
51
 
50
52
  # Loads settings from a YAML config file. If no file is
@@ -5,8 +5,8 @@ module Biopsy
5
5
 
6
6
  module VERSION
7
7
  MAJOR = 0
8
- MINOR = 2
9
- PATCH = 1
8
+ MINOR = 3
9
+ PATCH = 0
10
10
 
11
11
  STRING = [MAJOR, MINOR, PATCH].compact.join('.')
12
12
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biopsy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-06-02 00:00:00.000000000 Z
13
+ date: 2015-11-01 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
206
206
  version: '0'
207
207
  requirements: []
208
208
  rubyforge_project:
209
- rubygems_version: 2.2.2
209
+ rubygems_version: 2.4.6
210
210
  signing_key:
211
211
  specification_version: 4
212
212
  summary: framework for optimising any computational pipeline or program