biopsy 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f45b9377c3de44caf8abf21b07576a6e89e0c03
4
- data.tar.gz: 94e4a81fe98380482adc7b290f8067174684850d
3
+ metadata.gz: 227832bb75ba1c0114e59d8058a55c2ead937105
4
+ data.tar.gz: a3f5e6b364d5265878ed4a8f4e7f086282c2a156
5
5
  SHA512:
6
- metadata.gz: 76d90535589d1f09f74bf424d7437a45855b1065b802bc3ba4d562cc30c1c8fd775583cd856e6de02fee919a9b34619875693a96b228ef686c7e658bb9b29ea5
7
- data.tar.gz: 0dbc5e5d21297759affc2199ffff2fe90d25c1958a7380d531811bdded9fbe8caf8eb9fc0aed8b62ecb5d52d6f0f2e2292b480ae8a8e84c79f0e457f44f45c3a
6
+ metadata.gz: 06e5510f5546f565c080ff405f3406e27d8a69976ac3fe80a89814b85a9d24cbe98aa9fca01c0a38dd26d46af6a7889ebc36e1b250939577594ab4226f0180b9
7
+ data.tar.gz: 326da0583c62503acdd211ece069434ea63cbaafcce2e6fa5caabb4d3c3ff7e9125ada20dbf6c91e0f62c2f1f4f6bc32098b1300ee78b159a0ffc24807daba7d
@@ -23,7 +23,7 @@ module Biopsy
23
23
 
24
24
  # Returns a new Experiment
25
25
  def initialize(target, options:{}, threads:4, start:nil, algorithm:nil,
26
- timelimit:nil, verbosity: :quiet)
26
+ timelimit:nil, verbosity: :quiet, id:nil)
27
27
  @threads = threads
28
28
  @start = start
29
29
  @algorithm = algorithm
@@ -40,6 +40,7 @@ module Biopsy
40
40
  self.select_starting_point
41
41
  @scores = {}
42
42
  @iteration_count = 0
43
+ set_id id
43
44
  end
44
45
 
45
46
  # return the set of parameters to evaluate first
@@ -63,9 +64,9 @@ module Biopsy
63
64
  max = Settings.instance.sweep_cutoff
64
65
  n = @target.count_parameter_permutations
65
66
  if n < max
66
- @algorithm = ParameterSweeper.new(@target.parameters)
67
+ @algorithm = ParameterSweeper.new(@target.parameters, @id)
67
68
  else
68
- @algorithm = TabuSearch.new(@target.parameters)
69
+ @algorithm = TabuSearch.new(@target.parameters, @id)
69
70
  end
70
71
  end
71
72
 
@@ -83,6 +84,7 @@ module Biopsy
83
84
  in_progress = true
84
85
  @algorithm.setup @start
85
86
  @current_params = @start
87
+ max_scores = @target.count_parameter_permutations
86
88
  while in_progress
87
89
  run_iteration
88
90
  # update the best result
@@ -100,7 +102,7 @@ module Biopsy
100
102
  end
101
103
  end
102
104
  # have we finished?
103
- in_progress = !@algorithm.finished?
105
+ in_progress = !@algorithm.finished? && @scores.size < max_scores
104
106
  if in_progress && !(@timelimit.nil?)
105
107
  in_progress = (Time.now - start_time < @timelimit)
106
108
  end
@@ -117,25 +119,27 @@ module Biopsy
117
119
  # encompassing the program, objective(s) and optimiser.
118
120
  # Returns the output of the optimiser.
119
121
  def run_iteration
120
- # create temp dir
121
- Dir.chdir(self.create_tempdir) do
122
+ param_key = @current_params.to_s
123
+ result = nil
124
+ # lookup the result if possible
125
+ if @scores.key? param_key
126
+ result = @scores[param_key]
127
+ else
128
+ # create temp dir
129
+ curdir = Dir.pwd
130
+ Dir.chdir(self.create_tempdir) unless Settings.instance.no_tempdirs
122
131
  # run the target
123
132
  raw_output = @target.run @current_params.merge(@options)
124
133
  # evaluate with objectives
125
- param_key = @current_params.to_s
126
- result = nil
127
- if @scores.key? param_key
128
- result = @scores[param_key]
129
- else
130
- result = @objective.run_for_output(raw_output, @threads, nil)
131
- @iteration_count += 1
132
- self.print_progress(@iteration_count, @current_params, result, @best)
133
- end
134
+ result = @objective.run_for_output(raw_output, @threads, nil)
135
+ @iteration_count += 1
136
+ self.print_progress(@iteration_count, @current_params, result, @best)
134
137
  @scores[@current_params.to_s] = result
135
- # get next steps from optimiser
136
- @current_params = @algorithm.run_one_iteration(@current_params, result)
138
+ self.cleanup
139
+ Dir.chdir(curdir) unless Settings.instance.no_tempdirs
137
140
  end
138
- self.cleanup
141
+ # get next steps from optimiser
142
+ @current_params = @algorithm.run_one_iteration(@current_params, result)
139
143
  end
140
144
 
141
145
  def print_progress(iteration, params, score, best)
@@ -148,6 +152,7 @@ module Biopsy
148
152
  end
149
153
 
150
154
  def cleanup
155
+ return if Settings.instance.no_tempdirs
151
156
  # TODO: make this work
152
157
  # remove all but essential files
153
158
  essential_files = ""
@@ -183,6 +188,17 @@ module Biopsy
183
188
  token
184
189
  end
185
190
 
191
+ # set experiment ID with either user provided value, or date-time
192
+ # as fallback
193
+ def set_id id
194
+ @id = id
195
+ if @id.nil?
196
+ t = Time.now
197
+ parts = %w[y m d H M S Z].map{ |p| t.strftime "%#{p}" }
198
+ @id = "experiment_#{parts.join('_')}"
199
+ end
200
+ end
201
+
186
202
  end # end of class RunHandler
187
203
 
188
204
  end # end of module Biopsy
@@ -24,11 +24,12 @@ module Biopsy
24
24
  class Combinator
25
25
 
26
26
  include Enumerable
27
-
28
- def initialize parameters
27
+
28
+ def initialize(parameters, id)
29
29
  @parameters = parameters
30
+ @id = id
30
31
  end
31
-
32
+
32
33
  def generate_combinations(index, opts, &block)
33
34
  if index == @parameters.length
34
35
  block.call opts.clone
@@ -80,7 +81,7 @@ module Biopsy
80
81
  @current = { :parameters => parameters, :score => score }
81
82
  self.update_best?
82
83
  return @combinator.next
83
- rescue
84
+ rescue
84
85
  @is_finished = true
85
86
  return nil
86
87
  end
@@ -90,7 +90,7 @@ module Biopsy
90
90
 
91
91
  def initialize(distributions, max_size, tabu)
92
92
  # tabu
93
- @tabu = tabu
93
+ @tabu = tabu
94
94
  # neighbourhood
95
95
  @max_size = max_size
96
96
  @members = []
@@ -108,7 +108,7 @@ module Biopsy
108
108
  n = 0
109
109
  begin
110
110
  if n >= 100
111
- # taking too long to generate a neighbour,
111
+ # taking too long to generate a neighbour,
112
112
  # loosen the neighbourhood structure so we explore further
113
113
  # debug("loosening distributions")
114
114
  @distributions.each do |param, dist|
@@ -162,15 +162,16 @@ module Biopsy
162
162
  attr_accessor :max_hood_size, :sd_increment_proportion
163
163
  attr_accessor :starting_sd_divisor, :backtrack_cutoff, :jump_cutoff
164
164
 
165
- Thread = Struct.new(:best, :tabu, :distributions,
166
- :standard_deviations, :recent_scores,
165
+ Thread = Struct.new(:best, :tabu, :distributions,
166
+ :standard_deviations, :recent_scores,
167
167
  :iterations_since_best, :backtracks,
168
168
  :current, :current_hood, :loaded,
169
169
  :score_history, :best_history)
170
170
 
171
- def initialize(parameter_ranges, threads=8, limit=nil)
171
+ def initialize(parameter_ranges, id, threads=8, limit=nil)
172
172
 
173
173
  @ranges = parameter_ranges
174
+ @id = id
174
175
 
175
176
  # solution tracking
176
177
  @best = nil
@@ -194,7 +195,7 @@ module Biopsy
194
195
  # logging
195
196
  @score_history = []
196
197
  @best_history = []
197
- @log_data = false
198
+ @log_data = true
198
199
  @logfiles = {}
199
200
  self.log_setup
200
201
 
@@ -323,7 +324,7 @@ module Biopsy
323
324
  mean = @ranges[param].index(value)
324
325
  range = @ranges[param]
325
326
  sd = self.sd_for_param(param, range)
326
- @distributions[param] = Biopsy::Distribution.new(mean,
327
+ @distributions[param] = Biopsy::Distribution.new(mean,
327
328
  range,
328
329
  @sd_increment_proportion,
329
330
  sd)
@@ -348,14 +349,13 @@ module Biopsy
348
349
  end
349
350
  if best[:parameters].nil?
350
351
  # this should never happen!
351
- best = @best
352
+ best = @best
352
353
  end
353
354
  best
354
355
  end
355
356
 
356
357
  def backtrack
357
358
  @backtracks += 1.0
358
- # debug('backtracked to best')
359
359
  @distributions.each_pair { |k, d| d.tighten }
360
360
  end
361
361
 
@@ -368,8 +368,8 @@ module Biopsy
368
368
  # use the gradient of recent best scores to update the distributions
369
369
  def adjust_distributions_using_gradient
370
370
  return if @recent_scores.length < 3
371
- vx = (1..@recent_scores.length).to_a.to_scale
372
- vy = @recent_scores.reverse.to_scale
371
+ vx = (1..@recent_scores.length).to_a.to_numeric
372
+ vy = @recent_scores.reverse.to_numeric
373
373
  r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
374
374
  slope = r.b
375
375
  if slope > 0
@@ -398,15 +398,17 @@ module Biopsy
398
398
  end
399
399
  end
400
400
 
401
- # check termination conditions
401
+ # check termination conditions
402
402
  # and return true if met
403
403
  def finished?
404
- return false unless @threads.all? { |t| t.recent_scores.size == @jump_cutoff }
404
+ return false unless @threads.all? do |t|
405
+ t.recent_scores.size == @jump_cutoff
406
+ end
405
407
  probabilities = self.recent_scores_combination_test
406
408
  n_significant = 0
407
- probabilities.each do |mann_u, levene|
409
+ probabilities.each do |mann_u, levene|
408
410
  if mann_u <= @adjusted_alpha && levene <= @convergence_alpha
409
- n_significant += 1
411
+ n_significant += 1
410
412
  end
411
413
  end
412
414
  finish = n_significant >= probabilities.size * 0.5
@@ -415,8 +417,8 @@ module Biopsy
415
417
  # returns a matrix of correlation probabilities for recent
416
418
  # scores between all threads
417
419
  def recent_scores_combination_test
418
- combinations =
419
- @threads.map{ |t| t.recent_scores.to_scale }.combination(2).to_a
420
+ combinations =
421
+ @threads.map{ |t| t.recent_scores.to_numeric }.combination(2).to_a
420
422
  combinations.map do |a, b|
421
423
  [Statsample::Test.u_mannwhitney(a, b).probability_exact,
422
424
  Statsample::Test::Levene.new([a,b]).probability]
@@ -431,10 +433,10 @@ module Biopsy
431
433
  def log_setup
432
434
  if @log_data
433
435
  require 'csv'
434
- @logfiles[:standard_deviations] = CSV.open('standard_deviations.csv', 'w')
435
- @logfiles[:best] = CSV.open('best.csv', 'w')
436
- @logfiles[:score] = CSV.open('score.csv', 'w')
437
- @logfiles[:params] = CSV.open('params.csv', 'w')
436
+ @logfiles[:standard_deviations] = CSV.open("#{@id}_standard_deviations.csv", 'w')
437
+ @logfiles[:best] = CSV.open("#{@id}_best.csv", 'w')
438
+ @logfiles[:score] = CSV.open("#{@id}_score.csv", 'w')
439
+ @logfiles[:params] = CSV.open("#{@id}_params.csv", 'w')
438
440
  end
439
441
  end
440
442
 
@@ -466,13 +468,14 @@ module Biopsy
466
468
  end
467
469
 
468
470
  def random_start_point
469
- Hash[@ranges.map { |p, r| [p, r.sample] }]
471
+ Hash[@ranges.map { |p, r| [p, r.sample] }]
470
472
  end
471
473
 
472
474
  def write_data
473
475
  require 'csv'
474
- now = Time.now.to_i
475
- CSV.open("../#{now}_scores.csv", "w") do |c|
476
+ pathmod = Settings.instance.no_tempdirs ? '' : '../'
477
+ path = File.expand_path("#{pathmod}#{@id}_scores.csv")
478
+ CSV.open(path, "w") do |c|
476
479
  c << %w(iteration thread score best)
477
480
  @threads.each_with_index do |t, t_idx|
478
481
  sh = t.score_history
@@ -482,8 +485,7 @@ module Biopsy
482
485
  end
483
486
  end
484
487
  end
485
- path = File.expand_path("../#{now}_scores.csv")
486
- puts "wrote TabuSearch run data to #{path}"
488
+ # puts "wrote TabuSearch run data to #{path}"
487
489
  end
488
490
 
489
491
  end # TabuSearch
@@ -30,6 +30,7 @@ module Biopsy
30
30
  attr_accessor :sweep_cutoff
31
31
  attr_accessor :keep_intermediates
32
32
  attr_accessor :gzip_intermediates
33
+ attr_accessor :no_tempdirs
33
34
 
34
35
  def initialize
35
36
  self.set_defaults
@@ -45,6 +46,7 @@ module Biopsy
45
46
  @sweep_cutoff = 100
46
47
  @keep_intermediates = false
47
48
  @gzip_intermediates = false
49
+ @no_tempdirs = false
48
50
  end
49
51
 
50
52
  # Loads settings from a YAML config file. If no file is
@@ -5,8 +5,8 @@ module Biopsy
5
5
 
6
6
  module VERSION
7
7
  MAJOR = 0
8
- MINOR = 2
9
- PATCH = 1
8
+ MINOR = 3
9
+ PATCH = 0
10
10
 
11
11
  STRING = [MAJOR, MINOR, PATCH].compact.join('.')
12
12
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biopsy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-06-02 00:00:00.000000000 Z
13
+ date: 2015-11-01 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
206
206
  version: '0'
207
207
  requirements: []
208
208
  rubyforge_project:
209
- rubygems_version: 2.2.2
209
+ rubygems_version: 2.4.6
210
210
  signing_key:
211
211
  specification_version: 4
212
212
  summary: framework for optimising any computational pipeline or program