RubyGems - biopsy - Versions diffs - 0.2.1 → 0.3.0 - Mend

biopsy 0.2.1 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/lib/biopsy/experiment.rb +34 -18
data/lib/biopsy/optimisers/parameter_sweeper.rb +5 -4
data/lib/biopsy/optimisers/tabu_search.rb +28 -26
data/lib/biopsy/settings.rb +2 -0
data/lib/biopsy/version.rb +2 -2
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3f45b9377c3de44caf8abf21b07576a6e89e0c03
-  data.tar.gz: 94e4a81fe98380482adc7b290f8067174684850d
+  metadata.gz: 227832bb75ba1c0114e59d8058a55c2ead937105
+  data.tar.gz: a3f5e6b364d5265878ed4a8f4e7f086282c2a156
 SHA512:
-  metadata.gz: 76d90535589d1f09f74bf424d7437a45855b1065b802bc3ba4d562cc30c1c8fd775583cd856e6de02fee919a9b34619875693a96b228ef686c7e658bb9b29ea5
-  data.tar.gz: 0dbc5e5d21297759affc2199ffff2fe90d25c1958a7380d531811bdded9fbe8caf8eb9fc0aed8b62ecb5d52d6f0f2e2292b480ae8a8e84c79f0e457f44f45c3a
+  metadata.gz: 06e5510f5546f565c080ff405f3406e27d8a69976ac3fe80a89814b85a9d24cbe98aa9fca01c0a38dd26d46af6a7889ebc36e1b250939577594ab4226f0180b9
+  data.tar.gz: 326da0583c62503acdd211ece069434ea63cbaafcce2e6fa5caabb4d3c3ff7e9125ada20dbf6c91e0f62c2f1f4f6bc32098b1300ee78b159a0ffc24807daba7d

data/lib/biopsy/experiment.rb CHANGED

@@ -23,7 +23,7 @@ module Biopsy
     # Returns a new Experiment
     def initialize(target, options:{}, threads:4, start:nil, algorithm:nil,
-                   timelimit:nil, verbosity: :quiet)
+                   timelimit:nil, verbosity: :quiet, id:nil)
       @threads = threads
       @start = start
       @algorithm = algorithm
@@ -40,6 +40,7 @@ module Biopsy
       self.select_starting_point
       @scores = {}
       @iteration_count = 0
+      set_id id
     end
     # return the set of parameters to evaluate first
@@ -63,9 +64,9 @@ module Biopsy
       max = Settings.instance.sweep_cutoff
       n = @target.count_parameter_permutations
       if n < max
-        @algorithm = ParameterSweeper.new(@target.parameters)
+        @algorithm = ParameterSweeper.new(@target.parameters, @id)
       else
-        @algorithm = TabuSearch.new(@target.parameters)
+        @algorithm = TabuSearch.new(@target.parameters, @id)
       end
     end
@@ -83,6 +84,7 @@ module Biopsy
       in_progress = true
       @algorithm.setup @start
       @current_params = @start
+      max_scores = @target.count_parameter_permutations
       while in_progress
         run_iteration
         # update the best result
@@ -100,7 +102,7 @@ module Biopsy
            end
         end
         # have we finished?
-        in_progress = !@algorithm.finished?
+        in_progress = !@algorithm.finished? && @scores.size < max_scores
         if in_progress && !(@timelimit.nil?)
           in_progress = (Time.now - start_time < @timelimit)
         end
@@ -117,25 +119,27 @@ module Biopsy
     # encompassing the program, objective(s) and optimiser.
     # Returns the output of the optimiser.
     def run_iteration
-      # create temp dir
-      Dir.chdir(self.create_tempdir) do
+      param_key = @current_params.to_s
+      result = nil
+      # lookup the result if possible
+      if @scores.key? param_key
+        result = @scores[param_key]
+      else
+        # create temp dir
+        curdir = Dir.pwd
+        Dir.chdir(self.create_tempdir) unless Settings.instance.no_tempdirs
         # run the target
         raw_output = @target.run @current_params.merge(@options)
         # evaluate with objectives
-        param_key = @current_params.to_s
-        result = nil
-        if @scores.key? param_key
-          result = @scores[param_key]
-        else
-          result = @objective.run_for_output(raw_output, @threads, nil)
-          @iteration_count += 1
-          self.print_progress(@iteration_count, @current_params, result, @best)
-        end
+        result = @objective.run_for_output(raw_output, @threads, nil)
+        @iteration_count += 1
+        self.print_progress(@iteration_count, @current_params, result, @best)
         @scores[@current_params.to_s] = result
-        # get next steps from optimiser
-        @current_params = @algorithm.run_one_iteration(@current_params, result)
+        self.cleanup
+        Dir.chdir(curdir) unless Settings.instance.no_tempdirs
       end
-      self.cleanup
+      # get next steps from optimiser
+      @current_params = @algorithm.run_one_iteration(@current_params, result)
     end
     def print_progress(iteration, params, score, best)
@@ -148,6 +152,7 @@ module Biopsy
     end
     def cleanup
+      return if Settings.instance.no_tempdirs
       # TODO: make this work
       # remove all but essential files
       essential_files = ""
@@ -183,6 +188,17 @@ module Biopsy
       token
     end
+    # set experiment ID with either user provided value, or date-time
+    # as fallback
+    def set_id id
+      @id = id
+      if @id.nil?
+        t = Time.now
+        parts = %w[y m d H M S Z].map{ |p| t.strftime "%#{p}" }
+        @id = "experiment_#{parts.join('_')}"
+      end
+    end
   end # end of class RunHandler
 end # end of module Biopsy

data/lib/biopsy/optimisers/parameter_sweeper.rb CHANGED

@@ -24,11 +24,12 @@ module Biopsy
   class Combinator
     include Enumerable
-    def initialize parameters
+    def initialize(parameters, id)
       @parameters = parameters
+      @id = id
     end
     def generate_combinations(index, opts, &block)
       if index == @parameters.length
         block.call opts.clone
@@ -80,7 +81,7 @@ module Biopsy
       @current = { :parameters => parameters, :score => score }
       self.update_best?
       return @combinator.next
-    rescue
+    rescue
       @is_finished = true
       return nil
     end

data/lib/biopsy/optimisers/tabu_search.rb CHANGED

@@ -90,7 +90,7 @@ module Biopsy
     def initialize(distributions, max_size, tabu)
       # tabu
-      @tabu = tabu
+      @tabu = tabu
       # neighbourhood
       @max_size = max_size
       @members = []
@@ -108,7 +108,7 @@ module Biopsy
       n = 0
       begin
         if n >= 100
-          # taking too long to generate a neighbour,
+          # taking too long to generate a neighbour,
           # loosen the neighbourhood structure so we explore further
           # debug("loosening distributions")
           @distributions.each do |param, dist|
@@ -162,15 +162,16 @@ module Biopsy
     attr_accessor :max_hood_size, :sd_increment_proportion
     attr_accessor :starting_sd_divisor, :backtrack_cutoff, :jump_cutoff
-    Thread = Struct.new(:best, :tabu, :distributions,
-                        :standard_deviations, :recent_scores,
+    Thread = Struct.new(:best, :tabu, :distributions,
+                        :standard_deviations, :recent_scores,
                         :iterations_since_best, :backtracks,
                         :current, :current_hood, :loaded,
                         :score_history, :best_history)
-    def initialize(parameter_ranges, threads=8, limit=nil)
+    def initialize(parameter_ranges, id, threads=8, limit=nil)
       @ranges = parameter_ranges
+      @id = id
       # solution tracking
       @best = nil
@@ -194,7 +195,7 @@ module Biopsy
       # logging
       @score_history = []
       @best_history = []
-      @log_data = false
+      @log_data = true
       @logfiles = {}
       self.log_setup
@@ -323,7 +324,7 @@ module Biopsy
       mean = @ranges[param].index(value)
       range = @ranges[param]
       sd = self.sd_for_param(param, range)
-      @distributions[param] = Biopsy::Distribution.new(mean,
+      @distributions[param] = Biopsy::Distribution.new(mean,
                                                       range,
                                                       @sd_increment_proportion,
                                                       sd)
@@ -348,14 +349,13 @@ module Biopsy
       end
       if best[:parameters].nil?
         # this should never happen!
-        best = @best
+        best = @best
       end
       best
     end
     def backtrack
       @backtracks += 1.0
-      # debug('backtracked to best')
       @distributions.each_pair { |k, d| d.tighten }
     end
@@ -368,8 +368,8 @@ module Biopsy
     # use the gradient of recent best scores to update the distributions
     def adjust_distributions_using_gradient
       return if @recent_scores.length < 3
-      vx = (1..@recent_scores.length).to_a.to_scale
-      vy = @recent_scores.reverse.to_scale
+      vx = (1..@recent_scores.length).to_a.to_numeric
+      vy = @recent_scores.reverse.to_numeric
       r = Statsample::Regression::Simple.new_from_vectors(vx,vy)
       slope = r.b
       if slope > 0
@@ -398,15 +398,17 @@ module Biopsy
       end
     end
-    # check termination conditions
+    # check termination conditions
     # and return true if met
     def finished?
-      return false unless @threads.all? { |t| t.recent_scores.size == @jump_cutoff }
+      return false unless @threads.all? do |t|
+        t.recent_scores.size == @jump_cutoff
+      end
       probabilities = self.recent_scores_combination_test
       n_significant = 0
-      probabilities.each do |mann_u, levene|
+      probabilities.each do |mann_u, levene|
         if mann_u <= @adjusted_alpha && levene <= @convergence_alpha
-          n_significant += 1
+          n_significant += 1
         end
       end
       finish = n_significant >= probabilities.size * 0.5
@@ -415,8 +417,8 @@ module Biopsy
     # returns a matrix of correlation probabilities for recent
     # scores between all threads
     def recent_scores_combination_test
-      combinations =
-      @threads.map{ |t| t.recent_scores.to_scale }.combination(2).to_a
+      combinations =
+      @threads.map{ |t| t.recent_scores.to_numeric }.combination(2).to_a
       combinations.map do |a, b|
         [Statsample::Test.u_mannwhitney(a, b).probability_exact,
          Statsample::Test::Levene.new([a,b]).probability]
@@ -431,10 +433,10 @@ module Biopsy
     def log_setup
       if @log_data
         require 'csv'
-        @logfiles[:standard_deviations] = CSV.open('standard_deviations.csv', 'w')
-        @logfiles[:best] = CSV.open('best.csv', 'w')
-        @logfiles[:score] = CSV.open('score.csv', 'w')
-        @logfiles[:params] = CSV.open('params.csv', 'w')
+        @logfiles[:standard_deviations] = CSV.open("#{@id}_standard_deviations.csv", 'w')
+        @logfiles[:best] = CSV.open("#{@id}_best.csv", 'w')
+        @logfiles[:score] = CSV.open("#{@id}_score.csv", 'w')
+        @logfiles[:params] = CSV.open("#{@id}_params.csv", 'w')
       end
     end
@@ -466,13 +468,14 @@ module Biopsy
     end
     def random_start_point
-      Hash[@ranges.map { |p, r| [p, r.sample] }]
+      Hash[@ranges.map { |p, r| [p, r.sample] }]
     end
     def write_data
       require 'csv'
-      now = Time.now.to_i
-      CSV.open("../#{now}_scores.csv", "w") do |c|
+      pathmod = Settings.instance.no_tempdirs ? '' : '../'
+      path = File.expand_path("#{pathmod}#{@id}_scores.csv")
+      CSV.open(path, "w") do |c|
         c << %w(iteration thread score best)
         @threads.each_with_index do |t, t_idx|
           sh = t.score_history
@@ -482,8 +485,7 @@ module Biopsy
           end
         end
       end
-      path = File.expand_path("../#{now}_scores.csv")
-      puts "wrote TabuSearch run data to #{path}"
+      # puts "wrote TabuSearch run data to #{path}"
     end
   end # TabuSearch

data/lib/biopsy/settings.rb CHANGED

@@ -30,6 +30,7 @@ module Biopsy
     attr_accessor :sweep_cutoff
     attr_accessor :keep_intermediates
     attr_accessor :gzip_intermediates
+    attr_accessor :no_tempdirs
     def initialize
       self.set_defaults
@@ -45,6 +46,7 @@ module Biopsy
       @sweep_cutoff = 100
       @keep_intermediates = false
       @gzip_intermediates = false
+      @no_tempdirs = false
     end
     # Loads settings from a YAML config file. If no file is

data/lib/biopsy/version.rb CHANGED

@@ -5,8 +5,8 @@ module Biopsy
   module VERSION
     MAJOR = 0
-    MINOR = 2
-    PATCH = 1
+    MINOR = 3
+    PATCH = 0
     STRING = [MAJOR, MINOR, PATCH].compact.join('.')
   end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: biopsy
 version: !ruby/object:Gem::Version
-  version: 0.2.1
+  version: 0.3.0
 platform: ruby
 authors:
 - Richard Smith-Unna
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-06-02 00:00:00.000000000 Z
+date: 2015-11-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -206,7 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.6
 signing_key:
 specification_version: 4
 summary: framework for optimising any computational pipeline or program