RubyGems - lab_tech - Versions diffs - 0.1.0 - Mend

lab_tech 0.1.0

Files changed (65) hide show

checksums.yaml +7 -0
data/MIT-LICENSE +20 -0
data/README.md +323 -0
data/Rakefile +30 -0
data/app/models/lab_tech/application_record.rb +5 -0
data/app/models/lab_tech/default_cleaner.rb +87 -0
data/app/models/lab_tech/experiment.rb +190 -0
data/app/models/lab_tech/observation.rb +40 -0
data/app/models/lab_tech/percentile.rb +41 -0
data/app/models/lab_tech/result.rb +130 -0
data/app/models/lab_tech/speedup.rb +65 -0
data/app/models/lab_tech/summary.rb +183 -0
data/config/routes.rb +2 -0
data/db/migrate/20190815192130_create_experiment_tables.rb +50 -0
data/lib/lab_tech.rb +176 -0
data/lib/lab_tech/engine.rb +6 -0
data/lib/lab_tech/version.rb +3 -0
data/lib/tasks/lab_tech_tasks.rake +4 -0
data/spec/dummy/Rakefile +6 -0
data/spec/dummy/app/assets/config/manifest.js +1 -0
data/spec/dummy/app/assets/javascripts/application.js +14 -0
data/spec/dummy/app/assets/stylesheets/application.css +15 -0
data/spec/dummy/app/controllers/application_controller.rb +2 -0
data/spec/dummy/app/jobs/application_job.rb +2 -0
data/spec/dummy/app/models/application_record.rb +3 -0
data/spec/dummy/bin/bundle +3 -0
data/spec/dummy/bin/rails +4 -0
data/spec/dummy/bin/rake +4 -0
data/spec/dummy/bin/setup +33 -0
data/spec/dummy/bin/update +28 -0
data/spec/dummy/config.ru +5 -0
data/spec/dummy/config/application.rb +35 -0
data/spec/dummy/config/boot.rb +5 -0
data/spec/dummy/config/database.yml +25 -0
data/spec/dummy/config/environment.rb +5 -0
data/spec/dummy/config/environments/development.rb +46 -0
data/spec/dummy/config/environments/production.rb +71 -0
data/spec/dummy/config/environments/test.rb +36 -0
data/spec/dummy/config/initializers/application_controller_renderer.rb +8 -0
data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/spec/dummy/config/initializers/cors.rb +16 -0
data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
data/spec/dummy/config/initializers/inflections.rb +16 -0
data/spec/dummy/config/initializers/mime_types.rb +4 -0
data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
data/spec/dummy/config/locales/en.yml +33 -0
data/spec/dummy/config/puma.rb +34 -0
data/spec/dummy/config/routes.rb +3 -0
data/spec/dummy/config/spring.rb +6 -0
data/spec/dummy/db/schema.rb +52 -0
data/spec/dummy/db/test.sqlite3 +0 -0
data/spec/dummy/log/development.log +0 -0
data/spec/dummy/log/test.log +1519 -0
data/spec/examples.txt +79 -0
data/spec/models/lab_tech/default_cleaner_spec.rb +32 -0
data/spec/models/lab_tech/experiment_spec.rb +110 -0
data/spec/models/lab_tech/percentile_spec.rb +85 -0
data/spec/models/lab_tech/result_spec.rb +198 -0
data/spec/models/lab_tech/speedup_spec.rb +133 -0
data/spec/models/lab_tech/summary_spec.rb +325 -0
data/spec/models/lab_tech_spec.rb +23 -0
data/spec/rails_helper.rb +62 -0
data/spec/spec_helper.rb +98 -0
data/spec/support/misc_helpers.rb +7 -0
metadata +238 -0

data/app/models/lab_tech/experiment.rb ADDED

@@ -0,0 +1,190 @@
module LabTech
  # A persisted Scientist experiment.  The record carries the rollout
  # percentage (+percent_enabled+) and the running outcome counters
  # (+equivalent_count+, +timed_out_count+, +other_error_count+), and owns the
  # results (and, through them, the observations) recorded for it.
  class Experiment < ActiveRecord::Base
    self.table_name = "lab_tech_experiments"
    include ::Scientist::Experiment

    # Upper bound on how many times .named attempts its lookup after losing a
    # find-or-create race, so a persistent failure surfaces instead of looping.
    MAX_NAMED_ATTEMPTS = 3

    has_many :results, class_name: "LabTech::Result", dependent: :destroy
    has_many :observations, class_name: "LabTech::Observation", through: :results

    # The display helpers below only exist when TablePrint is loaded.
    if defined?( TablePrint ) # a Very Handy Gem Indeed: http://tableprintgem.com/
      tp.set self, *[
        :id,
        { :name              => { width: 100 } },
        { :pct_enabled       => { display_name: "% Enabled" } },
        { :pct_correct       => { display_name: "% Correct" } },
        { :equivalent_count  => { display_name: "Equivalent" } },
        { :timed_out_count   => { display_name: "Timed Out" } },
        { :other_error_count => { display_name: "Other Errors" } },
        :total_count
      ]

      # @return [String] percent_enabled formatted as e.g. "50.0%"
      def pct_enabled
        format_pct( percent_enabled )
      end

      # @return [String] share of equivalent results, or "N/A" when nothing
      #   has been counted yet (avoids dividing by zero)
      def pct_correct
        return "N/A" if total_count.zero?
        format_pct( equivalent_count, total_count )
      end

      # @return [Integer] sum of the three outcome counters
      def total_count
        equivalent_count + timed_out_count + other_error_count
      end

      # Formats +x+ as a percentage; when +y+ is given, formats x/y instead.
      private def format_pct(x, y=nil)
        x = 100.0 * x / y if y
        "%3.1f%%" % x
      end
    end

    ##### CLASS METHODS #####

    # Looks up an experiment by name (String; created if missing) or by id
    # (Integer), yields it to the optional block, and returns it.
    #
    # A RecordNotUnique raised by the lookup (e.g. another process created the
    # same name first) is retried up to MAX_NAMED_ATTEMPTS times and then
    # re-raised.  Only the lookup is retried; the caller's block is yielded
    # exactly once.
    #
    # @param experiment_name_or_id [String, Integer]
    # @yieldparam exp [LabTech::Experiment, nil]
    # @return [LabTech::Experiment, nil] nil when the argument is neither a
    #   String nor an Integer
    # @raise [ActiveRecord::RecordNotFound] from +find+ for an unknown id
    # @raise [ActiveRecord::RecordNotUnique] when every attempt collides
    def self.named(experiment_name_or_id)
      attempts = 0
      begin
        exp =
          case experiment_name_or_id
          when String  then find_or_create_by(name: experiment_name_or_id)
          when Integer then find(experiment_name_or_id)
          end
      rescue ActiveRecord::RecordNotUnique
        attempts += 1
        retry if attempts < MAX_NAMED_ATTEMPTS
        raise
      end

      yield exp if block_given?
      exp
    end

    ##### INSTANCE METHODS #####

    # Exposes the comparator stored in @_scientist_comparator
    # (presumably set by Scientist's +compare+ -- confirm against Scientist).
    def comparator
      @_scientist_comparator
    end

    # Prints every mismatched result (optionally capped at +limit+) to +io+.
    # The block, if given, is forwarded to Result#compare_observations.
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [#puts] destination stream
    # @return [nil]
    def compare_mismatches(limit: nil, io: $stdout, &block)
      mismatches = results.mismatched.includes(:observations)
      return if mismatches.empty?
      mismatches = mismatches.limit(limit) if limit

      print_result_report( mismatches, io: io ) do |result|
        result.compare_observations( io: io, &block )
      end
    end

    # Turns the experiment off by setting percent_enabled to 0.
    def disable
      update_attribute :percent_enabled, 0
    end

    # Randomly decides whether this run is enabled: true when a random
    # integer in 0..99 is below percent_enabled.
    def enabled?
      n = rand(100)
      fail "WTF, Ruby?" unless (0..99).cover?(n) # Paranoia? Indirect documentation? YOU DECIDE.
      n < percent_enabled
    end

    # Sets percent_enabled (defaults to fully enabled).
    def enable(percent_enabled: 100)
      update_attribute :percent_enabled, percent_enabled
    end

    # Oh, this is a fun one: apparently Scientist::Experiment#name is
    # overriding the ActiveRecord attribute.  Override it back.
    def name         ; read_attribute  :name        ; end
    def name=(value) ; write_attribute :name, value ; end

    # Persists a Scientist result for this experiment.  Skipped in the Rails
    # test environment unless LabTech.publish_results_in_test_mode? is true.
    def publish(scientist_result)
      return if Rails.env.test? && !LabTech.publish_results_in_test_mode?
      LabTech::Result.record_a_science( self, scientist_result )
    end

    # I don't encourage the willy-nilly destruction of experimental results...
    # ...but sometimes you just need to start over.
    #
    # Deletes this experiment's observations and results, zeroes the outcome
    # counters, and prints how many rows were removed.
    def purge_data
      # Returns how many rows of +scope+ were deleted, measured by counting
      # before and after inside one transaction.
      delete_and_count = ->(scope) {
        n0, n1 = 0, 0
        transaction do
          n0 = scope.count
          scope.delete_all
          n1 = scope.count
        end
        n0 - n1
      }

      n = delete_and_count.call( LabTech::Observation.where(result_id: self.result_ids) )
      m = delete_and_count.call( self.results )

      # `update` replaces `update_attributes`, which Rails 6.0 deprecated and
      # Rails 6.1 removed.
      update(
        equivalent_count:  0,
        timed_out_count:   0,
        other_error_count: 0,
      )

      puts "Deleted #{m} result(s) and #{n} observations"
    end

    # Bumps the in-memory run counter and installs the default cleaner (when
    # none is set) before delegating to Scientist's +run+.
    def run(*)
      increment_run_count
      provide_default_cleaner
      super
    end

    # Prints the exception class and message of each candidate observation
    # for every non-timeout errored result (optionally capped at +limit+).
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [#puts] destination stream
    # @return [nil]
    def summarize_errors(limit: nil, io: $stdout)
      errors = results.other_error
      return if errors.empty?
      errors = errors.limit(limit) if limit

      print_result_report( errors, io: io ) do |result|
        result.candidates.each do |observation|
          # Write to the requested stream, and interpolate so that a missing
          # class or message prints as blank instead of raising TypeError.
          io.puts "  * #{observation.exception_class}:  #{observation.exception_message}"
        end
      end
    end

    # Prints the summary to $stdout, padded by blank lines.
    def summarize_results
      puts "", summary, ""
    end

    # Reloads the record and wraps it in a LabTech::Summary.
    def summary
      reload
      LabTech::Summary.new(self)
    end

    private

    def increment_run_count
      LabTech.run_count[self.name] += 1
    end

    def provide_default_cleaner
      return if cleaner.present?
      clean { |value| LabTech::DefaultCleaner.call(value) }
    end

    # Shared framing for compare_mismatches and summarize_errors: prints a
    # banner, then for each result a ruled section whose body is produced by
    # the block, then a closing banner.
    #
    # @param records [Enumerable<LabTech::Result>]
    # @param io [#puts] destination stream
    # @yieldparam result [LabTech::Result]
    # @return [nil]
    def print_result_report(records, io:)
      io.puts
      io.puts "=" * 100
      io.puts "Comparing results for #{name}:"
      io.puts

      records.each do |result|
        io.puts
        io.puts "-" * 100
        io.puts "Result ##{result.id}"
        yield result
        io.puts "-" * 100
      end

      io.puts
      io.puts "=" * 100
      io.puts
      nil
    end
  end
end

data/app/models/lab_tech/observation.rb ADDED

@@ -0,0 +1,40 @@
module LabTech
  # Persisted copy of a single Scientist observation (control or candidate)
  # belonging to a LabTech::Result.
  class Observation < ApplicationRecord
    self.table_name = "lab_tech_observations"

    belongs_to :result, class_name: "LabTech::Result", foreign_key: :result_id, optional: true

    serialize :value

    # @return [Boolean] true when any of the exception class, message, or
    #   backtrace attributes holds a present value
    def raised_error?
      [ exception_class, exception_message, exception_backtrace ].any?( &:present? )
    end

    # Copies name, duration, cleaned value, and any exception details from
    # a Scientist observation onto this record (without saving it).
    #
    # @param scientist_observation [Scientist::Observation]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_observation)
      if !scientist_observation.kind_of?( Scientist::Observation )
        raise ArgumentError, "expected a Scientist::Observation but got #{scientist_observation.class}"
      end

      self.name     = scientist_observation.name
      self.duration = scientist_observation.duration
      self.value    = scientist_observation.cleaned_value

      record_errors scientist_observation.exception
    end

    # @return [Boolean] true when the recorded exception class is Timeout::Error
    def timed_out?
      exception_class == "Timeout::Error"
    end

    private

    # Stores the exception's class, message, and backtrace; a nil exception
    # leaves the record untouched.
    def record_errors(exception)
      unless exception.nil?
        self.exception_class     = exception.class
        self.exception_message   = exception.message
        self.exception_backtrace = exception.backtrace
      end
    end
  end
end

data/app/models/lab_tech/percentile.rb ADDED

@@ -0,0 +1,41 @@
module LabTech
  # Nearest-rank percentile lookup over a list that is already sorted in
  # ascending order.
  module Percentile
    extend self

    MIN_PERCENTILE = 0
    MAX_PERCENTILE = 100

    # Returns the element of +list+ at the given percentile.
    #
    # @param pct [Integer] percentile, MIN_PERCENTILE..MAX_PERCENTILE inclusive
    # @param list [Array] values sorted ascending
    # @return [Object, nil] the selected element; nil for an empty list
    # @raise [RuntimeError] when +list+ is not sorted
    # @raise [ArgumentError] when +pct+ is not an Integer in range
    def call(pct, list)
      # Make sure this list is actually sorted
      fail "Sorry, this isn't sorted: #{list.inspect}" unless sorted?(list)

      msg = "Please pass an integer between #{MIN_PERCENTILE} and #{MAX_PERCENTILE}, not #{pct.inspect}"
      raise ArgumentError, msg unless pct.kind_of?(Integer)
      raise ArgumentError, msg unless (MIN_PERCENTILE..MAX_PERCENTILE).cover?(pct)

      return list.first if pct == MIN_PERCENTILE # Avoid the need for a bounds check later
      return list.last  if pct == MAX_PERCENTILE # By definition, I guess

      # Nearest-rank: the 1-based rank is ceil(pct% of the list length).
      # Computed with exact Rational arithmetic because the float form
      # (0.01 * pct * length) can land just above a whole number -- e.g.
      # 0.01 * 7 * 100 == 7.000000000000001 -- and then ceil picks the
      # element one position too far.
      rank = Rational( pct * list.length, MAX_PERCENTILE ).ceil
      list[ rank - 1 ]
    end

    private

    # True when every adjacent pair is in non-decreasing order (vacuously
    # true for lists of fewer than two elements).
    def sorted?(list)
      list.each_cons(2).all? { |a, b| a <= b }
    end
  end
end

data/app/models/lab_tech/result.rb ADDED

@@ -0,0 +1,130 @@
module LabTech
  # One recorded run of an experiment: the control observation, one or more
  # candidate observations, and summary stats (equivalence, error flag, and
  # -- for single-candidate runs -- timing comparison).
  class Result < ApplicationRecord
    self.table_name = "lab_tech_results"

    # Exception class name that marks an observation as a timeout.
    TIMEOUT_EXCEPTION_CLASS = "Timeout::Error"

    belongs_to :experiment, class_name: "LabTech::Experiment"
    has_many :observations, class_name: "LabTech::Observation", dependent: :destroy
    has_one :control,     ->() { where("name  = 'control'") }, class_name: "LabTech::Observation"
    has_many :candidates, ->() { where("name != 'control'") }, class_name: "LabTech::Observation"

    serialize :context

    # NOTE: I don't think this accounts for the possibility that both the
    # control and candidate might raise, and if so, whether raising the same
    # exception should be considered equivalent.  (Unless I already thought of this?)
    scope :correct,     -> { where( equivalent: true,  raised_error: false ) }
    scope :mismatched,  -> { where( equivalent: false, raised_error: false ) }
    scope :errored,     -> { where( equivalent: false, raised_error: true ) }

    # Hash condition matching observations whose exception class is the
    # timeout class.  Built as a hash (rather than a hand-written SQL string)
    # so the adapter quotes the value as a string literal; a double-quoted
    # "Timeout::Error" is an identifier in standard SQL and fails on
    # PostgreSQL.  Evaluated lazily so Observation is only referenced when a
    # scope is actually used.
    timeout_condition = -> {
      { LabTech::Observation.table_name => { exception_class: TIMEOUT_EXCEPTION_CLASS } }
    }
    scope :timed_out,   -> { errored.joins(:candidates).where(     timeout_condition.() ) }
    scope :other_error, -> { errored.joins(:candidates).where.not( timeout_condition.() ) }

    after_create :increment_experiment_counters

    ##### CLASS METHODS #####

    # Creates and saves a Result for +experiment+ populated from a
    # Scientist result.
    #
    # @param experiment [LabTech::Experiment]
    # @param scientist_result [Scientist::Result]
    # @return [LabTech::Result]
    def self.record_a_science( experiment, scientist_result )
      self.create!(experiment: experiment) do |result|
        result.record_a_science scientist_result
      end
    end

    ##### INSTANCE METHODS #####

    # Having multiple candidates is annoying; I've mistyped this one a lot
    def candidate
      candidates.first
    end

    # Default formatter for compare_observations: one line per observation
    # showing its name and inspected value.
    DEFAULT_COMPARISON = ->(control, candidate) {
      [ control, candidate ].map { |obs|
        "    %20s # => %s" % [ obs.name, obs.value.inspect ]
      }
    }

    # Prints a control-vs-candidate comparison for each candidate.
    #
    # @param io [#puts] destination stream
    # @yield [control, candidate] optional custom formatter; its return value
    #   is passed to io.puts (defaults to DEFAULT_COMPARISON)
    # @return [nil]
    def compare_observations(io: $stdout, &block)
      block ||= DEFAULT_COMPARISON
      candidates.each do |candidate|
        io.puts block.( control, candidate )
      end
      return nil
    end

    # Populates this (unsaved) result from a Scientist result: context,
    # one built observation per control/candidate, and summary stats.
    #
    # @param scientist_result [Scientist::Result]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_result)
      unless scientist_result.kind_of?( Scientist::Result )
        raise ArgumentError, "expected a Scientist::Result but got #{scientist_result.class}"
      end

      self.context = scientist_result.context

      record_observation scientist_result.control
      scientist_result.candidates.each do |candidate|
        record_observation candidate
      end

      record_simple_stats scientist_result
    end

    # @return [LabTech::Speedup, nil] timing comparison, or nil unless the
    #   result has exactly one candidate
    def speedup
      return nil unless candidates.count == 1

      LabTech::Speedup.new(
        baseline:   control.duration,
        comparison: candidate.duration,
        time:       time_delta,
        factor:     speedup_factor,
      )
    end

    # @return [Boolean] true when any candidate observation timed out
    def timed_out?
      candidates.any?(&:timed_out?)
    end

    private

    # Bumps exactly one of the experiment's counters for this result:
    # equivalent, timed out, or (everything else) other error.
    def increment_experiment_counters
      increment = ->(count) {
        Experiment.increment_counter count, self.experiment_id
      }

      case
      when equivalent ; increment.( :equivalent_count )
      when timed_out? ; increment.( :timed_out_count )
      else            ; increment.( :other_error_count )
      end
    end

    def record_observation(scientist_observation)
      self.observations.build do |observation|
        observation.record_a_science scientist_observation
      end
    end

    def record_simple_stats(scientist_result)
      cont, cands = scientist_result.control, scientist_result.candidates

      self.equivalent = cands.all? { |cand| cand.equivalent_to?(cont, &experiment.comparator) }

      # Flagged only when a candidate raised and the control did not.
      raised = ->(scientist_observation) { scientist_observation.exception.present? }
      self.raised_error = !raised.(cont) && cands.any?(&raised)

      # Time delta makes no sense if you're running more than one candidate at a time
      if cands.length == 1
        self.control_duration   = cont       .duration
        self.candidate_duration = cands.first.duration

        x = LabTech::Speedup.new(
          baseline:   control_duration,
          comparison: candidate_duration,
        )
        self.time_delta     = x.time
        self.speedup_factor = x.factor
      end
    end
  end
end

data/app/models/lab_tech/speedup.rb ADDED

@@ -0,0 +1,65 @@
module LabTech
  # Value object comparing two durations: the absolute time saved and a
  # signed "speedup factor".  Instances order by factor.
  class Speedup
    include Comparable

    attr_reader :baseline, :comparison, :time, :factor

    # @return [Float, nil] baseline minus comparison (positive when the
    #   comparison is faster); nil when either input is nil
    def self.compute_time_delta(baseline, comparison)
      return nil if baseline.nil?
      return nil if comparison.nil?

      baseline.to_f - comparison.to_f
    end

    # Signed speedup factor:
    # * comparison faster -> +(baseline / comparison)
    # * equal durations   -> 0
    # * comparison slower -> -(comparison / baseline)
    #
    # @return [Numeric, nil] nil when either input is nil or zero
    def self.compute_factor(baseline, comparison)
      # Timing data should never be zero
      return nil if [ baseline, comparison ].any? { |e| e.to_f.zero? }

      time = compute_time_delta(baseline, comparison)
      return nil if time.nil?

      case
      when time > 0   ; +1 * baseline   / comparison
      when time.zero? ;  0
      when time < 0   ; -1 * comparison / baseline
      end
    end

    # Any of the values may be omitted; +time+ and +factor+ are derived from
    # +baseline+ and +comparison+ when not supplied.  Supplied values are
    # converted with #to_f.
    def initialize(baseline: nil, comparison: nil, time: nil, factor: nil)
      @baseline   = baseline   &.to_f
      @comparison = comparison &.to_f
      @time       = time       &.to_f || compute_time_delta
      @factor     = factor     &.to_f || compute_factor
    end

    # Orders speedups by factor.
    #
    # @return [Integer, nil] -1, 0, or 1; nil when +other+ is not a Speedup
    #   or either side has no factor.  (Returning nil rather than some other
    #   object is what Comparable requires: it lets == answer false and makes
    #   <, >, and sort raise a clear "comparison ... failed" error.)
    def <=>(other)
      return nil unless other.kind_of?(self.class)
      return nil if self .factor.nil?
      return nil if other.factor.nil?

      self.factor <=> other.factor
    end

    # @return [Boolean] true when time and factor are both present and agree
    #   with the values derivable from baseline and comparison (when those
    #   are available)
    def valid?
      return false if time.nil?
      return false if factor.nil?

      expected_time_delta = compute_time_delta
      expected_factor     = compute_factor

      return false if expected_time_delta && ( time   != expected_time_delta )
      return false if expected_factor     && ( factor != expected_factor     )
      true
    end

    private

    def compute_time_delta
      self.class.compute_time_delta(baseline, comparison)
    end

    def compute_factor
      self.class.compute_factor(baseline, comparison)
    end
  end
end