RubyGems - lab_tech - Versions diffs - 0.1.0 - Mend

lab_tech 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

checksums.yaml +7 -0
data/MIT-LICENSE +20 -0
data/README.md +323 -0
data/Rakefile +30 -0
data/app/models/lab_tech/application_record.rb +5 -0
data/app/models/lab_tech/default_cleaner.rb +87 -0
data/app/models/lab_tech/experiment.rb +190 -0
data/app/models/lab_tech/observation.rb +40 -0
data/app/models/lab_tech/percentile.rb +41 -0
data/app/models/lab_tech/result.rb +130 -0
data/app/models/lab_tech/speedup.rb +65 -0
data/app/models/lab_tech/summary.rb +183 -0
data/config/routes.rb +2 -0
data/db/migrate/20190815192130_create_experiment_tables.rb +50 -0
data/lib/lab_tech.rb +176 -0
data/lib/lab_tech/engine.rb +6 -0
data/lib/lab_tech/version.rb +3 -0
data/lib/tasks/lab_tech_tasks.rake +4 -0
data/spec/dummy/Rakefile +6 -0
data/spec/dummy/app/assets/config/manifest.js +1 -0
data/spec/dummy/app/assets/javascripts/application.js +14 -0
data/spec/dummy/app/assets/stylesheets/application.css +15 -0
data/spec/dummy/app/controllers/application_controller.rb +2 -0
data/spec/dummy/app/jobs/application_job.rb +2 -0
data/spec/dummy/app/models/application_record.rb +3 -0
data/spec/dummy/bin/bundle +3 -0
data/spec/dummy/bin/rails +4 -0
data/spec/dummy/bin/rake +4 -0
data/spec/dummy/bin/setup +33 -0
data/spec/dummy/bin/update +28 -0
data/spec/dummy/config.ru +5 -0
data/spec/dummy/config/application.rb +35 -0
data/spec/dummy/config/boot.rb +5 -0
data/spec/dummy/config/database.yml +25 -0
data/spec/dummy/config/environment.rb +5 -0
data/spec/dummy/config/environments/development.rb +46 -0
data/spec/dummy/config/environments/production.rb +71 -0
data/spec/dummy/config/environments/test.rb +36 -0
data/spec/dummy/config/initializers/application_controller_renderer.rb +8 -0
data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/spec/dummy/config/initializers/cors.rb +16 -0
data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
data/spec/dummy/config/initializers/inflections.rb +16 -0
data/spec/dummy/config/initializers/mime_types.rb +4 -0
data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
data/spec/dummy/config/locales/en.yml +33 -0
data/spec/dummy/config/puma.rb +34 -0
data/spec/dummy/config/routes.rb +3 -0
data/spec/dummy/config/spring.rb +6 -0
data/spec/dummy/db/schema.rb +52 -0
data/spec/dummy/db/test.sqlite3 +0 -0
data/spec/dummy/log/development.log +0 -0
data/spec/dummy/log/test.log +1519 -0
data/spec/examples.txt +79 -0
data/spec/models/lab_tech/default_cleaner_spec.rb +32 -0
data/spec/models/lab_tech/experiment_spec.rb +110 -0
data/spec/models/lab_tech/percentile_spec.rb +85 -0
data/spec/models/lab_tech/result_spec.rb +198 -0
data/spec/models/lab_tech/speedup_spec.rb +133 -0
data/spec/models/lab_tech/summary_spec.rb +325 -0
data/spec/models/lab_tech_spec.rb +23 -0
data/spec/rails_helper.rb +62 -0
data/spec/spec_helper.rb +98 -0
data/spec/support/misc_helpers.rb +7 -0
metadata +238 -0

data/spec/models/lab_tech/speedup_spec.rb ADDED

@@ -0,0 +1,133 @@
+require 'rails_helper'
+RSpec.describe LabTech::Speedup do
+  # Some quick reference calculations.
+  #
+  # baseline | comparison | time | factor | comment
+  # 2.0      | 1.0        | +1.0 | +2.0   |
+  # 2.0      | 1.5        | +0.5 | +1.333 |
+  # 2.0      | 2.0        |  0.0 | +0.0   | zero by definition
+  # 2.0      | 2.5        | -0.5 | -1.25  |
+  # 2.0      | 3.0        | -1.0 | -1.5   |
+  # 2.0      | 3.5        | -1.5 | -1.75  |
+  # 2.0      | 4.0        | -2.0 | -2.0   |
+  specify ".compute_time_delta" do
+    aggregate_failures do
+      expect( described_class.compute_time_delta( 2.0, 1.0 ) ).to be_within( 0.001 ).of( +1.0 )
+      expect( described_class.compute_time_delta( 2.0, 1.5 ) ).to be_within( 0.001 ).of( +0.5 )
+      expect( described_class.compute_time_delta( 2.0, 2.0 ) ).to be_within( 0.001 ).of(  0.0 )
+      expect( described_class.compute_time_delta( 2.0, 2.5 ) ).to be_within( 0.001 ).of( -0.5 )
+      expect( described_class.compute_time_delta( 2.0, 3.0 ) ).to be_within( 0.001 ).of( -1.0 )
+      expect( described_class.compute_time_delta( 2.0, 3.5 ) ).to be_within( 0.001 ).of( -1.5 )
+      expect( described_class.compute_time_delta( 2.0, 4.0 ) ).to be_within( 0.001 ).of( -2.0 )
+    end
+  end
+  specify ".compute_factor" do
+    aggregate_failures do
+      expect( described_class.compute_factor( 2.0, 1.0 ) ).to be_within( 0.001 ).of( +2.0   )
+      expect( described_class.compute_factor( 2.0, 1.5 ) ).to be_within( 0.001 ).of( +1.333 )
+      expect( described_class.compute_factor( 2.0, 2.0 ) ).to be_within( 0.001 ).of(  0.0   )
+      expect( described_class.compute_factor( 2.0, 2.5 ) ).to be_within( 0.001 ).of( -1.25  )
+      expect( described_class.compute_factor( 2.0, 3.0 ) ).to be_within( 0.001 ).of( -1.5   )
+      expect( described_class.compute_factor( 2.0, 3.5 ) ).to be_within( 0.001 ).of( -1.75  )
+      expect( described_class.compute_factor( 2.0, 4.0 ) ).to be_within( 0.001 ).of( -2.0   )
+    end
+  end
+  def new_speedup(baseline = nil, comparison = nil, time = nil, factor = nil)
+    described_class.new( baseline: baseline, comparison: comparison, time: time, factor: factor )
+  end
+  it "acts like a simple model when all attributes are provided" do
+    x = new_speedup( 2.0, 1.0, -1.0, 2.0 )
+    expect( x.baseline   ).to eq( +2.0 )
+    expect( x.comparison ).to eq( +1.0 )
+    expect( x.time       ).to eq( -1.0 )
+    expect( x.factor     ).to eq( +2.0 )
+  end
+  it "cheerfully tolerates missing baseline and comparison" do
+    x = new_speedup( nil, nil, -1.0, 2.0 )
+    expect( x.baseline   ).to be nil
+    expect( x.comparison ).to be nil
+    expect( x.time       ).to eq( -1.0 )
+    expect( x.factor     ).to eq( +2.0 )
+  end
+  it "computes time and factor if they're missing (and it has enough data to do so)" do
+    x = new_speedup( 2.0, 1.0, nil, nil )
+    expect( x.baseline   ).to eq( +2.0 )
+    expect( x.comparison ).to eq( +1.0 )
+    expect( x.time       ).to eq( +1.0 )
+    expect( x.factor     ).to eq( +2.0 )
+  end
+  it "doesn't compute time and factor if baseline is missing" do
+    x = new_speedup( 2.0, nil, nil, nil )
+    expect( x.baseline   ).to eq( +2.0 )
+    expect( x.comparison ).to be nil
+    expect( x.time       ).to be nil
+    expect( x.factor     ).to be nil
+  end
+  it "doesn't compute time and factor if comparison is missing" do
+    x = new_speedup( nil, 2.0, nil, nil )
+    expect( x.baseline   ).to be nil
+    expect( x.comparison ).to eq( +2.0 )
+    expect( x.time       ).to be nil
+    expect( x.factor     ).to be nil
+  end
+  it "is Comparable" do
+    x = new_speedup( 2.0, 1.0 )
+    y = new_speedup( 2.0, 2.0 )
+    z = new_speedup( 2.0, 3.0 )
+    expect( [ x, z, y ].sort ).to eq( [ z, y, x ] )
+  end
+  it "is not valid if time is nil" do
+    x = new_speedup( nil, nil, -1.0, 2.0 )
+    allow( x ).to receive( :time ).and_return nil
+    expect( x ).to_not be_valid
+  end
+  it "is not valid if factor is nil" do
+    x = new_speedup( nil, nil, -1.0, 2.0 )
+    allow( x ).to receive( :factor ).and_return nil
+    expect( x ).to_not be_valid
+  end
+  it "is valid if time and factor are present (and can't be disproved)" do
+    x = new_speedup( nil, nil, -1.0, 2.0 )
+    expect( x.time   ).to be_present # precondition check
+    expect( x.factor ).to be_present # precondition check
+    expect( x ).to be_valid
+  end
+  it "is not valid if time doesn't agree with timing data" do
+    x = new_speedup( 2.0, 1.0, nil, nil )
+    allow( x ).to receive( :time ).and_return( 42 )
+    expect( x ).to_not be_valid
+  end
+  it "is not valid if factor doesn't agree with timing data" do
+    x = new_speedup( 2.0, 1.0, nil, nil )
+    allow( x ).to receive( :factor ).and_return( 42 )
+    expect( x ).to_not be_valid
+  end
+end

data/spec/models/lab_tech/summary_spec.rb ADDED

@@ -0,0 +1,325 @@
+require 'rails_helper'
+RSpec.describe LabTech::Summary do
+  let!(:experiment) { LabTech::Experiment.create(name: "wibble", percent_enabled: 100) }
+  let(:summary_text) { experiment.summary.to_s }
+  def record_experiment(cont: "foo", cand: "foo", speedup_factor: nil, baseline: 1.0, comparison: nil)
+    LabTech.publish_results_in_test_mode do
+      LabTech.science "wibble" do |e|
+        e.use { cont.respond_to?(:call) ? cont.call : cont }
+        e.try { cand.respond_to?(:call) ? cand.call : cand }
+      end
+#######################################
+########   #####    ######      #####
+   ##     ##   ##   ##   ##    ##   ##
+   ##    ##     ##  ##    ##  ##     ##
+   ##    ##     ##  ##    ##  ##     ##
+   ##    ##     ##  ##    ##  ##     ##
+   ##     ##   ##   ##   ##    ##   ##
+   ##      #####    ######      #####
+#######################################
+# TODO: use Scientist's fabricate_durations_for_testing_purposes to make
+# the below comment (and code?) unnecessary
+#######################################
+      # Don't bother stubbing Scientist's clock; you'll get the wrong results 50%
+      # of the time because it runs the `try` and `use` blocks in random order,
+      # and then you'll be very very confused.
+      if speedup_factor && comparison.nil?
+        baseline = baseline.to_f
+        comparison = \
+          case
+          when speedup_factor  > 0 ; +1.0 * baseline / speedup_factor
+          when speedup_factor == 0 ; +1.0 * baseline
+          else                     ; -1.0 * baseline * speedup_factor
+          end
+      end
+      if baseline && comparison && speedup_factor.nil?
+        speedup_factor = LabTech::Speedup.compute_factor(baseline, comparison)
+      end
+      if baseline && comparison && speedup_factor
+        result = experiment.results.last
+        result.update_attributes({
+          control_duration:   baseline,
+          candidate_duration: comparison,
+          speedup_factor:     speedup_factor,
+          time_delta:         baseline - comparison,
+        })
+        # Technically, we only needed to update the result... but for consistency, let's update the observations too.
+        result.control          .update_attributes duration: baseline
+        result.candidates.first .update_attributes duration: comparison
+      end
+    end # LabTech.publish_results_in_test_mode do
+  end
+  def wtf
+    puts
+    puts "", "Experiment"   ; tp experiment
+    puts "", "Results"      ; tp experiment.results
+    puts "", "Observations" ; tp experiment.observations
+    puts
+  end
+  context "when there are no results" do
+    before do
+      expect( experiment.results ).to be_empty # precondition check
+    end
+    it "says there are no results" do
+      expect( summary_text ).to match( /No results for experiment/ )
+    end
+  end
+  context "when the only result is a mismatch" do
+    before do
+      record_experiment cont: "foo", cand: "bar"
+    end
+    it "reports the correct counts" do
+      aggregate_failures do
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) correct" )
+        expect( summary_text ).to     include( "1 of 1 (100.00%) mismatched" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) timed out" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) raised errors" )
+      end
+    end
+  end
+  context "when the only result is an error" do
+    before do
+      record_experiment cont: "foo", cand: ->{ raise "nope" }
+    end
+    it "reports the correct counts" do
+      aggregate_failures do
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) correct" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) mismatched" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) timed out" )
+        expect( summary_text ).to     include( "1 of 1 (100.00%) raised errors" )
+      end
+    end
+  end
+  context "when the only result is a timeout" do
+    before do
+      record_experiment cont: "foo", cand: ->{ raise Timeout::Error, "too slow" }
+    end
+    it "reports the correct counts" do
+      aggregate_failures do
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) correct" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) mismatched" )
+        expect( summary_text ).to     include( "1 of 1 (100.00%) timed out" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) raised errors" )
+      end
+    end
+  end
+  context "when there are correct results that somehow lack any timing data" do
+    before do
+      record_experiment
+      experiment.results.update_all time_delta: nil, speedup_factor: nil
+    end
+    it "reports the correct counts" do
+      aggregate_failures do
+        expect( summary_text ).to     include( "1 of 1 (100.00%) correct" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) mismatched" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) timed out" )
+        expect( summary_text ).to_not include( "0 of 1 (0.00%) raised errors" )
+      end
+    end
+    it "doesn't try to print the big table thingy" do
+      expect( summary_text ).to_not include( "Time deltas/speedups:" )
+    end
+  end
+  describe "when there are correct results that include timing data" do
+    def expect_percentile_line(percentile, *expected_strings)
+      line = summary_text.lines.detect { |e| e =~ /\s#{percentile.to_i}%/ }
+      aggregate_failures do
+        expected_strings.each do |string|
+          expect( line ).to include( string )
+        end
+      end
+    end
+    context "with a speedup factor of 0x (yawn)" do
+      before do
+        record_experiment speedup_factor: 0
+        # Make sure we got the math right there...
+        result = experiment.results.first
+        aggregate_failures do
+          expect( result.control.duration          ).to be_within( 0.001 ).of( 1.0 )
+          expect( result.candidates.first.duration ).to be_within( 0.001 ).of( 1.0 )
+        end
+      end
+      it "reports the correct counts" do
+        aggregate_failures do
+          expect( summary_text ).to     include( "1 of 1 (100.00%) correct" )
+          expect( summary_text ).to_not include( "0 of 1 (0.00%) mismatched" )
+          expect( summary_text ).to_not include( "0 of 1 (0.00%) timed out" )
+          expect( summary_text ).to_not include( "0 of 1 (0.00%) raised errors" )
+        end
+      end
+      it "prints the stats visualization, including the correct speedup factor" do
+        expect_percentile_line( 50, "+0.0x" )
+      end
+    end
+    context "with a speedup factor of 10x (yay!)" do
+      before do
+        record_experiment speedup_factor: 10
+        # Make sure we got the math right there...
+        result = experiment.results.first
+        aggregate_failures do
+          expect( result.control.duration          ).to be_within( 0.001 ).of( 1.0 )
+          expect( result.candidates.first.duration ).to be_within( 0.001 ).of( 0.1 )
+        end
+      end
+      it "prints the stats visualization, including the correct speedup factor" do
+        expect_percentile_line( 50, "+10.0x" )
+      end
+    end
+    context "with a speedup factor of -10x (boo!)" do
+      before do
+        record_experiment speedup_factor: -10
+        # Make sure we got the math right there...
+        result = experiment.results.first
+        aggregate_failures do
+          expect( result.control.duration          ).to be_within( 0.001 ).of(  1.0 )
+          expect( result.candidates.first.duration ).to be_within( 0.001 ).of( 10.0 )
+        end
+      end
+      it "prints the stats visualization, including the correct speedup factor" do
+        expect_percentile_line( 50, "-10.0x" )
+      end
+    end
+    context "with multiple results and different speedups" do
+      before do
+        record_experiment speedup_factor: -10
+        record_experiment speedup_factor:  -2
+        record_experiment speedup_factor:   0
+        record_experiment speedup_factor:   2
+        record_experiment speedup_factor:  10
+      end
+      it "reports the correct counts" do
+        aggregate_failures do
+          expect( summary_text ).to     include( "5 of 5 (100.00%) correct" )
+          expect( summary_text ).to_not include( "0 of 5 (0.00%) mismatched" )
+          expect( summary_text ).to_not include( "0 of 5 (0.00%) timed out" )
+          expect( summary_text ).to_not include( "0 of 5 (0.00%) raised errors" )
+        end
+      end
+      it "reports median time deltas, as well as 5th & 95th percentiles, on their own line" do
+        time_delta_line = summary_text.lines.detect { |e| e =~ /Median time delta/i }
+        expect( time_delta_line ).to be_present
+        expect( time_delta_line ).to include( "-9.000s" ) # 5th percentile
+        expect( time_delta_line ).to include( "+0.000s" ) # Median
+        expect( time_delta_line ).to include( "+0.900s" ) # 95th percentile
+      end
+      it "prints the stats visualization, including the correct speedup factor" do
+        # This is effectively acting as an integration test for the Array#percentile method we've monkeypatched in
+				aggregate_failures do
+					expect_percentile_line(  0, "-10.0x" )
+					expect_percentile_line( 20, "-10.0x" )
+					expect_percentile_line( 25,  "-2.0x" )
+					expect_percentile_line( 40,  "-2.0x" )
+					expect_percentile_line( 45,  "+0.0x" )
+					expect_percentile_line( 60,  "+0.0x" )
+					expect_percentile_line( 65,  "+2.0x" )
+					expect_percentile_line( 80,  "+2.0x" )
+					expect_percentile_line( 85, "+10.0x" )
+					expect_percentile_line(100, "+10.0x" )
+				end
+      end
+    end
+    context "real-world(ish) data that led to a scaling error" do
+      before do
+        record_experiment baseline: 1.7367, speedup_factor: 10.9099
+        record_experiment baseline: 0.0642, speedup_factor: -3.2183
+        record_experiment baseline: 0.0702, speedup_factor: -1.0906
+        record_experiment baseline: 0.0552, speedup_factor:  1.1123
+        record_experiment baseline: 0.0539, speedup_factor:  1.1808
+        record_experiment baseline: 0.0554, speedup_factor: -1.1269
+      end
+      it "renders properly" do
+				aggregate_failures do
+					expect_percentile_line(  0, "-3.2x" )
+					expect_percentile_line( 15, "-3.2x" )
+					expect_percentile_line( 20, "-1.1x" )
+					expect_percentile_line( 30, "-1.1x" )
+					expect_percentile_line( 35, "-1.1x" )
+					expect_percentile_line( 50, "-1.1x" )
+					expect_percentile_line( 55, "+1.1x" )
+					expect_percentile_line( 65, "+1.1x" )
+					expect_percentile_line( 70, "+1.2x" )
+					expect_percentile_line( 80, "+1.2x" )
+					expect_percentile_line( 85, "+10.9x" )
+					expect_percentile_line(100, "+10.9x" )
+				end
+      end
+    end
+    context "real-world(ish) data that led to a scaling error, part 2" do
+			before do
+				record_experiment baseline: 0.0030516        , comparison: 0.00306088
+				record_experiment baseline: 0.000261548      , comparison: 0.00220928
+				record_experiment baseline: 0.000781327      , comparison: 0.00279742
+				record_experiment baseline: 0.00201508       , comparison: 0.002386
+				record_experiment baseline: 0.000593603      , comparison: 0.00275979
+				record_experiment baseline: 0.000259521      , comparison: 0.0021131
+				record_experiment baseline: 0.000673067      , comparison: 0.00250636
+				record_experiment baseline: 0.00229586       , comparison: 0.00285059
+				record_experiment baseline: 0.002911         , comparison: 0.00275513
+				record_experiment baseline: 0.00275274       , comparison: 0.00251802
+				record_experiment baseline: 0.000236285      , comparison: 0.00198174
+				record_experiment baseline: 0.000225291      , comparison: 0.00257419
+				record_experiment baseline: 0.000356831      , comparison: 0.00244557
+				record_experiment baseline: 0.000287118      , comparison: 0.00248476
+				record_experiment baseline: 0.000556486      , comparison: 0.00261352
+				record_experiment baseline: 0.00237066       , comparison: 0.00265087
+				record_experiment baseline: 0.00183386       , comparison: 0.00211302
+				record_experiment baseline: 0.00296087       , comparison: 0.00294441
+				record_experiment baseline: 0.00031988       , comparison: 0.00323599
+			end
+      it "renders properly" do
+				aggregate_failures do
+          expect_percentile_line(  0, "-11.4x" )
+          expect_percentile_line( 50,  "-3.7x" )
+          expect_percentile_line(100,  "+1.1x" )
+				end
+      end
+    end
+  end
+end