RubyGems - kalibera - Versions diffs - 0.1 - Mend

kalibera 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 7716023defd20bb3f94d5c7f2f26959dc92ec502
+  data.tar.gz: 86a9b2b33cf855415087f6a0b9c807442447a205
+SHA512:
+  metadata.gz: cc782c6b650f3aaa9ea72752fa7cc1cf0ad3d9181f93733568716cd380400564311f231f1f63e6ac3e7093f36358b048d55c1929f399b3bb72f7f38323712dd2
+  data.tar.gz: 9c122a46376f5af2d83d4ec97de5a4388c1713ed89608bc5359fbe19b5f6caf670ea84872e6e741d9ac70fc2ed40a5f48973e427369aef9ff56c18b4d2edbffd

data/Gemfile ADDED

@@ -0,0 +1,3 @@
+source 'https://rubygems.org'
+gem 'rbzip2', '~> 0.2.0'
+gem 'memoist', '~> 0.11.0'

data/Gemfile.lock ADDED

@@ -0,0 +1,12 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    memoist (0.11.0)
+    rbzip2 (0.2.0)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  memoist (~> 0.11.0)
+  rbzip2 (~> 0.2.0)

data/LICENSE.txt ADDED

@@ -0,0 +1,22 @@
+Copyright (C) King's College London, created by Edd Barrett and Carl
+Friedrich Bolz
+Ruby transliteration (C) Chris Seaton 2014
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/Rakefile ADDED

@@ -0,0 +1,9 @@
+require "bundler/gem_tasks"
+require "rake/testtask"
+Rake::TestTask.new do |t|
+  t.libs << 'test'
+end
+desc "Run tests"
+task :default => :test

data/kalibera.gemspec ADDED

@@ -0,0 +1,24 @@
+require 'json'
+metadata = JSON.parse(IO.read(File.expand_path('../../shared_metadata.json', __FILE__)))
+Gem::Specification.new do |spec|
+  spec.name          = "kalibera"
+  spec.version       = metadata["metadata"]["version"]
+  spec.authors       = ["Edd Barrett", "Carl Friedrich Bolz", "Chris Seaton"]
+  spec.email         = ["chris@chrisseaton.com"]
+  spec.summary       = metadata["metadata"]["short_descr"]
+  spec.description   = metadata["metadata"]["long_descr"]
+  spec.homepage      = metadata["metadata"]["url"]
+  spec.license       = "MIT"
+  spec.files         = `git ls-files -z`.split("\x0")
+  spec.test_files    = spec.files.grep(%r{^test/})
+  spec.require_paths = ["lib"]
+  spec.add_development_dependency "bundler", "~> 1.7"
+  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_dependency "rbzip2", "~> 0.2.0"
+  spec.add_dependency "memoist", "~> 0.11.0"
+end

data/lib/kalibera.rb ADDED

	@@ -0,0 +1 @@
1	+ require "kalibera/data"

data/lib/kalibera/data.rb ADDED

@@ -0,0 +1,369 @@
+require "stringio"
+require "base64"
+require "rbzip2"
+require "bigdecimal"
+require "memoist"
+module Kalibera
+  CONSTANTS = RBzip2::Decompressor.new(StringIO.new(Base64.decode64("""\
+  QlpoOTFBWSZTWbTS4VUAC9bYAEAQAAF/4GAOGZ3e40HH2YJERUKomGbCNMAAtMBaAkCOP9U0/R+q
+  qNCqfjAqVGOY3+qk96qmmIp+CCVNDD/1VGjfqkBJpIElG6uN92vE/PP+5IxhMIIgAbOxEMKLMVSq
+  VWtZmZaEklAAAttoAAAAAAAAAAAAEklAAEklABttkksklkkknVu2dX1vW9yWrkuXJJJJJJJJJJKK
+  JWsS5dq7k3RRRbu2222227oAAFQqFCAjkB0w7eMpKWy3bVI42225QlbQAAAAAAlbQbbUqkolE7JZ
+  jjjmS5LluZkuZmZmZmZmZmZvZhOYnktttsskiaSSToAAA5znOZmMzGTSSJJJ1JO+7gLbR067u48V
+  bZIAABJCSSjElG436ySek9f1/X3vZ72+7wPk5bbJG0kYTYA2+fHiolu7u8S62JEpjmZ3YS40AEt3
+  mb8lzXwEpar+9P3s9vAq1o23mt3oaJmZvJAPQu6AlL3s9ojg6rRBmOQaKRb+zbOaL0FMxZKBTm9O
+  vLmUJuqwVc+KevulFMM/JOzWTMN5Aa7cO5hmZuioHbboGzxzZLFATHYvXg5SUqCWxmre6As43wzV
+  30514PDn2m7ema93M9u9199F6QCSfsxJ7wA5R3bTsglUQaJLy4wKYu895byRoTJb7vXsGwZzhPZ0
+  xOdgtMncj5PGCPeKFPCgenS83zcvnQwGfm3prLnb6bcxKJABZeOrvfNAUNTTobmLQ+fOHAjxo2WE
+  JaevegHIDVvW+kRAD2TpoeJWFQDKtubzWOr6EFU3xs3rojhW98aghZQmIWXe9sUXKEXKvWvk6bTH
+  GURStAQ1M7OzF07ui6Q2DYl1NojMzlvrwcO6+uY7V3ZFerzz3sIqJsGzcJN2EAAew/vvqqvvvvi7
+  xXjhGH3nGNKv2u+Bt8k4USU+SaoLuU6HNmQoYyFTN3huLP721dwHIqQzrqVhjz2+UQw0ezok7gQl
+  wyZ2YM0hgPVaZaOLK9q3TtGiaO3Br4xGyy7HfAWw72nvLmaGPeSz2c/FkuN7Qj1guqtgUU1NHry2
+  5h7KvWgs2jglhCZpYpa8qbl3PrrEDL1Jg/1VrZ8IthQhNKLznYMPozi9arWla2BODhV6yuIKmzsa
+  zhOb3kxyjcD0ExuXvdys3WRxxYEQszLy8jxqTPZB7UQJ2xbk3YGV2QcdPN2HYuoVkWxUhtErw9u3
+  0mdw5HiO0WVtRUCEyxEAOdIHV1sWmbReT4iMTzRsB7Q36e72rpwePnrPggpSxjlZ9Lm8YJrgXDzJ
+  /30MSDPwzV8s+g4Rcpy3a8c7Y1jxgHJQs8+MyLsudmYSFySWm3OrSn5p3qb++m8fvHUGfCfNCbol
+  RSZ6wp+ZM14k8S+SKwqES7PQ72DFK4PTiMCA6LbvuSSSJ1R3iJAF10sQYlhpp2GSzWBw3ty+HjLj
+  HCDTxku3yHPrNvTXekcBSOuzMfOvy3dybchXeLxvXN3vKTN/BdbwUlqXY+g4sWMoHTQT61MeXIMf
+  PhgYq8KhOEbqeMqoyhWQp03eOOpV/LVvXl2X71ztaX7tMZJ5gBCshDGQCskDme9zu9b1dcgB1khU
+  mmEk2yTySG2QPmEJp3m/jM+93nYSoe7YEPmExITTpITut87rehm+UgF13IG0nUk52+95Z+9wg49Y
+  SUraiKIYo3UOvdtq6bVDDmbPTmhtyLfS1LCPXQmYLD7c9lu5ZfdaWSGn1m82kCd4xhYOuVUH33zB
+  Kh5IsOsxNe+yB7XNd77Xc05kD5h1Jpk0hnLJpnrzXe9xdXpOJfrA4kzdhvLB1tzn3e6OqyaeM8m9
+  2HWH2m59jnvrO2w+9TTFDibQffe7880+cfu08zjLw/Mbx4faLWcMbzQ8vDWj6uDmr75CuG9hzAOl
+  1Wk0mWKqglrLcmu/uw/IVcPCtGw3hY3TgkN0PqENShQhpj5ZN7dzethJScvIGNEPPE7lcJTwYM8t
+  7zB5zMNkYZmHc1cbY1RirWMmuHzEFi7P04mPluFvMqnoirRUEEB3taRpio2svFVXtMcub+PuTmqL
+  vlSOqbSO996bd/e0AoLJ1hV97AmbtfxIsAkWBILJAUgAoEiySCwgsICwDqAZlkiyEWBFhBSCwqSc
+  9zeoAskUBYRYRYb3rmeHWXZOMgsFgLAWBq2RQWRcSVIpFikWCwWF3mAoKKCgpr9TE21BYqy8zDbW
+  LFFiixRRXLpcoooovmqiirm/rmlRVWl57xynNqqo8tVVVy1VVyrRVVVFb39rSovrvKitpVR/Woo5
+  So32dxukUUUz2YY1FFLbF1u91TbbZUWNsqVrM3336515OpjWP1DMaFZ5ufsDOXTHLBSsrN85f1/G
+  Z97s999hpF0nwOBV8gYfoGPnQqiKzPLcnpOky/b652qCQ9ti4PbvcjqmneMEtaV17cnt6NKZYybS
+  TwHdBK34b2wy3CJ1qqi8qpigCKsVSvFUFMUMtVTFPjBoq+K5AGXzuffdyXtm0+ebv5HdMVnN0mMe
+  ++473+/HTWnzd0OuWnHE20ZtC7oaZvN/jvn9efa9UHKC++prtL9ZWDu7c73vvaOTiKbTmUPJ7Pv2
+  jEFDnO6Xe/deOG0+v7Cn6z8zO2VH9TMse/fvt67+w77n7QaQffsxOJfqGteOa/HdYe1Tm6LFOpUz
+  VMR/aPvadm0zXsnMppiffYG27ZXfslV2hAJrPGmKsVfe9fSO8vVnru7tbzSU1a9cGv0qsQEdhHK7
+  rJBfbPMSKZc3wmij3ULrhE9nIwoDMp4WAK2GkIKIqrHAK0Bjvo7sA2VZ941ggrwIsfGLZTHvGSZR
+  8UGKDKFAAcC8U45fTlKQKM8fnx+IAr3rmwtVbfFhj4VZqQviRXhavLu9zOQWISS0w9PxFYCEfK1l
+  9GK0GhrKxr5CwCveB4XDEsPYWKwfHDgrBnZT4XW5dlE2tW7FAR8RGW0XMy1MQoDwyQ+Hnmvet5I/
+  HrTVYQJbJ1e3y6B7LoCh5qyXWO03X5WbxWT0UvY55cyRbhmB8ib6lkhRo5USRAoLFA4WELV93ZV/
+  DKh2MIhnIWCPBLEh3FUTBSxJC7h4Z15qTFPTRmpe1Ldj1rlkVnAKHDySryior3OheiTPKZY2GaQ6
+  N2YyvJh9wuO75VOarCWLEUdLavAs2RShYOntLrMVabUAyDnTJIQ4deJa92pAWd6KBz+F3JFOFCQt
+  NLhVQA=="""))).read.split().map { |x| Float(x) }
+  # Look up the 95% quantile from constant table.
+  def self.student_t_quantile95(ndeg)
+    index = ndeg - 1
+    if index >= CONSTANTS.size
+      index = -1 # the quantile converges, we just take the last value
+    end
+    CONSTANTS[index]
+  end
+  ConfRange = Struct.new(:lower, :median, :upper) do
+    def error
+      Kalibera.mean([upper - median, median - lower])
+    end
+  end
+  # Returns a tuples (lower, median, upper), where:
+  # lower: lower bound of 95% confidence interval
+  # median: the median value of the data
+  # upper: upper bound of 95% confidence interval
+  #
+  # Arguments:
+  # means -- the list of means (need not be sorted).
+  def self.confidence_slice(means, confidence="0.95")
+    means = means.sort
+    # There may be >1 median indicies, i.e. data is even-sized.
+    lower, middle_indicies, upper = confidence_slice_indicies(means.size, confidence)
+    median = mean(middle_indicies.map { |i| means[i] })
+    ConfRange.new(means[lower], median, means[upper - 1]) # upper is *exclusive*
+  end
+  # Returns a triple (lower, mean_indicies, upper) so that l[lower:upper]
+  # gives confidence_level of all samples. Mean_indicies is a tuple of one or
+  # two indicies that correspond to the mean position
+  #
+  # Keyword arguments:
+  # confidence_level -- desired level of confidence as a Decimal instance.
+  def self.confidence_slice_indicies(length, confidence_level=BigDecimal.new('0.95'))
+    raise unless !confidence_level.instance_of?(Float)
+    confidence_level = BigDecimal.new(confidence_level)
+    raise unless confidence_level.instance_of?(BigDecimal)
+    exclude = (1 - confidence_level) / 2
+    if length % 2 == 0
+      mean_indicies = [length / 2 - 1, length / 2]  # TRANSLITERATION: was //
+    else
+      mean_indicies = [length / 2]  # TRANSLITERATION: was //
+    end
+    lower_index = Integer(
+        (exclude * length).round(0, BigDecimal::ROUND_DOWN) # TRANSLITERATION: was quantize 1.
+    )
+    upper_index = Integer(
+        ((1 - exclude) * length).round(0, BigDecimal::ROUND_UP) # TRANSLITERATION: was quantize 1.
+    )
+    [lower_index, mean_indicies, upper_index]
+  end
+  def self.mean(l)
+    l.inject(0, :+) / Float(l.size)
+  end
+  def self.geomean(l)
+    l.inject(1, :*) ** (1.0 / Float(l.size))
+  end
+  class Data
+    extend Memoist
+    # Instances of this class store measurements (corresponding to
+    # the Y_... in the papers).
+    #
+    # Arguments:
+    # data -- Dict mapping tuples of all but the last index to lists of values.
+    # reps -- List of reps for each level, high to low.
+    def initialize(data, reps)
+      @data = data
+      @reps = reps
+      # check that all data is there
+      array = reps.map { |i| (0...i).to_a }
+      array[0].product(*array.drop(1)).each do |index|
+        self[*index] # does not crash
+      end
+    end
+    def [](*indicies)
+      raise unless indicies.size == @reps.size
+      x = @data[indicies[0...indicies.size-1]]
+      raise unless !x.nil?
+      x[indicies[-1]]
+    end
+    # Computes a list of all possible data indcies gievn that
+    # start <= index <= stop are fixed.
+    def index_iterator(start=0, stop=nil)
+      if stop.nil?
+        stop = n
+      end
+      maximum_indicies = @reps[start...stop]
+      remaining_indicies = maximum_indicies.map { |maximum| (0...maximum).to_a }
+      return [[]] if remaining_indicies.empty?
+      remaining_indicies[0].product(*remaining_indicies.drop(1))
+    end
+    # The number of levels in the experiment.
+    def n
+      @reps.size
+    end
+    # The number of repetitions for level i.
+    #
+    # Arguments:
+    # i -- mathematical index.
+    def r(i)
+      raise unless 1 <= i
+      raise unless i <= n
+      index = n - i
+      @reps[index]
+    end
+    # Compute the mean across a number of values.
+    #
+    # Keyword arguments:
+    # indicies -- tuple of fixed indicies over which to compute the mean,
+    # given from left to right. The remaining indicies are variable.
+    def mean(indicies=[])
+      remaining_indicies_cross_product =
+          index_iterator(start=indicies.size)
+      alldata = remaining_indicies_cross_product.map { |remaining| self[*(indicies + remaining)] }
+      Kalibera.mean(alldata)
+    end
+    memoize :mean
+    # Biased estimator S_i^2.
+    #
+    # Arguments:
+    # i -- the mathematical index of the level from which to compute S_i^2
+    def Si2(i)
+      raise unless 1 <= i
+      raise unless i <= n
+      # @reps is indexed from the left to right
+      index = n - i
+      factor = 1.0
+      # We compute this iteratively leveraging the fact that
+      # 1 / (a * b) = (1 / a) / b
+      for rep in @reps[0, index]
+        factor /= rep
+      end
+      # Then at this point we have:
+      # factor * (1 / (r_i - 1)) = factor / (r_i - 1)
+      factor /=  @reps[index] - 1
+      # Second line of the above definition, the lines are multiplied.
+      indicies = index_iterator(0, index+1)
+      sum = 0.0
+      for index in indicies
+        a = mean(index)
+        b = mean(index[0,index.size-1])
+        sum += (a - b) ** 2
+      end
+      factor * sum
+    end
+    memoize :Si2
+    # Compute the unbiased T_i^2 variance estimator.
+    #
+    # Arguments:
+    # i -- the mathematical index from which to compute T_i^2.
+    def Ti2(i)
+      # This is the broken implementation of T_i^2 shown in the pubslished
+      # version of "Rigorous benchmarking in reasonable time". Tomas has
+      # since fixed this in local versions of the paper.
+      #@memoize
+      #def broken_Ti2(self, i)
+      #  """ Compute the unbiased T_i^2 variance estimator.
+      #
+      #  Arguments:
+      #  i -- the mathematical index from which to compute T_i^2.
+      #  """
+      #
+      #  raise unless 1 <= i <= n
+      #  if i == 1:
+      #    return self.Si2(1)
+      #  return self.Si2(i) - self.Ti2(i - 1) / self.r(i - 1)
+      # This is the correct definition of T_i^2
+      raise unless 1 <= i
+      raise unless i <= n
+      if i == 1
+        return Si2(1)
+      end
+      Si2(i) - Si2(i - 1) / r(i - 1)
+    end
+    memoize :Ti2
+    # Computes the optimal number of repetitions for a given level.
+    #
+    # Note that the resulting number of reps is not rounded.
+    #
+    # Arguments:
+    # i -- the mathematical level of which to compute optimal reps.
+    # costs -- A list of costs for each level, *high* to *low*.
+    def optimalreps(i, costs)
+      # NOTE: Does not round
+      costs = costs.map { |x| Float(x) }
+      raise unless 1 <= i
+      raise unless i < n
+      index = n - i
+      return (costs[index - 1] / costs[index] *
+          Ti2(i) / Ti2(i + 1)) ** 0.5
+    end
+    memoize :optimalreps
+    # Compute the 95% confidence interval.
+    def confidence95
+      degfreedom = @reps[0] - 1
+      student_t_quantile95(degfreedom) *
+        (Si2(n) / @reps[0]) ** 0.5
+    end
+    # Compute a list of simulated means from bootstrap resampling.
+    #
+    # Note that, resampling occurs with replacement.
+    #
+    # Keyword arguments:
+    # iterations -- Number of resamples (and thus means) generated.
+    def bootstrap_means(iterations=1000)
+      means = []
+      for i in 0...iterations
+        values = bootstrap_sample()
+        means.push(Kalibera.mean(values))
+      end
+      means.sort()
+      means
+    end
+    # Compute a confidence interval via bootstrap method.
+    #
+    # Keyword arguments:
+    # iterations -- Number of resamplings to base result upon. Default is 10000.
+    # confidence -- The required confidence. Default is "0.95" (95%).
+    def bootstrap_confidence_interval(iterations=10000, confidence="0.95")
+      means = bootstrap_means(iterations)
+      Kalibera.confidence_slice(means, confidence)
+    end
+    def random_measurement_sample(index=[])
+      results = []
+      if index.size == n
+        results.push self[*index]
+      else
+        indicies = (0...@reps[index.size]).map { |i| rand(@reps[index.size]) }
+        for single_index in indicies
+          newindex = index + [single_index]
+          for value in random_measurement_sample(newindex)
+            results.push value
+          end
+        end
+      end
+      results
+    end
+    def bootstrap_sample
+      random_measurement_sample
+    end
+    def bootstrap_quotient(other, iterations=10000, confidence='0.95')
+      ratios = []
+      for _ in 0...iterations
+        ra = bootstrap_sample()
+        rb = other.bootstrap_sample()
+        mean_ra = Kalibera.mean(ra)
+        mean_rb = Kalibera.mean(rb)
+        if mean_rb == 0 # protect against divide by zero
+          ratios.push(Float::INFINITY)
+        else
+          ratios.push(mean_ra / mean_rb)
+        end
+      end
+      ratios.sort!
+      Kalibera.confidence_slice(ratios, confidence).values
+    end
+  end
+  def self.bootstrap_geomean(l_data_a, l_data_b, iterations=10000, confidence='0.95')
+    raise "lists need to match" unless l_data_a.size == l_data_b.size
+    geomeans = []
+    iterations.times do
+      ratios = []
+      l_data_a.zip(l_data_b).each do |a, b|
+        ra = a.bootstrap_sample
+        rb = b.bootstrap_sample
+        mean_ra = mean(ra)
+        mean_rb = mean(rb)
+        ratios << mean_ra / mean_rb
+      end
+      geomeans << geomean(ratios)
+    end
+    geomeans.sort!
+    confidence_slice(geomeans, confidence)
+  end
+end

data/test/test_data.rb ADDED

@@ -0,0 +1,410 @@
+require "test/unit"
+require "kalibera"
+# We need to match Python's random numbers when testing
+class TestData < Kalibera::Data
+  RAND = [0, 2, 2, 0, 1, 1, 1, 2, 0, 0, 2, 1, 2, 0, 1, 2, 0, 2, 2, 0, 0, 1,
+          2, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 0]
+  def initialize(data, reps)
+    super
+    @rand_counter = 0
+  end
+  def reset_local_rand
+    @rand_counter = 0
+  end
+  def rand(r)
+    raise "mock rand designed for range=3" unless r == 3
+    raise "mock rand out of data" unless @rand_counter < RAND.size
+    n = RAND[@rand_counter]
+    @rand_counter += 1
+    n
+  end
+end
+class TestKaliberaData < Test::Unit::TestCase
+  def test_indicies
+    d = TestData.new({
+      [0, 0] => [1, 2, 3, 4, 5],
+      [0, 1] => [3, 4, 5, 6, 7]
+      }, [1, 2, 5])
+    assert_equal 1, d[0, 0, 0]
+    assert_equal 5, d[0, 0, 4]
+    assert_equal 5, d[0, 1, 2]
+  end
+  def test_rep_levels
+    d = TestData.new({
+      [0, 0] => [1, 2, 3, 4, 5],
+      [0, 1] => [3, 4, 5, 6, 7]
+      }, [1, 2, 5])
+    assert_equal 5, d.r(1) # lowest level, i.e. arity of the lists in the map
+    assert_equal 2, d.r(2)
+    assert_equal 1, d.r(3)
+    # indexs are one based, so 0 or less is invalid
+    assert_raise RuntimeError do
+      d.r(0)
+    end
+    assert_raise RuntimeError do
+      d.r(-1337)
+    end
+    # Since we have 3 levels here, levels 4 and above are bogus
+    assert_raise RuntimeError do
+      d.r(4)
+    end
+    assert_raise RuntimeError do
+      d.r(666)
+    end
+  end
+  def test_index_iter
+    d = TestData.new({
+      [0, 0] => [1, 2, 3, 4, 5],
+      [0, 1] => [3, 4, 5, 6, 7]
+      }, [1, 2, 5])
+    assert_equal [
+        [0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 0, 4],
+        [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3], [0, 1, 4],
+        ], d.index_iterator()
+    assert_equal [
+        [0, 0], [0, 1], [0, 2], [0, 3], [0, 4],
+        [1, 0], [1, 1], [1, 2], [1, 3], [1, 4],
+        ], d.index_iterator(start=1)
+    assert_equal [[0]], d.index_iterator(start=0, stop=1)
+    assert_equal [[0], [1]], d.index_iterator(start=1, stop=2)
+  end
+  def test_index_means
+    d = TestData.new({
+      [0, 0] => [0, 2]
+    }, [1, 1, 2])
+    assert_equal 1, d.mean([])
+    assert_equal 1, d.mean([0, 0])
+    assert_equal d[0, 0, 0], d.mean([0, 0, 0])
+    assert_equal d[0, 0, 1], d.mean([0, 0, 1])
+  end
+  def test_index_means2
+    # Suppose we have three levels, so n = 3.
+    # For the sake of example, level 1 is repetitions, level 2 is executions,
+    # and level 3 is compilations. Now suppose we repeat level 3 twice,
+    # level 2 twice and level 3 five times.
+    #
+    # This would be a valid data set:
+    # Note that the indicies are off-by-one due to python indicies starting
+    # from 0.
+    d = TestData.new({ [0, 0] => [ 3, 4, 4, 1, 2 ], # times for compile 1, execution 1
+           [0, 1] => [ 3, 3, 3, 3, 3 ], # compile 1, execution 2
+           [1, 0] => [ 1, 2, 3, 4, 5 ], # compile 2, execution 1
+           [1, 1] => [ 1, 1, 4, 4, 1 ], # compile 2, execution 2
+           }, [2, 2, 5]) # counts for each level (highest to lowest)
+    # By calling mean with an empty tuple we compute the mean at all levels
+    # i.e. the mean of all times:
+    x = [3, 4, 4, 1, 2, 3, 3, 3, 3, 3, 1, 2, 3, 4, 5, 1, 1, 4, 4, 1]
+    expect = x.inject(0, :+)/Float(x.size)
+    assert_equal d.mean([]), expect
+    # By calling with a singleton tuple we compute the mean for a given
+    #compilation. E.g. compilation 2
+    x = [1, 2, 3, 4, 5, 1, 1, 4, 4, 1]
+    expect = x.inject(0, :+) / Float(x.size)
+    assert_equal d.mean([1]), expect
+    # By calling with a pair we compute the mean for a given compile
+    # and execution combo.
+    # E.g. compile 1, execution 2, which is obviously a mean of 3.
+    assert_equal 3, d.mean([0, 1])
+  end
+  def test_si2
+    d = TestData.new({
+      [0, 0] => [0, 0]
+    }, [1, 1, 2])
+    assert_equal 0, d.Si2(1)
+  end
+  def test_si2_bigger_example
+    # Let's compute S_1^2 for the following data
+    d = TestData.new({
+      [0, 0] => [3,4,3],
+      [0, 1] => [1.2, 3.1, 3],
+      [1, 0] => [0.2, 1, 1.5],
+      [1, 1] => [1, 2, 3]
+    }, [2, 2, 3])
+    # So we have n = 3, r = (2, 2, 3)
+    # By my reckoning we should get something close to 0.72667 (working below)
+    # XXX Explanation from whiteboard need to go here XXX
+    assert_less_equal (d.Si2(1)-0.72667).abs, 0.0001
+  end
+  def test_ti2
+    # To verify this, consider the following data:
+    d = TestData.new({
+      [0, 0] => [3,4,3],
+      [0, 1] => [1.2, 3.1, 3],
+      [1, 0] => [0.2, 1, 1.5],
+      [1, 1] => [1, 2, 3]
+    }, [2, 2, 3])
+    # Let's manually look at S_i^2 where 1 <= i <= n:
+    #si_vec = [ d.Si2(i) for i in range(1, 4) ]
+    #print(si_vec)
+    ti_vec = (1...4).map { |i| d.Ti2(i) }
+    expect = [ 0.7266667, 0.262777778, 0.7747 ]
+    (0...expect.size).each do |i|
+      assert (ti_vec[i] - expect[i]).abs <= 0.0001, "#{} <= 0.0001"
+    end
+  end
+  def test_optimal_reps
+    d = TestData.new({
+      [0, 0] => [3,4,3],
+      [0, 1] => [1.2, 3.1, 3],
+      [1, 0] => [0.2, 1, 1.5],
+      [1, 1] => [1, 2, 3]
+    }, [2, 2, 3])
+    #ti_vec = [ d.Ti2(i) for i in range (1, 4) ]
+    #print(ti_vec)
+    # And suppose the costs (high level to low) are 100, 20 and 3 (seconds)
+    # By my reckoning, the optimal repetition counts should be r_1 = 5, r_2 = 2
+    # XXX show working XXX
+    got = [1,2].map { |i|d.optimalreps(i, [100, 20, 3]) }
+    expect = [4.2937, 1.3023]
+    (0...got.size).each do |i|
+      assert_less_equal (got[i] - expect[i]).abs, 0.001
+    end
+  end
+  def test_worked_example_3_level
+    # three level experiment
+    # This is the worked example from the paper.
+    data = TestData.new({
+      [0, 0] => [9.0, 5.0], [0, 1] => [8.0, 3.0],
+      [1, 0] => [10.0, 6.0], [1, 1] => [7.0, 11.0],
+      [2, 0] => [1.0, 12.0], [2, 1] => [2.0, 4.0],
+    }, [3, 2, 2])
+    correct = {
+        [0, 0] => 7.0,
+        [0, 1] => 5.5,
+        [1, 0] => 8.0,
+        [1, 1] => 9.0,
+        [2, 0] => 6.5,
+        [2, 1] => 3.0,
+    }
+    data.index_iterator(0, 2).each do |index|
+      assert data.mean(index) == correct[index]
+    end
+    assert_equal 6.5, data.mean()
+    assert_equal 16.5, data.Si2(1).round(1)
+    assert_equal 2.6, data.Si2(2).round(1)
+    assert_equal 3.6, data.Si2(3).round(1)
+    assert_equal 16.5, data.Ti2(1).round(1)
+    assert_equal -5.7, data.Ti2(2).round(1)
+    assert_equal 2.3, data.Ti2(3).round(1)
+  end
+  def test_worked_example_2_level
+    data = TestData.new({
+      [0] => [9.0, 5.0, 8.0, 3.0],
+      [1] => [10.0, 6.0, 7.0, 11.0],
+      [2] => [1.0, 12.0, 2.0, 4.0],
+    }, [3, 4])
+    correct = {[0] => 6.3,
+           [1] => 8.5,
+           [2] => 4.8,
+           }
+    data.index_iterator(0, 1).each do |index|
+      assert data.mean(index).round(1) == correct[index]
+    end
+    assert_equal 6.5, data.mean()
+    assert_equal 12.7, data.Si2(1).round(1)
+    assert_equal 3.6, data.Si2(2).round(1)
+    assert_equal 12.7, data.Ti2(1).round(1)
+    assert_equal 0.4, data.Ti2(2).round(1)
+  end
+  def test_bootstrap
+    # XXX needs info on how expected val was computed
+    data = TestData.new({
+        [0] => [ 2.5, 3.1, 2.7 ],
+        [1] => [ 5.1, 1.1, 2.3 ],
+        [2] => [ 4.7, 5.5, 7.1 ],
+        }, [3, 3])
+    data.reset_local_rand
+    expect = 4.8111111111
+    got = data.bootstrap_means(1) # one iteration
+    assert_less_equal (got[0] - expect).abs, 0.0001
+  end
+  def test_confidence_slice_indicies
+    assert_equal [1, [4, 5], 9], Kalibera.confidence_slice_indicies(10, '0.8')
+    assert_equal [1, [5], 10], Kalibera.confidence_slice_indicies(11, '0.8')
+    assert_equal [25, [499, 500], 975], Kalibera.confidence_slice_indicies(1000)
+  end
+  def test_confidence_slice
+    # Suppose we get back the means:
+    means = (0...1000).map { |x| x + 15 } # already sorted
+    # For a data set of size 1000, we expect alpha/2 to be 25
+    # (for a 95% confidence interval)
+    alpha_over_two = means.size * 0.025
+    # assert(x) == 25 would only check that x is truthy and then throw the
+    # comparison away; assert_equal performs the intended check.
+    assert_equal 25, alpha_over_two
+    # Therefore we lose 25 items off each end of the means list.
+    # The first 25 indicies are 0, ..., 24, so lower bound should be index 25.
+    # The last 25 indicies are -1, ..., -25, so upper bound is index -26.
+    # Put differently, the last 25 indicies are 999, ..., 975.
+    lower_index = Integer(alpha_over_two.floor)
+    upper_index = Integer(-alpha_over_two.ceil - 1)
+    lobo, hibo = [means[lower_index], means[upper_index]]
+    # Since the data is the index plus 15, we should get an
+    # interval: [25+15, 974+15]
+    expect = [25+15, 974+15]
+    assert_equal expect, [lobo, hibo]
+    # There is strictly speaking no median of 1000 items.
+    # We take the mean of the two middle items, items 500 and 501 at indicies
+    # 499 and 500. Since the data is the index + 15, the middle values are
+    # 514 and 515, the mean of which is 514.5
+    median = 514.5
+    # Check the implementation.
+    confrange = Kalibera.confidence_slice(means)
+    got_lobo, got_median, got_hibo = confrange.values
+    assert_equal got_lobo, confrange.lower
+    assert_equal got_median, confrange.median
+    assert_equal got_hibo, confrange.upper
+    assert_equal lobo, got_lobo
+    assert_equal hibo, got_hibo
+    assert_equal median, got_median
+    assert_equal Kalibera.mean([median - lobo, hibo - median]), confrange.error
+  end
+  def test_confidence_slice_pass_confidence_level
+    means = (0...10).map { |x| Float(x) }
+    low, mean, high = Kalibera.confidence_slice(means, '0.8').values
+    assert_equal (4 + 5) / 2.0, mean
+    assert_equal 1, low
+    assert_equal 8, high
+    means = (0...11).map { |x| Float(x) }
+    low, mean, high = Kalibera.confidence_slice(means, '0.8').values
+    assert_equal 5, mean
+    assert_equal 1, low
+    assert_equal 9, high
+  end
+  def test_confidence_quotient
+    data1 = TestData.new({
+        [0] => [ 2.5, 3.1, 2.7 ],
+        [1] => [ 5.1, 1.1, 2.3 ],
+        [2] => [ 4.7, 5.5, 7.1 ],
+        }, [3, 3])
+    data2 = TestData.new({
+        [0] => [ 3.5, 4.1, 3.7 ],
+        [1] => [ 6.1, 2.1, 3.3 ],
+        [2] => [ 5.7, 6.5, 8.1 ],
+        }, [3, 3])
+    data1.reset_local_rand
+    data2.reset_local_rand
+    a = data1.bootstrap_sample
+    b = data2.bootstrap_sample
+    data1.reset_local_rand
+    data2.reset_local_rand
+    _, mean, _ = data1.bootstrap_quotient(data2, iterations=1)
+    assert_equal Kalibera.mean(a) / Kalibera.mean(b), mean
+  end
+  def test_confidence_quotient_div_zero
+    data1 = TestData.new({
+        [0] => [ 2.5, 3.1, 2.7 ],
+        [1] => [ 5.1, 1.1, 2.3 ],
+        [2] => [ 4.7, 5.5, 7.1 ],
+        }, [3, 3])
+    data2 = TestData.new({ # This has a mean of zero
+        [0] => [ 0, 0, 0],
+        [1] => [ 0, 0, 0],
+        [2] => [ 0, 0, 0],
+        }, [3, 3])
+    # Since all ratios will be +inf, the median should also be +inf
+    _, median, _ = data1.bootstrap_quotient(data2, iterations=1)
+    assert_equal Float::INFINITY, median
+  end
+  def test_geomean
+    assert_equal 1, Kalibera.geomean([10, 0.1])
+    assert_equal 1, Kalibera.geomean([1])
+  end
+  # This requires a very large volume of random data, which we can't easily
+  # just store in the mock random generator above.
+  #def test_geomean_data
+  #  data1 = TestData.new({
+  #      [0] => [ 2.9, 3.1, 3.0 ],
+  #      [1] => [ 3.1, 2.6, 3.3 ],
+  #      [2] => [ 3.2, 3.0, 2.9 ],
+  #      }, [3, 3])
+  #  data2 = TestData.new({
+  #      [0] => [ 3.9, 4.1, 4.0 ],
+  #      [1] => [ 4.1, 3.6, 4.3 ],
+  #      [2] => [ 4.2, 4.0, 3.9 ],
+  #      }, [3, 3])
+  #
+  #  _, mean1, _ = data1.bootstrap_quotient(data2)
+  #  _, mean2, _ = Kalibera.bootstrap_geomean([data1], [data2])
+  #  assert_equal mean2.round(3), mean1.round(3)
+  #
+  #  (_, mean, _) = Kalibera.bootstrap_geomean([data1, data2], [data2, data1])
+  #  assert_equal 1.0, mean.round(5)
+  #end
+  def assert_less_equal(x, y)
+    assert x <= y, "#{x.inspect} <= #{y.inspect}"
+  end
+end

metadata ADDED

@@ -0,0 +1,115 @@
+--- !ruby/object:Gem::Specification
+name: kalibera
+version: !ruby/object:Gem::Version
+  version: '0.1'
+platform: ruby
+authors:
+- Edd Barrett
+- Carl Friedrich Bolz
+- Chris Seaton
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2016-05-16 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.7'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '10.0'
+- !ruby/object:Gem::Dependency
+  name: rbzip2
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.0
+- !ruby/object:Gem::Dependency
+  name: memoist
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.11.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.11.0
+description: 'libkalibera contains reimplementations of the statistical computations
+  for benchmarking evaluation from the following papers by Tomas Kalibera and Richard
+  Jones: ''Rigorous benchmarking in reasonable time''; ''Quantifying performance changes
+  with effect size confidence intervals''.'
+email:
+- chris@chrisseaton.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- Gemfile
+- Gemfile.lock
+- LICENSE.txt
+- Rakefile
+- kalibera.gemspec
+- lib/kalibera.rb
+- lib/kalibera/data.rb
+- test/test_data.rb
+homepage: http://soft-dev.org/src/libkalibera/
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.5.1
+signing_key:
+specification_version: 4
+summary: An implementation of Tomas Kalibera's statistically rigorous benchmarking
+  method.
+test_files:
+- test/test_data.rb