RubyGems - daru - Versions diffs - 0.0.5 → 0.1.0 - Mend

daru 0.0.5 → 0.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (48)

checksums.yaml +4 -4
data/.build.sh +14 -0
data/.travis.yml +26 -4
data/CONTRIBUTING.md +31 -0
data/Gemfile +1 -2
data/{History.txt → History.md} +110 -44
data/README.md +21 -288
data/Rakefile +1 -0
data/daru.gemspec +12 -8
data/lib/daru.rb +36 -1
data/lib/daru/accessors/array_wrapper.rb +8 -3
data/lib/daru/accessors/gsl_wrapper.rb +113 -0
data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
data/lib/daru/core/group_by.rb +0 -1
data/lib/daru/dataframe.rb +1192 -83
data/lib/daru/extensions/rserve.rb +21 -0
data/lib/daru/index.rb +14 -0
data/lib/daru/io/io.rb +170 -8
data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
data/lib/daru/maths/arithmetic/vector.rb +4 -4
data/lib/daru/maths/statistics/dataframe.rb +48 -27
data/lib/daru/maths/statistics/vector.rb +215 -33
data/lib/daru/monkeys.rb +53 -7
data/lib/daru/multi_index.rb +21 -4
data/lib/daru/plotting/dataframe.rb +83 -25
data/lib/daru/plotting/vector.rb +9 -10
data/lib/daru/vector.rb +596 -61
data/lib/daru/version.rb +3 -0
data/spec/accessors/wrappers_spec.rb +51 -0
data/spec/core/group_by_spec.rb +0 -2
data/spec/daru_spec.rb +58 -0
data/spec/dataframe_spec.rb +768 -73
data/spec/extensions/rserve_spec.rb +52 -0
data/spec/fixtures/bank2.dat +200 -0
data/spec/fixtures/repeated_fields.csv +7 -0
data/spec/fixtures/scientific_notation.csv +4 -0
data/spec/fixtures/test_xls.xls +0 -0
data/spec/io/io_spec.rb +161 -24
data/spec/math/arithmetic/dataframe_spec.rb +26 -7
data/spec/math/arithmetic/vector_spec.rb +8 -0
data/spec/math/statistics/dataframe_spec.rb +16 -1
data/spec/math/statistics/vector_spec.rb +215 -47
data/spec/spec_helper.rb +21 -2
data/spec/vector_spec.rb +368 -12
metadata +99 -16
data/lib/version.rb +0 -3
data/notebooks/grouping_splitting_pivots.ipynb +0 -529
data/notebooks/intro_with_music_data_.ipynb +0 -303

data/spec/math/arithmetic/dataframe_spec.rb CHANGED

@@ -17,16 +17,20 @@ describe Daru::DataFrame do
     end
     it "adds two dataframes to produce a third" do
-      expect(@left + @right).to eq(Daru::DataFrame.new({a: [2,nil,nil,8,nil,nil,nil],
-        b: [20,nil,nil,80,nil,nil,nil], c: [nil,nil,nil,nil,nil,nil]}, index:
-        [0,1,2,3,4,5,6]))
+      expect(@left + @right).to eq(Daru::DataFrame.new({
+        a: [2,nil,nil,8,nil,nil,nil],
+        b: [20,nil,nil,80,nil,nil,nil],
+        c: [nil,nil,nil,nil,nil,nil]
+        }, index: [0,1,2,3,4,5,6]))
     end
   end
   context "#-" do
     it "subtracts a number from all numeric vectors" do
-      expect(@df - 2).to eq(Daru::DataFrame.new({a: [-1,0,1,2,3], b: ['a','e','i','o','u'],
-      c: [8,18,28,38,48]}))
+      expect(@df - 2).to eq(Daru::DataFrame.new({
+        a: [-1,0,1,2,3],
+        b: ['a','e','i','o','u'],
+        c: [8,18,28,38,48]}))
     end
     it "subtracts a data frame from another" do
@@ -53,13 +57,28 @@ describe Daru::DataFrame do
   context "#sqrt" do
     it "calculates sqrt" do
-      @df.sqrt
+      expect_correct_df_in_delta(@df.sqrt,
+        Daru::DataFrame.new({
+          a: [1.0,1.41421356,1.73205080,2.0,2.23606797],
+          c: [3.16227766, 4.47213595 ,5.47722557 ,6.32455532, 7.07106781]
+        }), 0.001
+      )
     end
   end
   context "#round" do
     it "rounds to precision" do
-      @df.round
+      df = Daru::DataFrame.new({
+        a: [1.3434,2.4332,5.6655,12.3344,32.233],
+        b: [1.3434,2.4332,5.6655,12.3344,32.233],
+        c: %w(a b c d e)
+      })
+      ans = Daru::DataFrame.new({
+        a: [1.34,2.43,5.67,12.33,32.23],
+        b: [1.34,2.43,5.67,12.33,32.23],
+      })
+      expect(df.round(2)).to eq(ans)
     end
   end

data/spec/math/arithmetic/vector_spec.rb CHANGED

@@ -24,6 +24,14 @@ describe Daru::Vector do
     it "puts a nil when one of the operands is nil" do
       expect(@with_md1 + @with_md2).to eq(Daru::Vector.new([nil,7,nil,nil,nil,7], name: :missing, index: [:a, :b, :c, :corona, :obi, :wan]))
     end
+    it "appropriately adds vectors with numeric and non-numeric indexes" do
+      pending "Need an alternate index implementation?"
+      v1 = Daru::Vector.new([1,2,3])
+      v2 = Daru::Vector.new([1,2,3], index: [:a,:b,:c])
+      expect(v1 + v2).to eq(Daru::Vector.new([nil]*6, index: [0,1,2,:a,:b,:c]))
+    end
   end
   context "#-" do

data/spec/math/statistics/dataframe_spec.rb CHANGED

@@ -79,10 +79,25 @@ describe Daru::DataFrame do
         f: [40,80,400]
         }, index: [:d, :e, :f]
       ))
+      test = Daru::DataFrame.rows([
+        [0.3543,0.4535,0.2424],
+        [0.123,0.53323,0.544],
+        [0.4345,0.4552,0.425]
+      ], order: [:a, :b, :c])
+      ans = Daru::DataFrame.new({
+        a: [0.0261607, -0.0071019, -0.0153640],
+        b: [-0.0071019, 0.0020747, 0.0056071],
+        c: [-0.0153640, 0.0056071, 0.0230777]
+      })
+      test.cov.each_vector_with_index do |v, i|
+        expect_correct_vector_in_delta v, ans[i], 0.01
+      end
     end
   end
-  context "#corr", focus: true do
+  context "#corr" do
     it "calculates the correlation between the numeric vectors of DataFrame" do
       expect(@df.corr).to eq(Daru::DataFrame.new({
         d: [1,1,1],

data/spec/math/statistics/vector_spec.rb CHANGED

@@ -1,35 +1,36 @@
 require 'spec_helper.rb'
 describe Daru::Vector do
-  [:array, :nmatrix].each do |dtype|
+  [:array, :gsl].each do |dtype| #nmatrix still unstable
     describe dtype do
-      before :each do
+      before do
         @dv = Daru::Vector.new [323, 11, 555, 666, 234, 21, 666, 343, 1, 2], dtype: dtype
-        @dv_with_md = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
+        @dv_with_nils = Daru::Vector.new [323, 11, 555, nil, 666, 234, 21, 666, 343, nil, 1, 2]
       end
       context "#mean" do
         it "calculates mean" do
           expect(@dv.mean).to eq(282.2)
-          expect(@dv_with_md.mean).to eq(282.2)
+          expect(@dv_with_nils.mean).to eq(282.2)
         end
       end
       context "#sum_of_squares" do
-        it "calcs sum of squares" do
-          @dv.sum_of_squares
+        it "calcs sum of squares, omits nil values" do
+          v = Daru::Vector.new [1,2,3,4,5,6], dtype: dtype
+          expect(v.sum_of_squares).to eq(17.5)
         end
       end
       context "#standard_deviation_sample" do
         it "calcs standard deviation sample" do
-          @dv.standard_deviation_sample
+          @dv_with_nils.standard_deviation_sample
         end
       end
       context "#variance_sample" do
         it "calculates sample variance" do
-          @dv.variance_sample
+          expect(@dv.variance).to be_within(0.01).of(75118.84)
         end
       end
@@ -41,7 +42,7 @@ describe Daru::Vector do
       context "#variance_population" do
         it "calculates population variance" do
-          expect(@dv.variance_population).to eq(67606.95999999999)
+          expect(@dv.variance_population).to be_within(0.001).of(67606.95999999999)
         end
       end
@@ -77,7 +78,8 @@ describe Daru::Vector do
       context "#product" do
         it "returns the product" do
-          @dv.product
+          v = Daru::Vector.new [1, 2, 3, 4, 5], dtype: dtype
+          expect(v.product).to eq(120)
         end
       end
@@ -99,35 +101,38 @@ describe Daru::Vector do
         end
       end
-      context "#percentile" do
-        it "calculates percentile" do
-          expect(@dv.percentile(50)).to eq(333.0)
+      context "#count" do
+        it "counts specified element" do
+          @dv.count(323)
         end
-      end
-      context "#recode" do
+        it "counts total number of elements" do
+          expect(@dv.count).to eq(10)
+        end
       end
-      context "#recode!" do
+      context "#coefficient_of_variation" do
+        it "calculates coefficient_of_variation" do
+          @dv.coefficient_of_variation
+        end
       end
-      context "#frequencies" do
-        it "calculates frequencies" do
-          @dv.frequencies
+      context "#percentile" do
+        it "calculates mid point percentile" do
+          expect(@dv.percentile(50)).to eq(278.5)
         end
       end
       context "#average_deviation_population" do
         it "calculates average_deviation_population" do
-          @dv.average_deviation_population
+          a = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype: dtype)
+          expect(a.average_deviation_population).to eq(20.quo(9).to_f)
         end
       end
       context "#proportion" do
         it "calculates proportion" do
-          @dv.proportion
+          expect(@dv.proportion(dtype == :gsl ? 1.0 : 1)).to eq(0.1)
         end
       end
@@ -137,43 +142,206 @@ describe Daru::Vector do
         end
       end
-      context "#ranked" do
-        it "curates by rank" do
-          @dv.ranked
+      context "#standard_error" do
+        it "calculates standard error" do
+          @dv.standard_error
         end
       end
-      context "#count" do
-        it "counts specified element" do
-          @dv.count(323)
-        end
-        it "counts total number of elements" do
-          expect(@dv.count).to eq(10)
+      context "#vector_standardized_compute" do
+        it "calculates vector_standardized_compute" do
+          @dv.vector_standardized_compute(@dv.mean, @dv.sd)
+          @dv_with_nils.vector_standardized_compute(@dv.mean, @dv.sd)
         end
       end
-      context "#coefficient_of_variation" do
-        it "calculates coefficient_of_variation" do
-          @dv.coefficient_of_variation
+      context "#vector_centered_compute" do
+        it "calculates vector_centered_compute" do
+          @dv.vector_centered_compute(@dv.mean)
+          @dv_with_nils.vector_centered_compute(@dv.mean)
         end
       end
+    end
+  end # ALL DTYPE tests
+  # Only Array tests
+  context "#percentile" do
+    it "tests linear percentile strategy" do
+      values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116].shuffle
+      expect(values.percentil(0, :linear)).to eq(102)
+      expect(values.percentil(25, :linear)).to eq(104.75)
+      expect(values.percentil(50, :linear)).to eq(108.5)
+      expect(values.percentil(75, :linear)).to eq(112.75)
+      expect(values.percentil(100, :linear)).to eq(116)
+      values = Daru::Vector.new [102, 104, 105, 107, 108, 109, 110, 112, 115, 116, 118].shuffle
+      expect(values.percentil(0, :linear)).to eq(102)
+      expect(values.percentil(25, :linear)).to eq(105)
+      expect(values.percentil(50, :linear)).to eq(109)
+      expect(values.percentil(75, :linear)).to eq(115)
+      expect(values.percentil(100, :linear)).to eq(118)
+    end
+  end
-      context "#factor" do
+  context "#frequencies" do
+    it "calculates frequencies" do
+      vector = Daru::Vector.new([5,5,5,5,5,6,6,7,8,9,10,1,2,3,4,nil,-99,-99])
+      expect(vector.frequencies).to eq({
+        1=>1, 2=>1, 3=>1, 4=>1, 5=>5,
+        6=>2, 7=>1, 8=>1, 9=>1,10=>1, -99=>2
+      })
+    end
+  end
-      end
+  context "#ranked" do
+    it "curates by rank" do
+      vector = Daru::Vector.new([nil, 0.8, 1.2, 1.2, 2.3, 18, nil])
+      expect(vector.ranked).to eq(Daru::Vector.new([nil,1,2.5,2.5,4,5,nil]))
-      context "#median_absolute_deviation" do
-        it "calculates median_absolute_deviation" do
-          @dv.median_absolute_deviation
-        end
-      end
+      v = Daru::Vector.new [0.8, 1.2, 1.2, 2.3, 18]
+      expect(v.ranked).to eq(Daru::Vector.new [1, 2.5, 2.5, 4, 5])
+    end
-      context "#standard_error" do
-        it "calculates standard error" do
-          @dv.standard_error
-        end
+    it "tests paired ties" do
+      a = Daru::Vector.new [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 4]
+      expected = Daru::Vector.new [2, 2, 2, 4.5, 4.5, 6, 7.5, 7.5, 10, 10, 10]
+      expect(a.ranked).to eq(expected)
+    end
+  end
+  context "#dichotomize" do
+    it "dichotomizes" do
+      a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
+      exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, nil]
+      expect(a.dichotomize).to eq(exp)
+      a = Daru::Vector.new [1, 1, 1, 2, 2, 2, 3]
+      exp = Daru::Vector.new [0, 0, 0, 1, 1, 1, 1]
+      expect(a.dichotomize).to eq(exp)
+      a = Daru::Vector.new [0, 0, 0, 1, 2, 3, nil]
+      exp = Daru::Vector.new [0, 0, 0, 0, 1, 1, nil]
+      expect(a.dichotomize(1)).to eq(exp)
+      a = Daru::Vector.new %w(a a a b c d)
+      exp = Daru::Vector.new [0, 0, 0, 1, 1, 1]
+      expect(a.dichotomize).to eq(exp)
+    end
+  end
+  context "#median_absolute_deviation" do
+    it "calculates median_absolute_deviation" do
+      a = Daru::Vector.new [1, 1, 2, 2, 4, 6, 9]
+      expect(a.median_absolute_deviation).to eq(1)
+    end
+  end
+  context "#round" do
+    it "rounds non-nil values" do
+      vector = Daru::Vector.new([1.44,55.32,nil,4])
+      expect(vector.round(1)).to eq(Daru::Vector.new([1.4,55.3,nil,4]))
+    end
+  end
+  context "#center" do
+    it "centers" do
+      mean = rand
+      samples = 11
+      centered = Daru::Vector.new(samples.times.map { |i| i - ((samples / 2).floor).to_i })
+      not_centered = centered.recode { |v| v + mean }
+      obs = not_centered.center
+      centered.each_with_index do |v, i|
+        expect(v).to be_within(0.0001).of(obs[i])
       end
     end
   end
+  context "#standardize" do
+    it "returns a standardized vector" do
+      vector = Daru::Vector.new([11,55,33,25,nil,22])
+      expect(vector.standardize.round(2)).to eq(
+        Daru::Vector.new([-1.11, 1.57, 0.23, -0.26,nil, -0.44])
+        )
+    end
+    it "tests for vector standardized with zero variance" do
+      v1 = Daru::Vector.new 100.times.map { |_i| 1 }
+      exp = Daru::Vector.new 100.times.map { nil }
+      expect(v1.standardize).to eq(exp)
+    end
+  end
+  context "#vector_percentile" do
+    it "replaces each non-nil value with its percentile value" do
+      vector = Daru::Vector.new([1,nil,nil,2,2,3,4,nil,nil,5,5,5,6,10])
+      expect(vector.vector_percentile).to eq(Daru::Vector.new(
+        [10,nil,nil,25,25,40,50,nil,nil,70,70,70,90,100])
+      )
+    end
+  end
+  context "#sample_with_replacement" do
+    it "calculates sample_with_replacement" do
+      vec =  Daru::Vector.new(
+        [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
+        name: :common_all_dtypes)
+      srand(1)
+      expect(vec.sample_with_replacement(100).size).to eq(100)
+      srand(1)
+      expect(vec.sample_with_replacement(100).size).to eq(100)
+    end
+  end
+  context "#sample_without_replacement" do
+    it "calculates sample_without_replacement" do
+      vec =  Daru::Vector.new(
+        [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99],
+        name: :common_all_dtypes)
+      srand(1)
+      expect(vec.sample_without_replacement(17).sort).to eq(
+        vec.only_valid.to_a.sort)
+      expect {
+        vec.sample_without_replacement(20)
+      }.to raise_error(ArgumentError)
+      srand(1)
+      expect(vec.sample_without_replacement(17).sort).to eq(
+        vec.only_valid.to_a.sort)
+    end
+  end
+  context "#jackknife" do
+    it "jack knife correctly with named method" do
+      a = Daru::Vector.new [1, 2, 3, 4]
+      df = a.jackknife(:mean)
+      expect(df[:mean].mean).to eq (a.mean)
+      df = a.jackknife([:mean, :sd])
+      expect(df[:mean].mean).to eq(a.mean)
+      expect(df[:mean].sd).to eq(a.sd)
+    end
+    it "jack knife correctly with custom method" do
+      a   = Daru::Vector.new [17.23, 18.71, 13.93, 18.81, 15.78, 11.29, 14.91, 13.39, 18.21, 11.57, 14.28, 10.94, 18.83, 15.52, 13.45, 15.25]
+      ds  = a.jackknife(log_s2: ->(v) {  Math.log(v.variance) })
+      exp = Daru::Vector.new [1.605, 2.972, 1.151, 3.097, 0.998, 3.308, 0.942, 1.393, 2.416, 2.951, 1.043, 3.806, 3.122, 0.958, 1.362, 0.937]
+      expect_correct_vector_in_delta ds[:log_s2], exp, 0.001
+      # expect(ds[:log_s2]).to be_within(0.001).of(exp)
+      expect(ds[:log_s2].mean).to be_within(0.00001).of(2.00389)
+      expect(ds[:log_s2].variance).to be_within(0.001).of(1.091)
+    end
+    it "jack knife correctly with k > 1" do
+      rng = Distribution::Normal.rng(0,1)
+      a   = Daru::Vector.new_with_size(6) { rng.call}
+      ds = a.jackknife(:mean, 2)
+      mean = a.mean
+      exp = Daru::Vector.new [3 * mean - 2 * (a[2] + a[3] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[4] + a[5]) / 4, 3 * mean - 2 * (a[0] + a[1] + a[2] + a[3]) / 4]
+      expect_correct_vector_in_delta(exp, ds[:mean], 1e-13)
+    end
+  end
 end

data/spec/spec_helper.rb CHANGED

@@ -1,6 +1,8 @@
 require 'rspec'
-require 'awesome_print'
 require 'matrix'
+require 'awesome_print'
+require 'distribution'
+require 'tempfile'
 def mri?
   RUBY_ENGINE == 'ruby'
@@ -16,6 +18,23 @@ else
   require 'nmatrix'
 end
 $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
-require 'daru'
+require 'daru'
+ALL_DTYPES = [:nmatrix, :gsl, :array]
+# FIXME: This must go! Need to be able to use be_within
+def expect_correct_vector_in_delta v1, v2, delta
+  expect(v1.size).to eq(v2.size)
+  (0...v1.size).each do |v|
+    expect(v1[v]).to be_within(delta).of(v2[v])
+  end
+end
+def expect_correct_df_in_delta df1, df2, delta
+  df1.each_vector_with_index do |vector, i|
+    expect_correct_vector_in_delta vector, df2[i], delta
+  end
+end