statsample 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/History.txt +4 -0
  4. data/README.md +4 -0
  5. data/lib/statsample/converter/csv.rb +41 -54
  6. data/lib/statsample/converters.rb +18 -19
  7. data/lib/statsample/version.rb +1 -1
  8. data/test/fixtures/scientific_notation.csv +4 -0
  9. data/test/helpers_tests.rb +37 -38
  10. data/test/test_analysis.rb +96 -97
  11. data/test/test_anova_contrast.rb +22 -22
  12. data/test/test_anovaoneway.rb +12 -12
  13. data/test/test_anovatwoway.rb +16 -17
  14. data/test/test_anovatwowaywithdataset.rb +22 -24
  15. data/test/test_anovawithvectors.rb +67 -69
  16. data/test/test_awesome_print_bug.rb +9 -9
  17. data/test/test_bartlettsphericity.rb +13 -13
  18. data/test/test_bivariate.rb +122 -126
  19. data/test/test_codification.rb +51 -49
  20. data/test/test_crosstab.rb +44 -40
  21. data/test/test_csv.rb +52 -70
  22. data/test/test_dataset.rb +347 -330
  23. data/test/test_dominance_analysis.rb +22 -24
  24. data/test/test_factor.rb +163 -166
  25. data/test/test_factor_map.rb +25 -30
  26. data/test/test_factor_pa.rb +28 -28
  27. data/test/test_ggobi.rb +19 -18
  28. data/test/test_gsl.rb +13 -15
  29. data/test/test_histogram.rb +74 -77
  30. data/test/test_matrix.rb +29 -31
  31. data/test/test_multiset.rb +132 -126
  32. data/test/test_regression.rb +143 -149
  33. data/test/test_reliability.rb +149 -155
  34. data/test/test_reliability_icc.rb +100 -104
  35. data/test/test_reliability_skillscale.rb +38 -40
  36. data/test/test_resample.rb +14 -12
  37. data/test/test_rserve_extension.rb +33 -33
  38. data/test/test_srs.rb +5 -5
  39. data/test/test_statistics.rb +52 -50
  40. data/test/test_stest.rb +27 -28
  41. data/test/test_stratified.rb +10 -10
  42. data/test/test_test_f.rb +17 -17
  43. data/test/test_test_kolmogorovsmirnov.rb +21 -21
  44. data/test/test_test_t.rb +52 -52
  45. data/test/test_umannwhitney.rb +16 -16
  46. data/test/test_vector.rb +419 -410
  47. data/test/test_wilcoxonsignedrank.rb +60 -63
  48. data/test/test_xls.rb +41 -41
  49. metadata +55 -5
  50. data/web/Rakefile +0 -39
data/test/test_factor_pa.rb CHANGED
@@ -1,52 +1,52 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- #require 'rserve'
3
- #require 'statsample/rserve_extension'
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
4
 
5
- class StatsampleFactorTestCase < MiniTest::Unit::TestCase
5
+ class StatsampleFactorTestCase < Minitest::Test
6
6
  include Statsample::Fixtures
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
- @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
9
+ @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
10
  end
11
+
11
12
  def test_parallelanalysis_with_data
12
13
  if Statsample.has_gsl?
13
- samples=100
14
- variables=10
15
- iterations=50
14
+ samples = 100
15
+ variables = 10
16
+ iterations = 50
16
17
  rng = Distribution::Normal.rng
17
- f1=samples.times.collect {rng.call}.to_scale
18
- f2=samples.times.collect {rng.call}.to_scale
19
- vectors={}
18
+ f1 = samples.times.collect { rng.call }.to_scale
19
+ f2 = samples.times.collect { rng.call }.to_scale
20
+ vectors = {}
20
21
  variables.times do |i|
21
- if i<5
22
- vectors["v#{i}"]=samples.times.collect {|nv|
23
- f1[nv]*5+f2[nv]*2+rng.call
22
+ if i < 5
23
+ vectors["v#{i}"] = samples.times.collect {|nv|
24
+ f1[nv] * 5 + f2[nv] * 2 + rng.call
24
25
  }.to_scale
25
26
  else
26
- vectors["v#{i}"]=samples.times.collect {|nv|
27
- f2[nv]*5+f1[nv]*2+rng.call
27
+ vectors["v#{i}"] = samples.times.collect {|nv|
28
+ f2[nv] * 5 + f1[nv] * 2 + rng.call
28
29
  }.to_scale
29
30
  end
30
-
31
31
  end
32
- ds=vectors.to_dataset
33
-
34
- pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
35
- pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
32
+ ds = vectors.to_dataset
33
+
34
+ pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
35
+ pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
36
36
  3.times do |n|
37
- var="ev_0000#{n+1}"
38
- assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.05)
37
+ var = "ev_0000#{n + 1}"
38
+ assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
39
39
  end
40
40
  else
41
- skip("Too slow without GSL")
41
+ skip('Too slow without GSL')
42
42
  end
43
-
44
43
  end
44
+
45
45
  def test_parallelanalysis
46
- pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
46
+ pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
47
47
  assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
48
48
  assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
49
49
  assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
50
- assert(pa.summary.size>0)
51
- end
50
+ assert(pa.summary.size > 0)
51
+ end
52
52
  end
data/test/test_ggobi.rb CHANGED
@@ -1,24 +1,25 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  require 'ostruct'
3
- class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
4
-
3
+ class StatsampleGGobiTestCase < Minitest::Test
5
4
  def setup
6
- v1=([10.2,20.3,10,20,30,40,30,20,30,40]*10).to_vector(:scale)
7
- @v2=(%w{a b c a a a b b c d}*10).to_vector(:nominal)
8
- @v2.labels={"a"=>"letter a","d"=>"letter d"}
9
- v3=([1,2,3,4,5,4,3,2,1,2]*10).to_vector(:ordinal)
10
- @ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
5
+ v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
6
+ @v2 = (%w(a b c a a a b b c d) * 10).to_vector(:nominal)
7
+ @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
8
+ v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
9
+ @ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
11
10
  end
11
+
12
12
  def test_values_definition
13
- a=[1.0,2,"a",nil]
14
- assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
13
+ a = [1.0, 2, 'a', nil]
14
+ assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(a, 'NA'))
15
15
  end
16
+
16
17
  def test_variable_definition
17
- carrier=OpenStruct.new
18
- carrier.categorials=[]
19
- carrier.conversions={}
20
- real_var_definition=Statsample::GGobi.variable_definition(carrier,@v2,'variable 2',"v2")
21
- expected=<<-EOS
18
+ carrier = OpenStruct.new
19
+ carrier.categorials = []
20
+ carrier.conversions = {}
21
+ real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
22
+ expected = <<-EOS
22
23
  <categoricalvariable name="variable 2" nickname="v2">
23
24
  <levels count="4">
24
25
  <level value="1">letter a</level>
@@ -27,8 +28,8 @@ class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
27
28
  <level value="4">letter d</level></levels>
28
29
  </categoricalvariable>
29
30
  EOS
30
- assert_equal(expected.gsub(/\s/," "),real_var_definition.gsub(/\s/," "))
31
- assert_equal({'variable 2'=>{'a'=>1,'b'=>2,'c'=>3,'d'=>4}},carrier.conversions)
32
- assert_equal(['variable 2'],carrier.categorials)
31
+ assert_equal(expected.gsub(/\s/, ' '), real_var_definition.gsub(/\s/, ' '))
32
+ assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
33
+ assert_equal(['variable 2'], carrier.categorials)
33
34
  end
34
35
  end
data/test/test_gsl.rb CHANGED
@@ -1,17 +1,15 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleGSLTestCase < MiniTest::Unit::TestCase
3
- should_with_gsl "matrix with gsl" do
4
- a=[1,2,3,4,20].to_vector(:scale)
5
- b=[3,2,3,4,50].to_vector(:scale)
6
- c=[6,2,3,4,3].to_vector(:scale)
7
- ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
8
- gsl=ds.to_matrix.to_gsl
9
- assert_equal(5,gsl.size1)
10
- assert_equal(3,gsl.size2)
11
- matrix=gsl.to_matrix
12
- assert_equal(5,matrix.row_size)
13
- assert_equal(3,matrix.column_size)
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleGSLTestCase < Minitest::Test
3
+ should_with_gsl 'matrix with gsl' do
4
+ a = [1, 2, 3, 4, 20].to_vector(:scale)
5
+ b = [3, 2, 3, 4, 50].to_vector(:scale)
6
+ c = [6, 2, 3, 4, 3].to_vector(:scale)
7
+ ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
8
+ gsl = ds.to_matrix.to_gsl
9
+ assert_equal(5, gsl.size1)
10
+ assert_equal(3, gsl.size2)
11
+ matrix = gsl.to_matrix
12
+ assert_equal(5, matrix.row_size)
13
+ assert_equal(3, matrix.column_size)
14
14
  end
15
15
  end
16
-
17
-
data/test/test_histogram.rb CHANGED
@@ -1,112 +1,109 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
-
4
- class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
3
+ class StatsampleHistogramTestCase < Minitest::Test
5
4
  context Statsample::Histogram do
6
- should "alloc correctly with integer" do
5
+ should 'alloc correctly with integer' do
7
6
  h = Statsample::Histogram.alloc(4)
8
- assert_equal([0.0]*4, h.bin)
9
- assert_equal([0.0]*5, h.range)
7
+ assert_equal([0.0] * 4, h.bin)
8
+ assert_equal([0.0] * 5, h.range)
10
9
  end
11
- should "alloc correctly with array" do
10
+ should 'alloc correctly with array' do
12
11
  h = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
13
- assert_equal([0.0]*4, h.bin)
14
- assert_equal([1,3,7,9,20], h.range)
12
+ assert_equal([0.0] * 4, h.bin)
13
+ assert_equal([1, 3, 7, 9, 20], h.range)
15
14
  end
16
- should "alloc correctly with integer and min, max array" do
15
+ should 'alloc correctly with integer and min, max array' do
17
16
  h = Statsample::Histogram.alloc(5, [0, 5])
18
- assert_equal([0.0,1.0,2.0,3.0,4.0,5.0], h.range)
19
- assert_equal([0.0]*5,h.bin)
17
+ assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], h.range)
18
+ assert_equal([0.0] * 5, h.bin)
20
19
  end
21
- should "bin() method return correct number of bins" do
20
+ should 'bin() method return correct number of bins' do
22
21
  h = Statsample::Histogram.alloc(4)
23
- assert_equal(4,h.bins)
22
+ assert_equal(4, h.bins)
24
23
  end
25
- should "increment correctly" do
26
- h = Statsample::Histogram.alloc(5, [0, 5])
24
+ should 'increment correctly' do
25
+ h = Statsample::Histogram.alloc(5, [0, 5])
27
26
  h.increment 2.5
28
- assert_equal([0.0,0.0,1.0,0.0,0.0], h.bin)
29
- h.increment [0.5,0.5,3.5,3.5]
30
- assert_equal([2.0,0.0,1.0,2.0,0.0], h.bin)
27
+ assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], h.bin)
28
+ h.increment [0.5, 0.5, 3.5, 3.5]
29
+ assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], h.bin)
31
30
  h.increment 0
32
- assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
31
+ assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
33
32
  h.increment 5
34
- assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
33
+ assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
35
34
  end
36
-
37
- should "alloc_uniform correctly with n, min,max" do
38
- h = Statsample::Histogram.alloc_uniform(5,0,10)
39
- assert_equal(5,h.bins)
40
- assert_equal([0.0]*5,h.bin)
41
- assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
35
+
36
+ should 'alloc_uniform correctly with n, min,max' do
37
+ h = Statsample::Histogram.alloc_uniform(5, 0, 10)
38
+ assert_equal(5, h.bins)
39
+ assert_equal([0.0] * 5, h.bin)
40
+ assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
42
41
  end
43
- should "alloc_uniform correctly with n, [min,max]" do
42
+ should 'alloc_uniform correctly with n, [min,max]' do
44
43
  h = Statsample::Histogram.alloc_uniform(5, [0, 10])
45
- assert_equal(5,h.bins)
46
- assert_equal([0.0]*5,h.bin)
47
- assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
44
+ assert_equal(5, h.bins)
45
+ assert_equal([0.0] * 5, h.bin)
46
+ assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
48
47
  end
49
- should "get_range()" do
50
- h = Statsample::Histogram.alloc_uniform(5,2,12)
48
+ should 'get_range()' do
49
+ h = Statsample::Histogram.alloc_uniform(5, 2, 12)
51
50
  5.times {|i|
52
- assert_equal([2+i*2, 4+i*2], h.get_range(i))
51
+ assert_equal([2 + i * 2, 4 + i * 2], h.get_range(i))
52
+ }
53
+ end
54
+ should 'min() and max()' do
55
+ h = Statsample::Histogram.alloc_uniform(5, 2, 12)
56
+ assert_equal(2, h.min)
57
+ assert_equal(12, h.max)
58
+ end
59
+ should 'max_val()' do
60
+ h = Statsample::Histogram.alloc(5, [0, 5])
61
+ 100.times { h.increment(rand * 5) }
62
+ max = h.bin[0]
63
+ (1..4).each {|i|
64
+ max = h.bin[i] if h.bin[i] > max
53
65
  }
66
+ assert_equal(max, h.max_val)
54
67
  end
55
- should "min() and max()" do
56
- h=Statsample::Histogram.alloc_uniform(5,2,12)
57
- assert_equal(2,h.min)
58
- assert_equal(12,h.max)
59
- end
60
- should "max_val()" do
61
- h = Statsample::Histogram.alloc(5, [0, 5])
62
- 100.times {h.increment(rand*5)}
63
- max=h.bin[0]
64
- (1..4).each {|i|
65
- max = h.bin[i] if h.bin[i] > max
66
- }
67
- assert_equal(max,h.max_val)
68
- end
69
- should "min_val()" do
70
- h = Statsample::Histogram.alloc(5, [0, 5])
71
- 100.times {h.increment(rand*5)}
72
- min=h.bin[0]
73
- (1..4).each {|i|
74
- min = h.bin[i] if h.bin[i]<min
75
- }
76
- assert_equal(min,h.min_val)
77
- end
78
- should "return correct estimated mean" do
79
- a=[1.5,1.5,1.5,3.5,3.5,3.5].to_scale
80
- h=Statsample::Histogram.alloc(5,[0,5])
68
+ should 'min_val()' do
69
+ h = Statsample::Histogram.alloc(5, [0, 5])
70
+ 100.times { h.increment(rand * 5) }
71
+ min = h.bin[0]
72
+ (1..4).each {|i|
73
+ min = h.bin[i] if h.bin[i] < min
74
+ }
75
+ assert_equal(min, h.min_val)
76
+ end
77
+ should 'return correct estimated mean' do
78
+ a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_scale
79
+ h = Statsample::Histogram.alloc(5, [0, 5])
81
80
  h.increment(a)
82
81
  assert_equal(2.5, h.estimated_mean)
83
82
  end
84
- should "return correct estimated standard deviation" do
85
- a=[0.5,1.5,1.5,1.5,2.5, 3.5,3.5,3.5,4.5].to_scale
86
- h=Statsample::Histogram.alloc(5,[0,5])
83
+ should 'return correct estimated standard deviation' do
84
+ a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_scale
85
+ h = Statsample::Histogram.alloc(5, [0, 5])
87
86
  h.increment(a)
88
87
  assert_equal(a.sd, h.estimated_standard_deviation)
89
88
  end
90
- should "return correct sum for all values" do
91
- h=Statsample::Histogram.alloc(5,[0,5])
92
- n=rand(100)
93
- n.times { h.increment(1)}
89
+ should 'return correct sum for all values' do
90
+ h = Statsample::Histogram.alloc(5, [0, 5])
91
+ n = rand(100)
92
+ n.times { h.increment(1) }
94
93
  assert_equal(n, h.sum)
95
94
  end
96
- should "return correct sum for a subset of values" do
97
- h=Statsample::Histogram.alloc(5,[0,5])
98
- h.increment([0.5,2.5,4.5])
99
- assert_equal(1,h.sum(0,1))
100
- assert_equal(2,h.sum(1,4))
101
-
95
+ should 'return correct sum for a subset of values' do
96
+ h = Statsample::Histogram.alloc(5, [0, 5])
97
+ h.increment([0.5, 2.5, 4.5])
98
+ assert_equal(1, h.sum(0, 1))
99
+ assert_equal(2, h.sum(1, 4))
102
100
  end
103
- should "not raise exception when all values equal" do
101
+ should 'not raise exception when all values equal' do
104
102
  assert_nothing_raised do
105
- a = [5,5,5,5,5,5].to_scale
106
- h=Statsample::Graph::Histogram.new(a)
103
+ a = [5, 5, 5, 5, 5, 5].to_scale
104
+ h = Statsample::Graph::Histogram.new(a)
107
105
  h.to_svg
108
106
  end
109
107
  end
110
-
111
108
  end
112
109
  end
data/test/test_matrix.rb CHANGED
@@ -1,50 +1,48 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
- class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
4
-
3
+ class StatsampleMatrixTestCase < Minitest::Test
5
4
  def test_to_dataset
6
- m=Matrix[[1,4],[2,5],[3,6]]
5
+ m = Matrix[[1, 4], [2, 5], [3, 6]]
7
6
  m.extend Statsample::NamedMatrix
8
- m.fields_y=%w{x1 x2}
9
- m.name="test"
10
- samples=100
11
- x1=[1,2,3].to_scale
12
- x2=[4,5,6].to_scale
13
- ds={'x1'=>x1,'x2'=>x2}.to_dataset
14
- ds.name="test"
15
- obs=m.to_dataset
16
- assert_equal(ds['x1'],obs['x1'])
17
- assert_equal(ds['x2'],obs['x2'])
18
- assert_equal(ds['x1'].mean,obs['x1'].mean)
19
-
20
-
7
+ m.fields_y = %w(x1 x2)
8
+ m.name = 'test'
9
+ samples = 100
10
+ x1 = [1, 2, 3].to_scale
11
+ x2 = [4, 5, 6].to_scale
12
+ ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
13
+ ds.name = 'test'
14
+ obs = m.to_dataset
15
+ assert_equal(ds['x1'], obs['x1'])
16
+ assert_equal(ds['x2'], obs['x2'])
17
+ assert_equal(ds['x1'].mean, obs['x1'].mean)
21
18
  end
19
+
22
20
  def test_covariate
23
- a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
21
+ a = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
24
22
  a.extend Statsample::CovariateMatrix
25
- a.fields=%w{a b c}
23
+ a.fields = %w(a b c)
26
24
  assert_equal(:correlation, a._type)
27
25
 
28
- assert_equal(Matrix[[0.5],[0.3]], a.submatrix(%w{c a}, %w{b}))
29
- assert_equal(Matrix[[1.0, 0.2] , [0.2, 1.0]], a.submatrix(%w{c a}))
30
- assert_equal(:correlation, a.submatrix(%w{c a})._type)
26
+ assert_equal(Matrix[[0.5], [0.3]], a.submatrix(%w(c a), %w(b)))
27
+ assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], a.submatrix(%w(c a)))
28
+ assert_equal(:correlation, a.submatrix(%w(c a))._type)
31
29
 
32
- a=Matrix[[20,30,10], [30,60,50], [10,50,50]]
30
+ a = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
33
31
 
34
32
  a.extend Statsample::CovariateMatrix
35
33
 
36
34
  assert_equal(:covariance, a._type)
37
35
 
38
- a=50.times.collect {rand()}.to_scale
39
- b=50.times.collect {rand()}.to_scale
40
- c=50.times.collect {rand()}.to_scale
41
- ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
42
- corr=Statsample::Bivariate.correlation_matrix(ds)
43
- real=Statsample::Bivariate.covariance_matrix(ds).correlation
36
+ a = 50.times.collect { rand }.to_scale
37
+ b = 50.times.collect { rand }.to_scale
38
+ c = 50.times.collect { rand }.to_scale
39
+ ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
40
+ corr = Statsample::Bivariate.correlation_matrix(ds)
41
+ real = Statsample::Bivariate.covariance_matrix(ds).correlation
44
42
  corr.row_size.times do |i|
45
43
  corr.column_size.times do |j|
46
- assert_in_delta(corr[i,j], real[i,j],1e-15)
44
+ assert_in_delta(corr[i, j], real[i, j], 1e-15)
47
45
  end
48
46
  end
49
- end
47
+ end
50
48
  end
data/test/test_multiset.rb CHANGED
@@ -1,158 +1,164 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
-
4
- class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
3
+ class StatsampleMultisetTestCase < Minitest::Test
5
4
  def setup
6
- @x=%w{a a a a b b b b}.to_vector
7
- @y=[1,2,3,4,5,6,7,8].to_scale
8
- @z=[10,11,12,13,14,15,16,17].to_scale
9
- @ds={'x'=>@x,'y'=>@y,'z'=>@z}.to_dataset
10
- @ms=@ds.to_multiset_by_split('x')
5
+ @x = %w(a a a a b b b b).to_vector
6
+ @y = [1, 2, 3, 4, 5, 6, 7, 8].to_scale
7
+ @z = [10, 11, 12, 13, 14, 15, 16, 17].to_scale
8
+ @ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
9
+ @ms = @ds.to_multiset_by_split('x')
11
10
  end
11
+
12
12
  def test_creation
13
- v1a=[1,2,3,4,5].to_vector
14
- v2b=[11,21,31,41,51].to_vector
15
- v3c=[21,23,34,45,56].to_vector
16
- ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
17
- v1b=[15,25,35,45,55].to_vector
18
- v2b=[11,21,31,41,51].to_vector
19
- v3b=[21,23,34,45,56].to_vector
20
- ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
21
- ms=Statsample::Multiset.new(['v1','v2','v3'])
22
- ms.add_dataset('ds1',ds1)
23
- ms.add_dataset('ds2',ds2)
24
- assert_equal(ds1,ms['ds1'])
25
- assert_equal(ds2,ms['ds2'])
26
- assert_equal(v1a,ms['ds1']['v1'])
27
- assert_not_equal(v1b,ms['ds1']['v1'])
28
- ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
13
+ v1a = [1, 2, 3, 4, 5].to_vector
14
+ v2b = [11, 21, 31, 41, 51].to_vector
15
+ v3c = [21, 23, 34, 45, 56].to_vector
16
+ ds1 = { 'v1' => v1a, 'v2' => v2b, 'v3' => v3c }.to_dataset
17
+ v1b = [15, 25, 35, 45, 55].to_vector
18
+ v2b = [11, 21, 31, 41, 51].to_vector
19
+ v3b = [21, 23, 34, 45, 56].to_vector
20
+ ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
21
+ ms = Statsample::Multiset.new(%w(v1 v2 v3))
22
+ ms.add_dataset('ds1', ds1)
23
+ ms.add_dataset('ds2', ds2)
24
+ assert_equal(ds1, ms['ds1'])
25
+ assert_equal(ds2, ms['ds2'])
26
+ assert_equal(v1a, ms['ds1']['v1'])
27
+ assert_not_equal(v1b, ms['ds1']['v1'])
28
+ ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
29
29
  assert_raise ArgumentError do
30
30
  ms.add_dataset(ds3)
31
31
  end
32
32
  end
33
+
33
34
  def test_creation_empty
34
- ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
35
- ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
36
- ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
37
- ms2=Statsample::Multiset.new(%w{id age name})
38
- ms2.add_dataset('male',ds_male)
39
- ms2.add_dataset('female',ds_female)
40
- assert_equal(ms2.fields,ms.fields)
41
- assert_equal(ms2['male'],ms['male'])
42
- assert_equal(ms2['female'],ms['female'])
35
+ ms = Statsample::Multiset.new_empty_vectors(%w(id age name), %w(male female))
36
+ ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
37
+ ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
38
+ ms2 = Statsample::Multiset.new(%w(id age name))
39
+ ms2.add_dataset('male', ds_male)
40
+ ms2.add_dataset('female', ds_female)
41
+ assert_equal(ms2.fields, ms.fields)
42
+ assert_equal(ms2['male'], ms['male'])
43
+ assert_equal(ms2['female'], ms['female'])
43
44
  end
45
+
44
46
  def test_to_multiset_by_split_one
45
- sex=%w{m m m m m f f f f m}.to_vector(:nominal)
46
- city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
47
- age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
48
- ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
49
- ms=ds.to_multiset_by_split('sex')
50
- assert_equal(2,ms.n_datasets)
51
- assert_equal(%w{f m},ms.datasets.keys.sort)
52
- assert_equal(6,ms['m'].cases)
53
- assert_equal(4,ms['f'].cases)
54
- assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
55
- assert_equal([34,33,35,36],ms['f']['age'].to_a)
47
+ sex = %w(m m m m m f f f f m).to_vector(:nominal)
48
+ city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:nominal)
49
+ age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
50
+ ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
51
+ ms = ds.to_multiset_by_split('sex')
52
+ assert_equal(2, ms.n_datasets)
53
+ assert_equal(%w(f m), ms.datasets.keys.sort)
54
+ assert_equal(6, ms['m'].cases)
55
+ assert_equal(4, ms['f'].cases)
56
+ assert_equal(%w(London Paris NY London Paris Tome), ms['m']['city'].to_a)
57
+ assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
56
58
  end
59
+
57
60
  def test_to_multiset_by_split_multiple
58
- sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
59
- city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
60
- hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
61
- age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
62
- ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
63
- ms=ds.to_multiset_by_split('sex','city','hair')
64
- assert_equal(8,ms.n_datasets)
65
- assert_equal(3,ms[%w{m London blonde}].cases)
66
- assert_equal(3,ms[%w{m London blonde}].cases)
67
- assert_equal(1,ms[%w{m Paris black}].cases)
61
+ sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:nominal)
62
+ city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:nominal)
63
+ hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:nominal)
64
+ age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
65
+ ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
66
+ ms = ds.to_multiset_by_split('sex', 'city', 'hair')
67
+ assert_equal(8, ms.n_datasets)
68
+ assert_equal(3, ms[%w(m London blonde)].cases)
69
+ assert_equal(3, ms[%w(m London blonde)].cases)
70
+ assert_equal(1, ms[%w(m Paris black)].cases)
68
71
  end
69
72
 
70
73
  def test_stratum_proportion
71
- ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
72
- ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
73
- assert_equal(5.0/12, ds1['q1'].proportion )
74
- assert_equal(7.0/9, ds2['q1'].proportion )
75
- ms=Statsample::Multiset.new(['q1'])
76
- ms.add_dataset('d1',ds1)
77
- ms.add_dataset('d2',ds2)
78
- ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
79
- assert_in_delta(0.655, ss.proportion('q1'),0.01)
80
- assert_in_delta(0.345, ss.proportion('q1',0),0.01)
81
-
74
+ ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
75
+ ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
76
+ assert_equal(5.0 / 12, ds1['q1'].proportion)
77
+ assert_equal(7.0 / 9, ds2['q1'].proportion)
78
+ ms = Statsample::Multiset.new(['q1'])
79
+ ms.add_dataset('d1', ds1)
80
+ ms.add_dataset('d2', ds2)
81
+ ss = Statsample::StratifiedSample.new(ms, 'd1' => 50, 'd2' => 100)
82
+ assert_in_delta(0.655, ss.proportion('q1'), 0.01)
83
+ assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
82
84
  end
85
+
83
86
  def test_stratum_scale
84
- boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
85
- girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
86
- ms=Statsample::Multiset.new(['test'])
87
- ms.add_dataset('boys',boys)
88
- ms.add_dataset('girls',girls)
89
- ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
90
- assert_equal(2,ss.strata_number)
91
- assert_equal(20000,ss.population_size)
92
- assert_equal(10000,ss.stratum_size('boys'))
93
- assert_equal(10000,ss.stratum_size('girls'))
94
- assert_equal(36,ss.sample_size)
95
- assert_equal(75,ss.mean('test'))
96
- assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
97
- assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
87
+ boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
88
+ girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset
89
+ ms = Statsample::Multiset.new(['test'])
90
+ ms.add_dataset('boys', boys)
91
+ ms.add_dataset('girls', girls)
92
+ ss = Statsample::StratifiedSample.new(ms, 'boys' => 10_000, 'girls' => 10_000)
93
+ assert_equal(2, ss.strata_number)
94
+ assert_equal(20_000, ss.population_size)
95
+ assert_equal(10_000, ss.stratum_size('boys'))
96
+ assert_equal(10_000, ss.stratum_size('girls'))
97
+ assert_equal(36, ss.sample_size)
98
+ assert_equal(75, ss.mean('test'))
99
+ assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
100
+ assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
98
101
  end
102
+
99
103
  def test_each
100
- xpe={
101
- 'a'=>%w{a a a a}.to_vector,
102
- 'b'=>%w{b b b b}.to_vector
104
+ xpe = {
105
+ 'a' => %w(a a a a).to_vector,
106
+ 'b' => %w(b b b b).to_vector
103
107
  }
104
- ype={
105
- 'a'=>[1,2,3,4].to_scale,
106
- 'b'=>[5,6,7,8].to_scale,
108
+ ype = {
109
+ 'a' => [1, 2, 3, 4].to_scale,
110
+ 'b' => [5, 6, 7, 8].to_scale
107
111
  }
108
- zpe={
109
- 'a'=>[10,11,12,13].to_scale,
110
- 'b'=>[14,15,16,17].to_scale,
112
+ zpe = {
113
+ 'a' => [10, 11, 12, 13].to_scale,
114
+ 'b' => [14, 15, 16, 17].to_scale
111
115
  }
112
- xp,yp,zp=Hash.new(),Hash.new(),Hash.new()
113
- @ms.each {|k,ds|
114
- xp[k]=ds['x']
115
- yp[k]=ds['y']
116
- zp[k]=ds['z']
116
+ xp, yp, zp = {}, {}, {}
117
+ @ms.each {|k, ds|
118
+ xp[k] = ds['x']
119
+ yp[k] = ds['y']
120
+ zp[k] = ds['z']
117
121
  }
118
- assert_equal(xpe,xp)
119
- assert_equal(ype,yp)
120
- assert_equal(zpe,zp)
121
-
122
+ assert_equal(xpe, xp)
123
+ assert_equal(ype, yp)
124
+ assert_equal(zpe, zp)
122
125
  end
126
+
123
127
  def test_multiset_union_with_block
124
-
125
- r1=rand()
126
- r2=rand()
127
- ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale
128
-
129
- ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
130
-
131
- ds2=@ms.union {|k,ds|
132
- ds['y'].recode!{|v|
133
- k=='a' ? v*r1 : v*r2}
134
- ds['z'].recode!{|v|
135
- k=='a' ? v*r1 : v*r2}
128
+ r1 = rand
129
+ r2 = rand
130
+ ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
131
+
132
+ ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
133
+
134
+ ds2 = @ms.union {|k, ds|
135
+ ds['y'].recode!{|v|
136
+ k == 'a' ? v * r1 : v * r2
137
+ }
138
+ ds['z'].recode!{|v|
139
+ k == 'a' ? v * r1 : v * r2
140
+ }
136
141
  }
137
- assert_equal(ye,ds2['y'])
138
- assert_equal(ze,ds2['z'])
142
+ assert_equal(ye, ds2['y'])
143
+ assert_equal(ze, ds2['z'])
139
144
  end
145
+
140
146
  def test_multiset_union
141
- r1=rand()
142
- r2=rand()
143
- ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale
144
-
145
- ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
146
- @ms.each {|k,ds|
147
- ds['y'].recode!{|v|
148
- k=='a' ? v*r1 : v*r2}
149
- ds['z'].recode!{|v|
150
- k=='a' ? v*r1 : v*r2}
151
-
147
+ r1 = rand
148
+ r2 = rand
149
+ ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
150
+
151
+ ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
152
+ @ms.each {|k, ds|
153
+ ds['y'].recode!{|v|
154
+ k == 'a' ? v * r1 : v * r2
155
+ }
156
+ ds['z'].recode!{|v|
157
+ k == 'a' ? v * r1 : v * r2
158
+ }
152
159
  }
153
- ds2=@ms.union
154
- assert_equal(ye,ds2['y'])
155
- assert_equal(ze,ds2['z'])
156
-
160
+ ds2 = @ms.union
161
+ assert_equal(ye, ds2['y'])
162
+ assert_equal(ze, ds2['z'])
157
163
  end
158
164
  end