statsample 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -3
  3. data/History.txt +4 -0
  4. data/README.md +4 -0
  5. data/lib/statsample/converter/csv.rb +41 -54
  6. data/lib/statsample/converters.rb +18 -19
  7. data/lib/statsample/version.rb +1 -1
  8. data/test/fixtures/scientific_notation.csv +4 -0
  9. data/test/helpers_tests.rb +37 -38
  10. data/test/test_analysis.rb +96 -97
  11. data/test/test_anova_contrast.rb +22 -22
  12. data/test/test_anovaoneway.rb +12 -12
  13. data/test/test_anovatwoway.rb +16 -17
  14. data/test/test_anovatwowaywithdataset.rb +22 -24
  15. data/test/test_anovawithvectors.rb +67 -69
  16. data/test/test_awesome_print_bug.rb +9 -9
  17. data/test/test_bartlettsphericity.rb +13 -13
  18. data/test/test_bivariate.rb +122 -126
  19. data/test/test_codification.rb +51 -49
  20. data/test/test_crosstab.rb +44 -40
  21. data/test/test_csv.rb +52 -70
  22. data/test/test_dataset.rb +347 -330
  23. data/test/test_dominance_analysis.rb +22 -24
  24. data/test/test_factor.rb +163 -166
  25. data/test/test_factor_map.rb +25 -30
  26. data/test/test_factor_pa.rb +28 -28
  27. data/test/test_ggobi.rb +19 -18
  28. data/test/test_gsl.rb +13 -15
  29. data/test/test_histogram.rb +74 -77
  30. data/test/test_matrix.rb +29 -31
  31. data/test/test_multiset.rb +132 -126
  32. data/test/test_regression.rb +143 -149
  33. data/test/test_reliability.rb +149 -155
  34. data/test/test_reliability_icc.rb +100 -104
  35. data/test/test_reliability_skillscale.rb +38 -40
  36. data/test/test_resample.rb +14 -12
  37. data/test/test_rserve_extension.rb +33 -33
  38. data/test/test_srs.rb +5 -5
  39. data/test/test_statistics.rb +52 -50
  40. data/test/test_stest.rb +27 -28
  41. data/test/test_stratified.rb +10 -10
  42. data/test/test_test_f.rb +17 -17
  43. data/test/test_test_kolmogorovsmirnov.rb +21 -21
  44. data/test/test_test_t.rb +52 -52
  45. data/test/test_umannwhitney.rb +16 -16
  46. data/test/test_vector.rb +419 -410
  47. data/test/test_wilcoxonsignedrank.rb +60 -63
  48. data/test/test_xls.rb +41 -41
  49. metadata +55 -5
  50. data/web/Rakefile +0 -39
data/test/test_factor_pa.rb CHANGED
@@ -1,52 +1,52 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- #require 'rserve'
3
- #require 'statsample/rserve_extension'
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ # require 'rserve'
3
+ # require 'statsample/rserve_extension'
4
4
 
5
- class StatsampleFactorTestCase < MiniTest::Unit::TestCase
5
+ class StatsampleFactorTestCase < Minitest::Test
6
6
  include Statsample::Fixtures
7
7
  # Based on Hardle and Simar
8
8
  def setup
9
- @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
9
+ @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
10
10
  end
11
+
11
12
  def test_parallelanalysis_with_data
12
13
  if Statsample.has_gsl?
13
- samples=100
14
- variables=10
15
- iterations=50
14
+ samples = 100
15
+ variables = 10
16
+ iterations = 50
16
17
  rng = Distribution::Normal.rng
17
- f1=samples.times.collect {rng.call}.to_scale
18
- f2=samples.times.collect {rng.call}.to_scale
19
- vectors={}
18
+ f1 = samples.times.collect { rng.call }.to_scale
19
+ f2 = samples.times.collect { rng.call }.to_scale
20
+ vectors = {}
20
21
  variables.times do |i|
21
- if i<5
22
- vectors["v#{i}"]=samples.times.collect {|nv|
23
- f1[nv]*5+f2[nv]*2+rng.call
22
+ if i < 5
23
+ vectors["v#{i}"] = samples.times.collect {|nv|
24
+ f1[nv] * 5 + f2[nv] * 2 + rng.call
24
25
  }.to_scale
25
26
  else
26
- vectors["v#{i}"]=samples.times.collect {|nv|
27
- f2[nv]*5+f1[nv]*2+rng.call
27
+ vectors["v#{i}"] = samples.times.collect {|nv|
28
+ f2[nv] * 5 + f1[nv] * 2 + rng.call
28
29
  }.to_scale
29
30
  end
30
-
31
31
  end
32
- ds=vectors.to_dataset
33
-
34
- pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations)
35
- pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95)
32
+ ds = vectors.to_dataset
33
+
34
+ pa1 = Statsample::Factor::ParallelAnalysis.new(ds, bootstrap_method: :data, iterations: iterations)
35
+ pa2 = Statsample::Factor::ParallelAnalysis.with_random_data(samples, variables, iterations: iterations, percentil: 95)
36
36
  3.times do |n|
37
- var="ev_0000#{n+1}"
38
- assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean,0.05)
37
+ var = "ev_0000#{n + 1}"
38
+ assert_in_delta(pa1.ds_eigenvalues[var].mean, pa2.ds_eigenvalues[var].mean, 0.05)
39
39
  end
40
40
  else
41
- skip("Too slow without GSL")
41
+ skip('Too slow without GSL')
42
42
  end
43
-
44
43
  end
44
+
45
45
  def test_parallelanalysis
46
- pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95)
46
+ pa = Statsample::Factor::ParallelAnalysis.with_random_data(305, 8, iterations: 100, percentil: 95)
47
47
  assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01)
48
48
  assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01)
49
49
  assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01)
50
- assert(pa.summary.size>0)
51
- end
50
+ assert(pa.summary.size > 0)
51
+ end
52
52
  end
data/test/test_ggobi.rb CHANGED
@@ -1,24 +1,25 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
  require 'ostruct'
3
- class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
4
-
3
+ class StatsampleGGobiTestCase < Minitest::Test
5
4
  def setup
6
- v1=([10.2,20.3,10,20,30,40,30,20,30,40]*10).to_vector(:scale)
7
- @v2=(%w{a b c a a a b b c d}*10).to_vector(:nominal)
8
- @v2.labels={"a"=>"letter a","d"=>"letter d"}
9
- v3=([1,2,3,4,5,4,3,2,1,2]*10).to_vector(:ordinal)
10
- @ds={'v1'=>v1,'v2'=>@v2,'v3'=>v3}.to_dataset
5
+ v1 = ([10.2, 20.3, 10, 20, 30, 40, 30, 20, 30, 40] * 10).to_vector(:scale)
6
+ @v2 = (%w(a b c a a a b b c d) * 10).to_vector(:nominal)
7
+ @v2.labels = { 'a' => 'letter a', 'd' => 'letter d' }
8
+ v3 = ([1, 2, 3, 4, 5, 4, 3, 2, 1, 2] * 10).to_vector(:ordinal)
9
+ @ds = { 'v1' => v1, 'v2' => @v2, 'v3' => v3 }.to_dataset
11
10
  end
11
+
12
12
  def test_values_definition
13
- a=[1.0,2,"a",nil]
14
- assert_equal("1.0 2 a NA", Statsample::GGobi.values_definition(a,"NA"))
13
+ a = [1.0, 2, 'a', nil]
14
+ assert_equal('1.0 2 a NA', Statsample::GGobi.values_definition(a, 'NA'))
15
15
  end
16
+
16
17
  def test_variable_definition
17
- carrier=OpenStruct.new
18
- carrier.categorials=[]
19
- carrier.conversions={}
20
- real_var_definition=Statsample::GGobi.variable_definition(carrier,@v2,'variable 2',"v2")
21
- expected=<<-EOS
18
+ carrier = OpenStruct.new
19
+ carrier.categorials = []
20
+ carrier.conversions = {}
21
+ real_var_definition = Statsample::GGobi.variable_definition(carrier, @v2, 'variable 2', 'v2')
22
+ expected = <<-EOS
22
23
  <categoricalvariable name="variable 2" nickname="v2">
23
24
  <levels count="4">
24
25
  <level value="1">letter a</level>
@@ -27,8 +28,8 @@ class StatsampleGGobiTestCase < MiniTest::Unit::TestCase
27
28
  <level value="4">letter d</level></levels>
28
29
  </categoricalvariable>
29
30
  EOS
30
- assert_equal(expected.gsub(/\s/," "),real_var_definition.gsub(/\s/," "))
31
- assert_equal({'variable 2'=>{'a'=>1,'b'=>2,'c'=>3,'d'=>4}},carrier.conversions)
32
- assert_equal(['variable 2'],carrier.categorials)
31
+ assert_equal(expected.gsub(/\s/, ' '), real_var_definition.gsub(/\s/, ' '))
32
+ assert_equal({ 'variable 2' => { 'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4 } }, carrier.conversions)
33
+ assert_equal(['variable 2'], carrier.categorials)
33
34
  end
34
35
  end
data/test/test_gsl.rb CHANGED
@@ -1,17 +1,15 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
- class StatsampleGSLTestCase < MiniTest::Unit::TestCase
3
- should_with_gsl "matrix with gsl" do
4
- a=[1,2,3,4,20].to_vector(:scale)
5
- b=[3,2,3,4,50].to_vector(:scale)
6
- c=[6,2,3,4,3].to_vector(:scale)
7
- ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
8
- gsl=ds.to_matrix.to_gsl
9
- assert_equal(5,gsl.size1)
10
- assert_equal(3,gsl.size2)
11
- matrix=gsl.to_matrix
12
- assert_equal(5,matrix.row_size)
13
- assert_equal(3,matrix.column_size)
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
+ class StatsampleGSLTestCase < Minitest::Test
3
+ should_with_gsl 'matrix with gsl' do
4
+ a = [1, 2, 3, 4, 20].to_vector(:scale)
5
+ b = [3, 2, 3, 4, 50].to_vector(:scale)
6
+ c = [6, 2, 3, 4, 3].to_vector(:scale)
7
+ ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
8
+ gsl = ds.to_matrix.to_gsl
9
+ assert_equal(5, gsl.size1)
10
+ assert_equal(3, gsl.size2)
11
+ matrix = gsl.to_matrix
12
+ assert_equal(5, matrix.row_size)
13
+ assert_equal(3, matrix.column_size)
14
14
  end
15
15
  end
16
-
17
-
data/test/test_histogram.rb CHANGED
@@ -1,112 +1,109 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
-
4
- class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
3
+ class StatsampleHistogramTestCase < Minitest::Test
5
4
  context Statsample::Histogram do
6
- should "alloc correctly with integer" do
5
+ should 'alloc correctly with integer' do
7
6
  h = Statsample::Histogram.alloc(4)
8
- assert_equal([0.0]*4, h.bin)
9
- assert_equal([0.0]*5, h.range)
7
+ assert_equal([0.0] * 4, h.bin)
8
+ assert_equal([0.0] * 5, h.range)
10
9
  end
11
- should "alloc correctly with array" do
10
+ should 'alloc correctly with array' do
12
11
  h = Statsample::Histogram.alloc([1, 3, 7, 9, 20])
13
- assert_equal([0.0]*4, h.bin)
14
- assert_equal([1,3,7,9,20], h.range)
12
+ assert_equal([0.0] * 4, h.bin)
13
+ assert_equal([1, 3, 7, 9, 20], h.range)
15
14
  end
16
- should "alloc correctly with integer and min, max array" do
15
+ should 'alloc correctly with integer and min, max array' do
17
16
  h = Statsample::Histogram.alloc(5, [0, 5])
18
- assert_equal([0.0,1.0,2.0,3.0,4.0,5.0], h.range)
19
- assert_equal([0.0]*5,h.bin)
17
+ assert_equal([0.0, 1.0, 2.0, 3.0, 4.0, 5.0], h.range)
18
+ assert_equal([0.0] * 5, h.bin)
20
19
  end
21
- should "bin() method return correct number of bins" do
20
+ should 'bin() method return correct number of bins' do
22
21
  h = Statsample::Histogram.alloc(4)
23
- assert_equal(4,h.bins)
22
+ assert_equal(4, h.bins)
24
23
  end
25
- should "increment correctly" do
26
- h = Statsample::Histogram.alloc(5, [0, 5])
24
+ should 'increment correctly' do
25
+ h = Statsample::Histogram.alloc(5, [0, 5])
27
26
  h.increment 2.5
28
- assert_equal([0.0,0.0,1.0,0.0,0.0], h.bin)
29
- h.increment [0.5,0.5,3.5,3.5]
30
- assert_equal([2.0,0.0,1.0,2.0,0.0], h.bin)
27
+ assert_equal([0.0, 0.0, 1.0, 0.0, 0.0], h.bin)
28
+ h.increment [0.5, 0.5, 3.5, 3.5]
29
+ assert_equal([2.0, 0.0, 1.0, 2.0, 0.0], h.bin)
31
30
  h.increment 0
32
- assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
31
+ assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
33
32
  h.increment 5
34
- assert_equal([3.0,0.0,1.0,2.0,0.0], h.bin)
33
+ assert_equal([3.0, 0.0, 1.0, 2.0, 0.0], h.bin)
35
34
  end
36
-
37
- should "alloc_uniform correctly with n, min,max" do
38
- h = Statsample::Histogram.alloc_uniform(5,0,10)
39
- assert_equal(5,h.bins)
40
- assert_equal([0.0]*5,h.bin)
41
- assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
35
+
36
+ should 'alloc_uniform correctly with n, min,max' do
37
+ h = Statsample::Histogram.alloc_uniform(5, 0, 10)
38
+ assert_equal(5, h.bins)
39
+ assert_equal([0.0] * 5, h.bin)
40
+ assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
42
41
  end
43
- should "alloc_uniform correctly with n, [min,max]" do
42
+ should 'alloc_uniform correctly with n, [min,max]' do
44
43
  h = Statsample::Histogram.alloc_uniform(5, [0, 10])
45
- assert_equal(5,h.bins)
46
- assert_equal([0.0]*5,h.bin)
47
- assert_equal([0.0,2.0,4.0,6.0,8.0,10.0], h.range)
44
+ assert_equal(5, h.bins)
45
+ assert_equal([0.0] * 5, h.bin)
46
+ assert_equal([0.0, 2.0, 4.0, 6.0, 8.0, 10.0], h.range)
48
47
  end
49
- should "get_range()" do
50
- h = Statsample::Histogram.alloc_uniform(5,2,12)
48
+ should 'get_range()' do
49
+ h = Statsample::Histogram.alloc_uniform(5, 2, 12)
51
50
  5.times {|i|
52
- assert_equal([2+i*2, 4+i*2], h.get_range(i))
51
+ assert_equal([2 + i * 2, 4 + i * 2], h.get_range(i))
52
+ }
53
+ end
54
+ should 'min() and max()' do
55
+ h = Statsample::Histogram.alloc_uniform(5, 2, 12)
56
+ assert_equal(2, h.min)
57
+ assert_equal(12, h.max)
58
+ end
59
+ should 'max_val()' do
60
+ h = Statsample::Histogram.alloc(5, [0, 5])
61
+ 100.times { h.increment(rand * 5) }
62
+ max = h.bin[0]
63
+ (1..4).each {|i|
64
+ max = h.bin[i] if h.bin[i] > max
53
65
  }
66
+ assert_equal(max, h.max_val)
54
67
  end
55
- should "min() and max()" do
56
- h=Statsample::Histogram.alloc_uniform(5,2,12)
57
- assert_equal(2,h.min)
58
- assert_equal(12,h.max)
59
- end
60
- should "max_val()" do
61
- h = Statsample::Histogram.alloc(5, [0, 5])
62
- 100.times {h.increment(rand*5)}
63
- max=h.bin[0]
64
- (1..4).each {|i|
65
- max = h.bin[i] if h.bin[i] > max
66
- }
67
- assert_equal(max,h.max_val)
68
- end
69
- should "min_val()" do
70
- h = Statsample::Histogram.alloc(5, [0, 5])
71
- 100.times {h.increment(rand*5)}
72
- min=h.bin[0]
73
- (1..4).each {|i|
74
- min = h.bin[i] if h.bin[i]<min
75
- }
76
- assert_equal(min,h.min_val)
77
- end
78
- should "return correct estimated mean" do
79
- a=[1.5,1.5,1.5,3.5,3.5,3.5].to_scale
80
- h=Statsample::Histogram.alloc(5,[0,5])
68
+ should 'min_val()' do
69
+ h = Statsample::Histogram.alloc(5, [0, 5])
70
+ 100.times { h.increment(rand * 5) }
71
+ min = h.bin[0]
72
+ (1..4).each {|i|
73
+ min = h.bin[i] if h.bin[i] < min
74
+ }
75
+ assert_equal(min, h.min_val)
76
+ end
77
+ should 'return correct estimated mean' do
78
+ a = [1.5, 1.5, 1.5, 3.5, 3.5, 3.5].to_scale
79
+ h = Statsample::Histogram.alloc(5, [0, 5])
81
80
  h.increment(a)
82
81
  assert_equal(2.5, h.estimated_mean)
83
82
  end
84
- should "return correct estimated standard deviation" do
85
- a=[0.5,1.5,1.5,1.5,2.5, 3.5,3.5,3.5,4.5].to_scale
86
- h=Statsample::Histogram.alloc(5,[0,5])
83
+ should 'return correct estimated standard deviation' do
84
+ a = [0.5, 1.5, 1.5, 1.5, 2.5, 3.5, 3.5, 3.5, 4.5].to_scale
85
+ h = Statsample::Histogram.alloc(5, [0, 5])
87
86
  h.increment(a)
88
87
  assert_equal(a.sd, h.estimated_standard_deviation)
89
88
  end
90
- should "return correct sum for all values" do
91
- h=Statsample::Histogram.alloc(5,[0,5])
92
- n=rand(100)
93
- n.times { h.increment(1)}
89
+ should 'return correct sum for all values' do
90
+ h = Statsample::Histogram.alloc(5, [0, 5])
91
+ n = rand(100)
92
+ n.times { h.increment(1) }
94
93
  assert_equal(n, h.sum)
95
94
  end
96
- should "return correct sum for a subset of values" do
97
- h=Statsample::Histogram.alloc(5,[0,5])
98
- h.increment([0.5,2.5,4.5])
99
- assert_equal(1,h.sum(0,1))
100
- assert_equal(2,h.sum(1,4))
101
-
95
+ should 'return correct sum for a subset of values' do
96
+ h = Statsample::Histogram.alloc(5, [0, 5])
97
+ h.increment([0.5, 2.5, 4.5])
98
+ assert_equal(1, h.sum(0, 1))
99
+ assert_equal(2, h.sum(1, 4))
102
100
  end
103
- should "not raise exception when all values equal" do
101
+ should 'not raise exception when all values equal' do
104
102
  assert_nothing_raised do
105
- a = [5,5,5,5,5,5].to_scale
106
- h=Statsample::Graph::Histogram.new(a)
103
+ a = [5, 5, 5, 5, 5, 5].to_scale
104
+ h = Statsample::Graph::Histogram.new(a)
107
105
  h.to_svg
108
106
  end
109
107
  end
110
-
111
108
  end
112
109
  end
data/test/test_matrix.rb CHANGED
@@ -1,50 +1,48 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
- class StatsampleMatrixTestCase < MiniTest::Unit::TestCase
4
-
3
+ class StatsampleMatrixTestCase < Minitest::Test
5
4
  def test_to_dataset
6
- m=Matrix[[1,4],[2,5],[3,6]]
5
+ m = Matrix[[1, 4], [2, 5], [3, 6]]
7
6
  m.extend Statsample::NamedMatrix
8
- m.fields_y=%w{x1 x2}
9
- m.name="test"
10
- samples=100
11
- x1=[1,2,3].to_scale
12
- x2=[4,5,6].to_scale
13
- ds={'x1'=>x1,'x2'=>x2}.to_dataset
14
- ds.name="test"
15
- obs=m.to_dataset
16
- assert_equal(ds['x1'],obs['x1'])
17
- assert_equal(ds['x2'],obs['x2'])
18
- assert_equal(ds['x1'].mean,obs['x1'].mean)
19
-
20
-
7
+ m.fields_y = %w(x1 x2)
8
+ m.name = 'test'
9
+ samples = 100
10
+ x1 = [1, 2, 3].to_scale
11
+ x2 = [4, 5, 6].to_scale
12
+ ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
13
+ ds.name = 'test'
14
+ obs = m.to_dataset
15
+ assert_equal(ds['x1'], obs['x1'])
16
+ assert_equal(ds['x2'], obs['x2'])
17
+ assert_equal(ds['x1'].mean, obs['x1'].mean)
21
18
  end
19
+
22
20
  def test_covariate
23
- a=Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
21
+ a = Matrix[[1.0, 0.3, 0.2], [0.3, 1.0, 0.5], [0.2, 0.5, 1.0]]
24
22
  a.extend Statsample::CovariateMatrix
25
- a.fields=%w{a b c}
23
+ a.fields = %w(a b c)
26
24
  assert_equal(:correlation, a._type)
27
25
 
28
- assert_equal(Matrix[[0.5],[0.3]], a.submatrix(%w{c a}, %w{b}))
29
- assert_equal(Matrix[[1.0, 0.2] , [0.2, 1.0]], a.submatrix(%w{c a}))
30
- assert_equal(:correlation, a.submatrix(%w{c a})._type)
26
+ assert_equal(Matrix[[0.5], [0.3]], a.submatrix(%w(c a), %w(b)))
27
+ assert_equal(Matrix[[1.0, 0.2], [0.2, 1.0]], a.submatrix(%w(c a)))
28
+ assert_equal(:correlation, a.submatrix(%w(c a))._type)
31
29
 
32
- a=Matrix[[20,30,10], [30,60,50], [10,50,50]]
30
+ a = Matrix[[20, 30, 10], [30, 60, 50], [10, 50, 50]]
33
31
 
34
32
  a.extend Statsample::CovariateMatrix
35
33
 
36
34
  assert_equal(:covariance, a._type)
37
35
 
38
- a=50.times.collect {rand()}.to_scale
39
- b=50.times.collect {rand()}.to_scale
40
- c=50.times.collect {rand()}.to_scale
41
- ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
42
- corr=Statsample::Bivariate.correlation_matrix(ds)
43
- real=Statsample::Bivariate.covariance_matrix(ds).correlation
36
+ a = 50.times.collect { rand }.to_scale
37
+ b = 50.times.collect { rand }.to_scale
38
+ c = 50.times.collect { rand }.to_scale
39
+ ds = { 'a' => a, 'b' => b, 'c' => c }.to_dataset
40
+ corr = Statsample::Bivariate.correlation_matrix(ds)
41
+ real = Statsample::Bivariate.covariance_matrix(ds).correlation
44
42
  corr.row_size.times do |i|
45
43
  corr.column_size.times do |j|
46
- assert_in_delta(corr[i,j], real[i,j],1e-15)
44
+ assert_in_delta(corr[i, j], real[i, j], 1e-15)
47
45
  end
48
46
  end
49
- end
47
+ end
50
48
  end
data/test/test_multiset.rb CHANGED
@@ -1,158 +1,164 @@
1
- require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
1
+ require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
2
2
 
3
-
4
- class StatsampleMultisetTestCase < MiniTest::Unit::TestCase
3
+ class StatsampleMultisetTestCase < Minitest::Test
5
4
  def setup
6
- @x=%w{a a a a b b b b}.to_vector
7
- @y=[1,2,3,4,5,6,7,8].to_scale
8
- @z=[10,11,12,13,14,15,16,17].to_scale
9
- @ds={'x'=>@x,'y'=>@y,'z'=>@z}.to_dataset
10
- @ms=@ds.to_multiset_by_split('x')
5
+ @x = %w(a a a a b b b b).to_vector
6
+ @y = [1, 2, 3, 4, 5, 6, 7, 8].to_scale
7
+ @z = [10, 11, 12, 13, 14, 15, 16, 17].to_scale
8
+ @ds = { 'x' => @x, 'y' => @y, 'z' => @z }.to_dataset
9
+ @ms = @ds.to_multiset_by_split('x')
11
10
  end
11
+
12
12
  def test_creation
13
- v1a=[1,2,3,4,5].to_vector
14
- v2b=[11,21,31,41,51].to_vector
15
- v3c=[21,23,34,45,56].to_vector
16
- ds1={'v1'=>v1a,'v2'=>v2b,'v3'=>v3c}.to_dataset
17
- v1b=[15,25,35,45,55].to_vector
18
- v2b=[11,21,31,41,51].to_vector
19
- v3b=[21,23,34,45,56].to_vector
20
- ds2={'v1'=>v1b,'v2'=>v2b,'v3'=>v3b}.to_dataset
21
- ms=Statsample::Multiset.new(['v1','v2','v3'])
22
- ms.add_dataset('ds1',ds1)
23
- ms.add_dataset('ds2',ds2)
24
- assert_equal(ds1,ms['ds1'])
25
- assert_equal(ds2,ms['ds2'])
26
- assert_equal(v1a,ms['ds1']['v1'])
27
- assert_not_equal(v1b,ms['ds1']['v1'])
28
- ds3={'v1'=>v1b,'v2'=>v2b}.to_dataset
13
+ v1a = [1, 2, 3, 4, 5].to_vector
14
+ v2b = [11, 21, 31, 41, 51].to_vector
15
+ v3c = [21, 23, 34, 45, 56].to_vector
16
+ ds1 = { 'v1' => v1a, 'v2' => v2b, 'v3' => v3c }.to_dataset
17
+ v1b = [15, 25, 35, 45, 55].to_vector
18
+ v2b = [11, 21, 31, 41, 51].to_vector
19
+ v3b = [21, 23, 34, 45, 56].to_vector
20
+ ds2 = { 'v1' => v1b, 'v2' => v2b, 'v3' => v3b }.to_dataset
21
+ ms = Statsample::Multiset.new(%w(v1 v2 v3))
22
+ ms.add_dataset('ds1', ds1)
23
+ ms.add_dataset('ds2', ds2)
24
+ assert_equal(ds1, ms['ds1'])
25
+ assert_equal(ds2, ms['ds2'])
26
+ assert_equal(v1a, ms['ds1']['v1'])
27
+ assert_not_equal(v1b, ms['ds1']['v1'])
28
+ ds3 = { 'v1' => v1b, 'v2' => v2b }.to_dataset
29
29
  assert_raise ArgumentError do
30
30
  ms.add_dataset(ds3)
31
31
  end
32
32
  end
33
+
33
34
  def test_creation_empty
34
- ms=Statsample::Multiset.new_empty_vectors(%w{id age name},%w{male female})
35
- ds_male={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
36
- ds_female={'id'=>[].to_vector,'age'=>[].to_vector, 'name'=>[].to_vector}.to_dataset(%w{id age name})
37
- ms2=Statsample::Multiset.new(%w{id age name})
38
- ms2.add_dataset('male',ds_male)
39
- ms2.add_dataset('female',ds_female)
40
- assert_equal(ms2.fields,ms.fields)
41
- assert_equal(ms2['male'],ms['male'])
42
- assert_equal(ms2['female'],ms['female'])
35
+ ms = Statsample::Multiset.new_empty_vectors(%w(id age name), %w(male female))
36
+ ds_male = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
37
+ ds_female = { 'id' => [].to_vector, 'age' => [].to_vector, 'name' => [].to_vector }.to_dataset(%w(id age name))
38
+ ms2 = Statsample::Multiset.new(%w(id age name))
39
+ ms2.add_dataset('male', ds_male)
40
+ ms2.add_dataset('female', ds_female)
41
+ assert_equal(ms2.fields, ms.fields)
42
+ assert_equal(ms2['male'], ms['male'])
43
+ assert_equal(ms2['female'], ms['female'])
43
44
  end
45
+
44
46
  def test_to_multiset_by_split_one
45
- sex=%w{m m m m m f f f f m}.to_vector(:nominal)
46
- city=%w{London Paris NY London Paris NY London Paris NY Tome}.to_vector(:nominal)
47
- age=[10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
48
- ds={'sex'=>sex,'city'=>city,'age'=>age}.to_dataset
49
- ms=ds.to_multiset_by_split('sex')
50
- assert_equal(2,ms.n_datasets)
51
- assert_equal(%w{f m},ms.datasets.keys.sort)
52
- assert_equal(6,ms['m'].cases)
53
- assert_equal(4,ms['f'].cases)
54
- assert_equal(%w{London Paris NY London Paris Tome},ms['m']['city'].to_a)
55
- assert_equal([34,33,35,36],ms['f']['age'].to_a)
47
+ sex = %w(m m m m m f f f f m).to_vector(:nominal)
48
+ city = %w(London Paris NY London Paris NY London Paris NY Tome).to_vector(:nominal)
49
+ age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
50
+ ds = { 'sex' => sex, 'city' => city, 'age' => age }.to_dataset
51
+ ms = ds.to_multiset_by_split('sex')
52
+ assert_equal(2, ms.n_datasets)
53
+ assert_equal(%w(f m), ms.datasets.keys.sort)
54
+ assert_equal(6, ms['m'].cases)
55
+ assert_equal(4, ms['f'].cases)
56
+ assert_equal(%w(London Paris NY London Paris Tome), ms['m']['city'].to_a)
57
+ assert_equal([34, 33, 35, 36], ms['f']['age'].to_a)
56
58
  end
59
+
57
60
  def test_to_multiset_by_split_multiple
58
- sex=%w{m m m m m m m m m m f f f f f f f f f f}.to_vector(:nominal)
59
- city=%w{London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris}.to_vector(:nominal)
60
- hair=%w{blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black}.to_vector(:nominal)
61
- age=[10,10,20,30,34,34,33,35,36,40, 10,10,20,30,34,34,33,35,36,40].to_vector(:scale)
62
- ds={'sex'=>sex,'city'=>city,'hair'=>hair,'age'=>age}.to_dataset(%w{sex city hair age})
63
- ms=ds.to_multiset_by_split('sex','city','hair')
64
- assert_equal(8,ms.n_datasets)
65
- assert_equal(3,ms[%w{m London blonde}].cases)
66
- assert_equal(3,ms[%w{m London blonde}].cases)
67
- assert_equal(1,ms[%w{m Paris black}].cases)
61
+ sex = %w(m m m m m m m m m m f f f f f f f f f f).to_vector(:nominal)
62
+ city = %w(London London London Paris Paris London London London Paris Paris London London London Paris Paris London London London Paris Paris).to_vector(:nominal)
63
+ hair = %w(blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black blonde blonde black black).to_vector(:nominal)
64
+ age = [10, 10, 20, 30, 34, 34, 33, 35, 36, 40, 10, 10, 20, 30, 34, 34, 33, 35, 36, 40].to_vector(:scale)
65
+ ds = { 'sex' => sex, 'city' => city, 'hair' => hair, 'age' => age }.to_dataset(%w(sex city hair age))
66
+ ms = ds.to_multiset_by_split('sex', 'city', 'hair')
67
+ assert_equal(8, ms.n_datasets)
68
+ assert_equal(3, ms[%w(m London blonde)].cases)
69
+ assert_equal(3, ms[%w(m London blonde)].cases)
70
+ assert_equal(1, ms[%w(m Paris black)].cases)
68
71
  end
69
72
 
70
73
  def test_stratum_proportion
71
- ds1={'q1'=>[1,1,1,1,1,0,0,0,0,0,0,0].to_vector}.to_dataset
72
- ds2={'q1'=>[1,1,1,1,1,1,1,0,0].to_vector}.to_dataset
73
- assert_equal(5.0/12, ds1['q1'].proportion )
74
- assert_equal(7.0/9, ds2['q1'].proportion )
75
- ms=Statsample::Multiset.new(['q1'])
76
- ms.add_dataset('d1',ds1)
77
- ms.add_dataset('d2',ds2)
78
- ss=Statsample::StratifiedSample.new(ms,{'d1'=>50,'d2'=>100})
79
- assert_in_delta(0.655, ss.proportion('q1'),0.01)
80
- assert_in_delta(0.345, ss.proportion('q1',0),0.01)
81
-
74
+ ds1 = { 'q1' => [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0].to_vector }.to_dataset
75
+ ds2 = { 'q1' => [1, 1, 1, 1, 1, 1, 1, 0, 0].to_vector }.to_dataset
76
+ assert_equal(5.0 / 12, ds1['q1'].proportion)
77
+ assert_equal(7.0 / 9, ds2['q1'].proportion)
78
+ ms = Statsample::Multiset.new(['q1'])
79
+ ms.add_dataset('d1', ds1)
80
+ ms.add_dataset('d2', ds2)
81
+ ss = Statsample::StratifiedSample.new(ms, 'd1' => 50, 'd2' => 100)
82
+ assert_in_delta(0.655, ss.proportion('q1'), 0.01)
83
+ assert_in_delta(0.345, ss.proportion('q1', 0), 0.01)
82
84
  end
85
+
83
86
  def test_stratum_scale
84
- boys={'test'=>[50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale)}.to_dataset
85
- girls={'test'=>[70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale)}.to_dataset
86
- ms=Statsample::Multiset.new(['test'])
87
- ms.add_dataset('boys',boys)
88
- ms.add_dataset('girls',girls)
89
- ss=Statsample::StratifiedSample.new(ms,{'boys'=>10000,'girls'=>10000})
90
- assert_equal(2,ss.strata_number)
91
- assert_equal(20000,ss.population_size)
92
- assert_equal(10000,ss.stratum_size('boys'))
93
- assert_equal(10000,ss.stratum_size('girls'))
94
- assert_equal(36,ss.sample_size)
95
- assert_equal(75,ss.mean('test'))
96
- assert_in_delta(1.45,ss.standard_error_wor('test'),0.01)
97
- assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'),0.00001)
87
+ boys = { 'test' => [50, 55, 60, 62, 62, 65, 67, 67, 70, 70, 73, 73, 75, 78, 78, 80, 85, 90].to_vector(:scale) }.to_dataset
88
+ girls = { 'test' => [70, 70, 72, 72, 75, 75, 78, 78, 80, 80, 82, 82, 85, 85, 88, 88, 90, 90].to_vector(:scale) }.to_dataset
89
+ ms = Statsample::Multiset.new(['test'])
90
+ ms.add_dataset('boys', boys)
91
+ ms.add_dataset('girls', girls)
92
+ ss = Statsample::StratifiedSample.new(ms, 'boys' => 10_000, 'girls' => 10_000)
93
+ assert_equal(2, ss.strata_number)
94
+ assert_equal(20_000, ss.population_size)
95
+ assert_equal(10_000, ss.stratum_size('boys'))
96
+ assert_equal(10_000, ss.stratum_size('girls'))
97
+ assert_equal(36, ss.sample_size)
98
+ assert_equal(75, ss.mean('test'))
99
+ assert_in_delta(1.45, ss.standard_error_wor('test'), 0.01)
100
+ assert_in_delta(ss.standard_error_wor('test'), ss.standard_error_wor_2('test'), 0.00001)
98
101
  end
102
+
99
103
  def test_each
100
- xpe={
101
- 'a'=>%w{a a a a}.to_vector,
102
- 'b'=>%w{b b b b}.to_vector
104
+ xpe = {
105
+ 'a' => %w(a a a a).to_vector,
106
+ 'b' => %w(b b b b).to_vector
103
107
  }
104
- ype={
105
- 'a'=>[1,2,3,4].to_scale,
106
- 'b'=>[5,6,7,8].to_scale,
108
+ ype = {
109
+ 'a' => [1, 2, 3, 4].to_scale,
110
+ 'b' => [5, 6, 7, 8].to_scale
107
111
  }
108
- zpe={
109
- 'a'=>[10,11,12,13].to_scale,
110
- 'b'=>[14,15,16,17].to_scale,
112
+ zpe = {
113
+ 'a' => [10, 11, 12, 13].to_scale,
114
+ 'b' => [14, 15, 16, 17].to_scale
111
115
  }
112
- xp,yp,zp=Hash.new(),Hash.new(),Hash.new()
113
- @ms.each {|k,ds|
114
- xp[k]=ds['x']
115
- yp[k]=ds['y']
116
- zp[k]=ds['z']
116
+ xp, yp, zp = {}, {}, {}
117
+ @ms.each {|k, ds|
118
+ xp[k] = ds['x']
119
+ yp[k] = ds['y']
120
+ zp[k] = ds['z']
117
121
  }
118
- assert_equal(xpe,xp)
119
- assert_equal(ype,yp)
120
- assert_equal(zpe,zp)
121
-
122
+ assert_equal(xpe, xp)
123
+ assert_equal(ype, yp)
124
+ assert_equal(zpe, zp)
122
125
  end
126
+
123
127
  def test_multiset_union_with_block
124
-
125
- r1=rand()
126
- r2=rand()
127
- ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale
128
-
129
- ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
130
-
131
- ds2=@ms.union {|k,ds|
132
- ds['y'].recode!{|v|
133
- k=='a' ? v*r1 : v*r2}
134
- ds['z'].recode!{|v|
135
- k=='a' ? v*r1 : v*r2}
128
+ r1 = rand
129
+ r2 = rand
130
+ ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
131
+
132
+ ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
133
+
134
+ ds2 = @ms.union {|k, ds|
135
+ ds['y'].recode!{|v|
136
+ k == 'a' ? v * r1 : v * r2
137
+ }
138
+ ds['z'].recode!{|v|
139
+ k == 'a' ? v * r1 : v * r2
140
+ }
136
141
  }
137
- assert_equal(ye,ds2['y'])
138
- assert_equal(ze,ds2['z'])
142
+ assert_equal(ye, ds2['y'])
143
+ assert_equal(ze, ds2['z'])
139
144
  end
145
+
140
146
  def test_multiset_union
141
- r1=rand()
142
- r2=rand()
143
- ye=[1*r1,2*r1,3*r1,4*r1,5*r2,6*r2,7*r2,8*r2].to_scale
144
-
145
- ze=[10*r1,11*r1,12*r1,13*r1, 14*r2,15*r2,16*r2,17*r2].to_scale
146
- @ms.each {|k,ds|
147
- ds['y'].recode!{|v|
148
- k=='a' ? v*r1 : v*r2}
149
- ds['z'].recode!{|v|
150
- k=='a' ? v*r1 : v*r2}
151
-
147
+ r1 = rand
148
+ r2 = rand
149
+ ye = [1 * r1, 2 * r1, 3 * r1, 4 * r1, 5 * r2, 6 * r2, 7 * r2, 8 * r2].to_scale
150
+
151
+ ze = [10 * r1, 11 * r1, 12 * r1, 13 * r1, 14 * r2, 15 * r2, 16 * r2, 17 * r2].to_scale
152
+ @ms.each {|k, ds|
153
+ ds['y'].recode!{|v|
154
+ k == 'a' ? v * r1 : v * r2
155
+ }
156
+ ds['z'].recode!{|v|
157
+ k == 'a' ? v * r1 : v * r2
158
+ }
152
159
  }
153
- ds2=@ms.union
154
- assert_equal(ye,ds2['y'])
155
- assert_equal(ze,ds2['z'])
156
-
160
+ ds2 = @ms.union
161
+ assert_equal(ye, ds2['y'])
162
+ assert_equal(ze, ds2['z'])
157
163
  end
158
164
  end