statsample 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. data/History.txt +12 -0
  2. data/Manifest.txt +13 -0
  3. data/README.txt +2 -1
  4. data/demo/pca.rb +29 -0
  5. data/demo/umann.rb +8 -0
  6. data/lib/distribution.rb +0 -1
  7. data/lib/matrix_extension.rb +35 -21
  8. data/lib/statsample.rb +31 -28
  9. data/lib/statsample/anova.rb +7 -2
  10. data/lib/statsample/bivariate.rb +17 -11
  11. data/lib/statsample/codification.rb +136 -87
  12. data/lib/statsample/combination.rb +0 -2
  13. data/lib/statsample/converter/csv18.rb +1 -1
  14. data/lib/statsample/converter/csv19.rb +1 -1
  15. data/lib/statsample/converters.rb +176 -171
  16. data/lib/statsample/crosstab.rb +227 -154
  17. data/lib/statsample/dataset.rb +94 -12
  18. data/lib/statsample/dominanceanalysis.rb +69 -62
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +25 -21
  20. data/lib/statsample/factor.rb +18 -0
  21. data/lib/statsample/factor/pca.rb +128 -0
  22. data/lib/statsample/factor/principalaxis.rb +133 -0
  23. data/lib/statsample/factor/rotation.rb +125 -0
  24. data/lib/statsample/histogram.rb +99 -0
  25. data/lib/statsample/mle.rb +125 -126
  26. data/lib/statsample/mle/logit.rb +91 -91
  27. data/lib/statsample/mle/probit.rb +84 -85
  28. data/lib/statsample/multiset.rb +1 -1
  29. data/lib/statsample/permutation.rb +96 -0
  30. data/lib/statsample/regression.rb +1 -1
  31. data/lib/statsample/regression/binomial.rb +89 -89
  32. data/lib/statsample/regression/binomial/logit.rb +9 -9
  33. data/lib/statsample/regression/binomial/probit.rb +9 -9
  34. data/lib/statsample/regression/multiple.rb +8 -14
  35. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  36. data/lib/statsample/regression/multiple/rubyengine.rb +55 -55
  37. data/lib/statsample/resample.rb +12 -17
  38. data/lib/statsample/srs.rb +4 -1
  39. data/lib/statsample/test.rb +23 -22
  40. data/lib/statsample/test/umannwhitney.rb +182 -0
  41. data/lib/statsample/vector.rb +854 -815
  42. data/test/test_bivariate.rb +132 -132
  43. data/test/test_codification.rb +71 -50
  44. data/test/test_dataset.rb +19 -1
  45. data/test/test_factor.rb +44 -0
  46. data/test/test_histogram.rb +26 -0
  47. data/test/test_permutation.rb +37 -0
  48. data/test/test_statistics.rb +74 -63
  49. data/test/test_umannwhitney.rb +17 -0
  50. data/test/test_vector.rb +46 -30
  51. metadata +31 -4
data/test/test_bivariate.rb CHANGED
@@ -3,144 +3,144 @@ require 'statsample'
3
3
  require 'test/unit'
4
4
  class StatsampleBivariateTestCase < Test::Unit::TestCase
5
5
  def test_sum_of_codeviated
6
- v1=[1,2,3,4,5,6].to_vector(:scale)
7
- v2=[6,2,4,10,12,8].to_vector(:scale)
8
- assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
9
- end
10
- def test_pearson
11
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
12
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
13
- assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
14
- v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
15
- v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
16
- assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
17
- end
18
- def test_tetrachoric_matrix
19
- ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
20
- tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
21
- tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
22
- tcm_obs.row_size.times {|i|
23
- tcm_obs.column_size {|j|
24
- assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
25
- }
26
- }
27
- end
28
- def test_tetrachoric
29
- a,b,c,d=0,0,0,0
30
- assert_raise RuntimeError do
31
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
32
- end
33
- a,b,c,d=10,10,0,0
34
- assert_raise RuntimeError do
35
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
36
- end
37
- a,b,c,d=10,0,10,0
38
- assert_raise RuntimeError do
39
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
6
+ v1=[1,2,3,4,5,6].to_vector(:scale)
7
+ v2=[6,2,4,10,12,8].to_vector(:scale)
8
+ assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
9
+ end
10
+ def test_pearson
11
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
12
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
13
+ assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
14
+ v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
15
+ v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
16
+ assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
17
+ end
18
+ def test_tetrachoric_matrix
19
+ ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
20
+ tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
21
+ tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
22
+ tcm_obs.row_size.times do |i|
23
+ tcm_obs.column_size do |j|
24
+ assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
40
25
  end
41
- a,b,c,d=10,0,0,10
26
+ end
27
+ end
28
+ def test_tetrachoric
29
+ a,b,c,d=0,0,0,0
30
+ assert_raise RuntimeError do
42
31
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
43
- assert_equal(1,tc.r)
44
- assert_equal(0,tc.se)
45
- a,b,c,d=0,10,10,0
32
+ end
33
+ a,b,c,d=10,10,0,0
34
+ assert_raise RuntimeError do
46
35
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
47
- assert_equal(-1,tc.r)
48
- assert_equal(0,tc.se)
49
-
50
- a,b,c,d = 30,40,70,20
36
+ end
37
+ a,b,c,d=10,0,10,0
38
+ assert_raise RuntimeError do
51
39
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
52
- assert_in_delta(-0.53980,tc.r,0.0001)
53
- assert_in_delta(0.09940,tc.se,0.0001)
54
- assert_in_delta(0.31864,tc.threshold_x,0.0001)
55
- assert_in_delta(-0.15731,tc.threshold_y,0.0001)
56
- x=%w{a a a a b b b a b b a a b b}.to_vector
57
- y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
58
- # crosstab
59
- # 0 1
60
- # a 4 3
61
- # b 2 5
62
- a,b,c,d=4,3,2,5
63
- tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
64
- tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
65
- assert_equal(tc1.r,tc2.r)
66
- assert_equal(tc1.se,tc2.se)
67
-
68
40
  end
69
- def test_matrix_correlation
70
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
71
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
72
- v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
73
- v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
74
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
75
- c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
76
- expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
77
- [c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
78
- ]
79
- obt=Statsample::Bivariate.correlation_matrix(ds)
80
- for i in 0...expected.row_size
81
- for j in 0...expected.column_size
82
- #puts expected[i,j].inspect
83
- #puts obt[i,j].inspect
84
- assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
85
- end
86
- end
87
- #assert_equal(expected,obt)
41
+ a,b,c,d=10,0,0,10
42
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
43
+ assert_equal(1,tc.r)
44
+ assert_equal(0,tc.se)
45
+ a,b,c,d=0,10,10,0
46
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
47
+ assert_equal(-1,tc.r)
48
+ assert_equal(0,tc.se)
49
+
50
+ a,b,c,d = 30,40,70,20
51
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
52
+ assert_in_delta(-0.53980,tc.r,0.0001)
53
+ assert_in_delta(0.09940,tc.se,0.0001)
54
+ assert_in_delta(0.31864,tc.threshold_x,0.0001)
55
+ assert_in_delta(-0.15731,tc.threshold_y,0.0001)
56
+ x=%w{a a a a b b b a b b a a b b}.to_vector
57
+ y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
58
+ # crosstab
59
+ # 0 1
60
+ # a 4 3
61
+ # b 2 5
62
+ a,b,c,d=4,3,2,5
63
+ tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
64
+ tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
65
+ assert_equal(tc1.r,tc2.r)
66
+ assert_equal(tc1.se,tc2.se)
67
+
68
+ end
69
+ def test_matrix_correlation
70
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
71
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
72
+ v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
73
+ v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
74
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
75
+ c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
76
+ expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
77
+ [c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
78
+ ]
79
+ obt=Statsample::Bivariate.correlation_matrix(ds)
80
+ for i in 0...expected.row_size
81
+ for j in 0...expected.column_size
82
+ #puts expected[i,j].inspect
83
+ #puts obt[i,j].inspect
84
+ assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
85
+ end
88
86
  end
89
- def test_prop_pearson
90
- assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
91
- assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
92
- r=0.9
93
- n=100
94
- t=Statsample::Bivariate.t_r(r,n)
95
- assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
96
- assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
97
- assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
87
+ #assert_equal(expected,obt)
88
+ end
89
+ def test_prop_pearson
90
+ assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
91
+ assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
92
+ r=0.9
93
+ n=100
94
+ t=Statsample::Bivariate.t_r(r,n)
95
+ assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
96
+ assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
97
+ assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
98
98
 
99
- r=-0.9
100
- n=100
101
- t=Statsample::Bivariate.t_r(r,n)
102
- assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
103
- assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
104
- assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
99
+ r=-0.9
100
+ n=100
101
+ t=Statsample::Bivariate.t_r(r,n)
102
+ assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
103
+ assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
104
+ assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
105
+ end
106
+ def test_covariance
107
+ if HAS_GSL
108
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
109
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
110
+ assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
111
+
112
+ end
113
+ end
114
+
115
+ def test_spearman
116
+ v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
117
+ v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
118
+ assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
119
+
120
+ end
121
+ def test_point_biserial
122
+ c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
123
+ d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
124
+ assert_raise TypeError do
125
+ Statsample::Bivariate.point_biserial(c,d)
105
126
  end
106
- def test_covariance
107
- if HAS_GSL
108
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
109
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
110
- assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
111
-
112
- end
113
- end
114
-
115
- def test_spearman
116
- v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
117
- v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
118
- assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
119
-
120
- end
121
- def test_point_biserial
122
- c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
123
- d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
124
- assert_raise TypeError do
125
- Statsample::Bivariate.point_biserial(c,d)
126
- end
127
- assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
128
- end
129
- def test_tau
130
- v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
131
- v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
132
- assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
133
- assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
134
- v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
135
- v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
136
- assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
137
- end
138
- def test_gamma
139
- m=Matrix[[10,5,2],[10,15,20]]
140
- assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
141
- m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
142
- assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
143
-
144
-
145
- end
127
+ assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
128
+ end
129
+ def test_tau
130
+ v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
131
+ v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
132
+ assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
133
+ assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
134
+ v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
135
+ v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
136
+ assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
137
+ end
138
+ def test_gamma
139
+ m=Matrix[[10,5,2],[10,15,20]]
140
+ assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
141
+ m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
142
+ assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
143
+
144
+
145
+ end
146
146
  end
data/test/test_codification.rb CHANGED
@@ -1,60 +1,81 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'tempfile'
4
+ require 'tmpdir'
4
5
  require 'test/unit'
5
6
 
6
7
  class StatsampleCodificationTestCase < Test::Unit::TestCase
7
8
 
8
- def initialize(*args)
9
- v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
10
- @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s','dream'=>'d','dreaming'=>'d'}
11
- @ds={"v1"=>v1}.to_dataset
12
- super
13
- end
14
- def test_create_yaml
15
- assert_raise ArgumentError do
16
- Statsample::Codification.create_yaml(@ds,[])
17
- end
18
- expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
19
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
20
- h=YAML::load(yaml_hash)
21
- assert_equal(['v1'],h.keys)
22
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
23
- tf = Tempfile.new("test_codification")
24
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],Statsample::SPLIT_TOKEN,tf)
25
- tf.close
26
- tf.open
27
- h=YAML::load(tf)
28
- assert_equal(['v1'],h.keys)
29
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
30
- tf.close(true)
31
- end
32
- def test_recodification
33
- expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
34
- assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
35
- v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
36
- expected=[['r'],['w','d'],nil,['w','d']]
37
- assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
38
- end
39
- def test_recode_dataset_simple
40
- yaml=YAML::dump({'v1'=>@dict})
41
- Statsample::Codification.recode_dataset_simple!(@ds,yaml)
42
- expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
43
- assert_not_equal(expected_vector,@ds['v1'])
44
- assert_equal(expected_vector,@ds['v1_recoded'])
45
- end
46
- def test_recode_dataset_split
47
- yaml=YAML::dump({'v1'=>@dict})
48
- Statsample::Codification.recode_dataset_split!(@ds,yaml)
49
- e={}
50
- e['r']=[1,1,0,1,0,0,0].to_vector
51
- e['w']=[0,1,1,0,0,0,0].to_vector
52
- e['s']=[0,0,0,0,1,1,1].to_vector
53
- e['d']=[0,0,0,0,0,1,1].to_vector
54
- e.each{|k,expected|
55
- assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
56
-
57
- }
9
+ def initialize(*args)
10
+ v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
11
+ @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s', 'dream'=>'d', 'dreaming'=>'d'}
12
+ @ds={"v1"=>v1}.to_dataset
13
+ super
14
+ end
15
+ def test_create_hash
16
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
17
+ hash=Statsample::Codification.create_hash(@ds,['v1'])
18
+ assert_equal(['v1'],hash.keys)
19
+ assert_equal(expected_keys_v1,hash['v1'].keys.sort)
20
+ assert_equal(expected_keys_v1,hash['v1'].values.sort)
21
+ end
22
+ def test_create_excel
23
+ filename=Dir::tmpdir+"/test_excel"+Time.now().to_s+".xls"
24
+ #filename = Tempfile.new("test_codification_"+Time.now().to_s)
25
+ Statsample::Codification.create_excel(@ds, ['v1'], filename)
26
+ field=(["v1"]*8).to_vector
27
+ keys=%w{dream dreaming run running sleep sleeping walk walking}.to_vector
28
+ ds=Statsample::Excel.read(filename)
29
+ assert_equal(field, ds['field'])
30
+ assert_equal(keys, ds['original'])
31
+ assert_equal(keys, ds['recoded'])
32
+ hash=Statsample::Codification.excel_to_recoded_hash(filename)
33
+ assert_equal(keys.data, hash['v1'].keys.sort)
34
+ assert_equal(keys.data, hash['v1'].values.sort)
35
+
36
+ end
37
+ def test_create_yaml
38
+ assert_raise ArgumentError do
39
+ Statsample::Codification.create_yaml(@ds,[])
58
40
  end
41
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
42
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
43
+ h=YAML::load(yaml_hash)
44
+ assert_equal(['v1'],h.keys)
45
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
46
+ tf = Tempfile.new("test_codification")
47
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],tf, Statsample::SPLIT_TOKEN)
48
+ tf.close
49
+ tf.open
50
+ h=YAML::load(tf)
51
+ assert_equal(['v1'],h.keys)
52
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
53
+ tf.close(true)
54
+ end
55
+ def test_recodification
56
+ expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
57
+ assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
58
+ v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
59
+ expected=[['r'],['w','d'],nil,['w','d']]
60
+ assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
61
+ end
62
+ def test_recode_dataset_simple
63
+ Statsample::Codification.recode_dataset_simple!(@ds,{'v1'=>@dict})
64
+ expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
65
+ assert_not_equal(expected_vector,@ds['v1'])
66
+ assert_equal(expected_vector,@ds['v1_recoded'])
67
+ end
68
+ def test_recode_dataset_split
69
+ Statsample::Codification.recode_dataset_split!(@ds,{'v1'=>@dict})
70
+ e={}
71
+ e['r']=[1,1,0,1,0,0,0].to_vector
72
+ e['w']=[0,1,1,0,0,0,0].to_vector
73
+ e['s']=[0,0,0,0,1,1,1].to_vector
74
+ e['d']=[0,0,0,0,0,1,1].to_vector
75
+ e.each{|k,expected|
76
+ assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
77
+
78
+ }
79
+ end
59
80
 
60
81
  end
data/test/test_dataset.rb CHANGED
@@ -356,7 +356,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
356
356
  assert_equal(vmult,ds.compute("v1*v2"))
357
357
 
358
358
  end
359
- def test_crosstab_with_asignation
359
+ def test_crosstab_with_asignation
360
360
  v1=%w{a a a b b b c c c}.to_vector
361
361
  v2=%w{a b c a b c a b c}.to_vector
362
362
  v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
@@ -371,4 +371,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
371
371
  ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
372
  assert_equal(ds, ds2)
373
373
  end
374
+ def test_one_to_many
375
+ cases=[
376
+ ['1','george','red',10,'blue',20,nil,nil],
377
+ ['2','fred','green',15,'orange',30,'white',20],
378
+ ['3','alfred',nil,nil,nil,nil,nil,nil]
379
+ ]
380
+ ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
+ cases.each {|c| ds.add_case_array c }
382
+ ds.update_valid_data
383
+ ids=%w{1 1 2 2 2}.to_vector
384
+ colors=%w{red blue green orange white}.to_vector
385
+ values=[10,20,15,30,20].to_vector
386
+ col_ids=[1,2,1,2,3].to_scale
387
+ ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
+ assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
+
390
+ end
391
+
374
392
  end
data/test/test_factor.rb ADDED
@@ -0,0 +1,44 @@
1
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
+ require 'statsample'
3
+ require 'test/unit'
4
+ require 'matrix_extension'
5
+ class StatsampleFactorTestCase < Test::Unit::TestCase
6
+ def test_pca
7
+ require 'gsl'
8
+ a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
9
+ b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
10
+ a.recode! {|c| c-a.mean}
11
+ b.recode! {|c| c-b.mean}
12
+ ds={'a'=>a,'b'=>b}.to_dataset
13
+ cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
14
+ pca=Statsample::Factor::PCA.new(cov_matrix)
15
+ expected_eigenvalues=[1.284, 0.0490]
16
+ expected_eigenvalues.each_with_index{|ev,i|
17
+ assert_in_delta(ev,pca.eigenvalues[i],0.001)
18
+ }
19
+ expected_fm_1=GSL::Matrix[[0.677], [0.735]]
20
+ expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
21
+ _test_matrix(expected_fm_1,pca.feature_vector(1))
22
+ _test_matrix(expected_fm_2,pca.feature_vector(2))
23
+ end
24
+ def test_rotation_varimax
25
+ a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
26
+ [0.7950, -0.5416, 0.2565] ,
27
+ [0.5944, 0.7234, -0.3441],
28
+ [0.8945, -0.3921, -0.1863] ]
29
+ expected= Matrix[[-0.0204423, 0.938674, -0.340334],
30
+ [0.983662, 0.0730206, 0.134997],
31
+ [0.0826106, 0.435975, -0.893379],
32
+ [0.939901, -0.0965213, -0.309596]].to_gsl
33
+ varimax=Statsample::Factor::Varimax.new(a)
34
+ varimax.iterate
35
+ _test_matrix(expected,varimax.rotated)
36
+ end
37
+ def _test_matrix(a,b)
38
+ a.size1.times {|i|
39
+ a.size2.times {|j|
40
+ assert_in_delta(a[i,j], b[i,j],0.001)
41
+ }
42
+ }
43
+ end
44
+ end