statsample 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. data/History.txt +12 -0
  2. data/Manifest.txt +13 -0
  3. data/README.txt +2 -1
  4. data/demo/pca.rb +29 -0
  5. data/demo/umann.rb +8 -0
  6. data/lib/distribution.rb +0 -1
  7. data/lib/matrix_extension.rb +35 -21
  8. data/lib/statsample.rb +31 -28
  9. data/lib/statsample/anova.rb +7 -2
  10. data/lib/statsample/bivariate.rb +17 -11
  11. data/lib/statsample/codification.rb +136 -87
  12. data/lib/statsample/combination.rb +0 -2
  13. data/lib/statsample/converter/csv18.rb +1 -1
  14. data/lib/statsample/converter/csv19.rb +1 -1
  15. data/lib/statsample/converters.rb +176 -171
  16. data/lib/statsample/crosstab.rb +227 -154
  17. data/lib/statsample/dataset.rb +94 -12
  18. data/lib/statsample/dominanceanalysis.rb +69 -62
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +25 -21
  20. data/lib/statsample/factor.rb +18 -0
  21. data/lib/statsample/factor/pca.rb +128 -0
  22. data/lib/statsample/factor/principalaxis.rb +133 -0
  23. data/lib/statsample/factor/rotation.rb +125 -0
  24. data/lib/statsample/histogram.rb +99 -0
  25. data/lib/statsample/mle.rb +125 -126
  26. data/lib/statsample/mle/logit.rb +91 -91
  27. data/lib/statsample/mle/probit.rb +84 -85
  28. data/lib/statsample/multiset.rb +1 -1
  29. data/lib/statsample/permutation.rb +96 -0
  30. data/lib/statsample/regression.rb +1 -1
  31. data/lib/statsample/regression/binomial.rb +89 -89
  32. data/lib/statsample/regression/binomial/logit.rb +9 -9
  33. data/lib/statsample/regression/binomial/probit.rb +9 -9
  34. data/lib/statsample/regression/multiple.rb +8 -14
  35. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  36. data/lib/statsample/regression/multiple/rubyengine.rb +55 -55
  37. data/lib/statsample/resample.rb +12 -17
  38. data/lib/statsample/srs.rb +4 -1
  39. data/lib/statsample/test.rb +23 -22
  40. data/lib/statsample/test/umannwhitney.rb +182 -0
  41. data/lib/statsample/vector.rb +854 -815
  42. data/test/test_bivariate.rb +132 -132
  43. data/test/test_codification.rb +71 -50
  44. data/test/test_dataset.rb +19 -1
  45. data/test/test_factor.rb +44 -0
  46. data/test/test_histogram.rb +26 -0
  47. data/test/test_permutation.rb +37 -0
  48. data/test/test_statistics.rb +74 -63
  49. data/test/test_umannwhitney.rb +17 -0
  50. data/test/test_vector.rb +46 -30
  51. metadata +31 -4
@@ -3,144 +3,144 @@ require 'statsample'
3
3
  require 'test/unit'
4
4
  class StatsampleBivariateTestCase < Test::Unit::TestCase
5
5
  def test_sum_of_codeviated
6
- v1=[1,2,3,4,5,6].to_vector(:scale)
7
- v2=[6,2,4,10,12,8].to_vector(:scale)
8
- assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
9
- end
10
- def test_pearson
11
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
12
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
13
- assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
14
- v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
15
- v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
16
- assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
17
- end
18
- def test_tetrachoric_matrix
19
- ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
20
- tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
21
- tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
22
- tcm_obs.row_size.times {|i|
23
- tcm_obs.column_size {|j|
24
- assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
25
- }
26
- }
27
- end
28
- def test_tetrachoric
29
- a,b,c,d=0,0,0,0
30
- assert_raise RuntimeError do
31
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
32
- end
33
- a,b,c,d=10,10,0,0
34
- assert_raise RuntimeError do
35
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
36
- end
37
- a,b,c,d=10,0,10,0
38
- assert_raise RuntimeError do
39
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
6
+ v1=[1,2,3,4,5,6].to_vector(:scale)
7
+ v2=[6,2,4,10,12,8].to_vector(:scale)
8
+ assert_equal(23.0, Statsample::Bivariate.sum_of_codeviated(v1,v2))
9
+ end
10
+ def test_pearson
11
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
12
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
13
+ assert_in_delta(0.525,Statsample::Bivariate.pearson(v1,v2), 0.001)
14
+ v3=[6,2, 1000,1000,5,4,7,8,4,3,2,nil].to_vector(:scale)
15
+ v4=[2,nil,nil,nil, 3,7,8,6,4,3,2,500].to_vector(:scale)
16
+ assert_in_delta(0.525,Statsample::Bivariate.pearson(v3,v4),0.001)
17
+ end
18
+ def test_tetrachoric_matrix
19
+ ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
20
+ tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
21
+ tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
22
+ tcm_obs.row_size.times do |i|
23
+ tcm_obs.column_size do |j|
24
+ assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
40
25
  end
41
- a,b,c,d=10,0,0,10
26
+ end
27
+ end
28
+ def test_tetrachoric
29
+ a,b,c,d=0,0,0,0
30
+ assert_raise RuntimeError do
42
31
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
43
- assert_equal(1,tc.r)
44
- assert_equal(0,tc.se)
45
- a,b,c,d=0,10,10,0
32
+ end
33
+ a,b,c,d=10,10,0,0
34
+ assert_raise RuntimeError do
46
35
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
47
- assert_equal(-1,tc.r)
48
- assert_equal(0,tc.se)
49
-
50
- a,b,c,d = 30,40,70,20
36
+ end
37
+ a,b,c,d=10,0,10,0
38
+ assert_raise RuntimeError do
51
39
  tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
52
- assert_in_delta(-0.53980,tc.r,0.0001)
53
- assert_in_delta(0.09940,tc.se,0.0001)
54
- assert_in_delta(0.31864,tc.threshold_x,0.0001)
55
- assert_in_delta(-0.15731,tc.threshold_y,0.0001)
56
- x=%w{a a a a b b b a b b a a b b}.to_vector
57
- y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
58
- # crosstab
59
- # 0 1
60
- # a 4 3
61
- # b 2 5
62
- a,b,c,d=4,3,2,5
63
- tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
64
- tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
65
- assert_equal(tc1.r,tc2.r)
66
- assert_equal(tc1.se,tc2.se)
67
-
68
40
  end
69
- def test_matrix_correlation
70
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
71
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
72
- v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
73
- v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
74
- ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
75
- c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
76
- expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
77
- [c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
78
- ]
79
- obt=Statsample::Bivariate.correlation_matrix(ds)
80
- for i in 0...expected.row_size
81
- for j in 0...expected.column_size
82
- #puts expected[i,j].inspect
83
- #puts obt[i,j].inspect
84
- assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
85
- end
86
- end
87
- #assert_equal(expected,obt)
41
+ a,b,c,d=10,0,0,10
42
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
43
+ assert_equal(1,tc.r)
44
+ assert_equal(0,tc.se)
45
+ a,b,c,d=0,10,10,0
46
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
47
+ assert_equal(-1,tc.r)
48
+ assert_equal(0,tc.se)
49
+
50
+ a,b,c,d = 30,40,70,20
51
+ tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
52
+ assert_in_delta(-0.53980,tc.r,0.0001)
53
+ assert_in_delta(0.09940,tc.se,0.0001)
54
+ assert_in_delta(0.31864,tc.threshold_x,0.0001)
55
+ assert_in_delta(-0.15731,tc.threshold_y,0.0001)
56
+ x=%w{a a a a b b b a b b a a b b}.to_vector
57
+ y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
58
+ # crosstab
59
+ # 0 1
60
+ # a 4 3
61
+ # b 2 5
62
+ a,b,c,d=4,3,2,5
63
+ tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
64
+ tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
65
+ assert_equal(tc1.r,tc2.r)
66
+ assert_equal(tc1.se,tc2.se)
67
+
68
+ end
69
+ def test_matrix_correlation
70
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
71
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
72
+ v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
73
+ v4=[2,nil,nil,nil, 3,7,8,6].to_vector(:scale)
74
+ ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
75
+ c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
76
+ expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
77
+ [c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
78
+ ]
79
+ obt=Statsample::Bivariate.correlation_matrix(ds)
80
+ for i in 0...expected.row_size
81
+ for j in 0...expected.column_size
82
+ #puts expected[i,j].inspect
83
+ #puts obt[i,j].inspect
84
+ assert_in_delta(expected[i,j], obt[i,j],0.0001,"#{expected[i,j].class}!=#{obt[i,j].class} ")
85
+ end
88
86
  end
89
- def test_prop_pearson
90
- assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
91
- assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
92
- r=0.9
93
- n=100
94
- t=Statsample::Bivariate.t_r(r,n)
95
- assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
96
- assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
97
- assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
87
+ #assert_equal(expected,obt)
88
+ end
89
+ def test_prop_pearson
90
+ assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
91
+ assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
92
+ r=0.9
93
+ n=100
94
+ t=Statsample::Bivariate.t_r(r,n)
95
+ assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
96
+ assert(Statsample::Bivariate.prop_pearson(t,n,:right)<0.05)
97
+ assert(Statsample::Bivariate.prop_pearson(t,n,:left)>0.05)
98
98
 
99
- r=-0.9
100
- n=100
101
- t=Statsample::Bivariate.t_r(r,n)
102
- assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
103
- assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
104
- assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
99
+ r=-0.9
100
+ n=100
101
+ t=Statsample::Bivariate.t_r(r,n)
102
+ assert(Statsample::Bivariate.prop_pearson(t,n,:both)<0.05)
103
+ assert(Statsample::Bivariate.prop_pearson(t,n,:right)>0.05)
104
+ assert(Statsample::Bivariate.prop_pearson(t,n,:left)<0.05)
105
+ end
106
+ def test_covariance
107
+ if HAS_GSL
108
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
109
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
110
+ assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
111
+
112
+ end
113
+ end
114
+
115
+ def test_spearman
116
+ v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
117
+ v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
118
+ assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
119
+
120
+ end
121
+ def test_point_biserial
122
+ c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
123
+ d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
124
+ assert_raise TypeError do
125
+ Statsample::Bivariate.point_biserial(c,d)
105
126
  end
106
- def test_covariance
107
- if HAS_GSL
108
- v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
109
- v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
110
- assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
111
-
112
- end
113
- end
114
-
115
- def test_spearman
116
- v1=[86,97,99,100,101,103,106,110,112,113].to_vector(:scale)
117
- v2=[0,20,28,27,50,29,7,17,6,12].to_vector(:scale)
118
- assert_in_delta(-0.175758,Statsample::Bivariate.spearman(v1,v2),0.0001)
119
-
120
- end
121
- def test_point_biserial
122
- c=[1,3,5,6,7,100,200,300,400,300].to_vector(:scale)
123
- d=[1,1,1,1,1,0,0,0,0,0].to_vector(:scale)
124
- assert_raise TypeError do
125
- Statsample::Bivariate.point_biserial(c,d)
126
- end
127
- assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
128
- end
129
- def test_tau
130
- v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
131
- v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
132
- assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
133
- assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
134
- v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
135
- v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
136
- assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
137
- end
138
- def test_gamma
139
- m=Matrix[[10,5,2],[10,15,20]]
140
- assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
141
- m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
142
- assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
143
-
144
-
145
- end
127
+ assert_in_delta(Statsample::Bivariate.point_biserial(d,c), Statsample::Bivariate.pearson(d,c), 0.0001)
128
+ end
129
+ def test_tau
130
+ v1=[1,2,3,4,5,6,7,8,9,10,11].to_vector(:ordinal)
131
+ v2=[1,3,4,5,7,8,2,9,10,6,11].to_vector(:ordinal)
132
+ assert_in_delta(0.6727,Statsample::Bivariate.tau_a(v1,v2),0.001)
133
+ assert_in_delta(0.6727,Statsample::Bivariate.tau_b((Statsample::Crosstab.new(v1,v2).to_matrix)),0.001)
134
+ v1=[12,14,14,17,19,19,19,19,19,20,21,21,21,21,21,22,23,24,24,24,26,26,27].to_vector(:ordinal)
135
+ v2=[11,4,4,2,0,0,0,0,0,0,4,0,4,0,0,0,0,4,0,0,0,0,0].to_vector(:ordinal)
136
+ assert_in_delta(-0.376201540231705, Statsample::Bivariate.tau_b(Statsample::Crosstab.new(v1,v2).to_matrix),0.001)
137
+ end
138
+ def test_gamma
139
+ m=Matrix[[10,5,2],[10,15,20]]
140
+ assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
141
+ m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
142
+ assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
143
+
144
+
145
+ end
146
146
  end
@@ -1,60 +1,81 @@
1
1
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
2
  require 'statsample'
3
3
  require 'tempfile'
4
+ require 'tmpdir'
4
5
  require 'test/unit'
5
6
 
6
7
  class StatsampleCodificationTestCase < Test::Unit::TestCase
7
8
 
8
- def initialize(*args)
9
- v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
10
- @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s','dream'=>'d','dreaming'=>'d'}
11
- @ds={"v1"=>v1}.to_dataset
12
- super
13
- end
14
- def test_create_yaml
15
- assert_raise ArgumentError do
16
- Statsample::Codification.create_yaml(@ds,[])
17
- end
18
- expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
19
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
20
- h=YAML::load(yaml_hash)
21
- assert_equal(['v1'],h.keys)
22
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
23
- tf = Tempfile.new("test_codification")
24
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],Statsample::SPLIT_TOKEN,tf)
25
- tf.close
26
- tf.open
27
- h=YAML::load(tf)
28
- assert_equal(['v1'],h.keys)
29
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
30
- tf.close(true)
31
- end
32
- def test_recodification
33
- expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
34
- assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
35
- v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
36
- expected=[['r'],['w','d'],nil,['w','d']]
37
- assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
38
- end
39
- def test_recode_dataset_simple
40
- yaml=YAML::dump({'v1'=>@dict})
41
- Statsample::Codification.recode_dataset_simple!(@ds,yaml)
42
- expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
43
- assert_not_equal(expected_vector,@ds['v1'])
44
- assert_equal(expected_vector,@ds['v1_recoded'])
45
- end
46
- def test_recode_dataset_split
47
- yaml=YAML::dump({'v1'=>@dict})
48
- Statsample::Codification.recode_dataset_split!(@ds,yaml)
49
- e={}
50
- e['r']=[1,1,0,1,0,0,0].to_vector
51
- e['w']=[0,1,1,0,0,0,0].to_vector
52
- e['s']=[0,0,0,0,1,1,1].to_vector
53
- e['d']=[0,0,0,0,0,1,1].to_vector
54
- e.each{|k,expected|
55
- assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
56
-
57
- }
9
+ def initialize(*args)
10
+ v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
11
+ @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s', 'dream'=>'d', 'dreaming'=>'d'}
12
+ @ds={"v1"=>v1}.to_dataset
13
+ super
14
+ end
15
+ def test_create_hash
16
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
17
+ hash=Statsample::Codification.create_hash(@ds,['v1'])
18
+ assert_equal(['v1'],hash.keys)
19
+ assert_equal(expected_keys_v1,hash['v1'].keys.sort)
20
+ assert_equal(expected_keys_v1,hash['v1'].values.sort)
21
+ end
22
+ def test_create_excel
23
+ filename=Dir::tmpdir+"/test_excel"+Time.now().to_s+".xls"
24
+ #filename = Tempfile.new("test_codification_"+Time.now().to_s)
25
+ Statsample::Codification.create_excel(@ds, ['v1'], filename)
26
+ field=(["v1"]*8).to_vector
27
+ keys=%w{dream dreaming run running sleep sleeping walk walking}.to_vector
28
+ ds=Statsample::Excel.read(filename)
29
+ assert_equal(field, ds['field'])
30
+ assert_equal(keys, ds['original'])
31
+ assert_equal(keys, ds['recoded'])
32
+ hash=Statsample::Codification.excel_to_recoded_hash(filename)
33
+ assert_equal(keys.data, hash['v1'].keys.sort)
34
+ assert_equal(keys.data, hash['v1'].values.sort)
35
+
36
+ end
37
+ def test_create_yaml
38
+ assert_raise ArgumentError do
39
+ Statsample::Codification.create_yaml(@ds,[])
58
40
  end
41
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
42
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
43
+ h=YAML::load(yaml_hash)
44
+ assert_equal(['v1'],h.keys)
45
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
46
+ tf = Tempfile.new("test_codification")
47
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],tf, Statsample::SPLIT_TOKEN)
48
+ tf.close
49
+ tf.open
50
+ h=YAML::load(tf)
51
+ assert_equal(['v1'],h.keys)
52
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
53
+ tf.close(true)
54
+ end
55
+ def test_recodification
56
+ expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
57
+ assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
58
+ v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
59
+ expected=[['r'],['w','d'],nil,['w','d']]
60
+ assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
61
+ end
62
+ def test_recode_dataset_simple
63
+ Statsample::Codification.recode_dataset_simple!(@ds,{'v1'=>@dict})
64
+ expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
65
+ assert_not_equal(expected_vector,@ds['v1'])
66
+ assert_equal(expected_vector,@ds['v1_recoded'])
67
+ end
68
+ def test_recode_dataset_split
69
+ Statsample::Codification.recode_dataset_split!(@ds,{'v1'=>@dict})
70
+ e={}
71
+ e['r']=[1,1,0,1,0,0,0].to_vector
72
+ e['w']=[0,1,1,0,0,0,0].to_vector
73
+ e['s']=[0,0,0,0,1,1,1].to_vector
74
+ e['d']=[0,0,0,0,0,1,1].to_vector
75
+ e.each{|k,expected|
76
+ assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
77
+
78
+ }
79
+ end
59
80
 
60
81
  end
data/test/test_dataset.rb CHANGED
@@ -356,7 +356,7 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
356
356
  assert_equal(vmult,ds.compute("v1*v2"))
357
357
 
358
358
  end
359
- def test_crosstab_with_asignation
359
+ def test_crosstab_with_asignation
360
360
  v1=%w{a a a b b b c c c}.to_vector
361
361
  v2=%w{a b c a b c a b c}.to_vector
362
362
  v3=%w{0 1 0 0 1 1 0 0 1}.to_scale
@@ -371,4 +371,22 @@ class StatsampleDatasetTestCase < Test::Unit::TestCase
371
371
  ds2={'_id'=>ev_id, 'a'=>ev_a, 'b'=>ev_b, 'c'=>ev_c}.to_dataset
372
372
  assert_equal(ds, ds2)
373
373
  end
374
+ def test_one_to_many
375
+ cases=[
376
+ ['1','george','red',10,'blue',20,nil,nil],
377
+ ['2','fred','green',15,'orange',30,'white',20],
378
+ ['3','alfred',nil,nil,nil,nil,nil,nil]
379
+ ]
380
+ ds=Statsample::Dataset.new(%w{id name car_color1 car_value1 car_color2 car_value2 car_color3 car_value3})
381
+ cases.each {|c| ds.add_case_array c }
382
+ ds.update_valid_data
383
+ ids=%w{1 1 2 2 2}.to_vector
384
+ colors=%w{red blue green orange white}.to_vector
385
+ values=[10,20,15,30,20].to_vector
386
+ col_ids=[1,2,1,2,3].to_scale
387
+ ds_expected={'id'=>ids, '_col_id'=>col_ids, 'color'=>colors, 'value'=>values}.to_dataset(['id','_col_id', 'color','value'])
388
+ assert_equal(ds_expected, ds.one_to_many(%w{id}, "car_%v%n"))
389
+
390
+ end
391
+
374
392
  end
@@ -0,0 +1,44 @@
1
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
2
+ require 'statsample'
3
+ require 'test/unit'
4
+ require 'matrix_extension'
5
+ class StatsampleFactorTestCase < Test::Unit::TestCase
6
+ def test_pca
7
+ require 'gsl'
8
+ a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
9
+ b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
10
+ a.recode! {|c| c-a.mean}
11
+ b.recode! {|c| c-b.mean}
12
+ ds={'a'=>a,'b'=>b}.to_dataset
13
+ cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
14
+ pca=Statsample::Factor::PCA.new(cov_matrix)
15
+ expected_eigenvalues=[1.284, 0.0490]
16
+ expected_eigenvalues.each_with_index{|ev,i|
17
+ assert_in_delta(ev,pca.eigenvalues[i],0.001)
18
+ }
19
+ expected_fm_1=GSL::Matrix[[0.677], [0.735]]
20
+ expected_fm_2=GSL::Matrix[[0.677,0.735], [0.735, -0.677]]
21
+ _test_matrix(expected_fm_1,pca.feature_vector(1))
22
+ _test_matrix(expected_fm_2,pca.feature_vector(2))
23
+ end
24
+ def test_rotation_varimax
25
+ a = Matrix[ [ 0.4320, 0.8129, 0.3872] ,
26
+ [0.7950, -0.5416, 0.2565] ,
27
+ [0.5944, 0.7234, -0.3441],
28
+ [0.8945, -0.3921, -0.1863] ]
29
+ expected= Matrix[[-0.0204423, 0.938674, -0.340334],
30
+ [0.983662, 0.0730206, 0.134997],
31
+ [0.0826106, 0.435975, -0.893379],
32
+ [0.939901, -0.0965213, -0.309596]].to_gsl
33
+ varimax=Statsample::Factor::Varimax.new(a)
34
+ varimax.iterate
35
+ _test_matrix(expected,varimax.rotated)
36
+ end
37
+ def _test_matrix(a,b)
38
+ a.size1.times {|i|
39
+ a.size2.times {|j|
40
+ assert_in_delta(a[i,j], b[i,j],0.001)
41
+ }
42
+ }
43
+ end
44
+ end