statsample 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
data/demo/spss_matrix.rb DELETED
@@ -1,3 +0,0 @@
1
- require File.dirname(__FILE__)+"/../lib/statsample"
2
- ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
3
- puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
data/demo/strata_proportion.rb DELETED
@@ -1,152 +0,0 @@
1
- require File.dirname(__FILE__)+"/../lib/statsample"
2
- require 'statsample/multiset'
3
- require 'statsample/srs'
4
- require 'statsample/resample'
5
- require 'gnuplot'
6
-
7
- tests=3000
8
- sample_size=100
9
-
10
- a=[1]*50+[0]*950
11
- b=[1]*900+[0]*100
12
- a_size=a.size
13
- b_size=b.size
14
- av=a.to_vector(:scale)
15
- bv=b.to_vector(:scale)
16
-
17
- ads={'data'=>a.to_vector(:scale)}.to_dataset
18
- bds={'data'=>b.to_vector(:scale)}.to_dataset
19
-
20
- m=Statsample::Multiset.new(['data'])
21
- m.add_dataset('a',ads)
22
- m.add_dataset('b',bds)
23
- ss=Statsample::StratifiedSample.new(m,{'a'=>a.size,'b'=>b.size})
24
-
25
- es=[{'N'=>a_size,'n'=>sample_size/2,'s'=>av.standard_deviation_population}, {'N'=>b_size,'n'=>sample_size/2,'s'=>bv.standard_deviation_population}]
26
-
27
- esp=[{'N'=>a_size,'n'=>sample_size/2,'p'=>av.proportion(1.0)}, {'N'=>b_size,'n'=>sample_size/2,'p'=>bv.proportion(1.0)}]
28
-
29
-
30
-
31
- sd_estimated_wr=Statsample::StratifiedSample.standard_error_ksd_wr(es)
32
-
33
- sd_estimated_wor = Statsample::StratifiedSample.standard_error_ksd_wor(es)
34
-
35
- sd_estimated_wor_p = Statsample::StratifiedSample.proportion_sd_ksd_wor(esp)
36
- sd_estimated_wr_p = Statsample::StratifiedSample.proportion_sd_ksd_wr(esp)
37
-
38
- pop=(a+b).to_vector(:scale)
39
- s=pop.standard_deviation_population
40
-
41
-
42
-
43
-
44
- puts "-------------"
45
-
46
- puts "Estadísticos:"
47
- puts "Mean:"+pop.mean.to_s
48
- puts "SD:"+s.to_s
49
- puts "EE con reemplazo:"+Statsample::SRS.standard_error_ksd_wr(s, sample_size, pop.size).to_s
50
- puts "EE sin reemplazo:"+Statsample::SRS.standard_error_ksd_wor(s, sample_size,pop.size).to_s
51
-
52
-
53
-
54
- puts "EE estratified con reemplazo:"+sd_estimated_wr.to_s
55
- puts "EE estratified sin reemplazo:"+sd_estimated_wor.to_s
56
- puts "EE estratified con reemplazo(p):"+sd_estimated_wr_p.to_s
57
- puts "EE estratified sin reemplazo(p):"+sd_estimated_wor_p.to_s
58
-
59
- sd_with=[]
60
- sd_without=[]
61
- sd_strat_wr=[]
62
- sd_strat_wor_1=[]
63
- sd_strat_wor_2=[]
64
- monte_with=Statsample::Resample.repeat_and_save(tests) {
65
- sample= pop.sample_with_replacement(sample_size)
66
- sd_with.push(Statsample::SRS.standard_error_esd_wr(sample.sds,sample_size,pop.size))
67
- sample.mean
68
- }
69
-
70
-
71
- monte_without=Statsample::Resample.repeat_and_save(tests) {
72
- sample= pop.sample_without_replacement(sample_size)
73
- sd_without.push(Statsample::SRS.standard_error_esd_wor(sample.sds,sample_size,pop.size))
74
- sample.mean
75
- }
76
-
77
-
78
-
79
- stratum_wor=Statsample::Resample.repeat_and_save(tests) {
80
- a_sample= {'data'=>av.sample_without_replacement(sample_size/2)}.to_dataset
81
- b_sample= {'data'=>bv.sample_without_replacement(sample_size/2)}.to_dataset
82
- m=Statsample::Multiset.new(['data'])
83
- m.add_dataset('a',a_sample)
84
- m.add_dataset('b',b_sample)
85
- ss=Statsample::StratifiedSample.new(m,{'a'=>a_size,'b'=>b_size})
86
- sd_strat_wor_1.push(ss.standard_error_wor('data'))
87
- sd_strat_wor_2.push(ss.proportion_sd_esd_wor('data',1.0))
88
- ss.mean('data')
89
- }.to_vector(:scale)
90
-
91
- stratum_wr=Statsample::Resample.repeat_and_save(tests) {
92
- a_sample= {'data'=>av.sample_with_replacement(sample_size/2)}.to_dataset
93
- b_sample= {'data'=>bv.sample_with_replacement(sample_size/2)}.to_dataset
94
- m=Statsample::Multiset.new(['data'])
95
- m.add_dataset('a',a_sample)
96
- m.add_dataset('b',b_sample)
97
- ss=Statsample::StratifiedSample.new(m,{'a'=>a_size,'b'=>b_size})
98
- sd_strat_wr.push(ss.standard_error_wr('data'))
99
- ss.mean('data')
100
- }.to_vector(:scale)
101
-
102
-
103
-
104
- v_sd_with=sd_with.to_vector(:scale)
105
- v_sd_without=sd_without.to_vector(:scale)
106
- v_sd_strat_wr=sd_strat_wr.to_vector(:scale)
107
- v_sd_strat_wor_1=sd_strat_wor_1.to_vector(:scale)
108
- v_sd_strat_wor_2=sd_strat_wor_2.to_vector(:scale)
109
-
110
- v_with=monte_with.to_vector(:scale)
111
- v_without=monte_without.to_vector(:scale)
112
- puts "=============="
113
- puts "Con reemplazo"
114
- puts "Mean:"+v_with.mean.to_s
115
- puts "Sd:"+v_with.sds.to_s
116
- puts "Sd (estimated):"+v_sd_with.mean.to_s
117
- puts "=============="
118
- puts "Sin reemplazo"
119
- puts "Mean:"+v_without.mean.to_s
120
- puts "Sd:"+v_without.sds.to_s
121
- puts "Sd (estimated):"+v_sd_without.mean.to_s
122
- puts "=============="
123
- puts "Estratificado Con reemplazo"
124
- puts "Mean:"+stratum_wr.mean.to_s
125
- puts "Sd:"+stratum_wr.sds.to_s
126
- puts "Sd (estimated):"+v_sd_strat_wr.mean.to_s
127
-
128
- puts "=============="
129
- puts "Estratificado Sin reemplazo"
130
- puts "Mean:"+stratum_wor.mean.to_s
131
- puts "Sd:"+stratum_wor.sds.to_s
132
- puts "Sd (estimated scale):"+v_sd_strat_wor_1.mean.to_s
133
- puts "Sd (estimated prop):"+v_sd_strat_wor_2.mean.to_s
134
-
135
- =begin
136
-
137
-
138
-
139
-
140
-
141
- x=[]
142
- y=[]
143
- y2=[]
144
- prev=0
145
- prev_chi=0
146
- v.frequencies.sort.each{|k,v1|
147
- x.push(k)
148
- y.push(prev+v1)
149
- prev=prev+v1
150
- }
151
- GSL::graph(GSL::Vector.alloc(x), GSL::Vector.alloc(y))
152
- =end
data/demo/stratum.rb DELETED
@@ -1,141 +0,0 @@
1
- require File.dirname(__FILE__)+"/../lib/statsample"
2
- require 'statsample/multiset'
3
- require 'statsample/srs'
4
- require 'statsample/resample'
5
- require 'gnuplot'
6
-
7
- tests=3000
8
- sample_size=50
9
-
10
- a=[10]*50+[12]*10+[14]*20+[16]*10+[19]*10
11
- b=[11000]*50+[11050]*10+[11100]*20+[11300]*10+[11240]*10
12
- a_size=a.size
13
- b_size=b.size
14
- av=a.to_vector(:scale)
15
- bv=b.to_vector(:scale)
16
-
17
- ads={'data'=>a.to_vector(:scale)}.to_dataset
18
- bds={'data'=>b.to_vector(:scale)}.to_dataset
19
-
20
- m=Statsample::Multiset.new(['data'])
21
- m.add_dataset('a',ads)
22
- m.add_dataset('b',bds)
23
- ss=Statsample::StratifiedSample.new(m,{'a'=>a.size,'b'=>b.size})
24
-
25
- es=[{'N'=>a_size,'n'=>sample_size/2,'s'=>av.standard_deviation_population}, {'N'=>b_size,'n'=>sample_size/2,'s'=>bv.standard_deviation_population}]
26
-
27
-
28
-
29
- sd_estimated_wr=Statsample::StratifiedSample.standard_error_ksd_wr(es)
30
-
31
- sd_estimated_wor = Statsample::StratifiedSample.standard_error_ksd_wor(es)
32
-
33
-
34
-
35
- pop=(a+b).to_vector(:scale)
36
- s=pop.standard_deviation_population
37
-
38
-
39
-
40
-
41
- puts "-------------"
42
-
43
- puts "Estadísticos:"
44
- puts "Mean:"+pop.mean.to_s
45
- puts "SD:"+s.to_s
46
- puts "EE con reemplazo:"+Statsample::SRS.standard_error_ksd_wr(s, sample_size, pop.size).to_s
47
- puts "EE sin reemplazo:"+Statsample::SRS.standard_error_ksd_wor(s, sample_size,pop.size).to_s
48
-
49
- puts "EE estratified con reemplazo:"+sd_estimated_wr.to_s
50
- puts "EE estratified sin reemplazo:"+sd_estimated_wor.to_s
51
- sd_with=[]
52
- sd_without=[]
53
- sd_strat_wr=[]
54
- sd_strat_wor=[]
55
- monte_with=Statsample::Resample.repeat_and_save(tests) {
56
- sample= pop.sample_with_replacement(sample_size)
57
- sd_with.push(Statsample::SRS.standard_error_esd_wr(sample.sds,sample_size,pop.size))
58
- sample.mean
59
- }
60
-
61
-
62
- monte_without=Statsample::Resample.repeat_and_save(tests) {
63
- sample= pop.sample_without_replacement(sample_size)
64
- sd_without.push(Statsample::SRS.standard_error_esd_wor(sample.sds,sample_size,pop.size))
65
- sample.mean
66
- }
67
-
68
-
69
-
70
- stratum_wor=Statsample::Resample.repeat_and_save(tests) {
71
- a_sample= {'data'=>av.sample_without_replacement(sample_size/2)}.to_dataset
72
- b_sample= {'data'=>bv.sample_without_replacement(sample_size/2)}.to_dataset
73
- m=Statsample::Multiset.new(['data'])
74
- m.add_dataset('a',a_sample)
75
- m.add_dataset('b',b_sample)
76
- ss=Statsample::StratifiedSample.new(m,{'a'=>a_size,'b'=>b_size})
77
- sd_strat_wor.push(ss.standard_error_wor('data'))
78
- ss.mean('data')
79
- }.to_vector(:scale)
80
-
81
- stratum_wr=Statsample::Resample.repeat_and_save(tests) {
82
- a_sample= {'data'=>av.sample_with_replacement(sample_size/2)}.to_dataset
83
- b_sample= {'data'=>bv.sample_with_replacement(sample_size/2)}.to_dataset
84
- m=Statsample::Multiset.new(['data'])
85
- m.add_dataset('a',a_sample)
86
- m.add_dataset('b',b_sample)
87
- ss=Statsample::StratifiedSample.new(m,{'a'=>a_size,'b'=>b_size})
88
- sd_strat_wr.push(ss.standard_error_wr('data'))
89
- ss.mean('data')
90
- }.to_vector(:scale)
91
-
92
-
93
-
94
- v_sd_with=sd_with.to_vector(:scale)
95
- v_sd_without=sd_without.to_vector(:scale)
96
- v_sd_strat_wr=sd_strat_wr.to_vector(:scale)
97
- v_sd_strat_wor=sd_strat_wor.to_vector(:scale)
98
-
99
-
100
- v_with=monte_with.to_vector(:scale)
101
- v_without=monte_without.to_vector(:scale)
102
- puts "=============="
103
- puts "Con reemplazo"
104
- puts "Mean:"+v_with.mean.to_s
105
- puts "Sd:"+v_with.sds.to_s
106
- puts "Sd (estimated):"+v_sd_with.mean.to_s
107
- puts "=============="
108
- puts "Sin reemplazo"
109
- puts "Mean:"+v_without.mean.to_s
110
- puts "Sd:"+v_without.sds.to_s
111
- puts "Sd (estimated):"+v_sd_without.mean.to_s
112
- puts "=============="
113
- puts "Estratificado Con reemplazo"
114
- puts "Mean:"+stratum_wr.mean.to_s
115
- puts "Sd:"+stratum_wr.sds.to_s
116
- puts "Sd (estimated):"+v_sd_strat_wr.mean.to_s
117
-
118
- puts "=============="
119
- puts "Estratificado Sin reemplazo"
120
- puts "Mean:"+stratum_wor.mean.to_s
121
- puts "Sd:"+stratum_wor.sds.to_s
122
- puts "Sd (estimated):"+v_sd_strat_wor.mean.to_s
123
-
124
- p v_without.plot_histogram
125
-
126
- =begin
127
-
128
-
129
-
130
- x=[]
131
- y=[]
132
- y2=[]
133
- prev=0
134
- prev_chi=0
135
- v.frequencies.sort.each{|k,v1|
136
- x.push(k)
137
- y.push(prev+v1)
138
- prev=prev+v1
139
- }
140
- GSL::graph(GSL::Vector.alloc(x), GSL::Vector.alloc(y))
141
- =end
data/demo/t-student.rb DELETED
@@ -1,17 +0,0 @@
1
- require File.dirname(__FILE__)+"/../lib/statsample"
2
-
3
-
4
- tests=3000
5
-
6
- r = GSL::Rng.alloc(GSL::Rng::TAUS, 1)
7
- sample_sizes=[5,10,20,30]
8
- sample_sizes.each{|sample_size|
9
- monte=Statsample::Resample.repeat_and_save(tests) {
10
- v=[]
11
- sample_size.times{|i|
12
- v.push(r.ugaussian)
13
- }
14
- v.to_vector(:scale).mean
15
-
16
- }
17
- }
data/demo/umann.rb DELETED
@@ -1,8 +0,0 @@
1
- require File.dirname(__FILE__)+'/../lib/statsample'
2
- v1=[1,2,3,4,7,8,9,10,14,15].to_scale
3
- v2=[5,6,11,12,13,16,17,18,19].to_scale
4
- u=Statsample::Test::UMannWhitney.new(v1,v2)
5
-
6
- puts u.summary
7
-
8
- #p Statsample::Test::UMannWhitney.exact_probability_as62(100,100)
data/lib/matrix_extension.rb DELETED
@@ -1,92 +0,0 @@
1
- require 'matrix'
2
-
3
- if RUBY_VERSION<="1.9.0"
4
- class Vector
5
- alias_method :old_coerce, :coerce
6
- def coerce(other)
7
- case other
8
- when Numeric
9
- return Matrix::Scalar.new(other), self
10
- else
11
- raise TypeError, "#{self.class} can't be coerced into #{other.class}"
12
- end
13
- end
14
- end
15
- end
16
- class Matrix
17
- def rows_sum
18
- (0...row_size).collect {|i|
19
- row(i).to_a.inject(0) {|a,v| a+v}
20
- }
21
- end
22
- def cols_sum
23
- (0...column_size).collect {|i|
24
- column(i).to_a.inject(0) {|a,v| a+v}
25
- }
26
- end
27
- def total_sum
28
- rows_sum.inject(0){|a,v| a+v}
29
- end
30
- def row_stochastic
31
- rs=rows_sum
32
- rows=(0...row_size).collect{|i|
33
- (0...column_size).collect {|j|
34
- self[i,j].quo(rs[i])
35
- }
36
- }
37
- Matrix.rows(rows,false)
38
- end
39
- def column_stochastic
40
- cs=cols_sum
41
- rows=(0...row_size).collect{|i|
42
- (0...column_size).collect {|j|
43
- self[i,j].quo(cs[j])
44
- }
45
- }
46
- Matrix.rows(rows,false)
47
- end
48
- def double_stochastic
49
- ts=total_sum
50
- collect {|i| i.quo(ts)}
51
- end
52
- # Test if a Matrix is a identity one
53
- def identity?
54
- if regular?
55
- rows=(0...row_size).each{|i|
56
- (0...column_size).each {|j|
57
- v = self[i,j]
58
- return false if (i==j and v!=1) or (i!=j and v!=0)
59
- }
60
- }
61
- true
62
- else
63
- false
64
- end
65
- end
66
- def to_gsl
67
- out=[]
68
- self.row_size.times{|i|
69
- out[i]=self.row(i).to_a
70
- }
71
- GSL::Matrix[*out]
72
- end
73
- def orthogonal?
74
- if regular?
75
- (self * self.t).identity?
76
- else
77
- false
78
- end
79
- end
80
- end
81
-
82
-
83
- module GSL
84
- class Matrix
85
- def to_matrix
86
- rows=self.size1
87
- cols=self.size2
88
- out=(0...rows).collect{|i| (0...cols).collect {|j| self[i,j]} }
89
- ::Matrix.rows(out)
90
- end
91
- end
92
- end