statsample 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
data/demo/stratum.rb ADDED
@@ -0,0 +1,141 @@
1
+ require File.dirname(__FILE__)+"/../lib/statsample"
2
+ require 'statsample/multiset'
3
+ require 'statsample/srs'
4
+ require 'statsample/resample'
5
+ require 'gnuplot'
6
+
7
# Monte Carlo demo: repeatedly samples a two-stratum population and prints the
# spread of the simulated sample means next to the standard errors reported by
# Statsample::SRS and Statsample::StratifiedSample.

tests = 3000
sample_size = 50

# Two strata with very different levels; both have 100 cases.
a = [10] * 50 + [12] * 10 + [14] * 20 + [16] * 10 + [19] * 10
b = [11000] * 50 + [11050] * 10 + [11100] * 20 + [11300] * 10 + [11240] * 10
a_size = a.size
b_size = b.size
av = a.to_vector(:scale)
bv = b.to_vector(:scale)

ads = { 'data' => a.to_vector(:scale) }.to_dataset
bds = { 'data' => b.to_vector(:scale) }.to_dataset

# Full-population multiset; built here but not read again below.
m = Statsample::Multiset.new(['data'])
m.add_dataset('a', ads)
m.add_dataset('b', bds)
ss = Statsample::StratifiedSample.new(m, { 'a' => a.size, 'b' => b.size })

# Per-stratum inputs for the known-SD standard error formulas.
es = [[a_size, av], [b_size, bv]].map do |stratum_size, vector|
  { 'N' => stratum_size, 'n' => sample_size / 2, 's' => vector.standard_deviation_population }
end

sd_estimated_wr = Statsample::StratifiedSample.standard_error_ksd_wr(es)
sd_estimated_wor = Statsample::StratifiedSample.standard_error_ksd_wor(es)

pop = (a + b).to_vector(:scale)
s = pop.standard_deviation_population

puts "-------------"

puts "Estadísticos:"
puts "Mean:#{pop.mean}"
puts "SD:#{s}"
puts "EE con reemplazo:#{Statsample::SRS.standard_error_ksd_wr(s, sample_size, pop.size)}"
puts "EE sin reemplazo:#{Statsample::SRS.standard_error_ksd_wor(s, sample_size, pop.size)}"

puts "EE estratified con reemplazo:#{sd_estimated_wr}"
puts "EE estratified sin reemplazo:#{sd_estimated_wor}"

# Standard errors estimated inside each simulated sample.
sd_with = []
sd_without = []
sd_strat_wr = []
sd_strat_wor = []

# Simple random sampling, with replacement.
monte_with = Statsample::Resample.repeat_and_save(tests) do
  drawn = pop.sample_with_replacement(sample_size)
  sd_with.push(Statsample::SRS.standard_error_esd_wr(drawn.sds, sample_size, pop.size))
  drawn.mean
end

# Simple random sampling, without replacement.
monte_without = Statsample::Resample.repeat_and_save(tests) do
  drawn = pop.sample_without_replacement(sample_size)
  sd_without.push(Statsample::SRS.standard_error_esd_wor(drawn.sds, sample_size, pop.size))
  drawn.mean
end

# Wraps one sampled dataset per stratum into a StratifiedSample.
build_stratified = lambda do |a_part, b_part|
  set = Statsample::Multiset.new(['data'])
  set.add_dataset('a', a_part)
  set.add_dataset('b', b_part)
  Statsample::StratifiedSample.new(set, { 'a' => a_size, 'b' => b_size })
end

# Stratified sampling, without replacement (half of the sample per stratum).
stratum_wor = Statsample::Resample.repeat_and_save(tests) do
  stratified = build_stratified.call(
    { 'data' => av.sample_without_replacement(sample_size / 2) }.to_dataset,
    { 'data' => bv.sample_without_replacement(sample_size / 2) }.to_dataset
  )
  sd_strat_wor.push(stratified.standard_error_wor('data'))
  stratified.mean('data')
end.to_vector(:scale)

# Stratified sampling, with replacement.
stratum_wr = Statsample::Resample.repeat_and_save(tests) do
  stratified = build_stratified.call(
    { 'data' => av.sample_with_replacement(sample_size / 2) }.to_dataset,
    { 'data' => bv.sample_with_replacement(sample_size / 2) }.to_dataset
  )
  sd_strat_wr.push(stratified.standard_error_wr('data'))
  stratified.mean('data')
end.to_vector(:scale)

v_sd_with = sd_with.to_vector(:scale)
v_sd_without = sd_without.to_vector(:scale)
v_sd_strat_wr = sd_strat_wr.to_vector(:scale)
v_sd_strat_wor = sd_strat_wor.to_vector(:scale)

v_with = monte_with.to_vector(:scale)
v_without = monte_without.to_vector(:scale)

puts "=============="
puts "Con reemplazo"
puts "Mean:#{v_with.mean}"
puts "Sd:#{v_with.sds}"
puts "Sd (estimated):#{v_sd_with.mean}"
puts "=============="
puts "Sin reemplazo"
puts "Mean:#{v_without.mean}"
puts "Sd:#{v_without.sds}"
puts "Sd (estimated):#{v_sd_without.mean}"
puts "=============="
puts "Estratificado Con reemplazo"
puts "Mean:#{stratum_wr.mean}"
puts "Sd:#{stratum_wr.sds}"
puts "Sd (estimated):#{v_sd_strat_wr.mean}"

puts "=============="
puts "Estratificado Sin reemplazo"
puts "Mean:#{stratum_wor.mean}"
puts "Sd:#{stratum_wor.sds}"
puts "Sd (estimated):#{v_sd_strat_wor.mean}"

p v_without.plot_histogram

# Disabled experiment (cumulative frequency plot through GSL). It refers to a
# vector `v` that is not defined anywhere in this script.
#
#   x=[]
#   y=[]
#   y2=[]
#   prev=0
#   prev_chi=0
#   v.frequencies.sort.each{|k,v1|
#     x.push(k)
#     y.push(prev+v1)
#     prev=prev+v1
#   }
#   GSL::graph(GSL::Vector.alloc(x), GSL::Vector.alloc(y))
data/lib/spss.rb ADDED
@@ -0,0 +1,131 @@
1
+ # = spss.rb -
2
+ #
3
+ # Provides utilities for working with SPSS files
4
+ #
5
+ # Copyright (C) 2009 Claudio Bustos
6
+ #
7
+ # Claudio Bustos mailto:clbustos@gmail.com
8
+
9
module SPSS
  # Builders for SPSS data dictionaries, rendered either as XML or as SPSS
  # command syntax.
  module Dictionary
    # Base node: keeps the config it was created with and a list of children.
    class Element
      # config:: Hash of attribute name => value; stored as-is in @config.
      def initialize(config={})
        @config = config
        @elements = []
      end

      # Appends a child element.
      def add(a)
        @elements.push(a)
      end

      # Renders every child with +func+ (e.g. :to_xml), one per line, each
      # prefixed with a single space.
      def parse_elements(func=:to_s)
        @elements.collect { |e| " " + e.send(func) }.join("\n")
      end

      # Assigns each config entry through the matching public writer; keys
      # without a writer are ignored.
      #
      # respond_to? is used because Object#methods returns Symbols on
      # Ruby >= 1.9, so comparing against a String name never matched and no
      # value was ever assigned.
      def init_with(config)
        config.each do |key, value|
          setter = "#{key}="
          send(setter, value) if respond_to?(setter)
        end
      end
    end

    # Root dictionary node.
    class Dictionary < Element
      attr_accessor :locale, :date_time, :row_count

      # Applies the defaults first, then the caller's config on top of them.
      def initialize(config={})
        super
        init_with({
          :locale => "en_US",
          :date_time => Time.new.strftime("%Y-%m-%dT%H:%M:%S"),
          :row_count => 1
        })
        init_with(config)
      end

      # Returns the dictionary and its children as an XML string.
      def to_xml
        "<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>\n" +
          parse_elements(:to_xml) + "\n</dictionary>"
      end

      # Returns the SPSS syntax of every child element.
      def to_spss
        parse_elements(:to_spss)
      end
    end

    # A user-missing value, optionally marking a range bound.
    class MissingValue < Element
      attr_accessor :data, :type, :from, :to

      # data:: the missing value itself.
      # type:: nil, "lowerBound" or "upperBound".
      #
      # Raises ArgumentError for any other +type+.
      def initialize(data, type=nil)
        super() # sets up @config/@elements so the inherited #add works
        unless type.nil? || type == "lowerBound" || type == "upperBound"
          raise ArgumentError, "Incorrect value for type"
        end
        @data = data
        @type = type
      end

      # Returns the <missingValue> XML tag.
      def to_xml
        "<missingValue data='#{@data}' " + (type.nil? ? "" : "type='#{type}'") + "/>"
      end
    end

    # Value labels for one variable, given as a Hash of value => label text.
    class LabelSet
      attr_accessor :labels

      def initialize(labels)
        @labels = labels
      end

      # Returns the <valueLabelSet> XML for the variable called +name+.
      # The hash key is the data value and the hash value is its label, the
      # same orientation #parse_spss uses.
      def parse_xml(name)
        "<valueLabelSet>\n " +
          @labels.collect { |value, text| "<valueLabel label='#{text}' value='#{value}' />" }.join("\n ") +
          "\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
      end

      # Returns the "value 'label'" pairs used by the VALUE LABELS command.
      def parse_spss
        @labels.collect { |value, text| "#{value} '#{text}'" }.join("\n ")
      end
    end

    # A single variable definition.
    #
    # NOTE(review): the attribute is spelled +aligment+ (sic) and the same
    # spelling is emitted as an XML attribute; both are kept because callers
    # and consumers may depend on them -- confirm against the SPSS XML schema.
    class Variable < Element
      attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values

      # Running counter used to build default names and labels.
      @@var_number = 1

      # Applies the defaults, then +config+. :missing_values is part of the
      # defaults so that a list passed in +config+ is no longer discarded.
      def initialize(config={})
        super
        init_with({
          :aligment => "left",
          :display_width => 8,
          :label => "Variable #{@@var_number}",
          :measurement_level => "SCALE",
          :name => "var#{@@var_number}",
          :type => 0,
          :decimals => 2,
          :width => 10,
          :type_format => "F",
          :labelset => nil,
          :missing_values => []
        })
        init_with(config)
        @@var_number += 1
      end

      # Returns the <variable> XML, followed by its label set when present.
      def to_xml
        labelset_s = @labelset.nil? ? "" : "\n" + @labelset.parse_xml(@name)
        missing_s = @missing_values.size > 0 ? @missing_values.collect { |m| m.to_xml }.join("\n") : ""
        "<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n" +
          parse_elements(:to_xml) + missing_s + "</variable>" + labelset_s
      end

      # Returns the SPSS commands describing this variable, one per line.
      def to_spss
        out = [
          "VARIABLE LABELS #{@name} '#{label}' .",
          # The SPSS command is spelled ALIGNMENT, unlike the attribute name.
          "VARIABLE ALIGNMENT #{@name} (#{@aligment.upcase}) .",
          "VARIABLE WIDTH #{@name} (#{@display_width}) .",
          "VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) ."
        ].join("\n") + "\n"
        # Each optional command ends with a newline so commands never run
        # together on one line.
        unless @labelset.nil?
          out << "VALUE LABELS #{@name} " + labelset.parse_spss + " .\n"
        end
        if @missing_values.size > 0
          out << "MISSING VALUES #{@name} (" + @missing_values.collect { |m| m.data }.join(",") + ") .\n"
        end
        out
      end
    end
  end
end
119
# Demo: builds a small dictionary and writes its SPSS syntax to dic_spss.sps.
# Runs only when this file is executed directly, so that requiring the library
# no longer creates a file in the caller's working directory.
if __FILE__ == $0
  dictionary = SPSS::Dictionary::Dictionary.new
  yes_no = SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})

  var1 = SPSS::Dictionary::Variable.new
  var1.labelset = yes_no

  missing = SPSS::Dictionary::MissingValue.new("-99")
  var2 = SPSS::Dictionary::Variable.new

  dictionary.add(var1)
  dictionary.add(var2)
  var2.missing_values = [missing]

  File.open("dic_spss.sps","wb") { |f| f.puts dictionary.to_spss }
end
data/lib/statsample.rb ADDED
@@ -0,0 +1,216 @@
1
+ # = statsample.rb -
2
+
3
+ # Process files and databases for statistical purposes, with focus on
4
+ # estimation of parameters for several types of samples (simple random,
5
+ # stratified and multistage sampling).
6
+ #
7
+ # Copyright (C) 2008-2009 Claudio Bustos
8
+ #
9
+ # Claudio Bustos mailto:clbustos_AT_gmail.com
10
+
11
+ # :stopdoc:
12
+
13
+ $:.unshift(File.dirname(__FILE__))
14
+ $:.unshift(File.expand_path(File.dirname(__FILE__)+"/../ext"))
15
+
16
+ require 'delegate'
17
+ require 'matrix'
18
+
19
+
20
class Numeric
  # Returns the receiver multiplied by itself.
  def square
    self * self
  end
end
23
+
24
+
25
# Bundles a test definition into a three-element array.
#
# args::  the first argument is the description, the remaining ones are the
#         field names.
# block:: the test body; required.
#
# Returns [description, fields, block_as_proc].
# Raises ArgumentError when no block is given.
#
# The captured block is used directly: calling Proc.new without a block
# inside a method raises on Ruby >= 3.0.
def create_test(*args, &proc)
  raise ArgumentError, "tried to create Proc object without a block" unless proc
  description, *fields = args
  [description, fields, proc]
end
30
+
31
+ # Process files and databases for statistical purposes, with focus on
32
+ # estimation of parameters for several types of samples (simple random,
33
+ # stratified and multistage sampling).
34
+
35
# Optional native extensions: each flag records whether the library loaded.
HAS_GSL = begin
  require 'rbgsl'
  true
rescue LoadError
  false
end

HAS_ALGIB = begin
  require 'alglib'
  true
rescue LoadError
  false
end

# When the optimization file cannot be loaded, mark the library as
# not optimized.
begin
  require 'statsample/optimization'
rescue LoadError
  module Statsample
    OPTIMIZED = false
  end
end
56
+
57
+ #
58
+ # :startdoc:
59
+ #
60
module Statsample
  VERSION = '0.3.0'
  SPLIT_TOKEN = ","
  autoload(:Database, 'statsample/converters')
  autoload(:Anova, 'statsample/anova')
  autoload(:CSV, 'statsample/converters')
  autoload(:Excel, 'statsample/converters')
  autoload(:GGobi, 'statsample/converters')
  autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
  autoload(:HtmlReport, 'statsample/htmlreport')
  autoload(:Mx, 'statsample/converters')
  autoload(:Resample, 'statsample/resample')
  autoload(:SRS, 'statsample/srs')
  autoload(:Codification, 'statsample/codification')
  autoload(:Reliability, 'statsample/reliability')
  autoload(:Bivariate, 'statsample/bivariate')
  autoload(:Multivariate, 'statsample/multivariate')

  autoload(:Regression, 'statsample/regression')
  autoload(:Test, 'statsample/test')

  # Reads back an object written with Writable#save.
  #
  # The file is opened in binary mode and closed even if unmarshalling fails.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects; only load files
  # from a trusted source.
  def self.load(filename)
    File.open(filename, "rb") { |fp| Marshal.load(fp) }
  end

  module Util
    # Uniform order statistic median for position +i+ out of +n+.
    # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
    #
    # The first position is computed directly as 1 - 0.5**(1/n) instead of by
    # recursion, which never terminated for i == n == 1.
    def normal_order_statistic_medians(i,n)
      if i == 1
        1.0 - 0.5**(1 / n.to_f)
      elsif i == n
        0.5**(1 / n.to_f)
      else
        (i - 0.3175) / (n + 0.365)
      end
    end
  end

  # Mixin that serializes the including object with Marshal.
  module Writable
    # Writes the receiver to +filename+ in binary mode. The file is closed
    # even if dumping fails. Returns nil.
    def save(filename)
      File.open(filename, "wb") { |fp| Marshal.dump(self, fp) }
      nil
    end
  end

  # Summary helpers that append HTML markup to the receiver through <<.
  module HtmlSummary
    # Appends a horizontal rule; +n+ is accepted for parity with
    # ConsoleSummary#add_line and is not used.
    def add_line(n=nil)
      self << "<hr />"
    end

    # Appends a line break.
    def nl
      self << "<br />"
    end

    # Appends +text+ as a paragraph, turning newlines into <br />.
    def add(text)
      self << ("<p>" + text.gsub("\n", "<br />") + "</p>")
    end

    # Appends the HTML rendering of +table+.
    def parse_table(table)
      self << table.parse_html
    end
  end

  # Summary helpers that append plain text to the receiver through <<.
  module ConsoleSummary
    # Appends a rule made of +n+ dashes plus a newline.
    def add_line(n=80)
      self << "-" * n + "\n"
    end

    # Appends a newline.
    def nl
      self << "\n"
    end

    # Appends +text+ unchanged.
    def add(text)
      self << text
    end

    # Appends the console rendering of +table+.
    def parse_table(table)
      self << table.parse_console
    end
  end

  # Simple table that renders as console text or as HTML.
  class ReportTable
    attr_reader :header

    # header:: Array of column titles. It goes through #header= so that its
    #          widths count towards the column sizes.
    def initialize(header=[])
      @rows = []
      @max_cols = []
      self.header = header
    end

    # Appends a data row (Array of cells).
    def add_row(row)
      track_widths(row)
      @rows.push(row)
    end

    # Appends a horizontal separator.
    def add_horizontal_line
      @rows.push(:hr)
    end

    # Replaces the header and widens the columns if needed.
    def header=(h)
      track_widths(h)
      @header = h
    end

    # Renders one row padded to the column widths. Each column takes its
    # width plus 3 characters, after a 2-character left margin.
    def parse_console_row(row)
      out = "| "
      @max_cols.each_index do |i|
        if row[i].nil?
          out << " " * (@max_cols[i] + 2) + "|"
        else
          cell = row[i].to_s
          out << " " + cell + " " * (@max_cols[i] - cell.size + 1) + "|"
        end
      end
      out << "\n"
      out
    end

    # Renders a rule exactly as wide as a row. The width is the sum of the
    # column widths themselves, not of Integer#size of each width.
    def parse_console_hr
      "-" * (@max_cols.inject(0) { |total, width| total + width + 3 } + 2) + "\n"
    end

    # Renders the whole table as console text.
    def parse_console
      out = "\n"
      out << parse_console_hr
      out << parse_console_row(header)
      out << parse_console_hr

      @rows.each do |row|
        out << (row == :hr ? parse_console_hr : parse_console_row(row))
      end
      out << parse_console_hr

      out
    end

    # Renders the table as HTML. A separator gives the next row the CSS
    # class 'line'. Cell contents are inserted without escaping.
    def parse_html
      out = "<table>\n"
      if header.size > 0
        out << "<thead><tr><th>" + header.join("</th><th>") + "</th></tr></thead>\n"
      end
      out << "<tbody>\n"
      row_with_line = false
      @rows.each do |row|
        if row == :hr
          row_with_line = true
        else
          out << "<tr class='" + (row_with_line ? 'line' : '') + "'><td>"
          out << row.join("</td><td>") + "</td>"
          out << "</tr>\n"
          row_with_line = false
        end
      end
      out << "</tbody></table>\n"
      out
    end

    private

    # Widens @max_cols so every cell of +cells+ fits in its column.
    def track_widths(cells)
      cells.each_index do |i|
        width = cells[i].to_s.size
        @max_cols[i] = width if @max_cols[i].nil? || width > @max_cols[i]
      end
    end
  end
end
212
+
213
+ require 'statsample/vector'
214
+ require 'statsample/dataset'
215
+ require 'statsample/crosstab'
216
+