statsample 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +15 -9
  3. data/README.txt +6 -0
  4. data/Rakefile +8 -0
  5. data/{demo → examples}/correlation_matrix.rb +0 -0
  6. data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
  7. data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
  8. data/{demo → examples}/levene.rb +0 -0
  9. data/{demo → examples}/multiple_regression.rb +5 -3
  10. data/{demo → examples}/multivariate_correlation.rb +0 -0
  11. data/{demo → examples}/polychoric.rb +0 -0
  12. data/{demo → examples}/principal_axis.rb +0 -0
  13. data/examples/t_test.rb +11 -0
  14. data/{demo → examples}/tetrachoric.rb +0 -0
  15. data/lib/statistics2.rb +1 -1
  16. data/lib/statsample.rb +57 -6
  17. data/lib/statsample/bivariate/polychoric.rb +12 -25
  18. data/lib/statsample/bivariate/tetrachoric.rb +1 -3
  19. data/lib/statsample/converter/csv.rb +11 -12
  20. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
  21. data/lib/statsample/factor/principalaxis.rb +0 -2
  22. data/lib/statsample/factor/rotation.rb +6 -8
  23. data/lib/statsample/graph.rb +8 -0
  24. data/lib/statsample/graph/svggraph.rb +0 -4
  25. data/lib/statsample/regression/multiple/baseengine.rb +25 -28
  26. data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
  27. data/lib/statsample/test.rb +36 -1
  28. data/lib/statsample/test/levene.rb +11 -7
  29. data/lib/statsample/test/t.rb +189 -0
  30. data/test/test_anova.rb +8 -10
  31. data/test/test_bivariate.rb +40 -37
  32. data/test/test_codification.rb +9 -13
  33. data/test/test_combination.rb +37 -39
  34. data/test/test_crosstab.rb +46 -48
  35. data/test/test_csv.rb +40 -45
  36. data/test/test_dataset.rb +150 -152
  37. data/test/test_distribution.rb +24 -21
  38. data/test/test_dominance_analysis.rb +10 -12
  39. data/test/test_factor.rb +95 -91
  40. data/test/test_ggobi.rb +30 -33
  41. data/test/test_gsl.rb +4 -4
  42. data/test/test_helpers.rb +26 -0
  43. data/test/test_histogram.rb +5 -6
  44. data/test/test_logit.rb +20 -21
  45. data/test/test_matrix.rb +47 -48
  46. data/test/test_mle.rb +130 -131
  47. data/test/test_multiset.rb +95 -96
  48. data/test/test_permutation.rb +35 -36
  49. data/test/test_promise_after.rb +39 -0
  50. data/test/test_regression.rb +49 -51
  51. data/test/test_reliability.rb +29 -30
  52. data/test/test_resample.rb +22 -23
  53. data/test/test_srs.rb +8 -9
  54. data/test/test_statistics.rb +12 -6
  55. data/test/test_stest.rb +18 -10
  56. data/test/test_stratified.rb +15 -16
  57. data/test/test_svg_graph.rb +11 -22
  58. data/test/test_test_t.rb +40 -0
  59. data/test/test_umannwhitney.rb +14 -15
  60. data/test/test_vector.rb +33 -37
  61. data/test/test_xls.rb +34 -41
  62. metadata +22 -11
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 0.8.0 / 2010-03-29
2
+ * New Statsample::Test::T module, with classes and methods to do Student's t tests for one and two samples.
3
+ * Statsample::PromiseAfter module to set a number of variables without explicitly calling the compute or iterate method
4
+ * All tests ported to MiniUnit
5
+ * Directory 'demo' renamed to 'examples'
6
+ * Bug fix on report_building on Statsample::Regression::Multiple classes
7
+
1
8
  === 0.7.0 / 2010-03-25
2
9
  * Ported to ReportBuilder 1.x series
3
10
  * Implementation of ruby based covariance and correlation changed to a clearer code
data/Manifest.txt CHANGED
@@ -10,15 +10,16 @@ data/repeated_fields.csv
10
10
  data/test_binomial.csv
11
11
  data/tetmat_matrix.txt
12
12
  data/tetmat_test.txt
13
- demo/correlation_matrix.rb
14
- demo/dominance_analysis_bootstrap.rb
15
- demo/dominanceanalysis.rb
16
- demo/levene.rb
17
- demo/multiple_regression.rb
18
- demo/multivariate_correlation.rb
19
- demo/polychoric.rb
20
- demo/principal_axis.rb
21
- demo/tetrachoric.rb
13
+ examples/correlation_matrix.rb
14
+ examples/dominance_analysis.rb
15
+ examples/dominance_analysis_bootstrap.rb
16
+ examples/levene.rb
17
+ examples/multiple_regression.rb
18
+ examples/multivariate_correlation.rb
19
+ examples/polychoric.rb
20
+ examples/principal_axis.rb
21
+ examples/t_test.rb
22
+ examples/tetrachoric.rb
22
23
  lib/distribution.rb
23
24
  lib/distribution/chisquare.rb
24
25
  lib/distribution/f.rb
@@ -45,6 +46,7 @@ lib/statsample/factor.rb
45
46
  lib/statsample/factor/pca.rb
46
47
  lib/statsample/factor/principalaxis.rb
47
48
  lib/statsample/factor/rotation.rb
49
+ lib/statsample/graph.rb
48
50
  lib/statsample/graph/gdchart.rb
49
51
  lib/statsample/graph/svgboxplot.rb
50
52
  lib/statsample/graph/svggraph.rb
@@ -74,6 +76,7 @@ lib/statsample/resample.rb
74
76
  lib/statsample/srs.rb
75
77
  lib/statsample/test.rb
76
78
  lib/statsample/test/levene.rb
79
+ lib/statsample/test/t.rb
77
80
  lib/statsample/test/umannwhitney.rb
78
81
  lib/statsample/vector.rb
79
82
  po/es/statsample.mo
@@ -93,12 +96,14 @@ test/test_dominance_analysis.rb
93
96
  test/test_factor.rb
94
97
  test/test_ggobi.rb
95
98
  test/test_gsl.rb
99
+ test/test_helpers.rb
96
100
  test/test_histogram.rb
97
101
  test/test_logit.rb
98
102
  test/test_matrix.rb
99
103
  test/test_mle.rb
100
104
  test/test_multiset.rb
101
105
  test/test_permutation.rb
106
+ test/test_promise_after.rb
102
107
  test/test_regression.rb
103
108
  test/test_reliability.rb
104
109
  test/test_resample.rb
@@ -107,6 +112,7 @@ test/test_statistics.rb
107
112
  test/test_stest.rb
108
113
  test/test_stratified.rb
109
114
  test/test_svg_graph.rb
115
+ test/test_test_t.rb
110
116
  test/test_umannwhitney.rb
111
117
  test/test_vector.rb
112
118
  test/test_xls.rb
data/README.txt CHANGED
@@ -11,10 +11,12 @@ Includes:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
13
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
+ * Tests: T, Levene, U-Mannwhitney, One-Way Anova
14
15
  * Regression: Simple, Multiple, Probit and Logit
15
16
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
16
17
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
17
18
  * Sample calculation related formulas
19
+ * Creates reports on text, html and rtf, using ReportBuilder
18
20
 
19
21
  == FEATURES:
20
22
 
@@ -47,6 +49,10 @@ Includes:
47
49
  * Module Statsample::Crosstab provides function to create crosstab for categorical data
48
50
  * Reliability analysis provides functions to analyze scales. Class ItemAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted. With HtmlReport, graph the histogram of the scale and the Item Characteristic Curve for each item
49
51
  * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
52
+ * Module Statsample::Test provides several methods and classes to perform inferential statistics
53
+ * Statsample::Test::Levene
54
+ * Statsample::Test::UMannWhitney
55
+ * Statsample::Test::T
50
56
  * Interfaces to gdchart, gnuplot and SVG::Graph
51
57
 
52
58
 
data/Rakefile CHANGED
@@ -88,4 +88,12 @@ task :publicar_docs => [:clean, :docs] do
88
88
  sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
89
89
  end
90
90
 
91
+ task :release => [:tag] do
92
+ end
93
+
94
+ task :tag do
95
+ sh %(svn commit -m "Version bump: #{Statsample::VERSION}")
96
+ sh %(svn cp https://ruby-statsample.googlecode.com/svn/reportbuilder/trunk https://ruby-statsample.googlecode.com/svn/statsample/tags/v#{Statsample::VERSION} -m "ReportBuilder #{Statsample::VERSION} tagged")
97
+ end
98
+
91
99
  # vim: syntax=Ruby
File without changes
File without changes
File without changes
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
5
  require 'benchmark'
6
- samples=10000
6
+ samples=1000
7
7
  a=samples.times.collect {rand}.to_scale
8
8
  b=samples.times.collect {rand}.to_scale
9
9
  c=samples.times.collect {rand}.to_scale
@@ -15,7 +15,7 @@ ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+row['d']*1+rand()}
15
15
  Benchmark.bm(7) do |x|
16
16
 
17
17
 
18
- rb=ReportBuilder.new("Multiple Regression Engines")
18
+ rb=ReportBuilder.new(:name=>"Multiple Regression Engines")
19
19
 
20
20
  if Statsample.has_gsl?
21
21
  x.report("GSL:") {
@@ -29,6 +29,8 @@ end
29
29
  lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y',:name=>"Multiple Regression using RubyEngine")
30
30
  rb.add(lr.summary)
31
31
  }
32
-
33
32
  puts rb.to_text
34
33
  end
34
+
35
+
36
+
File without changes
File without changes
File without changes
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib')
3
+ require 'statsample'
4
+ a=10.times.map {rand(100)}.to_scale
5
+ t_1=Statsample::Test.t_one_sample(a,{:u=>50})
6
+ puts t_1.summary
7
+
8
+ b=20.times.map {(rand(20))**2+50}.to_scale
9
+
10
+ t_2=Statsample::Test.t_two_samples_independent(a,b)
11
+ puts t_2.summary
File without changes
data/lib/statistics2.rb CHANGED
@@ -229,7 +229,7 @@ module Statistics2 # :nodoc:
229
229
  s *= (i - 1) * c2 / i
230
230
  i += 2
231
231
  end
232
- if df & 1 != 0
232
+ if df.is_a? Float or df & 1 != 0
233
233
  0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
234
234
  else
235
235
  (1.0 + p) / 2.0
data/lib/statsample.rb CHANGED
@@ -18,12 +18,10 @@
18
18
  #
19
19
 
20
20
 
21
- $:.unshift(File.dirname(__FILE__))
22
- $:.unshift(File.expand_path(File.dirname(__FILE__)+"/../ext"))
23
-
21
+ #$:.unshift(File.dirname(__FILE__))
24
22
  require 'matrix'
25
23
  require 'distribution'
26
- raise "Install reportbuilder ~>1.0" unless gem 'reportbuilder','~>1.0'
24
+ gem 'reportbuilder','~>1.0'
27
25
  require 'reportbuilder'
28
26
  class Numeric
29
27
  def square ; self * self ; end
@@ -113,7 +111,7 @@ module Statsample
113
111
  false
114
112
  end
115
113
  end
116
- VERSION = '0.7.0'
114
+ VERSION = '0.8.0'
117
115
  SPLIT_TOKEN = ","
118
116
  autoload(:Database, 'statsample/converters')
119
117
  autoload(:Anova, 'statsample/anova')
@@ -140,7 +138,7 @@ module Statsample
140
138
  autoload(:Regression, 'statsample/regression')
141
139
  autoload(:Test, 'statsample/test')
142
140
  autoload(:Factor, 'statsample/factor')
143
-
141
+ autoload(:Graph, 'statsample/graph')
144
142
 
145
143
 
146
144
  class << self
@@ -154,6 +152,7 @@ module Statsample
154
152
  false
155
153
  end
156
154
  end
155
+
157
156
  # Create a matrix using vectors as columns.
158
157
  # Use:
159
158
  #
@@ -204,6 +203,58 @@ module Statsample
204
203
  u
205
204
  end
206
205
  end
206
+
207
+ module PromiseAfter
208
+ # Like memoizable module (http://promise.rubyforge.org/Promise.html)
209
+ # but this applies to set a lot of variables with one expensive method
210
+ # with a direct calling of dependent methods.
211
+ # If one of the dependent methods returns nil or false, the main method
212
+ # is called.
213
+ #
214
+ # Use when
215
+ # 1. You have one expensive operation which set many internal variables
216
+ # 2. This expensive operations depends on values which can set
217
+ # anytime BEFORE calculation of main function
218
+ #
219
+ # I use it for classes which require an iteration to set several variables,
220
+ # hiding from the user the need to explicitly call the iterate method.
221
+ #
222
+ # Example:
223
+ # class ExpensiveCalculation
224
+ # include PromiseAfter
225
+ # attr_accessor :y, :z
226
+ # def initialize(y=nil,z=nil)
227
+ # @y=y
228
+ # @z=z
229
+ # def compute
230
+ # @a=@y*1000+@z*1000
231
+ # end
232
+ # def a
233
+ # @a.nil? ? nil : "This is the value: #{@a}"
234
+ # end
235
+ # promise_after :compute, :a, :b
236
+ # end
237
+ # puts ExpensiveCalculation.new(1,2).a
238
+
239
+ def promise_after(function, *syms)
240
+ syms.each do |sym|
241
+ # You should doc the method!
242
+ raise NoMethodError, "Method `#{sym}' doesn't exists! Create it first " unless method_defined? sym
243
+ alias_method((sym.to_s+"_without_promise_after").intern, sym)
244
+ define_method(sym) {
245
+ #sym_to_iv="@#{sym.to_s.gsub(":","")}".intern
246
+ #if !instance_variable_defined?(sym_to_iv) or instance_variable_get(sym_to_iv).nil?
247
+ if(!send(sym.to_s+"_without_promise_after"))
248
+ send(function)
249
+ end
250
+ send(sym.to_s+"_without_promise_after")
251
+ }
252
+
253
+ end
254
+ end
255
+ end
256
+
257
+
207
258
  module Writable
208
259
  def save(filename)
209
260
  fp=File.open(filename,"w")
@@ -65,6 +65,7 @@ module Statsample
65
65
 
66
66
  class Polychoric
67
67
  include GetText
68
+ extend Statsample::PromiseAfter
68
69
  bindtextdomain("statsample")
69
70
  # Name of the analysis
70
71
  attr_accessor :name
@@ -135,28 +136,16 @@ module Statsample
135
136
  compute_basic_parameters
136
137
  end
137
138
  # Returns the polychoric correlation
138
- def r
139
- if @r.nil?
140
- compute
141
- end
142
- @r
143
- end
139
+ attr_reader :r
144
140
  # Returns the rows thresholds
141
+ attr_reader :alpha
142
+ # Returns the columns thresholds
143
+ attr_reader :beta
145
144
 
146
- def threshold_x
147
- if @alpha.nil?
148
- compute
149
- end
150
- @alpha
151
- end
152
- # Returns the column thresholds
145
+ promise_after :compute, :r, :alpha, :beta
153
146
 
154
- def threshold_y
155
- if @beta.nil?
156
- compute
157
- end
158
- @beta
159
- end
147
+ alias :threshold_x :alpha
148
+ alias :threshold_y :beta
160
149
 
161
150
 
162
151
  # Start the computation of polychoric correlation
@@ -739,22 +728,20 @@ module Statsample
739
728
  end
740
729
 
741
730
  def summary
742
- rp=ReportBuilder.new()
743
- rp.add(self)
744
- rp.to_text
731
+ rp=ReportBuilder.new(:no_title=>true).add(self).to_text
745
732
  end
733
+
746
734
 
747
735
  def report_building(generator) # :nodoc:
748
- compute if @r.nil?
736
+ #compute if r.nil?
749
737
  section=ReportBuilder::Section.new(:name=>@name)
750
- t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
738
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"), :header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
751
739
  @m.times do |i|
752
740
  t.row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
753
741
  end
754
742
  t.hr
755
743
  t.row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
756
744
  section.add(t)
757
- #generator.parse_element(t)
758
745
  section.add(sprintf("r: %0.4f",r))
759
746
  t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
760
747
  threshold_x.each_with_index {|val,i|
@@ -114,9 +114,7 @@ module Statsample
114
114
  end
115
115
  # Summary of the analysis
116
116
  def summary
117
- rp=ReportBuilder.new(:name=>@name)
118
- rp.add(self)
119
- rp.to_text
117
+ ReportBuilder.new(:name=>@name).add(self).to_text
120
118
  end
121
119
 
122
120
  def report_building(generator) # :nodoc:
@@ -1,21 +1,20 @@
1
- if RUBY_VERSION<"1.9"
2
- require 'fastercsv'
3
- Statsample::CSV_klass=FasterCSV
4
- else
5
- require 'csv'
6
- Statsample::CSV_klass=CSV
7
-
8
- end
1
+
9
2
 
10
3
  module Statsample
11
4
  class CSV < SpreadsheetBase
5
+ if RUBY_VERSION<"1.9"
6
+ require 'fastercsv'
7
+ CSV_klass=::FasterCSV
8
+ else
9
+ require 'csv'
10
+ CSV_klass=::CSV
11
+ end
12
12
  class << self
13
13
  # Returns a Dataset based on a csv file
14
14
  #
15
15
  # USE:
16
16
  # ds=Statsample::CSV.read("test_csv.csv")
17
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
18
-
17
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
19
18
  first_row=true
20
19
  fields=[]
21
20
  fields_data={}
@@ -24,7 +23,7 @@ module Statsample
24
23
  opts={}
25
24
  opts[:col_sep]=fs unless fs.nil?
26
25
  opts[:row_sep]=rs unless rs.nil?
27
- csv=Statsample::CSV_klass.send(:open, filename,'r',opts)
26
+ csv=CSV_klass.open(filename,'r',opts)
28
27
  csv.each do |row|
29
28
  line_number+=1
30
29
  if(line_number<=ignore_lines)
@@ -51,7 +50,7 @@ module Statsample
51
50
  # Statsample::CSV.write(ds,"test_csv.csv")
52
51
  def write(dataset,filename, convert_comma=false,*opts)
53
52
 
54
- writer=Statsample::CSV_klass.send(:open, filename,'w',*opts)
53
+ writer=CSV_klass.open(filename,'w',*opts)
55
54
  writer << dataset.fields
56
55
  dataset.each_array do|row|
57
56
  if(convert_comma)
@@ -171,10 +171,9 @@ module Statsample
171
171
  end
172
172
  # Summary of analysis
173
173
  def summary
174
- rp=ReportBuilder.new()
175
- rp.add(self)
176
- rp.to_text
174
+ rp=ReportBuilder.new().add(self).to_text
177
175
  end
176
+
178
177
  def t
179
178
  Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
180
179
  end
@@ -90,8 +90,6 @@ module Factor
90
90
  @component_matrix
91
91
  end
92
92
  # Iterate to find the factors
93
- # Parameters
94
- # * m: Number of factors
95
93
  def iterate(m=nil)
96
94
  @clean=false
97
95
  m||=@m
@@ -18,13 +18,14 @@ module Factor
18
18
  class Rotation
19
19
  EPSILON=1e-15
20
20
  MAX_ITERATIONS=25
21
-
21
+ extend Statsample::PromiseAfter
22
22
  attr_reader :iterations, :rotated, :component_transformation_matrix, :h2
23
23
  # Maximum number of iterations
24
24
  attr_accessor :max_iterations
25
25
  # Maximum precision
26
26
  attr_accessor :epsilon
27
27
 
28
+ promise_after :iterate, :iterations, :rotated, :component_transformation_matrix, :h2
28
29
 
29
30
  def initialize(matrix, opts=Hash.new)
30
31
  @matrix=matrix
@@ -33,19 +34,16 @@ module Factor
33
34
  @component_transformation_matrix=nil
34
35
  @max_iterations=MAX_ITERATIONS
35
36
  @epsilon=EPSILON
37
+ @rotated=nil
36
38
  @h2=(@matrix.collect {|c| c**2} * Matrix.column_vector([1]*@m)).column(0).to_a
37
39
  opts.each{|k,v|
38
40
  self.send("#{k}=",v) if self.respond_to? k
39
41
  }
40
-
41
-
42
42
  end
43
43
  alias_method :communalities, :h2
44
44
  alias_method :rotated_component_matrix, :rotated
45
- # Start iteration of
46
- def iterate(max_i=nil)
47
- max_i||=@max_iterations
48
- @max_iterations=max_i
45
+ # Start iteration
46
+ def iterate
49
47
  t=Matrix.identity(@m)
50
48
  b=@matrix.dup
51
49
  h=Matrix.diagonal(*@h2).collect {|c| Math::sqrt(c)}
@@ -54,7 +52,7 @@ module Factor
54
52
  @not_converged=true
55
53
  @iterations=0
56
54
  while @not_converged
57
- break if iterations>max_i
55
+ break if iterations>@max_iterations
58
56
  @iterations+=1
59
57
  #puts "Iteration #{iterations}"
60
58
  num_pairs=@m*(@m-1).quo(2)