statsample 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +15 -9
  3. data/README.txt +6 -0
  4. data/Rakefile +8 -0
  5. data/{demo → examples}/correlation_matrix.rb +0 -0
  6. data/{demo/dominanceanalysis.rb → examples/dominance_analysis.rb} +0 -0
  7. data/{demo → examples}/dominance_analysis_bootstrap.rb +0 -0
  8. data/{demo → examples}/levene.rb +0 -0
  9. data/{demo → examples}/multiple_regression.rb +5 -3
  10. data/{demo → examples}/multivariate_correlation.rb +0 -0
  11. data/{demo → examples}/polychoric.rb +0 -0
  12. data/{demo → examples}/principal_axis.rb +0 -0
  13. data/examples/t_test.rb +11 -0
  14. data/{demo → examples}/tetrachoric.rb +0 -0
  15. data/lib/statistics2.rb +1 -1
  16. data/lib/statsample.rb +57 -6
  17. data/lib/statsample/bivariate/polychoric.rb +12 -25
  18. data/lib/statsample/bivariate/tetrachoric.rb +1 -3
  19. data/lib/statsample/converter/csv.rb +11 -12
  20. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -3
  21. data/lib/statsample/factor/principalaxis.rb +0 -2
  22. data/lib/statsample/factor/rotation.rb +6 -8
  23. data/lib/statsample/graph.rb +8 -0
  24. data/lib/statsample/graph/svggraph.rb +0 -4
  25. data/lib/statsample/regression/multiple/baseengine.rb +25 -28
  26. data/lib/statsample/regression/multiple/matrixengine.rb +30 -34
  27. data/lib/statsample/test.rb +36 -1
  28. data/lib/statsample/test/levene.rb +11 -7
  29. data/lib/statsample/test/t.rb +189 -0
  30. data/test/test_anova.rb +8 -10
  31. data/test/test_bivariate.rb +40 -37
  32. data/test/test_codification.rb +9 -13
  33. data/test/test_combination.rb +37 -39
  34. data/test/test_crosstab.rb +46 -48
  35. data/test/test_csv.rb +40 -45
  36. data/test/test_dataset.rb +150 -152
  37. data/test/test_distribution.rb +24 -21
  38. data/test/test_dominance_analysis.rb +10 -12
  39. data/test/test_factor.rb +95 -91
  40. data/test/test_ggobi.rb +30 -33
  41. data/test/test_gsl.rb +4 -4
  42. data/test/test_helpers.rb +26 -0
  43. data/test/test_histogram.rb +5 -6
  44. data/test/test_logit.rb +20 -21
  45. data/test/test_matrix.rb +47 -48
  46. data/test/test_mle.rb +130 -131
  47. data/test/test_multiset.rb +95 -96
  48. data/test/test_permutation.rb +35 -36
  49. data/test/test_promise_after.rb +39 -0
  50. data/test/test_regression.rb +49 -51
  51. data/test/test_reliability.rb +29 -30
  52. data/test/test_resample.rb +22 -23
  53. data/test/test_srs.rb +8 -9
  54. data/test/test_statistics.rb +12 -6
  55. data/test/test_stest.rb +18 -10
  56. data/test/test_stratified.rb +15 -16
  57. data/test/test_svg_graph.rb +11 -22
  58. data/test/test_test_t.rb +40 -0
  59. data/test/test_umannwhitney.rb +14 -15
  60. data/test/test_vector.rb +33 -37
  61. data/test/test_xls.rb +34 -41
  62. metadata +22 -11
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 0.8.0 / 2010-03-29
2
+ * New Statsample::Test::T module, with classes and methods to do Student's t tests for one and two samples.
3
+ * Statsample::PromiseAfter module to set a number of variables without explicitly calling the compute or iterate method
4
+ * All tests ported to MiniUnit
5
+ * Directory 'demo' renamed to 'examples'
6
+ * Bug fix on report_building on Statsample::Regression::Multiple classes
7
+
1
8
  === 0.7.0 / 2010-03-25
2
9
  * Ported to ReportBuilder 1.x series
3
10
  * Implementation of ruby based covariance and correlation changed to a clearer code
data/Manifest.txt CHANGED
@@ -10,15 +10,16 @@ data/repeated_fields.csv
10
10
  data/test_binomial.csv
11
11
  data/tetmat_matrix.txt
12
12
  data/tetmat_test.txt
13
- demo/correlation_matrix.rb
14
- demo/dominance_analysis_bootstrap.rb
15
- demo/dominanceanalysis.rb
16
- demo/levene.rb
17
- demo/multiple_regression.rb
18
- demo/multivariate_correlation.rb
19
- demo/polychoric.rb
20
- demo/principal_axis.rb
21
- demo/tetrachoric.rb
13
+ examples/correlation_matrix.rb
14
+ examples/dominance_analysis.rb
15
+ examples/dominance_analysis_bootstrap.rb
16
+ examples/levene.rb
17
+ examples/multiple_regression.rb
18
+ examples/multivariate_correlation.rb
19
+ examples/polychoric.rb
20
+ examples/principal_axis.rb
21
+ examples/t_test.rb
22
+ examples/tetrachoric.rb
22
23
  lib/distribution.rb
23
24
  lib/distribution/chisquare.rb
24
25
  lib/distribution/f.rb
@@ -45,6 +46,7 @@ lib/statsample/factor.rb
45
46
  lib/statsample/factor/pca.rb
46
47
  lib/statsample/factor/principalaxis.rb
47
48
  lib/statsample/factor/rotation.rb
49
+ lib/statsample/graph.rb
48
50
  lib/statsample/graph/gdchart.rb
49
51
  lib/statsample/graph/svgboxplot.rb
50
52
  lib/statsample/graph/svggraph.rb
@@ -74,6 +76,7 @@ lib/statsample/resample.rb
74
76
  lib/statsample/srs.rb
75
77
  lib/statsample/test.rb
76
78
  lib/statsample/test/levene.rb
79
+ lib/statsample/test/t.rb
77
80
  lib/statsample/test/umannwhitney.rb
78
81
  lib/statsample/vector.rb
79
82
  po/es/statsample.mo
@@ -93,12 +96,14 @@ test/test_dominance_analysis.rb
93
96
  test/test_factor.rb
94
97
  test/test_ggobi.rb
95
98
  test/test_gsl.rb
99
+ test/test_helpers.rb
96
100
  test/test_histogram.rb
97
101
  test/test_logit.rb
98
102
  test/test_matrix.rb
99
103
  test/test_mle.rb
100
104
  test/test_multiset.rb
101
105
  test/test_permutation.rb
106
+ test/test_promise_after.rb
102
107
  test/test_regression.rb
103
108
  test/test_reliability.rb
104
109
  test/test_resample.rb
@@ -107,6 +112,7 @@ test/test_statistics.rb
107
112
  test/test_stest.rb
108
113
  test/test_stratified.rb
109
114
  test/test_svg_graph.rb
115
+ test/test_test_t.rb
110
116
  test/test_umannwhitney.rb
111
117
  test/test_vector.rb
112
118
  test/test_xls.rb
data/README.txt CHANGED
@@ -11,10 +11,12 @@ Includes:
11
11
  * Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
12
12
  * Imports and exports datasets from and to Excel, CSV and plain text files.
13
13
  * Correlations: Pearson's r, Spearman's rank correlation (rho), Tetrachoric, Polychoric
14
+ * Tests: T, Levene, U-Mannwhitney, One-Way Anova
14
15
  * Regression: Simple, Multiple, Probit and Logit
15
16
  * Factorial Analysis: Extraction (PCA and Principal Axis) and Rotation (Varimax and relatives)
16
17
  * Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
17
18
  * Sample calculation related formulas
19
+ * Creates reports on text, html and rtf, using ReportBuilder
18
20
 
19
21
  == FEATURES:
20
22
 
@@ -47,6 +49,10 @@ Includes:
47
49
  * Module Statsample::Crosstab provides function to create crosstab for categorical data
48
50
  * Reliability analysis provides functions to analyze scales. Class ItemAnalysis provides statistics like mean, standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted. With HtmlReport, graph the histogram of the scale and the Item Characteristic Curve for each item
49
51
  * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
52
+ * Module Statsample::Test provides several methods and classes to perform inferential statistics
53
+ * Statsample::Test::Levene
54
+ * Statsample::Test::UMannWhitney
55
+ * Statsample::Test::T
50
56
  * Interfaces to gdchart, gnuplot and SVG::Graph
51
57
 
52
58
 
data/Rakefile CHANGED
@@ -88,4 +88,12 @@ task :publicar_docs => [:clean, :docs] do
88
88
  sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
89
89
  end
90
90
 
91
+ task :release => [:tag] do
92
+ end
93
+
94
+ task :tag do
95
+ sh %(svn commit -m "Version bump: #{Statsample::VERSION}")
96
+ sh %(svn cp https://ruby-statsample.googlecode.com/svn/reportbuilder/trunk https://ruby-statsample.googlecode.com/svn/statsample/tags/v#{Statsample::VERSION} -m "ReportBuilder #{Statsample::VERSION} tagged")
97
+ end
98
+
91
99
  # vim: syntax=Ruby
File without changes
File without changes
File without changes
@@ -3,7 +3,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
5
  require 'benchmark'
6
- samples=10000
6
+ samples=1000
7
7
  a=samples.times.collect {rand}.to_scale
8
8
  b=samples.times.collect {rand}.to_scale
9
9
  c=samples.times.collect {rand}.to_scale
@@ -15,7 +15,7 @@ ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+row['d']*1+rand()}
15
15
  Benchmark.bm(7) do |x|
16
16
 
17
17
 
18
- rb=ReportBuilder.new("Multiple Regression Engines")
18
+ rb=ReportBuilder.new(:name=>"Multiple Regression Engines")
19
19
 
20
20
  if Statsample.has_gsl?
21
21
  x.report("GSL:") {
@@ -29,6 +29,8 @@ end
29
29
  lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y',:name=>"Multiple Regression using RubyEngine")
30
30
  rb.add(lr.summary)
31
31
  }
32
-
33
32
  puts rb.to_text
34
33
  end
34
+
35
+
36
+
File without changes
File without changes
File without changes
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib')
3
+ require 'statsample'
4
+ a=10.times.map {rand(100)}.to_scale
5
+ t_1=Statsample::Test.t_one_sample(a,{:u=>50})
6
+ puts t_1.summary
7
+
8
+ b=20.times.map {(rand(20))**2+50}.to_scale
9
+
10
+ t_2=Statsample::Test.t_two_samples_independent(a,b)
11
+ puts t_2.summary
File without changes
data/lib/statistics2.rb CHANGED
@@ -229,7 +229,7 @@ module Statistics2 # :nodoc:
229
229
  s *= (i - 1) * c2 / i
230
230
  i += 2
231
231
  end
232
- if df & 1 != 0
232
+ if df.is_a? Float or df & 1 != 0
233
233
  0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
234
234
  else
235
235
  (1.0 + p) / 2.0
data/lib/statsample.rb CHANGED
@@ -18,12 +18,10 @@
18
18
  #
19
19
 
20
20
 
21
- $:.unshift(File.dirname(__FILE__))
22
- $:.unshift(File.expand_path(File.dirname(__FILE__)+"/../ext"))
23
-
21
+ #$:.unshift(File.dirname(__FILE__))
24
22
  require 'matrix'
25
23
  require 'distribution'
26
- raise "Install reportbuilder ~>1.0" unless gem 'reportbuilder','~>1.0'
24
+ gem 'reportbuilder','~>1.0'
27
25
  require 'reportbuilder'
28
26
  class Numeric
29
27
  def square ; self * self ; end
@@ -113,7 +111,7 @@ module Statsample
113
111
  false
114
112
  end
115
113
  end
116
- VERSION = '0.7.0'
114
+ VERSION = '0.8.0'
117
115
  SPLIT_TOKEN = ","
118
116
  autoload(:Database, 'statsample/converters')
119
117
  autoload(:Anova, 'statsample/anova')
@@ -140,7 +138,7 @@ module Statsample
140
138
  autoload(:Regression, 'statsample/regression')
141
139
  autoload(:Test, 'statsample/test')
142
140
  autoload(:Factor, 'statsample/factor')
143
-
141
+ autoload(:Graph, 'statsample/graph')
144
142
 
145
143
 
146
144
  class << self
@@ -154,6 +152,7 @@ module Statsample
154
152
  false
155
153
  end
156
154
  end
155
+
157
156
  # Create a matrix using vectors as columns.
158
157
  # Use:
159
158
  #
@@ -204,6 +203,58 @@ module Statsample
204
203
  u
205
204
  end
206
205
  end
206
+
207
+ module PromiseAfter
208
+ # Like memoizable module (http://promise.rubyforge.org/Promise.html)
209
+ # but this applies to setting a lot of variables with one expensive method
210
+ # with a direct calling of dependent methods.
211
+ # If one of the dependent methods returns nil or false, the main method
212
+ # is called.
213
+ #
214
+ # Use when
215
+ # 1. You have one expensive operation which set many internal variables
216
+ # 2. This expensive operation depends on values which can be set
217
+ # at any time BEFORE the main function is computed
218
+ #
219
+ # I use it for classes which require an iteration to set several variables,
220
+ # hiding from the user the need to explicitly call the iterate method.
221
+ #
222
+ # Example:
223
+ # class ExpensiveCalculation
224
+ # extend PromiseAfter
225
+ # attr_accessor :y, :z
226
+ # def initialize(y=nil,z=nil)
227
+ # @y=y
228
+ # @z=z
229
+ # def compute
230
+ # @a=@y*1000+@z*1000
231
+ # end
232
+ # def a
233
+ # @a.nil? ? nil : "This is the value: #{@a}"
234
+ # end
235
+ # promise_after :compute, :a
236
+ # end
237
+ # puts ExpensiveCalculation.new(1,2).a
238
+
239
+ def promise_after(function, *syms)
240
+ syms.each do |sym|
241
+ # You should doc the method!
242
+ raise NoMethodError, "Method `#{sym}' doesn't exists! Create it first " unless method_defined? sym
243
+ alias_method((sym.to_s+"_without_promise_after").intern, sym)
244
+ define_method(sym) {
245
+ #sym_to_iv="@#{sym.to_s.gsub(":","")}".intern
246
+ #if !instance_variable_defined?(sym_to_iv) or instance_variable_get(sym_to_iv).nil?
247
+ if(!send(sym.to_s+"_without_promise_after"))
248
+ send(function)
249
+ end
250
+ send(sym.to_s+"_without_promise_after")
251
+ }
252
+
253
+ end
254
+ end
255
+ end
256
+
257
+
207
258
  module Writable
208
259
  def save(filename)
209
260
  fp=File.open(filename,"w")
@@ -65,6 +65,7 @@ module Statsample
65
65
 
66
66
  class Polychoric
67
67
  include GetText
68
+ extend Statsample::PromiseAfter
68
69
  bindtextdomain("statsample")
69
70
  # Name of the analysis
70
71
  attr_accessor :name
@@ -135,28 +136,16 @@ module Statsample
135
136
  compute_basic_parameters
136
137
  end
137
138
  # Returns the polychoric correlation
138
- def r
139
- if @r.nil?
140
- compute
141
- end
142
- @r
143
- end
139
+ attr_reader :r
144
140
  # Returns the rows thresholds
141
+ attr_reader :alpha
142
+ # Returns the columns thresholds
143
+ attr_reader :beta
145
144
 
146
- def threshold_x
147
- if @alpha.nil?
148
- compute
149
- end
150
- @alpha
151
- end
152
- # Returns the column thresholds
145
+ promise_after :compute, :r, :alpha, :beta
153
146
 
154
- def threshold_y
155
- if @beta.nil?
156
- compute
157
- end
158
- @beta
159
- end
147
+ alias :threshold_x :alpha
148
+ alias :threshold_y :beta
160
149
 
161
150
 
162
151
  # Start the computation of polychoric correlation
@@ -739,22 +728,20 @@ module Statsample
739
728
  end
740
729
 
741
730
  def summary
742
- rp=ReportBuilder.new()
743
- rp.add(self)
744
- rp.to_text
731
+ rp=ReportBuilder.new(:no_title=>true).add(self).to_text
745
732
  end
733
+
746
734
 
747
735
  def report_building(generator) # :nodoc:
748
- compute if @r.nil?
736
+ #compute if r.nil?
749
737
  section=ReportBuilder::Section.new(:name=>@name)
750
- t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
738
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"), :header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
751
739
  @m.times do |i|
752
740
  t.row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
753
741
  end
754
742
  t.hr
755
743
  t.row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
756
744
  section.add(t)
757
- #generator.parse_element(t)
758
745
  section.add(sprintf("r: %0.4f",r))
759
746
  t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
760
747
  threshold_x.each_with_index {|val,i|
@@ -114,9 +114,7 @@ module Statsample
114
114
  end
115
115
  # Summary of the analysis
116
116
  def summary
117
- rp=ReportBuilder.new(:name=>@name)
118
- rp.add(self)
119
- rp.to_text
117
+ ReportBuilder.new(:name=>@name).add(self).to_text
120
118
  end
121
119
 
122
120
  def report_building(generator) # :nodoc:
@@ -1,21 +1,20 @@
1
- if RUBY_VERSION<"1.9"
2
- require 'fastercsv'
3
- Statsample::CSV_klass=FasterCSV
4
- else
5
- require 'csv'
6
- Statsample::CSV_klass=CSV
7
-
8
- end
1
+
9
2
 
10
3
  module Statsample
11
4
  class CSV < SpreadsheetBase
5
+ if RUBY_VERSION<"1.9"
6
+ require 'fastercsv'
7
+ CSV_klass=::FasterCSV
8
+ else
9
+ require 'csv'
10
+ CSV_klass=::CSV
11
+ end
12
12
  class << self
13
13
  # Returns a Dataset based on a csv file
14
14
  #
15
15
  # USE:
16
16
  # ds=Statsample::CSV.read("test_csv.csv")
17
- def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
18
-
17
+ def read(filename, empty=[''],ignore_lines=0,fs=nil,rs=nil)
19
18
  first_row=true
20
19
  fields=[]
21
20
  fields_data={}
@@ -24,7 +23,7 @@ module Statsample
24
23
  opts={}
25
24
  opts[:col_sep]=fs unless fs.nil?
26
25
  opts[:row_sep]=rs unless rs.nil?
27
- csv=Statsample::CSV_klass.send(:open, filename,'r',opts)
26
+ csv=CSV_klass.open(filename,'r',opts)
28
27
  csv.each do |row|
29
28
  line_number+=1
30
29
  if(line_number<=ignore_lines)
@@ -51,7 +50,7 @@ module Statsample
51
50
  # Statsample::CSV.write(ds,"test_csv.csv")
52
51
  def write(dataset,filename, convert_comma=false,*opts)
53
52
 
54
- writer=Statsample::CSV_klass.send(:open, filename,'w',*opts)
53
+ writer=CSV_klass.open(filename,'w',*opts)
55
54
  writer << dataset.fields
56
55
  dataset.each_array do|row|
57
56
  if(convert_comma)
@@ -171,10 +171,9 @@ module Statsample
171
171
  end
172
172
  # Summary of analysis
173
173
  def summary
174
- rp=ReportBuilder.new()
175
- rp.add(self)
176
- rp.to_text
174
+ rp=ReportBuilder.new().add(self).to_text
177
175
  end
176
+
178
177
  def t
179
178
  Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
180
179
  end
@@ -90,8 +90,6 @@ module Factor
90
90
  @component_matrix
91
91
  end
92
92
  # Iterate to find the factors
93
- # Parameters
94
- # * m: Number of factors
95
93
  def iterate(m=nil)
96
94
  @clean=false
97
95
  m||=@m
@@ -18,13 +18,14 @@ module Factor
18
18
  class Rotation
19
19
  EPSILON=1e-15
20
20
  MAX_ITERATIONS=25
21
-
21
+ extend Statsample::PromiseAfter
22
22
  attr_reader :iterations, :rotated, :component_transformation_matrix, :h2
23
23
  # Maximum number of iterations
24
24
  attr_accessor :max_iterations
25
25
  # Maximum precision
26
26
  attr_accessor :epsilon
27
27
 
28
+ promise_after :iterate, :iterations, :rotated, :component_transformation_matrix, :h2
28
29
 
29
30
  def initialize(matrix, opts=Hash.new)
30
31
  @matrix=matrix
@@ -33,19 +34,16 @@ module Factor
33
34
  @component_transformation_matrix=nil
34
35
  @max_iterations=MAX_ITERATIONS
35
36
  @epsilon=EPSILON
37
+ @rotated=nil
36
38
  @h2=(@matrix.collect {|c| c**2} * Matrix.column_vector([1]*@m)).column(0).to_a
37
39
  opts.each{|k,v|
38
40
  self.send("#{k}=",v) if self.respond_to? k
39
41
  }
40
-
41
-
42
42
  end
43
43
  alias_method :communalities, :h2
44
44
  alias_method :rotated_component_matrix, :rotated
45
- # Start iteration of
46
- def iterate(max_i=nil)
47
- max_i||=@max_iterations
48
- @max_iterations=max_i
45
+ # Start iteration
46
+ def iterate
49
47
  t=Matrix.identity(@m)
50
48
  b=@matrix.dup
51
49
  h=Matrix.diagonal(*@h2).collect {|c| Math::sqrt(c)}
@@ -54,7 +52,7 @@ module Factor
54
52
  @not_converged=true
55
53
  @iterations=0
56
54
  while @not_converged
57
- break if iterations>max_i
55
+ break if iterations>@max_iterations
58
56
  @iterations+=1
59
57
  #puts "Iteration #{iterations}"
60
58
  num_pairs=@m*(@m-1).quo(2)