statsample 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,3 @@
1
1
  module Statsample
2
- VERSION = '1.4.0'
2
+ VERSION = '1.4.1'
3
3
  end
data/lib/statsample.rb CHANGED
@@ -1,4 +1,4 @@
1
- # = statsample.rb -
1
+ # = statsample.rb -
2
2
  # Statsample - Statistic package for Ruby
3
3
  # Copyright (C) 2008-2014 Claudio Bustos
4
4
  #
@@ -17,17 +17,16 @@
17
17
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
18
  #
19
19
 
20
-
21
- #$:.unshift(File.dirname(__FILE__))
22
20
  require 'matrix'
23
21
  require 'extendmatrix'
24
22
  require 'distribution'
25
23
  require 'dirty-memoize'
26
24
  require 'reportbuilder'
27
25
 
28
-
29
26
  class Numeric
30
- def square ; self * self ; end
27
+ def square
28
+ self * self
29
+ end
31
30
  end
32
31
 
33
32
  class String
@@ -41,10 +40,10 @@ class String
41
40
  end
42
41
 
43
42
  class Module
44
- def include_aliasing(m, suffix="ruby")
43
+ def include_aliasing(m, suffix = 'ruby')
45
44
  m.instance_methods.each do |f|
46
45
  if instance_methods.include? f
47
- alias_method("#{f}_#{suffix}",f)
46
+ alias_method("#{f}_#{suffix}", f)
48
47
  remove_method f
49
48
  end
50
49
  end
@@ -60,15 +59,26 @@ class Array
60
59
  # a.recode_repeated
61
60
  # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
62
61
  def recode_repeated
63
- if self.size!=self.uniq.size
62
+ if size != uniq.size
64
63
  # Find repeated
65
- repeated=self.inject({}) {|a,v|
66
- (a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v| k}
67
- ns=repeated.inject({}) {|a,v| a[v]=0;a}
68
- self.collect do |f|
64
+ repeated = inject({}) do |acc, v|
65
+ if acc[v].nil?
66
+ acc[v] = 1
67
+ else
68
+ acc[v] += 1
69
+ end
70
+ acc
71
+ end.select { |_k, v| v > 1 }.keys
72
+
73
+ ns = repeated.inject({}) do |acc, v|
74
+ acc[v] = 0
75
+ acc
76
+ end
77
+
78
+ collect do |f|
69
79
  if repeated.include? f
70
- ns[f]+=1
71
- sprintf("%s_%d",f,ns[f])
80
+ ns[f] += 1
81
+ sprintf('%s_%d', f, ns[f])
72
82
  else
73
83
  f
74
84
  end
@@ -79,61 +89,62 @@ class Array
79
89
  end
80
90
  end
81
91
 
82
- def create_test(*args,&proc)
83
- description=args.shift
84
- fields=args
92
+ def create_test(*args, &_proc)
93
+ description = args.shift
94
+ fields = args
85
95
  [description, fields, Proc.new]
86
96
  end
97
+
87
98
  #--
88
99
  # Test extensions
89
100
  begin
90
101
  require 'gettext'
91
102
  rescue LoadError
92
103
  def bindtextdomain(d) #:nodoc:
93
- d
104
+ d
94
105
  end
95
-
106
+
96
107
  # Bored module
97
108
  module GetText #:nodoc:
98
- def _(t)
99
- t
109
+ def _(t)
110
+ t
100
111
  end
101
112
  end
102
113
  end
114
+
103
115
  # Library for statistical analysis on Ruby
104
116
  #
105
117
  # * Classes for manipulation and storage of data:
106
118
  # * Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Includes methods to create correlation and covariance matrices.
107
119
  # * Multiple types of regression on Statsample::Regression
108
120
  # * Factorial Analysis algorithms on Statsample::Factor module.
109
- # * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
121
+ # * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
110
122
  # * Module Statsample::Codification, to help to codify open questions
111
123
  # * Converters to import and export data from databases, csv and excel files.
112
124
  # * Module Statsample::Crosstab provides function to create crosstab for categorical data
113
125
  # * Reliability analysis provides functions to analyze scales.
114
126
  # * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
115
- # * Interfaces to gdchart, gnuplot and SVG::Graph
127
+ # * Interfaces to gdchart, gnuplot and SVG::Graph
116
128
  #
117
129
  module Statsample
118
-
119
130
  def self.create_has_library(library)
120
131
  define_singleton_method("has_#{library}?") do
121
- cv="@@#{library}"
122
- if !class_variable_defined? cv
123
- begin
132
+ cv = "@@#{library}"
133
+ unless class_variable_defined? cv
134
+ begin
124
135
  require library.to_s
125
- class_variable_set(cv,true)
136
+ class_variable_set(cv, true)
126
137
  rescue LoadError
127
- class_variable_set(cv,false)
138
+ class_variable_set(cv, false)
128
139
  end
129
140
  end
130
141
  class_variable_get(cv)
131
142
  end
132
143
  end
133
-
144
+
134
145
  create_has_library :gsl
135
-
136
- SPLIT_TOKEN = ","
146
+
147
+ SPLIT_TOKEN = ','
137
148
  autoload(:Analysis, 'statsample/analysis')
138
149
  autoload(:Database, 'statsample/converters')
139
150
  autoload(:Anova, 'statsample/anova')
@@ -154,133 +165,122 @@ module Statsample
154
165
  autoload(:Multivariate, 'statsample/multivariate')
155
166
  autoload(:Multiset, 'statsample/multiset')
156
167
  autoload(:StratifiedSample, 'statsample/multiset')
157
- autoload(:MLE, 'statsample/mle')
168
+ autoload(:MLE, 'statsample/mle')
158
169
  autoload(:Regression, 'statsample/regression')
159
170
  autoload(:Test, 'statsample/test')
160
171
  autoload(:Factor, 'statsample/factor')
161
172
  autoload(:Graph, 'statsample/graph')
162
-
163
-
173
+
164
174
  class << self
165
175
  # Load an object saved in a file.
166
176
  def load(filename)
167
177
  if File.exist? filename
168
- o=false
169
- File.open(filename,"r") {|fp| o=Marshal.load(fp) }
178
+ o = false
179
+ File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
170
180
  o
171
181
  else
172
182
  false
173
183
  end
174
184
  end
175
-
176
-
177
-
185
+
178
186
  # Create a matrix using vectors as columns.
179
187
  # Use:
180
188
  #
181
189
  # matrix=Statsample.vector_cols_matrix(v1,v2)
182
190
  def vector_cols_matrix(*vs)
183
191
  # test
184
- size=vs[0].size
185
- vs.each{|v|
186
- raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
187
- raise ArgumentError,"Vectors size should be the same" if v.size!=size
188
- }
189
- Matrix.rows((0...size).to_a.collect() {|i|
190
- vs.collect{|v| v[i]}
191
- })
192
+ size = vs[0].size
193
+
194
+ vs.each do |v|
195
+ fail ArgumentError, 'Arguments should be Vector' unless v.instance_of? Statsample::Vector
196
+ fail ArgumentError, 'Vectors size should be the same' if v.size != size
197
+ end
198
+
199
+ Matrix.rows((0...size).to_a.collect { |i| vs.collect { |v| v[i] } })
192
200
  end
201
+
193
202
  # Returns a duplicate of the input vectors, without missing data
194
203
  # for any of the vectors.
195
- #
204
+ #
196
205
  # a=[1,2,3,6,7,nil,3,5].to_scale
197
206
  # b=[nil,nil,5,6,4,5,10,2].to_scale
198
207
  # c=[2,4,6,7,4,5,6,7].to_scale
199
208
  # a2,b2,c2=Statsample.only_valid(a,b,c)
200
- # => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
201
- # #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
209
+ # => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
210
+ # #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
202
211
  # #<Statsample::Scale:0xb748c760 @data=[6, 7, 4, 6, 7]>]
203
212
  #
204
213
  def only_valid(*vs)
205
- i=1
206
- h=vs.inject({}) {|a,v| a["v#{i}"]=v;i+=1;a}
207
- ds=Statsample::Dataset.new(h).dup_only_valid
214
+ i = 1
215
+ h = vs.inject({}) { |acc, v| acc["v#{i}"] = v; i += 1; acc }
216
+ ds = Statsample::Dataset.new(h).dup_only_valid
208
217
  ds.vectors.values
209
218
  end
210
-
211
- # Cheap version of #only_valid.
219
+
220
+ # Cheap version of #only_valid.
212
221
  # If any vectors have missing_values, return only valid.
213
222
  # If not, return the vectors themselves
214
223
  def only_valid_clone(*vs)
215
- if vs.any? {|v| v.flawed?}
224
+ if vs.any?(&:flawed?)
216
225
  only_valid(*vs)
217
226
  else
218
227
  vs
219
228
  end
220
229
  end
221
- end
222
-
223
-
224
-
225
-
230
+ end
231
+
226
232
  module Util
227
233
  # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
228
- def normal_order_statistic_medians(i,n)
229
- if i==1
230
- u= 1.0 - normal_order_statistic_medians(n,n)
231
- elsif i==n
232
- u=0.5**(1 / n.to_f)
234
+ def normal_order_statistic_medians(i, n)
235
+ if i == 1
236
+ u = 1.0 - normal_order_statistic_medians(n, n)
237
+ elsif i == n
238
+ u = 0.5**(1 / n.to_f)
233
239
  else
234
- u= (i - 0.3175) / (n + 0.365)
240
+ u = (i - 0.3175) / (n + 0.365)
235
241
  end
236
242
  u
237
243
  end
238
-
239
- def self.nice(s,e) # :nodoc:
240
- reverse = e<s
244
+
245
+ def self.nice(s, e) # :nodoc:
246
+ reverse = e < s
241
247
  min = reverse ? e : s
242
248
  max = reverse ? s : e
243
- span=max-min
244
- return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
245
-
246
- step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
247
- out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
249
+ span = max - min
250
+ return [s, e] if span == 0 || (span.respond_to?(:infinite?) && span.infinite?)
251
+
252
+ step = 10**((Math.log(span).quo(Math.log(10))).round - 1).to_f
253
+ out = [(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
248
254
  out.reverse! if reverse
249
255
  out
250
256
  end
251
-
252
-
253
257
  end
254
-
255
-
256
-
258
+
257
259
  module Writable
258
260
  def save(filename)
259
- fp=File.open(filename,"w")
260
- Marshal.dump(self,fp)
261
+ fp = File.open(filename, 'w')
262
+ Marshal.dump(self, fp)
261
263
  fp.close
262
- end
264
+ end
263
265
  end
264
266
  # Provides method summary to generate summaries and include GetText
265
267
  module Summarizable
266
268
  include GetText
267
- bindtextdomain("statsample")
268
- def summary(method=:to_text)
269
- ReportBuilder.new(:no_title=>true).add(self).send(method)
269
+ bindtextdomain('statsample')
270
+ def summary(method = :to_text)
271
+ ReportBuilder.new(no_title: true).add(self).send(method)
270
272
  end
271
273
  end
272
274
  module STATSAMPLE__ #:nodoc:
273
275
  end
274
276
  end
275
277
 
276
-
277
-
278
278
  #--
279
- begin
279
+ begin
280
280
  require 'statsamplert'
281
281
  rescue LoadError
282
282
  module Statsample
283
- OPTIMIZED=false
283
+ OPTIMIZED = false
284
284
  end
285
285
  end
286
286
 
data/references.txt CHANGED
@@ -8,11 +8,13 @@ References
8
8
  * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
9
9
  * Hayton, J., Allen, D. & Scarpello, V. (2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
10
10
  * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
11
+ * Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
11
12
  * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
12
13
  * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
13
14
  * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
14
15
  * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
15
16
  * SPSS Manual
17
+ * Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
16
18
  * Shrout, P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428.
17
19
  * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
18
20
  * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
@@ -20,4 +22,3 @@ References
20
22
  * http://stattrek.com/Lesson6/SRS.aspx
21
23
  * http://talkstats.com/showthread.php?t=5056
22
24
  * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
23
- * http://www.taygeta.com/random/gaussian.html
@@ -0,0 +1,89 @@
1
+ $:.unshift File.expand_path("../lib/", __FILE__)
2
+
3
+ require 'statsample/version'
4
+ require 'date'
5
+
6
+ DESCRIPTION = <<MSG
7
+ A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
8
+ and 2.1.1. See `.travis.yml` for more information.
9
+
10
+ Include:
11
+
12
+ - Descriptive statistics: frequencies, median, mean,
13
+ standard error, skew, kurtosis (and many others).
14
+ - Imports and exports datasets from and to Excel, CSV and plain text files.
15
+ - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
16
+ tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
17
+ statsample-bivariate-extension gem.
18
+ - Intra-class correlation
19
+ - Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for
20
+ One-way ANOVA.
21
+ - Tests: F, T, Levene, U-Mannwhitney.
22
+ - Regression: Simple, Multiple (OLS), Probit and Logit
23
+ - Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax,
24
+ Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for
25
+ estimation of number of factors.
26
+ - Reliability analysis for simple scale and a DSL to easily analyze multiple
27
+ scales using factor analysis and correlations, if you want it.
28
+ - Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
29
+ - Sample calculation related formulas
30
+ - Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
31
+ - Creates reports on text, html and rtf, using ReportBuilder gem
32
+ - Graphics: Histogram, Boxplot and Scatterplot.
33
+ MSG
34
+
35
+ POSTINSTALL = <<MSG
36
+ ***************************************************
37
+
38
+ Thanks for installing statsample.
39
+
40
+ On *nix, you could install statsample-optimization
41
+ to retrieve gems gsl, statistics2 and a C extension
42
+ to speed some methods.
43
+
44
+ $ [sudo] gem install statsample-optimization
45
+
46
+ *****************************************************
47
+ MSG
48
+
49
+ Gem::Specification.new do |s|
50
+ s.name = "statsample"
51
+ s.version = Statsample::VERSION
52
+ s.date = Date.today.to_s
53
+ s.homepage = "https://github.com/sciruby/statsample"
54
+
55
+ s.authors = ["Claudio Bustos", "Carlos Agarie"]
56
+ s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
57
+
58
+ s.summary = "A suite for basic and advanced statistics on Ruby"
59
+ s.description = DESCRIPTION
60
+ s.post_install_message = POSTINSTALL
61
+
62
+ s.rdoc_options = ["--main", "README.md"]
63
+ s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
64
+ s.require_paths = ["lib"]
65
+
66
+ s.files = `git ls-files`.split("\n")
67
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
68
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
69
+
70
+ s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
71
+ s.add_runtime_dependency 'reportbuilder', '~> 1.4'
72
+ s.add_runtime_dependency 'minimization'
73
+ s.add_runtime_dependency 'dirty-memoize'
74
+ s.add_runtime_dependency 'extendmatrix'
75
+ s.add_runtime_dependency 'rserve-client'
76
+ s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
77
+ s.add_runtime_dependency 'distribution'
78
+ s.add_runtime_dependency 'rb-gsl'
79
+ s.add_runtime_dependency 'awesome_print'
80
+
81
+ s.add_development_dependency 'bundler'
82
+ s.add_development_dependency 'rake'
83
+ s.add_development_dependency 'rdoc'
84
+ s.add_development_dependency 'shoulda'
85
+ s.add_development_dependency 'shoulda-matchers', '~> 2.2'
86
+ s.add_development_dependency 'minitest'
87
+ s.add_development_dependency 'gettext'
88
+ s.add_development_dependency 'mocha'
89
+ end
@@ -0,0 +1,16 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
+ class StatsampleAwesomePrintBug < MiniTest::Test
3
+ context("Awesome Print integration") do
4
+ setup do
5
+ require "awesome_print"
6
+ end
7
+ should "should be flawless" do
8
+ a=[1,2,3].to_scale
9
+
10
+ assert(a!=[1,2,3])
11
+ assert_nothing_raised do
12
+ ap a
13
+ end
14
+ end
15
+ end
16
+ end
@@ -52,4 +52,12 @@ class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase
52
52
  ct=Statsample::Crosstab.new(v1,v2)
53
53
  assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
54
54
  end
55
+ def test_crosstab_with_scale
56
+ v1=%w{1 1 1 1 1 0 0 0 0 0}.to_scale
57
+ v2=%w{0 0 0 0 0 1 1 1 1 1}.to_scale
58
+ ct=Statsample::Crosstab.new(v1,v2)
59
+ assert_equal(Matrix[[0,5],[5,0]],ct.to_matrix)
60
+ assert_nothing_raised { ct.summary }
61
+ end
62
+
55
63
  end
@@ -100,6 +100,13 @@ class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
100
100
  assert_equal(2,h.sum(1,4))
101
101
 
102
102
  end
103
+ should "not raise exception when all values equal" do
104
+ assert_nothing_raised do
105
+ a = [5,5,5,5,5,5].to_scale
106
+ h=Statsample::Graph::Histogram.new(a)
107
+ h.to_svg
108
+ end
109
+ end
103
110
 
104
111
  end
105
112
  end