statsample 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module Statsample
2
- VERSION = '1.4.0'
2
+ VERSION = '1.4.1'
3
3
  end
data/lib/statsample.rb CHANGED
@@ -1,4 +1,4 @@
1
- # = statsample.rb -
1
+ # = statsample.rb -
2
2
  # Statsample - Statistic package for Ruby
3
3
  # Copyright (C) 2008-2014 Claudio Bustos
4
4
  #
@@ -17,17 +17,16 @@
17
17
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
18
  #
19
19
 
20
-
21
- #$:.unshift(File.dirname(__FILE__))
22
20
  require 'matrix'
23
21
  require 'extendmatrix'
24
22
  require 'distribution'
25
23
  require 'dirty-memoize'
26
24
  require 'reportbuilder'
27
25
 
28
-
29
26
  class Numeric
30
- def square ; self * self ; end
27
+ def square
28
+ self * self
29
+ end
31
30
  end
32
31
 
33
32
  class String
@@ -41,10 +40,10 @@ class String
41
40
  end
42
41
 
43
42
  class Module
44
- def include_aliasing(m, suffix="ruby")
43
+ def include_aliasing(m, suffix = 'ruby')
45
44
  m.instance_methods.each do |f|
46
45
  if instance_methods.include? f
47
- alias_method("#{f}_#{suffix}",f)
46
+ alias_method("#{f}_#{suffix}", f)
48
47
  remove_method f
49
48
  end
50
49
  end
@@ -60,15 +59,26 @@ class Array
60
59
  # a.recode_repeated
61
60
  # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
62
61
  def recode_repeated
63
- if self.size!=self.uniq.size
62
+ if size != uniq.size
64
63
  # Find repeated
65
- repeated=self.inject({}) {|a,v|
66
- (a[v].nil? ? a[v]=1 : a[v]+=1); a }.find_all{|k,v| v>1}.collect{|k,v| k}
67
- ns=repeated.inject({}) {|a,v| a[v]=0;a}
68
- self.collect do |f|
64
+ repeated = inject({}) do |acc, v|
65
+ if acc[v].nil?
66
+ acc[v] = 1
67
+ else
68
+ acc[v] += 1
69
+ end
70
+ acc
71
+ end.select { |_k, v| v > 1 }.keys
72
+
73
+ ns = repeated.inject({}) do |acc, v|
74
+ acc[v] = 0
75
+ acc
76
+ end
77
+
78
+ collect do |f|
69
79
  if repeated.include? f
70
- ns[f]+=1
71
- sprintf("%s_%d",f,ns[f])
80
+ ns[f] += 1
81
+ sprintf('%s_%d', f, ns[f])
72
82
  else
73
83
  f
74
84
  end
@@ -79,61 +89,62 @@ class Array
79
89
  end
80
90
  end
81
91
 
82
- def create_test(*args,&proc)
83
- description=args.shift
84
- fields=args
92
+ def create_test(*args, &_proc)
93
+ description = args.shift
94
+ fields = args
85
95
  [description, fields, Proc.new]
86
96
  end
97
+
87
98
  #--
88
99
  # Test extensions
89
100
  begin
90
101
  require 'gettext'
91
102
  rescue LoadError
92
103
  def bindtextdomain(d) #:nodoc:
93
- d
104
+ d
94
105
  end
95
-
106
+
96
107
  # Bored module
97
108
  module GetText #:nodoc:
98
- def _(t)
99
- t
109
+ def _(t)
110
+ t
100
111
  end
101
112
  end
102
113
  end
114
+
103
115
  # Library for statistical analysis on Ruby
104
116
  #
105
117
  # * Classes for manipulation and storage of data:
106
118
  # * Module Statsample::Bivariate provides covariance and Pearson, Spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Includes methods to create correlation and covariance matrices
107
119
  # * Multiple types of regression on Statsample::Regression
108
120
  # * Factorial Analysis algorithms on Statsample::Factor module.
109
- # * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
121
+ # * Dominance Analysis. Based on Budescu and Azen papers.link[http://psycnet.apa.org/journals/met/8/2/129/].
110
122
  # * Module Statsample::Codification, to help to codify open questions
111
123
  # * Converters to import and export data from databases, csv and excel files.
112
124
  # * Module Statsample::Crosstab provides function to create crosstab for categorical data
113
125
  # * Reliability analysis provides functions to analyze scales.
114
126
  # * Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several types of samples
115
- # * Interfaces to gdchart, gnuplot and SVG::Graph
127
+ # * Interfaces to gdchart, gnuplot and SVG::Graph
116
128
  #
117
129
  module Statsample
118
-
119
130
  def self.create_has_library(library)
120
131
  define_singleton_method("has_#{library}?") do
121
- cv="@@#{library}"
122
- if !class_variable_defined? cv
123
- begin
132
+ cv = "@@#{library}"
133
+ unless class_variable_defined? cv
134
+ begin
124
135
  require library.to_s
125
- class_variable_set(cv,true)
136
+ class_variable_set(cv, true)
126
137
  rescue LoadError
127
- class_variable_set(cv,false)
138
+ class_variable_set(cv, false)
128
139
  end
129
140
  end
130
141
  class_variable_get(cv)
131
142
  end
132
143
  end
133
-
144
+
134
145
  create_has_library :gsl
135
-
136
- SPLIT_TOKEN = ","
146
+
147
+ SPLIT_TOKEN = ','
137
148
  autoload(:Analysis, 'statsample/analysis')
138
149
  autoload(:Database, 'statsample/converters')
139
150
  autoload(:Anova, 'statsample/anova')
@@ -154,133 +165,122 @@ module Statsample
154
165
  autoload(:Multivariate, 'statsample/multivariate')
155
166
  autoload(:Multiset, 'statsample/multiset')
156
167
  autoload(:StratifiedSample, 'statsample/multiset')
157
- autoload(:MLE, 'statsample/mle')
168
+ autoload(:MLE, 'statsample/mle')
158
169
  autoload(:Regression, 'statsample/regression')
159
170
  autoload(:Test, 'statsample/test')
160
171
  autoload(:Factor, 'statsample/factor')
161
172
  autoload(:Graph, 'statsample/graph')
162
-
163
-
173
+
164
174
  class << self
165
175
  # Load an object saved to a file.
166
176
  def load(filename)
167
177
  if File.exist? filename
168
- o=false
169
- File.open(filename,"r") {|fp| o=Marshal.load(fp) }
178
+ o = false
179
+ File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
170
180
  o
171
181
  else
172
182
  false
173
183
  end
174
184
  end
175
-
176
-
177
-
185
+
178
186
  # Create a matrix using vectors as columns.
179
187
  # Use:
180
188
  #
181
189
  # matrix=Statsample.vector_cols_matrix(v1,v2)
182
190
  def vector_cols_matrix(*vs)
183
191
  # test
184
- size=vs[0].size
185
- vs.each{|v|
186
- raise ArgumentError,"Arguments should be Vector" unless v.instance_of? Statsample::Vector
187
- raise ArgumentError,"Vectors size should be the same" if v.size!=size
188
- }
189
- Matrix.rows((0...size).to_a.collect() {|i|
190
- vs.collect{|v| v[i]}
191
- })
192
+ size = vs[0].size
193
+
194
+ vs.each do |v|
195
+ fail ArgumentError, 'Arguments should be Vector' unless v.instance_of? Statsample::Vector
196
+ fail ArgumentError, 'Vectors size should be the same' if v.size != size
197
+ end
198
+
199
+ Matrix.rows((0...size).to_a.collect { |i| vs.collect { |v| v[i] } })
192
200
  end
201
+
193
202
  # Returns a duplicate of the input vectors, without missing data
194
203
  # for any of the vectors.
195
- #
204
+ #
196
205
  # a=[1,2,3,6,7,nil,3,5].to_scale
197
206
  # b=[nil,nil,5,6,4,5,10,2].to_scale
198
207
  # c=[2,4,6,7,4,5,6,7].to_scale
199
208
  # a2,b2,c2=Statsample.only_valid(a,b,c)
200
- # => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
201
- # #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
209
+ # => [#<Statsample::Scale:0xb748c8c8 @data=[3, 6, 7, 3, 5]>,
210
+ # #<Statsample::Scale:0xb748c814 @data=[5, 6, 4, 10, 2]>,
202
211
  # #<Statsample::Scale:0xb748c760 @data=[6, 7, 4, 6, 7]>]
203
212
  #
204
213
  def only_valid(*vs)
205
- i=1
206
- h=vs.inject({}) {|a,v| a["v#{i}"]=v;i+=1;a}
207
- ds=Statsample::Dataset.new(h).dup_only_valid
214
+ i = 1
215
+ h = vs.inject({}) { |acc, v| acc["v#{i}"] = v; i += 1; acc }
216
+ ds = Statsample::Dataset.new(h).dup_only_valid
208
217
  ds.vectors.values
209
218
  end
210
-
211
- # Cheap version of #only_valid.
219
+
220
+ # Cheap version of #only_valid.
212
221
  # If any vectors have missing_values, return only valid.
213
222
  # If not, return the vectors themselves
214
223
  def only_valid_clone(*vs)
215
- if vs.any? {|v| v.flawed?}
224
+ if vs.any?(&:flawed?)
216
225
  only_valid(*vs)
217
226
  else
218
227
  vs
219
228
  end
220
229
  end
221
- end
222
-
223
-
224
-
225
-
230
+ end
231
+
226
232
  module Util
227
233
  # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
228
- def normal_order_statistic_medians(i,n)
229
- if i==1
230
- u= 1.0 - normal_order_statistic_medians(n,n)
231
- elsif i==n
232
- u=0.5**(1 / n.to_f)
234
+ def normal_order_statistic_medians(i, n)
235
+ if i == 1
236
+ u = 1.0 - normal_order_statistic_medians(n, n)
237
+ elsif i == n
238
+ u = 0.5**(1 / n.to_f)
233
239
  else
234
- u= (i - 0.3175) / (n + 0.365)
240
+ u = (i - 0.3175) / (n + 0.365)
235
241
  end
236
242
  u
237
243
  end
238
-
239
- def self.nice(s,e) # :nodoc:
240
- reverse = e<s
244
+
245
+ def self.nice(s, e) # :nodoc:
246
+ reverse = e < s
241
247
  min = reverse ? e : s
242
248
  max = reverse ? s : e
243
- span=max-min
244
- return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
245
-
246
- step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
247
- out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
249
+ span = max - min
250
+ return [s, e] if span == 0 || (span.respond_to?(:infinite?) && span.infinite?)
251
+
252
+ step = 10**((Math.log(span).quo(Math.log(10))).round - 1).to_f
253
+ out = [(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
248
254
  out.reverse! if reverse
249
255
  out
250
256
  end
251
-
252
-
253
257
  end
254
-
255
-
256
-
258
+
257
259
  module Writable
258
260
  def save(filename)
259
- fp=File.open(filename,"w")
260
- Marshal.dump(self,fp)
261
+ fp = File.open(filename, 'w')
262
+ Marshal.dump(self, fp)
261
263
  fp.close
262
- end
264
+ end
263
265
  end
264
266
  # Provides method summary to generate summaries and include GetText
265
267
  module Summarizable
266
268
  include GetText
267
- bindtextdomain("statsample")
268
- def summary(method=:to_text)
269
- ReportBuilder.new(:no_title=>true).add(self).send(method)
269
+ bindtextdomain('statsample')
270
+ def summary(method = :to_text)
271
+ ReportBuilder.new(no_title: true).add(self).send(method)
270
272
  end
271
273
  end
272
274
  module STATSAMPLE__ #:nodoc:
273
275
  end
274
276
  end
275
277
 
276
-
277
-
278
278
  #--
279
- begin
279
+ begin
280
280
  require 'statsamplert'
281
281
  rescue LoadError
282
282
  module Statsample
283
- OPTIMIZED=false
283
+ OPTIMIZED = false
284
284
  end
285
285
  end
286
286
 
data/references.txt CHANGED
@@ -8,11 +8,13 @@ References
8
8
  * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
9
9
  * Hayton, J., Allen, D. & Scarpello, V. (2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
10
10
  * Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
11
+ * Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.
11
12
  * Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
12
13
  * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
13
14
  * McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
14
15
  * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
15
16
  * SPSS Manual
17
+ * Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
16
18
  * Shrout, P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428.
17
19
  * Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
18
20
  * http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
@@ -20,4 +22,3 @@ References
20
22
  * http://stattrek.com/Lesson6/SRS.aspx
21
23
  * http://talkstats.com/showthread.php?t=5056
22
24
  * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
23
- * http://www.taygeta.com/random/gaussian.html
@@ -0,0 +1,89 @@
1
+ $:.unshift File.expand_path("../lib/", __FILE__)
2
+
3
+ require 'statsample/version'
4
+ require 'date'
5
+
6
+ DESCRIPTION = <<MSG
7
+ A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0
8
+ and 2.1.1. See `.travis.yml` for more information.
9
+
10
+ Include:
11
+
12
+ - Descriptive statistics: frequencies, median, mean,
13
+ standard error, skew, kurtosis (and many others).
14
+ - Imports and exports datasets from and to Excel, CSV and plain text files.
15
+ - Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial,
16
+ tau a, tau b and gamma. Tetrachoric and Polychoric correlation provides by
17
+ statsample-bivariate-extension gem.
18
+ - Intra-class correlation
19
+ - Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for
20
+ One-way ANOVA.
21
+ - Tests: F, T, Levene, U-Mannwhitney.
22
+ - Regression: Simple, Multiple (OLS), Probit and Logit
23
+ - Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax,
24
+ Equimax, Quartimax) and Parallel Analysis and Velicer's MAP test, for
25
+ estimation of number of factors.
26
+ - Reliability analysis for simple scale and a DSL to easily analyze multiple
27
+ scales using factor analysis and correlations, if you want it.
28
+ - Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
29
+ - Sample calculation related formulas
30
+ - Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
31
+ - Creates reports on text, html and rtf, using ReportBuilder gem
32
+ - Graphics: Histogram, Boxplot and Scatterplot.
33
+ MSG
34
+
35
+ POSTINSTALL = <<MSG
36
+ ***************************************************
37
+
38
+ Thanks for installing statsample.
39
+
40
+ On *nix, you could install statsample-optimization
41
+ to retrieve gems gsl, statistics2 and a C extension
42
+ to speed some methods.
43
+
44
+ $ [sudo] gem install statsample-optimization
45
+
46
+ *****************************************************
47
+ MSG
48
+
49
+ Gem::Specification.new do |s|
50
+ s.name = "statsample"
51
+ s.version = Statsample::VERSION
52
+ s.date = Date.today.to_s
53
+ s.homepage = "https://github.com/sciruby/statsample"
54
+
55
+ s.authors = ["Claudio Bustos", "Carlos Agarie"]
56
+ s.email = ["clbustos@gmail.com", "carlos@onox.com.br"]
57
+
58
+ s.summary = "A suite for basic and advanced statistics on Ruby"
59
+ s.description = DESCRIPTION
60
+ s.post_install_message = POSTINSTALL
61
+
62
+ s.rdoc_options = ["--main", "README.md"]
63
+ s.extra_rdoc_files = ["History.txt", "LICENSE.txt", "README.md", "references.txt"]
64
+ s.require_paths = ["lib"]
65
+
66
+ s.files = `git ls-files`.split("\n")
67
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
68
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
69
+
70
+ s.add_runtime_dependency 'spreadsheet', '~> 0.6.5'
71
+ s.add_runtime_dependency 'reportbuilder', '~> 1.4'
72
+ s.add_runtime_dependency 'minimization'
73
+ s.add_runtime_dependency 'dirty-memoize'
74
+ s.add_runtime_dependency 'extendmatrix'
75
+ s.add_runtime_dependency 'rserve-client'
76
+ s.add_runtime_dependency 'rubyvis', '~> 0.5.0'
77
+ s.add_runtime_dependency 'distribution'
78
+ s.add_runtime_dependency 'rb-gsl'
79
+ s.add_runtime_dependency 'awesome_print'
80
+
81
+ s.add_development_dependency 'bundler'
82
+ s.add_development_dependency 'rake'
83
+ s.add_development_dependency 'rdoc'
84
+ s.add_development_dependency 'shoulda'
85
+ s.add_development_dependency 'shoulda-matchers', '~> 2.2'
86
+ s.add_development_dependency 'minitest'
87
+ s.add_development_dependency 'gettext'
88
+ s.add_development_dependency 'mocha'
89
+ end
@@ -0,0 +1,16 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
2
+ class StatsampleAwesomePrintBug < MiniTest::Test
3
+ context("Awesome Print integration") do
4
+ setup do
5
+ require "awesome_print"
6
+ end
7
+ should "should be flawless" do
8
+ a=[1,2,3].to_scale
9
+
10
+ assert(a!=[1,2,3])
11
+ assert_nothing_raised do
12
+ ap a
13
+ end
14
+ end
15
+ end
16
+ end
@@ -52,4 +52,12 @@ class StatsampleCrosstabTestCase < MiniTest::Unit::TestCase
52
52
  ct=Statsample::Crosstab.new(v1,v2)
53
53
  assert_equal(Matrix[[2.5,2.5],[2.5,2.5]],ct.matrix_expected)
54
54
  end
55
+ def test_crosstab_with_scale
56
+ v1=%w{1 1 1 1 1 0 0 0 0 0}.to_scale
57
+ v2=%w{0 0 0 0 0 1 1 1 1 1}.to_scale
58
+ ct=Statsample::Crosstab.new(v1,v2)
59
+ assert_equal(Matrix[[0,5],[5,0]],ct.to_matrix)
60
+ assert_nothing_raised { ct.summary }
61
+ end
62
+
55
63
  end
@@ -100,6 +100,13 @@ class StatsampleHistogramTestCase < MiniTest::Unit::TestCase
100
100
  assert_equal(2,h.sum(1,4))
101
101
 
102
102
  end
103
+ should "not raise exception when all values equal" do
104
+ assert_nothing_raised do
105
+ a = [5,5,5,5,5,5].to_scale
106
+ h=Statsample::Graph::Histogram.new(a)
107
+ h.to_svg
108
+ end
109
+ end
103
110
 
104
111
  end
105
112
  end